├── .gitignore ├── CLA.md ├── LICENSE.md ├── README.md ├── checkpoint ├── A2J │ └── README.md ├── CenterNet │ └── README.md └── SSD │ └── README.md ├── model ├── A2J │ ├── a2j.py │ ├── a2j_utilities │ │ ├── a2j_branchs.py │ │ ├── a2j_utils.py │ │ └── post_processing.py │ ├── back_bone │ │ ├── mobilenet.py │ │ └── resnet.py │ └── model.py ├── CenterNet │ └── centernet.py └── run_model.py ├── pipeline ├── azure_kinect.py ├── constants.py ├── model_setup.py └── utils.py └── readme_files └── realtime_inference.gif /.gitignore: -------------------------------------------------------------------------------- 1 | # ignore pytorch saved models 2 | *.pth 3 | *.trt 4 | 5 | # ignore python cache files 6 | *__pycache__* 7 | 8 | # ignore vscode config 9 | *.vscode 10 | 11 | -------------------------------------------------------------------------------- /CLA.md: -------------------------------------------------------------------------------- 1 | ## Individual Contributor License Agreement (CLA) 2 | 3 | **Thank you for submitting your contributions to this project.** 4 | 5 | By signing this CLA, you agree that the following terms apply to all of your past, present and future contributions 6 | to the project. 7 | 8 | ### License. 9 | 10 | You hereby represent that all present, past and future contributions are governed by the 11 | [MIT License](https://opensource.org/licenses/MIT) 12 | copyright statement. 13 | 14 | This entails that to the extent possible under law, you transfer all copyright and related or neighboring rights 15 | of the code or documents you contribute to the project itself or its maintainers. 16 | Furthermore you also represent that you have the authority to perform the above waiver 17 | with respect to the entirety of your contributions. 18 | 19 | ### Moral Rights. 20 | 21 | To the fullest extent permitted under applicable law, you hereby waive, and agree not to 22 | assert, all of your “moral rights” in or relating to your contributions for the benefit of the project. 23 | 24 | ### Third Party Content. 25 | 26 | If your Contribution includes or is based on any source code, object code, bug fixes, configuration changes, tools, 27 | specifications, documentation, data, materials, feedback, information or other works of authorship that were not 28 | authored by you (“Third Party Content”) or if you are aware of any third party intellectual property or proprietary 29 | rights associated with your Contribution (“Third Party Rights”), 30 | then you agree to include with the submission of your Contribution full details respecting such Third Party 31 | Content and Third Party Rights, including, without limitation, identification of which aspects of your 32 | Contribution contain Third Party Content or are associated with Third Party Rights, the owner/author of the 33 | Third Party Content and Third Party Rights, where you obtained the Third Party Content, and any applicable 34 | third party license terms or restrictions respecting the Third Party Content and Third Party Rights. For greater 35 | certainty, the foregoing obligations respecting the identification of Third Party Content and Third Party Rights 36 | do not apply to any portion of a Project that is incorporated into your Contribution to that same Project. 37 | 38 | ### Representations.
39 | 40 | You represent that, other than the Third Party Content and Third Party Rights identified by 41 | you in accordance with this Agreement, you are the sole author of your Contributions and are legally entitled 42 | to grant the foregoing licenses and waivers in respect of your Contributions. If your Contributions were 43 | created in the course of your employment with your past or present employer(s), you represent that such 44 | employer(s) has authorized you to make your Contributions on behalf of such employer(s) or such employer 45 | (s) has waived all of their right, title or interest in or to your Contributions. 46 | 47 | ### Disclaimer. 48 | 49 | To the fullest extent permitted under applicable law, your Contributions are provided on an "as is" 50 | basis, without any warranties or conditions, express or implied, including, without limitation, any implied 51 | warranties or conditions of non-infringement, merchantability or fitness for a particular purpose. You are not 52 | required to provide support for your Contributions, except to the extent you desire to provide support. 53 | 54 | ### No Obligation. 55 | 56 | You acknowledge that the maintainers of this project are under no obligation to use or incorporate your contributions 57 | into the project. The decision to use or incorporate your contributions into the project will be made at the 58 | sole discretion of the maintainers or their authorized delegates. 59 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019 Boshen Zhang
2 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 3D_HandPose 2 | 3 | This repository implements a realtime 3D hand posture estimation pipeline running on the Jetson platform using an [**Azure Kinect** camera](https://azure.microsoft.com/en-us/services/kinect-dk/).
4 | Please refer to the following repositories before getting started here: 5 | - [centernet_kinect](https://github.com/NVIDIA-AI-IOT/centernet_kinect) 6 | - [Hand Posture Estimation](https://github.com/NVIDIA-AI-IOT/a2j_handpose_3d) 7 | 8 |

9 | landing graphic 10 |

11 | 12 | 13 | There are two stages to our pipeline, plus instructions for running inference: 14 | 15 | * ## [CenterNet Bounding Box](#centernet_bounding_box) 16 | * ## [A2J Posture Detection](#a2j_posture_detection) 17 | * ## [Run inference](#run_infrence) 18 | 19 | 20 | ## CenterNet Bounding Box 21 | 22 | The first stage localizes the hand using a fusion of infrared and depth images, as sketched below.
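To make the fusion concrete, here is a minimal sketch of how an infrared frame and a depth frame could be stacked into a single multi-channel detector input. The function name, normalization constants, and channel order are illustrative assumptions only; the actual preprocessing is defined in the centernet_kinect repository.

```python
import numpy as np
import torch

def fuse_ir_depth(ir_frame: np.ndarray, depth_frame: np.ndarray,
                  max_depth_mm: float = 1000.0) -> torch.Tensor:
    """Stack a single-channel IR frame and depth frame into one detector input.

    Both arrays are expected to share the depth-camera resolution; the
    normalization ranges below are placeholders, not project constants.
    """
    ir = ir_frame.astype(np.float32)
    ir = ir / (ir.max() + 1e-6)                                # scale IR to [0, 1]

    depth = depth_frame.astype(np.float32)
    depth = np.clip(depth, 0.0, max_depth_mm) / max_depth_mm   # scale depth to [0, 1]

    fused = np.stack([ir, depth], axis=0)                      # (2, H, W)
    return torch.from_numpy(fused).unsqueeze(0)                # (1, 2, H, W) batch for the detector
```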
23 | **NOTE:** More details can be found in the centernet_kinect repository. 24 | 25 | 26 | ## A2J Posture Detection 27 | 28 | The second stage performs 3D hand posture estimation on the region of interest selected by the previous step, as illustrated in the sketch below.
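The following sketch shows one way the hand-off between the two stages could look, using the `A2J` and `PostProcess` classes defined in this repository. The helper name `estimate_hand_pose`, the bilinear resize, and the assumption that `A2J_TARGET_SIZE` in `pipeline/constants.py` is stored as `(width, height)` are illustrative; the actual cropping and coordinate re-mapping are handled by the realtime pipeline under *pipeline/*.

```python
import torch
import torch.nn.functional as F

import pipeline.constants as const
from model.A2J.a2j import A2J
from model.A2J.a2j_utilities.post_processing import PostProcess


def estimate_hand_pose(depth_image: torch.Tensor, box, model: A2J, post_process: PostProcess):
    """Crop the detected hand region and regress its 3D joints with A2J.

    :param depth_image: (1, 1, H, W) depth tensor from the Azure Kinect
    :param box: integer (x_min, y_min, x_max, y_max) box from the CenterNet stage
    """
    x_min, y_min, x_max, y_max = box
    crop = depth_image[:, :, y_min:y_max, x_min:x_max]

    # Resize to the A2J input resolution; A2J_TARGET_SIZE is assumed to be (width, height).
    crop = F.interpolate(crop, size=(const.A2J_TARGET_SIZE[1], const.A2J_TARGET_SIZE[0]),
                         mode="bilinear", align_corners=False)

    with torch.no_grad():
        joint_cls, offset_reg, depth_reg = model(crop)
        # PostProcess aggregates the anchor-based outputs into per-joint (x, y, depth)
        # predictions in crop coordinates; mapping back to the full frame is omitted here.
        joints = post_process(joint_cls, offset_reg, depth_reg)

    return joints[0]
```

A typical caller would construct `A2J(num_joints=...)`, load the trained weights from *checkpoint/A2J*, and feed it the box produced by the CenterNet stage.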
29 | **NOTE:** To train a model, please refer to the Hand Posture Estimation repository. 30 | 31 | 32 | ## Run inference 33 | 34 | - First, configure the *pipeline/constants.py* file: 35 | - **CENTERNET_MODEL_PATH**: place the CenterNet model weights in *"/checkpoint/CenterNet"*
36 | with the naming convention provided in the original repository. 37 | - Configure the CenterNet portion of the file as described in the original [repository](https://github.com/NVIDIA-AI-IOT/centernet_kinect#get_pre_trained_weights).
38 | If you are using the weights directly from the original repository, you do not have to modify this section. 39 | - **A2J_MODEL_PATH**: place the A2J model weights in *"/checkpoint/A2J"*
40 | with the naming convention provided in the original repository. 41 | - Configure the A2J portion of the file to match how you set up the training pipeline for [Hand Posture Estimation](https://github.com/NVIDIA-AI-IOT/a2j_handpose_3d).
42 | - For faster inference, we use the TensorRT inference engine to optimize the models. This will take some time to compile the models and create a TRT engine (a minimal conversion sketch follows below).
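For illustration only, this is one way an fp16 TensorRT engine could be built for the A2J network with [torch2trt](https://github.com/NVIDIA-AI-IOT/torch2trt) on Jetson; the project's own conversion is performed inside the pipeline when `--trt True` is passed, so the module choice and the `(width, height)` reading of `A2J_TARGET_SIZE` below are assumptions, not the project's *model_setup.py*.

```python
# Hedged sketch, assuming torch2trt is installed and a trained A2J checkpoint is available.
import torch
from torch2trt import torch2trt

import pipeline.constants as const
from model.A2J.a2j import A2J

model = A2J(num_joints=18, backbone_name="resnet18").cuda().eval()
# load the trained weights from checkpoint/A2J here before converting (omitted)

# A2J_TARGET_SIZE is assumed to be stored as (width, height)
dummy = torch.randn(1, 1, const.A2J_TARGET_SIZE[1], const.A2J_TARGET_SIZE[0]).cuda()

# fp16_mode=True builds a half-precision engine; the first conversion is slow
model_trt = torch2trt(model, [dummy], fp16_mode=True)
torch.save(model_trt.state_dict(), "a2j_trt.pth")  # cache so later runs can reload the engine
```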
43 | - Run realtime inference on a jetson platform. 44 | ```bash 45 | cd pipeline 46 | python3 azure_kinect.py 47 | 48 | # Optional for faster inference 49 | python3 azure_kinect.py --trt True # for optimizing the models with TensorRT fp16 50 | ``` 51 | -------------------------------------------------------------------------------- /checkpoint/A2J/README.md: -------------------------------------------------------------------------------- 1 | # A2J checkpoint directory 2 | -------------------------------------------------------------------------------- /checkpoint/CenterNet/README.md: -------------------------------------------------------------------------------- 1 | # CenterNet checkpoint directory 2 | -------------------------------------------------------------------------------- /checkpoint/SSD/README.md: -------------------------------------------------------------------------------- 1 | # SSD checkpoint directory 2 | -------------------------------------------------------------------------------- /model/A2J/a2j.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2019 Boshen Zhang 3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | ''' 9 | import os 10 | import sys 11 | import torch 12 | import torch.nn as nn 13 | 14 | # PROJ ROOT DIR 15 | DIR_PATH = os.path.dirname(os.path.abspath(__file__)) # A2J 16 | MODEL_PATH = os.path.join(DIR_PATH, os.path.pardir) # Model 17 | ROOT_PATH = os.path.join(MODEL_PATH, os.path.pardir) # root 18 | 19 | sys.path.append(ROOT_PATH) 20 | 21 | # Import Project Library 22 | from model.A2J.back_bone.resnet import ResnetBackbone 23 | from model.A2J.back_bone.mobilenet import MobileNet 24 | from model.A2J.a2j_utilities.a2j_branchs import DepthRegression, OffsetRegression, JointClassification 25 | 26 | A2J_BACKBONE_CONFIG = { 27 | "resnet18": {"backbone": ResnetBackbone, "common_trunk": 256, "Regression_trunk": 512}, 28 | "resnet34": {"backbone": ResnetBackbone, "common_trunk": 256, "Regression_trunk": 512}, 29 | "resnet50": {"backbone": ResnetBackbone, "common_trunk": 1024, "Regression_trunk": 2048}, 30 | "resnet101": {"backbone": ResnetBackbone, "common_trunk": 1024, "Regression_trunk": 2048}, 31 | "resnet152": {"backbone": ResnetBackbone, "common_trunk": 1024, "Regression_trunk": 2048}, 32 | "mobilenet": {"backbone": MobileNet, "common_trunk": 512, "Regression_trunk": 1024}, 33 | } 34 | 35 | class BacknoneNetwork(nn.Module): 36 | """ 37 | Backbone Network Base Class" 38 | """ 39 | def __init__(self, backbone_name="resnet18", backbone_pretrained=True): 40 | """ 41 | Class constructor 42 | 43 | :param backbone_name: the name of the backbone network 44 | :param backbone_pretrained: load a pretrained backbone network 45 | """ 46 | super(BacknoneNetwork, self).__init__() 47 | self.model = A2J_BACKBONE_CONFIG[backbone_name]["backbone"](backbone_name="resnet18", backbone_pretrained=True) 48 | 49 | def forward(self, x): 50 | x1, x2 = self.model(x) 51 | return x1, x2 52 | 53 | class A2J(nn.Module): 54 | """ 55 | A2J model class 56 | """ 57 | def __init__(self, num_joints=18, backbone_name="resnet18", backbone_pretrained=True): 58 | """ 59 | Class constructor 60 | 61 | :param num_joints: number of joints to predict 62 | :param backbone_name: the name of the backbone network 63 | :param backbone_pretrained: load a pretrained backbone network 64 | """ 65 | super(A2J, self).__init__() 66 | Backbone_Model = A2J_BACKBONE_CONFIG[backbone_name]["backbone"] 67 | 68 | self.back_bone = Backbone_Model(name=backbone_name, pretrained=backbone_pretrained) 69 | 70 | self.offset_regression = OffsetRegression(input_channels=A2J_BACKBONE_CONFIG[backbone_name]["Regression_trunk"], num_joints=num_joints) 71 | self.depth_regression = DepthRegression(input_channels=A2J_BACKBONE_CONFIG[backbone_name]["Regression_trunk"], num_joints=num_joints) 72 | self.joint_classification = JointClassification(input_channels=A2J_BACKBONE_CONFIG[backbone_name]["common_trunk"], num_joints=num_joints) 73 | 74 | # self.Backbone = Backbone_Model(name=backbone_name, pretrained=backbone_pretrained) 75 | 76 | # self.regressionModel = OffsetRegression(input_channels=A2J_BACKBONE_CONFIG[backbone_name]["Regression_trunk"], num_joints=num_joints) 77 | # self.DepthRegressionModel = DepthRegression(input_channels=A2J_BACKBONE_CONFIG[backbone_name]["Regression_trunk"], num_joints=num_joints) 78 | # self.classificationModel = JointClassification(input_channels=A2J_BACKBONE_CONFIG[backbone_name]["common_trunk"], num_joints=num_joints) 79 | 80 | 81 | def forward(self, x): 82 | out3, out4 = self.back_bone(x) 83 | offset_regression = self.offset_regression(out4) 84 | depth_regression = self.depth_regression(out4) 85 | joint_classification = 
self.joint_classification(out3) 86 | 87 | # out3, out4 = self.Backbone(x) 88 | # offset_regression = self.regressionModel(out4) 89 | # depth_regression = self.DepthRegressionModel(out4) 90 | # joint_classification = self.classificationModel(out3) 91 | 92 | 93 | return joint_classification, offset_regression, depth_regression 94 | -------------------------------------------------------------------------------- /model/A2J/a2j_utilities/a2j_branchs.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2019 Boshen Zhang 3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | ''' 9 | import os 10 | import sys 11 | import torch.nn as nn 12 | 13 | # PROJ ROOT DIR 14 | DIR_PATH = os.path.dirname(os.path.abspath(__file__)) # a2j_utilities 15 | A2J_PATH = os.path.join(DIR_PATH, os.path.pardir) # A2J 16 | MODEL_PATH = os.path.join(A2J_PATH, os.path.pardir) # model 17 | ROOT_PATH = os.path.join(MODEL_PATH, os.path.pardir) # root 18 | sys.path.append(ROOT_PATH) 19 | 20 | # Import Project Library 21 | from model.A2J.back_bone.resnet import get_ResNet 22 | 23 | class DepthRegression(nn.Module): 24 | """ 25 | Depth regression module 26 | 27 | regress the depth of the joints from the anchor points 28 | """ 29 | def __init__(self, input_channels, output_channels=256, num_anchors=16, num_joints=18): 30 | """ 31 | Class initializer 32 | 33 | :param input_channels: number of input channels 34 | :param output_channels: number of output channels 35 | :param num_anchors: total number of anchor points 36 | :param num_joints: total number of joints to predict 37 | """ 38 | super(DepthRegression, self).__init__() 39 | self.num_joints = num_joints 40 | self.num_anchors = num_anchors 41 | 42 | self.conv1 = nn.Conv2d(input_channels, output_channels, kernel_size=3, padding=1) 43 | self.bn1 = nn.BatchNorm2d(output_channels) 44 | 45 | self.conv2 = nn.Conv2d(output_channels, output_channels, kernel_size=3, padding=1) 46 | self.bn2 = nn.BatchNorm2d(output_channels) 47 | 48 | self.conv3 = nn.Conv2d(output_channels, output_channels, kernel_size=3, padding=1) 49 | self.bn3 = nn.BatchNorm2d(output_channels) 50 | 51 | self.conv4 = nn.Conv2d(output_channels, output_channels, kernel_size=3, padding=1) 52 | self.bn4 = nn.BatchNorm2d(output_channels) 53 | 54 | self.output = nn.Conv2d(output_channels, num_anchors*num_joints, kernel_size=3, padding=1) 55 | 56 | # Activation Function 57 | self.relu = nn.LeakyReLU(inplace=True) 58 | 59 | 
self._initialize() 60 | 61 | def _initialize(self): 62 | for m in self.modules(): 63 | if isinstance(m, nn.Conv2d): 64 | nn.init.xavier_normal_(m.weight.data) 65 | elif isinstance(m, nn.BatchNorm2d): 66 | m.weight.data.fill_(1) 67 | m.bias.data.zero_() 68 | 69 | def forward(self, x): 70 | # (N, inChannels, 10, 9) 71 | out = self.conv1(x) # (N, 256, 10, 9) 72 | out = self.bn1(out) # (N, 256, 10, 9) 73 | out = self.relu(out) # (N, 256, 10, 9) 74 | 75 | out = self.conv2(out) # (N, 256, 10, 9) 76 | out = self.bn2(out) # (N, 256, 10, 9) 77 | out = self.relu(out) # (N, 256, 10, 9) 78 | 79 | out = self.conv3(out) # (N, 256, 10, 9) 80 | out = self.bn3(out) # (N, 256, 10, 9) 81 | out = self.relu(out) # (N, 256, 10, 9) 82 | 83 | out = self.conv4(out) # (N, 256, 10, 9) 84 | out = self.bn4(out) # (N, 256, 10, 9) 85 | out = self.relu(out) # (N, 256, 10, 9) 86 | 87 | out = self.output(out) # (N, num_joints*num_anchors, 10, 9) 88 | 89 | out = out.permute(0, 3, 2, 1) # (N, 9, 10, num_joints*num_anchors) 90 | batch_size, width, height, channels = out.shape 91 | out = out.view(batch_size, width, height, self.num_anchors, self.num_joints) # (N, 9, 10, num_anchors, num_joints) 92 | return out.contiguous().view(batch_size, -1, self.num_joints) # (N, 9*10*num_anchors, num_joint) 93 | 94 | class OffsetRegression(nn.Module): 95 | """ 96 | Offset Regression class 97 | 98 | estimate the joint offsets from the anchorpoints 99 | """ 100 | def __init__(self, input_channels, output_channels=256, num_anchors=16, num_joints=18): 101 | """ 102 | Class initializer 103 | 104 | :param input_channels: number of input channels 105 | :param output_channels: number of output channels 106 | :param num_anchors: total number of anchor points 107 | :param num_joints: total number of joints to predict 108 | """ 109 | super(OffsetRegression, self).__init__() 110 | 111 | self.num_anchors = num_anchors 112 | self.num_joints = num_joints 113 | 114 | self.conv1 = nn.Conv2d(input_channels, output_channels, kernel_size=3, padding=1) 115 | self.bn1 = nn.BatchNorm2d(output_channels) 116 | 117 | self.conv2 = nn.Conv2d(output_channels, output_channels, kernel_size=3, padding=1) 118 | self.bn2 = nn.BatchNorm2d(output_channels) 119 | 120 | self.conv3 = nn.Conv2d(output_channels, output_channels, kernel_size=3, padding=1) 121 | self.bn3 = nn.BatchNorm2d(output_channels) 122 | 123 | self.conv4 = nn.Conv2d(output_channels, output_channels, kernel_size=3, padding=1) 124 | self.bn4 = nn.BatchNorm2d(output_channels) 125 | 126 | self.output = nn.Conv2d(output_channels, num_anchors*num_joints*2, kernel_size=3, padding=1) 127 | 128 | # Activation Function 129 | self.relu = nn.LeakyReLU(inplace=True) 130 | 131 | self._initialize() 132 | 133 | def _initialize(self): 134 | for m in self.modules(): 135 | if isinstance(m, nn.Conv2d): 136 | nn.init.xavier_normal_(m.weight.data) 137 | elif isinstance(m, nn.BatchNorm2d): 138 | m.weight.data.fill_(1) 139 | m.bias.data.zero_() 140 | 141 | def forward(self, x): 142 | out = self.conv1(x) # (N, 256, 10, 9) 143 | out = self.bn1(out) # (N, 256, 10, 9) 144 | out = self.relu(out) # (N, 256, 10, 9) 145 | 146 | out = self.conv2(out) # (N, 256, 10, 9) 147 | out = self.bn2(out) # (N, 256, 10, 9) 148 | out = self.relu(out) # (N, 256, 10, 9) 149 | 150 | out = self.conv3(out) # (N, 256, 10, 9) 151 | out = self.bn3(out) # (N, 256, 10, 9) 152 | out = self.relu(out) # (N, 256, 10, 9) 153 | 154 | out = self.conv4(out) # (N, 256, 10, 9) 155 | out = self.bn4(out) # (N, 256, 10, 9) 156 | out = self.relu(out) # (N, 256, 10, 9) 157 | 158 
| out = self.output(out) # (N, num_joints*num_anchors*2, 10, 9) 159 | 160 | out = out.permute(0, 3, 2, 1) # (N, 9, 10, num_joints*num_anchors*2) 161 | batch_size, width, height, channels = out.shape 162 | out = out.view(batch_size, width, height, self.num_anchors, self.num_joints, 2) # (N, 9, 10, num_anchors, num_joints, 2) 163 | return out.contiguous().view(batch_size, -1, self.num_joints, 2) # (N, 9*10*num_anchors, num_joints, 2) 164 | 165 | class JointClassification(nn.Module): 166 | """ 167 | Joint classification class 168 | """ 169 | def __init__(self, input_channels, output_channels=256, num_anchors=16, num_joints=18): 170 | """ 171 | Class initializer 172 | 173 | :param input_channels: number of input channels 174 | :param output_channels: number of output channels 175 | :param num_anchors: total number of anchor points 176 | :param num_joints: total number of joints to predict 177 | """ 178 | super(JointClassification, self).__init__() 179 | 180 | self.num_anchors = num_anchors 181 | self.num_joints = num_joints 182 | 183 | self.conv1 = nn.Conv2d(input_channels, output_channels, kernel_size=3, padding=1) 184 | self.bn1 = nn.BatchNorm2d(output_channels) 185 | 186 | self.conv2 = nn.Conv2d(output_channels, output_channels, kernel_size=3, padding=1) 187 | self.bn2 = nn.BatchNorm2d(output_channels) 188 | 189 | self.conv3 = nn.Conv2d(output_channels, output_channels, kernel_size=3, padding=1) 190 | self.bn3 = nn.BatchNorm2d(output_channels) 191 | 192 | self.conv4 = nn.Conv2d(output_channels, output_channels, kernel_size=3, padding=1) 193 | self.bn4 = nn.BatchNorm2d(output_channels) 194 | 195 | self.output = nn.Conv2d(output_channels, num_anchors*num_joints, kernel_size=3, padding=1) 196 | 197 | # Activation Function 198 | self.relu = nn.LeakyReLU(inplace=True) 199 | 200 | self._initialize() 201 | 202 | def _initialize(self): 203 | for m in self.modules(): 204 | if isinstance(m, nn.Conv2d): 205 | nn.init.xavier_normal_(m.weight.data) 206 | elif isinstance(m, nn.BatchNorm2d): 207 | m.weight.data.fill_(1) 208 | m.bias.data.zero_() 209 | 210 | def forward(self, x): 211 | out = self.conv1(x) 212 | out = self.bn1(out) 213 | out = self.relu(out) 214 | 215 | out = self.conv2(out) 216 | out = self.bn2(out) 217 | out = self.relu(out) 218 | 219 | out = self.conv3(out) 220 | out = self.bn3(out) 221 | out = self.relu(out) 222 | 223 | out = self.conv4(out) 224 | out = self.bn4(out) 225 | out = self.relu(out) 226 | 227 | out = self.output(out) 228 | 229 | out = out.permute(0, 3, 2, 1) 230 | batch_size, width, height, channels = out.shape 231 | out = out.view(batch_size, width, height, self.num_anchors, self.num_joints) 232 | return out.contiguous().view(batch_size, -1, self.num_joints) 233 | -------------------------------------------------------------------------------- /model/A2J/a2j_utilities/a2j_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2019 Boshen Zhang 3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | ''' 9 | import os 10 | import sys 11 | import torch 12 | import numpy as np 13 | import torch.nn as nn 14 | import torch.nn.functional as F 15 | 16 | # PROJ ROOT DIR 17 | DIR_PATH = os.path.dirname(os.path.abspath(__file__)) # a2j_utilities 18 | A2J_PATH = os.path.join(DIR_PATH, os.path.pardir) # A2J 19 | MODEL_PATH = os.path.join(A2J_PATH, os.path.pardir) # model 20 | ROOT_PATH = os.path.join(MODEL_PATH, os.path.pardir) # root 21 | sys.path.append(ROOT_PATH) 22 | 23 | # Import Project Libraries 24 | import pipeline.constants as const 25 | 26 | 27 | 28 | def generate_anchors(p_h=None, p_w=None): 29 | """ 30 | Generate anchor shape 31 | 32 | :param p_h: anchor hieght layout 33 | :param p_w: anchor width layout 34 | """ 35 | if p_h is None: 36 | p_h = np.array([2, 6, 10, 14]) 37 | 38 | if p_w is None: 39 | p_w = np.array([2, 6, 10, 14]) 40 | 41 | num_anchors = len(p_h) * len(p_w) 42 | 43 | # Initialize the anchor points 44 | k = 0 45 | anchors = np.zeros((num_anchors, 2)) 46 | for i in range(len(p_w)): 47 | for j in range(len(p_h)): 48 | anchors[k,1] = p_w[j] 49 | anchors[k,0] = p_h[i] 50 | k += 1 51 | return anchors 52 | 53 | def shift(shape, stride, anchor): 54 | """ 55 | Create the locations of all the anchonrs in the in put image 56 | 57 | :param shape: common trunk (H, W) 58 | :param stride: the downsampling factor from input to common trunk 59 | :param anchor: anchor 60 | """ 61 | shift_h = np.arange(0, shape[0]) * stride # (shape[0]) 10 62 | shift_w = np.arange(0, shape[1]) * stride # (shape[1]) 9 63 | 64 | shift_h, shift_w = np.meshgrid(shift_h, shift_w) # (shape[1], shape[0]) (9, 10), (shape[1], shape[0]) (9, 10) 65 | shifts = np.vstack( (shift_h.ravel(), shift_w.ravel()) ).transpose() # (shape[0]*shape[1], 2) (90, 2) 66 | 67 | A = anchor.shape[0] # 16 68 | K = shifts.shape[0] # (shape[0]*shape[1]) (90) 69 | 70 | all_anchors = (anchor.reshape(1,A,2) + shifts.reshape((1, K, 2)).transpose((1, 0, 2))) # (shape[0]*shape[1], A, 2) 71 | all_anchors = all_anchors.reshape((K*A, 2)) # (shape[0]*shape[1]*A, 2) 72 | return all_anchors 73 | -------------------------------------------------------------------------------- /model/A2J/a2j_utilities/post_processing.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2019 Boshen Zhang 3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | ''' 9 | import os 10 | import sys 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | 15 | # PROJ ROOT DIR 16 | DIR_PATH = os.path.dirname(os.path.abspath(__file__)) # a2j_utilities 17 | A2J_PATH = os.path.join(DIR_PATH, os.path.pardir) # A2J 18 | MODEL_PATH = os.path.join(A2J_PATH, os.path.pardir) # model 19 | ROOT_PATH = os.path.join(MODEL_PATH, os.path.pardir) # root 20 | sys.path.append(ROOT_PATH) 21 | 22 | # Import Project Library 23 | import pipeline.constants as const 24 | from model.A2J.a2j_utilities.a2j_utils import generate_anchors, shift 25 | 26 | class PostProcess(nn.Module): 27 | """ 28 | PosrProcessing class 29 | """ 30 | def __init__(self, p_h=None, p_w=None, shape=[const.A2J_TARGET_SIZE[1]//16, const.A2J_TARGET_SIZE[0]//16],\ 31 | stride=const.A2J_STRIDE): 32 | """ 33 | Class constructior 34 | 35 | :param p_w: 36 | """ 37 | 38 | super(PostProcess, self).__init__() 39 | anchors = generate_anchors(p_h=p_h, p_w=p_w) 40 | self.all_anchors = torch.from_numpy(shift(shape, stride, anchors)).float() 41 | 42 | def forward(self, joint_classifications, offset_regressions, depth_regressions): 43 | """ 44 | forward pass through the module 45 | 46 | :param joint_classifications: type torch.tensor, joint classification output of the model 47 | :param offset_regressions: type torch.tensor, offset regression output of the model 48 | :param depth_regressions: type torch.tensor, depth rgression output of the model 49 | """ 50 | DEVICE = joint_classifications.device 51 | 52 | batch_size = joint_classifications.shape[0] 53 | anchor = self.all_anchors.to(DEVICE) # (shape[0]*shape[1]*anchor_stride, 2) (1440, 2) 54 | predictions = list() 55 | 56 | for i in range(batch_size): 57 | joint_classification = joint_classifications[i] # (shape[0]*shape[1]*anchor_stride, num_joints) (1440, 18) 58 | offset_regression = offset_regressions[i] # (shape[0]*shape[1]*anchor_stride, num_joints, 2) (1440, 18, 2) 59 | depth_regression = depth_regressions[i] # (shape[0]*shape[1]*anchor_stride, num_joits) (1440, 18) 60 | 61 | # xy_regression: is the location of each anchor point + the offset 62 | # offset_regression: is giving us the offset 63 | xy_regression = torch.unsqueeze(anchor, 1).to(DEVICE) + offset_regression # (shape[0]*shape[1]*anchor_stride, 2) (1440, 18, 2) 64 | 65 | # reg_weight: is gining us the classification (importance) of each anchor point 66 | reg_weight = F.softmax(joint_classification, dim=0) # (shape[0]*shape[1]*anchor_stride, num_joints) 
(1440, 18) 67 | 68 | # reg_weight_xy: reg_weight expanded to two weight tensors, one to multiply with each of the x and y coordinates 69 | reg_weight_xy = reg_weight.unsqueeze(2).expand(reg_weight.shape[0], reg_weight.shape[1], 2).to(DEVICE) # (shape[0]*shape[1]*anchor_stride, num_joints, 2) (1440, 18, 2) 70 | 71 | prediction_xy = (reg_weight_xy * xy_regression).sum(0) 72 | prediction_depth = (reg_weight * depth_regression).sum(0) 73 | 74 | prediction_depth = prediction_depth.unsqueeze(1).to(DEVICE) 75 | 76 | prediction = torch.cat((prediction_xy, prediction_depth), 1) # (num_joints, 3): x, y, depth 77 | predictions.append(prediction) 78 | 79 | return predictions 80 | -------------------------------------------------------------------------------- /model/A2J/back_bone/mobilenet.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2019 Boshen Zhang 3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 | ''' 9 | import torch 10 | import torchvision 11 | import torch.nn.functional as F 12 | 13 | from torch import nn 14 | from math import sqrt 15 | from itertools import product as product 16 | 17 | # Set the global device variable to cuda is GPU is avalible 18 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 19 | 20 | class MobileNet(nn.Module): 21 | """ 22 | MobileNet Bass class to produce lower lever features 23 | """ 24 | def __init__(self, **kwargs): 25 | super(MobileNet, self).__init__() 26 | 27 | # Activation function 28 | self.relu = nn.LeakyReLU(0.01) 29 | 30 | # Standard MobileNet Convolution layers 31 | self.conv1_1 = nn.Conv2d(1, 32, kernel_size=3, stride=2, padding=1) 32 | self.bn1_1 = nn.BatchNorm2d(32) 33 | self.conv1_2 = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1, groups=32) 34 | self.bn1_2 = nn.BatchNorm2d(32) 35 | self.conv1_3 = nn.Conv2d(32, 64, kernel_size=1, stride=1, padding=0) 36 | self.bn1_3 = nn.BatchNorm2d(64) 37 | self.conv1_4 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, groups=64) 38 | self.bn1_4 = nn.BatchNorm2d(64) 39 | 40 | self.conv2_1 = nn.Conv2d(64, 128, kernel_size=1, stride=1, padding=0) 41 | self.bn2_1 = nn.BatchNorm2d(128) 42 | self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1, groups=128) 43 | self.bn2_2 = nn.BatchNorm2d(128) 44 | self.conv2_3 = nn.Conv2d(128, 128, kernel_size=1, stride=1, padding=0) 45 | self.bn2_3 = nn.BatchNorm2d(128) 46 | self.conv2_4 = nn.Conv2d(128, 128, kernel_size=3, stride=2, padding=1, groups=128) 47 | self.bn2_4 = nn.BatchNorm2d(128) 48 | 49 | self.conv3_1 = nn.Conv2d(128, 256, kernel_size=1, stride=1, padding=0) 50 | self.bn3_1 = nn.BatchNorm2d(256) 51 | self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, groups=256) 52 | self.bn3_2 = nn.BatchNorm2d(256) 53 | self.conv3_3 = nn.Conv2d(256, 512, kernel_size=1, stride=1, padding=0) 54 | self.bn3_3 = nn.BatchNorm2d(512) 55 | self.conv3_4 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, groups=512) 56 | self.bn3_4 = nn.BatchNorm2d(512) 57 | self.conv3_5 = nn.Conv2d(512, 512, kernel_size=1, stride=1, padding=0) 58 | self.bn3_5 = nn.BatchNorm2d(512) 59 | self.conv3_6 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, groups=512) 60 | self.bn3_6 = nn.BatchNorm2d(512) 61 | self.conv3_7 = nn.Conv2d(512, 512, kernel_size=1, stride=1, padding=0) 62 | self.bn3_7 = nn.BatchNorm2d(512) # <--- 63 | self.conv3_8 = nn.Conv2d(512, 512, kernel_size=3, stride=2, padding=1, groups=512) 64 | self.bn3_8 = nn.BatchNorm2d(512) 65 | 66 | self.conv4_1 = nn.Conv2d(512, 1024, kernel_size=1, stride=1, padding=0) 67 | self.bn4_1 = nn.BatchNorm2d(1024) 68 | self.conv4_2 = nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1, groups=1024) 69 | self.bn4_2 = nn.BatchNorm2d(1024) 70 | self.conv4_3 = nn.Conv2d(1024, 1024, kernel_size=1, stride=1, padding=0) 71 | self.bn4_3 = nn.BatchNorm2d(1024) 72 | self.conv4_4 = nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1, groups=1024) 73 | self.bn4_4 = nn.BatchNorm2d(1024) 74 | self.conv4_5 = nn.Conv2d(1024, 1024, kernel_size=1, stride=1, padding=0) 75 | self.bn4_5 = nn.BatchNorm2d(1024) 76 | self.conv4_6 = nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1, groups=1024) 77 | self.bn4_6 = nn.BatchNorm2d(1024) 78 | self.conv4_7 = nn.Conv2d(1024, 1024, kernel_size=1, stride=1, padding=0) 79 | self.bn4_7 = nn.BatchNorm2d(1024) 80 | self.conv4_8 = nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1, groups=1024) 81 | self.bn4_8 = nn.BatchNorm2d(1024) 
82 | self.conv4_9 = nn.Conv2d(1024, 1024, kernel_size=1, stride=1, padding=0) 83 | self.bn4_9 = nn.BatchNorm2d(1024) 84 | self.conv4_10 = nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1, groups=1024) 85 | self.bn4_10 = nn.BatchNorm2d(1024) 86 | self.conv4_11 = nn.Conv2d(1024, 1024, kernel_size=1, stride=1, padding=0) 87 | self.bn4_11 = nn.BatchNorm2d(1024) # <--- 88 | 89 | self._init_conv2d() 90 | 91 | def _init_conv2d(self): 92 | """ 93 | Initialize convolution parameters. 94 | """ 95 | for c in self.children(): 96 | if isinstance(c, nn.Conv2d): 97 | nn.init.xavier_uniform_(c.weight) 98 | nn.init.constant_(c.bias, 0.) 99 | 100 | 101 | def forward(self, x): 102 | out = self.relu(self.conv1_1(x)) # (N, 32, 150, 150) 103 | out = self.bn1_1(out) # (N, 32, 150, 150) 104 | out = self.relu(self.conv1_2(out)) # (N, 32, 150, 150) 105 | out = self.bn1_2(out) # (N, 32, 150, 150) 106 | out = self.relu(self.conv1_3(out)) # (N, 64, 150, 150) 107 | out = self.bn1_3(out) # (N, 64, 150, 150) 108 | 109 | out = self.relu(self.conv1_4(out)) # (N, 64, 75, 75) 110 | out = self.bn1_4(out) # (N, 64, 75, 75) 111 | out = self.relu(self.conv2_1(out)) # (N, 128, 75, 75) 112 | out = self.bn2_1(out) # (N, 128, 75, 75) 113 | out = self.relu(self.conv2_2(out)) # (N, 128, 75, 75) 114 | out = self.bn2_2(out) # (N, 128, 75, 75) 115 | out = self.relu(self.conv2_3(out)) # (N, 128, 75, 75) 116 | out = self.bn2_3(out) # (N, 128, 75, 75) 117 | 118 | out = self.relu(self.conv2_4(out)) # (N, 128, 38, 38) 119 | out = self.bn2_4(out) # (N, 128, 38, 38) 120 | out = self.relu(self.conv3_1(out)) # (N, 256, 38, 38) 121 | out = self.bn3_1(out) # (N, 256, 38, 38) 122 | out = self.relu(self.conv3_2(out)) # (N, 256, 38, 38) 123 | out = self.bn3_2(out) # (N, 256, 38, 38) 124 | out = self.relu(self.conv3_3(out)) # (N, 256, 38, 38) 125 | out = self.bn3_3(out) # (N, 512, 38, 38) 126 | out = self.relu(self.conv3_4(out)) # (N, 512, 38, 38) 127 | out = self.bn3_4(out) # (N, 512, 38, 38) 128 | out = self.relu(self.conv3_5(out)) # (N, 512, 38, 38) 129 | out = self.bn3_5(out) # (N, 512, 38, 38) 130 | out = self.relu(self.conv3_6(out)) # (N, 512, 38, 38) 131 | out = self.bn3_6(out) # (N, 512, 38, 38) 132 | out = self.relu(self.conv3_7(out)) # (N, 512, 38, 38) 133 | out = self.bn3_7(out) # (N, 512, 38, 38) 134 | out = self.relu(self.conv3_8(out)) # (N, 512, 19, 19) 135 | out = self.bn3_8(out) # (N, 256, 19, 19) 136 | conv3_8 = out 137 | 138 | out = self.relu(self.conv4_1(out)) # (N, 1024, 19, 19) 139 | out = self.bn4_1(out) # (N, 1024, 19, 19) 140 | out = self.relu(self.conv4_2(out)) # (N, 1024, 19, 19) 141 | out = self.bn4_2(out) # (N, 1024, 19, 19) 142 | out = self.relu(self.conv4_3(out)) # (N, 1024, 19, 19) 143 | out = self.bn4_3(out) # (N, 1024, 19, 19) 144 | out = self.relu(self.conv4_4(out)) # (N, 1024, 19, 19) 145 | out = self.bn4_4(out) # (N, 1024, 19, 19) 146 | out = self.relu(self.conv4_5(out)) # (N, 1024, 19, 19) 147 | out = self.bn4_5(out) # (N, 1024, 19, 19) 148 | out = self.relu(self.conv4_6(out)) # (N, 1024, 19, 19) 149 | out = self.bn4_6(out) # (N, 1024, 19, 19) 150 | out = self.relu(self.conv4_7(out)) # (N, 1024, 19, 19) 151 | out = self.bn4_7(out) # (N, 1024, 19, 19) 152 | out = self.relu(self.conv4_8(out)) # (N, 1024, 19, 19) 153 | out = self.bn4_8(out) # (N, 1024, 19, 19) 154 | out = self.relu(self.conv4_9(out)) # (N, 1024, 19, 19) 155 | out = self.bn4_9(out) # (N, 1024, 19, 19) 156 | out = self.relu(self.conv4_10(out)) # (N, 1024, 19, 19) 157 | out = self.bn4_10(out) # (N, 1024, 19, 19) 158 | out = 
self.relu(self.conv4_11(out)) # (N, 1024, 19, 19) 159 | out = self.bn4_11(out) # (N, 1024, 19, 19) <----- 160 | conv12_4 = out 161 | 162 | return conv3_8, conv12_4 163 | -------------------------------------------------------------------------------- /model/A2J/back_bone/resnet.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2019 Boshen Zhang 3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | ''' 9 | import torch.nn as nn 10 | import torch.utils.model_zoo as model_zoo 11 | 12 | PRETRAINED_MODELS = { 13 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 14 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 15 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 16 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 17 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 18 | } 19 | 20 | def conv3x3(in_planes, out_planes, stride=1, dilation=1): 21 | """3x3 convolution with padding""" 22 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, dilation=dilation, 23 | padding=dilation, bias=False) 24 | 25 | 26 | def conv1x1(in_planes, out_planes, stride=1): 27 | """1x1 convolution""" 28 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 29 | 30 | 31 | 32 | class BasicBlock(nn.Module): 33 | """ 34 | Resnet Basic Residual Block 35 | """ 36 | expansion = 1 37 | def __init__(self, input_channels, output_channels, stride=1, dilation=1, downsample=None): 38 | """ 39 | Class constructor 40 | 41 | :param input_channels: number of input channels to the residual block 42 | :param output channels: number of putput channels of the residual block 43 | :param stride: stride of the first convolution in the residual block 44 | :param dilation: dilation of the second convolution in the residual block 45 | :param downsample: torch.nn function for down sampling the input x for concatenation in the residual layer 46 | """ 47 | super(BasicBlock, self).__init__() 48 | 49 | self.conv1 = conv3x3(input_channels, output_channels, stride=stride) 50 | self.bn1 = nn.BatchNorm2d(output_channels) 51 | 52 | self.conv2 = conv3x3(output_channels, output_channels, dilation=dilation) 53 | self.bn2 = nn.BatchNorm2d(output_channels) 54 | 55 | self.downsample = downsample 56 | self.stride = stride 57 | 58 | # Actiation function 59 | self.relu = nn.LeakyReLU(inplace=True) 
60 | 61 | 62 | def forward(self, x): 63 | identity = x 64 | 65 | out = self.conv1(x) 66 | out = self.bn1(out) 67 | out = self.relu(out) 68 | 69 | out = self.conv2(out) 70 | out = self.bn2(out) 71 | 72 | if self.downsample is not None: 73 | identity = self.downsample(x) 74 | 75 | out += identity 76 | out = self.relu(out) 77 | 78 | return out 79 | 80 | class Bottleneck(nn.Module): 81 | """ 82 | Resnet Bottleneck network 83 | """ 84 | expansion = 4 85 | def __init__(self, input_channels, output_channels, stride=1, dilation=1, downsample=None): 86 | """ 87 | Class constructor 88 | 89 | :param input_channels: number of input channels to the residual block 90 | :param output channels: number of putput channels of the residual block 91 | :param stride: stride of the second convolution in the residual block 92 | :param dilation: dilation of the second convolution in the residual block 93 | :param downsample: torch.nn function for down sampling the input x for concatenation in the residual layer 94 | """ 95 | super(Bottleneck, self).__init__() 96 | 97 | self.conv1 = conv1x1(input_channels, output_channels) 98 | self.bn1 = nn.BatchNorm2d(output_channels) 99 | 100 | self.conv2 = conv3x3(output_channels, output_channels, stride=stride, dilation=dilation) 101 | self.bn2 = nn.BatchNorm2d(output_channels) 102 | 103 | self.conv3 = conv1x1(output_channels, output_channels*self.expansion) 104 | self.bn3 = nn.BatchNorm2d(output_channels*self.expansion) 105 | 106 | self.downsample = downsample 107 | self.stride = stride 108 | 109 | # Activation function 110 | self.relu = nn.LeakyReLU(inplace=True) 111 | 112 | 113 | 114 | def forward(self, x): 115 | identity = x 116 | 117 | out = self.conv1(x) 118 | out = self.bn1(out) 119 | out = self.relu(out) 120 | 121 | out = self.conv2(out) 122 | out = self.bn2(out) 123 | out = self.relu(out) 124 | 125 | out = self.conv3(out) 126 | out = self.bn3(out) 127 | 128 | if self.downsample is not None: 129 | identity = self.downsample(x) 130 | 131 | out += identity 132 | out = self.relu(out) 133 | 134 | return out 135 | 136 | 137 | class ResNet(nn.Module): 138 | """ 139 | ResNet Definition 140 | 141 | could create resnet (18, 34, 50, 101, 152) by setting the parameters 142 | """ 143 | def __init__(self, block, layers, num_classes=1000, zero_init_residual=False): 144 | """ 145 | Class constructor 146 | 147 | :param block: type toch.nn, A residual block class instance (i.e. 
BasicBlock or Bottleneck) 148 | :param layers: type list, A list holding the number of residual blocks in each ResNet layer 149 | :param num_classes: if using a pretrained network make sure the number of classes are the same 150 | :param zero_init_residual: Zero Initialiaze the last batchnorm in each residual layer for higher accuracy 151 | """ 152 | super(ResNet, self).__init__() 153 | 154 | self.input_channels = 64 155 | 156 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 157 | self.bn1 = nn.BatchNorm2d(64) 158 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 159 | 160 | self.layer1 = self._make_resnet_layer(block, 64, layers[0]) 161 | self.layer2 = self._make_resnet_layer(block, 128, layers[1], stride=2) 162 | self.layer3 = self._make_resnet_layer(block, 256, layers[2], stride=2) 163 | self.layer4 = self._make_resnet_layer(block, 512, layers[3], stride=1, dilation=2) 164 | self.avg_pool = nn.AdaptiveAvgPool2d((1, 1)) 165 | self.fc = nn.Linear(512*block.expansion, num_classes) 166 | 167 | # Activation function 168 | self.relu = nn.LeakyReLU(inplace=True) 169 | 170 | self._initialize() 171 | if zero_init_residual: 172 | self._zero_initialize() 173 | 174 | def _make_resnet_layer(self, block, output_channels, blocks, stride=1, dilation=1): 175 | """ 176 | Method to create residual block layer in resnet 177 | 178 | :param block: type torch.nn, a residual block block class instance (i.e. BasicBlock or Bottleneck) 179 | :param output_channels: type int, number of output channels of the residual block layer 180 | :param blocks: type int, number of residual blocks in this layer 181 | :param stride: type int 182 | :param dilation: type int 183 | """ 184 | downsample = None 185 | 186 | if (stride != 1) or (self.input_channels != output_channels*block.expansion): 187 | downsample = nn.Sequential( 188 | conv1x1(self.input_channels, output_channels*block.expansion, stride=stride), 189 | nn.BatchNorm2d(output_channels*block.expansion), 190 | ) 191 | 192 | layers = list() 193 | layers.append(block(self.input_channels, output_channels, stride=stride, downsample=downsample)) 194 | 195 | self.input_channels = output_channels * block.expansion 196 | 197 | for _ in range(1, blocks): 198 | layers.append(block(self.input_channels, output_channels, dilation=dilation)) 199 | 200 | return nn.Sequential(*layers) 201 | 202 | def _initialize(self): 203 | for m in self.modules(): 204 | if isinstance(m, nn.Conv2d): 205 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='leaky_relu') 206 | elif isinstance(m, nn.BatchNorm2d): 207 | nn.init.constant_(m.weight, 1) 208 | nn.init.constant_(m.bias, 0) 209 | 210 | def _zero_initialize(self): 211 | for m in self.modules(): 212 | for m in self.modules(): 213 | if isinstance(m, Bottleneck): 214 | nn.init.constant_(m.bn3.weight, 0) 215 | elif isinstance(m, BasicBlock): 216 | nn.init.constant_(m.bn2.weight, 0) 217 | 218 | def forward(self, x): 219 | x = self.conv1(x) 220 | x = self.bn1(x) 221 | x = self.relu(x) 222 | x = self.maxpool(x) 223 | 224 | x = self.layer1(x) 225 | x = self.layer2(x) 226 | x = self.layer3(x) 227 | x = self.layer4(x) 228 | 229 | x = self.avg_pool(x) 230 | x = x.view(x.size(0), -1) 231 | x = self.fc(x) 232 | 233 | return x 234 | 235 | 236 | def get_ResNet(resnet_model="resnet18", pretrained=False): 237 | 238 | resnet_setups = { 239 | "resnet18": {"block": BasicBlock, "layers": [2, 2, 2, 2]}, 240 | "resnet34": {"block": BasicBlock, "layers": [3, 4, 6, 3]}, 241 | "resnet50": {"block": Bottleneck, 
"layers": [3, 4, 6, 3]}, 242 | "resnet101": {"block": Bottleneck, "layers": [3, 4, 23, 3]}, 243 | "resnet152": {"block": Bottleneck, "layers": [3, 8, 36, 3]}, 244 | } 245 | model = ResNet(resnet_setups[resnet_model]["block"], resnet_setups[resnet_model]["layers"]) 246 | if pretrained: 247 | model.load_state_dict(model_zoo.load_url(PRETRAINED_MODELS[resnet_model])) 248 | 249 | return model 250 | 251 | class ResnetBackbone(nn.Module): 252 | """ 253 | The Resnet Backbone module 254 | """ 255 | def __init__(self, name="resnet18", pretrained=True): 256 | """ 257 | Class constructor 258 | 259 | :param name: name of the resnet model to load 260 | :param pretrained: weather or not to load the weight of a pretrained model on ImageNet 261 | """ 262 | super(ResnetBackbone, self).__init__() 263 | self.model = get_ResNet(resnet_model=name, pretrained=pretrained) 264 | 265 | def forward(self, x): 266 | n, c, h, w = x.size() 267 | 268 | x = x[:,0:1,:,:] # depth 269 | x = x.expand(n, 3, h, w) 270 | 271 | out = self.model.conv1(x) 272 | out = self.model.bn1(out) 273 | out = self.model.relu(out) 274 | out = self.model.maxpool(out) 275 | 276 | out1 = self.model.layer1(out) 277 | out2 = self.model.layer2(out1) 278 | out3 = self.model.layer3(out2) 279 | out4 = self.model.layer4(out3) 280 | 281 | return out3, out4 282 | -------------------------------------------------------------------------------- /model/A2J/model.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2019 Boshen Zhang 3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | ''' 9 | import torch.nn as nn 10 | from torch.nn import init 11 | import torch.utils.model_zoo as model_zoo 12 | 13 | 14 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 15 | 'resnet152'] 16 | 17 | 18 | model_urls = { 19 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 20 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 21 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 22 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 23 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 24 | } 25 | 26 | 27 | def conv3x3(in_planes, out_planes, stride=1, dilation=1): 28 | """3x3 convolution with padding""" 29 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, dilation=dilation, 30 | padding=dilation, bias=False) 31 | 32 | 33 | def conv1x1(in_planes, out_planes, stride=1): 34 | """1x1 convolution""" 35 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 36 | 37 | 38 | class BasicBlock(nn.Module): 39 | expansion = 1 40 | 41 | def __init__(self, inplanes, planes, stride=1, downsample=None, dilation=1): 42 | super(BasicBlock, self).__init__() 43 | self.conv1 = conv3x3(inplanes, planes, stride) 44 | self.bn1 = nn.BatchNorm2d(planes) 45 | self.relu = nn.ReLU(inplace=True) 46 | self.conv2 = conv3x3(planes, planes, dilation=dilation) 47 | self.bn2 = nn.BatchNorm2d(planes) 48 | self.downsample = downsample 49 | self.stride = stride 50 | 51 | def forward(self, x): 52 | identity = x 53 | 54 | out = self.conv1(x) 55 | out = self.bn1(out) 56 | out = self.relu(out) 57 | 58 | out = self.conv2(out) 59 | out = self.bn2(out) 60 | 61 | if self.downsample is not None: 62 | identity = self.downsample(x) 63 | 64 | out += identity 65 | out = self.relu(out) 66 | 67 | return out 68 | 69 | 70 | class Bottleneck(nn.Module): 71 | expansion = 4 72 | 73 | def __init__(self, inplanes, planes, stride=1, downsample=None, dilation=1): 74 | super(Bottleneck, self).__init__() 75 | self.conv1 = conv1x1(inplanes, planes) 76 | self.bn1 = nn.BatchNorm2d(planes) 77 | self.conv2 = conv3x3(planes, planes, stride, dilation=dilation) 78 | self.bn2 = nn.BatchNorm2d(planes) 79 | self.conv3 = conv1x1(planes, planes * self.expansion) 80 | self.bn3 = nn.BatchNorm2d(planes * self.expansion) 81 | self.relu = nn.ReLU(inplace=True) 82 | self.downsample = downsample 83 | self.stride = stride 84 | 85 | def forward(self, x): 86 | identity = x 87 | 88 | out = self.conv1(x) 89 | out = self.bn1(out) 90 | out = self.relu(out) 91 | 92 | out = self.conv2(out) 93 | out = self.bn2(out) 94 | out = self.relu(out) 95 | 96 | out = self.conv3(out) 97 | out = self.bn3(out) 98 | 99 | if self.downsample is not None: 100 | identity = self.downsample(x) 101 | 102 | out += identity 103 | out = self.relu(out) 104 | 105 | return out 106 | 107 | 108 | class ResNet(nn.Module): 109 | 110 | def __init__(self, block, layers, num_classes=1000, zero_init_residual=False): 111 | super(ResNet, self).__init__() 112 | self.inplanes = 64 113 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 114 | bias=False) 115 | self.bn1 = nn.BatchNorm2d(64) 116 | self.relu = nn.ReLU(inplace=True) 117 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 118 | self.layer1 = self._make_layer(block, 64, layers[0]) 119 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 120 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 121 | self.layer4 = 
self._make_layer(block, 512, layers[3], stride=1,dilation=2) 122 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 123 | self.fc = nn.Linear(512 * block.expansion, num_classes) 124 | 125 | for m in self.modules(): 126 | if isinstance(m, nn.Conv2d): 127 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 128 | elif isinstance(m, nn.BatchNorm2d): 129 | nn.init.constant_(m.weight, 1) 130 | nn.init.constant_(m.bias, 0) 131 | 132 | # Zero-initialize the last BN in each residual branch, 133 | # so that the residual branch starts with zeros, and each residual block behaves like an identity. 134 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 135 | if zero_init_residual: 136 | for m in self.modules(): 137 | if isinstance(m, Bottleneck): 138 | nn.init.constant_(m.bn3.weight, 0) 139 | elif isinstance(m, BasicBlock): 140 | nn.init.constant_(m.bn2.weight, 0) 141 | 142 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1): 143 | downsample = None 144 | if stride != 1 or self.inplanes != planes * block.expansion: 145 | downsample = nn.Sequential( 146 | conv1x1(self.inplanes, planes * block.expansion, stride), 147 | nn.BatchNorm2d(planes * block.expansion), 148 | ) 149 | 150 | layers = [] 151 | layers.append(block(self.inplanes, planes, stride, downsample)) 152 | self.inplanes = planes * block.expansion 153 | for _ in range(1, blocks): 154 | layers.append(block(self.inplanes, planes, dilation=dilation)) 155 | 156 | return nn.Sequential(*layers) 157 | 158 | def forward(self, x): 159 | x = self.conv1(x) 160 | x = self.bn1(x) 161 | x = self.relu(x) 162 | x = self.maxpool(x) 163 | 164 | x = self.layer1(x) 165 | x = self.layer2(x) 166 | x = self.layer3(x) 167 | x = self.layer4(x) 168 | 169 | x = self.avgpool(x) 170 | x = x.view(x.size(0), -1) 171 | x = self.fc(x) 172 | 173 | return x 174 | 175 | 176 | def resnet18(pretrained=False, **kwargs): 177 | """Constructs a ResNet-18 model. 178 | Args: 179 | pretrained (bool): If True, returns a model pre-trained on ImageNet 180 | """ 181 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 182 | if pretrained: 183 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 184 | return model 185 | 186 | 187 | def resnet34(pretrained=False, **kwargs): 188 | """Constructs a ResNet-34 model. 189 | Args: 190 | pretrained (bool): If True, returns a model pre-trained on ImageNet 191 | """ 192 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 193 | if pretrained: 194 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) 195 | return model 196 | 197 | 198 | def resnet50(pretrained=False, **kwargs): 199 | """Constructs a ResNet-50 model. 200 | Args: 201 | pretrained (bool): If True, returns a model pre-trained on ImageNet 202 | """ 203 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 204 | if pretrained: 205 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 206 | return model 207 | 208 | 209 | def resnet101(pretrained=False, **kwargs): 210 | """Constructs a ResNet-101 model. 211 | Args: 212 | pretrained (bool): If True, returns a model pre-trained on ImageNet 213 | """ 214 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 215 | if pretrained: 216 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 217 | return model 218 | 219 | 220 | def resnet152(pretrained=False, **kwargs): 221 | """Constructs a ResNet-152 model. 
222 | Args: 223 | pretrained (bool): If True, returns a model pre-trained on ImageNet 224 | """ 225 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 226 | if pretrained: 227 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) 228 | return model 229 | 230 | class DepthRegressionModel(nn.Module): 231 | def __init__(self, num_features_in, num_anchors=16, num_classes=15, feature_size=256): 232 | super(DepthRegressionModel, self).__init__() 233 | self.num_classes = num_classes 234 | self.num_anchors = num_anchors 235 | 236 | self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1) 237 | self.bn1 = nn.BatchNorm2d(feature_size) 238 | self.act1 = nn.ReLU() 239 | self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 240 | self.bn2 = nn.BatchNorm2d(feature_size) 241 | self.act2 = nn.ReLU() 242 | self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 243 | self.bn3 = nn.BatchNorm2d(feature_size) 244 | self.act3 = nn.ReLU() 245 | self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 246 | self.bn4 = nn.BatchNorm2d(feature_size) 247 | self.act4 = nn.ReLU() 248 | self.output = nn.Conv2d(feature_size, num_anchors*num_classes, kernel_size=3, padding=1) 249 | for m in self.modules(): 250 | if isinstance(m, nn.Conv2d): 251 | nn.init.xavier_normal_(m.weight.data) 252 | elif isinstance(m, nn.BatchNorm2d): 253 | m.weight.data.fill_(1) 254 | m.bias.data.zero_() 255 | 256 | def forward(self, x): 257 | out = self.conv1(x) 258 | out = self.bn1(out) 259 | out = self.act1(out) 260 | out = self.conv2(out) 261 | out = self.bn2(out) 262 | out = self.act2(out) 263 | out = self.conv3(out) 264 | out = self.bn3(out) 265 | out = self.act3(out) 266 | out = self.conv4(out) 267 | out = self.bn4(out) 268 | out = self.act4(out) 269 | out = self.output(out) 270 | 271 | # out is B x C x W x H, with C = 3*num_anchors 272 | out1 = out.permute(0, 3, 2, 1) 273 | batch_size, width, height, channels = out1.shape 274 | out2 = out1.view(batch_size, width, height, self.num_anchors, self.num_classes) 275 | return out2.contiguous().view(out2.shape[0], -1, self.num_classes) 276 | 277 | class RegressionModel(nn.Module): 278 | def __init__(self, num_features_in, num_anchors=16, num_classes=15, feature_size=256): 279 | super(RegressionModel, self).__init__() 280 | self.num_anchors = num_anchors 281 | self.num_classes = num_classes 282 | self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1) 283 | self.bn1 = nn.BatchNorm2d(feature_size) 284 | self.act1 = nn.ReLU() 285 | self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 286 | self.bn2 = nn.BatchNorm2d(feature_size) 287 | self.act2 = nn.ReLU() 288 | self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 289 | self.bn3 = nn.BatchNorm2d(feature_size) 290 | self.act3 = nn.ReLU() 291 | self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 292 | self.bn4 = nn.BatchNorm2d(feature_size) 293 | self.act4 = nn.ReLU() 294 | self.output = nn.Conv2d(feature_size, num_anchors*num_classes*2, kernel_size=3, padding=1) 295 | for m in self.modules(): 296 | if isinstance(m, nn.Conv2d): 297 | nn.init.xavier_normal_(m.weight.data) 298 | elif isinstance(m, nn.BatchNorm2d): 299 | m.weight.data.fill_(1) 300 | m.bias.data.zero_() 301 | 302 | def forward(self, x): 303 | out = self.conv1(x) 304 | out = self.bn1(out) 305 | out = self.act1(out) 306 | out = self.conv2(out) 307 | out = self.bn2(out) 308 | out = 
self.act2(out) 309 | out = self.conv3(out) 310 | out = self.bn3(out) 311 | out = self.act3(out) 312 | out = self.conv4(out) 313 | out = self.bn4(out) 314 | out = self.act4(out) 315 | out = self.output(out) 316 | 317 | # out is B x C x W x H, with C = 3*num_anchors 318 | out1 = out.permute(0, 3, 2, 1) 319 | batch_size, width, height, channels = out1.shape 320 | out2 = out1.view(batch_size, width, height, self.num_anchors, self.num_classes, 2) 321 | return out2.contiguous().view(out2.shape[0], -1, self.num_classes, 2) 322 | 323 | class ClassificationModel(nn.Module): 324 | def __init__(self, num_features_in, num_anchors=16, num_classes=15, prior=0.01, feature_size=256): 325 | super(ClassificationModel, self).__init__() 326 | self.num_classes = num_classes 327 | self.num_anchors = num_anchors 328 | self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1) 329 | self.bn1 = nn.BatchNorm2d(feature_size) 330 | self.act1 = nn.ReLU() 331 | self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 332 | self.bn2 = nn.BatchNorm2d(feature_size) 333 | self.act2 = nn.ReLU() 334 | self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 335 | self.bn3 = nn.BatchNorm2d(feature_size) 336 | self.act3 = nn.ReLU() 337 | self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 338 | self.bn4 = nn.BatchNorm2d(feature_size) 339 | self.act4 = nn.ReLU() 340 | self.output = nn.Conv2d(feature_size, num_anchors*num_classes, kernel_size=3, padding=1) 341 | for m in self.modules(): 342 | if isinstance(m, nn.Conv2d): 343 | nn.init.xavier_normal_(m.weight.data) 344 | elif isinstance(m, nn.BatchNorm2d): 345 | m.weight.data.fill_(1) 346 | m.bias.data.zero_() 347 | 348 | def forward(self, x): 349 | out = self.conv1(x) 350 | out = self.bn1(out) 351 | out = self.act1(out) 352 | out = self.conv2(out) 353 | out = self.bn2(out) 354 | out = self.act2(out) 355 | out = self.conv3(out) 356 | out = self.bn3(out) 357 | out = self.act3(out) 358 | out = self.conv4(out) 359 | out = self.bn4(out) 360 | out = self.act4(out) 361 | out = self.output(out) 362 | 363 | # out is B x C x W x H, with C = n_classes + n_anchors 364 | out1 = out.permute(0, 3, 2, 1) 365 | batch_size, width, height, channels = out1.shape 366 | out2 = out1.view(batch_size, width, height, self.num_anchors, self.num_classes) 367 | return out2.contiguous().view(x.shape[0], -1, self.num_classes) 368 | 369 | 370 | class ResNetBackBone(nn.Module): 371 | def __init__(self): 372 | super(ResNetBackBone, self).__init__() 373 | 374 | modelPreTrain50 = resnet50(pretrained=True) 375 | self.model = modelPreTrain50 376 | 377 | def forward(self, x): 378 | n, c, h, w = x.size() # x: [B, 1, H ,W] 379 | 380 | x = x[:,0:1,:,:] # depth 381 | x = x.expand(n,3,h,w) 382 | 383 | x = self.model.conv1(x) 384 | x = self.model.bn1(x) 385 | x = self.model.relu(x) 386 | x = self.model.maxpool(x) 387 | x1 = self.model.layer1(x) 388 | x2 = self.model.layer2(x1) 389 | x3 = self.model.layer3(x2) 390 | x4 = self.model.layer4(x3) 391 | 392 | return x3,x4 393 | 394 | class A2J_model(nn.Module): 395 | def __init__(self, num_classes, is_3D=True): 396 | super(A2J_model, self).__init__() 397 | self.is_3D = is_3D 398 | self.Backbone = ResNetBackBone() # 1 channel depth only, resnet50 399 | self.regressionModel = RegressionModel(2048, num_classes=num_classes) 400 | self.classificationModel = ClassificationModel(1024, num_classes=num_classes) 401 | if is_3D: 402 | self.DepthRegressionModel = DepthRegressionModel(2048, 
num_classes=num_classes) 403 | 404 | def forward(self, x): 405 | x3,x4 = self.Backbone(x) 406 | classification = self.classificationModel(x3) 407 | regression = self.regressionModel(x4) 408 | if self.is_3D: 409 | DepthRegressionModel = self.DepthRegressionModel(x4) 410 | return (classification, regression, DepthRegressionModel) 411 | return (classification, regression) 412 | -------------------------------------------------------------------------------- /model/CenterNet/centernet.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2019 Boshen Zhang 3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | ''' 9 | import os 10 | import sys 11 | import torch 12 | import torchvision 13 | import torch.nn as nn 14 | 15 | # Adding Project Path 16 | DIR_PATH = os.path.dirname(os.path.abspath(__file__)) # CenterNet 17 | MODEL_PATH = os.path.join(DIR_PATH, os.path.pardir) # Model 18 | ROOT_PATH = os.path.join(MODEL_PATH, os.path.pardir) # root 19 | 20 | sys.path.append(ROOT_PATH) 21 | 22 | # Importing Project Libraries 23 | import pipeline.constants as const 24 | 25 | class CRBUp(nn.Module): 26 | """ 27 | Convolution Residual Block Upsampling Class 28 | """ 29 | def __init__(self, in_channels: int, out_channels: int): 30 | super(CRBUp, self).__init__() 31 | self.layers = nn.Sequential( 32 | nn.ConvTranspose2d(in_channels, out_channels, kernel_size=4, stride=2, padding=1), 33 | nn.BatchNorm2d(out_channels), 34 | nn.LeakyReLU() 35 | ) 36 | 37 | def forward(self, x): 38 | return self.layers(x) 39 | 40 | 41 | class Resnet18FeatureExtractor(nn.Module): 42 | 43 | def __init__(self, num_classes=const.CENTERNET_NUM_CLASSES, pretrained=True): 44 | super(Resnet18FeatureExtractor, self).__init__() 45 | self.num_classes = num_classes 46 | self.out_channels = 4 + num_classes 47 | self.model = torchvision.models.resnet18(pretrained=pretrained) 48 | 49 | self.up_sample1 = CRBUp(512, 256) 50 | self.up_sample2 = CRBUp(512, 128) 51 | self.up_sample3 = CRBUp(256, 64) 52 | self.up_sample4 = CRBUp(128, self.out_channels) 53 | 54 | self.sigmoid = nn.Sigmoid() 55 | 56 | 57 | def forward(self, x): 58 | x = self.model.conv1(x) 59 | x = self.model.bn1(x) 60 | x = self.model.relu(x) 61 | x = self.model.maxpool(x) 62 | 63 | x1 = self.model.layer1(x) 64 | x2 = self.model.layer2(x1) 65 | x3 = self.model.layer3(x2) 66 | x4 = self.model.layer4(x3) 67 | 68 | # Upsampling 69 | out = self.up_sample1(x4) 70 | out = self.up_sample2(torch.cat([x3, 
out], 1)) 71 | out = self.up_sample3(torch.cat([x2, out], 1)) 72 | out = self.up_sample4(torch.cat([x1, out], 1)) 73 | 74 | out = torch.cat([self.sigmoid(out[:,0:self.num_classes]), out[:, self.num_classes:]], dim=1) 75 | 76 | return out 77 | -------------------------------------------------------------------------------- /model/run_model.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2019 Boshen Zhang 3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | ''' 9 | import os 10 | import sys 11 | import torch 12 | import numpy as np 13 | 14 | # PROJ ROOT DIR 15 | DIR_PATH = os.path.dirname(os.path.abspath(__file__)) 16 | ROOT_PATH = os.path.join(DIR_PATH, os.path.pardir) 17 | sys.path.append(ROOT_PATH) 18 | 19 | # Importing Project Library 20 | from pipeline.model_setup import ModelSetup 21 | from pipeline.utils import find_prediction_mask, get_bboxes, find_jaccard_overlap 22 | 23 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 24 | 25 | def run_ssd(model_setup:ModelSetup, image: torch.tensor, trt_optim=False): 26 | """ 27 | Perform inference on the image and return the boundiong boxes along with the images. 
28 | 
29 | :param model_setup: ModelSetup class instance (holds all the information about a model) 
30 | :param image: a set of images (N, 1, 300, 300) 
31 | :return: pred_boxes, pred_labels, pred_scores 
32 | """ 
33 | model_setup.bb_model.eval() 
34 | 
35 | model_setup.bb_model.to(DEVICE) 
36 | image = image.to(DEVICE) 
37 | with torch.no_grad(): 
38 | pred_locs, pred_scores = model_setup.bb_model(image) 
39 | pred_boxes, pred_labels, pred_scores = model_setup.priors.detect_objects(pred_locs, pred_scores) 
40 | 
41 | return pred_boxes[0].to("cpu"), pred_labels, pred_scores 
42 | 
43 | def run_centernet(model_setup: ModelSetup, image: torch.tensor, trt_optim=False): 
44 | """ 
45 | Run CenterNet inference on the image and return the predicted bounding boxes 
46 | 
47 | :param model_setup: ModelSetup, model setup state 
48 | :param image: a set of images (N, 3, 320, 320) 
49 | """ 
50 | model_setup.bb_model.to(DEVICE) 
51 | model_setup.bb_model.eval() 
52 | 
53 | image = image.to(DEVICE) 
54 | with torch.no_grad(): 
55 | if trt_optim: # the TRT-optimized module and the eager model are called the same way 
56 | preds = model_setup.bb_model(image) 
57 | else: 
58 | preds = model_setup.bb_model(image) 
59 | 
60 | prediction = preds 
61 | 
62 | pred_heatmap = prediction[0][0:model_setup.centernet_num_classes].max(0)[0].float() 
63 | pred_mask = find_prediction_mask(pred_heatmap)[0][0] 
64 | pred_yx_locations = torch.nonzero(pred_mask) 
65 | 
66 | pred_height = prediction[0][-4][pred_mask] 
67 | pred_width = prediction[0][-3][pred_mask] 
68 | 
69 | pred_offset_y = prediction[0][-2][pred_mask] 
70 | pred_offset_x = prediction[0][-1][pred_mask] 
71 | 
72 | pred_bboxes = get_bboxes(pred_yx_locations, pred_height, pred_width, pred_offset_x, pred_offset_y) 
73 | 
74 | if pred_bboxes: 
75 | pred_bboxes = torch.FloatTensor(pred_bboxes) 
76 | # Do Non-Max suppression on the nearby boxes 
77 | tmp_boxes = pred_bboxes.clone() 
78 | tmp_boxes[:,2:4] += tmp_boxes[:,0:2] 
79 | 
80 | # Tensor of zeros for all valid boxes 
81 | suppress = torch.zeros((tmp_boxes.size(0)), dtype=torch.uint8).to(DEVICE) 
82 | # Overlap score [0-1] 
83 | overlap = find_jaccard_overlap(tmp_boxes, tmp_boxes) 
84 | for box in range(tmp_boxes.size(0)): 
85 | if suppress[box] == 1: 
86 | continue 
87 | suppress = torch.max(suppress, torch.as_tensor(overlap[box] > 0.3, dtype=torch.uint8).to(DEVICE)) 
88 | suppress[box] = 0 
89 | 
90 | # Get the list of the valid boxes 
91 | pred_bboxes_list = [] 
92 | for i, elem in enumerate(suppress): 
93 | if elem.item() == 0: 
94 | pred_bboxes_list.append(pred_bboxes[i].tolist()) 
95 | pred_bboxes = torch.FloatTensor(pred_bboxes_list) 
96 | else: 
97 | pred_bboxes = None 
98 | 
99 | return pred_bboxes, None, None 
100 | 
101 | def run_a2j(model_setup:ModelSetup, image): 
102 | """ 
103 | Perform inference on the cropped hand depth image and return the predicted joint locations. 
104 | 
105 | :param model_setup: ModelSetup class instance (holds all the information about a model) 
106 | :param image: a set of images (N, 1, 176, 176) 
107 | :return: pred_points, the predicted (x, y, z) joint locations 
108 | """ 
109 | model_setup.a2j_model.eval() 
110 | model_setup.a2j_model.to(DEVICE) 
111 | model_setup.post_process.to(DEVICE) 
112 | image = image.to(DEVICE) 
113 | 
114 | with torch.no_grad(): 
115 | joint_classification, offset_regression, depth_regression = model_setup.a2j_model(image.type(torch.float32)) 
116 | pred_points = model_setup.post_process(joint_classification, offset_regression, depth_regression) 
117 | 
118 | return pred_points 
119 | 
-------------------------------------------------------------------------------- 
/pipeline/azure_kinect.py: 
-------------------------------------------------------------------------------- 
1 | ''' 
2 | Copyright (c) 2019 Boshen Zhang 
3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 
6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | ''' 9 | from pyk4a import PyK4A 10 | 11 | import os 12 | import sys 13 | import time 14 | import argparse 15 | import matplotlib.pyplot as plt 16 | import matplotlib.patches as patches 17 | 18 | # PROJ ROOT DIR 19 | DIR_PATH = os.path.dirname(os.path.abspath(__file__)) 20 | ROOT_PATH = os.path.join(DIR_PATH, os.path.pardir) 21 | sys.path.append(ROOT_PATH) 22 | 23 | # PROJ LIBRARY 24 | import pipeline.constants as const 25 | from pipeline.utils import * 26 | from pipeline.model_setup import ModelSetup 27 | from model.run_model import run_centernet, run_ssd, run_a2j 28 | 29 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 30 | 31 | def parse_arguments(): 32 | """ 33 | Argument parser function for main.py 34 | """ 35 | parser = argparse.ArgumentParser() 36 | 37 | parser.add_argument('-t', '--trt', 38 | type=bool, 39 | default=False, 40 | help="Set to True for trt optimization") 41 | 42 | args = parser.parse_args() 43 | return args 44 | 45 | def run_camera_inferance(k4a, model_setup: ModelSetup, iterations=100, show_heatmap=False, trt_optim=False): 46 | """ 47 | Run the model for N number of frames 48 | 49 | :param model_setup: ModelSetup 50 | :param iterations: the total number of frames to run the model 51 | :param show_heatmap: set to visualize prediction heat map and mask 52 | """ 53 | fig = plt.figure(figsize=(6, 8)) 54 | fig.suptitle(f"{const.NUM_JOINTS} Joints", fontsize=16) 55 | ax_1 = fig.add_subplot(2,1,1) 56 | ax_2 = fig.add_subplot(2, 1, 2, projection='3d') 57 | 58 | bb_summary = Summary() 59 | a2j_summary = Summary() 60 | 61 | for i in range(1000): 62 | capture = k4a.get_capture() 63 | ir_img = capture.ir 64 | depth_img = capture.depth 65 | 66 | w, h = ir_img.shape[1], ir_img.shape[0] # Image (width, height) 67 | transformed_image = centernet_img_transform(ir_image=ir_img, depth_image=depth_img) # Image transfered to (1, 1, 300, 300) float tensor 68 | 69 | start_time = time.time() 70 | pred_boxes, _, _ = run_centernet(model_setup, transformed_image) # Perform Inference 71 | end_time = time.time() 72 | bb_summary.update(end_time-start_time) 73 | 74 | pred_joints_collections = [] 75 | median_depths = [] 76 | 77 | if pred_boxes != None: 78 | # Normalizing the pred boxes to original dimentions 79 | original_dims = torch.FloatTensor([w, h, w, h]).unsqueeze(0) 80 | pred_boxes[:,2:4] += pred_boxes[:,0:2] 81 | pred_boxes /= 320 82 | 83 | pred_boxes *= original_dims 84 | 85 | bboxs = [] # list of (x0, y0, x1, y1) 86 | for i in range(pred_boxes.size(0)): 87 | box_locs = pred_boxes[i].tolist() 88 | x, y = box_locs[0], box_locs[1] 89 | width, height = abs(box_locs[0] - box_locs[2]), abs(box_locs[1] - box_locs[3]) 90 | rect = patches.Rectangle((x,y),width,height,linewidth=1,edgecolor='g',facecolor='none') 91 | ax_1.add_patch(rect) 92 | 93 | bboxs.append([ 94 | int(box_locs[0]), 95 | int(box_locs[1]), 96 | int(box_locs[2]), 97 | int(box_locs[3]) 98 | ]) 99 | 100 | for bbox in bboxs: 101 | t_depth_image, median_depth = a2j_depth_image_transform(depth_img, bbox) 102 | # import pdb; pdb.set_trace() 103 | start_time = time.time() 104 | pred_points = run_a2j(model_setup, t_depth_image) 105 | end_time = time.time() 106 | a2j_summary.update(end_time-start_time) 107 | 108 | pred_joints_collections.append(pred_points[0]) 109 | median_depths.append(median_depth) 110 | 111 | normalized_joints = back_to_normal(pred_joints_collections, bboxs, median_depths) 112 | scats = vizualize_frams(ax_2, normalized_joints) 113 | 114 | ir_img[ir_img > 3000] = ir_img.mean() 115 | 
ax_1.imshow(ir_img, interpolation='nearest', cmap ='gray') 116 | 117 | plt.draw() 118 | plt.pause(0.001) 119 | 120 | ax_1.clear() 121 | if pred_boxes != None: 122 | [scat.remove() for scat in scats] 123 | ax_2.clear() 124 | 125 | print(f"BB Infrence time: {bb_summary.avg:1.4f} "\ 126 | f"A2J Infrence time: {a2j_summary.avg:1.4f} "\ 127 | f"Total Infrence time: {a2j_summary.avg + bb_summary.avg:1.4f}") 128 | 129 | print(f"BB Infrence time FPS: {1/bb_summary.avg:1.0f} "\ 130 | f"A2J Infrence time FPS: {1/a2j_summary.avg:1.0f} "\ 131 | f"Total Infrence time FPS: {1/(a2j_summary.avg + bb_summary.avg):1.0f}") 132 | 133 | 134 | def main(): 135 | # Load camera with default config 136 | k4a = PyK4A() 137 | k4a.start() 138 | 139 | args = parse_arguments() 140 | bbox_path, a2j_path = get_model() 141 | 142 | model_setup = ModelSetup(BBOX_MODEL_PATH=bbox_path, A2J_model_path=a2j_path, trt_optim=args.trt) 143 | 144 | run_camera_inferance(k4a, model_setup) 145 | 146 | main() 147 | -------------------------------------------------------------------------------- /pipeline/constants.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2019 Boshen Zhang 3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | ''' 9 | import os 10 | 11 | DIR_PATH = os.path.dirname(os.path.abspath(__file__)) 12 | ROOT_PATH = os.path.join(DIR_PATH, os.path.pardir) 13 | 14 | CENTERNET_MODEL_PATH = os.path.join(ROOT_PATH, "checkpoint/CenterNet") 15 | A2J_MODEL_PATH = os.path.join(ROOT_PATH, "checkpoint/A2J") 16 | ############################################# 17 | ############# CenterNet SETUP ############### 18 | ############################################# 19 | CENTERNET_MODEL_NAME = "ResnetCenterNet" 20 | 21 | # Setup the data to be used for training (Depth images/ fusion of IR and Depth images) 22 | CENTERNET_DATA_LOADER_SWITCHER = { 23 | "depth": False, 24 | "fused": True, 25 | } 26 | CENTERNET_DATA_LOADER = [[elem[0] for elem in CENTERNET_DATA_LOADER_SWITCHER.items() if elem[1]][0]] [0] 27 | 28 | # Setup the heatmap loss MSE/Logistic loss 29 | CENTERNET_LOSS_SWITHCER = { 30 | "MSE": False, 31 | "Logistic": True, 32 | } 33 | CENTERNET_LOSS = [[elem[0] for elem in CENTERNET_LOSS_SWITHCER.items() if elem[1]][0]] [0] 34 | 35 | CENTERNET_IMG_SHAPE = (320, 320) 36 | 37 | CENTERNET_NUM_CLASSES = 1 38 | CENTERNET_STRIDE = 2 39 | 40 | THRESHOLD_ACC = 0.3 41 | 42 | INPUT_IMG_SIZE = (320, 320) 43 | 44 | ############################################# 45 | ################# A2J SETUP ################# 46 | ############################################# 47 | DATASET = "NYU" # "Personal", "NYU" 48 | 49 | DATA_SEGMENT = "1" # ALL, 1 50 | # List of availiblke backbones set the one you wantto use to true and all else to false 51 | A2J_BACKBONE_NAME = { 52 | "resnet18": False, 53 | "resnet34": False, 54 | "resnet50": True, 55 | "resnet101": False, 56 | "resnet152": False, 57 | "mobilenet": False, 58 | } 59 | 60 | A2J_TARGET_SIZE = (176, 176) 61 | DEPTH_THRESHOLD = 180 62 | A2J_STRIDE = 16 63 | NUM_JOINTS = 16 # 14, 16, 36, 21 64 | -------------------------------------------------------------------------------- /pipeline/model_setup.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2019 Boshen Zhang 3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | ''' 
9 | import os 
10 | import torch 
11 | 
12 | # PROJ LIBRARY 
13 | import pipeline.constants as const 
14 | 
15 | from model.CenterNet.centernet import Resnet18FeatureExtractor 
16 | 
17 | from model.A2J.a2j import A2J 
18 | from model.A2J.model import A2J_model 
19 | from model.A2J.a2j_utilities.post_processing import PostProcess 
20 | 
21 | 
22 | class ModelSetup(object): 
23 | """ 
24 | Class to set up both the bounding box (CenterNet) model and the A2J model 
25 | """ 
26 | def __init__(self, BBOX_MODEL_PATH:str, A2J_model_path=const.A2J_MODEL_PATH, trt_optim=False): 
27 | """ 
28 | 
29 | :param BBOX_MODEL_PATH: string, full path to the bounding box (CenterNet) model checkpoint 
30 | :param A2J_model_path: string, full path to A2J Model checkpoint 
31 | """ 
32 | self.bb_model_path = BBOX_MODEL_PATH 
33 | self.a2j_path = A2J_model_path 
34 | 
35 | print("Loading CenterNet ...") 
36 | centernet_check_point = torch.load(self.bb_model_path, map_location=torch.device("cuda" if torch.cuda.is_available() else "cpu")) 
37 | self.centernet_model_name = centernet_check_point["model_name"] 
38 | self.centernet_num_classes = centernet_check_point['num_classes'] 
39 | self.bb_model = Resnet18FeatureExtractor(num_classes=self.centernet_num_classes) 
40 | self.bb_model.load_state_dict(centernet_check_point["model"]) 
41 | print("CenterNet Loading Finished!\n\n") 
42 | 
43 | 
44 | 
45 | if trt_optim: 
46 | import tensorrt as trt 
47 | from torch2trt import torch2trt, TRTModule 
48 | trt_model_path = self.bb_model_path.split(".")[0] + ".trt" 
49 | if not os.path.exists(trt_model_path): 
50 | print("Creating TRT Bounding Box Model...") 
51 | 
52 | x = torch.ones((1, 3, const.INPUT_IMG_SIZE[0], const.INPUT_IMG_SIZE[1])).cuda() 
53 | 
54 | self.bb_model = torch2trt(self.bb_model.eval().cuda(), [x], fp16_mode=True) 
55 | torch.save(self.bb_model.state_dict(), trt_model_path) 
56 | print(f"TRT Bounding Box Model saved at:\n" 
57 |       f"{trt_model_path}\n") 
58 | 
59 | 
60 | print("Loading TRT Bounding Box Model...") 
61 | del self.bb_model 
62 | 
63 | self.bb_model = TRTModule() 
64 | self.bb_model.load_state_dict(torch.load(trt_model_path)) 
65 | print("TRT Bounding Box Model loaded!\n") 
66 | 
67 | 
68 | # Load A2J model 
69 | print("Loading A2J ...") 
70 | backbone_name = [elem[0] for idx, elem in enumerate(const.A2J_BACKBONE_NAME.items()) if elem[1]][0] # first backbone flagged True in constants.py 
71 | a2j_check_point = torch.load(self.a2j_path, map_location=torch.device("cpu")) 
72 | 
73 | self.num_class = a2j_check_point["num_classes"] 
74 | # self.a2j_model = A2J_model(num_classes=self.num_class) 
75 | self.a2j_model = A2J(num_joints=self.num_class, backbone_name=backbone_name, backbone_pretrained=True) 
76 | self.a2j_model.load_state_dict(a2j_check_point["model"]) 
77 | self.post_process = PostProcess(shape=(const.A2J_TARGET_SIZE[1]//16, const.A2J_TARGET_SIZE[0]//16),\ 
78 | stride=const.A2J_STRIDE) 
79 | 
80 | if trt_optim: 
81 | from torch2trt import torch2trt, TRTModule 
82 | trt_a2j_model_path = self.a2j_path.split(".")[0] + ".trt" 
83 | if not os.path.exists(trt_a2j_model_path): 
84 | print("Creating TRT A2J Model...") 
85 | x = torch.empty((1, 1, const.A2J_TARGET_SIZE[0], const.A2J_TARGET_SIZE[1])).cuda().float() 
86 | 
87 | self.a2j_model = torch2trt(self.a2j_model.eval().cuda(), [x], fp16_mode=True) 
88 | torch.save(self.a2j_model.state_dict(), trt_a2j_model_path) 
89 | print(f"TRT A2J Model saved at:\n" 
90 |       f"{trt_a2j_model_path}\n") 
91 | 
92 | 
93 | print("Loading TRT A2J Model...") 
94 | del self.a2j_model 
95 | 
96 | self.a2j_model = TRTModule() 
97 | self.a2j_model.load_state_dict(torch.load(trt_a2j_model_path)) 
98 | print("TRT A2J Model loaded!\n") 
99 | 
100 | 
101 | print("A2J Loading Finished!\n\n") 
102 | 
103 | 
104 | 
-------------------------------------------------------------------------------- 
/pipeline/utils.py: 
-------------------------------------------------------------------------------- 
1 | ''' 
2 | Copyright (c) 2019 Boshen Zhang 
3 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 
6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | ''' 
9 | import os 
10 | import sys 
11 | import cv2 
12 | import torch 
13 | import numpy as np 
14 | import torchvision.transforms.functional as FT 
15 | 
16 | from glob import glob 
17 | from PIL import Image, ImageOps 
18 | 
19 | # PROJ ROOT DIR 
20 | DIR_PATH = os.path.dirname(os.path.abspath(__file__)) 
21 | ROOT_PATH = os.path.join(DIR_PATH, os.path.pardir) 
22 | sys.path.append(ROOT_PATH) 
23 | 
24 | # PROJ LIBRARY 
25 | import pipeline.constants as const 
26 | 
27 | 
28 | # Set the global device variable to cuda if a GPU is available 
29 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 
30 | 
31 | 
32 | # DATASET INFO 
33 | MEAN = -0.66877532 
34 | STD = 28.32958208 
35 | 
36 | 
37 | def xy_to_cxcy(xy): 
38 | """ 
39 | Convert bounding boxes from boundary coordinates (x_min, y_min, x_max, y_max) to center-size coordinates (c_x, c_y, w, h) 
40 | 
41 | :param xy: bounding box coordinates, a tensor of size (n_boxes, 4) 
42 | :return: bounding boxes in center-size coordinates, a tensor of size (n_boxes, 4) 
43 | """ 
44 | return torch.cat([ (xy[:, 2:] + xy[:, :2])/2, # c_x, c_y 
45 | xy[:, 2:] - xy[:, :2]], 1) # w, h 
46 | 
47 | def cxcy_to_xy(cxcy): 
48 | """ 
49 | Convert bounding boxes from center-size coordinates (c_x, c_y, w, h) to boundary coordinates (x_min, y_min, x_max, y_max) 
50 | 
51 | :param cxcy: bounding boxes in center-size coordinates (n_boxes, 4) 
52 | :return: bounding boxes in boundary coordinates (n_boxes, 4) 
53 | """ 
54 | 
55 | return torch.cat([cxcy[:, :2] - (cxcy[:, 2:] / 2), # x_min, y_min 
56 | cxcy[:, :2] + (cxcy[:, 2:] / 2)], 1) # x_max, y_max 
57 | 
58 | def cxcy_to_gcxgcy(cxcy, priors_cxcy): 
59 | """ 
60 | Encode bounding boxes (that are in center-size form) w.r.t. the corresponding prior boxes. 
61 | 
62 | For the center coordinates, find the offset with respect to the prior box, and scale by the size of the prior box 
63 | For the size coordinates, scale by the size of the prior box, and convert to the log-space. 
64 | 
65 | In the model, we are predicting bounding box coordinates in this encoded form. 
66 | 67 | :param cxcy: bounding boxes in center sized coordinates, (n_priors, 4) 68 | :param priors_xcxy: prior boxes with respect which the encoding must be preformed, (n_priors, 4) 69 | :return: encoded boundin boxes, (n_priors, 4) 70 | """ 71 | cxcy = cxcy.to(DEVICE) 72 | priors_cxcy = priors_cxcy.to(DEVICE) 73 | return torch.cat( 74 | [(cxcy[:, :2] - priors_cxcy[:, :2]) / (priors_cxcy[:, 2:] / 10), # g_c_x, g_c_y 75 | torch.log(cxcy[:, 2:] / priors_cxcy[:, 2:]) * 5], 1) # g_w, g_h 76 | 77 | def gcxgcy_to_cxcy(gcxgcy, priors_cxcy): 78 | """ 79 | Decode bounding box coordinates predicted by thr model, sice they are encoded in the form mentioned above. 80 | 81 | They are decoded into center size coordinates. 82 | 83 | This is invers of the above functions 84 | 85 | :param gcxgcy: encoded bounding box (i.e. output of model) (n_priors, 4) 86 | :param priors_cxcy: prior boxes with respect to which the encoding is defined (n_priors, 4) 87 | :return: decoded bounding boxes in center size form (n_priors, 4) 88 | """ 89 | gcxgcy = gcxgcy.to(DEVICE) 90 | priors_cxcy = priors_cxcy.to(DEVICE) 91 | return torch.cat( 92 | [gcxgcy[:, :2] * priors_cxcy[:, 2:] / 10 + priors_cxcy[:, :2], # c_x, c_y 93 | torch.exp(gcxgcy[:, 2:] / 5) * priors_cxcy[:, 2:]], 1) # w, h 94 | 95 | def find_intersection(set_1, set_2): 96 | """ 97 | Find the intersection of every box combination betweeen 2 sets of boxes that are in boundary coordinates. 98 | 99 | :param set_1: set_1 (n1, 4) 100 | :param set_2: set 2 (n2, 4) 101 | :return: intersection of each of the boxes in set 1 with respect to each of the set 2 (n1, n2) 102 | """ 103 | 104 | lower_bounds = torch.max(set_1[:, :2].unsqueeze(1).to(DEVICE), set_2[:, :2].unsqueeze(0).to(DEVICE)) # (n1, n2, 2) 105 | upper_bounds = torch.min(set_1[:, 2:].unsqueeze(1).to(DEVICE), set_2[:, 2:].unsqueeze(0).to(DEVICE)) # (n1, n2, 2) 106 | intersection_dims = torch.clamp(upper_bounds - lower_bounds, min=0) # (n1, n2, 2) 107 | return intersection_dims[:, :, 0] * intersection_dims[:, :, 1] # (n1, n2) 108 | 109 | def find_jaccard_overlap(set_1, set_2): 110 | """ 111 | Find IoU of every box combination in between the 2 sets (boxes in boundary coordinates) 112 | 113 | :param set_1: set 1 (n1, 4) 114 | :param set2: set 2 (n2, 4) 115 | :return: Jaccard overlap of each of the boxes in the set 1 with respect to set 2 (n1, n2) 116 | """ 117 | 118 | intersection = find_intersection(set_1, set_2) 119 | 120 | area_set_1 = (set_1[:, 2] - set_1[:, 0]) * (set_1[:, 3] - set_1[:, 1]) # (n1) 121 | area_set_2 = (set_2[:, 2] - set_2[:, 0]) * (set_2[:, 3] - set_2[:, 1]) # (n1) 122 | 123 | union = area_set_1.unsqueeze(1).to(DEVICE) + area_set_2.unsqueeze(0).to(DEVICE) - intersection # (n1, n2) 124 | 125 | return intersection / union 126 | 127 | def decay_lr_rate(optim, scale): 128 | """ 129 | Scale the lr rate by a factor. 130 | 131 | :param optim: optimizer (SGD) 132 | :param scale: factor to scale the lr rate with. 133 | """ 134 | for param_group in optim.param_groups: 135 | param_group['lr'] = param_group['lr'] * scale 136 | 137 | class Summary(object): 138 | def __init__(self): 139 | self.item = 0 140 | self.sum = 0 141 | self.len = 0 142 | self.avg = 0.000001 143 | 144 | def update(self, value): 145 | self.item = value 146 | self.sum += value 147 | self.len += 1 148 | self.avg = self.sum / self.len 149 | 150 | 151 | def get_model(): 152 | """ 153 | The model weights are saved in CHECKPOINT_DIR specified in constants.py 154 | this functions loos into that directory and returns the path to the model. 
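With the default values in pipeline/constants.py the expected checkpoint names resolve to something like 
checkpoint/CenterNet/Logistic_ResnetCenterNet_fused.pth and checkpoint/A2J/NYU_1_resnet50_16_a2j.pth 
(illustrative examples only; the actual file names depend on the constants used during training). 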
155 | 
156 | Please set the correct paths in pipeline/constants.py if not using the defaults: 
157 | CENTERNET_MODEL_PATH 
158 | CENTERNET_MODEL_NAME 
159 | CENTERNET_LOSS / CENTERNET_DATA_LOADER 
160 | 
161 | A2J_BACKBONE_NAME 
162 | A2J_MODEL_PATH 
163 | 
164 | :return: str, str: path to the CenterNet model, path to the A2J model 
165 | """ 
166 | centernet_model_path = const.CENTERNET_MODEL_PATH 
167 | bb_models = glob(f"{centernet_model_path}/{const.CENTERNET_LOSS}_{const.CENTERNET_MODEL_NAME}_{const.CENTERNET_DATA_LOADER}.pth") 
168 | if not bb_models: 
169 | print(f"\n" 
170 |       f"There are no CenterNet model checkpoints at:\n" 
171 |       f"\n" 
172 |       f"{centernet_model_path}\n" 
173 |       f"\n" 
174 |       f"Please train a model or change the directory in constants.py\n") 
175 | 
176 | exit(-1) 
177 | 
178 | backbone_name = [elem[0] for idx, elem in enumerate(const.A2J_BACKBONE_NAME.items()) if elem[1]][0] 
179 | a2j_model_path = const.A2J_MODEL_PATH 
180 | a2j_model_path = f"{a2j_model_path}/{const.DATASET}_{const.DATA_SEGMENT}_{backbone_name}_{const.NUM_JOINTS}_a2j.pth" 
181 | a2j_models = glob(a2j_model_path) 
182 | 
183 | if not a2j_models: 
184 | print(f"\n" 
185 |       f"There is no A2J model with the {backbone_name} backbone in the checkpoints at:\n" 
186 |       f"\n" 
187 |       f"{a2j_model_path}\n" 
188 |       f"\n" 
189 |       f"Please train a model or change the directory in constants.py\n") 
190 | 
191 | exit(-1) 
192 | 
193 | return bb_models[0], a2j_models[0] 
194 | 
195 | # Image Transforms 
196 | def normalize(image: np.array, img_shape=tuple): 
197 | """ 
198 | Resize the image to img_shape and normalize it to zero mean and unit standard deviation 
199 | 
200 | :param image: numpy array 
201 | :return: normalized image cast to a torch tensor 
202 | """ 
203 | image = cv2.resize(image, img_shape, interpolation=cv2.INTER_NEAREST) 
204 | mean = np.mean(image) 
205 | std = image.std() 
206 | if std==0: 
207 | std = 1 
208 | new_image = (image - mean) / std 
209 | 
210 | # Cast to pytorch and expand dimensions for the model forward pass 
211 | new_image = torch.from_numpy(new_image).type(torch.float32) 
212 | 
213 | new_image = new_image.unsqueeze(0) 
214 | 
215 | return new_image 
216 | 
217 | def centernet_img_transform(depth_image: np.array, ir_image: np.array, img_shape=const.INPUT_IMG_SIZE, input_type=const.CENTERNET_DATA_LOADER): 
218 | """ 
219 | Transform the raw depth/IR images into the CenterNet input tensor 
220 | 
221 | :param depth_image: np.array (uint16), depth image 
222 | :param ir_image: np.array (uint16), IR image 
223 | :param img_shape: tuple (h, w), image size 
224 | :param input_type: str, which input type ("fused" or "depth") the model uses 
225 | :return: (1, 3, h, w) float tensor 
226 | """ 
227 | def depth_input(depth_image: np.array, **kwargs): 
228 | c, h, w = depth_image.size() 
229 | depth_image = depth_image[0:1,:,:] # depth 
230 | new_image = depth_image.expand(1, 3, h, w) 
231 | return new_image 
232 | 
233 | def fused_input(depth_image: np.array, ir_image:np.array): 
234 | c, h, w = depth_image.size() 
235 | new_image_c1 = depth_image.type(torch.float32) 
236 | new_image_c2 = ir_image.type(torch.float32) 
237 | new_image_c3 = (new_image_c1 + new_image_c2) / 2 
238 | new_image = torch.cat((new_image_c1, new_image_c2, new_image_c3), 0) 
239 | new_image = new_image.expand(1, 3, h, w) 
240 | return new_image 
241 | 
242 | input_switcher = { 
243 | "depth": depth_input, 
244 | "fused": fused_input, 
245 | } 
246 | depth_image = normalize(depth_image, img_shape=img_shape) 
247 | ir_image = normalize(ir_image, img_shape=img_shape) 
248 | 
249 | return input_switcher[input_type](depth_image=depth_image, ir_image=ir_image) 
250 | 
251 | 
252 | ######################### 
253 | ##### Model Helpers ##### 
254 | ######################### 
255 | 
256 | def find_prediction_mask(pred_heatmap: torch.tensor, window_size=11, threshold=const.THRESHOLD_ACC): 
257 | """ 
258 | Find the peak mask of a given heatmap. Keep in mind that the heatmap might not have values as large as 
259 | 1, so we need to find the local maxima of the heatmap. 
260 | 
261 | :param pred_heatmap: torch.tensor, predicted heatmap by the model 
262 | :param window_size: int, size of the maxPooling window 
263 | :return: torch.tensor (mask of the heatmap) 
264 | """ 
265 | pred_local_max = torch.max_pool2d(pred_heatmap[None, None, ...], kernel_size=window_size, stride=1, padding=window_size//2) 
266 | return (pred_local_max == pred_heatmap) * (pred_heatmap > threshold) 
267 | 
268 | def get_bboxes(yx_locations: torch.tensor, height: torch.tensor, width: torch.tensor,\ 
269 | offset_x: torch.tensor, offset_y: torch.tensor, stride=const.CENTERNET_STRIDE, img_shape=const.CENTERNET_IMG_SHAPE): 
270 | """ 
271 | Create a list of bounding boxes [[x_min, y_min, w, h], ...] 
272 | 
273 | :param yx_locations: torch.tensor, X and Y locations in the heatmap; they have to be multiplied by the stride to go back to the original dims 
274 | :param height: torch.tensor, The height of the bbox 
275 | :param width: torch.tensor, The width of the bbox 
276 | :param offset_x: torch.tensor, The X offset value 
277 | :param offset_y: torch.tensor, The Y offset value 
278 | """ 
279 | yx_locations *= stride 
280 | bboxes = [] 
281 | for i, yx_location in enumerate(yx_locations): 
282 | y_center = yx_location[0].item() + offset_y[i].item() 
283 | x_center = yx_location[1].item() + offset_x[i].item() 
284 | h = height[i].item() 
285 | w = width[i].item() 
286 | 
287 | x_min = max(0, x_center - w/2) 
288 | y_min = max(0, y_center - h/2) 
289 | 
290 | bboxes.append([x_min, y_min, w, h]) 
291 | 
292 | return bboxes 
293 | 
294 | def get_median_depth(img, xy_locs:list): 
295 | """ 
296 | Get the median depth of the hand 
297 | 
298 | :param img: numpy array, depth image 
299 | :param xy_locs: list, [x_min, y_min, x_max, y_max] locations of the bounding box 
300 | :return: float, median depth 
301 | """ 
302 | return np.median(img) 
303 | 
304 | def a2j_depth_image_transform(img, xy_locs: list, target_size=const.A2J_TARGET_SIZE, depth_thresh=const.DEPTH_THRESHOLD): 
305 | """ 
306 | Transform the depth image to the appropriate format for running through the model 
307 | 
308 | :param img: numpy array, depth image 
309 | :param xy_locs: list, [x_min, y_min, x_max, y_max] locations of the bounding box 
310 | :param target_size: tuple, input target size of the A2J network 
311 | :param depth_thresh: int, depth threshold used to suppress unwanted (background) pixels 
312 | :return: processed depth image to feed into the A2J model, and the median depth of the hand crop 
313 | """ 
314 | 
315 | img_output = np.ones((target_size[1], target_size[0], 1), dtype="float32") 
316 | 
317 | new_Xmin = xy_locs[0] 
318 | new_Ymin = xy_locs[1] 
319 | new_Xmax = xy_locs[2] 
320 | new_Ymax = xy_locs[3] 
321 | 
322 | img_crop = img[new_Ymin:new_Ymax, new_Xmin:new_Xmax] 
323 | median_depth = get_median_depth(img_crop, xy_locs) 
324 | 
325 | center_x = (new_Xmax+new_Xmin)/2 
326 | center_y = (new_Ymax+new_Ymin)/2 
327 | new_Xmin = int(max(center_x-110, 0)) 
328 | new_Ymin = int(max(center_y-110, 0)) 
329 | new_Xmax = int(min(center_x+110, img.shape[1]-1)) 
330 | new_Ymax = int(min(center_y+110, img.shape[0]-1)) 
331 | img_crop = img[new_Ymin:new_Ymax, new_Xmin:new_Xmax] 
332 | 
333 | img_resize = cv2.resize(img_crop, target_size, interpolation=cv2.INTER_NEAREST) 
334 | img_resize = np.asarray(img_resize, dtype="float32") # cast to float32 before thresholding 
335 | img_resize[np.where(img_resize >= median_depth + depth_thresh)] = median_depth 
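# The clamp above and the one below flatten depth values farther than depth_thresh from the hand's median depth to the median, suppressing the background before normalization. 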
336 | img_resize[np.where(img_resize <= median_depth - depth_thresh)] = median_depth 337 | img_resize = (img_resize - median_depth) 338 | img_resize = (img_resize - MEAN)/STD 339 | 340 | img_output[:,:,0] = img_resize 341 | 342 | 343 | img_output = np.asarray(img_output) 344 | img_NCHW_out = img_output.transpose(2, 0, 1) 345 | img_NCHW_out = np.asarray(img_NCHW_out) 346 | 347 | img_out = torch.from_numpy(img_NCHW_out) 348 | img_out = img_out.unsqueeze(0) 349 | 350 | # n, c, h, w = img_out.size() 351 | # img_out = img_out.expand(n, 3, h, w) 352 | 353 | return img_out, median_depth 354 | 355 | def back_to_normal(pred_joints, xy_locs:list, median_depths:float, target_size=const.A2J_TARGET_SIZE): 356 | """ 357 | Transform the predicted joint to the original space 358 | 359 | :param pred_joints: list of np.array, list of predicted joints 360 | :param xy_locs: list, [x_min, y_min, x_max, y_max] locations of the bounding box 361 | :param median_depth: float, the value of median depth 362 | """ 363 | 364 | normalized_joints = [] 365 | for i in range(len(pred_joints)): 366 | pred_joint = pred_joints[i].cpu() 367 | pred_joint = pred_joint.detach().numpy() 368 | 369 | xy_bb = xy_locs[i] 370 | median_depth = median_depths[i] 371 | 372 | p_j = np.ones((const.NUM_JOINTS, 3)) 373 | x_len = abs(xy_bb[0] - xy_bb[2]) 374 | y_len = abs(xy_bb[1] - xy_bb[3]) 375 | 376 | p_j[:,0] = ((pred_joint[:,1] * x_len) / target_size[0]) + xy_bb[0] 377 | p_j[:,1] = ((pred_joint[:,0] * y_len) / target_size[1]) + xy_bb[1] 378 | p_j[:,2] = pred_joint[:,2] + median_depth 379 | 380 | normalized_joints.append(p_j) 381 | 382 | return normalized_joints 383 | 384 | 385 | def get_xyz_lims(pred_joints_collections): 386 | max_range = [0, 0, 0] 387 | min_range = [float("inf"), float("inf"), float("inf")] 388 | 389 | for pred_joints in pred_joints_collections: 390 | min_x = pred_joints[:,0].min() 391 | if min_x < min_range[0]: 392 | min_range[0] = min_x 393 | min_y = pred_joints[:,1].min() 394 | if min_y < min_range[1]: 395 | min_range[1] = min_y 396 | min_z = pred_joints[:,2].min() 397 | if min_z < min_range[2]: 398 | min_range[2] = min_z 399 | 400 | max_x = pred_joints[:,0].max() 401 | if max_x > max_range[0]: 402 | max_range[0] = max_x 403 | max_y = pred_joints[:,1].max() 404 | if max_y > max_range[1]: 405 | max_range[1] = max_y 406 | max_z = pred_joints[:,2].max() 407 | if max_z > max_range[2]: 408 | max_range[2] = max_z 409 | 410 | return max_range, min_range 411 | 412 | def vizualize_frams(ax_2, pred_joints_collections): 413 | pred_joints_collections = np.array(pred_joints_collections) 414 | 415 | max_range, min_range = get_xyz_lims(pred_joints_collections) 416 | 417 | mid_x = (max_range[0] + min_range[0])/2 418 | mid_y = (max_range[1] + min_range[1])/2 419 | mid_z = (max_range[2] + min_range[2])/2 420 | 421 | # Second subplot 422 | ax_2.grid(True) 423 | ax_2.set_xticklabels([]) 424 | ax_2.set_yticklabels([]) 425 | ax_2.set_zticklabels([]) 426 | 427 | ax_2.set_xlim(mid_x - max_range[0]/2, mid_x + max_range[0]/2) 428 | ax_2.set_ylim(mid_y - max_range[1]/2, mid_y + max_range[1]/2) 429 | ax_2.set_zlim(mid_z - max_range[2]/2, mid_z + max_range[2]/2) 430 | 431 | scats = [] 432 | for pred_joints in pred_joints_collections: 433 | ax_2.scatter(pred_joints[:,0], pred_joints[:,1], pred_joints[:,2], c='r', marker='^', s=10) 434 | 435 | # MY SCRIPT 436 | if const.NUM_JOINTS == 36: 437 | ax_2.plot(pred_joints[0:6,0], pred_joints[0:6,1], pred_joints[0:6,2], color='b') 438 | ax_2.plot(pred_joints[6:12,0], pred_joints[6:12,1], 
pred_joints[6:12,2], color='b') 439 | ax_2.plot(pred_joints[12:18,0], pred_joints[12:18,1], pred_joints[12:18,2], color='b') 440 | ax_2.plot(pred_joints[18:24,0], pred_joints[18:24,1], pred_joints[18:24,2], color='b') 441 | ax_2.plot(pred_joints[24:30,0], pred_joints[24:30,1], pred_joints[24:30,2], color='b') 442 | 443 | 444 | # MY SCRIPT 16 JOINTS 445 | if const.NUM_JOINTS == 16: 446 | ax_2.plot(pred_joints[0:3,0], pred_joints[0:3,1], pred_joints[0:3,2], color='b') 447 | ax_2.plot(pred_joints[3:6,0], pred_joints[3:6,1], pred_joints[3:6,2], color='b') 448 | ax_2.plot(pred_joints[6:9,0], pred_joints[6:9,1], pred_joints[6:9,2], color='b') 449 | ax_2.plot(pred_joints[9:12,0], pred_joints[9:12,1], pred_joints[9:12,2], color='b') 450 | ax_2.plot(pred_joints[12:15,0], pred_joints[12:15,1], pred_joints[12:15,2], color='b') 451 | ax_2.plot([pred_joints[2,0], pred_joints[15,0]], [pred_joints[2,1], pred_joints[15,1]], [pred_joints[2,2], pred_joints[15,2]], color='b') 452 | ax_2.plot([pred_joints[5,0], pred_joints[15,0]], [pred_joints[5,1], pred_joints[15,1]], [pred_joints[5,2], pred_joints[15,2]], color='b') 453 | ax_2.plot([pred_joints[8,0], pred_joints[15,0]], [pred_joints[8,1], pred_joints[15,1]], [pred_joints[8,2], pred_joints[15,2]], color='b') 454 | ax_2.plot([pred_joints[11,0], pred_joints[15,0]], [pred_joints[11,1], pred_joints[15,1]], [pred_joints[11,2], pred_joints[15,2]], color='b') 455 | ax_2.plot([pred_joints[14,0], pred_joints[15,0]], [pred_joints[14,1], pred_joints[15,1]], [pred_joints[14,2], pred_joints[15,2]], color='b') 456 | 457 | 458 | 459 | ax_2.view_init(-70, -70) 460 | 461 | return scats 462 | -------------------------------------------------------------------------------- /readme_files/realtime_inference.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/realtime_handpose_3d/3f5ae9ccbf07defc39de7ce9e8b2213dda3be375/readme_files/realtime_inference.gif --------------------------------------------------------------------------------