├── assert
│   └── demo.gif
├── .gitignore
├── requirement.txt
├── config.py
├── README.md
├── dataset.py
├── cuboid_pnp_solver.py
├── cuboid.py
├── train.py
├── eval.py
├── demo.py
├── utils.py
└── network
    └── rtpose_vgg.py
/assert/demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chinancheng/DOPE.pytorch/HEAD/assert/demo.gif
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | /data/*
3 | /temp/*
4 | /output/*
5 | /model/*
6 | /logs/*
7 | /__pycache__
8 | /network/__pycache__
9 | 
--------------------------------------------------------------------------------
/requirement.txt:
--------------------------------------------------------------------------------
1 | opencv-python
2 | torch==0.4.1
3 | torchvision==0.2.1
4 | numpy
5 | pyrr
6 | tensorboardX
7 | argparse
8 | scipy
9 | tqdm
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | class Config(object):
2 | 
3 |     stride = 8
4 |     crop_size = 256
5 |     sigma = 5.0
6 |     test_sigma = 0.1
7 |     thresh_map = 0.1
8 |     thresh_points = 0.5
9 |     thresh_angle = 0.1
10 |     grid = crop_size / stride
11 |     vec_width = 3.0
12 |     output_stage = 6
13 |     threshold = 0.1
14 |     @classmethod
15 |     def describe(cls):
16 |         text = 'Config:\n'
17 |         attrs = [attr for attr in dir(cls) if not callable(getattr(cls, attr)) and not attr.startswith('__')]
18 |         for attr in attrs:
19 |             text += '\t{:s} = {:s}\n'.format(attr, str(getattr(cls, attr)))
20 |         return text
21 | 
22 | 
23 | if __name__ == '__main__':
24 |     def main():
25 |         print(Config.describe())
26 |     main()
27 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DOPE.pytorch
2 | This is an unofficial implementation of **[DOPE (Deep Object Pose Estimation for Semantic Robotic Grasping of Household Objects)](https://arxiv.org/abs/1809.10790)**, trained on a self-created synthetic bottle dataset.
3 | 
4 | ![](./assert/demo.gif)
5 | 
6 | ## Requirement
7 | ```
8 | $ conda create -n DOPE python=3.6
9 | $ conda activate DOPE
10 | $ pip install -r requirement.txt
11 | ```
12 | 
13 | ## Usage
14 | ### Train and Eval
15 | [WIP]
16 | 
17 | ### Demo
18 | * Logs
19 | [[Download](https://drive.google.com/drive/folders/1DtJ_yOZ946_tn-zGtEIdp2IkRRbtFzza?usp=sharing)]
20 | ```
21 | DOPE.pytorch
22 | - logs
23 |   - Jack_Daniels-checkpoint.pth
24 |   - Jose_Cuervo-checkpoint.pth
25 | ```
26 | 
27 | 
28 | * Data
29 | ```
30 | DOPE.pytorch
31 | - data
32 |   - Real_bottle_sequence
33 |     - 000001.jpg
34 |     - 000002.jpg
35 |     ...
36 |     - _object_settings.json
37 |     - _camera_settings.json
38 | 
39 | ```
40 | * Run
41 | ```
42 | python demo.py \
43 |     --path_to_data_dir ./data/Real_bottle_sequence \
44 |     --class_name Jack_Daniels \
45 |     --checkpoint ./logs/Jack_Daniels-checkpoint.pth \
46 |     --plot
47 | ```
48 | 
49 | ## Reference
50 | * DOPE (Deep Object Pose Estimation for Semantic Robotic Grasping of Household Objects) [[paper link](https://arxiv.org/abs/1809.10790)]
51 | * Some code is borrowed from the repositories below.
52 | * Official implementation from nvidia (inference code with ROS) [[Deep_Object_Pose](https://github.com/NVlabs/Deep_Object_Pose)] 53 | * Realtime_Multi-Person_Pose_Estimation [[pytorch_Realtime_Multi-Person_Pose_Estimation](https://github.com/tensorboy/pytorch_Realtime_Multi-Person_Pose_Estimation)] 54 | -------------------------------------------------------------------------------- /dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import json 3 | from torch.utils.data import dataset 4 | import glob 5 | import os 6 | import cv2 7 | from config import Config 8 | from utils import * 9 | import glob 10 | 11 | 12 | class Dataset(dataset.Dataset): 13 | def __init__(self, path_to_data, class_name, split='train', img_prefix='png'): 14 | self.split = split 15 | self.img_prefix = img_prefix 16 | self._path_to_data = path_to_data 17 | self._path_to_sequences = glob.glob(os.path.join(path_to_data, split, '*.json')) 18 | self.class_name = class_name 19 | self._num_sequences = len(self._path_to_sequences) 20 | self.crop_size = Config.crop_size 21 | self.stride = Config.stride 22 | self.grid = self.crop_size / self.stride 23 | 24 | def __len__(self): 25 | return self._num_sequences 26 | 27 | def __getitem__(self, index): 28 | label_file_path = self._path_to_sequences[index] 29 | img_file_path = label_file_path.replace('json', self.img_prefix) 30 | vertices = [] 31 | locations = [] 32 | img = cv2.imread(img_file_path) 33 | img = crop(img) 34 | self.ratio = img.shape[0]/self.crop_size 35 | img = cv2.resize(img, (self.crop_size, self.crop_size)) 36 | with open(label_file_path) as f: 37 | labels = json.load(f) 38 | for label in labels['objects']: 39 | if label['class'].find(self.class_name) != -1: 40 | vertex = label['projected_cuboid'] 41 | centroid = label['projected_cuboid_centroid'] 42 | location = label['location'] 43 | vertex.append(centroid) 44 | vertices.append(vertex) 45 | locations.append(location) 46 | heatmaps, pafs = self.get_ground_truth(vertices) 47 | img = preprocess(img).float() 48 | heatmaps = heatmaps.transpose((2, 0, 1)) 49 | heatmaps = torch.tensor(heatmaps, dtype=torch.float) 50 | pafs = pafs.transpose((2, 0, 1)) 51 | pafs = torch.tensor(pafs, dtype=torch.float) 52 | if len(locations) != 0: 53 | locations = torch.tensor(locations, dtype=torch.float) 54 | 55 | return img, heatmaps, pafs, locations, self.ratio 56 | 57 | def get_ground_truth(self, vertices): 58 | heatmaps = np.zeros((int(self.grid), int(self.grid), 9)) 59 | pafs = np.zeros((int(self.grid), int(self.grid), 16)) 60 | for vertex in vertices: 61 | for idx, point in enumerate(vertex): 62 | point = tuple([int(i/self.ratio) for i in point]) 63 | center_point = tuple([int(i/self.ratio) for i in vertex[-1]]) 64 | 65 | gaussian_map = heatmaps[:, :, idx] 66 | if idx < 8: 67 | count = np.zeros((int(self.grid), int(self.grid)), dtype=np.uint32) 68 | if point[0] >= 0 and point[0] < Config.crop_size and point[1] >= 0 and point[1] < Config.crop_size: 69 | centerA = np.array(point) 70 | centerB = np.array(center_point) 71 | pafs[:, :, idx*2:idx*2+2], count = generate_vecmap(centerA, centerB, pafs[:, :, idx*2:idx*2+2], count) 72 | heatmaps[:, :, idx] = generate_gaussianmap(point, gaussian_map) 73 | 74 | return heatmaps, pafs 75 | 76 | 77 | if __name__ == '__main__': 78 | def main(): 79 | path_to_data_dir = '/media/external/Bottle_dataset_split' 80 | dataset = Dataset(path_to_data_dir) 81 | print('len(dataset):', len(dataset)) 82 | print(dataset[0]) 83 | main() 84 | 
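For orientation, a minimal sketch of what a single `Dataset` sample yields, assuming the split layout that `train.py` expects (`<data_dir>/train/*.json` label files with matching `.png` images); the dataset path below is illustrative only, not a path shipped with the repository.

```python
from torch.utils.data import DataLoader

from config import Config
from dataset import Dataset

# hypothetical dataset root; point this at your own synthetic-bottle export
ds = Dataset(path_to_data='./data/Bottle_dataset_split', class_name='Jack_Daniels',
             split='train', img_prefix='png')
img, heatmaps, pafs, locations, ratio = ds[0]

grid = int(Config.crop_size / Config.stride)   # 256 / 8 = 32
print(img.shape)        # torch.Size([3, 256, 256])  normalized, square-cropped RGB
print(heatmaps.shape)   # torch.Size([9, 32, 32])    belief maps at grid resolution
print(pafs.shape)       # torch.Size([16, 32, 32])   x/y vector fields at grid resolution

# the same object is consumed batch-wise by train.py
loader = DataLoader(ds, batch_size=4, shuffle=True, drop_last=True)
```

`heatmaps` holds one belief map per cuboid corner plus one for the centroid, and `pafs` holds an x/y vector field for each corner-to-centroid edge, both at the network's output stride of 8. `locations` stays an empty Python list when no instance of `class_name` appears in a frame, while the other outputs keep their fixed shapes.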
-------------------------------------------------------------------------------- /cuboid_pnp_solver.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 NVIDIA Corporation. All rights reserved. 2 | # This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. 3 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 4 | 5 | import numpy as np 6 | from pyrr import Quaternion 7 | import cv2 8 | from cuboid import * 9 | 10 | 11 | class CuboidPNPSolver(object): 12 | """ 13 | This class is used to find the 6-DoF pose of a cuboid given its projected vertices. 14 | 15 | Runs perspective-n-point (PNP) algorithm. 16 | """ 17 | 18 | # Class variables 19 | cv2version = cv2.__version__.split('.') 20 | cv2majorversion = int(cv2version[0]) 21 | 22 | def __init__(self, object_name="", camera_intrinsic_matrix = None, cuboid3d = None, 23 | dist_coeffs = np.zeros((4, 1))): 24 | self.object_name = object_name 25 | if (not camera_intrinsic_matrix is None): 26 | self._camera_intrinsic_matrix = camera_intrinsic_matrix 27 | else: 28 | camera_intrinsic_matrix = np.array([ 29 | [0, 0, 0], 30 | [0, 0, 0], 31 | [0, 0, 0] 32 | ]) 33 | self._cuboid3d = cuboid3d 34 | 35 | self._dist_coeffs = dist_coeffs 36 | 37 | def set_camera_intrinsic_matrix(self, new_intrinsic_matrix): 38 | '''Sets the camera intrinsic matrix''' 39 | self._camera_intrinsic_matrix = new_intrinsic_matrix 40 | 41 | def solve_pnp(self, cuboid2d_points, pnp_algorithm = None): 42 | """ 43 | Detects the rotation and traslation 44 | of a cuboid object from its vertexes' 45 | 2D location in the image 46 | """ 47 | 48 | # Fallback to default PNP algorithm base on OpenCV version 49 | if pnp_algorithm is None: 50 | if CuboidPNPSolver.cv2majorversion == 2: 51 | pnp_algorithm = cv2.CV_ITERATIVE 52 | elif CuboidPNPSolver.cv2majorversion == 3: 53 | pnp_algorithm = cv2.SOLVEPNP_ITERATIVE 54 | # Alternative algorithms: 55 | # pnp_algorithm = SOLVE_PNP_P3P 56 | # pnp_algorithm = SOLVE_PNP_EPNP 57 | 58 | location = None 59 | quaternion = None 60 | projected_points = cuboid2d_points 61 | 62 | cuboid3d_points = np.array(self._cuboid3d.get_vertices()) 63 | obj_2d_points = [] 64 | obj_3d_points = [] 65 | 66 | for i in range(CuboidVertexType.TotalVertexCount): 67 | check_point_2d = cuboid2d_points[i] 68 | # Ignore invalid points 69 | if (check_point_2d is None): 70 | continue 71 | obj_2d_points.append(check_point_2d) 72 | obj_3d_points.append(cuboid3d_points[i]) 73 | 74 | obj_2d_points = np.array(obj_2d_points, dtype=float) 75 | obj_3d_points = np.array(obj_3d_points, dtype=float) 76 | 77 | valid_point_count = len(obj_2d_points) 78 | 79 | # Can only do PNP if we have more than 3 valid points 80 | is_points_valid = valid_point_count >= 4 81 | 82 | if is_points_valid: 83 | 84 | ret, rvec, tvec = cv2.solvePnP( 85 | obj_3d_points, 86 | obj_2d_points, 87 | self._camera_intrinsic_matrix, 88 | self._dist_coeffs, 89 | flags=pnp_algorithm 90 | ) 91 | 92 | if ret: 93 | location = list(x[0] for x in tvec) 94 | quaternion = self.convert_rvec_to_quaternion(rvec) 95 | 96 | projected_points, _ = cv2.projectPoints(cuboid3d_points, rvec, tvec, self._camera_intrinsic_matrix, self._dist_coeffs) 97 | projected_points = np.squeeze(projected_points) 98 | 99 | # If the location.Z is negative or object is behind the camera then flip both location and rotation 100 | x, y, z = location 101 | if z < 0: 102 | # Get the opposite location 103 | location = [-x, -y, -z] 104 | 105 | # Change 
the rotation by 180 degree 106 | rotate_angle = np.pi 107 | rotate_quaternion = Quaternion.from_axis_rotation(location, rotate_angle) 108 | quaternion = rotate_quaternion.cross(quaternion) 109 | 110 | return location, quaternion, projected_points 111 | 112 | def convert_rvec_to_quaternion(self, rvec): 113 | '''Convert rvec (which is log quaternion) to quaternion''' 114 | theta = np.sqrt(rvec[0] * rvec[0] + rvec[1] * rvec[1] + rvec[2] * rvec[2]) # in radians 115 | raxis = [rvec[0] / theta, rvec[1] / theta, rvec[2] / theta] 116 | 117 | # pyrr's Quaternion (order is XYZW), https://pyrr.readthedocs.io/en/latest/oo_api_quaternion.html 118 | return Quaternion.from_axis_rotation(raxis, theta) 119 | 120 | # Alternatively: pyquaternion 121 | # return Quaternion(axis=raxis, radians=theta) # uses OpenCV's Quaternion (order is WXYZ) 122 | 123 | def project_points(self, rvec, tvec): 124 | '''Project points from model onto image using rotation, translation''' 125 | output_points, tmp = cv2.projectPoints( 126 | self.__object_vertex_coordinates, 127 | rvec, 128 | tvec, 129 | self.__camera_intrinsic_matrix, 130 | self.__dist_coeffs) 131 | 132 | output_points = np.squeeze(output_points) 133 | 134 | return output_points 135 | -------------------------------------------------------------------------------- /cuboid.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 NVIDIA Corporation. All rights reserved. 2 | # This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. 3 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 4 | 5 | from enum import IntEnum, unique 6 | import numpy as np 7 | import cv2 8 | from pyrr import Quaternion, Matrix44, Vector3, euler 9 | 10 | # Related to the object's local coordinate system 11 | # @unique 12 | class CuboidVertexType(IntEnum): 13 | FrontTopRight = 0 14 | FrontTopLeft = 1 15 | FrontBottomLeft = 2 16 | FrontBottomRight = 3 17 | RearTopRight = 4 18 | RearTopLeft = 5 19 | RearBottomLeft = 6 20 | RearBottomRight = 7 21 | Center = 8 22 | TotalCornerVertexCount = 8 # Corner vertexes doesn't include the center point 23 | TotalVertexCount = 9 24 | 25 | # List of the vertex indexes in each line edges of the cuboid 26 | CuboidLineIndexes = [ 27 | # Front face 28 | [ CuboidVertexType.FrontTopLeft, CuboidVertexType.FrontTopRight ], 29 | [ CuboidVertexType.FrontTopRight, CuboidVertexType.FrontBottomRight ], 30 | [ CuboidVertexType.FrontBottomRight, CuboidVertexType.FrontBottomLeft ], 31 | [ CuboidVertexType.FrontBottomLeft, CuboidVertexType.FrontTopLeft ], 32 | # Back face 33 | [ CuboidVertexType.RearTopLeft, CuboidVertexType.RearTopRight ], 34 | [ CuboidVertexType.RearTopRight, CuboidVertexType.RearBottomRight ], 35 | [ CuboidVertexType.RearBottomRight, CuboidVertexType.RearBottomLeft ], 36 | [ CuboidVertexType.RearBottomLeft, CuboidVertexType.RearTopLeft ], 37 | # Left face 38 | [ CuboidVertexType.FrontBottomLeft, CuboidVertexType.RearBottomLeft ], 39 | [ CuboidVertexType.FrontTopLeft, CuboidVertexType.RearTopLeft ], 40 | # Right face 41 | [ CuboidVertexType.FrontBottomRight, CuboidVertexType.RearBottomRight ], 42 | [ CuboidVertexType.FrontTopRight, CuboidVertexType.RearTopRight ], 43 | ] 44 | 45 | 46 | # ========================= Cuboid3d ========================= 47 | class Cuboid3d(): 48 | '''This class contains a 3D cuboid.''' 49 | 50 | # Create a box with a certain size 51 | def __init__(self, size3d = [1.0, 1.0, 1.0], center_location = [0, 0, 0], 52 | 
coord_system = None, parent_object = None): 53 | 54 | # NOTE: This local coordinate system is similar 55 | # to the intrinsic transform matrix of a 3d object 56 | self.center_location = center_location 57 | self.coord_system = coord_system 58 | self.size3d = size3d 59 | self._vertices = [0, 0, 0] * CuboidVertexType.TotalVertexCount 60 | 61 | self.generate_vertexes() 62 | 63 | def get_vertex(self, vertex_type): 64 | """Returns the location of a vertex. 65 | 66 | Args: 67 | vertex_type: enum of type CuboidVertexType 68 | 69 | Returns: 70 | Numpy array(3) - Location of the vertex type in the cuboid 71 | """ 72 | return self._vertices[vertex_type] 73 | 74 | def get_vertices(self): 75 | return self._vertices 76 | 77 | def generate_vertexes(self): 78 | width, height, depth = self.size3d 79 | 80 | # By default just use the normal OpenCV coordinate system 81 | if (self.coord_system is None): 82 | cx, cy, cz = self.center_location 83 | # X axis point to the right 84 | right = cx + width / 2.0 85 | left = cx - width / 2.0 86 | # Y axis point downward 87 | top = cy - height / 2.0 88 | bottom = cy + height / 2.0 89 | # Z axis point forward 90 | front = cz + depth / 2.0 91 | rear = cz - depth / 2.0 92 | 93 | # List of 8 vertices of the box 94 | self._vertices = [ 95 | [right, top, front], # Front Top Right 96 | [left, top, front], # Front Top Left 97 | [left, bottom, front], # Front Bottom Left 98 | [right, bottom, front], # Front Bottom Right 99 | [right, top, rear], # Rear Top Right 100 | [left, top, rear], # Rear Top Left 101 | [left, bottom, rear], # Rear Bottom Left 102 | [right, bottom, rear], # Rear Bottom Right 103 | self.center_location, # Center 104 | ] 105 | else: 106 | sx, sy, sz = self.size3d 107 | forward = np.array(self.coord_system.forward, dtype=float) * sy * 0.5 108 | up = np.array(self.coord_system.up, dtype=float) * sz * 0.5 109 | right = np.array(self.coord_system.right, dtype=float) * sx * 0.5 110 | center = np.array(self.center_location, dtype=float) 111 | self._vertices = [ 112 | center + forward + up + right, # Front Top Right 113 | center + forward + up - right, # Front Top Left 114 | center + forward - up - right, # Front Bottom Left 115 | center + forward - up + right, # Front Bottom Right 116 | center - forward + up + right, # Rear Top Right 117 | center - forward + up - right, # Rear Top Left 118 | center - forward - up - right, # Rear Bottom Left 119 | center - forward - up + right, # Rear Bottom Right 120 | self.center_location, # Center 121 | ] 122 | 123 | def get_projected_cuboid2d(self, cuboid_transform, camera_intrinsic_matrix): 124 | """ 125 | Projects the cuboid into the image plane using camera intrinsics. 
126 | 127 | Args: 128 | cuboid_transform: the world transform of the cuboid 129 | camera_intrinsic_matrix: camera intrinsic matrix 130 | 131 | Returns: 132 | Cuboid2d - the projected cuboid points 133 | """ 134 | 135 | world_transform_matrix = cuboid_transform 136 | rvec = [0, 0, 0] 137 | tvec = [0, 0, 0] 138 | dist_coeffs = np.zeros((4, 1)) 139 | 140 | transformed_vertices = [0, 0, 0] * CuboidVertexType.TotalVertexCount 141 | for vertex_index in range(CuboidVertexType.TotalVertexCount): 142 | vertex3d = self._vertices[vertex_index] 143 | transformed_vertices[vertex_index] = world_transform_matrix * vertex3d 144 | 145 | projected_vertices = cv2.projectPoints(transformed_vertices, rvec, tvec, 146 | camera_intrinsic_matrix, dist_coeffs) 147 | 148 | return Cuboid2d(projected_vertices) 149 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from config import Config 4 | from network.rtpose_vgg import get_model, use_vgg 5 | from dataset import Dataset 6 | from torch.utils.data import DataLoader 7 | import torch.nn as nn 8 | import torch.optim 9 | import numpy as np 10 | from tensorboardX import SummaryWriter 11 | 12 | 13 | def _train(class_name, path_to_data_dir, path_to_logs_dir, batch_size, epochs, restore): 14 | 15 | # create tensorboard 16 | writer = SummaryWriter(path_to_logs_dir) 17 | 18 | # dataloader 19 | train_dataset = Dataset(class_name=class_name, path_to_data=path_to_data_dir) 20 | train_dataloader = DataLoader(train_dataset, batch_size, shuffle=True, num_workers=0, drop_last=True) 21 | val_dataset = Dataset(path_to_data=path_to_data_dir, class_name=class_name, split='val') 22 | val_dataloader = DataLoader(val_dataset, batch_size, shuffle=False, num_workers=0, drop_last=True) 23 | 24 | # load model 25 | model = get_model(trunk='vgg19') 26 | model = model.cuda() 27 | use_vgg(model, './model', 'vgg19') 28 | 29 | # restore model 30 | if restore: 31 | model.load_state_dict(torch.load(restore)) 32 | 33 | model.train() 34 | 35 | # freeze low-level layer 36 | for i in range(20): 37 | for param in model.model0[i].parameters(): 38 | param.requires_grad = False 39 | trainable_vars = [param for param in model.parameters() if param.requires_grad] 40 | optimizer = torch.optim.Adam(trainable_vars, lr=0.0001) 41 | 42 | epoch = 0 43 | step = 1 44 | best_mse = 1.0 45 | 46 | while epoch != epochs: 47 | for batch_index, (images, heatmaps_target, pafs_target, _, _) in enumerate(train_dataloader): 48 | images = images.cuda() 49 | _, saved_for_loss = model(images) 50 | loss, heatmaps_losses, pafs_losses = _loss(saved_for_loss, heatmaps_target.cuda(), pafs_target.cuda()) 51 | optimizer.zero_grad() 52 | loss.backward() 53 | optimizer.step() 54 | if step % 10 == 0: 55 | print('Epoch: {}, Step: {}, Loss: {}' .format(epoch, step, loss.data.item())) 56 | writer.add_scalar('train_total_loss/loss', loss, step) 57 | for stage, (heatmaps_loss, pafs_loss) in enumerate(zip(heatmaps_losses, pafs_losses)): 58 | writer.add_scalar('train_heatmaps_loss/stage_{}' .format(str(stage)), heatmaps_loss, step) 59 | writer.add_scalar('train_pafs_loss/stage_{}' .format(str(stage)), pafs_loss, step) 60 | if step % 1000 == 0: 61 | pafs_loss, heatmaps_loss = _validate(model, val_dataloader) 62 | total_loss = pafs_loss + heatmaps_loss 63 | print('Validation Paf MSE: {} Heatmap MSE: {} Total MSE: {}' .format(pafs_loss, heatmaps_loss, total_loss)) 64 | 
                writer.add_scalar('val/heatmaps_loss', heatmaps_loss, step)
65 |                 writer.add_scalar('val/pafs_loss', pafs_loss, step)
66 |                 writer.add_scalar('val/total_loss', total_loss, step)
67 |                 if total_loss < best_mse:
68 |                     print('Save checkpoint')
69 |                     torch.save(model.state_dict(), os.path.join(path_to_logs_dir, '{}-checkpoint-best.pth'.format(class_name)))
70 |                     best_mse = total_loss
71 |                     print('Best MSE: {}'.format(total_loss))
72 |                 model.train()
73 |             step += 1
74 |         epoch += 1
75 |     print('Save checkpoint')
76 |     torch.save(model.state_dict(), os.path.join(path_to_logs_dir, '{}-checkpoint-last.pth'.format(class_name)))
77 | 
78 | 
79 | def _validate(model, val_dataloader):
80 |     model.eval()
81 |     total_pafs_mse = []
82 |     total_heatmaps_mse = []
83 |     for batch_index, (images, heatmaps_target, pafs_target, _, _) in enumerate(val_dataloader):  # dataset also yields locations and ratio
84 |         images = images.cuda()
85 |         out, _ = model(images)
86 |         pafs_pred = out[0].detach().cpu().numpy()
87 |         pafs_target = pafs_target.numpy()
88 |         heatmaps_pred = out[1].detach().cpu().numpy()
89 |         heatmaps_target = heatmaps_target.numpy()
90 |         pafs_mse = np.nanmean((pafs_pred - pafs_target) ** 2)
91 |         heatmaps_mse = np.nanmean((heatmaps_pred - heatmaps_target) ** 2)
92 |         total_pafs_mse.append(pafs_mse)
93 |         total_heatmaps_mse.append(heatmaps_mse)
94 |     total_pafs_mse = np.array(total_pafs_mse).mean()
95 |     total_heatmaps_mse = np.array(total_heatmaps_mse).mean()
96 | 
97 |     return total_pafs_mse, total_heatmaps_mse
98 | 
99 | 
100 | def _loss(saved_for_loss, heatmaps_target, pafs_target):
101 |     criterion = nn.MSELoss().cuda()
102 |     total_loss = 0
103 |     heatmaps_losses = []
104 |     pafs_losses = []
105 |     for i in range(Config.output_stage):
106 |         pafs_pred = saved_for_loss[2*i]
107 |         heatmaps_pred = saved_for_loss[2*i+1]
108 |         heatmaps_loss = criterion(heatmaps_pred, heatmaps_target)
109 |         heatmaps_losses.append(heatmaps_loss)
110 |         pafs_loss = criterion(pafs_pred, pafs_target)
111 |         pafs_losses.append(pafs_loss)
112 |         total_loss += heatmaps_loss + pafs_loss
113 | 
114 |     return total_loss, heatmaps_losses, pafs_losses
115 | 
116 | 
117 | if __name__ == '__main__':
118 | 
119 |     def main(args):
120 |         path_to_data_dir = args.path_to_data_dir
121 |         if not os.path.exists(path_to_data_dir):
122 |             raise FileNotFoundError(path_to_data_dir)
123 |         path_to_logs_dir = args.path_to_logs_dir
124 |         if path_to_logs_dir:
125 |             os.makedirs(path_to_logs_dir, exist_ok=True)
126 |         class_name = args.class_name
127 |         batch_size = args.batch_size
128 |         epochs = args.epochs
129 |         restore = args.restore_checkpoint
130 |         _train(class_name, path_to_data_dir, path_to_logs_dir, batch_size, epochs, restore)
131 | 
132 |     parser = argparse.ArgumentParser()
133 |     parser.add_argument('-d', '--path_to_data_dir', default='/media/ssd_external/Bottle_dataset_split', help='path to data directory')
134 |     parser.add_argument('-l', '--path_to_logs_dir', default='./logs', help='path to logs directory')
135 |     parser.add_argument('-b', '--batch_size', default=32, type=int, help='batch size')
136 |     parser.add_argument('-e', '--epochs', default=10, type=int, help='epochs')
137 |     parser.add_argument('-class', '--class_name', dest="class_name", choices=["Jack_Daniels", "Jose_Cuervo"], default="Jack_Daniels", type=str, help='the class name of object')
138 |     parser.add_argument('-r', '--restore_checkpoint', default=None,
139 |                         help='path to restore checkpoint file, e.g., ./logs/model-100.pth')
140 |     main(parser.parse_args())
141 | 
--------------------------------------------------------------------------------
/eval.py:
-------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from config import Config 4 | from network.rtpose_vgg import get_model, use_vgg 5 | from dataset import Dataset 6 | from torch.utils.data import DataLoader 7 | import torch.optim 8 | import numpy as np 9 | import json 10 | from utils import * 11 | from cuboid import Cuboid3d 12 | from pyrr import Quaternion 13 | from tqdm import tqdm 14 | 15 | 16 | def _eval(class_name, path_to_data_dir, path_to_checkpoint, img_prefix): 17 | 18 | # load pre-trained model 19 | model = get_model(trunk='vgg19') 20 | model = model.cuda() 21 | use_vgg(model, './model', 'vgg19') 22 | print("=> Load pre-trained model from {}".format(path_to_checkpoint)) 23 | model.load_state_dict(torch.load(path_to_checkpoint)) 24 | model.eval() 25 | 26 | # parameter of object size for pnp solver 27 | print("=> Load {} object size".format(class_name)) 28 | path_to_object_seetings = os.path.join(path_to_data_dir, '_object_settings.json') 29 | if not os.path.exists(path_to_object_seetings): 30 | raise FileNotFoundError(path_to_object_seetings) 31 | object_list = json.load(open(path_to_object_seetings))['exported_objects'] 32 | object_size = None 33 | for obj in object_list: 34 | if obj['class'].find(class_name) != -1: 35 | object_size = obj['cuboid_dimensions'] 36 | if not object_size: 37 | raise ValueError("Object size is none") 38 | _cuboid3d = Cuboid3d(object_size) 39 | cuboid3d_points = np.array(_cuboid3d.get_vertices()) 40 | 41 | # parameter of camera for pnp solver 42 | path_to_camera_seetings = os.path.join(path_to_data_dir, '_camera_settings.json') 43 | if not os.path.exists(path_to_camera_seetings): 44 | raise FileNotFoundError(path_to_camera_seetings) 45 | intrinsic_settings = json.load(open(path_to_camera_seetings))['camera_settings'][0]['intrinsic_settings'] 46 | matrix_camera = np.zeros((3, 3)) 47 | matrix_camera[0,0] = intrinsic_settings['fx'] 48 | matrix_camera[1,1] = intrinsic_settings['fy'] 49 | matrix_camera[0,2] = max(intrinsic_settings['cx'], intrinsic_settings['cy']) 50 | matrix_camera[1,2] = max(intrinsic_settings['cx'], intrinsic_settings['cy']) 51 | matrix_camera[2,2] = 1 52 | 53 | try: 54 | dist_coeffs = np.array(json.load(open(path_to_camera_seetings))['camera_settings'][0]["distortion_coefficients"]) 55 | except KeyError: 56 | dist_coeffs = np.zeros((4, 1)) 57 | 58 | # dataloader 59 | val_dataset = Dataset(path_to_data=path_to_data_dir, class_name=class_name, split='val', img_prefix=img_prefix) 60 | val_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False, num_workers=0, drop_last=False) 61 | 62 | correct = 0 63 | wrong = 0 64 | # set threshold (cm) 65 | threshold = 3.0 66 | 67 | for batch_index, (images, _, _, location_targets, ratio) in tqdm(enumerate(val_dataloader)): 68 | images = images.cuda() 69 | output, _ = model(images) 70 | line, vertex = output[0], output[1] 71 | line, vertex = line.squeeze(), vertex.squeeze() 72 | objects, peaks = find_objects(vertex, line) 73 | location_predictions = [] 74 | if len(objects) > 0: 75 | for object in objects: 76 | cuboid2d_points = object[1] + [(object[0][0] * 8, object[0][1] * 8)] 77 | cuboid3d_points = np.array(cuboid3d_points) 78 | location = None 79 | quaternion = None 80 | obj_2d_points = [] 81 | obj_3d_points = [] 82 | 83 | for i in range(8): 84 | check_point_2d = cuboid2d_points[i] 85 | # Ignore invalid points 86 | if (check_point_2d is None): 87 | continue 88 | elif check_point_2d[0] < 0 or check_point_2d[1] < 0 or 
check_point_2d[ 89 | 0] >= Config.crop_size / Config.stride or check_point_2d[1] >= Config.crop_size / Config.stride: 90 | continue 91 | else: 92 | check_point_2d = (check_point_2d[0] * Config.stride * ratio, check_point_2d[1] * Config.stride * ratio) 93 | obj_2d_points.append(check_point_2d) 94 | obj_3d_points.append(cuboid3d_points[i]) 95 | projected_points = object[1] 96 | vertexes = projected_points.copy() 97 | centroid = tuple([int(point * Config.stride * ratio) for point in object[0]]) 98 | obj_2d_points = np.array(obj_2d_points, dtype=np.float32) 99 | obj_3d_points = np.array(obj_3d_points, dtype=np.float32) 100 | valid_point_count = len(obj_2d_points) 101 | if valid_point_count >= 4: 102 | ret, rvec, tvec = cv2.solvePnP(obj_3d_points, obj_2d_points, matrix_camera, dist_coeffs, 103 | flags=cv2.SOLVEPNP_ITERATIVE) 104 | if ret: 105 | location = list(x[0] for x in tvec) 106 | quaternion = convert_rvec_to_quaternion(rvec) 107 | 108 | projected_points, _ = cv2.projectPoints(cuboid3d_points, rvec, tvec, matrix_camera, 109 | dist_coeffs) 110 | projected_points = np.squeeze(projected_points) 111 | # If the location.Z is negative or object is behind the camera then flip both location and rotation 112 | x, y, z = location 113 | if z < 0: 114 | # Get the opposite location 115 | location = [-x, -y, -z] 116 | # Change the rotation by 180 degree 117 | rotate_angle = np.pi 118 | rotate_quaternion = Quaternion.from_axis_rotation(location, rotate_angle) 119 | quaternion = rotate_quaternion.cross(quaternion) 120 | vertexes = [tuple(p) for p in projected_points] 121 | location_predictions.append(location) 122 | location_predictions = np.array(location_predictions) 123 | if len(location_targets) == 0: 124 | wrong += len(location_predictions) 125 | else: 126 | location_targets = location_targets.cpu().data.numpy()[0] 127 | for location_target in location_targets: 128 | distances = [np.sqrt(np.sum(np.square(location_target - location_prediction/10.0))) for location_prediction in location_predictions] 129 | if len(distances) == 0: 130 | pass 131 | wrong += 1 132 | elif min(distances) > threshold: 133 | wrong += 1 134 | else: 135 | correct += 1 136 | 137 | print('Object: {} Accuracy: {}%'.format(class_name, correct/(wrong+correct)*100.0)) 138 | 139 | 140 | if __name__ == '__main__': 141 | 142 | def main(args): 143 | path_to_data_dir = args.path_to_data_dir 144 | if not os.path.exists(path_to_data_dir): 145 | raise FileNotFoundError(path_to_data_dir) 146 | path_to_checkpoint = args.checkpoint 147 | if not os.path.exists(path_to_checkpoint): 148 | raise FileNotFoundError(path_to_data_dir) 149 | class_name = args.class_name 150 | img_prefix = args.img_prefix 151 | _eval(class_name, path_to_data_dir, path_to_checkpoint, img_prefix) 152 | 153 | parser = argparse.ArgumentParser() 154 | parser.add_argument('-d', '--path_to_data_dir', default='/media/ssd_external/Bottle_dataset_split', help='path to data directory') 155 | parser.add_argument('-class', '--class_name', dest="class_name", choices=["Jack_Daniels", "Jose_Cuervo"], default="Jack_Daniels", type=str, help='the class name of object') 156 | parser.add_argument('-c', '--checkpoint', dest="checkpoint", required=True, type=str, 157 | help='the path of model checkpoint') 158 | parser.add_argument('-prefix', '--img_prefix', dest="img_prefix", default="png", type=str, 159 | help='the image prefix') 160 | main(parser.parse_args()) 161 | -------------------------------------------------------------------------------- /demo.py: 
-------------------------------------------------------------------------------- 1 | from network.rtpose_vgg import get_model, use_vgg 2 | import cv2 3 | import glob 4 | import os 5 | import torch 6 | from utils import * 7 | from config import Config 8 | from argparse import ArgumentParser 9 | from cuboid import Cuboid3d 10 | from pyrr import Quaternion 11 | import json 12 | 13 | 14 | def main(args): 15 | output_dir = args.output_dir 16 | if output_dir: 17 | os.makedirs(output_dir, exist_ok=True) 18 | path_to_data_dir = args.path_to_data_dir 19 | if not os.path.exists(path_to_data_dir): 20 | raise FileNotFoundError(path_to_data_dir) 21 | path_to_checkpoint = args.checkpoint 22 | if not os.path.exists(path_to_checkpoint): 23 | raise FileNotFoundError(path_to_data_dir) 24 | class_name = args.class_name 25 | fps = args.fps 26 | img_prefix = args.img_prefix 27 | 28 | # load pre-trained model 29 | model = get_model(trunk='vgg19') 30 | model = model.cuda() 31 | use_vgg(model, './model', 'vgg19') 32 | print("=> Load pre-trained model from {}".format(path_to_checkpoint)) 33 | model.load_state_dict(torch.load(path_to_checkpoint)) 34 | model.eval() 35 | 36 | # parameter of object size for pnp solver 37 | print("=> Load {} object size".format(class_name)) 38 | path_to_object_seetings = os.path.join(path_to_data_dir, '_object_settings.json') 39 | if not os.path.exists(path_to_object_seetings): 40 | raise FileNotFoundError(path_to_object_seetings) 41 | object_list = json.load(open(path_to_object_seetings))['exported_objects'] 42 | object_size = None 43 | for obj in object_list: 44 | if obj['class'].find(class_name) != -1: 45 | object_size = obj['cuboid_dimensions'] 46 | if not object_size: 47 | raise ValueError("Object size is none") 48 | _cuboid3d = Cuboid3d(object_size) 49 | cuboid3d_points = np.array(_cuboid3d.get_vertices()) 50 | 51 | # parameter of camera for pnp solver 52 | path_to_camera_seetings = os.path.join(path_to_data_dir, '_camera_settings.json') 53 | if not os.path.exists(path_to_camera_seetings): 54 | raise FileNotFoundError(path_to_camera_seetings) 55 | intrinsic_settings = json.load(open(path_to_camera_seetings))['camera_settings'][0]['intrinsic_settings'] 56 | matrix_camera = np.zeros((3, 3)) 57 | matrix_camera[0, 0] = intrinsic_settings['fx'] 58 | matrix_camera[1, 1] = intrinsic_settings['fy'] 59 | matrix_camera[0, 2] = max(intrinsic_settings['cx'], intrinsic_settings['cy']) 60 | matrix_camera[1, 2] = max(intrinsic_settings['cx'], intrinsic_settings['cy']) 61 | matrix_camera[2, 2] = 1 62 | try: 63 | dist_coeffs = np.array(json.load(open(path_to_camera_seetings))['camera_settings'][0]["distortion_coefficients"]) 64 | except KeyError: 65 | dist_coeffs = np.zeros((4, 1)) 66 | path_to_sequences = sorted(glob.glob(os.path.join(path_to_data_dir, '*.{}'.format(img_prefix)))) 67 | 68 | for img_path in path_to_sequences: 69 | original_img = crop(cv2.imread(img_path)) 70 | ratio = max(original_img.shape[:2]) / Config.crop_size 71 | img = cv2.resize(original_img, (Config.crop_size, Config.crop_size)) 72 | img = preprocess(img).float() 73 | img = torch.unsqueeze(img, 0) 74 | out, _ = model(img.cuda()) 75 | line, vertex = out[0].squeeze(), out[1].squeeze() 76 | objects, peaks = find_objects(vertex, line) 77 | original_img = cv2.putText(original_img, "Class name: {}".format(class_name), (50, 50), 78 | cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2) 79 | 80 | if len(objects) > 0: 81 | for object in objects: 82 | cuboid2d_points = object[1] + [(object[0][0] * 8, object[0][1] * 8)] 83 | 
cuboid3d_points = np.array(cuboid3d_points) 84 | location = None 85 | quaternion = None 86 | obj_2d_points = [] 87 | obj_3d_points = [] 88 | 89 | for i in range(8): 90 | check_point_2d = cuboid2d_points[i] 91 | # Ignore invalid points 92 | if check_point_2d is None: 93 | continue 94 | elif check_point_2d[0] < 0 or check_point_2d[1] < 0 or check_point_2d[0] >= Config.crop_size/Config.stride or check_point_2d[1] >= Config.crop_size/Config.stride: 95 | continue 96 | else: 97 | check_point_2d = (check_point_2d[0] * Config.stride * ratio, check_point_2d[1] * Config.stride * ratio) 98 | obj_2d_points.append(check_point_2d) 99 | obj_3d_points.append(cuboid3d_points[i]) 100 | centroid = tuple([int(point * Config.stride * ratio) for point in object[0]]) 101 | original_img = cv2.circle(original_img, centroid, 5, -1) 102 | obj_2d_points = np.array(obj_2d_points, dtype=float) 103 | obj_3d_points = np.array(obj_3d_points, dtype=float) 104 | valid_point_count = len(obj_2d_points) 105 | if valid_point_count >= 5: 106 | ret, rvec, tvec = cv2.solvePnP(obj_3d_points, obj_2d_points, matrix_camera, dist_coeffs, flags=cv2.SOLVEPNP_ITERATIVE) 107 | if ret: 108 | location = list(x[0] for x in tvec) 109 | quaternion = convert_rvec_to_quaternion(rvec) 110 | 111 | projected_points, _ = cv2.projectPoints(cuboid3d_points, rvec, tvec, matrix_camera, 112 | dist_coeffs) 113 | projected_points = np.squeeze(projected_points) 114 | # If the location.Z is negative or object is behind the camera then flip both location and rotation 115 | x, y, z = location 116 | original_img = cv2.putText(original_img, "Location Prediction: x: {:.2f} y: {:.2f} z: {:.2f}".format(x/10, y/10, z/10), (50, 150), cv2.FONT_HERSHEY_COMPLEX, 1, (255,255,255), 2) 117 | print("Location Prediction: x: {:.2f} y: {:.2f} z: {:.2f}".format(x/10, y/10, z/10)) 118 | if z < 0: 119 | # Get the opposite location 120 | location = [-x, -y, -z] 121 | 122 | # Change the rotation by 180 degree 123 | rotate_angle = np.pi 124 | rotate_quaternion = Quaternion.from_axis_rotation(location, rotate_angle) 125 | quaternion = rotate_quaternion.cross(quaternion) 126 | vertexes = [tuple(p) for p in projected_points] 127 | plot(original_img, vertexes) 128 | if args.save: 129 | if not os.path.exists(output_dir): 130 | os.makedirs(output_dir, exist_ok=True) 131 | output_path = os.path.join(output_dir, img_path.split('/')[-1]) 132 | print('=> Save {}' .format(output_path)) 133 | cv2.imwrite(output_path, original_img) 134 | if args.plot: 135 | original_img = cv2.resize(original_img, (600, 600)) 136 | cv2.imshow('prediction', original_img) 137 | cv2.waitKey(int(1000/fps)) 138 | 139 | 140 | def plot(img, _vertexes, color=(255, 255, 0), scale=1): 141 | vertexes = _vertexes.copy() 142 | # plot 143 | for point in vertexes: 144 | if point: 145 | point = tuple([int(p*scale) for p in point]) 146 | img = cv2.circle(img, point, 7, color, -1) 147 | line_1 = [0, 1, 5, 4, 4, 0, 1, 5, 7, 3, 2, 6] 148 | line_2 = [1, 5, 4, 0, 7, 3, 2, 6, 3, 2, 6, 7] 149 | for p1, p2 in zip(line_1, line_2): 150 | if vertexes[p1] and vertexes[p2]: 151 | _p1 = tuple([int(p * scale) for p in vertexes[p1]]) 152 | _p2 = tuple([int(p * scale) for p in vertexes[p2]]) 153 | img = cv2.line(img, _p1, _p2, color=color, thickness=2) 154 | if vertexes[4] and vertexes[3] and vertexes[8]: 155 | center = tuple([int(p * scale) for p in vertexes[8]]) 156 | norm = tuple([int(((p1 + p2) // 2 * scale - c) * 10 + c) for p1, p2, c in zip(vertexes[4], vertexes[3], center)]) 157 | img = cv2.circle(img, norm, 7, color, 1) 158 | img = 
cv2.line(img, center, norm, color=color, thickness=2) 159 | 160 | return img 161 | 162 | 163 | if __name__ == "__main__": 164 | parser = ArgumentParser(description="Semantic Segmentation parser") 165 | parser.add_argument('-class', '--class_name', dest="class_name", choices=["Jack_Daniels", "Jose_Cuervo"], default="Jack_Daniels", type=str, help='the class name of object') 166 | parser.add_argument('-d', '--path_to_data_dir', dest="path_to_data_dir", required=True, type=str, help='the path of rgb image') 167 | parser.add_argument('-c', '--checkpoint', dest="checkpoint", required=True, type=str, 168 | help='the path of model checkpoint') 169 | parser.add_argument('-o', '--output_dir', dest="output_dir", default=None, type=str, 170 | help='the path of output folder') 171 | parser.add_argument('-f', '--fps', dest="fps", default=5, type=int, 172 | help='FPS of input sequence') 173 | parser.add_argument('-s', '--save', dest="save", action='store_true') 174 | parser.add_argument('-p', '--plot', dest="plot", action='store_true') 175 | parser.add_argument('-prefix', '--img_prefix', dest="img_prefix", default="jpg", type=str, 176 | help='the image prefix') 177 | main(parser.parse_args()) 178 | 179 | 180 | 181 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from config import Config 3 | from torchvision import transforms 4 | from scipy.ndimage.filters import gaussian_filter 5 | from pyrr import Quaternion 6 | import cv2 7 | 8 | 9 | def preprocess(img): 10 | transform = transforms.Compose([ 11 | transforms.ToTensor(), 12 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 13 | ]) 14 | 15 | return transform(img) 16 | 17 | 18 | def crop(img): 19 | h, w = img.shape[:2] 20 | m = max(h, w) 21 | top = (m - h) // 2 22 | bottom = (m - h) // 2 23 | if top + bottom + h < m: 24 | bottom += 1 25 | left = (m - w) // 2 26 | right = (m - w) // 2 27 | if left + right + w < m: 28 | right += 1 29 | pad_image = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(0, 0, 0)) 30 | 31 | return pad_image 32 | 33 | 34 | def generate_gaussianmap(center, accumulate_confid_map): 35 | crop_size = Config.crop_size 36 | stride = Config.stride 37 | sigma = Config.sigma 38 | 39 | grid = crop_size / stride 40 | start = stride / 2.0 - 0.5 41 | x_range = [i for i in range(int(grid))] 42 | y_range = [i for i in range(int(grid))] 43 | xx, yy = np.meshgrid(x_range, y_range) 44 | xx = xx * stride + start 45 | yy = yy * stride + start 46 | d2 = (xx - center[0]) ** 2 + (yy - center[1]) ** 2 47 | exponent = d2 / 2.0 / sigma / sigma 48 | mask = exponent <= 4.6052 49 | cofid_map = np.exp(-exponent) 50 | cofid_map = np.multiply(mask, cofid_map) 51 | accumulate_confid_map += cofid_map 52 | accumulate_confid_map[accumulate_confid_map > 1.0] = 1.0 53 | return accumulate_confid_map 54 | 55 | 56 | def generate_vecmap(centerA, centerB, accumulate_vec_map, count): 57 | centerA = centerA.astype(float) 58 | centerB = centerB.astype(float) 59 | crop_size = Config.crop_size 60 | stride = Config.stride 61 | grid = crop_size / stride 62 | 63 | thre = Config.vec_width 64 | centerB = centerB / stride 65 | centerA = centerA / stride 66 | 67 | limb_vec = centerB - centerA 68 | norm = np.linalg.norm(limb_vec) 69 | if (norm == 0.0): 70 | return accumulate_vec_map, count 71 | limb_vec_unit = limb_vec / norm 72 | 73 | # To make sure not beyond the border of this two 
points 74 | min_x = max(int(round(min(centerA[0], centerB[0]) - thre)), 0) 75 | max_x = min(int(round(max(centerA[0], centerB[0]) + thre)), grid) 76 | min_y = max(int(round(min(centerA[1], centerB[1]) - thre)), 0) 77 | max_y = min(int(round(max(centerA[1], centerB[1]) + thre)), grid) 78 | range_x = list(range(int(min_x), int(max_x), 1)) 79 | range_y = list(range(int(min_y), int(max_y), 1)) 80 | xx, yy = np.meshgrid(range_x, range_y) 81 | ba_x = xx - centerA[0] # the vector from (x,y) to centerA 82 | ba_y = yy - centerA[1] 83 | limb_width = np.abs(ba_x * limb_vec_unit[1] - ba_y * limb_vec_unit[0]) 84 | mask = limb_width < thre # mask is 2D 85 | 86 | vec_map = np.copy(accumulate_vec_map) * 0.0 87 | vec_map[yy, xx] = np.repeat(mask[:, :, np.newaxis], 2, axis=2) 88 | vec_map[yy, xx] *= limb_vec_unit[np.newaxis, np.newaxis, :] 89 | mask = np.logical_or.reduce( 90 | (np.abs(vec_map[:, :, 0]) > 0, np.abs(vec_map[:, :, 1]) > 0)) 91 | 92 | accumulate_vec_map = np.multiply( 93 | accumulate_vec_map, count[:, :, np.newaxis]) 94 | accumulate_vec_map += vec_map 95 | count[mask == True] += 1 96 | 97 | mask = count == 0 98 | 99 | count[mask == True] = 1 100 | 101 | accumulate_vec_map = np.divide(accumulate_vec_map, count[:, :, np.newaxis]) 102 | count[mask == True] = 0 103 | 104 | return accumulate_vec_map, count 105 | 106 | 107 | def find_objects(vertex2, aff, numvertex=8): 108 | '''Detects objects given network belief maps and affinities, using heuristic method''' 109 | 110 | all_peaks = [] 111 | peak_counter = 0 112 | for j in range(vertex2.size()[0]): 113 | belief = vertex2[j].clone() 114 | map_ori = belief.cpu().data.numpy() 115 | map = gaussian_filter(belief.cpu().data.numpy(), sigma=Config.test_sigma) 116 | p = 1 117 | map_left = np.zeros(map.shape) 118 | map_left[p:, :] = map[:-p, :] 119 | map_right = np.zeros(map.shape) 120 | map_right[:-p, :] = map[p:, :] 121 | map_up = np.zeros(map.shape) 122 | map_up[:, p:] = map[:, :-p] 123 | map_down = np.zeros(map.shape) 124 | map_down[:, :-p] = map[:, p:] 125 | 126 | peaks_binary = np.logical_and.reduce( 127 | ( 128 | map >= map_left, 129 | map >= map_right, 130 | map >= map_up, 131 | map >= map_down, 132 | map > Config.thresh_map) 133 | ) 134 | peaks = zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]) 135 | 136 | # Computing the weigthed average for localizing the peaks 137 | peaks = list(peaks) 138 | 139 | win = 5 140 | ran = win // 2 141 | peaks_avg = [] 142 | for p_value in range(len(peaks)): 143 | p = peaks[p_value] 144 | weights = np.zeros((win, win)) 145 | i_values = np.zeros((win, win)) 146 | j_values = np.zeros((win, win)) 147 | for i in range(-ran, ran + 1): 148 | for j in range(-ran, ran + 1): 149 | if p[1] + i < 0 \ 150 | or p[1] + i >= map_ori.shape[0] \ 151 | or p[0] + j < 0 \ 152 | or p[0] + j >= map_ori.shape[1]: 153 | continue 154 | 155 | i_values[j + ran, i + ran] = p[1] + i 156 | j_values[j + ran, i + ran] = p[0] + j 157 | 158 | weights[j + ran, i + ran] = (map_ori[p[1] + i, p[0] + j]) 159 | 160 | # if the weights are all zeros 161 | # then add the none continuous points 162 | OFFSET_DUE_TO_UPSAMPLING = 0.4395 163 | try: 164 | peaks_avg.append( 165 | (np.average(j_values, weights=weights) + OFFSET_DUE_TO_UPSAMPLING, 166 | np.average(i_values, weights=weights) + OFFSET_DUE_TO_UPSAMPLING)) 167 | except: 168 | peaks_avg.append((p[0] + OFFSET_DUE_TO_UPSAMPLING, p[1] + OFFSET_DUE_TO_UPSAMPLING)) 169 | # Note: Python3 doesn't support len for zip object 170 | peaks_len = min(len(np.nonzero(peaks_binary)[1]), 
len(np.nonzero(peaks_binary)[0])) 171 | 172 | peaks_with_score = [peaks_avg[x_] + (map_ori[peaks[x_][1], peaks[x_][0]],) for x_ in range(len(peaks))] 173 | 174 | id = range(peak_counter, peak_counter + peaks_len) 175 | 176 | peaks_with_score_and_id = [peaks_with_score[i] + (id[i],) for i in range(len(id))] 177 | 178 | all_peaks.append(peaks_with_score_and_id) 179 | peak_counter += peaks_len 180 | 181 | objects = [] 182 | 183 | # Check object centroid and build the objects if the centroid is found 184 | for nb_object in range(len(all_peaks[-1])): 185 | if all_peaks[-1][nb_object][2] > Config.thresh_points: 186 | objects.append([ 187 | [all_peaks[-1][nb_object][:2][0], all_peaks[-1][nb_object][:2][1]], 188 | [None for i in range(numvertex)], 189 | [None for i in range(numvertex)], 190 | all_peaks[-1][nb_object][2] 191 | ]) 192 | 193 | # Working with an output that only has belief maps 194 | if aff is None: 195 | if len(objects) > 0 and len(all_peaks) > 0 and len(all_peaks[0]) > 0: 196 | for i_points in range(8): 197 | if len(all_peaks[i_points]) > 0 and all_peaks[i_points][0][2] > Config.threshold: 198 | objects[0][1][i_points] = (all_peaks[i_points][0][0], all_peaks[i_points][0][1]) 199 | else: 200 | # For all points found 201 | for i_lists in range(len(all_peaks[:-1])): 202 | lists = all_peaks[i_lists] 203 | 204 | for candidate in lists: 205 | if candidate[2] < Config.thresh_points: 206 | continue 207 | 208 | i_best = -1 209 | best_dist = 10000 210 | best_angle = 100 211 | for i_obj in range(len(objects)): 212 | center = [objects[i_obj][0][0], objects[i_obj][0][1]] 213 | 214 | # integer is used to look into the affinity map, 215 | # but the float version is used to run 216 | point_int = [int(candidate[0]), int(candidate[1])] 217 | point = [candidate[0], candidate[1]] 218 | 219 | # look at the distance to the vector field. 
220 | v_aff = np.array([ 221 | aff[i_lists * 2, 222 | point_int[1], 223 | point_int[0]].data.item(), 224 | aff[i_lists * 2 + 1, 225 | point_int[1], 226 | point_int[0]].data.item()]) * 10 227 | 228 | # normalize the vector 229 | xvec = v_aff[0] 230 | yvec = v_aff[1] 231 | 232 | norms = np.sqrt(xvec * xvec + yvec * yvec) 233 | 234 | xvec /= norms 235 | yvec /= norms 236 | 237 | v_aff = np.concatenate([[xvec], [yvec]]) 238 | 239 | v_center = np.array(center) - np.array(point) 240 | xvec = v_center[0] 241 | yvec = v_center[1] 242 | 243 | norms = np.sqrt(xvec * xvec + yvec * yvec) 244 | 245 | xvec /= norms 246 | yvec /= norms 247 | 248 | v_center = np.concatenate([[xvec], [yvec]]) 249 | 250 | # vector affinity 251 | dist_angle = np.linalg.norm(v_center - v_aff) 252 | 253 | # distance between vertexes 254 | dist_point = np.linalg.norm(np.array(point) - np.array(center)) 255 | 256 | if dist_angle < Config.thresh_angle \ 257 | and best_dist > 1000 \ 258 | or dist_angle < Config.thresh_angle \ 259 | and best_dist > dist_point: 260 | i_best = i_obj 261 | best_angle = dist_angle 262 | best_dist = dist_point 263 | 264 | if i_best is -1: 265 | continue 266 | 267 | if objects[i_best][1][i_lists] is None \ 268 | or best_angle < Config.thresh_angle \ 269 | and best_dist < objects[i_best][2][i_lists][1]: 270 | objects[i_best][1][i_lists] = (candidate[0], candidate[1]) 271 | objects[i_best][2][i_lists] = (best_angle, best_dist) 272 | 273 | return objects, all_peaks 274 | 275 | 276 | def convert_rvec_to_quaternion(rvec): 277 | '''Convert rvec (which is log quaternion) to quaternion''' 278 | theta = np.sqrt(rvec[0] * rvec[0] + rvec[1] * rvec[1] + rvec[2] * rvec[2]) # in radians 279 | raxis = [rvec[0] / theta, rvec[1] / theta, rvec[2] / theta] 280 | 281 | # pyrr's Quaternion (order is XYZW), https://pyrr.readthedocs.io/en/latest/oo_api_quaternion.html 282 | 283 | return Quaternion.from_axis_rotation(raxis, theta) 284 | -------------------------------------------------------------------------------- /network/rtpose_vgg.py: -------------------------------------------------------------------------------- 1 | """CPM Pytorch Implementation""" 2 | 3 | from collections import OrderedDict 4 | import os 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | import torch.utils.data as data 9 | import torch.utils.model_zoo as model_zoo 10 | from torch.autograd import Variable 11 | from torch.nn import init 12 | from config import Config 13 | 14 | 15 | def make_stages(cfg_dict): 16 | """Builds CPM stages from a dictionary 17 | Args: 18 | cfg_dict: a dictionary 19 | """ 20 | layers = [] 21 | for i in range(len(cfg_dict) - 1): 22 | one_ = cfg_dict[i] 23 | for k, v in one_.items(): 24 | if 'pool' in k: 25 | layers += [nn.MaxPool2d(kernel_size=v[0], stride=v[1], 26 | padding=v[2])] 27 | else: 28 | conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], 29 | kernel_size=v[2], stride=v[3], 30 | padding=v[4]) 31 | layers += [conv2d, nn.ReLU(inplace=True)] 32 | one_ = list(cfg_dict[-1].keys()) 33 | k = one_[0] 34 | v = cfg_dict[-1][k] 35 | conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], 36 | kernel_size=v[2], stride=v[3], padding=v[4]) 37 | layers += [conv2d] 38 | return nn.Sequential(*layers) 39 | 40 | 41 | def make_vgg19_block(block): 42 | """Builds a vgg19 block from a dictionary 43 | Args: 44 | block: a dictionary 45 | """ 46 | layers = [] 47 | for i in range(len(block)): 48 | one_ = block[i] 49 | for k, v in one_.items(): 50 | if 'pool' in k: 51 | layers += [nn.MaxPool2d(kernel_size=v[0], 
stride=v[1], 52 | padding=v[2])] 53 | else: 54 | conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], 55 | kernel_size=v[2], stride=v[3], 56 | padding=v[4]) 57 | layers += [conv2d, nn.ReLU(inplace=True)] 58 | return nn.Sequential(*layers) 59 | 60 | 61 | 62 | def get_model(trunk='vgg19'): 63 | """Creates the whole CPM model 64 | Args: 65 | trunk: string, 'vgg19' or 'mobilenet' 66 | Returns: Module, the defined model 67 | """ 68 | blocks = {} 69 | # block0 is the preprocessing stage 70 | if trunk == 'vgg19': 71 | block0 = [{'conv1_1': [3, 64, 3, 1, 1]}, 72 | {'conv1_2': [64, 64, 3, 1, 1]}, 73 | {'pool1_stage1': [2, 2, 0]}, 74 | {'conv2_1': [64, 128, 3, 1, 1]}, 75 | {'conv2_2': [128, 128, 3, 1, 1]}, 76 | {'pool2_stage1': [2, 2, 0]}, 77 | {'conv3_1': [128, 256, 3, 1, 1]}, 78 | {'conv3_2': [256, 256, 3, 1, 1]}, 79 | {'conv3_3': [256, 256, 3, 1, 1]}, 80 | {'conv3_4': [256, 256, 3, 1, 1]}, 81 | {'pool3_stage1': [2, 2, 0]}, 82 | {'conv4_1': [256, 512, 3, 1, 1]}, 83 | {'conv4_2': [512, 512, 3, 1, 1]}, 84 | {'conv4_3_CPM': [512, 256, 3, 1, 1]}, 85 | {'conv4_4_CPM': [256, 128, 3, 1, 1]}] 86 | 87 | elif trunk == 'mobilenet': 88 | block0 = [{'conv_bn': [3, 32, 2]}, # out: 3, 32, 184, 184 89 | {'conv_dw1': [32, 64, 1]}, # out: 32, 64, 184, 184 90 | {'conv_dw2': [64, 128, 2]}, # out: 64, 128, 92, 92 91 | {'conv_dw3': [128, 128, 1]}, # out: 128, 256, 92, 92 92 | {'conv_dw4': [128, 256, 2]}, # out: 256, 256, 46, 46 93 | {'conv4_3_CPM': [256, 256, 1, 3, 1]}, 94 | {'conv4_4_CPM': [256, 128, 1, 3, 1]}] 95 | 96 | # Stage 1 97 | blocks['block1_1'] = [{'conv5_1_CPM_L1': [128, 128, 3, 1, 1]}, 98 | {'conv5_2_CPM_L1': [128, 128, 3, 1, 1]}, 99 | {'conv5_3_CPM_L1': [128, 128, 3, 1, 1]}, 100 | {'conv5_4_CPM_L1': [128, 512, 1, 1, 0]}, 101 | {'conv5_5_CPM_L1': [512, 16, 1, 1, 0]}] 102 | 103 | blocks['block1_2'] = [{'conv5_1_CPM_L2': [128, 128, 3, 1, 1]}, 104 | {'conv5_2_CPM_L2': [128, 128, 3, 1, 1]}, 105 | {'conv5_3_CPM_L2': [128, 128, 3, 1, 1]}, 106 | {'conv5_4_CPM_L2': [128, 512, 1, 1, 0]}, 107 | {'conv5_5_CPM_L2': [512, 9, 1, 1, 0]}] 108 | 109 | # Stages 2 - 6 110 | for i in range(2, 7): 111 | blocks['block%d_1' % i] = [ 112 | {'Mconv1_stage%d_L1' % i: [153, 128, 7, 1, 3]}, 113 | {'Mconv2_stage%d_L1' % i: [128, 128, 7, 1, 3]}, 114 | {'Mconv3_stage%d_L1' % i: [128, 128, 7, 1, 3]}, 115 | {'Mconv4_stage%d_L1' % i: [128, 128, 7, 1, 3]}, 116 | {'Mconv5_stage%d_L1' % i: [128, 128, 7, 1, 3]}, 117 | {'Mconv6_stage%d_L1' % i: [128, 128, 1, 1, 0]}, 118 | {'Mconv7_stage%d_L1' % i: [128, 16, 1, 1, 0]} 119 | ] 120 | 121 | blocks['block%d_2' % i] = [ 122 | {'Mconv1_stage%d_L2' % i: [153, 128, 7, 1, 3]}, 123 | {'Mconv2_stage%d_L2' % i: [128, 128, 7, 1, 3]}, 124 | {'Mconv3_stage%d_L2' % i: [128, 128, 7, 1, 3]}, 125 | {'Mconv4_stage%d_L2' % i: [128, 128, 7, 1, 3]}, 126 | {'Mconv5_stage%d_L2' % i: [128, 128, 7, 1, 3]}, 127 | {'Mconv6_stage%d_L2' % i: [128, 128, 1, 1, 0]}, 128 | {'Mconv7_stage%d_L2' % i: [128, 9, 1, 1, 0]} 129 | ] 130 | 131 | models = {} 132 | 133 | if trunk == 'vgg19': 134 | print("Bulding VGG19") 135 | models['block0'] = make_vgg19_block(block0) 136 | 137 | for k, v in blocks.items(): 138 | models[k] = make_stages(list(v)) 139 | 140 | class rtpose_model(nn.Module): 141 | def __init__(self, model_dict): 142 | super(rtpose_model, self).__init__() 143 | self.model0 = model_dict['block0'] 144 | self.model1_1 = model_dict['block1_1'] 145 | self.model2_1 = model_dict['block2_1'] 146 | self.model3_1 = model_dict['block3_1'] 147 | self.model4_1 = model_dict['block4_1'] 148 | self.model5_1 = model_dict['block5_1'] 149 | 
self.model6_1 = model_dict['block6_1'] 150 | 151 | self.model1_2 = model_dict['block1_2'] 152 | self.model2_2 = model_dict['block2_2'] 153 | self.model3_2 = model_dict['block3_2'] 154 | self.model4_2 = model_dict['block4_2'] 155 | self.model5_2 = model_dict['block5_2'] 156 | self.model6_2 = model_dict['block6_2'] 157 | 158 | self._initialize_weights_norm() 159 | 160 | def forward(self, x): 161 | 162 | saved_for_loss = [] 163 | out1 = self.model0(x) 164 | 165 | out1_1 = self.model1_1(out1) 166 | out1_2 = self.model1_2(out1) 167 | out2 = torch.cat([out1_1, out1_2, out1], 1) 168 | saved_for_loss.append(out1_1) 169 | saved_for_loss.append(out1_2) 170 | out = (out1_1, out1_2) 171 | 172 | if Config.output_stage > 1: 173 | out2_1 = self.model2_1(out2) 174 | out2_2 = self.model2_2(out2) 175 | out3 = torch.cat([out2_1, out2_2, out1], 1) 176 | saved_for_loss.append(out2_1) 177 | saved_for_loss.append(out2_2) 178 | out = (out2_1, out2_2) 179 | 180 | if Config.output_stage > 2: 181 | out3_1 = self.model3_1(out3) 182 | out3_2 = self.model3_2(out3) 183 | out4 = torch.cat([out3_1, out3_2, out1], 1) 184 | saved_for_loss.append(out3_1) 185 | saved_for_loss.append(out3_2) 186 | out = (out3_1, out3_2) 187 | 188 | if Config.output_stage > 3: 189 | out4_1 = self.model4_1(out4) 190 | out4_2 = self.model4_2(out4) 191 | out5 = torch.cat([out4_1, out4_2, out1], 1) 192 | saved_for_loss.append(out4_1) 193 | saved_for_loss.append(out4_2) 194 | out = (out4_1, out4_2) 195 | 196 | if Config.output_stage > 4: 197 | out5_1 = self.model5_1(out5) 198 | out5_2 = self.model5_2(out5) 199 | out6 = torch.cat([out5_1, out5_2, out1], 1) 200 | saved_for_loss.append(out5_1) 201 | saved_for_loss.append(out5_2) 202 | out = (out5_1, out5_2) 203 | 204 | if Config.output_stage > 5: 205 | out6_1 = self.model6_1(out6) 206 | out6_2 = self.model6_2(out6) 207 | saved_for_loss.append(out6_1) 208 | saved_for_loss.append(out6_2) 209 | out = (out6_1, out6_2) 210 | 211 | return out, saved_for_loss 212 | 213 | def _initialize_weights_norm(self): 214 | 215 | for m in self.modules(): 216 | if isinstance(m, nn.Conv2d): 217 | init.normal_(m.weight, std=0.01) 218 | if m.bias is not None: # mobilenet conv2d doesn't add bias 219 | init.constant_(m.bias, 0.0) 220 | 221 | # last layer of these block don't have Relu 222 | init.normal_(self.model1_1[8].weight, std=0.01) 223 | init.normal_(self.model1_2[8].weight, std=0.01) 224 | 225 | init.normal_(self.model2_1[12].weight, std=0.01) 226 | init.normal_(self.model3_1[12].weight, std=0.01) 227 | init.normal_(self.model4_1[12].weight, std=0.01) 228 | init.normal_(self.model5_1[12].weight, std=0.01) 229 | init.normal_(self.model6_1[12].weight, std=0.01) 230 | 231 | init.normal_(self.model2_2[12].weight, std=0.01) 232 | init.normal_(self.model3_2[12].weight, std=0.01) 233 | init.normal_(self.model4_2[12].weight, std=0.01) 234 | init.normal_(self.model5_2[12].weight, std=0.01) 235 | init.normal_(self.model6_2[12].weight, std=0.01) 236 | 237 | model = rtpose_model(models) 238 | return model 239 | 240 | 241 | """Load pretrained model on Imagenet 242 | :param model, the PyTorch nn.Module which will train. 243 | :param model_path, the directory which load the pretrained model, will download one if not have. 244 | :param trunk, the feature extractor network of model. 
245 | """
246 | 
247 | 
248 | def use_vgg(model, model_path, trunk):
249 |     model_urls = {
250 |         'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
251 |         'ssd': 'https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth',
252 |         'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth'}
253 | 
254 |     number_weight = {
255 |         'vgg16': 18,
256 |         'ssd': 18,
257 |         'vgg19': 20}
258 | 
259 |     url = model_urls[trunk]
260 | 
261 |     if trunk == 'ssd':
262 |         # urllib.request must be imported explicitly under Python 3
263 |         from urllib.request import urlretrieve
264 |         urlretrieve('https://s3.amazonaws.com/amdegroot-models/ssd300_mAP_77.43_v2.pth',
265 |                     os.path.join(model_path, 'ssd.pth'))
266 |         vgg_state_dict = torch.load(os.path.join(model_path, 'ssd.pth'))
267 |         print('loading SSD')
268 |     else:
269 |         vgg_state_dict = model_zoo.load_url(url, model_dir=model_path)
270 |     vgg_keys = vgg_state_dict.keys()
271 | 
272 |     # copy the first `number_weight[trunk]` tensors (weight/bias pairs) of the
273 |     # pretrained backbone into the matching layers of model.model0
274 |     weights_load = {}
275 |     for i in range(number_weight[trunk]):
276 |         weights_load[list(model.state_dict().keys())[i]] = vgg_state_dict[list(vgg_keys)[i]]
277 | 
278 |     state = model.state_dict()
279 |     state.update(weights_load)
280 |     model.load_state_dict(state)
281 | 
--------------------------------------------------------------------------------
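To tie the pieces together, here is a condensed, illustrative sketch of the inference path that `demo.py` and `eval.py` implement: pad the image to a square, resize to the network input, decode corner/centroid peaks with `find_objects`, scale the peaks back to the original resolution, and recover the 6-DoF pose with `cv2.solvePnP`. The checkpoint and image paths, cuboid dimensions, and camera intrinsics below are placeholders, not values shipped with the repository (the scripts read the real ones from `_object_settings.json` and `_camera_settings.json`).

```python
import cv2
import numpy as np
import torch

from config import Config
from cuboid import Cuboid3d
from network.rtpose_vgg import get_model
from utils import crop, preprocess, find_objects

model = get_model(trunk='vgg19').cuda()
model.load_state_dict(torch.load('./logs/Jack_Daniels-checkpoint.pth'))   # hypothetical checkpoint
model.eval()

image = crop(cv2.imread('./data/Real_bottle_sequence/000001.jpg'))        # pad to square
ratio = max(image.shape[:2]) / Config.crop_size
tensor = preprocess(cv2.resize(image, (Config.crop_size, Config.crop_size))).float()
out, _ = model(tensor.unsqueeze(0).cuda())
pafs, beliefs = out[0].squeeze(), out[1].squeeze()      # 16 PAF maps, 9 belief maps

objects, _ = find_objects(beliefs, pafs)                # detections at grid resolution
cuboid3d = np.array(Cuboid3d([7.0, 22.0, 7.0]).get_vertices(), dtype=float)   # made-up size
K = np.array([[615.0, 0.0, 320.0], [0.0, 615.0, 320.0], [0.0, 0.0, 1.0]])     # made-up intrinsics

for obj in objects:
    pts_2d, pts_3d = [], []
    for i, p in enumerate(obj[1][:8]):                  # detected corner peaks (may be None)
        if p is None:
            continue
        pts_2d.append((p[0] * Config.stride * ratio, p[1] * Config.stride * ratio))
        pts_3d.append(cuboid3d[i])
    if len(pts_2d) >= 4:
        ok, rvec, tvec = cv2.solvePnP(np.array(pts_3d, dtype=np.float32),
                                      np.array(pts_2d, dtype=np.float32),
                                      K, np.zeros((4, 1)), flags=cv2.SOLVEPNP_ITERATIVE)
        if ok:
            print('translation:', tvec.ravel())
```

On top of this, `demo.py` flips the recovered pose when the solved translation has negative depth and draws the reprojected cuboid, while `eval.py` counts a detection as correct when the predicted location lies within 3 cm of the ground-truth location.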