├── FaceBoxes ├── .gitignore ├── FaceBoxes.py ├── __init__.py ├── build_cpu_nms.sh ├── models │ └── faceboxes.py ├── readme.md ├── utils │ ├── .gitignore │ ├── __init__.py │ ├── box_utils.py │ ├── build.py │ ├── config.py │ ├── functions.py │ ├── nms │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── cpu_nms.pyx │ │ └── py_cpu_nms.py │ ├── nms_wrapper.py │ ├── prior_box.py │ └── timer.py └── weights │ ├── FaceBoxesProd.pth │ └── readme.md ├── LICENSE ├── README.md ├── Sim3DR ├── .gitignore ├── Sim3DR.py ├── __init__.py ├── _init_paths.py ├── build_sim3dr.sh ├── lib │ ├── rasterize.h │ ├── rasterize.pyx │ └── rasterize_kernel.cpp ├── lighting.py ├── readme.md ├── setup.py └── tests │ ├── .gitignore │ ├── CMakeLists.txt │ ├── io.cpp │ ├── io.h │ └── test.cpp ├── artistic.py ├── backbone_nets ├── ResNeSt │ ├── __init__.py │ ├── ablation.py │ ├── resnest.py │ ├── resnet.py │ └── splat.py ├── ghostnet_backbone.py ├── mobilenetv1_backbone.py ├── mobilenetv2_backbone.py ├── pointnet_backbone.py └── resnet_backbone.py ├── benchmark.py ├── benchmark_aflw2000.py ├── benchmark_validate.py ├── demo ├── 0.png ├── 1.png ├── 10.png ├── 11.png ├── 12.png ├── 2.png ├── 3.png ├── 4.png ├── 5.png ├── 6.png ├── 7.png ├── 8.png ├── 9.png ├── AF-1.png ├── AF-2.png ├── alignment.png ├── comparison-deca.png ├── demo.gif ├── multiple.png ├── orientation.png ├── single.png └── teaser.png ├── img ├── sample_1.jpg ├── sample_2.jpg ├── sample_3.jpg └── sample_4.jpg ├── loss_definition.py ├── main_train.py ├── model_building.py ├── pretrained └── __init__.py ├── setup.py ├── singleImage.py ├── singleImage_simple.py ├── synergy3DMM.py ├── synergy_demo.ipynb ├── train_script.sh ├── utils ├── __init__.py ├── ddfa.py ├── inference.py ├── io.py ├── params.py └── render.py └── uv_texture_realFaces.py /FaceBoxes/.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | __pycache__ 3 | **/__pycache__ -------------------------------------------------------------------------------- /FaceBoxes/FaceBoxes.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import os.path as osp 4 | 5 | import torch 6 | import numpy as np 7 | import cv2 8 | 9 | from .utils.prior_box import PriorBox 10 | from .utils.nms_wrapper import nms 11 | from .utils.box_utils import decode 12 | from .utils.timer import Timer 13 | from .utils.functions import check_keys, remove_prefix, load_model 14 | from .utils.config import cfg 15 | from .models.faceboxes import FaceBoxesNet 16 | 17 | # some global configs 18 | confidence_threshold = 0.05 19 | top_k = 5000 20 | keep_top_k = 750 21 | nms_threshold = 0.3 22 | vis_thres = 0.5 #0.5 23 | resize = 1 24 | 25 | scale_flag = True 26 | HEIGHT, WIDTH = 720, 1080 27 | 28 | make_abs_path = lambda fn: osp.join(osp.dirname(osp.realpath(__file__)), fn) 29 | pretrained_path = make_abs_path('weights/FaceBoxesProd.pth') 30 | 31 | 32 | def viz_bbox(img, dets, wfp='out.jpg'): 33 | # show 34 | for b in dets: 35 | if b[4] < vis_thres: 36 | continue 37 | text = "{:.4f}".format(b[4]) 38 | b = list(map(int, b)) 39 | cv2.rectangle(img, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2) 40 | cx = b[0] 41 | cy = b[1] + 12 42 | cv2.putText(img, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255)) 43 | cv2.imwrite(wfp, img) 44 | print(f'Viz bbox to {wfp}') 45 | 46 | 47 | class FaceBoxes: 48 | def __init__(self, timer_flag=False): 49 | net = FaceBoxesNet(phase='test', size=None, num_classes=2) # initialize detector 50 | self.net = 
load_model(net, pretrained_path=pretrained_path, load_to_cpu=True) 51 | self.net.eval() 52 | 53 | for p in self.net.parameters(): 54 | p.requires_grad_(False) 55 | 56 | # print('Finished loading model!') 57 | 58 | self.timer_flag = timer_flag 59 | 60 | def __call__(self, img_): 61 | img_raw = img_.copy() 62 | 63 | # scaling to speed up 64 | scale = 1 65 | if scale_flag: 66 | h, w = img_raw.shape[:2] 67 | if h > HEIGHT: 68 | scale = HEIGHT / h 69 | if w * scale > WIDTH: 70 | scale *= WIDTH / (w * scale) 71 | # print(scale) 72 | if scale == 1: 73 | img_raw_scale = img_raw 74 | else: 75 | h_s = int(scale * h) 76 | w_s = int(scale * w) 77 | # print(h_s, w_s) 78 | img_raw_scale = cv2.resize(img_raw, dsize=(w_s, h_s)) 79 | # print(img_raw_scale.shape) 80 | 81 | img = np.float32(img_raw_scale) 82 | else: 83 | img = np.float32(img_raw) 84 | 85 | # forward 86 | _t = {'forward_pass': Timer(), 'misc': Timer()} 87 | im_height, im_width, _ = img.shape 88 | scale_bbox = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) 89 | img -= (104, 117, 123) 90 | img = img.transpose(2, 0, 1) 91 | img = torch.from_numpy(img).unsqueeze(0) 92 | 93 | _t['forward_pass'].tic() 94 | loc, conf = self.net(img) # forward pass 95 | _t['forward_pass'].toc() 96 | _t['misc'].tic() 97 | priorbox = PriorBox(image_size=(im_height, im_width)) 98 | priors = priorbox.forward() 99 | prior_data = priors.data 100 | boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) 101 | if scale_flag: 102 | boxes = boxes * scale_bbox / scale / resize 103 | else: 104 | boxes = boxes * scale_bbox / resize 105 | 106 | boxes = boxes.cpu().numpy() 107 | scores = conf.squeeze(0).data.cpu().numpy()[:, 1] 108 | 109 | # ignore low scores 110 | inds = np.where(scores > confidence_threshold)[0] 111 | boxes = boxes[inds] 112 | scores = scores[inds] 113 | 114 | # keep top-K before NMS 115 | order = scores.argsort()[::-1][:top_k] 116 | boxes = boxes[order] 117 | scores = scores[order] 118 | 119 | # do NMS 120 | dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) 121 | # keep = py_cpu_nms(dets, args.nms_threshold) 122 | keep = nms(dets, nms_threshold) 123 | dets = dets[keep, :] 124 | 125 | # keep top-K faster NMS 126 | dets = dets[:keep_top_k, :] 127 | _t['misc'].toc() 128 | 129 | if self.timer_flag: 130 | print('Detection: {:d}/{:d} forward_pass_time: {:.4f}s misc: {:.4f}s'.format(1, 1, _t[ 131 | 'forward_pass'].average_time, _t['misc'].average_time)) 132 | 133 | # filter using vis_thres 134 | det_bboxes = [] 135 | for b in dets: 136 | if b[4] > vis_thres: 137 | xmin, ymin, xmax, ymax, score = b[0], b[1], b[2], b[3], b[4] 138 | w = xmax - xmin + 1 139 | h = ymax - ymin + 1 140 | bbox = [xmin, ymin, xmax, ymax, score] 141 | det_bboxes.append(bbox) 142 | 143 | return det_bboxes 144 | 145 | 146 | def main(): 147 | face_boxes = FaceBoxes(timer_flag=True) 148 | 149 | fn = 'trump_hillary.jpg' 150 | img_fp = f'../examples/inputs/{fn}' 151 | img = cv2.imread(img_fp) 152 | dets = face_boxes(img) # xmin, ymin, w, h 153 | # print(dets) 154 | 155 | wfn = fn.replace('.jpg', '_det.jpg') 156 | wfp = osp.join('../examples/results', wfn) 157 | viz_bbox(img, dets, wfp) 158 | 159 | 160 | if __name__ == '__main__': 161 | main() 162 | -------------------------------------------------------------------------------- /FaceBoxes/__init__.py: -------------------------------------------------------------------------------- 1 | from .FaceBoxes import FaceBoxes 2 | 
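The FaceBoxes detector above can be used on its own once the CPU NMS extension is built (see build_cpu_nms.sh below). The following minimal sketch is not part of the original repository; the image path is a placeholder, and it simply exercises the class defined in FaceBoxes.py: construct it once, then call it on a BGR image to get a list of [xmin, ymin, xmax, ymax, score] boxes.

```python
# Hypothetical usage sketch for the FaceBoxes wrapper defined above.
import cv2
from FaceBoxes import FaceBoxes

face_boxes = FaceBoxes(timer_flag=False)            # loads weights/FaceBoxesProd.pth on the CPU
img = cv2.imread('examples/inputs/sample.jpg')      # placeholder path
dets = face_boxes(img)                              # list of [xmin, ymin, xmax, ymax, score]
for xmin, ymin, xmax, ymax, score in dets:
    cv2.rectangle(img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 0, 255), 2)
cv2.imwrite('sample_det.jpg', img)
```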
-------------------------------------------------------------------------------- /FaceBoxes/build_cpu_nms.sh: -------------------------------------------------------------------------------- 1 | cd utils 2 | python3 build.py build_ext --inplace 3 | cd .. -------------------------------------------------------------------------------- /FaceBoxes/models/faceboxes.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class BasicConv2d(nn.Module): 9 | 10 | def __init__(self, in_channels, out_channels, **kwargs): 11 | super(BasicConv2d, self).__init__() 12 | self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs) 13 | self.bn = nn.BatchNorm2d(out_channels, eps=1e-5) 14 | 15 | def forward(self, x): 16 | x = self.conv(x) 17 | x = self.bn(x) 18 | return F.relu(x, inplace=True) 19 | 20 | 21 | class Inception(nn.Module): 22 | def __init__(self): 23 | super(Inception, self).__init__() 24 | self.branch1x1 = BasicConv2d(128, 32, kernel_size=1, padding=0) 25 | self.branch1x1_2 = BasicConv2d(128, 32, kernel_size=1, padding=0) 26 | self.branch3x3_reduce = BasicConv2d(128, 24, kernel_size=1, padding=0) 27 | self.branch3x3 = BasicConv2d(24, 32, kernel_size=3, padding=1) 28 | self.branch3x3_reduce_2 = BasicConv2d(128, 24, kernel_size=1, padding=0) 29 | self.branch3x3_2 = BasicConv2d(24, 32, kernel_size=3, padding=1) 30 | self.branch3x3_3 = BasicConv2d(32, 32, kernel_size=3, padding=1) 31 | 32 | def forward(self, x): 33 | branch1x1 = self.branch1x1(x) 34 | 35 | branch1x1_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) 36 | branch1x1_2 = self.branch1x1_2(branch1x1_pool) 37 | 38 | branch3x3_reduce = self.branch3x3_reduce(x) 39 | branch3x3 = self.branch3x3(branch3x3_reduce) 40 | 41 | branch3x3_reduce_2 = self.branch3x3_reduce_2(x) 42 | branch3x3_2 = self.branch3x3_2(branch3x3_reduce_2) 43 | branch3x3_3 = self.branch3x3_3(branch3x3_2) 44 | 45 | outputs = [branch1x1, branch1x1_2, branch3x3, branch3x3_3] 46 | return torch.cat(outputs, 1) 47 | 48 | 49 | class CRelu(nn.Module): 50 | 51 | def __init__(self, in_channels, out_channels, **kwargs): 52 | super(CRelu, self).__init__() 53 | self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs) 54 | self.bn = nn.BatchNorm2d(out_channels, eps=1e-5) 55 | 56 | def forward(self, x): 57 | x = self.conv(x) 58 | x = self.bn(x) 59 | x = torch.cat([x, -x], 1) 60 | x = F.relu(x, inplace=True) 61 | return x 62 | 63 | 64 | class FaceBoxesNet(nn.Module): 65 | 66 | def __init__(self, phase, size, num_classes): 67 | super(FaceBoxesNet, self).__init__() 68 | self.phase = phase 69 | self.num_classes = num_classes 70 | self.size = size 71 | 72 | self.conv1 = CRelu(3, 24, kernel_size=7, stride=4, padding=3) 73 | self.conv2 = CRelu(48, 64, kernel_size=5, stride=2, padding=2) 74 | 75 | self.inception1 = Inception() 76 | self.inception2 = Inception() 77 | self.inception3 = Inception() 78 | 79 | self.conv3_1 = BasicConv2d(128, 128, kernel_size=1, stride=1, padding=0) 80 | self.conv3_2 = BasicConv2d(128, 256, kernel_size=3, stride=2, padding=1) 81 | 82 | self.conv4_1 = BasicConv2d(256, 128, kernel_size=1, stride=1, padding=0) 83 | self.conv4_2 = BasicConv2d(128, 256, kernel_size=3, stride=2, padding=1) 84 | 85 | self.loc, self.conf = self.multibox(self.num_classes) 86 | 87 | if self.phase == 'test': 88 | self.softmax = nn.Softmax(dim=-1) 89 | 90 | if self.phase == 'train': 91 | for m in self.modules(): 92 | if 
isinstance(m, nn.Conv2d): 93 | if m.bias is not None: 94 | nn.init.xavier_normal_(m.weight.data) 95 | m.bias.data.fill_(0.02) 96 | else: 97 | m.weight.data.normal_(0, 0.01) 98 | elif isinstance(m, nn.BatchNorm2d): 99 | m.weight.data.fill_(1) 100 | m.bias.data.zero_() 101 | 102 | def multibox(self, num_classes): 103 | loc_layers = [] 104 | conf_layers = [] 105 | loc_layers += [nn.Conv2d(128, 21 * 4, kernel_size=3, padding=1)] 106 | conf_layers += [nn.Conv2d(128, 21 * num_classes, kernel_size=3, padding=1)] 107 | loc_layers += [nn.Conv2d(256, 1 * 4, kernel_size=3, padding=1)] 108 | conf_layers += [nn.Conv2d(256, 1 * num_classes, kernel_size=3, padding=1)] 109 | loc_layers += [nn.Conv2d(256, 1 * 4, kernel_size=3, padding=1)] 110 | conf_layers += [nn.Conv2d(256, 1 * num_classes, kernel_size=3, padding=1)] 111 | return nn.Sequential(*loc_layers), nn.Sequential(*conf_layers) 112 | 113 | def forward(self, x): 114 | 115 | detection_sources = list() 116 | loc = list() 117 | conf = list() 118 | 119 | x = self.conv1(x) 120 | x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1) 121 | x = self.conv2(x) 122 | x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1) 123 | x = self.inception1(x) 124 | x = self.inception2(x) 125 | x = self.inception3(x) 126 | detection_sources.append(x) 127 | 128 | x = self.conv3_1(x) 129 | x = self.conv3_2(x) 130 | detection_sources.append(x) 131 | 132 | x = self.conv4_1(x) 133 | x = self.conv4_2(x) 134 | detection_sources.append(x) 135 | 136 | for (x, l, c) in zip(detection_sources, self.loc, self.conf): 137 | loc.append(l(x).permute(0, 2, 3, 1).contiguous()) 138 | conf.append(c(x).permute(0, 2, 3, 1).contiguous()) 139 | 140 | loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1) 141 | conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1) 142 | 143 | if self.phase == "test": 144 | output = (loc.view(loc.size(0), -1, 4), 145 | self.softmax(conf.view(conf.size(0), -1, self.num_classes))) 146 | else: 147 | output = (loc.view(loc.size(0), -1, 4), 148 | conf.view(conf.size(0), -1, self.num_classes)) 149 | 150 | return output 151 | -------------------------------------------------------------------------------- /FaceBoxes/readme.md: -------------------------------------------------------------------------------- 1 | ## How to fun FaceBoxes 2 | 3 | ### Build the cpu version of NMS 4 | ```shell script 5 | cd utils 6 | python3 build.py build_ext --inplace 7 | ``` 8 | 9 | or just run 10 | 11 | ```shell script 12 | sh ./build_cpu_nms.sh 13 | ``` 14 | 15 | ### Run the demo of face detection 16 | ```shell script 17 | python3 FaceBoxes.py 18 | ``` -------------------------------------------------------------------------------- /FaceBoxes/utils/.gitignore: -------------------------------------------------------------------------------- 1 | utils/build 2 | utils/nms/*.so 3 | utils/*.c 4 | build/ 5 | -------------------------------------------------------------------------------- /FaceBoxes/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/FaceBoxes/utils/__init__.py -------------------------------------------------------------------------------- /FaceBoxes/utils/box_utils.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import torch 4 | import numpy as np 5 | 6 | 7 | def point_form(boxes): 8 | """ Convert prior_boxes to (xmin, ymin, xmax, ymax) 9 | representation for 
comparison to point form ground truth data. 10 | Args: 11 | boxes: (tensor) center-size default boxes from priorbox layers. 12 | Return: 13 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. 14 | """ 15 | return torch.cat((boxes[:, :2] - boxes[:, 2:] / 2, # xmin, ymin 16 | boxes[:, :2] + boxes[:, 2:] / 2), 1) # xmax, ymax 17 | 18 | 19 | def center_size(boxes): 20 | """ Convert prior_boxes to (cx, cy, w, h) 21 | representation for comparison to center-size form ground truth data. 22 | Args: 23 | boxes: (tensor) point_form boxes 24 | Return: 25 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. 26 | """ 27 | return torch.cat((boxes[:, 2:] + boxes[:, :2]) / 2, # cx, cy 28 | boxes[:, 2:] - boxes[:, :2], 1) # w, h 29 | 30 | 31 | def intersect(box_a, box_b): 32 | """ We resize both tensors to [A,B,2] without new malloc: 33 | [A,2] -> [A,1,2] -> [A,B,2] 34 | [B,2] -> [1,B,2] -> [A,B,2] 35 | Then we compute the area of intersect between box_a and box_b. 36 | Args: 37 | box_a: (tensor) bounding boxes, Shape: [A,4]. 38 | box_b: (tensor) bounding boxes, Shape: [B,4]. 39 | Return: 40 | (tensor) intersection area, Shape: [A,B]. 41 | """ 42 | A = box_a.size(0) 43 | B = box_b.size(0) 44 | max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), 45 | box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) 46 | min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), 47 | box_b[:, :2].unsqueeze(0).expand(A, B, 2)) 48 | inter = torch.clamp((max_xy - min_xy), min=0) 49 | return inter[:, :, 0] * inter[:, :, 1] 50 | 51 | 52 | def jaccard(box_a, box_b): 53 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap 54 | is simply the intersection over union of two boxes. Here we operate on 55 | ground truth boxes and default boxes. 
56 | E.g.: 57 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) 58 | Args: 59 | box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] 60 | box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] 61 | Return: 62 | jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] 63 | """ 64 | inter = intersect(box_a, box_b) 65 | area_a = ((box_a[:, 2] - box_a[:, 0]) * 66 | (box_a[:, 3] - box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B] 67 | area_b = ((box_b[:, 2] - box_b[:, 0]) * 68 | (box_b[:, 3] - box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B] 69 | union = area_a + area_b - inter 70 | return inter / union # [A,B] 71 | 72 | 73 | def matrix_iou(a, b): 74 | """ 75 | return iou of a and b, numpy version for data augenmentation 76 | """ 77 | lt = np.maximum(a[:, np.newaxis, :2], b[:, :2]) 78 | rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) 79 | 80 | area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2) 81 | area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) 82 | area_b = np.prod(b[:, 2:] - b[:, :2], axis=1) 83 | return area_i / (area_a[:, np.newaxis] + area_b - area_i) 84 | 85 | 86 | def matrix_iof(a, b): 87 | """ 88 | return iof of a and b, numpy version for data augenmentation 89 | """ 90 | lt = np.maximum(a[:, np.newaxis, :2], b[:, :2]) 91 | rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) 92 | 93 | area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2) 94 | area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) 95 | return area_i / np.maximum(area_a[:, np.newaxis], 1) 96 | 97 | 98 | def match(threshold, truths, priors, variances, labels, loc_t, conf_t, idx): 99 | """Match each prior box with the ground truth box of the highest jaccard 100 | overlap, encode the bounding boxes, then return the matched indices 101 | corresponding to both confidence and location preds. 102 | Args: 103 | threshold: (float) The overlap threshold used when mathing boxes. 104 | truths: (tensor) Ground truth boxes, Shape: [num_obj, num_priors]. 105 | priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4]. 106 | variances: (tensor) Variances corresponding to each prior coord, 107 | Shape: [num_priors, 4]. 108 | labels: (tensor) All the class labels for the image, Shape: [num_obj]. 109 | loc_t: (tensor) Tensor to be filled w/ endcoded location targets. 110 | conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds. 111 | idx: (int) current batch index 112 | Return: 113 | The matched indices corresponding to 1)location and 2)confidence preds. 
114 | """ 115 | # jaccard index 116 | overlaps = jaccard( 117 | truths, 118 | point_form(priors) 119 | ) 120 | # (Bipartite Matching) 121 | # [1,num_objects] best prior for each ground truth 122 | best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True) 123 | 124 | # ignore hard gt 125 | valid_gt_idx = best_prior_overlap[:, 0] >= 0.2 126 | best_prior_idx_filter = best_prior_idx[valid_gt_idx, :] 127 | if best_prior_idx_filter.shape[0] <= 0: 128 | loc_t[idx] = 0 129 | conf_t[idx] = 0 130 | return 131 | 132 | # [1,num_priors] best ground truth for each prior 133 | best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True) 134 | best_truth_idx.squeeze_(0) 135 | best_truth_overlap.squeeze_(0) 136 | best_prior_idx.squeeze_(1) 137 | best_prior_idx_filter.squeeze_(1) 138 | best_prior_overlap.squeeze_(1) 139 | best_truth_overlap.index_fill_(0, best_prior_idx_filter, 2) # ensure best prior 140 | # TODO refactor: index best_prior_idx with long tensor 141 | # ensure every gt matches with its prior of max overlap 142 | for j in range(best_prior_idx.size(0)): 143 | best_truth_idx[best_prior_idx[j]] = j 144 | matches = truths[best_truth_idx] # Shape: [num_priors,4] 145 | conf = labels[best_truth_idx] # Shape: [num_priors] 146 | conf[best_truth_overlap < threshold] = 0 # label as background 147 | loc = encode(matches, priors, variances) 148 | loc_t[idx] = loc # [num_priors,4] encoded offsets to learn 149 | conf_t[idx] = conf # [num_priors] top class label for each prior 150 | 151 | 152 | def encode(matched, priors, variances): 153 | """Encode the variances from the priorbox layers into the ground truth boxes 154 | we have matched (based on jaccard overlap) with the prior boxes. 155 | Args: 156 | matched: (tensor) Coords of ground truth for each prior in point-form 157 | Shape: [num_priors, 4]. 158 | priors: (tensor) Prior boxes in center-offset form 159 | Shape: [num_priors,4]. 160 | variances: (list[float]) Variances of priorboxes 161 | Return: 162 | encoded boxes (tensor), Shape: [num_priors, 4] 163 | """ 164 | 165 | # dist b/t match center and prior's center 166 | g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2] 167 | # encode variance 168 | g_cxcy /= (variances[0] * priors[:, 2:]) 169 | # match wh / prior wh 170 | g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:] 171 | g_wh = torch.log(g_wh) / variances[1] 172 | # return target for smooth_l1_loss 173 | return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4] 174 | 175 | 176 | # Adapted from https://github.com/Hakuyume/chainer-ssd 177 | def decode(loc, priors, variances): 178 | """Decode locations from predictions using priors to undo 179 | the encoding we did for offset regression at train time. 180 | Args: 181 | loc (tensor): location predictions for loc layers, 182 | Shape: [num_priors,4] 183 | priors (tensor): Prior boxes in center-offset form. 184 | Shape: [num_priors,4]. 185 | variances: (list[float]) Variances of priorboxes 186 | Return: 187 | decoded bounding box predictions 188 | """ 189 | 190 | boxes = torch.cat(( 191 | priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], 192 | priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) 193 | boxes[:, :2] -= boxes[:, 2:] / 2 194 | boxes[:, 2:] += boxes[:, :2] 195 | return boxes 196 | 197 | 198 | def log_sum_exp(x): 199 | """Utility function for computing log_sum_exp while determining 200 | This will be used to determine unaveraged confidence loss across 201 | all examples in a batch. 
202 | Args: 203 | x (Variable(tensor)): conf_preds from conf layers 204 | """ 205 | x_max = x.data.max() 206 | return torch.log(torch.sum(torch.exp(x - x_max), 1, keepdim=True)) + x_max 207 | 208 | 209 | # Original author: Francisco Massa: 210 | # https://github.com/fmassa/object-detection.torch 211 | # Ported to PyTorch by Max deGroot (02/01/2017) 212 | def nms(boxes, scores, overlap=0.5, top_k=200): 213 | """Apply non-maximum suppression at test time to avoid detecting too many 214 | overlapping bounding boxes for a given object. 215 | Args: 216 | boxes: (tensor) The location preds for the img, Shape: [num_priors,4]. 217 | scores: (tensor) The class predscores for the img, Shape:[num_priors]. 218 | overlap: (float) The overlap thresh for suppressing unnecessary boxes. 219 | top_k: (int) The Maximum number of box preds to consider. 220 | Return: 221 | The indices of the kept boxes with respect to num_priors. 222 | """ 223 | 224 | keep = torch.Tensor(scores.size(0)).fill_(0).long() 225 | if boxes.numel() == 0: 226 | return keep 227 | x1 = boxes[:, 0] 228 | y1 = boxes[:, 1] 229 | x2 = boxes[:, 2] 230 | y2 = boxes[:, 3] 231 | area = torch.mul(x2 - x1, y2 - y1) 232 | v, idx = scores.sort(0) # sort in ascending order 233 | # I = I[v >= 0.01] 234 | idx = idx[-top_k:] # indices of the top-k largest vals 235 | xx1 = boxes.new() 236 | yy1 = boxes.new() 237 | xx2 = boxes.new() 238 | yy2 = boxes.new() 239 | w = boxes.new() 240 | h = boxes.new() 241 | 242 | # keep = torch.Tensor() 243 | count = 0 244 | while idx.numel() > 0: 245 | i = idx[-1] # index of current largest val 246 | # keep.append(i) 247 | keep[count] = i 248 | count += 1 249 | if idx.size(0) == 1: 250 | break 251 | idx = idx[:-1] # remove kept element from view 252 | # load bboxes of next highest vals 253 | torch.index_select(x1, 0, idx, out=xx1) 254 | torch.index_select(y1, 0, idx, out=yy1) 255 | torch.index_select(x2, 0, idx, out=xx2) 256 | torch.index_select(y2, 0, idx, out=yy2) 257 | # store element-wise max with next highest score 258 | xx1 = torch.clamp(xx1, min=x1[i]) 259 | yy1 = torch.clamp(yy1, min=y1[i]) 260 | xx2 = torch.clamp(xx2, max=x2[i]) 261 | yy2 = torch.clamp(yy2, max=y2[i]) 262 | w.resize_as_(xx2) 263 | h.resize_as_(yy2) 264 | w = xx2 - xx1 265 | h = yy2 - yy1 266 | # check sizes of xx1 and xx2.. 
after each iteration 267 | w = torch.clamp(w, min=0.0) 268 | h = torch.clamp(h, min=0.0) 269 | inter = w * h 270 | # IoU = i / (area(a) + area(b) - i) 271 | rem_areas = torch.index_select(area, 0, idx) # load remaining areas) 272 | union = (rem_areas - inter) + area[i] 273 | IoU = inter / union # store result in iou 274 | # keep only elements with an IoU <= overlap 275 | idx = idx[IoU.le(overlap)] 276 | return keep, count 277 | -------------------------------------------------------------------------------- /FaceBoxes/utils/build.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | import os 11 | from os.path import join as pjoin 12 | import numpy as np 13 | from distutils.core import setup 14 | from distutils.extension import Extension 15 | from Cython.Distutils import build_ext 16 | 17 | 18 | def find_in_path(name, path): 19 | "Find a file in a search path" 20 | # adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 21 | for dir in path.split(os.pathsep): 22 | binpath = pjoin(dir, name) 23 | if os.path.exists(binpath): 24 | return os.path.abspath(binpath) 25 | return None 26 | 27 | 28 | # Obtain the numpy include directory. This logic works across numpy versions. 29 | try: 30 | numpy_include = np.get_include() 31 | except AttributeError: 32 | numpy_include = np.get_numpy_include() 33 | 34 | 35 | # run the customize_compiler 36 | class custom_build_ext(build_ext): 37 | def build_extensions(self): 38 | # customize_compiler_for_nvcc(self.compiler) 39 | build_ext.build_extensions(self) 40 | 41 | 42 | ext_modules = [ 43 | Extension( 44 | "nms.cpu_nms", 45 | ["nms/cpu_nms.pyx"], 46 | # extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 47 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"], 48 | include_dirs=[numpy_include] 49 | ) 50 | ] 51 | 52 | setup( 53 | name='mot_utils', 54 | ext_modules=ext_modules, 55 | # inject our custom trigger 56 | cmdclass={'build_ext': custom_build_ext}, 57 | ) 58 | -------------------------------------------------------------------------------- /FaceBoxes/utils/config.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | cfg = { 4 | 'name': 'FaceBoxes', 5 | 'min_sizes': [[32, 64, 128], [256], [512]], 6 | 'steps': [32, 64, 128], 7 | 'variance': [0.1, 0.2], 8 | 'clip': False 9 | } 10 | -------------------------------------------------------------------------------- /FaceBoxes/utils/functions.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import sys 4 | import os.path as osp 5 | import torch 6 | 7 | def check_keys(model, pretrained_state_dict): 8 | ckpt_keys = set(pretrained_state_dict.keys()) 9 | model_keys = set(model.state_dict().keys()) 10 | used_pretrained_keys = model_keys & ckpt_keys 11 | unused_pretrained_keys = ckpt_keys - model_keys 12 | missing_keys = model_keys - ckpt_keys 13 | # print('Missing keys:{}'.format(len(missing_keys))) 14 | # print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys))) 15 | # print('Used keys:{}'.format(len(used_pretrained_keys))) 16 | assert len(used_pretrained_keys) > 0, 'load NONE from pretrained 
checkpoint' 17 | return True 18 | 19 | 20 | def remove_prefix(state_dict, prefix): 21 | ''' Old style model is stored with all names of parameters sharing common prefix 'module.' ''' 22 | # print('remove prefix \'{}\''.format(prefix)) 23 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x 24 | return {f(key): value for key, value in state_dict.items()} 25 | 26 | 27 | def load_model(model, pretrained_path, load_to_cpu): 28 | if not osp.isfile(pretrained_path): 29 | print(f'The pre-trained FaceBoxes model {pretrained_path} does not exist') 30 | sys.exit('-1') 31 | # print('Loading pretrained model from {}'.format(pretrained_path)) 32 | if load_to_cpu: 33 | pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage) 34 | else: 35 | device = torch.cuda.current_device() 36 | pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device)) 37 | if "state_dict" in pretrained_dict.keys(): 38 | pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.') 39 | else: 40 | pretrained_dict = remove_prefix(pretrained_dict, 'module.') 41 | check_keys(model, pretrained_dict) 42 | model.load_state_dict(pretrained_dict, strict=False) 43 | return model 44 | -------------------------------------------------------------------------------- /FaceBoxes/utils/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.so 3 | -------------------------------------------------------------------------------- /FaceBoxes/utils/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/FaceBoxes/utils/nms/__init__.py -------------------------------------------------------------------------------- /FaceBoxes/utils/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int64) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef 
np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | 70 | def cpu_soft_nms(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0): 71 | cdef unsigned int N = boxes.shape[0] 72 | cdef float iw, ih, box_area 73 | cdef float ua 74 | cdef int pos = 0 75 | cdef float maxscore = 0 76 | cdef int maxpos = 0 77 | cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov 78 | 79 | for i in range(N): 80 | maxscore = boxes[i, 4] 81 | maxpos = i 82 | 83 | tx1 = boxes[i,0] 84 | ty1 = boxes[i,1] 85 | tx2 = boxes[i,2] 86 | ty2 = boxes[i,3] 87 | ts = boxes[i,4] 88 | 89 | pos = i + 1 90 | # get max box 91 | while pos < N: 92 | if maxscore < boxes[pos, 4]: 93 | maxscore = boxes[pos, 4] 94 | maxpos = pos 95 | pos = pos + 1 96 | 97 | # add max box as a detection 98 | boxes[i,0] = boxes[maxpos,0] 99 | boxes[i,1] = boxes[maxpos,1] 100 | boxes[i,2] = boxes[maxpos,2] 101 | boxes[i,3] = boxes[maxpos,3] 102 | boxes[i,4] = boxes[maxpos,4] 103 | 104 | # swap ith box with position of max box 105 | boxes[maxpos,0] = tx1 106 | boxes[maxpos,1] = ty1 107 | boxes[maxpos,2] = tx2 108 | boxes[maxpos,3] = ty2 109 | boxes[maxpos,4] = ts 110 | 111 | tx1 = boxes[i,0] 112 | ty1 = boxes[i,1] 113 | tx2 = boxes[i,2] 114 | ty2 = boxes[i,3] 115 | ts = boxes[i,4] 116 | 117 | pos = i + 1 118 | # NMS iterations, note that N changes if detection boxes fall below threshold 119 | while pos < N: 120 | x1 = boxes[pos, 0] 121 | y1 = boxes[pos, 1] 122 | x2 = boxes[pos, 2] 123 | y2 = boxes[pos, 3] 124 | s = boxes[pos, 4] 125 | 126 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 127 | iw = (min(tx2, x2) - max(tx1, x1) + 1) 128 | if iw > 0: 129 | ih = (min(ty2, y2) - max(ty1, y1) + 1) 130 | if ih > 0: 131 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) 132 | ov = iw * ih / ua #iou between max box and detection box 133 | 134 | if method == 1: # linear 135 | if ov > Nt: 136 | weight = 1 - ov 137 | else: 138 | weight = 1 139 | elif method == 2: # gaussian 140 | weight = np.exp(-(ov * ov)/sigma) 141 | else: # original NMS 142 | if ov > Nt: 143 | weight = 0 144 | else: 145 | weight = 1 146 | 147 | boxes[pos, 4] = weight*boxes[pos, 4] 148 | 149 | # if box score falls below threshold, discard the box by swapping with last box 150 | # update N 151 | if boxes[pos, 4] < threshold: 152 | boxes[pos,0] = boxes[N-1, 0] 153 | boxes[pos,1] = boxes[N-1, 1] 154 | boxes[pos,2] = boxes[N-1, 2] 155 | boxes[pos,3] = boxes[N-1, 3] 156 | boxes[pos,4] = boxes[N-1, 4] 157 | N = N - 1 158 | pos = pos - 1 159 | 160 | pos = pos + 1 161 | 162 | keep = [i for i in range(N)] 163 | return keep 164 | -------------------------------------------------------------------------------- /FaceBoxes/utils/nms/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The 
MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def py_cpu_nms(dets, thresh): 11 | """Pure Python NMS baseline.""" 12 | x1 = dets[:, 0] 13 | y1 = dets[:, 1] 14 | x2 = dets[:, 2] 15 | y2 = dets[:, 3] 16 | scores = dets[:, 4] 17 | 18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 19 | order = scores.argsort()[::-1] 20 | 21 | keep = [] 22 | while order.size > 0: 23 | i = order[0] 24 | keep.append(i) 25 | xx1 = np.maximum(x1[i], x1[order[1:]]) 26 | yy1 = np.maximum(y1[i], y1[order[1:]]) 27 | xx2 = np.minimum(x2[i], x2[order[1:]]) 28 | yy2 = np.minimum(y2[i], y2[order[1:]]) 29 | 30 | w = np.maximum(0.0, xx2 - xx1 + 1) 31 | h = np.maximum(0.0, yy2 - yy1 + 1) 32 | inter = w * h 33 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 34 | 35 | inds = np.where(ovr <= thresh)[0] 36 | order = order[inds + 1] 37 | 38 | return keep 39 | -------------------------------------------------------------------------------- /FaceBoxes/utils/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | from .nms.cpu_nms import cpu_nms, cpu_soft_nms 11 | 12 | 13 | def nms(dets, thresh): 14 | """Dispatch to either CPU or GPU NMS implementations.""" 15 | 16 | if dets.shape[0] == 0: 17 | return [] 18 | return cpu_nms(dets, thresh) 19 | # return gpu_nms(dets, thresh) 20 | -------------------------------------------------------------------------------- /FaceBoxes/utils/prior_box.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from .config import cfg 4 | 5 | import torch 6 | from itertools import product as product 7 | from math import ceil 8 | 9 | 10 | class PriorBox(object): 11 | def __init__(self, image_size=None): 12 | super(PriorBox, self).__init__() 13 | # self.aspect_ratios = cfg['aspect_ratios'] 14 | self.min_sizes = cfg['min_sizes'] 15 | self.steps = cfg['steps'] 16 | self.clip = cfg['clip'] 17 | self.image_size = image_size 18 | self.feature_maps = [[ceil(self.image_size[0] / step), ceil(self.image_size[1] / step)] for step in self.steps] 19 | 20 | def forward(self): 21 | anchors = [] 22 | for k, f in enumerate(self.feature_maps): 23 | min_sizes = self.min_sizes[k] 24 | for i, j in product(range(f[0]), range(f[1])): 25 | for min_size in min_sizes: 26 | s_kx = min_size / self.image_size[1] 27 | s_ky = min_size / self.image_size[0] 28 | if min_size == 32: 29 | dense_cx = [x * self.steps[k] / self.image_size[1] for x in 30 | [j + 0, j + 0.25, j + 0.5, j + 0.75]] 31 | dense_cy = [y * self.steps[k] / self.image_size[0] for y in 32 | [i + 0, i + 0.25, i + 0.5, i + 0.75]] 33 | for cy, cx in product(dense_cy, dense_cx): 34 | anchors += [cx, cy, s_kx, s_ky] 35 | elif min_size == 64: 36 | dense_cx = [x * self.steps[k] / self.image_size[1] for x in [j + 0, j + 0.5]] 37 | dense_cy = [y * self.steps[k] / self.image_size[0] for y in [i + 0, i + 0.5]] 38 | for cy, cx in product(dense_cy, dense_cx): 39 | anchors += [cx, cy, s_kx, s_ky] 40 | else: 41 | cx = (j + 0.5) * self.steps[k] / self.image_size[1] 42 | cy = (i + 0.5) * self.steps[k] / self.image_size[0] 43 | anchors += [cx, cy, s_kx, s_ky] 44 | # back to torch 
land 45 | output = torch.Tensor(anchors).view(-1, 4) 46 | if self.clip: 47 | output.clamp_(max=1, min=0) 48 | return output 49 | -------------------------------------------------------------------------------- /FaceBoxes/utils/timer.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | import time 11 | 12 | 13 | class Timer(object): 14 | """A simple timer.""" 15 | 16 | def __init__(self): 17 | self.total_time = 0. 18 | self.calls = 0 19 | self.start_time = 0. 20 | self.diff = 0. 21 | self.average_time = 0. 22 | 23 | def tic(self): 24 | # using time.time instead of time.clock because time time.clock 25 | # does not normalize for multithreading 26 | self.start_time = time.time() 27 | 28 | def toc(self, average=True): 29 | self.diff = time.time() - self.start_time 30 | self.total_time += self.diff 31 | self.calls += 1 32 | self.average_time = self.total_time / self.calls 33 | if average: 34 | return self.average_time 35 | else: 36 | return self.diff 37 | 38 | def clear(self): 39 | self.total_time = 0. 40 | self.calls = 0 41 | self.start_time = 0. 42 | self.diff = 0. 43 | self.average_time = 0. 44 | -------------------------------------------------------------------------------- /FaceBoxes/weights/FaceBoxesProd.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/FaceBoxes/weights/FaceBoxesProd.pth -------------------------------------------------------------------------------- /FaceBoxes/weights/readme.md: -------------------------------------------------------------------------------- 1 | The pre-trained model `FaceBoxesProd.pth` is downloaded from [Google Drive](https://drive.google.com/file/d/1tRVwOlu0QtjvADQ2H7vqrRwsWEmaqioI). -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Cho Ying Wu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SynergyNet
2 | 3DV 2021: Synergy between 3DMM and 3D Landmarks for Accurate 3D Facial Geometry 3 | 4 | Cho-Ying Wu, Qiangeng Xu, Ulrich Neumann, CGIT Lab at University of Southern California 5 | 6 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/synergy-between-3dmm-and-3d-landmarks-for/face-alignment-on-aflw)](https://paperswithcode.com/sota/face-alignment-on-aflw?p=synergy-between-3dmm-and-3d-landmarks-for) 7 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/synergy-between-3dmm-and-3d-landmarks-for/head-pose-estimation-on-aflw2000)](https://paperswithcode.com/sota/head-pose-estimation-on-aflw2000?p=synergy-between-3dmm-and-3d-landmarks-for) 8 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/synergy-between-3dmm-and-3d-landmarks-for/face-alignment-on-aflw2000-3d)](https://paperswithcode.com/sota/face-alignment-on-aflw2000-3d?p=synergy-between-3dmm-and-3d-landmarks-for) 9 | 10 | [paper] [video] [project page] 11 | 12 | News [Dec 11, 2024]: Added details on training the UV-texture GAN. See the section "Training UV-texture GAN". 13 | 14 | News [Jul 10, 2022]: Added a simplified API for getting 3D landmarks, face mesh, and face pose in only one line. See "Simplified API". It is convenient if you simply want to plug this method into your work. 15 | 16 | News: Added a Colab demo 17 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1q9HRLA3wGxz4IFIseZFK1maOyH0wutYk?usp=sharing) 18 | 19 | News: Our new work [Cross-Modal Perceptionist] is accepted to CVPR 2022, which is based on this SynergyNet project.
20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | ## Advantages
30 | 31 | :+1: SOTA on all 3D facial alignment, face orientation estimation, and 3D face modeling.

32 | :+1: Fast inference with 3000fps on a laptop RTX 2080.

33 | :+1: Simple implementation with only widely used operations.

34 | 35 | (This project is built/tested on Python 3.8 and PyTorch 1.9 on a compatible GPU) 36 | 37 | ## Single Image Inference Demo
38 | 39 | 1. Clone 40 | 41 | ```git clone https://github.com/choyingw/SynergyNet``` 42 | 43 | ```cd SynergyNet ``` 44 | 45 | 2. Use conda 46 | 47 | ```conda create --name SynergyNet``` 48 | 49 | ```conda activate SynergyNet``` 50 | 51 | 3. Install pre-requisite common packages 52 | 53 | ```PyTorch 1.9 (should also be compatible with 1.0+ versions), Torchvision, Opencv, Scipy, Matplotlib, Cython ``` 54 | 55 | 4. Download data [here] and 56 | [here]. Extract these data under the repo root. 57 | 58 | These data are processed from [3DDFA] and [FSA-Net]. 59 | 60 | Download pretrained weights [here]. Put the model under 'pretrained/'. 61 | 62 | 5. Compile Sim3DR and FaceBoxes: 63 | 64 | ```cd Sim3DR``` 65 | 66 | ```./build_sim3dr.sh``` 67 | 68 | ```cd ../FaceBoxes``` 69 | 70 | ```./build_cpu_nms.sh``` 71 | 72 | ```cd ..``` 73 | 74 | 6. Inference 75 | 76 | ```python singleImage.py -f img``` 77 | 78 | The default inference requires a compatible GPU to run. If you would like to run on a CPU, please comment out the .cuda() calls and load the pretrained weights onto the CPU. 79 | 80 |
## Simplified API
81 | 82 | We provide a simple API for convenient usage if you want to plug this method into your work. 83 | 84 | ```python 85 | import cv2 86 | from synergy3DMM import SynergyNet 87 | model = SynergyNet() 88 | I = cv2.imread('<path-to-image>') 89 | # get landmark [[y, x, z], 68 (points)], mesh [[y, x, z], 53215 (points)], and face pose (Euler angles [yaw, pitch, roll] and translation [y, x, z]) 90 | lmk3d, mesh, pose = model.get_all_outputs(I) 91 | ``` 92 | We provide a simple example script in singleImage_simple.py. 93 | 94 | We also provide a setup.py file. Run `pip install -e .`; you can then do `from synergy3DMM import SynergyNet` from any other directory. Note that [3dmm_data] and the [pretrained weight] (put the model under 'pretrained/') need to be present. A short sketch of how the returned outputs might be consumed is shown below. 95 | 96 |
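As a follow-up to the snippet above, the sketch below shows one way the returned outputs might be inspected and saved. It is an illustration, not repository code: per-face indexing (index 0) and the exact array layout are assumptions based on the comment in the snippet above.

```python
# Hypothetical follow-up sketch: inspect and save the outputs of get_all_outputs().
import cv2
import numpy as np
from synergy3DMM import SynergyNet

model = SynergyNet()
I = cv2.imread('<path-to-image>')        # placeholder path
lmk3d, mesh, pose = model.get_all_outputs(I)

lmk3d_face0 = np.asarray(lmk3d[0])       # 3D landmarks of the first detected face (assumed indexing)
mesh_face0 = np.asarray(mesh[0])         # dense mesh vertices of the first detected face
print('landmarks:', lmk3d_face0.shape, 'mesh vertices:', mesh_face0.shape)
print('pose (Euler angles + translation):', pose[0])
np.save('landmarks_face0.npy', lmk3d_face0)
```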
## Benchmark Evaluation
97 | 98 | 1. Follow Single Image Inference Demo: Step 1-4. 99 | 100 | 2. Benchmarking 101 | 102 | ```python benchmark.py -w pretrained/best.pth.tar``` 103 | 104 | Results are printed out, and visualizations of the first 50 examples are stored under 'results/' (see 'demo/' for some pre-generated samples as references). 105 | 106 | Updates: the best head pose estimation [pretrained model] (mean MAE: 3.31) is better than the number reported in the paper (3.35). Use -w to load different pretrained models. 107 | 108 |
## Training
109 | 110 | 1. Follow Single Image Inference Demo: Step 1-4. 111 | 112 | 2. Download the training data from [3DDFA]: train_aug_120x120.zip, and extract the zip file under the root folder (it contains about 680K images). 113 | 114 | 3. 115 | ```bash train_script.sh``` 116 | 117 | 4. Please refer to train_script.sh for hyperparameters such as learning rate, epochs, or GPU device. The default settings take ~19G of GPU memory on a 3090 and about 6 hours for training. If your GPU has less memory, please decrease the batch size and learning rate proportionally. 118 | 119 |
## Textured Artistic Face Meshes
120 | 121 | 1. Follow Single Image Inference Demo: Step 1-5. 122 | 123 | 2. Download the artistic faces data [here], which are from [AF-Dataset]. Download our predicted UV maps [here], produced by the UV-texture GAN. Extract them under the root folder. 124 | 125 | 3. 126 | ```python artistic.py -f art-all --png``` (whole folder) 127 | 128 | ```python artistic.py -f art-all/122.png``` (single image) 129 | 130 | 131 | Note that this artistic face dataset contains many different levels and styles of face abstraction. If a testing image is close to a real face, the result is much better than for highly abstract samples. 132 | 133 |
## Textured Real Face Renderings
134 | 135 | 1. Follow Single Image Inference Demo: Step 1-5. 136 | 137 | 2. Download our predicted UV maps and real face images for AFLW2000-3D [here], produced by the UV-texture GAN. Extract them under the root folder. 138 | 139 | 3. 140 | ```python uv_texture_realFaces.py -f texture_data/real --png``` (whole folder) 141 | 142 | ```python uv_texture_realFaces.py -f texture_data/real/image00002_real_A.png``` (single image) 143 | 144 | The results (3D meshes and renderings) are stored under 'inference_output'. 145 | 146 |
## Training UV-texture GAN
147 | 148 | 1. Acquire the AFLW2000-3D dataset and use the [MGC-Net] test pipeline to obtain UV-textures for the AFLW2000 images. 149 | 150 | 2. Use [Pix2Pix] and train an LSGAN with an un-paired loss following their training recipe. In the input layer, concatenate the mean UV-texture with the image, and also add the mean texture as a shortcut at the output of the generator (a sketch of this wiring is given below). 151 | 152 | 3. The mean UV-texture can be obtained from the original BFM set or from [face3d]. 153 | 154 |
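The wiring described in step 2 can be summarized with a short PyTorch-style sketch. This is my own illustration rather than code from this repository: the class name, channel counts, and tensor shapes are assumptions, and any image-to-image generator from the Pix2Pix codebase could stand in for the backbone.

```python
# Hypothetical sketch of the generator wiring described in step 2 (not repository code).
import torch
import torch.nn as nn

class UVTextureGenerator(nn.Module):
    def __init__(self, backbone: nn.Module, mean_uv: torch.Tensor):
        super().__init__()
        self.backbone = backbone                  # assumed to accept a 6-channel input
        self.register_buffer('mean_uv', mean_uv)  # (3, H, W) mean UV-texture

    def forward(self, image: torch.Tensor) -> torch.Tensor:
        mean = self.mean_uv.expand(image.size(0), -1, -1, -1)
        x = torch.cat([image, mean], dim=1)       # input layer: concat mean UV-texture and image
        out = self.backbone(x)
        return out + mean                         # shortcut: add the mean texture at the output
```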
## More Results
155 | 156 | We show a comparison with [DECA] using the top-3 largest roll angle samples in AFLW2000-3D. 157 | 158 | 159 | 160 | 161 | Facial alignemnt on AFLW2000-3D (NME of facial landmarks): 162 | 163 | 164 | 165 | Face orientation estimation on AFLW2000-3D (MAE of Euler angles): 166 | 167 | 168 | 169 | Results on artistic faces: 170 | 171 | 172 | 173 | 174 | 175 | **Related Project** 176 | 177 | [Cross-Modal Perceptionist] (analysis on relation for voice and 3D face) 178 | 179 | **Bibtex** 180 | 181 | If you find our work useful, please consider to cite our work 182 | 183 | @INPROCEEDINGS{wu2021synergy, 184 | author={Wu, Cho-Ying and Xu, Qiangeng and Neumann, Ulrich}, 185 | booktitle={2021 International Conference on 3D Vision (3DV)}, 186 | title={Synergy between 3DMM and 3D Landmarks for Accurate 3D Facial Geometry}, 187 | year={2021} 188 | } 189 | 190 | **Acknowledgement** 191 | 192 | The project is developed on [3DDFA] and [FSA-Net]. Thank them for their wonderful work. Thank [3DDFA-V2] for the face detector and rendering codes. 193 | -------------------------------------------------------------------------------- /Sim3DR/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | cmake-build-debug/ 3 | .idea/ 4 | build/ 5 | *.so 6 | data/ 7 | 8 | lib/rasterize.cpp -------------------------------------------------------------------------------- /Sim3DR/Sim3DR.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from . import _init_paths 4 | import numpy as np 5 | import Sim3DR_Cython 6 | 7 | 8 | def get_normal(vertices, triangles): 9 | normal = np.zeros_like(vertices, dtype=np.float32) 10 | Sim3DR_Cython.get_normal(normal, vertices, triangles, vertices.shape[0], triangles.shape[0]) 11 | return normal 12 | 13 | 14 | def rasterize(vertices, triangles, colors, bg=None, 15 | height=None, width=None, channel=None, 16 | reverse=False): 17 | if bg is not None: 18 | height, width, channel = bg.shape 19 | else: 20 | assert height is not None and width is not None and channel is not None 21 | bg = np.zeros((height, width, channel), dtype=np.float32) 22 | 23 | buffer = np.zeros((height, width), dtype=np.float32) - 1e8 24 | 25 | if colors.dtype != np.float32: 26 | colors = colors.astype(np.float32) 27 | Sim3DR_Cython.rasterize(bg, vertices, triangles, colors, buffer, triangles.shape[0], height, width, channel, 28 | reverse=reverse) 29 | return bg 30 | -------------------------------------------------------------------------------- /Sim3DR/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from .Sim3DR import get_normal, rasterize 4 | from .lighting import RenderPipeline 5 | -------------------------------------------------------------------------------- /Sim3DR/_init_paths.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import os.path as osp 4 | import sys 5 | 6 | 7 | def add_path(path): 8 | if path not in sys.path: 9 | sys.path.insert(0, path) 10 | 11 | 12 | this_dir = osp.dirname(__file__) 13 | lib_path = osp.join(this_dir, '.') 14 | add_path(lib_path) 15 | -------------------------------------------------------------------------------- /Sim3DR/build_sim3dr.sh: -------------------------------------------------------------------------------- 1 | python3 setup.py build_ext --inplace 
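Once build_sim3dr.sh has compiled the Cython extension, the get_normal and rasterize wrappers from Sim3DR.py above can be called directly. The following minimal sketch is not from the repository; the toy triangle and image size are made up, and the dtypes follow the wrapper signatures above.

```python
# Hypothetical usage sketch for the Sim3DR wrappers defined above (toy geometry).
import numpy as np
from Sim3DR import get_normal, rasterize

# a single triangle in image coordinates
vertices = np.array([[10., 10., 0.], [100., 20., 0.], [60., 110., 5.]], dtype=np.float32)
triangles = np.array([[0, 1, 2]], dtype=np.int32)    # vertex indices
colors = np.full((3, 3), 0.8, dtype=np.float32)      # per-vertex RGB in [0, 1]
bg = np.zeros((128, 128, 3), dtype=np.uint8)         # background canvas to draw onto

normals = get_normal(vertices, triangles)            # per-vertex normals, same shape as vertices
out = rasterize(vertices, triangles, colors, bg=bg)  # bg with the colored triangle rasterized in
```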
-------------------------------------------------------------------------------- /Sim3DR/lib/rasterize.h: -------------------------------------------------------------------------------- 1 | #ifndef MESH_CORE_HPP_ 2 | #define MESH_CORE_HPP_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace std; 12 | 13 | class Point3D { 14 | public: 15 | float x; 16 | float y; 17 | float z; 18 | 19 | public: 20 | Point3D() : x(0.f), y(0.f), z(0.f) {} 21 | Point3D(float x_, float y_, float z_) : x(x_), y(y_), z(z_) {} 22 | 23 | void initialize(float x_, float y_, float z_){ 24 | this->x = x_; this->y = y_; this->z = z_; 25 | } 26 | 27 | Point3D cross(Point3D &p){ 28 | Point3D c; 29 | c.x = this->y * p.z - this->z * p.y; 30 | c.y = this->z * p.x - this->x * p.z; 31 | c.z = this->x * p.y - this->y * p.x; 32 | return c; 33 | } 34 | 35 | float dot(Point3D &p) { 36 | return this->x * p.x + this->y * p.y + this->z * p.z; 37 | } 38 | 39 | Point3D operator-(const Point3D &p) { 40 | Point3D np; 41 | np.x = this->x - p.x; 42 | np.y = this->y - p.y; 43 | np.z = this->z - p.z; 44 | return np; 45 | } 46 | 47 | }; 48 | 49 | class Point { 50 | public: 51 | float x; 52 | float y; 53 | 54 | public: 55 | Point() : x(0.f), y(0.f) {} 56 | Point(float x_, float y_) : x(x_), y(y_) {} 57 | float dot(Point p) { 58 | return this->x * p.x + this->y * p.y; 59 | } 60 | 61 | Point operator-(const Point &p) { 62 | Point np; 63 | np.x = this->x - p.x; 64 | np.y = this->y - p.y; 65 | return np; 66 | } 67 | 68 | Point operator+(const Point &p) { 69 | Point np; 70 | np.x = this->x + p.x; 71 | np.y = this->y + p.y; 72 | return np; 73 | } 74 | 75 | Point operator*(float s) { 76 | Point np; 77 | np.x = s * this->x; 78 | np.y = s * this->y; 79 | return np; 80 | } 81 | }; 82 | 83 | 84 | bool is_point_in_tri(Point p, Point p0, Point p1, Point p2); 85 | 86 | void get_point_weight(float *weight, Point p, Point p0, Point p1, Point p2); 87 | 88 | void _get_tri_normal(float *tri_normal, float *vertices, int *triangles, int ntri, bool norm_flg); 89 | 90 | void _get_ver_normal(float *ver_normal, float *tri_normal, int *triangles, int nver, int ntri); 91 | 92 | void _get_normal(float *ver_normal, float *vertices, int *triangles, int nver, int ntri); 93 | 94 | void _rasterize_triangles( 95 | float *vertices, int *triangles, float *depth_buffer, int *triangle_buffer, float *barycentric_weight, 96 | int ntri, int h, int w); 97 | 98 | void _rasterize( 99 | unsigned char *image, float *vertices, int *triangles, float *colors, 100 | float *depth_buffer, int ntri, int h, int w, int c, float alpha, bool reverse); 101 | 102 | void _render_texture_core( 103 | float *image, float *vertices, int *triangles, 104 | float *texture, float *tex_coords, int *tex_triangles, 105 | float *depth_buffer, 106 | int nver, int tex_nver, int ntri, 107 | int h, int w, int c, 108 | int tex_h, int tex_w, int tex_c, 109 | int mapping_type); 110 | 111 | void _write_obj_with_colors_texture(string filename, string mtl_name, 112 | float *vertices, int *triangles, float *colors, float *uv_coords, 113 | int nver, int ntri, int ntexver); 114 | 115 | #endif -------------------------------------------------------------------------------- /Sim3DR/lib/rasterize.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | # from libcpp.string cimport string 4 | cimport cython 5 | from libcpp cimport bool 6 | 7 | # from cpython import bool 8 | 9 | # use the 
Numpy-C-API from Cython 10 | np.import_array() 11 | 12 | # cdefine the signature of our c function 13 | cdef extern from "rasterize.h": 14 | void _rasterize_triangles( 15 | float*vertices, int*triangles, float*depth_buffer, int*triangle_buffer, float*barycentric_weight, 16 | int ntri, int h, int w 17 | ) 18 | 19 | void _rasterize( 20 | unsigned char*image, float*vertices, int*triangles, float*colors, float*depth_buffer, 21 | int ntri, int h, int w, int c, float alpha, bool reverse 22 | ) 23 | 24 | # void _render_texture_core( 25 | # float* image, float* vertices, int* triangles, 26 | # float* texture, float* tex_coords, int* tex_triangles, 27 | # float* depth_buffer, 28 | # int nver, int tex_nver, int ntri, 29 | # int h, int w, int c, 30 | # int tex_h, int tex_w, int tex_c, 31 | # int mapping_type) 32 | 33 | void _get_tri_normal(float *tri_normal, float *vertices, int *triangles, int nver, bool norm_flg) 34 | void _get_ver_normal(float *ver_normal, float*tri_normal, int*triangles, int nver, int ntri) 35 | void _get_normal(float *ver_normal, float *vertices, int *triangles, int nver, int ntri) 36 | 37 | 38 | # void _write_obj_with_colors_texture(string filename, string mtl_name, 39 | # float* vertices, int* triangles, float* colors, float* uv_coords, 40 | # int nver, int ntri, int ntexver) 41 | 42 | @cython.boundscheck(False) 43 | @cython.wraparound(False) 44 | def get_tri_normal(np.ndarray[float, ndim=2, mode="c"] tri_normal not None, 45 | np.ndarray[float, ndim=2, mode = "c"] vertices not None, 46 | np.ndarray[int, ndim=2, mode="c"] triangles not None, 47 | int ntri, bool norm_flg = False): 48 | _get_tri_normal( np.PyArray_DATA(tri_normal), np.PyArray_DATA(vertices), 49 | np.PyArray_DATA(triangles), ntri, norm_flg) 50 | 51 | @cython.boundscheck(False) # turn off bounds-checking for entire function 52 | @cython.wraparound(False) # turn off negative index wrapping for entire function 53 | def get_ver_normal(np.ndarray[float, ndim=2, mode = "c"] ver_normal not None, 54 | np.ndarray[float, ndim=2, mode = "c"] tri_normal not None, 55 | np.ndarray[int, ndim=2, mode="c"] triangles not None, 56 | int nver, int ntri): 57 | _get_ver_normal( 58 | np.PyArray_DATA(ver_normal), np.PyArray_DATA(tri_normal), np.PyArray_DATA(triangles), 59 | nver, ntri) 60 | 61 | @cython.boundscheck(False) # turn off bounds-checking for entire function 62 | @cython.wraparound(False) # turn off negative index wrapping for entire function 63 | def get_normal(np.ndarray[float, ndim=2, mode = "c"] ver_normal not None, 64 | np.ndarray[float, ndim=2, mode = "c"] vertices not None, 65 | np.ndarray[int, ndim=2, mode="c"] triangles not None, 66 | int nver, int ntri): 67 | _get_normal( 68 | np.PyArray_DATA(ver_normal), np.PyArray_DATA(vertices), np.PyArray_DATA(triangles), 69 | nver, ntri) 70 | 71 | 72 | @cython.boundscheck(False) # turn off bounds-checking for entire function 73 | @cython.wraparound(False) # turn off negative index wrapping for entire function 74 | def rasterize_triangles( 75 | np.ndarray[float, ndim=2, mode = "c"] vertices not None, 76 | np.ndarray[int, ndim=2, mode="c"] triangles not None, 77 | np.ndarray[float, ndim=2, mode = "c"] depth_buffer not None, 78 | np.ndarray[int, ndim=2, mode = "c"] triangle_buffer not None, 79 | np.ndarray[float, ndim=2, mode = "c"] barycentric_weight not None, 80 | int ntri, int h, int w 81 | ): 82 | _rasterize_triangles( 83 | np.PyArray_DATA(vertices), np.PyArray_DATA(triangles), 84 | np.PyArray_DATA(depth_buffer), np.PyArray_DATA(triangle_buffer), 85 | 
np.PyArray_DATA(barycentric_weight), 86 | ntri, h, w) 87 | 88 | @cython.boundscheck(False) # turn off bounds-checking for entire function 89 | @cython.wraparound(False) # turn off negative index wrapping for entire function 90 | def rasterize(np.ndarray[unsigned char, ndim=3, mode = "c"] image not None, 91 | np.ndarray[float, ndim=2, mode = "c"] vertices not None, 92 | np.ndarray[int, ndim=2, mode="c"] triangles not None, 93 | np.ndarray[float, ndim=2, mode = "c"] colors not None, 94 | np.ndarray[float, ndim=2, mode = "c"] depth_buffer not None, 95 | int ntri, int h, int w, int c, float alpha = 1, bool reverse = False 96 | ): 97 | _rasterize( 98 | np.PyArray_DATA(image), np.PyArray_DATA(vertices), 99 | np.PyArray_DATA(triangles), 100 | np.PyArray_DATA(colors), 101 | np.PyArray_DATA(depth_buffer), 102 | ntri, h, w, c, alpha, reverse) 103 | 104 | # def render_texture_core(np.ndarray[float, ndim=3, mode = "c"] image not None, 105 | # np.ndarray[float, ndim=2, mode = "c"] vertices not None, 106 | # np.ndarray[int, ndim=2, mode="c"] triangles not None, 107 | # np.ndarray[float, ndim=3, mode = "c"] texture not None, 108 | # np.ndarray[float, ndim=2, mode = "c"] tex_coords not None, 109 | # np.ndarray[int, ndim=2, mode="c"] tex_triangles not None, 110 | # np.ndarray[float, ndim=2, mode = "c"] depth_buffer not None, 111 | # int nver, int tex_nver, int ntri, 112 | # int h, int w, int c, 113 | # int tex_h, int tex_w, int tex_c, 114 | # int mapping_type 115 | # ): 116 | # _render_texture_core( 117 | # np.PyArray_DATA(image), np.PyArray_DATA(vertices), np.PyArray_DATA(triangles), 118 | # np.PyArray_DATA(texture), np.PyArray_DATA(tex_coords), np.PyArray_DATA(tex_triangles), 119 | # np.PyArray_DATA(depth_buffer), 120 | # nver, tex_nver, ntri, 121 | # h, w, c, 122 | # tex_h, tex_w, tex_c, 123 | # mapping_type) 124 | # 125 | # def write_obj_with_colors_texture_core(string filename, string mtl_name, 126 | # np.ndarray[float, ndim=2, mode = "c"] vertices not None, 127 | # np.ndarray[int, ndim=2, mode="c"] triangles not None, 128 | # np.ndarray[float, ndim=2, mode = "c"] colors not None, 129 | # np.ndarray[float, ndim=2, mode = "c"] uv_coords not None, 130 | # int nver, int ntri, int ntexver 131 | # ): 132 | # _write_obj_with_colors_texture(filename, mtl_name, 133 | # np.PyArray_DATA(vertices), np.PyArray_DATA(triangles), np.PyArray_DATA(colors), np.PyArray_DATA(uv_coords), 134 | # nver, ntri, ntexver) 135 | -------------------------------------------------------------------------------- /Sim3DR/lighting.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import numpy as np 4 | from .Sim3DR import get_normal, rasterize 5 | 6 | _norm = lambda arr: arr / np.sqrt(np.sum(arr ** 2, axis=1))[:, None] 7 | 8 | 9 | def norm_vertices(vertices): 10 | vertices -= vertices.min(0)[None, :] 11 | vertices /= vertices.max() 12 | vertices *= 2 13 | vertices -= vertices.max(0)[None, :] / 2 14 | return vertices 15 | 16 | 17 | def convert_type(obj): 18 | if isinstance(obj, tuple) or isinstance(obj, list): 19 | return np.array(obj, dtype=np.float32)[None, :] 20 | return obj 21 | 22 | 23 | class RenderPipeline(object): 24 | def __init__(self, **kwargs): 25 | self.intensity_ambient = convert_type(kwargs.get('intensity_ambient', 0.3)) 26 | self.intensity_directional = convert_type(kwargs.get('intensity_directional', 0.6)) 27 | self.intensity_specular = convert_type(kwargs.get('intensity_specular', 0.1)) 28 | self.specular_exp = kwargs.get('specular_exp', 5) 29 | 
self.color_ambient = convert_type(kwargs.get('color_ambient', (1, 1, 1))) 30 | self.color_directional = convert_type(kwargs.get('color_directional', (1, 1, 1))) 31 | self.light_pos = convert_type(kwargs.get('light_pos', (0, 0, 5))) 32 | self.view_pos = convert_type(kwargs.get('view_pos', (0, 0, 5))) 33 | 34 | def update_light_pos(self, light_pos): 35 | self.light_pos = convert_type(light_pos) 36 | 37 | def __call__(self, vertices, triangles, bg, texture=None): 38 | normal = get_normal(vertices, triangles) 39 | 40 | # 1. lighting 41 | light = np.zeros_like(vertices, dtype=np.float32) 42 | # ambient component 43 | if self.intensity_ambient > 0: 44 | light += self.intensity_ambient * self.color_ambient 45 | 46 | vertices_n = norm_vertices(vertices.copy()) 47 | if self.intensity_directional > 0: 48 | # diffuse component 49 | direction = _norm(self.light_pos - vertices_n) 50 | cos = np.sum(normal * direction, axis=1)[:, None] 51 | # cos = np.clip(cos, 0, 1) 52 | # todo: check below 53 | light += self.intensity_directional * (self.color_directional * np.clip(cos, 0, 1)) 54 | 55 | # specular component 56 | if self.intensity_specular > 0: 57 | v2v = _norm(self.view_pos - vertices_n) 58 | reflection = 2 * cos * normal - direction 59 | spe = np.sum((v2v * reflection) ** self.specular_exp, axis=1)[:, None] 60 | spe = np.where(cos != 0, np.clip(spe, 0, 1), np.zeros_like(spe)) 61 | light += self.intensity_specular * self.color_directional * np.clip(spe, 0, 1) 62 | light = np.clip(light, 0, 1) 63 | 64 | # 2. rasterization, [0, 1] 65 | if texture is None: 66 | render_img = rasterize(vertices, triangles, light, bg=bg) 67 | return render_img 68 | else: 69 | texture *= light 70 | render_img = rasterize(vertices, triangles, texture, bg=bg) 71 | return render_img 72 | 73 | 74 | def main(): 75 | pass 76 | 77 | 78 | if __name__ == '__main__': 79 | main() 80 | -------------------------------------------------------------------------------- /Sim3DR/readme.md: -------------------------------------------------------------------------------- 1 | ## Sim3DR 2 | This is a simple 3D render, written by c++ and cython. 
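Once the extension described below is built, rendering is typically driven through `RenderPipeline` in `lighting.py`. The following is a minimal sketch only: the import path, the `bg` keyword, and the image-coordinate convention for vertices are assumptions read off `lighting.py`, not a documented API.

```python
import numpy as np
from Sim3DR.lighting import RenderPipeline  # assumed import path; RenderPipeline is defined in lighting.py

# Toy mesh: one triangle. The Cython wrappers expect C-contiguous float32 vertices of
# shape (nver, 3) and int32 triangle indices of shape (ntri, 3); vertex x/y appear to be
# interpreted as image-plane (pixel) coordinates.
vertices = np.array([[40.0, 40.0, 0.0],
                     [200.0, 60.0, 0.0],
                     [120.0, 220.0, 10.0]], dtype=np.float32)
triangles = np.array([[0, 1, 2]], dtype=np.int32)
bg = np.zeros((256, 256, 3), dtype=np.uint8)  # background image the mesh is rasterized onto

renderer = RenderPipeline()              # default ambient / directional / specular lighting
out = renderer(vertices, triangles, bg)  # uint8 image of the shaded triangle over bg
```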
3 | 4 | ### Build Sim3DR 5 | 6 | ```shell script 7 | python3 setup.py build_ext --inplace 8 | ``` -------------------------------------------------------------------------------- /Sim3DR/setup.py: -------------------------------------------------------------------------------- 1 | ''' 2 | python setup.py build_ext -i 3 | to compile 4 | ''' 5 | 6 | from distutils.core import setup, Extension 7 | from Cython.Build import cythonize 8 | from Cython.Distutils import build_ext 9 | import numpy 10 | 11 | setup( 12 | name='Sim3DR_Cython', # not the package name 13 | cmdclass={'build_ext': build_ext}, 14 | ext_modules=[Extension("Sim3DR_Cython", 15 | sources=["lib/rasterize.pyx", "lib/rasterize_kernel.cpp"], 16 | language='c++', 17 | include_dirs=[numpy.get_include()], 18 | extra_compile_args=["-std=c++11"])], 19 | ) 20 | -------------------------------------------------------------------------------- /Sim3DR/tests/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | -------------------------------------------------------------------------------- /Sim3DR/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | 3 | set(TARGET test) 4 | project(${TARGET}) 5 | 6 | #find_package( OpenCV REQUIRED ) 7 | #include_directories( ${OpenCV_INCLUDE_DIRS} ) 8 | 9 | #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -O3") 10 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -std=c++11") 11 | add_executable(${TARGET} test.cpp rasterize_kernel.cpp io.cpp) 12 | target_include_directories(${TARGET} PRIVATE ${PROJECT_SOURCE_DIR}) 13 | -------------------------------------------------------------------------------- /Sim3DR/tests/io.cpp: -------------------------------------------------------------------------------- 1 | #include "io.h" 2 | 3 | //void load_obj(const string obj_fp, float* vertices, float* colors, float* triangles){ 4 | // string line; 5 | // ifstream in(obj_fp); 6 | // 7 | // if(in.is_open()){ 8 | // while (getline(in, line)){ 9 | // stringstream ss(line); 10 | // 11 | // char t; // type: v, f 12 | // ss >> t; 13 | // if (t == 'v'){ 14 | // 15 | // } 16 | // } 17 | // } 18 | //} 19 | 20 | void load_obj(const char *obj_fp, float *vertices, float *colors, int *triangles, int nver, int ntri) { 21 | FILE *fp; 22 | fp = fopen(obj_fp, "r"); 23 | 24 | char t; // type: v or f 25 | if (fp != nullptr) { 26 | for (int i = 0; i < nver; ++i) { 27 | fscanf(fp, "%c", &t); 28 | for (int j = 0; j < 3; ++j) 29 | fscanf(fp, " %f", &vertices[3 * i + j]); 30 | for (int j = 0; j < 3; ++j) 31 | fscanf(fp, " %f", &colors[3 * i + j]); 32 | fscanf(fp, "\n"); 33 | } 34 | // fscanf(fp, "%c", &t); 35 | for (int i = 0; i < ntri; ++i) { 36 | fscanf(fp, "%c", &t); 37 | for (int j = 0; j < 3; ++j) { 38 | fscanf(fp, " %d", &triangles[3 * i + j]); 39 | triangles[3 * i + j] -= 1; 40 | } 41 | fscanf(fp, "\n"); 42 | } 43 | 44 | fclose(fp); 45 | } 46 | } 47 | 48 | void load_ply(const char *ply_fp, float *vertices, int *triangles, int nver, int ntri) { 49 | FILE *fp; 50 | fp = fopen(ply_fp, "r"); 51 | 52 | // char s[256]; 53 | char t; 54 | if (fp != nullptr) { 55 | // for (int i = 0; i < 9; ++i) 56 | // fscanf(fp, "%s", s); 57 | for (int i = 0; i < nver; ++i) 58 | fscanf(fp, "%f %f %f\n", &vertices[3 * i], &vertices[3 * i + 1], &vertices[3 * i + 2]); 59 | 60 | for (int i = 0; i < ntri; ++i) 61 | fscanf(fp, "%c %d %d %d\n", &t, &triangles[3 * i], &triangles[3 * i + 1], &triangles[3 * i + 2]); 62 | 63 | 
fclose(fp); 64 | } 65 | } 66 | 67 | void write_ppm(const char *filename, unsigned char *img, int h, int w, int c) { 68 | FILE *fp; 69 | //open file for output 70 | fp = fopen(filename, "wb"); 71 | if (!fp) { 72 | fprintf(stderr, "Unable to open file '%s'\n", filename); 73 | exit(1); 74 | } 75 | 76 | //write the header file 77 | //image format 78 | fprintf(fp, "P6\n"); 79 | 80 | //image size 81 | fprintf(fp, "%d %d\n", w, h); 82 | 83 | // rgb component depth 84 | fprintf(fp, "%d\n", MAX_PXL_VALUE); 85 | 86 | // pixel data 87 | fwrite(img, sizeof(unsigned char), size_t(h * w * c), fp); 88 | fclose(fp); 89 | } -------------------------------------------------------------------------------- /Sim3DR/tests/io.h: -------------------------------------------------------------------------------- 1 | #ifndef IO_H_ 2 | #define IO_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace std; 11 | 12 | #define MAX_PXL_VALUE 255 13 | 14 | void load_obj(const char* obj_fp, float* vertices, float* colors, int* triangles, int nver, int ntri); 15 | void load_ply(const char* ply_fp, float* vertices, int* triangles, int nver, int ntri); 16 | 17 | 18 | void write_ppm(const char *filename, unsigned char *img, int h, int w, int c); 19 | 20 | #endif -------------------------------------------------------------------------------- /Sim3DR/tests/test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Tesing cases 3 | */ 4 | 5 | #include 6 | #include 7 | #include "rasterize.h" 8 | #include "io.h" 9 | 10 | void test_isPointInTri() { 11 | Point p0(0, 0); 12 | Point p1(1, 0); 13 | Point p2(1, 1); 14 | 15 | Point p(0.2, 0.2); 16 | 17 | if (is_point_in_tri(p, p0, p1, p2)) 18 | std::cout << "In"; 19 | else 20 | std::cout << "Out"; 21 | std::cout << std::endl; 22 | } 23 | 24 | void test_getPointWeight() { 25 | Point p0(0, 0); 26 | Point p1(1, 0); 27 | Point p2(1, 1); 28 | 29 | Point p(0.2, 0.2); 30 | 31 | float weight[3]; 32 | get_point_weight(weight, p, p0, p1, p2); 33 | std::cout << weight[0] << " " << weight[1] << " " << weight[2] << std::endl; 34 | } 35 | 36 | void test_get_tri_normal() { 37 | float tri_normal[3]; 38 | // float vertices[9] = {1, 0, 0, 0, 0, 0, 0, 1, 0}; 39 | float vertices[9] = {1, 1.1, 0, 0, 0, 0, 0, 0.6, 0.7}; 40 | int triangles[3] = {0, 1, 2}; 41 | int ntri = 1; 42 | 43 | _get_tri_normal(tri_normal, vertices, triangles, ntri); 44 | 45 | for (int i = 0; i < 3; ++i) 46 | std::cout << tri_normal[i] << ", "; 47 | std::cout << std::endl; 48 | } 49 | 50 | void test_load_obj() { 51 | const char *fp = "../data/vd005_mesh.obj"; 52 | int nver = 35709; 53 | int ntri = 70789; 54 | 55 | auto *vertices = new float[nver]; 56 | auto *colors = new float[nver]; 57 | auto *triangles = new int[ntri]; 58 | load_obj(fp, vertices, colors, triangles, nver, ntri); 59 | 60 | delete[] vertices; 61 | delete[] colors; 62 | delete[] triangles; 63 | } 64 | 65 | void test_render() { 66 | // 1. loading obj 67 | // const char *fp = "/Users/gjz/gjzprojects/Sim3DR/data/vd005_mesh.obj"; 68 | const char *fp = "/Users/gjz/gjzprojects/Sim3DR/data/face1.obj"; 69 | int nver = 35709; //53215; //35709; 70 | int ntri = 70789; //105840;//70789; 71 | 72 | auto *vertices = new float[3 * nver]; 73 | auto *colors = new float[3 * nver]; 74 | auto *triangles = new int[3 * ntri]; 75 | load_obj(fp, vertices, colors, triangles, nver, ntri); 76 | 77 | // 2. 
rendering 78 | int h = 224, w = 224, c = 3; 79 | 80 | // enlarging 81 | int scale = 4; 82 | h *= scale; 83 | w *= scale; 84 | for (int i = 0; i < nver * 3; ++i) vertices[i] *= scale; 85 | 86 | auto *image = new unsigned char[h * w * c](); 87 | auto *depth_buffer = new float[h * w](); 88 | 89 | for (int i = 0; i < h * w; ++i) depth_buffer[i] = -999999; 90 | 91 | clock_t t; 92 | t = clock(); 93 | 94 | _rasterize(image, vertices, triangles, colors, depth_buffer, ntri, h, w, c, true); 95 | t = clock() - t; 96 | double time_taken = ((double) t) / CLOCKS_PER_SEC; // in seconds 97 | printf("Render took %f seconds to execute \n", time_taken); 98 | 99 | 100 | // auto *image_char = new u_char[h * w * c](); 101 | // for (int i = 0; i < h * w * c; ++i) 102 | // image_char[i] = u_char(255 * image[i]); 103 | write_ppm("res.ppm", image, h, w, c); 104 | 105 | // delete[] image_char; 106 | delete[] vertices; 107 | delete[] colors; 108 | delete[] triangles; 109 | delete[] image; 110 | delete[] depth_buffer; 111 | } 112 | 113 | void test_light() { 114 | // 1. loading obj 115 | const char *fp = "/Users/gjz/gjzprojects/Sim3DR/data/emma_input_0_noheader.ply"; 116 | int nver = 53215; //35709; 117 | int ntri = 105840; //70789; 118 | 119 | auto *vertices = new float[3 * nver]; 120 | auto *colors = new float[3 * nver]; 121 | auto *triangles = new int[3 * ntri]; 122 | load_ply(fp, vertices, triangles, nver, ntri); 123 | 124 | // 2. rendering 125 | // int h = 1901, w = 3913, c = 3; 126 | int h = 2000, w = 4000, c = 3; 127 | 128 | // enlarging 129 | // int scale = 1; 130 | // h *= scale; 131 | // w *= scale; 132 | // for (int i = 0; i < nver * 3; ++i) vertices[i] *= scale; 133 | 134 | auto *image = new unsigned char[h * w * c](); 135 | auto *depth_buffer = new float[h * w](); 136 | 137 | for (int i = 0; i < h * w; ++i) depth_buffer[i] = -999999; 138 | for (int i = 0; i < 3 * nver; ++i) colors[i] = 0.8; 139 | 140 | clock_t t; 141 | t = clock(); 142 | 143 | _rasterize(image, vertices, triangles, colors, depth_buffer, ntri, h, w, c, true); 144 | t = clock() - t; 145 | double time_taken = ((double) t) / CLOCKS_PER_SEC; // in seconds 146 | printf("Render took %f seconds to execute \n", time_taken); 147 | 148 | 149 | // auto *image_char = new u_char[h * w * c](); 150 | // for (int i = 0; i < h * w * c; ++i) 151 | // image_char[i] = u_char(255 * image[i]); 152 | write_ppm("emma.ppm", image, h, w, c); 153 | 154 | // delete[] image_char; 155 | delete[] vertices; 156 | delete[] colors; 157 | delete[] triangles; 158 | delete[] image; 159 | delete[] depth_buffer; 160 | } 161 | 162 | int main(int argc, char *argv[]) { 163 | // std::cout << "Hello CMake!" 
<< std::endl; 164 | 165 | // test_isPointInTri(); 166 | // test_getPointWeight(); 167 | // test_get_tri_normal(); 168 | // test_load_obj(); 169 | // test_render(); 170 | test_light(); 171 | return 0; 172 | } -------------------------------------------------------------------------------- /artistic.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as transforms 3 | import numpy as np 4 | import cv2 5 | from utils.ddfa import ToTensor, Normalize 6 | from model_building import SynergyNet 7 | from utils.inference import crop_img, predict_denseVert 8 | import argparse 9 | import torch.backends.cudnn as cudnn 10 | cudnn.benchmark = True 11 | import os 12 | import os.path as osp 13 | import glob 14 | from FaceBoxes import FaceBoxes 15 | 16 | # Following 3DDFA-V2, we also use 120x120 resolution 17 | IMG_SIZE = 120 18 | 19 | def write_obj_with_colors(obj_name, vertices, triangles, colors): 20 | triangles = triangles.copy() 21 | 22 | if obj_name.split('.')[-1] != 'obj': 23 | obj_name = obj_name + '.obj' 24 | with open(obj_name, 'w') as f: 25 | for i in range(vertices.shape[1]): 26 | s = 'v {:.4f} {:.4f} {:.4f} {} {} {}\n'.format(vertices[0, i], vertices[1, i], vertices[2, i], colors[i, 2], 27 | colors[i, 1], colors[i, 0]) 28 | f.write(s) 29 | for i in range(triangles.shape[1]): 30 | s = 'f {} {} {}\n'.format(triangles[0, i], triangles[1, i], triangles[2, i]) 31 | f.write(s) 32 | 33 | def main(args): 34 | # load pre-tained model 35 | checkpoint_fp = 'pretrained/best.pth.tar' 36 | args.arch = 'mobilenet_v2' 37 | args.devices_id = [0] 38 | 39 | checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict'] 40 | 41 | model = SynergyNet(args) 42 | model_dict = model.state_dict() 43 | 44 | # load BFM_UV mapping and kept indicies and deleted triangles 45 | uv_vert=np.load('3dmm_data/BFM_UV.npy') 46 | coord_u = (uv_vert[:,1]*255.0).astype(np.int32) 47 | coord_v = (uv_vert[:,0]*255.0).astype(np.int32) 48 | keep_ind = np.load('3dmm_data/keptInd.npy') 49 | tri_deletion = np.load('3dmm_data/deletedTri.npy') 50 | 51 | # because the model is trained by multiple gpus, prefix 'module' should be removed 52 | for k in checkpoint.keys(): 53 | model_dict[k.replace('module.', '')] = checkpoint[k] 54 | 55 | model.load_state_dict(model_dict, strict=False) 56 | model = model.cuda() 57 | model.eval() 58 | 59 | # face detector 60 | face_boxes = FaceBoxes() 61 | 62 | # preparation 63 | transform = transforms.Compose([ToTensor(), Normalize(mean=127.5, std=128)]) 64 | if osp.isdir(args.files): 65 | if not args.files[-1] == '/': 66 | args.files = args.files + '/' 67 | if not args.png: 68 | files = sorted(glob.glob(args.files+'*.jpg')) 69 | else: 70 | files = sorted(glob.glob(args.files+'*.png')) 71 | else: 72 | files = [args.files] 73 | 74 | for img_fp in files: 75 | print("Process the image: ", img_fp) 76 | 77 | img_ori = cv2.imread(img_fp) 78 | 79 | # crop faces 80 | rects = face_boxes(img_ori) 81 | 82 | # storage 83 | vertices_lst = [] 84 | for rect in rects: 85 | roi_box = rect 86 | 87 | # enlarge the bbox a little and do a square crop 88 | HCenter = (rect[1] + rect[3])/2 89 | WCenter = (rect[0] + rect[2])/2 90 | side_len = roi_box[3]-roi_box[1] 91 | margin = side_len * 1.2 // 2 92 | roi_box[0], roi_box[1], roi_box[2], roi_box[3] = WCenter-margin, HCenter-margin, WCenter+margin, HCenter+margin 93 | 94 | img = crop_img(img_ori, roi_box) 95 | img = cv2.resize(img, dsize=(IMG_SIZE, IMG_SIZE), 
interpolation=cv2.INTER_LINEAR) 96 | 97 | input = transform(img).unsqueeze(0) 98 | with torch.no_grad(): 99 | input = input.cuda() 100 | param = model.forward_test(input) 101 | param = param.squeeze().cpu().numpy().flatten().astype(np.float32) 102 | 103 | # dense pts 104 | vertices = predict_denseVert(param, roi_box, transform=True) 105 | vertices_lst.append(vertices) 106 | 107 | # textured obj file output 108 | if not osp.exists(f'inference_output/obj/'): 109 | os.makedirs(f'inference_output/obj/') 110 | 111 | name = img_fp.rsplit('/',1)[-1][:-4] # drop off the extension 112 | colors = cv2.imread(f'uv_art/{name}_fake_B.png',-1) 113 | colors = np.flip(colors,axis=0) 114 | colors_uv = (colors[coord_u, coord_v,:]) 115 | 116 | wfp = f'inference_output/obj/{name}.obj' 117 | write_obj_with_colors(wfp, vertices[:,keep_ind], tri_deletion, colors_uv[keep_ind,:].astype(np.float32)) 118 | 119 | if __name__ == '__main__': 120 | parser = argparse.ArgumentParser() 121 | parser.add_argument('-f', '--files', default='', help='path to a single image or path to a folder containing multiple images') 122 | parser.add_argument("--png", action="store_true", help="if images are with .png extension") 123 | parser.add_argument('--img_size', default=120, type=int) 124 | parser.add_argument('-b', '--batch-size', default=1, type=int) 125 | 126 | args = parser.parse_args() 127 | main(args) -------------------------------------------------------------------------------- /backbone_nets/ResNeSt/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnest import * 2 | from .ablation import * 3 | -------------------------------------------------------------------------------- /backbone_nets/ResNeSt/ablation.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## Email: zhanghang0704@gmail.com 4 | ## Copyright (c) 2020 5 | ## 6 | ## LICENSE file in the root directory of this source tree 7 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 8 | """ResNeSt ablation study models""" 9 | 10 | import torch 11 | from .resnet import ResNet, Bottleneck 12 | 13 | __all__ = ['resnest50_fast_1s1x64d', 'resnest50_fast_2s1x64d', 'resnest50_fast_4s1x64d', 14 | 'resnest50_fast_1s2x40d', 'resnest50_fast_2s2x40d', 'resnest50_fast_4s2x40d', 15 | 'resnest50_fast_1s4x24d'] 16 | 17 | _url_format = 'https://s3.us-west-1.wasabisys.com/resnest/torch/{}-{}.pth' 18 | 19 | _model_sha256 = {name: checksum for checksum, name in [ 20 | ('d8fbf808', 'resnest50_fast_1s1x64d'), 21 | ('44938639', 'resnest50_fast_2s1x64d'), 22 | ('f74f3fc3', 'resnest50_fast_4s1x64d'), 23 | ('32830b84', 'resnest50_fast_1s2x40d'), 24 | ('9d126481', 'resnest50_fast_2s2x40d'), 25 | ('41d14ed0', 'resnest50_fast_4s2x40d'), 26 | ('d4a4f76f', 'resnest50_fast_1s4x24d'), 27 | ]} 28 | 29 | def short_hash(name): 30 | if name not in _model_sha256: 31 | raise ValueError('Pretrained model for {name} is not available.'.format(name=name)) 32 | return _model_sha256[name][:8] 33 | 34 | resnest_model_urls = {name: _url_format.format(name, short_hash(name)) for 35 | name in _model_sha256.keys() 36 | } 37 | 38 | def resnest50_fast_1s1x64d(pretrained=False, root='~/.encoding/models', **kwargs): 39 | model = ResNet(Bottleneck, [3, 4, 6, 3], 40 | radix=1, groups=1, bottleneck_width=64, 41 | deep_stem=True, stem_width=32, avg_down=True, 42 | avd=True, avd_first=True, **kwargs) 43 | 
if pretrained: 44 | model.load_state_dict(torch.hub.load_state_dict_from_url( 45 | resnest_model_urls['resnest50_fast_1s1x64d'], progress=True, check_hash=True)) 46 | return model 47 | 48 | def resnest50_fast_2s1x64d(pretrained=False, root='~/.encoding/models', **kwargs): 49 | model = ResNet(Bottleneck, [3, 4, 6, 3], 50 | radix=2, groups=1, bottleneck_width=64, 51 | deep_stem=True, stem_width=32, avg_down=True, 52 | avd=True, avd_first=True, **kwargs) 53 | if pretrained: 54 | model.load_state_dict(torch.hub.load_state_dict_from_url( 55 | resnest_model_urls['resnest50_fast_2s1x64d'], progress=True, check_hash=True)) 56 | return model 57 | 58 | def resnest50_fast_4s1x64d(pretrained=False, root='~/.encoding/models', **kwargs): 59 | model = ResNet(Bottleneck, [3, 4, 6, 3], 60 | radix=4, groups=1, bottleneck_width=64, 61 | deep_stem=True, stem_width=32, avg_down=True, 62 | avd=True, avd_first=True, **kwargs) 63 | if pretrained: 64 | model.load_state_dict(torch.hub.load_state_dict_from_url( 65 | resnest_model_urls['resnest50_fast_4s1x64d'], progress=True, check_hash=True)) 66 | return model 67 | 68 | def resnest50_fast_1s2x40d(pretrained=False, root='~/.encoding/models', **kwargs): 69 | model = ResNet(Bottleneck, [3, 4, 6, 3], 70 | radix=1, groups=2, bottleneck_width=40, 71 | deep_stem=True, stem_width=32, avg_down=True, 72 | avd=True, avd_first=True, **kwargs) 73 | if pretrained: 74 | model.load_state_dict(torch.hub.load_state_dict_from_url( 75 | resnest_model_urls['resnest50_fast_1s2x40d'], progress=True, check_hash=True)) 76 | return model 77 | 78 | def resnest50_fast_2s2x40d(pretrained=False, root='~/.encoding/models', **kwargs): 79 | model = ResNet(Bottleneck, [3, 4, 6, 3], 80 | radix=2, groups=2, bottleneck_width=40, 81 | deep_stem=True, stem_width=32, avg_down=True, 82 | avd=True, avd_first=True, **kwargs) 83 | if pretrained: 84 | model.load_state_dict(torch.hub.load_state_dict_from_url( 85 | resnest_model_urls['resnest50_fast_2s2x40d'], progress=True, check_hash=True)) 86 | return model 87 | 88 | def resnest50_fast_4s2x40d(pretrained=False, root='~/.encoding/models', **kwargs): 89 | model = ResNet(Bottleneck, [3, 4, 6, 3], 90 | radix=4, groups=2, bottleneck_width=40, 91 | deep_stem=True, stem_width=32, avg_down=True, 92 | avd=True, avd_first=True, **kwargs) 93 | if pretrained: 94 | model.load_state_dict(torch.hub.load_state_dict_from_url( 95 | resnest_model_urls['resnest50_fast_4s2x40d'], progress=True, check_hash=True)) 96 | return model 97 | 98 | def resnest50_fast_1s4x24d(pretrained=False, root='~/.encoding/models', **kwargs): 99 | model = ResNet(Bottleneck, [3, 4, 6, 3], 100 | radix=1, groups=4, bottleneck_width=24, 101 | deep_stem=True, stem_width=32, avg_down=True, 102 | avd=True, avd_first=True, **kwargs) 103 | if pretrained: 104 | model.load_state_dict(torch.hub.load_state_dict_from_url( 105 | resnest_model_urls['resnest50_fast_1s4x24d'], progress=True, check_hash=True)) 106 | return model 107 | -------------------------------------------------------------------------------- /backbone_nets/ResNeSt/resnest.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## Email: zhanghang0704@gmail.com 4 | ## Copyright (c) 2020 5 | ## 6 | ## LICENSE file in the root directory of this source tree 7 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 8 | """ResNeSt models""" 9 | 10 | import torch 11 | from .resnet import 
ResNet, Bottleneck 12 | 13 | __all__ = ['resnest50', 'resnest101', 'resnest200', 'resnest269'] 14 | 15 | _url_format = 'https://s3.us-west-1.wasabisys.com/resnest/torch/{}-{}.pth' 16 | 17 | _model_sha256 = {name: checksum for checksum, name in [ 18 | ('528c19ca', 'resnest50'), 19 | ('22405ba7', 'resnest101'), 20 | ('75117900', 'resnest200'), 21 | ('0cc87c48', 'resnest269'), 22 | ]} 23 | 24 | def short_hash(name): 25 | if name not in _model_sha256: 26 | raise ValueError('Pretrained model for {name} is not available.'.format(name=name)) 27 | return _model_sha256[name][:8] 28 | 29 | resnest_model_urls = {name: _url_format.format(name, short_hash(name)) for 30 | name in _model_sha256.keys() 31 | } 32 | 33 | def resnest50(pretrained=False, root='~/.encoding/models', **kwargs): 34 | model = ResNet(Bottleneck, [3, 4, 6, 3], 35 | radix=2, groups=1, bottleneck_width=64, 36 | deep_stem=True, stem_width=32, avg_down=True, 37 | avd=True, avd_first=False, **kwargs) 38 | if pretrained: 39 | model.load_state_dict(torch.hub.load_state_dict_from_url( 40 | resnest_model_urls['resnest50'], progress=True, check_hash=True), strict=False) 41 | return model 42 | 43 | def resnest101(pretrained=False, root='~/.encoding/models', **kwargs): 44 | model = ResNet(Bottleneck, [3, 4, 23, 3], 45 | radix=2, groups=1, bottleneck_width=64, 46 | deep_stem=True, stem_width=64, avg_down=True, 47 | avd=True, avd_first=False, **kwargs) 48 | if pretrained: 49 | model.load_state_dict(torch.hub.load_state_dict_from_url( 50 | resnest_model_urls['resnest101'], progress=True, check_hash=True), strict=False) 51 | return model 52 | 53 | def resnest200(pretrained=False, root='~/.encoding/models', **kwargs): 54 | model = ResNet(Bottleneck, [3, 24, 36, 3], 55 | radix=2, groups=1, bottleneck_width=64, 56 | deep_stem=True, stem_width=64, avg_down=True, 57 | avd=True, avd_first=False, **kwargs) 58 | if pretrained: 59 | model.load_state_dict(torch.hub.load_state_dict_from_url( 60 | resnest_model_urls['resnest200'], progress=True, check_hash=True)) 61 | return model 62 | 63 | def resnest269(pretrained=False, root='~/.encoding/models', **kwargs): 64 | model = ResNet(Bottleneck, [3, 30, 48, 8], 65 | radix=2, groups=1, bottleneck_width=64, 66 | deep_stem=True, stem_width=64, avg_down=True, 67 | avd=True, avd_first=False, **kwargs) 68 | if pretrained: 69 | model.load_state_dict(torch.hub.load_state_dict_from_url( 70 | resnest_model_urls['resnest269'], progress=True, check_hash=True)) 71 | return model 72 | -------------------------------------------------------------------------------- /backbone_nets/ResNeSt/splat.py: -------------------------------------------------------------------------------- 1 | """Split-Attention""" 2 | 3 | import torch 4 | from torch import nn 5 | import torch.nn.functional as F 6 | from torch.nn import Conv2d, Module, Linear, BatchNorm2d, ReLU 7 | from torch.nn.modules.utils import _pair 8 | 9 | __all__ = ['SplAtConv2d'] 10 | 11 | class SplAtConv2d(Module): 12 | """Split-Attention Conv2d 13 | """ 14 | def __init__(self, in_channels, channels, kernel_size, stride=(1, 1), padding=(0, 0), 15 | dilation=(1, 1), groups=1, bias=True, 16 | radix=2, reduction_factor=4, 17 | rectify=False, rectify_avg=False, norm_layer=None, 18 | dropblock_prob=0.0, **kwargs): 19 | super(SplAtConv2d, self).__init__() 20 | padding = _pair(padding) 21 | self.rectify = rectify and (padding[0] > 0 or padding[1] > 0) 22 | self.rectify_avg = rectify_avg 23 | inter_channels = max(in_channels*radix//reduction_factor, 32) 24 | self.radix = radix 25 | 
self.cardinality = groups 26 | self.channels = channels 27 | self.dropblock_prob = dropblock_prob 28 | if self.rectify: 29 | from rfconv import RFConv2d 30 | self.conv = RFConv2d(in_channels, channels*radix, kernel_size, stride, padding, dilation, 31 | groups=groups*radix, bias=bias, average_mode=rectify_avg, **kwargs) 32 | else: 33 | self.conv = Conv2d(in_channels, channels*radix, kernel_size, stride, padding, dilation, 34 | groups=groups*radix, bias=bias, **kwargs) 35 | self.use_bn = norm_layer is not None 36 | if self.use_bn: 37 | self.bn0 = norm_layer(channels*radix) 38 | self.relu = ReLU(inplace=True) 39 | self.fc1 = Conv2d(channels, inter_channels, 1, groups=self.cardinality) 40 | if self.use_bn: 41 | self.bn1 = norm_layer(inter_channels) 42 | self.fc2 = Conv2d(inter_channels, channels*radix, 1, groups=self.cardinality) 43 | if dropblock_prob > 0.0: 44 | self.dropblock = DropBlock2D(dropblock_prob, 3) 45 | self.rsoftmax = rSoftMax(radix, groups) 46 | 47 | def forward(self, x): 48 | x = self.conv(x) 49 | if self.use_bn: 50 | x = self.bn0(x) 51 | if self.dropblock_prob > 0.0: 52 | x = self.dropblock(x) 53 | x = self.relu(x) 54 | 55 | batch, rchannel = x.shape[:2] 56 | if self.radix > 1: 57 | if torch.__version__ < '1.5': 58 | splited = torch.split(x, int(rchannel//self.radix), dim=1) 59 | else: 60 | splited = torch.split(x, rchannel//self.radix, dim=1) 61 | gap = sum(splited) 62 | else: 63 | gap = x 64 | gap = F.adaptive_avg_pool2d(gap, 1) 65 | gap = self.fc1(gap) 66 | 67 | if self.use_bn: 68 | gap = self.bn1(gap) 69 | gap = self.relu(gap) 70 | 71 | atten = self.fc2(gap) 72 | atten = self.rsoftmax(atten).view(batch, -1, 1, 1) 73 | 74 | if self.radix > 1: 75 | if torch.__version__ < '1.5': 76 | attens = torch.split(atten, int(rchannel//self.radix), dim=1) 77 | else: 78 | attens = torch.split(atten, rchannel//self.radix, dim=1) 79 | out = sum([att*split for (att, split) in zip(attens, splited)]) 80 | else: 81 | out = atten * x 82 | return out.contiguous() 83 | 84 | class rSoftMax(nn.Module): 85 | def __init__(self, radix, cardinality): 86 | super().__init__() 87 | self.radix = radix 88 | self.cardinality = cardinality 89 | 90 | def forward(self, x): 91 | batch = x.size(0) 92 | if self.radix > 1: 93 | x = x.view(batch, self.cardinality, self.radix, -1).transpose(1, 2) 94 | x = F.softmax(x, dim=1) 95 | x = x.reshape(batch, -1) 96 | else: 97 | x = torch.sigmoid(x) 98 | return x 99 | 100 | -------------------------------------------------------------------------------- /backbone_nets/ghostnet_backbone.py: -------------------------------------------------------------------------------- 1 | # 2020.06.09-Changed for building GhostNet 2 | # Huawei Technologies Co., Ltd. 3 | """ 4 | Creates a GhostNet Model as defined in: 5 | GhostNet: More Features from Cheap Operations By Kai Han, Yunhe Wang, Qi Tian, Jianyuan Guo, Chunjing Xu, Chang Xu. 6 | https://arxiv.org/abs/1911.11907 7 | Modified from https://github.com/d-li14/mobilenetv3.pytorch and https://github.com/rwightman/pytorch-image-models 8 | """ 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | import math 13 | 14 | 15 | __all__ = ['ghostnet'] 16 | 17 | 18 | def _make_divisible(v, divisor, min_value=None): 19 | """ 20 | This function is taken from the original tf repo. 
21 | It ensures that all layers have a channel number that is divisible by 8 22 | It can be seen here: 23 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 24 | """ 25 | if min_value is None: 26 | min_value = divisor 27 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 28 | # Make sure that round down does not go down by more than 10%. 29 | if new_v < 0.9 * v: 30 | new_v += divisor 31 | return new_v 32 | 33 | 34 | def hard_sigmoid(x, inplace: bool = False): 35 | if inplace: 36 | return x.add_(3.).clamp_(0., 6.).div_(6.) 37 | else: 38 | return F.relu6(x + 3.) / 6. 39 | 40 | 41 | class SqueezeExcite(nn.Module): 42 | def __init__(self, in_chs, se_ratio=0.25, reduced_base_chs=None, 43 | act_layer=nn.ReLU, gate_fn=hard_sigmoid, divisor=4, **_): 44 | super(SqueezeExcite, self).__init__() 45 | self.gate_fn = gate_fn 46 | reduced_chs = _make_divisible((reduced_base_chs or in_chs) * se_ratio, divisor) 47 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 48 | self.conv_reduce = nn.Conv2d(in_chs, reduced_chs, 1, bias=True) 49 | self.act1 = act_layer(inplace=True) 50 | self.conv_expand = nn.Conv2d(reduced_chs, in_chs, 1, bias=True) 51 | 52 | def forward(self, x): 53 | x_se = self.avg_pool(x) 54 | x_se = self.conv_reduce(x_se) 55 | x_se = self.act1(x_se) 56 | x_se = self.conv_expand(x_se) 57 | x = x * self.gate_fn(x_se) 58 | return x 59 | 60 | 61 | class ConvBnAct(nn.Module): 62 | def __init__(self, in_chs, out_chs, kernel_size, 63 | stride=1, act_layer=nn.ReLU): 64 | super(ConvBnAct, self).__init__() 65 | self.conv = nn.Conv2d(in_chs, out_chs, kernel_size, stride, kernel_size//2, bias=False) 66 | self.bn1 = nn.BatchNorm2d(out_chs) 67 | self.act1 = act_layer(inplace=True) 68 | 69 | def forward(self, x): 70 | x = self.conv(x) 71 | x = self.bn1(x) 72 | x = self.act1(x) 73 | return x 74 | 75 | 76 | class GhostModule(nn.Module): 77 | def __init__(self, inp, oup, kernel_size=1, ratio=2, dw_size=3, stride=1, relu=True): 78 | super(GhostModule, self).__init__() 79 | self.oup = oup 80 | init_channels = math.ceil(oup / ratio) 81 | new_channels = init_channels*(ratio-1) 82 | 83 | self.primary_conv = nn.Sequential( 84 | nn.Conv2d(inp, init_channels, kernel_size, stride, kernel_size//2, bias=False), 85 | nn.BatchNorm2d(init_channels), 86 | nn.ReLU(inplace=True) if relu else nn.Sequential(), 87 | ) 88 | 89 | self.cheap_operation = nn.Sequential( 90 | nn.Conv2d(init_channels, new_channels, dw_size, 1, dw_size//2, groups=init_channels, bias=False), 91 | nn.BatchNorm2d(new_channels), 92 | nn.ReLU(inplace=True) if relu else nn.Sequential(), 93 | ) 94 | 95 | def forward(self, x): 96 | x1 = self.primary_conv(x) 97 | x2 = self.cheap_operation(x1) 98 | out = torch.cat([x1,x2], dim=1) 99 | return out[:,:self.oup,:,:] 100 | 101 | 102 | class GhostBottleneck(nn.Module): 103 | """ Ghost bottleneck w/ optional SE""" 104 | 105 | def __init__(self, in_chs, mid_chs, out_chs, dw_kernel_size=3, 106 | stride=1, act_layer=nn.ReLU, se_ratio=0.): 107 | super(GhostBottleneck, self).__init__() 108 | has_se = se_ratio is not None and se_ratio > 0. 
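# Block layout built below: a GhostModule point-wise expansion (ghost1), an optional
# depth-wise conv + BN when stride > 1, an optional Squeeze-Excite stage, and a linear
# GhostModule projection (ghost2); the shortcut is an identity when in_chs == out_chs
# and stride == 1, otherwise a depth-wise + point-wise conv path.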
109 | self.stride = stride 110 | 111 | # Point-wise expansion 112 | self.ghost1 = GhostModule(in_chs, mid_chs, relu=True) 113 | 114 | # Depth-wise convolution 115 | if self.stride > 1: 116 | self.conv_dw = nn.Conv2d(mid_chs, mid_chs, dw_kernel_size, stride=stride, 117 | padding=(dw_kernel_size-1)//2, 118 | groups=mid_chs, bias=False) 119 | self.bn_dw = nn.BatchNorm2d(mid_chs) 120 | 121 | # Squeeze-and-excitation 122 | if has_se: 123 | self.se = SqueezeExcite(mid_chs, se_ratio=se_ratio) 124 | else: 125 | self.se = None 126 | 127 | # Point-wise linear projection 128 | self.ghost2 = GhostModule(mid_chs, out_chs, relu=False) 129 | 130 | # shortcut 131 | if (in_chs == out_chs and self.stride == 1): 132 | self.shortcut = nn.Sequential() 133 | else: 134 | self.shortcut = nn.Sequential( 135 | nn.Conv2d(in_chs, in_chs, dw_kernel_size, stride=stride, 136 | padding=(dw_kernel_size-1)//2, groups=in_chs, bias=False), 137 | nn.BatchNorm2d(in_chs), 138 | nn.Conv2d(in_chs, out_chs, 1, stride=1, padding=0, bias=False), 139 | nn.BatchNorm2d(out_chs), 140 | ) 141 | 142 | 143 | def forward(self, x): 144 | residual = x 145 | 146 | # 1st ghost bottleneck 147 | x = self.ghost1(x) 148 | 149 | # Depth-wise convolution 150 | if self.stride > 1: 151 | x = self.conv_dw(x) 152 | x = self.bn_dw(x) 153 | 154 | # Squeeze-and-excitation 155 | if self.se is not None: 156 | x = self.se(x) 157 | 158 | # 2nd ghost bottleneck 159 | x = self.ghost2(x) 160 | 161 | x += self.shortcut(residual) 162 | return x 163 | 164 | 165 | class GhostNet(nn.Module): 166 | def __init__(self, cfgs, num_classes=1000, width=1.0, dropout=0.2): 167 | super(GhostNet, self).__init__() 168 | # setting of inverted residual blocks 169 | self.cfgs = cfgs 170 | self.dropout = dropout 171 | 172 | # building first layer 173 | output_channel = _make_divisible(16 * width, 4) 174 | self.conv_stem = nn.Conv2d(3, output_channel, 3, 2, 1, bias=False) 175 | self.bn1 = nn.BatchNorm2d(output_channel) 176 | self.act1 = nn.ReLU(inplace=True) 177 | input_channel = output_channel 178 | 179 | # building inverted residual blocks 180 | stages = [] 181 | block = GhostBottleneck 182 | for cfg in self.cfgs: 183 | layers = [] 184 | for k, exp_size, c, se_ratio, s in cfg: 185 | output_channel = _make_divisible(c * width, 4) 186 | hidden_channel = _make_divisible(exp_size * width, 4) 187 | layers.append(block(input_channel, hidden_channel, output_channel, k, s, 188 | se_ratio=se_ratio)) 189 | input_channel = output_channel 190 | stages.append(nn.Sequential(*layers)) 191 | 192 | output_channel = _make_divisible(exp_size * width, 4) 193 | stages.append(nn.Sequential(ConvBnAct(input_channel, output_channel, 1))) 194 | input_channel = output_channel 195 | 196 | self.blocks = nn.Sequential(*stages) 197 | 198 | # building last several layers 199 | output_channel = 1280 200 | self.global_pool = nn.AdaptiveAvgPool2d((1, 1)) 201 | self.conv_head = nn.Conv2d(input_channel, output_channel, 1, 1, 0, bias=True) 202 | self.act2 = nn.ReLU(inplace=True) 203 | 204 | self.num_ori = 12 205 | self.num_shape = 40 206 | self.num_exp = 10 207 | self.num_texture = 40 208 | 209 | self.classifier_ori = nn.Linear(output_channel, self.num_ori) 210 | self.classifier_shape = nn.Linear(output_channel, self.num_shape) 211 | self.classifier_exp = nn.Linear(output_channel, self.num_exp) 212 | self.classifier_texture = nn.Linear(output_channel, self.num_texture) 213 | 214 | def forward(self, x): 215 | x = self.conv_stem(x) 216 | x = self.bn1(x) 217 | x = self.act1(x) 218 | x = self.blocks(x) 219 | x = 
self.global_pool(x) 220 | x = self.conv_head(x) 221 | x = self.act2(x) 222 | x = x.view(x.size(0), -1) 223 | if self.dropout > 0.: 224 | x = F.dropout(x, p=self.dropout, training=self.training) 225 | #x = self.classifier(x) 226 | 227 | x_ori = self.classifier_ori(x) 228 | x_shape = self.classifier_shape(x) 229 | x_exp = self.classifier_exp(x) 230 | x_tex = self.classifier_texture(x) 231 | x = torch.cat((x_ori, x_shape, x_exp, x_tex), dim=1) 232 | 233 | return x 234 | 235 | 236 | def ghostnet(**kwargs): 237 | """ 238 | Constructs a GhostNet model 239 | """ 240 | cfgs = [ 241 | # k, t, c, SE, s 242 | # stage1 243 | [[3, 16, 16, 0, 1]], 244 | # stage2 245 | [[3, 48, 24, 0, 2]], 246 | [[3, 72, 24, 0, 1]], 247 | # stage3 248 | [[5, 72, 40, 0.25, 2]], 249 | [[5, 120, 40, 0.25, 1]], 250 | # stage4 251 | [[3, 240, 80, 0, 2]], 252 | [[3, 200, 80, 0, 1], 253 | [3, 184, 80, 0, 1], 254 | [3, 184, 80, 0, 1], 255 | [3, 480, 112, 0.25, 1], 256 | [3, 672, 112, 0.25, 1] 257 | ], 258 | # stage5 259 | [[5, 672, 160, 0.25, 2]], 260 | [[5, 960, 160, 0, 1], 261 | [5, 960, 160, 0.25, 1], 262 | [5, 960, 160, 0, 1], 263 | [5, 960, 160, 0.25, 1] 264 | ] 265 | ] 266 | return GhostNet(cfgs, **kwargs) 267 | 268 | 269 | if __name__=='__main__': 270 | model = ghostnet() 271 | model.eval() 272 | print(model) 273 | input = torch.randn(32,3,320,256) 274 | y = model(input) 275 | print(y.size()) -------------------------------------------------------------------------------- /backbone_nets/mobilenetv1_backbone.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # coding: utf-8 3 | 4 | from __future__ import division 5 | 6 | """ 7 | Creates a MobileNet Model as defined in: 8 | Andrew G. Howard Menglong Zhu Bo Chen, et.al. (2017). 9 | MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications. 
10 | Copyright (c) Yang Lu, 2017 11 | 12 | Modified By cleardusk 13 | """ 14 | import math 15 | import torch 16 | import torch.nn as nn 17 | 18 | __all__ = ['mobilenet_2', 'mobilenet_1', 'mobilenet_075', 'mobilenet_05', 'mobilenet_025'] 19 | 20 | 21 | class DepthWiseBlock(nn.Module): 22 | def __init__(self, inplanes, planes, stride=1, prelu=False): 23 | super(DepthWiseBlock, self).__init__() 24 | inplanes, planes = int(inplanes), int(planes) 25 | self.conv_dw = nn.Conv2d(inplanes, inplanes, kernel_size=3, padding=1, stride=stride, groups=inplanes, 26 | bias=False) 27 | self.bn_dw = nn.BatchNorm2d(inplanes) 28 | self.conv_sep = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, padding=0, bias=False) 29 | self.bn_sep = nn.BatchNorm2d(planes) 30 | if prelu: 31 | self.relu = nn.PReLU() 32 | else: 33 | self.relu = nn.ReLU(inplace=True) 34 | 35 | def forward(self, x): 36 | out = self.conv_dw(x) 37 | out = self.bn_dw(out) 38 | out = self.relu(out) 39 | 40 | out = self.conv_sep(out) 41 | out = self.bn_sep(out) 42 | out = self.relu(out) 43 | 44 | return out 45 | 46 | 47 | class MobileNet(nn.Module): 48 | def __init__(self, widen_factor=1.0, num_classes=1000, prelu=False, input_channel=3): 49 | """ Constructor 50 | Args: 51 | widen_factor: config of widen_factor 52 | num_classes: number of classes 53 | """ 54 | super(MobileNet, self).__init__() 55 | 56 | block = DepthWiseBlock 57 | self.conv1 = nn.Conv2d(input_channel, int(32 * widen_factor), kernel_size=3, stride=2, padding=1, 58 | bias=False) 59 | 60 | self.bn1 = nn.BatchNorm2d(int(32 * widen_factor)) 61 | if prelu: 62 | self.relu = nn.PReLU() 63 | else: 64 | self.relu = nn.ReLU(inplace=True) 65 | 66 | self.dw2_1 = block(32 * widen_factor, 64 * widen_factor, prelu=prelu) 67 | self.dw2_2 = block(64 * widen_factor, 128 * widen_factor, stride=2, prelu=prelu) 68 | 69 | self.dw3_1 = block(128 * widen_factor, 128 * widen_factor, prelu=prelu) 70 | self.dw3_2 = block(128 * widen_factor, 256 * widen_factor, stride=2, prelu=prelu) 71 | 72 | self.dw4_1 = block(256 * widen_factor, 256 * widen_factor, prelu=prelu) 73 | self.dw4_2 = block(256 * widen_factor, 512 * widen_factor, stride=2, prelu=prelu) 74 | 75 | self.dw5_1 = block(512 * widen_factor, 512 * widen_factor, prelu=prelu) 76 | self.dw5_2 = block(512 * widen_factor, 512 * widen_factor, prelu=prelu) 77 | self.dw5_3 = block(512 * widen_factor, 512 * widen_factor, prelu=prelu) 78 | self.dw5_4 = block(512 * widen_factor, 512 * widen_factor, prelu=prelu) 79 | self.dw5_5 = block(512 * widen_factor, 512 * widen_factor, prelu=prelu) 80 | self.dw5_6 = block(512 * widen_factor, 1024 * widen_factor, stride=2, prelu=prelu) 81 | 82 | self.dw6 = block(1024 * widen_factor, 1024 * widen_factor, prelu=prelu) 83 | 84 | self.avgpool = nn.AdaptiveAvgPool2d(1) 85 | #self.fc = nn.Linear(int(1024 * widen_factor), num_classes) 86 | 87 | self.num_ori = 12 88 | self.num_shape = 40 89 | self.num_exp = 10 90 | self.num_texture = 40 91 | 92 | #self.fc = nn.Linear(int(1024 * widen_factor), num_classes) 93 | 94 | ### Multi-decoder output 95 | self.fc_ori = nn.Linear(int(1024 * widen_factor), self.num_ori) 96 | self.fc_shape = nn.Linear(int(1024 * widen_factor), self.num_shape) 97 | self.fc_exp = nn.Linear(int(1024 * widen_factor), self.num_exp) 98 | self.fc_tex = nn.Linear(int(1024 * widen_factor), self.num_texture) 99 | 100 | for m in self.modules(): 101 | if isinstance(m, nn.Conv2d): 102 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 103 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 104 | elif isinstance(m, nn.BatchNorm2d): 105 | m.weight.data.fill_(1) 106 | m.bias.data.zero_() 107 | 108 | def forward(self, x): 109 | x = self.conv1(x) 110 | x = self.bn1(x) 111 | x = self.relu(x) 112 | 113 | x = self.dw2_1(x) 114 | x = self.dw2_2(x) 115 | x = self.dw3_1(x) 116 | x = self.dw3_2(x) 117 | x = self.dw4_1(x) 118 | x = self.dw4_2(x) 119 | x = self.dw5_1(x) 120 | x = self.dw5_2(x) 121 | x = self.dw5_3(x) 122 | x = self.dw5_4(x) 123 | x = self.dw5_5(x) 124 | x = self.dw5_6(x) 125 | x = self.dw6(x) 126 | 127 | x = self.avgpool(x) 128 | x = x.view(x.size(0), -1) 129 | #x = self.fc(x) 130 | 131 | ### Multi-decoder output 132 | x_ori = self.fc_ori(x) 133 | x_shp = self.fc_shape(x) 134 | x_exp = self.fc_exp(x) 135 | #x = torch.cat((x_ori, x_shp, x_exp), dim=1) 136 | 137 | x_tex = self.fc_tex(x) 138 | x = torch.cat((x_ori, x_shp, x_exp, x_tex), dim=1) 139 | 140 | return x 141 | 142 | class MobileNet_ori(nn.Module): 143 | def __init__(self, widen_factor=1.0, num_classes=1000, prelu=False, input_channel=3): 144 | """ Constructor 145 | Args: 146 | widen_factor: config of widen_factor 147 | num_classes: number of classes 148 | """ 149 | super(MobileNet_ori, self).__init__() 150 | 151 | block = DepthWiseBlock 152 | self.conv1 = nn.Conv2d(input_channel, int(32 * widen_factor), kernel_size=3, stride=2, padding=1, 153 | bias=False) 154 | 155 | self.bn1 = nn.BatchNorm2d(int(32 * widen_factor)) 156 | if prelu: 157 | self.relu = nn.PReLU() 158 | else: 159 | self.relu = nn.ReLU(inplace=True) 160 | 161 | self.dw2_1 = block(32 * widen_factor, 64 * widen_factor, prelu=prelu) 162 | self.dw2_2 = block(64 * widen_factor, 128 * widen_factor, stride=2, prelu=prelu) 163 | 164 | self.dw3_1 = block(128 * widen_factor, 128 * widen_factor, prelu=prelu) 165 | self.dw3_2 = block(128 * widen_factor, 256 * widen_factor, stride=2, prelu=prelu) 166 | 167 | self.dw4_1 = block(256 * widen_factor, 256 * widen_factor, prelu=prelu) 168 | self.dw4_2 = block(256 * widen_factor, 512 * widen_factor, stride=2, prelu=prelu) 169 | 170 | self.dw5_1 = block(512 * widen_factor, 512 * widen_factor, prelu=prelu) 171 | self.dw5_2 = block(512 * widen_factor, 512 * widen_factor, prelu=prelu) 172 | self.dw5_3 = block(512 * widen_factor, 512 * widen_factor, prelu=prelu) 173 | self.dw5_4 = block(512 * widen_factor, 512 * widen_factor, prelu=prelu) 174 | self.dw5_5 = block(512 * widen_factor, 512 * widen_factor, prelu=prelu) 175 | self.dw5_6 = block(512 * widen_factor, 1024 * widen_factor, stride=2, prelu=prelu) 176 | 177 | self.dw6 = block(1024 * widen_factor, 1024 * widen_factor, prelu=prelu) 178 | 179 | self.avgpool = nn.AdaptiveAvgPool2d(1) 180 | self.fc = nn.Linear(int(1024 * widen_factor), num_classes) 181 | 182 | for m in self.modules(): 183 | if isinstance(m, nn.Conv2d): 184 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 185 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 186 | elif isinstance(m, nn.BatchNorm2d): 187 | m.weight.data.fill_(1) 188 | m.bias.data.zero_() 189 | 190 | def forward(self, x): 191 | x = self.conv1(x) 192 | x = self.bn1(x) 193 | x = self.relu(x) 194 | 195 | x = self.dw2_1(x) 196 | x = self.dw2_2(x) 197 | x = self.dw3_1(x) 198 | x = self.dw3_2(x) 199 | x = self.dw4_1(x) 200 | x = self.dw4_2(x) 201 | x = self.dw5_1(x) 202 | x = self.dw5_2(x) 203 | x = self.dw5_3(x) 204 | x = self.dw5_4(x) 205 | x = self.dw5_5(x) 206 | x = self.dw5_6(x) 207 | x = self.dw6(x) 208 | 209 | x = self.avgpool(x) 210 | x = x.view(x.size(0), -1) 211 | x = self.fc(x) 212 | return x 213 | 214 | 215 | def mobilenet(widen_factor=1.0, num_classes=1000): 216 | """ 217 | Construct MobileNet. 218 | widen_factor=1.0 for mobilenet_1 219 | widen_factor=0.75 for mobilenet_075 220 | widen_factor=0.5 for mobilenet_05 221 | widen_factor=0.25 for mobilenet_025 222 | """ 223 | model = MobileNet(widen_factor=widen_factor, num_classes=num_classes) 224 | return model 225 | 226 | 227 | def mobilenet_2(num_classes=62, input_channel=3): 228 | model = MobileNet(widen_factor=2.0, num_classes=num_classes, input_channel=input_channel) 229 | return model 230 | 231 | 232 | def mobilenet_1(num_classes=62, input_channel=3): 233 | #model = MobileNet(widen_factor=1.0, num_classes=num_classes, input_channel=input_channel) 234 | model = MobileNet(widen_factor=1.0, num_classes=num_classes, input_channel=input_channel) 235 | return model 236 | 237 | 238 | def mobilenet_075(num_classes=62, input_channel=3): 239 | model = MobileNet(widen_factor=0.75, num_classes=num_classes, input_channel=input_channel) 240 | return model 241 | 242 | 243 | def mobilenet_05(num_classes=62, input_channel=3): 244 | model = MobileNet(widen_factor=0.5, num_classes=num_classes, input_channel=input_channel) 245 | return model 246 | 247 | 248 | def mobilenet_025(num_classes=62, input_channel=3): 249 | model = MobileNet(widen_factor=0.25, num_classes=num_classes, input_channel=input_channel) 250 | return model 251 | -------------------------------------------------------------------------------- /backbone_nets/mobilenetv2_backbone.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.hub import load_state_dict_from_url 4 | 5 | __all__ = ['MobileNetV2', 'mobilenet_v2'] 6 | 7 | 8 | model_urls = { 9 | 'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth', 10 | } 11 | 12 | 13 | def _make_divisible(v, divisor, min_value=None): 14 | """ 15 | This function is taken from the original tf repo. 16 | It ensures that all layers have a channel number that is divisible by 8 17 | It can be seen here: 18 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 19 | :param v: 20 | :param divisor: 21 | :param min_value: 22 | :return: 23 | """ 24 | if min_value is None: 25 | min_value = divisor 26 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 27 | # Make sure that round down does not go down by more than 10%. 
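# Worked example: v = 24 * 0.75 = 18, divisor = 8 -> int(18 + 4) // 8 * 8 = 16,
# and 16 < 0.9 * 18 = 16.2, so new_v is bumped to 24 rather than losing >10% of the channels.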
28 | if new_v < 0.9 * v: 29 | new_v += divisor 30 | return new_v 31 | 32 | 33 | class ConvBNReLU(nn.Sequential): 34 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1, norm_layer=None): 35 | padding = (kernel_size - 1) // 2 36 | if norm_layer is None: 37 | norm_layer = nn.BatchNorm2d 38 | super(ConvBNReLU, self).__init__( 39 | nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), 40 | norm_layer(out_planes), 41 | nn.ReLU6(inplace=True) 42 | ) 43 | 44 | 45 | class InvertedResidual(nn.Module): 46 | def __init__(self, inp, oup, stride, expand_ratio, norm_layer=None): 47 | super(InvertedResidual, self).__init__() 48 | self.stride = stride 49 | assert stride in [1, 2] 50 | 51 | if norm_layer is None: 52 | norm_layer = nn.BatchNorm2d 53 | 54 | hidden_dim = int(round(inp * expand_ratio)) 55 | self.use_res_connect = self.stride == 1 and inp == oup 56 | 57 | layers = [] 58 | if expand_ratio != 1: 59 | # pw 60 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer)) 61 | layers.extend([ 62 | # dw 63 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, norm_layer=norm_layer), 64 | # pw-linear 65 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 66 | norm_layer(oup), 67 | ]) 68 | self.conv = nn.Sequential(*layers) 69 | 70 | def forward(self, x): 71 | if self.use_res_connect: 72 | return x + self.conv(x) 73 | else: 74 | return self.conv(x) 75 | 76 | 77 | class MobileNetV2(nn.Module): 78 | def __init__(self, 79 | num_classes=1000, 80 | width_mult=1.0, 81 | inverted_residual_setting=None, 82 | round_nearest=8, 83 | block=None, 84 | norm_layer=None): 85 | """ 86 | MobileNet V2 main class 87 | Args: 88 | num_classes (int): Number of classes 89 | width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount 90 | inverted_residual_setting: Network structure 91 | round_nearest (int): Round the number of channels in each layer to be a multiple of this number 92 | Set to 1 to turn off rounding 93 | block: Module specifying inverted residual building block for mobilenet 94 | norm_layer: Module specifying the normalization layer to use 95 | """ 96 | super(MobileNetV2, self).__init__() 97 | 98 | if block is None: 99 | block = InvertedResidual 100 | 101 | if norm_layer is None: 102 | norm_layer = nn.BatchNorm2d 103 | 104 | input_channel = 32 105 | last_channel = 1280 106 | 107 | if inverted_residual_setting is None: 108 | inverted_residual_setting = [ 109 | # t, c, n, s 110 | [1, 16, 1, 1], 111 | [6, 24, 2, 2], 112 | [6, 32, 3, 2], 113 | [6, 64, 4, 2], 114 | [6, 96, 3, 1], 115 | [6, 160, 3, 2], 116 | [6, 320, 1, 1], 117 | ] 118 | 119 | # only check the first element, assuming user knows t,c,n,s are required 120 | if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: 121 | raise ValueError("inverted_residual_setting should be non-empty " 122 | "or a 4-element list, got {}".format(inverted_residual_setting)) 123 | 124 | # building first layer 125 | input_channel = _make_divisible(input_channel * width_mult, round_nearest) 126 | self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) 127 | features = [ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer)] 128 | # building inverted residual blocks 129 | for t, c, n, s in inverted_residual_setting: 130 | output_channel = _make_divisible(c * width_mult, round_nearest) 131 | for i in range(n): 132 | stride = s if i == 0 else 1 133 | 
features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer)) 134 | input_channel = output_channel 135 | # building last several layers 136 | features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer)) 137 | # make it nn.Sequential 138 | self.features = nn.Sequential(*features) 139 | 140 | # building classifier 141 | 142 | self.num_ori = 12 143 | self.num_shape = 40 144 | self.num_exp = 10 145 | 146 | 147 | self.classifier_ori = nn.Sequential( 148 | nn.Dropout(0.2), 149 | nn.Linear(self.last_channel, self.num_ori), 150 | ) 151 | self.classifier_shape = nn.Sequential( 152 | nn.Dropout(0.2), 153 | nn.Linear(self.last_channel, self.num_shape), 154 | ) 155 | self.classifier_exp = nn.Sequential( 156 | nn.Dropout(0.2), 157 | nn.Linear(self.last_channel, self.num_exp), 158 | ) 159 | 160 | # weight initialization 161 | for m in self.modules(): 162 | if isinstance(m, nn.Conv2d): 163 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 164 | if m.bias is not None: 165 | nn.init.zeros_(m.bias) 166 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 167 | nn.init.ones_(m.weight) 168 | nn.init.zeros_(m.bias) 169 | elif isinstance(m, nn.Linear): 170 | nn.init.normal_(m.weight, 0, 0.01) 171 | nn.init.zeros_(m.bias) 172 | 173 | def _forward_impl(self, x): 174 | # This exists since TorchScript doesn't support inheritance, so the superclass method 175 | # (this one) needs to have a name other than `forward` that can be accessed in a subclass 176 | 177 | x = self.features(x) 178 | 179 | x = nn.functional.adaptive_avg_pool2d(x, 1) 180 | x = x.reshape(x.shape[0], -1) 181 | 182 | pool_x = x.clone() 183 | 184 | x_ori = self.classifier_ori(x) 185 | x_shape = self.classifier_shape(x) 186 | x_exp = self.classifier_exp(x) 187 | 188 | x = torch.cat((x_ori, x_shape, x_exp), dim=1) 189 | return x, pool_x 190 | 191 | def forward(self, x): 192 | return self._forward_impl(x) 193 | 194 | 195 | def mobilenet_v2(pretrained=False, progress=True, **kwargs): 196 | """ 197 | Constructs a MobileNetV2 architecture from 198 | `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" `_. 
199 | Args: 200 | pretrained (bool): If True, returns a model pre-trained on ImageNet 201 | progress (bool): If True, displays a progress bar of the download to stderr 202 | """ 203 | model = MobileNetV2(**kwargs) 204 | if pretrained: 205 | state_dict = load_state_dict_from_url(model_urls['mobilenet_v2'], 206 | progress=progress) 207 | model.load_state_dict(state_dict, strict=False) 208 | return model -------------------------------------------------------------------------------- /backbone_nets/pointnet_backbone.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | 5 | __all__ = ['MLP_for', 'MLP_rev'] 6 | 7 | class MLP_for(nn.Module): 8 | def __init__(self, num_pts): 9 | super(MLP_for,self).__init__() 10 | self.conv1 = torch.nn.Conv1d(3,64,1) 11 | self.conv2 = torch.nn.Conv1d(64,64,1) 12 | self.conv3 = torch.nn.Conv1d(64,64,1) 13 | self.conv4 = torch.nn.Conv1d(64,128,1) 14 | self.conv5 = torch.nn.Conv1d(128,1024,1) 15 | self.conv6 = nn.Conv1d(2418, 512, 1) # 1024 + 64 + 1280 = 2368 16 | self.conv7 = nn.Conv1d(512, 256, 1) 17 | self.conv8 = nn.Conv1d(256, 128, 1) 18 | self.conv9 = nn.Conv1d(128, 3, 1) 19 | self.bn1 = nn.BatchNorm1d(64) 20 | self.bn2 = nn.BatchNorm1d(64) 21 | self.bn3 = nn.BatchNorm1d(64) 22 | self.bn4 = nn.BatchNorm1d(128) 23 | self.bn5 = nn.BatchNorm1d(1024) 24 | self.bn6 = nn.BatchNorm1d(512) 25 | self.bn7 = nn.BatchNorm1d(256) 26 | self.bn8 = nn.BatchNorm1d(128) 27 | self.bn9 = nn.BatchNorm1d(3) 28 | self.num_pts = num_pts 29 | self.max_pool = nn.MaxPool1d(num_pts) 30 | 31 | def forward(self,x, other_input1=None, other_input2=None, other_input3=None): 32 | out = F.relu(self.bn1(self.conv1(x))) 33 | out = F.relu(self.bn2(self.conv2(out))) 34 | point_features = out 35 | out = F.relu(self.bn3(self.conv3(out))) 36 | out = F.relu(self.bn4(self.conv4(out))) 37 | out = F.relu(self.bn5(self.conv5(out))) 38 | global_features = self.max_pool(out) 39 | global_features_repeated = global_features.repeat(1,1, self.num_pts) 40 | 41 | #out = F.relu(self.bn6(self.conv6(torch.cat([point_features, global_features_repeated],1)))) 42 | 43 | # Avg_pool 44 | # avgpool = other_input1 45 | # avgpool = avgpool.unsqueeze(2).repeat(1,1,self.num_pts) 46 | # out = F.relu(self.bn6(self.conv6(torch.cat([point_features, global_features_repeated, avgpool],1)))) 47 | 48 | #3DMMImg 49 | avgpool = other_input1 50 | avgpool = avgpool.unsqueeze(2).repeat(1,1,self.num_pts) 51 | 52 | shape_code = other_input2 53 | shape_code = shape_code.unsqueeze(2).repeat(1,1,self.num_pts) 54 | 55 | expr_code = other_input3 56 | expr_code = expr_code.unsqueeze(2).repeat(1,1,self.num_pts) 57 | 58 | out = F.relu(self.bn6(self.conv6(torch.cat([point_features, global_features_repeated, avgpool, shape_code, expr_code],1)))) 59 | 60 | 61 | out = F.relu(self.bn7(self.conv7(out))) 62 | out = F.relu(self.bn8(self.conv8(out))) 63 | out = F.relu(self.bn9(self.conv9(out))) 64 | return out 65 | 66 | 67 | class MLP_rev(nn.Module): 68 | def __init__(self, num_pts): 69 | super(MLP_rev,self).__init__() 70 | self.conv1 = torch.nn.Conv1d(3,64,1) 71 | self.conv2 = torch.nn.Conv1d(64,64,1) 72 | self.conv3 = torch.nn.Conv1d(64,64,1) 73 | self.conv4 = torch.nn.Conv1d(64,128,1) 74 | self.conv5 = torch.nn.Conv1d(128,1024,1) 75 | self.conv6_1 = nn.Conv1d(1024, 12, 1) 76 | self.conv6_2 = nn.Conv1d(1024, 40, 1) 77 | self.conv6_3 = nn.Conv1d(1024, 10, 1) 78 | 79 | self.bn1 = nn.BatchNorm1d(64) 80 | self.bn2 = nn.BatchNorm1d(64) 81 | self.bn3 
= nn.BatchNorm1d(64) 82 | self.bn4 = nn.BatchNorm1d(128) 83 | self.bn5 = nn.BatchNorm1d(1024) 84 | self.bn6_1 = nn.BatchNorm1d(12) 85 | self.bn6_2 = nn.BatchNorm1d(40) 86 | self.bn6_3 = nn.BatchNorm1d(10) 87 | self.num_pts = num_pts 88 | self.max_pool = nn.MaxPool1d(num_pts) 89 | 90 | def forward(self,x, other_input1=None, other_input2=None, other_input3=None): 91 | out = F.relu(self.bn1(self.conv1(x))) 92 | out = F.relu(self.bn2(self.conv2(out))) 93 | out = F.relu(self.bn3(self.conv3(out))) 94 | out = F.relu(self.bn4(self.conv4(out))) 95 | out = F.relu(self.bn5(self.conv5(out))) 96 | global_features = self.max_pool(out) 97 | 98 | # Global point feature 99 | out_rot = F.relu(self.bn6_1(self.conv6_1(global_features))) 100 | out_shape = F.relu(self.bn6_2(self.conv6_2(global_features))) 101 | out_expr = F.relu(self.bn6_3(self.conv6_3(global_features))) 102 | 103 | 104 | out = torch.cat([out_rot, out_shape, out_expr], 1).squeeze(2) 105 | 106 | return out -------------------------------------------------------------------------------- /benchmark.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.utils.data as data 4 | import torchvision.transforms as transforms 5 | import torch.backends.cudnn as cudnn 6 | import time 7 | import numpy as np 8 | 9 | from utils.ddfa import ToTensor, Normalize, CenterCrop, DDFATestDataset 10 | from model_building import SynergyNet 11 | from benchmark_aflw2000 import calc_nme as calc_nme_alfw2000 12 | from benchmark_aflw2000 import ana_msg as ana_alfw2000 13 | 14 | import argparse 15 | import os 16 | import glob 17 | import math 18 | from math import cos, atan2, asin 19 | import cv2 20 | 21 | from utils.params import ParamsPack 22 | param_pack = ParamsPack() 23 | 24 | def parse_pose(param): 25 | '''parse parameters into pose''' 26 | if len(param)==62: 27 | param = param * param_pack.param_std[:62] + param_pack.param_mean[:62] 28 | else: 29 | param = param * param_pack.param_std + param_pack.param_mean 30 | Ps = param[:12].reshape(3, -1) # camera matrix 31 | s, R, t3d = P2sRt(Ps) 32 | P = np.concatenate((R, t3d.reshape(3, -1)), axis=1) # without scale 33 | pose = matrix2angle(R) # yaw, pitch, roll 34 | return P, pose 35 | 36 | def P2sRt(P): 37 | '''decomposing camera matrix P''' 38 | t3d = P[:, 3] 39 | R1 = P[0:1, :3] 40 | R2 = P[1:2, :3] 41 | s = (np.linalg.norm(R1) + np.linalg.norm(R2)) / 2.0 42 | r1 = R1 / np.linalg.norm(R1) 43 | r2 = R2 / np.linalg.norm(R2) 44 | r3 = np.cross(r1, r2) 45 | R = np.concatenate((r1, r2, r3), 0) 46 | return s, R, t3d 47 | 48 | def matrix2angle(R): 49 | '''convert matrix to angle''' 50 | if R[2, 0] != 1 and R[2, 0] != -1: 51 | x = asin(R[2, 0]) 52 | y = atan2(R[1, 2] / cos(x), R[2, 2] / cos(x)) 53 | z = atan2(R[0, 1] / cos(x), R[0, 0] / cos(x)) 54 | 55 | else: # Gimbal lock 56 | z = 0 # can be anything 57 | if R[2, 0] == -1: 58 | x = np.pi / 2 59 | y = z + atan2(R[0, 1], R[0, 2]) 60 | else: 61 | x = -np.pi / 2 62 | y = -z + atan2(-R[0, 1], -R[0, 2]) 63 | 64 | rx, ry, rz = x*180/np.pi, y*180/np.pi, z*180/np.pi 65 | 66 | return [rx, ry, rz] 67 | 68 | def parsing(param): 69 | p_ = param[:, :12].reshape(-1, 3, 4) 70 | p = p_[:, :, :3] 71 | offset = p_[:, :, -1].reshape(-1, 3, 1) 72 | alpha_shp = param[:, 12:52].reshape(-1, 40, 1) 73 | alpha_exp = param[:, 52:62].reshape(-1, 10, 1) 74 | return p, offset, alpha_shp, alpha_exp 75 | 76 | def reconstruct_vertex(param, data_param, whitening=True, transform=True, lmk_pts=68): 77 | """ 78 | This function 
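# A small sanity sketch (assumed values, not part of the benchmark) of what P2sRt
# recovers: given P = s * [R | t], the decomposition returns the scale s, an
# orthonormal rotation R, and the translation t3d.
import numpy as np

theta = np.deg2rad(30)
R_true = np.array([[np.cos(theta), -np.sin(theta), 0.0],
                   [np.sin(theta),  np.cos(theta), 0.0],
                   [0.0, 0.0, 1.0]])
t_true = np.array([10.0, 20.0, 0.0])
P = np.concatenate((2.5 * R_true, t_true.reshape(3, 1)), axis=1)

t3d = P[:, 3]
R1, R2 = P[0:1, :3], P[1:2, :3]
s = (np.linalg.norm(R1) + np.linalg.norm(R2)) / 2.0
r1, r2 = R1 / np.linalg.norm(R1), R2 / np.linalg.norm(R2)
R = np.concatenate((r1, r2, np.cross(r1, r2)), 0)

assert np.isclose(s, 2.5) and np.allclose(R, R_true) and np.allclose(t3d, t_true)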
includes parameter de-whitening, reconstruction of landmarks, and transform from coordinate space (x,y) to image space (u,v) 79 | """ 80 | param_mean, param_std, w_shp_base, u_base, w_exp_base = data_param 81 | 82 | if whitening: 83 | if param.shape[1] == 62: 84 | param = param * param_std[:62] + param_mean[:62] 85 | else: 86 | raise NotImplementedError("Parameter length must be 62") 87 | 88 | if param.shape[1] == 62: 89 | p, offset, alpha_shp, alpha_exp = parsing(param) 90 | else: 91 | raise NotImplementedError("Parameter length must be 62") 92 | 93 | vertex = p @ (u_base + w_shp_base @ alpha_shp + w_exp_base @ alpha_exp).contiguous().view(-1, lmk_pts, 3).transpose(1,2) + offset 94 | if transform: 95 | vertex[:, 1, :] = param_pack.std_size + 1 - vertex[:, 1, :] 96 | 97 | return vertex 98 | 99 | def extract_param(checkpoint_fp, root='', args=None, filelists=None, device_ids=[0], 100 | batch_size=128, num_workers=4): 101 | map_location = {'cuda:{}'.format(i): 'cuda:0' for i in range(8)} 102 | checkpoint = torch.load(checkpoint_fp, map_location=map_location)['state_dict'] 103 | 104 | # Need to take off these for different numbers of base landmark points 105 | # del checkpoint['module.u_base'] 106 | # del checkpoint['module.w_shp_base'] 107 | # del checkpoint['module.w_exp_base'] 108 | 109 | torch.cuda.set_device(device_ids[0]) 110 | 111 | model = SynergyNet(args) 112 | model = nn.DataParallel(model, device_ids=device_ids).cuda() 113 | model.load_state_dict(checkpoint, strict=False) 114 | 115 | dataset = DDFATestDataset(filelists=filelists, root=root, 116 | transform=transforms.Compose([ToTensor(), CenterCrop(5, mode='test') , Normalize(mean=127.5, std=128) ])) 117 | data_loader = data.DataLoader(dataset, batch_size=batch_size, num_workers=num_workers) 118 | 119 | cudnn.benchmark = True 120 | model.eval() 121 | 122 | end = time.time() 123 | outputs = [] 124 | with torch.no_grad(): 125 | for _, inputs in enumerate(data_loader): 126 | inputs = inputs.cuda() 127 | output = model.module.forward_test(inputs) 128 | 129 | for i in range(output.shape[0]): 130 | param_prediction = output[i].cpu().numpy().flatten() 131 | 132 | outputs.append(param_prediction) 133 | outputs = np.array(outputs, dtype=np.float32) 134 | 135 | print('Extracting params take {: .3f}s'.format(time.time() - end)) 136 | return outputs, model.module.data_param 137 | 138 | 139 | def _benchmark_aflw2000(outputs): 140 | '''Calculate the error statistics.''' 141 | return ana_alfw2000(calc_nme_alfw2000(outputs,option='ori')) 142 | 143 | # AFLW2000 facial alignment 144 | img_list = sorted(glob.glob('./aflw2000_data/AFLW2000-3D_crop/*.jpg')) 145 | def benchmark_aflw2000_params(params, data_param): 146 | '''Reconstruct the landmark points and calculate the statistics''' 147 | outputs = [] 148 | params = torch.Tensor(params).cuda() 149 | 150 | batch_size = 50 151 | num_samples = params.shape[0] 152 | iter_num = math.floor(num_samples / batch_size) 153 | residual = num_samples % batch_size 154 | for i in range(iter_num+1): 155 | if i == iter_num: 156 | if residual == 0: 157 | break 158 | batch_data = params[i*batch_size: i*batch_size + residual] 159 | lm = reconstruct_vertex(batch_data, data_param, lmk_pts=68) 160 | lm = lm.cpu().numpy() 161 | for j in range(residual): 162 | outputs.append(lm[j, :2, :]) 163 | else: 164 | batch_data = params[i*batch_size: (i+1)*batch_size] 165 | lm = reconstruct_vertex(batch_data, data_param, lmk_pts=68) 166 | lm = lm.cpu().numpy() 167 | for j in range(batch_size): 168 | if i == 0: 169 | #plot the first 50 
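# Shape-level sketch of the 68-point reconstruction used by reconstruct_vertex:
# vertices = p @ reshape(u_base + w_shp_base @ alpha_shp + w_exp_base @ alpha_exp)
# plus the per-sample offset. The bases below are random stand-ins (assumptions),
# only the shapes mirror the real 3DMM data.
import torch

N, lmk_pts = 4, 68
u_base = torch.randn(lmk_pts * 3, 1)
w_shp_base = torch.randn(lmk_pts * 3, 40)
w_exp_base = torch.randn(lmk_pts * 3, 10)

p = torch.randn(N, 3, 3)            # rotation/scale block of the camera matrix
offset = torch.randn(N, 3, 1)       # translation block
alpha_shp = torch.randn(N, 40, 1)
alpha_exp = torch.randn(N, 10, 1)

geometry = u_base + w_shp_base @ alpha_shp + w_exp_base @ alpha_exp   # (N, 204, 1)
vertex = p @ geometry.contiguous().view(-1, lmk_pts, 3).transpose(1, 2) + offset
assert vertex.shape == (N, 3, lmk_pts)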
samples for validation 170 | bkg = cv2.imread(img_list[i*batch_size+j],-1) 171 | lm_sample = lm[j] 172 | c0 = np.clip((lm_sample[1,:]).astype(np.int64), 0, 119) 173 | c1 = np.clip((lm_sample[0,:]).astype(np.int64), 0, 119) 174 | for y, x, in zip([c0,c0,c0-1,c0-1],[c1,c1-1,c1,c1-1]): 175 | bkg[y, x, :] = np.array([233,193,133]) 176 | cv2.imwrite(f'./results/{i*batch_size+j}.png', bkg) 177 | 178 | outputs.append(lm[j, :2, :]) 179 | return _benchmark_aflw2000(outputs) 180 | 181 | 182 | # AFLW2000 face orientation estimation 183 | def benchmark_FOE(params): 184 | """ 185 | FOE benchmark validation. Only calculate the groundtruth of angles within [-99, 99] (following FSA-Net https://github.com/shamangary/FSA-Net) 186 | """ 187 | 188 | # AFLW200 groundturh and indices for skipping, whose yaw angle lies outside [-99, 99] 189 | exclude_aflw2000 = './aflw2000_data/eval/ALFW2000-3D_pose_3ANG_excl.npy' 190 | skip_aflw2000 = './aflw2000_data/eval/ALFW2000-3D_pose_3ANG_skip.npy' 191 | 192 | if not os.path.isfile(exclude_aflw2000) or not os.path.isfile(skip_aflw2000): 193 | raise RuntimeError('Missing data') 194 | 195 | pose_GT = np.load(exclude_aflw2000) 196 | skip_indices = np.load(skip_aflw2000) 197 | pose_mat = np.ones((pose_GT.shape[0],3)) 198 | 199 | idx = 0 200 | for i in range(params.shape[0]): 201 | if i in skip_indices: 202 | continue 203 | P, angles = parse_pose(params[i]) 204 | angles[0], angles[1], angles[2] = angles[1], angles[0], angles[2] # we decode raw-ptich-yaw order 205 | pose_mat[idx,:] = np.array(angles) 206 | idx += 1 207 | 208 | pose_analyis = np.mean(np.abs(pose_mat-pose_GT),axis=0) # pose GT uses [pitch-yaw-roll] order 209 | MAE = np.mean(pose_analyis) 210 | yaw = pose_analyis[1] 211 | pitch = pose_analyis[0] 212 | roll = pose_analyis[2] 213 | msg = 'Mean MAE = %3.3f (in deg), [yaw,pitch,roll] = [%3.3f, %3.3f, %3.3f]'%(MAE, yaw, pitch, roll) 214 | print('\nFace orientation estimation:') 215 | print(msg) 216 | return msg 217 | 218 | def benchmark(checkpoint_fp, args): 219 | '''benchmark validation pipeline''' 220 | device_ids = [0] 221 | 222 | def aflw2000(): 223 | root = './aflw2000_data/AFLW2000-3D_crop' 224 | filelists = './aflw2000_data/AFLW2000-3D_crop.list' 225 | 226 | if not os.path.isdir(root) or not os.path.isfile(filelists): 227 | raise RuntimeError('check if the testing data exist') 228 | 229 | params, data_param = extract_param( 230 | checkpoint_fp=checkpoint_fp, 231 | root=root, 232 | args= args, 233 | filelists=filelists, 234 | device_ids=device_ids, 235 | batch_size=128) 236 | 237 | info_out_fal = benchmark_aflw2000_params(params, data_param) 238 | print(info_out_fal) 239 | info_out_foe = benchmark_FOE(params) 240 | 241 | aflw2000() 242 | 243 | def main(): 244 | parser = argparse.ArgumentParser(description='SynergyNet benchmark on AFLW2000-3D') 245 | parser.add_argument('-a', '--arch', default='mobilenet_v2', type=str) 246 | parser.add_argument('-w', '--weights', default='models/best.pth.tar', type=str) 247 | parser.add_argument('-d', '--device', default='0', type=str) 248 | parser.add_argument('--img_size', default='120', type=int) 249 | args = parser.parse_args() 250 | args.device = [int(d) for d in args.device.split(',')] 251 | 252 | benchmark(args.weights, args) 253 | 254 | 255 | if __name__ == '__main__': 256 | main() 257 | -------------------------------------------------------------------------------- /benchmark_aflw2000.py: -------------------------------------------------------------------------------- 1 | """ 2 | This evaluation script follows 3DDFA and 
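# Illustrative sketch (made-up angle values) of the pose-error statistics computed
# in benchmark_FOE: per-angle mean absolute error in degrees, then their average.
import numpy as np

pose_pred = np.array([[10.0, 5.0, 1.0], [-20.0, 3.0, 2.0]])   # rows in [pitch, yaw, roll] order
pose_gt   = np.array([[ 8.0, 6.0, 0.0], [-18.0, 1.0, 2.0]])

per_angle = np.mean(np.abs(pose_pred - pose_gt), axis=0)        # -> [2.0, 1.5, 0.5]
mae = np.mean(per_angle)                                         # -> 1.333
print('MAE = %.3f, [yaw, pitch, roll] = [%.3f, %.3f, %.3f]'
      % (mae, per_angle[1], per_angle[0], per_angle[2]))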
3DDFA_V2 3 | https://github.com/cleardusk/3DDFA 4 | https://github.com/cleardusk/3DDFA_V2 5 | """ 6 | 7 | import os.path as osp 8 | import numpy as np 9 | from math import sqrt 10 | from utils.io import _load 11 | 12 | 13 | d = './aflw2000_data/eval' 14 | yaws_list = _load(osp.join(d, 'AFLW2000-3D.pose.npy')) 15 | # origin 16 | pts68_all_ori = _load(osp.join(d, 'AFLW2000-3D.pts68.npy')) 17 | # reannonated 18 | pts68_all_re = _load(osp.join(d, 'AFLW2000-3D-Reannotated.pts68.npy')) 19 | roi_boxs = _load(osp.join(d, 'AFLW2000-3D_crop.roi_box.npy')) 20 | 21 | 22 | def ana(nme_list): 23 | yaw_list_abs = np.abs(yaws_list) 24 | ind_yaw_1 = yaw_list_abs <= 30 25 | ind_yaw_2 = np.bitwise_and(yaw_list_abs > 30, yaw_list_abs <= 60) 26 | ind_yaw_3 = yaw_list_abs > 60 27 | 28 | nme_1 = nme_list[ind_yaw_1] 29 | nme_2 = nme_list[ind_yaw_2] 30 | nme_3 = nme_list[ind_yaw_3] 31 | 32 | mean_nme_1 = np.mean(nme_1) * 100 33 | mean_nme_2 = np.mean(nme_2) * 100 34 | mean_nme_3 = np.mean(nme_3) * 100 35 | 36 | std_nme_1 = np.std(nme_1) * 100 37 | std_nme_2 = np.std(nme_2) * 100 38 | std_nme_3 = np.std(nme_3) * 100 39 | 40 | mean_all = [mean_nme_1, mean_nme_2, mean_nme_3] 41 | mean = np.mean(mean_all) 42 | std = np.std(mean_all) 43 | 44 | s1 = '[ 0, 30]\tMean: \x1b[32m{:.3f}\x1b[0m, Std: {:.3f}'.format(mean_nme_1, std_nme_1) 45 | s2 = '[30, 60]\tMean: \x1b[32m{:.3f}\x1b[0m, Std: {:.3f}'.format(mean_nme_2, std_nme_2) 46 | s3 = '[60, 90]\tMean: \x1b[32m{:.3f}\x1b[0m, Std: {:.3f}'.format(mean_nme_3, std_nme_3) 47 | s5 = '[ 0, 90]\tMean: \x1b[31m{:.3f}\x1b[0m, Std: \x1b[31m{:.3f}\x1b[0m'.format(mean, std) 48 | 49 | s = '\n'.join([s1, s2, s3, s5]) 50 | 51 | print(s) 52 | 53 | return mean_nme_1, mean_nme_2, mean_nme_3, mean, std 54 | 55 | def ana_msg(nme_list): 56 | 57 | leng = nme_list.shape[0] 58 | yaw_list_abs = np.abs(yaws_list)[:leng] 59 | ind_yaw_1 = yaw_list_abs <= 30 60 | ind_yaw_2 = np.bitwise_and(yaw_list_abs > 30, yaw_list_abs <= 60) 61 | ind_yaw_3 = yaw_list_abs > 60 62 | 63 | nme_1 = nme_list[ind_yaw_1] 64 | nme_2 = nme_list[ind_yaw_2] 65 | nme_3 = nme_list[ind_yaw_3] 66 | 67 | mean_nme_1 = np.mean(nme_1) * 100 68 | mean_nme_2 = np.mean(nme_2) * 100 69 | mean_nme_3 = np.mean(nme_3) * 100 70 | 71 | std_nme_1 = np.std(nme_1) * 100 72 | std_nme_2 = np.std(nme_2) * 100 73 | std_nme_3 = np.std(nme_3) * 100 74 | 75 | mean_all = [mean_nme_1, mean_nme_2, mean_nme_3] 76 | mean = np.mean(mean_all) 77 | std = np.std(mean_all) 78 | 79 | s0 = '\nFacial Alignment on AFLW2000-3D (NME):' 80 | s1 = '[ 0, 30]\tMean: {:.3f}, Std: {:.3f}'.format(mean_nme_1, std_nme_1) 81 | s2 = '[30, 60]\tMean: {:.3f}, Std: {:.3f}'.format(mean_nme_2, std_nme_2) 82 | s3 = '[60, 90]\tMean: {:.3f}, Std: {:.3f}'.format(mean_nme_3, std_nme_3) 83 | s4 = '[ 0, 90]\tMean: {:.3f}, Std: {:.3f}'.format(mean, std) 84 | 85 | s = '\n'.join([s0, s1, s2, s3, s4]) 86 | 87 | return s 88 | 89 | def convert_to_ori(lms, i): 90 | std_size = 120 91 | sx, sy, ex, ey = roi_boxs[i] 92 | scale_x = (ex - sx) / std_size 93 | scale_y = (ey - sy) / std_size 94 | lms[0, :] = lms[0, :] * scale_x + sx 95 | lms[1, :] = lms[1, :] * scale_y + sy 96 | return lms 97 | 98 | def convert_to_crop(lms, i): 99 | std_size = 120 100 | sx, sy, ex, ey = roi_boxs[i] 101 | scale_x = (ex - sx) / std_size 102 | scale_y = (ey - sy) / std_size 103 | lms[0, :] = (lms[0, :] - sx)/ scale_x 104 | lms[1, :] = (lms[1, :] - sy)/ scale_y 105 | return lms 106 | 107 | def calc_nme(pts68_fit_all, option='ori'): 108 | if option == 'ori': 109 | pts68_all = pts68_all_ori 110 | elif option == 're': 111 | 
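# A compact sketch (synthetic landmarks, not AFLW2000 data) of the NME definition
# used by calc_nme below: mean point-to-point distance divided by sqrt(w * h) of
# the ground-truth bounding box.
import numpy as np

pts_gt = np.array([[0., 10., 10., 0.],       # x coordinates of 4 landmarks
                   [0., 0., 20., 20.]])      # y coordinates
pts_fit = pts_gt + 1.0                        # constant 1-pixel shift in x and y

minx, maxx = pts_gt[0].min(), pts_gt[0].max()
miny, maxy = pts_gt[1].min(), pts_gt[1].max()
llength = np.sqrt((maxx - minx) * (maxy - miny))                  # sqrt(10 * 20)

dis = np.mean(np.sqrt(np.sum((pts_fit - pts_gt) ** 2, axis=0)))   # mean distance = sqrt(2)
nme = dis / llength                                                # ~0.1
print(round(nme, 4))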
pts68_all = pts68_all_re 112 | std_size = 120 113 | 114 | nme_list = [] 115 | length_list = [] 116 | for i in range(len(roi_boxs)): 117 | pts68_fit = pts68_fit_all[i] 118 | pts68_gt = pts68_all[i] 119 | 120 | sx, sy, ex, ey = roi_boxs[i] 121 | scale_x = (ex - sx) / std_size 122 | scale_y = (ey - sy) / std_size 123 | pts68_fit[0, :] = pts68_fit[0, :] * scale_x + sx 124 | pts68_fit[1, :] = pts68_fit[1, :] * scale_y + sy 125 | 126 | # build bbox 127 | minx, maxx = np.min(pts68_gt[0, :]), np.max(pts68_gt[0, :]) 128 | miny, maxy = np.min(pts68_gt[1, :]), np.max(pts68_gt[1, :]) 129 | llength = sqrt((maxx - minx) * (maxy - miny)) 130 | length_list.append(llength) 131 | 132 | dis = pts68_fit - pts68_gt[:2, :] 133 | dis = np.sqrt(np.sum(np.power(dis, 2), 0)) 134 | dis = np.mean(dis) 135 | nme = dis / llength 136 | nme_list.append(nme) 137 | 138 | nme_list = np.array(nme_list, dtype=np.float32) 139 | return nme_list 140 | 141 | 142 | if __name__ == '__main__': 143 | pass 144 | -------------------------------------------------------------------------------- /benchmark_validate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # coding: utf-8 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.utils.data as data 7 | import torchvision.transforms as transforms 8 | import torch.backends.cudnn as cudnn 9 | import time 10 | import numpy as np 11 | 12 | from benchmark_aflw2000 import calc_nme as calc_nme_alfw2000 13 | from benchmark_aflw2000 import ana_msg as ana_alfw2000 14 | 15 | from utils.ddfa import ToTensor, Normalize, DDFATestDataset, CenterCrop 16 | import argparse 17 | 18 | import logging 19 | import os 20 | from utils.params import ParamsPack 21 | param_pack = ParamsPack() 22 | import glob 23 | import scipy.io as sio 24 | import math 25 | from math import cos, sin, atan2, asin, sqrt 26 | 27 | # Only work with numpy without batch 28 | def parse_pose(param): 29 | """ 30 | Parse the parameters into 3x4 affine matrix and pose angles 31 | """ 32 | param = param * param_pack.param_std[:62] + param_pack.param_mean[:62] 33 | Ps = param[:12].reshape(3, -1) # camera matrix 34 | s, R, t3d = P2sRt(Ps) 35 | P = np.concatenate((R, t3d.reshape(3, -1)), axis=1) # without scale 36 | pose = matrix2angle_corr(R) # yaw, pitch, roll 37 | return P, pose 38 | 39 | def P2sRt(P): 40 | ''' 41 | Decompositing camera matrix P. 42 | ''' 43 | t3d = P[:, 3] 44 | R1 = P[0:1, :3] 45 | R2 = P[1:2, :3] 46 | s = (np.linalg.norm(R1) + np.linalg.norm(R2)) / 2.0 47 | r1 = R1 / np.linalg.norm(R1) 48 | r2 = R2 / np.linalg.norm(R2) 49 | r3 = np.cross(r1, r2) 50 | 51 | R = np.concatenate((r1, r2, r3), 0) 52 | return s, R, t3d 53 | 54 | # def matrix2angle(R): 55 | # ''' 56 | # Compute three Euler angles from a Rotation Matrix. Ref: http://www.gregslabaugh.net/publications/euler.pdf 57 | # ''' 58 | 59 | # if R[2, 0] != 1 and R[2, 0] != -1: 60 | # x = asin(R[2, 0]) 61 | # y = atan2(R[2, 1] / cos(x), R[2, 2] / cos(x)) 62 | # z = atan2(R[1, 0] / cos(x), R[0, 0] / cos(x)) 63 | 64 | # else: # Gimbal lock 65 | # z = 0 # can be anything 66 | # if R[2, 0] == -1: 67 | # x = np.pi / 2 68 | # y = z + atan2(R[0, 1], R[0, 2]) 69 | # else: 70 | # x = -np.pi / 2 71 | # y = -z + atan2(-R[0, 1], -R[0, 2]) 72 | 73 | # rx, ry, rz = x*180/np.pi, y*180/np.pi, z*180/np.pi 74 | 75 | # return [rx, ry, rz] 76 | 77 | #numpy 78 | def matrix2angle_corr(R): 79 | ''' 80 | Compute three Euler angles from a Rotation Matrix. 
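# A tiny worked sketch (hypothetical roi box) of the crop-to-original rescaling that
# calc_nme and convert_to_ori apply: landmarks predicted in the 120x120 crop are
# scaled by (ex - sx)/120 and (ey - sy)/120 and shifted by (sx, sy).
import numpy as np

std_size = 120
sx, sy, ex, ey = 40., 60., 280., 300.         # assumed crop box in the original image
lms_crop = np.array([[0., 60., 120.],         # x coordinates in crop space
                     [0., 60., 120.]])        # y coordinates in crop space

scale_x = (ex - sx) / std_size                # 2.0
scale_y = (ey - sy) / std_size                # 2.0
lms_ori = np.stack([lms_crop[0] * scale_x + sx,
                    lms_crop[1] * scale_y + sy])
print(lms_ori)   # [[ 40. 160. 280.], [ 60. 180. 300.]]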
Ref: http://www.gregslabaugh.net/publications/euler.pdf 81 | ''' 82 | 83 | if R[2, 0] != 1 and R[2, 0] != -1: 84 | x = asin(R[2, 0]) 85 | y = atan2(R[1, 2] / cos(x), R[2, 2] / cos(x)) 86 | z = atan2(R[0, 1] / cos(x), R[0, 0] / cos(x)) 87 | 88 | else: # Gimbal lock 89 | z = 0 # can be anything 90 | if R[2, 0] == -1: 91 | x = np.pi / 2 92 | y = z + atan2(R[0, 1], R[0, 2]) 93 | else: 94 | x = -np.pi / 2 95 | y = -z + atan2(-R[0, 1], -R[0, 2]) 96 | 97 | rx, ry, rz = x*180/np.pi, y*180/np.pi, z*180/np.pi 98 | 99 | return [rx, ry, rz] 100 | 101 | 102 | 103 | def parse_param_62_batch(param): 104 | """batch styler""" 105 | p_ = param[:, :12].reshape(-1, 3, 4) 106 | p = p_[:, :, :3] 107 | offset = p_[:, :, -1].reshape(-1, 3, 1) 108 | alpha_shp = param[:, 12:52].reshape(-1, 40, 1) 109 | alpha_exp = param[:, 52:62].reshape(-1, 10, 1) 110 | return p, offset, alpha_shp, alpha_exp 111 | 112 | 113 | # 62-with-false-rot 114 | def reconstruct_vertex(param, data_param, whitening=True, dense=False, transform=True): 115 | """ 116 | Whitening param -> 3d vertex, based on the 3dmm param: u_base, w_shp, w_exp 117 | dense: if True, return dense vertex, else return 68 sparse landmarks. All dense or sparse vertex is transformed to 118 | image coordinate space, but without alignment caused by face cropping. 119 | transform: whether transform to image space 120 | Working with Tensor with batch. Using Fortan-type reshape. 121 | """ 122 | param_mean, param_std, w_shp_base, u_base, w_exp_base = data_param 123 | 124 | if whitening: 125 | if param.shape[1] == 62: 126 | param = param * param_std[:62] + param_mean[:62] 127 | 128 | p, offset, alpha_shp, alpha_exp = parse_param_62_batch(param) 129 | 130 | """For 68 pts""" 131 | vertex = p @ (u_base + w_shp_base @ alpha_shp + w_exp_base @ alpha_exp).contiguous().view(-1, 68, 3).transpose(1,2) + offset 132 | 133 | if transform: 134 | # transform to image coordinate space 135 | vertex[:, 1, :] = param_pack.std_size + 1 - vertex[:, 1, :] ## corrected 136 | 137 | 138 | return vertex 139 | 140 | 141 | def extract_param(model, root='', filelists=None, 142 | batch_size=128, num_workers=4): 143 | 144 | dataset = DDFATestDataset(filelists=filelists, root=root, 145 | transform=transforms.Compose([ToTensor(), CenterCrop(5, mode='test'), Normalize(mean=127.5, std=130)])) 146 | data_loader = data.DataLoader(dataset, batch_size=batch_size, num_workers=num_workers) 147 | 148 | cudnn.benchmark = True 149 | model.eval() 150 | 151 | end = time.time() 152 | outputs = [] 153 | with torch.no_grad(): 154 | for _, inputs in enumerate(data_loader): 155 | inputs = inputs.cuda() 156 | output = model.module.forward_test(inputs) 157 | 158 | for i in range(output.shape[0]): 159 | param_prediction = output[i].cpu().numpy().flatten() 160 | 161 | outputs.append(param_prediction) 162 | outputs = np.array(outputs, dtype=np.float32) 163 | 164 | logging.info('Extracting params take {: .3f}s\n'.format(time.time() - end)) 165 | return outputs 166 | 167 | 168 | def _benchmark_aflw2000(outputs): 169 | """ 170 | Calculate the error statistics. 
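# Minimal sketch of how parse_param_62_batch slices a batch of 62-d parameter
# vectors: the first 12 values form the 3x4 camera matrix (3x3 pose + 3x1 offset),
# the next 40 are shape coefficients, and the last 10 are expression coefficients.
import torch

param = torch.arange(2 * 62, dtype=torch.float32).reshape(2, 62)
p_ = param[:, :12].reshape(-1, 3, 4)
p = p_[:, :, :3]                               # (2, 3, 3) rotation/scale
offset = p_[:, :, -1].reshape(-1, 3, 1)        # (2, 3, 1) translation
alpha_shp = param[:, 12:52].reshape(-1, 40, 1) # (2, 40, 1)
alpha_exp = param[:, 52:62].reshape(-1, 10, 1) # (2, 10, 1)
print(p.shape, offset.shape, alpha_shp.shape, alpha_exp.shape)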
171 | """ 172 | return ana_alfw2000(calc_nme_alfw2000(outputs, option='ori')) 173 | 174 | 175 | def benchmark_aflw2000_params(params, data_param): 176 | """ 177 | Reconstruct the landmark points and calculate the statistics 178 | """ 179 | outputs = [] 180 | params = torch.Tensor(params).cuda() 181 | 182 | batch_size = 50 183 | num_samples = params.shape[0] 184 | iter_num = math.floor(num_samples / batch_size) 185 | residual = num_samples % batch_size 186 | for i in range(iter_num+1): 187 | if i == iter_num: 188 | if residual == 0: 189 | break 190 | batch_data = params[i*batch_size: i*batch_size + residual] 191 | lm = reconstruct_vertex(batch_data, data_param) 192 | lm = lm.cpu().numpy() 193 | for j in range(residual): 194 | outputs.append(lm[j, :2, :]) 195 | else: 196 | batch_data = params[i*batch_size: (i+1)*batch_size] 197 | lm = reconstruct_vertex(batch_data, data_param) 198 | lm = lm.cpu().numpy() 199 | for j in range(batch_size): 200 | outputs.append(lm[j, :2, :]) 201 | return _benchmark_aflw2000(outputs) 202 | 203 | 204 | def benchmark_FOE(params): 205 | """ 206 | FOE benchmark validation. Only calculate the groundtruth of angles within [-99, 99] 207 | """ 208 | 209 | # Define the data path for AFLW200 groundturh and skip indices, where the data and structure lie on S3 buckets (fixed structure) 210 | groundtruth_excl = './aflw2000_data/eval/ALFW2000-3D_pose_3ANG_excl.npy' 211 | skip_aflw2000 = './aflw2000_data/eval/ALFW2000-3D_pose_3ANG_skip.npy' 212 | 213 | if not os.path.isfile(groundtruth_excl) or not os.path.isfile(skip_aflw2000): 214 | raise RuntimeError('The data is not properly downloaded from the S3 bucket. Please check your S3 bucket access permission') 215 | 216 | 217 | pose_GT = np.load(groundtruth_excl) # groundtruth load 218 | skip_indices = np.load(skip_aflw2000) # load the skip indices in AFLW2000 219 | pose_mat = np.ones((pose_GT.shape[0],3)) 220 | 221 | total = 0 222 | for i in range(params.shape[0]): 223 | if i in skip_indices: 224 | continue 225 | P, angles = parse_pose(params[i]) # original per-sample decode 226 | angles[0], angles[1], angles[2] = angles[1], angles[0], angles[2] 227 | pose_mat[total,:] = np.array(angles) 228 | total += 1 229 | 230 | pose_analy = np.mean(np.abs(pose_mat-pose_GT),axis=0) 231 | MAE = np.mean(pose_analy) 232 | yaw = pose_analy[1] 233 | pitch = pose_analy[0] 234 | roll = pose_analy[2] 235 | msg = 'MAE = %3.3f, [yaw,pitch,roll] = [%3.3f, %3.3f, %3.3f]'%(MAE, yaw, pitch, roll) 236 | print('\n--------------------------------------------------------------------------------') 237 | print(msg) 238 | print('--------------------------------------------------------------------------------') 239 | return msg 240 | 241 | 242 | # 102 243 | def benchmark_pipeline(model): 244 | """ 245 | Run the benchmark validation pipeline for Facial Alignments: AFLW and AFLW2000, FOE: AFLW2000. 246 | """ 247 | 248 | def aflw2000(data_param): 249 | root = './aflw2000_data/AFLW2000-3D_crop' 250 | filelists = './aflw2000_data/AFLW2000-3D_crop.list' 251 | 252 | if not os.path.isdir(root) or not os.path.isfile(filelists): 253 | raise RuntimeError('The data is not properly downloaded from the S3 bucket. 
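# A standalone sketch (synthetic sizes) of the batching pattern used in
# benchmark_aflw2000_params: full batches first, then one final pass over the
# residual samples when the dataset size is not a multiple of the batch size.
import math

num_samples, batch_size = 1970, 50
iter_num = math.floor(num_samples / batch_size)
residual = num_samples % batch_size

processed = 0
for i in range(iter_num + 1):
    if i == iter_num:
        if residual == 0:
            break
        processed += residual                  # final partial batch
    else:
        processed += batch_size                # full batch
assert processed == num_samples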
Please check your S3 bucket access permission') 254 | 255 | params = extract_param( 256 | model=model, 257 | root=root, 258 | filelists=filelists, 259 | batch_size=128) 260 | 261 | s2 = benchmark_aflw2000_params(params, data_param) 262 | logging.info(s2) 263 | # s3 = benchmark_FOE(params) 264 | # logging.info(s3) 265 | 266 | aflw2000(model.module.data_param) 267 | 268 | 269 | def main(): 270 | parser = argparse.ArgumentParser(description='3DDFA Benchmark') 271 | parser.add_argument('--arch', default='mobilenet_1', type=str) 272 | parser.add_argument('-c', '--checkpoint-fp', default='models/phase1_wpdc.pth.tar', type=str) 273 | args = parser.parse_args() 274 | 275 | benchmark_pipeline(args.arch, args.checkpoint_fp) 276 | 277 | 278 | if __name__ == '__main__': 279 | main() 280 | -------------------------------------------------------------------------------- /demo/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/demo/0.png -------------------------------------------------------------------------------- /demo/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/demo/1.png -------------------------------------------------------------------------------- /demo/10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/demo/10.png -------------------------------------------------------------------------------- /demo/11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/demo/11.png -------------------------------------------------------------------------------- /demo/12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/demo/12.png -------------------------------------------------------------------------------- /demo/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/demo/2.png -------------------------------------------------------------------------------- /demo/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/demo/3.png -------------------------------------------------------------------------------- /demo/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/demo/4.png -------------------------------------------------------------------------------- /demo/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/demo/5.png -------------------------------------------------------------------------------- /demo/6.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/demo/6.png -------------------------------------------------------------------------------- /demo/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/demo/7.png -------------------------------------------------------------------------------- /demo/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/demo/8.png -------------------------------------------------------------------------------- /demo/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/demo/9.png -------------------------------------------------------------------------------- /demo/AF-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/demo/AF-1.png -------------------------------------------------------------------------------- /demo/AF-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/demo/AF-2.png -------------------------------------------------------------------------------- /demo/alignment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/demo/alignment.png -------------------------------------------------------------------------------- /demo/comparison-deca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/demo/comparison-deca.png -------------------------------------------------------------------------------- /demo/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/demo/demo.gif -------------------------------------------------------------------------------- /demo/multiple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/demo/multiple.png -------------------------------------------------------------------------------- /demo/orientation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/demo/orientation.png -------------------------------------------------------------------------------- /demo/single.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/demo/single.png -------------------------------------------------------------------------------- /demo/teaser.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/demo/teaser.png -------------------------------------------------------------------------------- /img/sample_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/img/sample_1.jpg -------------------------------------------------------------------------------- /img/sample_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/img/sample_2.jpg -------------------------------------------------------------------------------- /img/sample_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/img/sample_3.jpg -------------------------------------------------------------------------------- /img/sample_4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/img/sample_4.jpg -------------------------------------------------------------------------------- /loss_definition.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from utils.params import ParamsPack 4 | param_pack = ParamsPack() 5 | import math 6 | 7 | 8 | class WingLoss(nn.Module): 9 | def __init__(self, omega=10, epsilon=2): 10 | super(WingLoss, self).__init__() 11 | self.omega = omega 12 | self.epsilon = epsilon 13 | self.log_term = math.log(1 + self.omega / self.epsilon) 14 | 15 | def forward(self, pred, target, kp=False): 16 | n_points = pred.shape[2] 17 | pred = pred.transpose(1,2).contiguous().view(-1, 3*n_points) 18 | target = target.transpose(1,2).contiguous().view(-1, 3*n_points) 19 | y = target 20 | y_hat = pred 21 | delta_y = (y - y_hat).abs() 22 | delta_y1 = delta_y[delta_y < self.omega] 23 | delta_y2 = delta_y[delta_y >= self.omega] 24 | loss1 = self.omega * torch.log(1 + delta_y1 / self.epsilon) 25 | C = self.omega - self.omega * self.log_term 26 | loss2 = delta_y2 - C 27 | return (loss1.sum() + loss2.sum()) / (len(loss1) + len(loss2)) 28 | 29 | class ParamLoss(nn.Module): 30 | """Input and target are all 62-d param""" 31 | def __init__(self): 32 | super(ParamLoss, self).__init__() 33 | self.criterion = nn.MSELoss(reduction="none") 34 | 35 | def forward(self, input, target, mode = 'normal'): 36 | if mode == 'normal': 37 | loss = self.criterion(input[:,:12], target[:,:12]).mean(1) + self.criterion(input[:,12:], target[:,12:]).mean(1) 38 | return torch.sqrt(loss) 39 | elif mode == 'only_3dmm': 40 | loss = self.criterion(input[:,:50], target[:,12:62]).mean(1) 41 | return torch.sqrt(loss) 42 | return torch.sqrt(loss.mean(1)) 43 | 44 | 45 | if __name__ == "__main__": 46 | pass 47 | 48 | -------------------------------------------------------------------------------- /main_train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # coding: utf-8 3 | import os 4 | import os.path as osp 5 | from pathlib import Path 6 | import numpy as np 7 | import argparse 8 | import time 9 | import logging 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torchvision.transforms as transforms 14 | 
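# Hedged sketch of the piecewise Wing loss evaluated on a few residuals, mirroring
# the WingLoss module above (omega=10, epsilon=2): small residuals use the
# logarithmic branch, large ones the shifted-linear branch. The residuals are
# made-up values for illustration.
import math
import torch

omega, epsilon = 10.0, 2.0
C = omega - omega * math.log(1 + omega / epsilon)

delta_y = torch.tensor([0.5, 3.0, 15.0]).abs()
small = delta_y[delta_y < omega]
large = delta_y[delta_y >= omega]
loss_small = omega * torch.log(1 + small / epsilon)
loss_large = large - C
loss = (loss_small.sum() + loss_large.sum()) / (len(loss_small) + len(loss_large))
print(float(loss))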
from torch.utils.data import DataLoader 15 | import torch.backends.cudnn as cudnn 16 | cudnn.benchmark=True 17 | 18 | from utils.ddfa import DDFADataset, ToTensor, Normalize, SGD_NanHandler, CenterCrop, Compose_GT, ColorJitter 19 | from utils.ddfa import str2bool, AverageMeter 20 | from utils.io import mkdir 21 | from model_building import SynergyNet as SynergyNet 22 | from benchmark_validate import benchmark_pipeline 23 | 24 | 25 | # global args (configuration) 26 | args = None # define the static training setting, which wouldn't and shouldn't be changed over the whole experiements. 27 | 28 | def parse_args(): 29 | parser = argparse.ArgumentParser(description='3DMM Fitting') 30 | parser.add_argument('-j', '--workers', default=6, type=int) 31 | parser.add_argument('--epochs', default=40, type=int) 32 | parser.add_argument('--start-epoch', default=1, type=int) 33 | parser.add_argument('-b', '--batch-size', default=128, type=int) 34 | parser.add_argument('-vb', '--val-batch-size', default=32, type=int) 35 | parser.add_argument('--base-lr', '--learning-rate', default=0.001, type=float) 36 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M', 37 | help='momentum') 38 | parser.add_argument('--weight-decay', '--wd', default=5e-4, type=float) 39 | parser.add_argument('--print-freq', '-p', default=20, type=int) 40 | parser.add_argument('--resume', default='', type=str, metavar='PATH') 41 | parser.add_argument('--resume_pose', default='', type=str, metavar='PATH') 42 | parser.add_argument('--devices-id', default='0,1', type=str) 43 | parser.add_argument('--filelists-train',default='', type=str) 44 | parser.add_argument('--root', default='') 45 | parser.add_argument('--snapshot', default='', type=str) 46 | parser.add_argument('--log-file', default='output.log', type=str) 47 | parser.add_argument('--log-mode', default='w', type=str) 48 | parser.add_argument('--arch', default='mobilenet_v2', type=str, help="Please choose [mobilenet_v2, mobilenet_1, resnet50, resnet101, or ghostnet]") 49 | parser.add_argument('--milestones', default='15,25,30', type=str) 50 | parser.add_argument('--task', default='all', type=str) 51 | parser.add_argument('--test_initial', default='false', type=str2bool) 52 | parser.add_argument('--warmup', default=-1, type=int) 53 | parser.add_argument('--param-fp-train',default='',type=str) 54 | parser.add_argument('--img_size', default=120, type=int) 55 | parser.add_argument('--save_val_freq', default=10, type=int) 56 | 57 | global args 58 | args = parser.parse_args() 59 | 60 | # some other operations 61 | args.devices_id = [int(d) for d in args.devices_id.split(',')] 62 | args.milestones = [int(m) for m in args.milestones.split(',')] 63 | 64 | snapshot_dir = osp.split(args.snapshot)[0] 65 | mkdir(snapshot_dir) 66 | 67 | 68 | def print_args(args): 69 | for arg in vars(args): 70 | s = arg + ': ' + str(getattr(args, arg)) 71 | logging.info(s) 72 | 73 | 74 | def adjust_learning_rate(optimizer, epoch, milestones=None): 75 | """Sets the learning rate: milestone is a list/tuple""" 76 | 77 | def to(epoch): 78 | if epoch <= args.warmup: 79 | return 1 80 | elif args.warmup < epoch <= milestones[0]: 81 | return 0 82 | for i in range(1, len(milestones)): 83 | if milestones[i - 1] < epoch <= milestones[i]: 84 | return i 85 | return len(milestones) 86 | 87 | n = to(epoch) 88 | 89 | #global lr 90 | lr = args.base_lr * (0.2 ** n) 91 | for param_group in optimizer.param_groups: 92 | param_group['lr'] = lr 93 | 94 | return lr 95 | 96 | def save_checkpoint(state, 
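# A quick worked sketch (default settings assumed: base_lr=0.001, milestones
# 15/25/30, warmup disabled) of the step schedule in adjust_learning_rate: the
# learning rate is base_lr * 0.2**n, where n counts how many milestones the
# current epoch has passed.
base_lr, milestones, warmup = 0.001, [15, 25, 30], -1

def exponent(epoch):
    if epoch <= warmup:
        return 1
    elif warmup < epoch <= milestones[0]:
        return 0
    for i in range(1, len(milestones)):
        if milestones[i - 1] < epoch <= milestones[i]:
            return i
    return len(milestones)

for epoch in (1, 15, 16, 25, 26, 31, 40):
    print(epoch, base_lr * 0.2 ** exponent(epoch))
# epochs 1-15 -> 0.001, 16-25 -> 0.0002, 26-30 -> 4e-05, 31+ -> 8e-06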
filename='checkpoint.pth.tar'): 97 | torch.save(state, filename) 98 | logging.info(f'Save checkpoint to {filename}') 99 | 100 | def count_parameters(model): 101 | return sum(p.numel() for p in model.parameters() if p.requires_grad) 102 | 103 | def train(train_loader, model, optimizer, epoch, lr): 104 | """Network training, loss updates, and backward calculation""" 105 | 106 | # AverageMeter for statistics 107 | batch_time = AverageMeter() 108 | data_time = AverageMeter() 109 | losses_name = list(model.module.get_losses()) 110 | losses_name.append('loss_total') 111 | losses_meter = [AverageMeter() for i in range(len(losses_name))] 112 | 113 | model.train() 114 | 115 | end = time.time() 116 | for i, (input, target) in enumerate(train_loader): 117 | 118 | input = input.cuda(non_blocking=True) 119 | 120 | target = target[:,:62] 121 | target.requires_grad = False 122 | target = target.float().cuda(non_blocking=True) 123 | 124 | losses = model(input, target) 125 | 126 | data_time.update(time.time() - end) 127 | 128 | loss_total = 0 129 | for j, name in enumerate(losses): 130 | mean_loss = losses[name].mean() 131 | losses_meter[j].update(mean_loss, input.size(0)) 132 | loss_total += mean_loss 133 | 134 | losses_meter[j+1].update(loss_total, input.size(0)) 135 | 136 | ### compute gradient and do SGD step 137 | optimizer.zero_grad() 138 | loss_total.backward() 139 | flag, _ = optimizer.step_handleNan() 140 | 141 | if flag: 142 | print("Nan encounter! Backward gradient error. Not updating the associated gradients.") 143 | 144 | batch_time.update(time.time() - end) 145 | end = time.time() 146 | 147 | if i % args.print_freq == 0: 148 | msg = 'Epoch: [{}][{}/{}]\t'.format(epoch, i, len(train_loader)) + \ 149 | 'LR: {:.8f}\t'.format(lr) + \ 150 | 'Time: {:.3f} ({:.3f})\t'.format(batch_time.val, batch_time.avg) 151 | for k in range(len(losses_meter)): 152 | msg += '{}: {:.4f} ({:.4f})\t'.format(losses_name[k], losses_meter[k].val, losses_meter[k].avg) 153 | logging.info(msg) 154 | 155 | 156 | def main(): 157 | """ Main funtion for the training process""" 158 | parse_args() # parse global argsl 159 | 160 | # logging setup 161 | logging.basicConfig( 162 | format='[%(asctime)s] [p%(process)s] [%(pathname)s:%(lineno)d] [%(levelname)s] %(message)s', 163 | level=logging.INFO, 164 | handlers=[ 165 | logging.FileHandler(args.log_file, mode=args.log_mode), 166 | logging.StreamHandler() 167 | ] 168 | ) 169 | 170 | print_args(args) # print args 171 | 172 | # step1: define the model structure 173 | model = SynergyNet(args) 174 | torch.cuda.set_device(args.devices_id[0]) 175 | 176 | model = nn.DataParallel(model, device_ids=args.devices_id).cuda() # -> GPU 177 | 178 | # step2: optimization: loss and optimization method 179 | 180 | optimizer = SGD_NanHandler(model.parameters(), 181 | lr=args.base_lr, 182 | momentum=args.momentum, 183 | weight_decay=args.weight_decay, 184 | nesterov=True) 185 | 186 | # step 2.1 resume 187 | if args.resume: 188 | if Path(args.resume).is_file(): 189 | logging.info(f'=> loading checkpoint {args.resume}') 190 | checkpoint = torch.load(args.resume, map_location=lambda storage, loc: storage)['state_dict'] 191 | model.load_state_dict(checkpoint, strict=False) 192 | 193 | else: 194 | logging.info(f'=> no checkpoint found at {args.resume}') 195 | 196 | # step3: data 197 | normalize = Normalize(mean=127.5, std=128) 198 | 199 | train_dataset = DDFADataset( 200 | root=args.root, 201 | filelists=args.filelists_train, 202 | param_fp=args.param_fp_train, 203 | gt_transform=True, 204 | 
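# Sketch (toy values) of how the training step above aggregates the named losses
# returned by the model into a single scalar before backward(): each named loss is
# batch-averaged and the sum of the averages is what gets differentiated.
import torch

losses = {'loss_LMK_f0': torch.tensor([0.9, 1.1]),       # illustrative per-sample losses
          'loss_Param_In': torch.tensor([0.2, 0.4])}

loss_total = 0
for name in losses:
    loss_total = loss_total + losses[name].mean()
print(float(loss_total))    # ~1.3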
transform=Compose_GT([ColorJitter(0.4,0.4,0.4), ToTensor(), CenterCrop(5), normalize]) # 205 | ) 206 | 207 | train_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.workers, 208 | shuffle=True, pin_memory=True, drop_last=True) 209 | 210 | 211 | # step4: run 212 | cudnn.benchmark = True 213 | if args.test_initial: # if testing the performance from the initial 214 | logging.info('Testing from initial') 215 | benchmark_pipeline(model) 216 | 217 | 218 | for epoch in range(args.start_epoch, args.epochs + 1): 219 | # adjust learning rate 220 | lr = adjust_learning_rate(optimizer, epoch, args.milestones) 221 | 222 | # train for one epoch 223 | train(train_loader, model, optimizer, epoch, lr) 224 | 225 | filename = f'{args.snapshot}_checkpoint_epoch_{epoch}.pth.tar' 226 | # save checkpoints and current model validation 227 | if (epoch % args.save_val_freq == 0) or (epoch==args.epochs): 228 | save_checkpoint( 229 | { 230 | 'epoch': epoch, 231 | 'state_dict': model.state_dict(), 232 | }, 233 | filename 234 | ) 235 | logging.info('\nVal[{}]'.format(epoch)) 236 | benchmark_pipeline(model) 237 | 238 | if __name__ == '__main__': 239 | main() 240 | -------------------------------------------------------------------------------- /model_building.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from torchvision import transforms as T 5 | import scipy.io as sio 6 | 7 | # All data parameters import 8 | from utils.params import ParamsPack 9 | param_pack = ParamsPack() 10 | 11 | from backbone_nets import resnet_backbone 12 | from backbone_nets import mobilenetv1_backbone 13 | from backbone_nets import mobilenetv2_backbone 14 | from backbone_nets import ghostnet_backbone 15 | from backbone_nets.pointnet_backbone import MLP_for, MLP_rev 16 | from loss_definition import ParamLoss, WingLoss 17 | 18 | from backbone_nets.ResNeSt import resnest50, resnest101 19 | import time 20 | from utils.inference import predict_sparseVert, predict_denseVert, predict_pose, crop_img 21 | from FaceBoxes import FaceBoxes 22 | import cv2 23 | import types 24 | 25 | def parse_param_62(param): 26 | """Work for only tensor""" 27 | p_ = param[:, :12].reshape(-1, 3, 4) 28 | p = p_[:, :, :3] 29 | offset = p_[:, :, -1].reshape(-1, 3, 1) 30 | alpha_shp = param[:, 12:52].reshape(-1, 40, 1) 31 | alpha_exp = param[:, 52:62].reshape(-1, 10, 1) 32 | return p, offset, alpha_shp, alpha_exp 33 | 34 | # Image-to-parameter 35 | class I2P(nn.Module): 36 | def __init__(self, args): 37 | super(I2P, self).__init__() 38 | self.args = args 39 | # backbone definition 40 | if 'mobilenet_v2' in self.args.arch: 41 | self.backbone = getattr(mobilenetv2_backbone, args.arch)(pretrained=False) 42 | elif 'mobilenet' in self.args.arch: 43 | self.backbone = getattr(mobilenetv1_backbone, args.arch)() 44 | elif 'resnet' in self.args.arch: 45 | self.backbone = getattr(resnet_backbone, args.arch)(pretrained=False) 46 | elif 'ghostnet' in self.args.arch: 47 | self.backbone = getattr(ghostnet_backbone, args.arch)() 48 | elif 'resnest' in self.args.arch: 49 | self.backbone = resnest50() 50 | else: 51 | raise RuntimeError("Please choose [mobilenet_v2, mobilenet_1, resnet50, or ghostnet]") 52 | 53 | def forward(self,input, target): 54 | """Training time forward""" 55 | _3D_attr, avgpool = self.backbone(input) 56 | _3D_attr_GT = target.type(torch.cuda.FloatTensor) 57 | return _3D_attr, _3D_attr_GT, avgpool 58 | 59 | def forward_test(self, input): 
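# Small sketch of the input normalization convention used throughout this repo
# (mean=127.5, std=128): 8-bit pixel values are mapped approximately into [-1, 1).
import numpy as np

img = np.array([[0, 127, 255]], dtype=np.float32)
normalized = (img - 127.5) / 128.0
print(normalized)   # maps 0, 127, 255 to about -0.996, -0.004, 0.996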
60 | """ Testing time forward.""" 61 | _3D_attr, avgpool = self.backbone(input) 62 | return _3D_attr, avgpool 63 | 64 | # Main model SynergyNet definition 65 | class SynergyNet(nn.Module): 66 | def __init__(self, args): 67 | super(SynergyNet, self).__init__() 68 | self.triangles = sio.loadmat('./3dmm_data/tri.mat')['tri'] -1 69 | self.triangles = torch.Tensor(self.triangles.astype(np.int64)).long().cuda() 70 | self.img_size = args.img_size 71 | # Image-to-parameter 72 | self.I2P = I2P(args) 73 | # Forward 74 | self.forwardDirection = MLP_for(68) 75 | # Reverse 76 | self.reverseDirection = MLP_rev(68) 77 | self.LMKLoss_3D = WingLoss() 78 | self.ParamLoss = ParamLoss() 79 | 80 | self.loss = {'loss_LMK_f0':0.0, 81 | 'loss_LMK_pointNet': 0.0, 82 | 'loss_Param_In':0.0, 83 | 'loss_Param_S2': 0.0, 84 | 'loss_Param_S1S2': 0.0, 85 | } 86 | 87 | self.register_buffer('param_mean', torch.Tensor(param_pack.param_mean).cuda(non_blocking=True)) 88 | self.register_buffer('param_std', torch.Tensor(param_pack.param_std).cuda(non_blocking=True)) 89 | self.register_buffer('w_shp', torch.Tensor(param_pack.w_shp).cuda(non_blocking=True)) 90 | self.register_buffer('u', torch.Tensor(param_pack.u).cuda(non_blocking=True)) 91 | self.register_buffer('w_exp', torch.Tensor(param_pack.w_exp).cuda(non_blocking=True)) 92 | 93 | # If doing only offline evaluation, use these 94 | # self.u_base = torch.Tensor(param_pack.u_base).cuda(non_blocking=True) 95 | # self.w_shp_base = torch.Tensor(param_pack.w_shp_base).cuda(non_blocking=True) 96 | # self.w_exp_base = torch.Tensor(param_pack.w_exp_base).cuda(non_blocking=True) 97 | 98 | # Online training needs these to parallel 99 | self.register_buffer('u_base', torch.Tensor(param_pack.u_base).cuda(non_blocking=True)) 100 | self.register_buffer('w_shp_base', torch.Tensor(param_pack.w_shp_base).cuda(non_blocking=True)) 101 | self.register_buffer('w_exp_base', torch.Tensor(param_pack.w_exp_base).cuda(non_blocking=True)) 102 | self.keypoints = torch.Tensor(param_pack.keypoints).long() 103 | 104 | self.data_param = [self.param_mean, self.param_std, self.w_shp_base, self.u_base, self.w_exp_base] 105 | 106 | def reconstruct_vertex_62(self, param, whitening=True, dense=False, transform=True, lmk_pts=68): 107 | """ 108 | Whitening param -> 3d vertex, based on the 3dmm param: u_base, w_shp, w_exp 109 | dense: if True, return dense vertex, else return 68 sparse landmarks. All dense or sparse vertex is transformed to 110 | image coordinate space, but without alignment caused by face cropping. 111 | transform: whether transform to image space 112 | Working with batched tensors. Using Fortan-type reshape. 
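# Hedged sketch of why the 3DMM bases above are stored with register_buffer:
# buffers travel with the module across devices (and through DataParallel) and are
# saved in the state_dict, but they are not returned by parameters(), so the
# optimizer never updates them. TinyModule is a hypothetical stand-in.
import torch
import torch.nn as nn

class TinyModule(nn.Module):
    def __init__(self):
        super().__init__()
        self.register_buffer('u_base', torch.zeros(204, 1))
        self.fc = nn.Linear(4, 2)

m = TinyModule()
print('u_base' in m.state_dict())                      # True
print(any(p is m.u_base for p in m.parameters()))      # False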
113 | """ 114 | 115 | if whitening: 116 | if param.shape[1] == 62: 117 | param_ = param * self.param_std[:62] + self.param_mean[:62] 118 | else: 119 | raise RuntimeError('length of params mismatch') 120 | 121 | p, offset, alpha_shp, alpha_exp = parse_param_62(param_) 122 | 123 | if dense: 124 | 125 | vertex = p @ (self.u + self.w_shp @ alpha_shp + self.w_exp @ alpha_exp).contiguous().view(-1, 53215, 3).transpose(1,2) + offset 126 | 127 | if transform: 128 | # transform to image coordinate space 129 | vertex[:, 1, :] = param_pack.std_size + 1 - vertex[:, 1, :] 130 | 131 | else: 132 | """For 68 pts""" 133 | vertex = p @ (self.u_base + self.w_shp_base @ alpha_shp + self.w_exp_base @ alpha_exp).contiguous().view(-1, lmk_pts, 3).transpose(1,2) + offset 134 | 135 | if transform: 136 | # transform to image coordinate space 137 | vertex[:, 1, :] = param_pack.std_size + 1 - vertex[:, 1, :] 138 | 139 | return vertex 140 | 141 | def forward(self, input, target): 142 | _3D_attr, _3D_attr_GT, avgpool = self.I2P(input, target) 143 | 144 | vertex_lmk = self.reconstruct_vertex_62(_3D_attr, dense=False) 145 | vertex_GT_lmk = self.reconstruct_vertex_62(_3D_attr_GT, dense=False) 146 | self.loss['loss_LMK_f0'] = 0.05 *self.LMKLoss_3D(vertex_lmk, vertex_GT_lmk, kp=True) 147 | self.loss['loss_Param_In'] = 0.02 * self.ParamLoss(_3D_attr, _3D_attr_GT) 148 | 149 | point_residual = self.forwardDirection(vertex_lmk, avgpool, _3D_attr[:,12:52], _3D_attr[:,52:62]) 150 | vertex_lmk = vertex_lmk + 0.05 * point_residual 151 | self.loss['loss_LMK_pointNet'] = 0.05 * self.LMKLoss_3D(vertex_lmk, vertex_GT_lmk, kp=True) 152 | 153 | _3D_attr_S2 = self.reverseDirection(vertex_lmk) 154 | self.loss['loss_Param_S2'] = 0.02 * self.ParamLoss(_3D_attr_S2, _3D_attr_GT, mode='only_3dmm') 155 | self.loss['loss_Param_S1S2'] = 0.001 * self.ParamLoss(_3D_attr_S2, _3D_attr, mode='only_3dmm') 156 | 157 | return self.loss 158 | 159 | def forward_test(self, input): 160 | """test time forward""" 161 | _3D_attr, _ = self.I2P.forward_test(input) 162 | return _3D_attr 163 | 164 | def get_losses(self): 165 | return self.loss.keys() 166 | 167 | 168 | # Main model SynergyNet definition 169 | class WrapUpSynergyNet(nn.Module): 170 | def __init__(self): 171 | super(WrapUpSynergyNet, self).__init__() 172 | self.triangles = sio.loadmat('./3dmm_data/tri.mat')['tri'] -1 173 | self.triangles = torch.Tensor(self.triangles.astype(np.int64)).long() 174 | args = types.SimpleNamespace() 175 | args.arch = 'mobilenet_v2' 176 | args.checkpoint_fp = 'pretrained/best.pth.tar' 177 | 178 | # Image-to-parameter 179 | self.I2P = I2P(args) 180 | # Forward 181 | self.forwardDirection = MLP_for(68) 182 | # Reverse 183 | self.reverseDirection = MLP_rev(68) 184 | self.LMKLoss_3D = WingLoss() 185 | self.ParamLoss = ParamLoss() 186 | 187 | self.loss = {'loss_LMK_f0':0.0, 188 | 'loss_LMK_pointNet': 0.0, 189 | 'loss_Param_In':0.0, 190 | 'loss_Param_S2': 0.0, 191 | 'loss_Param_S1S2': 0.0, 192 | } 193 | 194 | self.register_buffer('param_mean', torch.Tensor(param_pack.param_mean)) 195 | self.register_buffer('param_std', torch.Tensor(param_pack.param_std)) 196 | self.register_buffer('w_shp', torch.Tensor(param_pack.w_shp)) 197 | self.register_buffer('u', torch.Tensor(param_pack.u)) 198 | self.register_buffer('w_exp', torch.Tensor(param_pack.w_exp)) 199 | 200 | # Online training needs these to parallel 201 | self.register_buffer('u_base', torch.Tensor(param_pack.u_base)) 202 | self.register_buffer('w_shp_base', torch.Tensor(param_pack.w_shp_base)) 203 | 
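# A schematic sketch (stand-in callables, illustrative only) of the synergy cycle
# in SynergyNet.forward: image -> 62-d params (I2P) -> 68 landmarks -> landmark
# refinement (MLP_for residual, weighted by 0.05 as above) -> 3DMM params
# regressed back from landmarks (MLP_rev), with each stage supervised.
import torch

def i2p(img):           return torch.zeros(img.shape[0], 62)         # stand-in for I2P
def lmk_from(params):   return torch.zeros(params.shape[0], 3, 68)   # stand-in reconstruction
def refine(lmk):        return 0.1 * torch.ones_like(lmk)            # stand-in MLP_for residual
def params_from(lmk):   return torch.zeros(lmk.shape[0], 50)         # stand-in MLP_rev (shape+exp)

img = torch.zeros(2, 3, 120, 120)
params_s1 = i2p(img)
lmk = lmk_from(params_s1)
lmk_refined = lmk + 0.05 * refine(lmk)
params_s2 = params_from(lmk_refined)
print(params_s1.shape, lmk_refined.shape, params_s2.shape)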
self.register_buffer('w_exp_base', torch.Tensor(param_pack.w_exp_base)) 204 | self.keypoints = torch.Tensor(param_pack.keypoints).long() 205 | 206 | self.data_param = [self.param_mean, self.param_std, self.w_shp_base, self.u_base, self.w_exp_base] 207 | 208 | try: 209 | print("loading weights from ", args.checkpoint_fp) 210 | self.load_weights(args.checkpoint_fp) 211 | except: 212 | pass 213 | self.eval() 214 | 215 | def reconstruct_vertex_62(self, param, whitening=True, dense=False, transform=True, lmk_pts=68): 216 | """ 217 | Whitening param -> 3d vertex, based on the 3dmm param: u_base, w_shp, w_exp 218 | dense: if True, return dense vertex, else return 68 sparse landmarks. All dense or sparse vertex is transformed to 219 | image coordinate space, but without alignment caused by face cropping. 220 | transform: whether transform to image space 221 | Working with batched tensors. Using Fortan-type reshape. 222 | """ 223 | 224 | if whitening: 225 | if param.shape[1] == 62: 226 | param_ = param * self.param_std[:62] + self.param_mean[:62] 227 | else: 228 | raise RuntimeError('length of params mismatch') 229 | 230 | p, offset, alpha_shp, alpha_exp = parse_param_62(param_) 231 | 232 | if dense: 233 | 234 | vertex = p @ (self.u + self.w_shp @ alpha_shp + self.w_exp @ alpha_exp).contiguous().view(-1, 53215, 3).transpose(1,2) + offset 235 | 236 | if transform: 237 | # transform to image coordinate space 238 | vertex[:, 1, :] = param_pack.std_size + 1 - vertex[:, 1, :] 239 | 240 | else: 241 | """For 68 pts""" 242 | vertex = p @ (self.u_base + self.w_shp_base @ alpha_shp + self.w_exp_base @ alpha_exp).contiguous().view(-1, lmk_pts, 3).transpose(1,2) + offset 243 | 244 | if transform: 245 | # transform to image coordinate space 246 | vertex[:, 1, :] = param_pack.std_size + 1 - vertex[:, 1, :] 247 | 248 | return vertex 249 | 250 | def forward_test(self, input): 251 | """test time forward""" 252 | _3D_attr, _ = self.I2P.forward_test(input) 253 | return _3D_attr 254 | 255 | def load_weights(self, path): 256 | model_dict = self.state_dict() 257 | checkpoint = torch.load(path, map_location=lambda storage, loc: storage)['state_dict'] 258 | 259 | # because the model is trained by multiple gpus, prefix 'module' should be removed 260 | for k in checkpoint.keys(): 261 | model_dict[k.replace('module.', '')] = checkpoint[k] 262 | 263 | self.load_state_dict(model_dict, strict=False) 264 | 265 | 266 | def get_all_outputs(self, input): 267 | """convenient api to get 3d landmarks, face pose, 3d faces""" 268 | 269 | face_boxes = FaceBoxes() 270 | rects = face_boxes(input) 271 | 272 | # storage 273 | pts_res = [] 274 | poses = [] 275 | vertices_lst = [] 276 | for idx, rect in enumerate(rects): 277 | roi_box = rect 278 | 279 | # enlarge the bbox a little and do a square crop 280 | HCenter = (rect[1] + rect[3])/2 281 | WCenter = (rect[0] + rect[2])/2 282 | side_len = roi_box[3]-roi_box[1] 283 | margin = side_len * 1.2 // 2 284 | roi_box[0], roi_box[1], roi_box[2], roi_box[3] = WCenter-margin, HCenter-margin, WCenter+margin, HCenter+margin 285 | 286 | img = crop_img(input, roi_box) 287 | img = cv2.resize(img, dsize=(120, 120), interpolation=cv2.INTER_LANCZOS4) 288 | img = torch.from_numpy(img) 289 | img = img.permute(2,0,1) 290 | img = img.unsqueeze(0) 291 | img = (img - 127.5)/ 128.0 292 | 293 | with torch.no_grad(): 294 | param = self.forward_test(img) 295 | 296 | param = param.squeeze().cpu().numpy().flatten().astype(np.float32) 297 | 298 | lmks = predict_sparseVert(param, roi_box, transform=True) 299 | vertices = 
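# Minimal sketch of the 'module.' prefix stripping performed by load_weights below:
# checkpoints saved from nn.DataParallel prefix every key with 'module.', which must
# be removed before loading into a plain single-GPU module. The keys here are
# illustrative, not the real checkpoint contents.
checkpoint = {'module.I2P.backbone.weight': 0, 'module.param_mean': 1}
cleaned = {k.replace('module.', ''): v for k, v in checkpoint.items()}
print(sorted(cleaned))   # ['I2P.backbone.weight', 'param_mean']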
predict_denseVert(param, roi_box, transform=True) 300 | angles, translation = predict_pose(param, roi_box) 301 | 302 | pts_res.append(lmks) 303 | vertices_lst.append(vertices) 304 | poses.append([angles, translation]) 305 | 306 | return pts_res, vertices_lst, poses 307 | 308 | if __name__ == '__main__': 309 | pass 310 | -------------------------------------------------------------------------------- /pretrained/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/pretrained/__init__.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='synergy-3dmm', 5 | version='0.0.1', 6 | description='Library for accurate and fast 3d landmarks, face mesh, and face pose prediction', 7 | packages=find_packages(exclude=('artistic*', 'benchmark*', 'loss_definition*', 'demo*', 'img*', 'pretrained*', 'Sim3DR', 'main_train*', 'singleImage*', 'model_building*', 'uv_*', 'FaceBoxes*', '3dmm_data*')), 8 | install_requires=[ 9 | 'torch', 10 | 'numpy', 11 | ], 12 | ) -------------------------------------------------------------------------------- /singleImage.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as transforms 3 | import numpy as np 4 | import cv2 5 | from utils.ddfa import ToTensor, Normalize 6 | from model_building import SynergyNet 7 | from utils.inference import crop_img, predict_sparseVert, draw_landmarks, predict_denseVert, predict_pose, draw_axis 8 | import argparse 9 | import torch.backends.cudnn as cudnn 10 | cudnn.benchmark = True 11 | import os 12 | import os.path as osp 13 | import glob 14 | from FaceBoxes import FaceBoxes 15 | from utils.render import render 16 | 17 | # Following 3DDFA-V2, we also use 120x120 resolution 18 | IMG_SIZE = 120 19 | 20 | def main(args): 21 | # load pre-tained model 22 | checkpoint_fp = 'pretrained/best.pth.tar' 23 | args.arch = 'mobilenet_v2' 24 | args.devices_id = [0] 25 | 26 | checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict'] 27 | 28 | model = SynergyNet(args) 29 | model_dict = model.state_dict() 30 | 31 | # because the model is trained by multiple gpus, prefix 'module' should be removed 32 | for k in checkpoint.keys(): 33 | model_dict[k.replace('module.', '')] = checkpoint[k] 34 | 35 | model.load_state_dict(model_dict, strict=False) 36 | model = model.cuda() 37 | model.eval() 38 | 39 | # face detector 40 | face_boxes = FaceBoxes() 41 | 42 | # preparation 43 | transform = transforms.Compose([ToTensor(), Normalize(mean=127.5, std=128)]) 44 | if osp.isdir(args.files): 45 | if not args.files[-1] == '/': 46 | args.files = args.files + '/' 47 | if not args.png: 48 | files = sorted(glob.glob(args.files+'*.jpg')) 49 | else: 50 | files = sorted(glob.glob(args.files+'*.png')) 51 | else: 52 | files = [args.files] 53 | 54 | for img_fp in files: 55 | print("Process the image: ", img_fp) 56 | 57 | img_ori = cv2.imread(img_fp) 58 | 59 | # crop faces 60 | rects = face_boxes(img_ori) 61 | 62 | # storage 63 | pts_res = [] 64 | poses = [] 65 | vertices_lst = [] 66 | for idx, rect in enumerate(rects): 67 | roi_box = rect 68 | 69 | # enlarge the bbox a little and do a square crop 70 | HCenter = (rect[1] + rect[3])/2 71 | 
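# Worked sketch (made-up detection box) of the square-crop enlargement used before
# resizing to 120x120: the detected box is recentred and grown to 1.2x its height.
rect = [100.0, 80.0, 180.0, 200.0]             # xmin, ymin, xmax, ymax from the detector
HCenter = (rect[1] + rect[3]) / 2               # 140.0
WCenter = (rect[0] + rect[2]) / 2               # 140.0
side_len = rect[3] - rect[1]                    # 120.0
margin = side_len * 1.2 // 2                    # 72.0
roi_box = [WCenter - margin, HCenter - margin, WCenter + margin, HCenter + margin]
print(roi_box)   # [68.0, 68.0, 212.0, 212.0] -> a 144x144 square crop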
WCenter = (rect[0] + rect[2])/2 72 | side_len = roi_box[3]-roi_box[1] 73 | margin = side_len * 1.2 // 2 74 | roi_box[0], roi_box[1], roi_box[2], roi_box[3] = WCenter-margin, HCenter-margin, WCenter+margin, HCenter+margin 75 | 76 | img = crop_img(img_ori, roi_box) 77 | img = cv2.resize(img, dsize=(IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_LINEAR) 78 | # cv2.imwrite(f'validate_{idx}.png', img) 79 | 80 | input = transform(img).unsqueeze(0) 81 | with torch.no_grad(): 82 | input = input.cuda() 83 | param = model.forward_test(input) 84 | param = param.squeeze().cpu().numpy().flatten().astype(np.float32) 85 | 86 | # inferences 87 | lmks = predict_sparseVert(param, roi_box, transform=True) 88 | vertices = predict_denseVert(param, roi_box, transform=True) 89 | angles, translation = predict_pose(param, roi_box) 90 | 91 | pts_res.append(lmks) 92 | vertices_lst.append(vertices) 93 | poses.append([angles, translation, lmks]) 94 | 95 | if not osp.exists(f'inference_output/rendering_overlay/'): 96 | os.makedirs(f'inference_output/rendering_overlay/') 97 | if not osp.exists(f'inference_output/landmarks/'): 98 | os.makedirs(f'inference_output/landmarks/') 99 | if not osp.exists(f'inference_output/poses/'): 100 | os.makedirs(f'inference_output/poses/') 101 | 102 | name = img_fp.rsplit('/',1)[-1][:-4] 103 | img_ori_copy = img_ori.copy() 104 | 105 | # mesh 106 | render(img_ori, vertices_lst, alpha=0.6, wfp=f'inference_output/rendering_overlay/{name}.jpg') 107 | 108 | # landmarks 109 | draw_landmarks(img_ori_copy, pts_res, wfp=f'inference_output/landmarks/{name}.jpg') 110 | 111 | # face orientation 112 | img_axis_plot = img_ori_copy 113 | for angles, translation, lmks in poses: 114 | img_axis_plot = draw_axis(img_axis_plot, angles[0], angles[1], 115 | angles[2], translation[0], translation[1], size = 50, pts68=lmks) 116 | wfp = f'inference_output/poses/{name}.jpg' 117 | cv2.imwrite(wfp, img_axis_plot) 118 | print(f'Save pose result to {wfp}') 119 | 120 | 121 | if __name__ == '__main__': 122 | parser = argparse.ArgumentParser() 123 | parser.add_argument('-f', '--files', default='', help='path to a single image or path to a folder containing multiple images') 124 | parser.add_argument("--png", action="store_true", help="if images are with .png extension") 125 | parser.add_argument('--img_size', default=120, type=int) 126 | parser.add_argument('-b', '--batch-size', default=1, type=int) 127 | 128 | args = parser.parse_args() 129 | main(args) -------------------------------------------------------------------------------- /singleImage_simple.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from synergy3DMM import SynergyNet 3 | 4 | 5 | if __name__ == '__main__': 6 | model = SynergyNet() 7 | I=cv2.imread('img/sample_2.jpg', -1) 8 | # get landmark [[y, x, z], 68 (points)], mesh [[y, x, z], 53215 (points)], and face pose (Euler angles [yaw, pitch, roll] and translation [y, x, z]) 9 | lmk3d, mesh, pose = model.get_all_outputs(I) 10 | print(lmk3d[0].shape) 11 | print(mesh[0].shape) 12 | print(pose[0]) 13 | -------------------------------------------------------------------------------- /synergy3DMM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from torchvision import transforms as T 5 | import scipy.io as sio 6 | 7 | # All data parameters import 8 | from utils.params import ParamsPack 9 | param_pack = ParamsPack() 10 | 11 | from backbone_nets import 
resnet_backbone 12 | from backbone_nets import mobilenetv1_backbone 13 | from backbone_nets import mobilenetv2_backbone 14 | from backbone_nets import ghostnet_backbone 15 | from backbone_nets.pointnet_backbone import MLP_for, MLP_rev 16 | import loss_definition 17 | from loss_definition import ParamLoss, WingLoss 18 | 19 | from backbone_nets.ResNeSt import resnest50, resnest101 20 | import time 21 | from utils.inference import predict_sparseVert, predict_denseVert, predict_pose, crop_img 22 | from FaceBoxes import FaceBoxes 23 | import cv2 24 | import types 25 | import os 26 | 27 | prefix_path = os.path.abspath(loss_definition.__file__).rsplit('/',1)[0] 28 | print(prefix_path) 29 | 30 | def parse_param_62(param): 31 | """Work for only tensor""" 32 | p_ = param[:, :12].reshape(-1, 3, 4) 33 | p = p_[:, :, :3] 34 | offset = p_[:, :, -1].reshape(-1, 3, 1) 35 | alpha_shp = param[:, 12:52].reshape(-1, 40, 1) 36 | alpha_exp = param[:, 52:62].reshape(-1, 10, 1) 37 | return p, offset, alpha_shp, alpha_exp 38 | 39 | # Image-to-parameter 40 | class I2P(nn.Module): 41 | def __init__(self, args): 42 | super(I2P, self).__init__() 43 | self.args = args 44 | # backbone definition 45 | if 'mobilenet_v2' in self.args.arch: 46 | self.backbone = getattr(mobilenetv2_backbone, args.arch)(pretrained=False) 47 | elif 'mobilenet' in self.args.arch: 48 | self.backbone = getattr(mobilenetv1_backbone, args.arch)() 49 | elif 'resnet' in self.args.arch: 50 | self.backbone = getattr(resnet_backbone, args.arch)(pretrained=False) 51 | elif 'ghostnet' in self.args.arch: 52 | self.backbone = getattr(ghostnet_backbone, args.arch)() 53 | elif 'resnest' in self.args.arch: 54 | self.backbone = resnest50() 55 | else: 56 | raise RuntimeError("Please choose [mobilenet_v2, mobilenet_1, resnet50, or ghostnet]") 57 | 58 | def forward(self,input, target): 59 | """Training time forward""" 60 | _3D_attr, avgpool = self.backbone(input) 61 | _3D_attr_GT = target.type(torch.cuda.FloatTensor) 62 | return _3D_attr, _3D_attr_GT, avgpool 63 | 64 | def forward_test(self, input): 65 | """ Testing time forward.""" 66 | _3D_attr, avgpool = self.backbone(input) 67 | return _3D_attr, avgpool 68 | 69 | # Main model SynergyNet definition 70 | class SynergyNet(nn.Module): 71 | def __init__(self): 72 | super(SynergyNet, self).__init__() 73 | self.triangles = sio.loadmat(os.path.join(prefix_path, '3dmm_data/tri.mat'))['tri'] -1 74 | self.triangles = torch.Tensor(self.triangles.astype(np.int64)).long() 75 | args = types.SimpleNamespace() 76 | args.arch = 'mobilenet_v2' 77 | args.checkpoint_fp = os.path.join(prefix_path, 'pretrained/best.pth.tar') 78 | 79 | # Image-to-parameter 80 | self.I2P = I2P(args) 81 | # Forward 82 | self.forwardDirection = MLP_for(68) 83 | # Reverse 84 | self.reverseDirection = MLP_rev(68) 85 | self.LMKLoss_3D = WingLoss() 86 | self.ParamLoss = ParamLoss() 87 | 88 | self.loss = {'loss_LMK_f0':0.0, 89 | 'loss_LMK_pointNet': 0.0, 90 | 'loss_Param_In':0.0, 91 | 'loss_Param_S2': 0.0, 92 | 'loss_Param_S1S2': 0.0, 93 | } 94 | 95 | self.register_buffer('param_mean', torch.Tensor(param_pack.param_mean)) 96 | self.register_buffer('param_std', torch.Tensor(param_pack.param_std)) 97 | self.register_buffer('w_shp', torch.Tensor(param_pack.w_shp)) 98 | self.register_buffer('u', torch.Tensor(param_pack.u)) 99 | self.register_buffer('w_exp', torch.Tensor(param_pack.w_exp)) 100 | 101 | # Online training needs these to parallel 102 | self.register_buffer('u_base', torch.Tensor(param_pack.u_base)) 103 | self.register_buffer('w_shp_base', 
torch.Tensor(param_pack.w_shp_base)) 104 | self.register_buffer('w_exp_base', torch.Tensor(param_pack.w_exp_base)) 105 | self.keypoints = torch.Tensor(param_pack.keypoints).long() 106 | 107 | self.data_param = [self.param_mean, self.param_std, self.w_shp_base, self.u_base, self.w_exp_base] 108 | 109 | try: 110 | print("loading weights from ", args.checkpoint_fp) 111 | self.load_weights(args.checkpoint_fp) 112 | except: 113 | pass 114 | self.eval() 115 | 116 | def reconstruct_vertex_62(self, param, whitening=True, dense=False, transform=True, lmk_pts=68): 117 | """ 118 | Whitening param -> 3d vertex, based on the 3dmm param: u_base, w_shp, w_exp 119 | dense: if True, return dense vertices, else return 68 sparse landmarks. All dense or sparse vertices are transformed to 120 | image coordinate space, but without the alignment caused by face cropping. 121 | transform: whether to transform to image space 122 | Works with batched tensors. Uses Fortran-type reshape. 123 | """ 124 | 125 | if whitening: 126 | if param.shape[1] == 62: 127 | param_ = param * self.param_std[:62] + self.param_mean[:62] 128 | else: 129 | raise RuntimeError('length of params mismatch') 130 | 131 | p, offset, alpha_shp, alpha_exp = parse_param_62(param_) 132 | 133 | if dense: 134 | 135 | vertex = p @ (self.u + self.w_shp @ alpha_shp + self.w_exp @ alpha_exp).contiguous().view(-1, 53215, 3).transpose(1,2) + offset 136 | 137 | if transform: 138 | # transform to image coordinate space 139 | vertex[:, 1, :] = param_pack.std_size + 1 - vertex[:, 1, :] 140 | 141 | else: 142 | """For 68 pts""" 143 | vertex = p @ (self.u_base + self.w_shp_base @ alpha_shp + self.w_exp_base @ alpha_exp).contiguous().view(-1, lmk_pts, 3).transpose(1,2) + offset 144 | 145 | if transform: 146 | # transform to image coordinate space 147 | vertex[:, 1, :] = param_pack.std_size + 1 - vertex[:, 1, :] 148 | 149 | return vertex 150 | 151 | def forward_test(self, input): 152 | """test time forward""" 153 | _3D_attr, _ = self.I2P.forward_test(input) 154 | return _3D_attr 155 | 156 | def load_weights(self, path): 157 | model_dict = self.state_dict() 158 | checkpoint = torch.load(path, map_location=lambda storage, loc: storage)['state_dict'] 159 | 160 | # because the model is trained by multiple gpus, prefix 'module' should be removed 161 | for k in checkpoint.keys(): 162 | model_dict[k.replace('module.', '')] = checkpoint[k] 163 | 164 | self.load_state_dict(model_dict, strict=False) 165 | 166 | 167 | def get_all_outputs(self, input): 168 | """convenient api to get 3d landmarks, face pose, 3d faces""" 169 | 170 | face_boxes = FaceBoxes() 171 | rects = face_boxes(input) 172 | 173 | # storage 174 | pts_res = [] 175 | poses = [] 176 | vertices_lst = [] 177 | for idx, rect in enumerate(rects): 178 | roi_box = rect 179 | 180 | # enlarge the bbox a little and do a square crop 181 | HCenter = (rect[1] + rect[3])/2 182 | WCenter = (rect[0] + rect[2])/2 183 | side_len = roi_box[3]-roi_box[1] 184 | margin = side_len * 1.2 // 2 185 | roi_box[0], roi_box[1], roi_box[2], roi_box[3] = WCenter-margin, HCenter-margin, WCenter+margin, HCenter+margin 186 | 187 | img = crop_img(input, roi_box) 188 | img = cv2.resize(img, dsize=(120, 120), interpolation=cv2.INTER_LANCZOS4) 189 | img = torch.from_numpy(img) 190 | img = img.permute(2,0,1) 191 | img = img.unsqueeze(0) 192 | img = (img - 127.5)/ 128.0 193 | 194 | with torch.no_grad(): 195 | param = self.forward_test(img) 196 | 197 | param = param.squeeze().cpu().numpy().flatten().astype(np.float32) 198 | 199 | lmks = predict_sparseVert(param, 
roi_box, transform=True) 200 | vertices = predict_denseVert(param, roi_box, transform=True) 201 | angles, translation = predict_pose(param, roi_box) 202 | 203 | pts_res.append(lmks) 204 | vertices_lst.append(vertices) 205 | poses.append([angles, translation]) 206 | 207 | return pts_res, vertices_lst, poses 208 | 209 | if __name__ == '__main__': 210 | pass 211 | -------------------------------------------------------------------------------- /train_script.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | LOG_ALIAS=$1 4 | LOG_DIR="ckpts/logs" 5 | mkdir -p ${LOG_DIR} 6 | 7 | LOG_FILE="${LOG_DIR}/`date +'%Y-%m-%d_%H:%M.%S'`.log" 8 | 9 | python3 main_train.py --arch="mobilenet_v2" \ 10 | --start-epoch=1 \ 11 | --snapshot="ckpts/SynergyNet" \ 12 | --param-fp-train='./3dmm_data/param_all_norm_v201.pkl' \ 13 | --warmup=5 \ 14 | --batch-size=1024 \ 15 | --base-lr=0.08 \ 16 | --epochs=80 \ 17 | --milestones=48,64 \ 18 | --print-freq=50 \ 19 | --devices-id=0 \ 20 | --workers=8 \ 21 | --filelists-train="./3dmm_data/train_aug_120x120.list.train" \ 22 | --root="./train_aug_120x120" \ 23 | --log-file="${LOG_FILE}" \ 24 | --test_initial=True \ 25 | --save_val_freq=5 \ 26 | --resume="" \ 27 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/choyingw/SynergyNet/1352b871e58a3169ecf50312aa6185a6412b5e08/utils/__init__.py -------------------------------------------------------------------------------- /utils/inference.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from utils.params import ParamsPack 4 | param_pack = ParamsPack() 5 | from math import cos, sin, atan2, asin, sqrt 6 | import cv2 7 | 8 | def write_obj(obj_name, vertices, triangles): 9 | triangles = triangles.copy() # meshlab start with 1 10 | 11 | if obj_name.split('.')[-1] != 'obj': 12 | obj_name = obj_name + '.obj' 13 | 14 | # write obj 15 | with open(obj_name, 'w') as f: 16 | # write vertices & colors 17 | for i in range(vertices.shape[1]): 18 | s = 'v {:.4f} {:.4f} {:.4f}\n'.format(vertices[0, i], vertices[1, i], vertices[2, i]) 19 | f.write(s) 20 | # write f: ver ind/ uv ind 21 | for i in range(triangles.shape[1]): 22 | s = 'f {} {} {}\n'.format(triangles[2, i], triangles[1, i], triangles[0, i]) 23 | f.write(s) 24 | 25 | def parse_param(param): 26 | p_ = param[:12].reshape(3, 4) 27 | p = p_[:, :3] 28 | offset = p_[:, -1].reshape(3, 1) 29 | alpha_shp = param[12:52].reshape(40, 1) 30 | alpha_exp = param[52:62].reshape(10, 1) 31 | return p, offset, alpha_shp, alpha_exp 32 | 33 | def P2sRt(P): 34 | t3d = P[:, 3] 35 | R1 = P[0:1, :3] 36 | R2 = P[1:2, :3] 37 | s = (np.linalg.norm(R1) + np.linalg.norm(R2)) / 2.0 38 | r1 = R1 / np.linalg.norm(R1) 39 | r2 = R2 / np.linalg.norm(R2) 40 | r3 = np.cross(r1, r2) 41 | 42 | R = np.concatenate((r1, r2, r3), 0) 43 | return s, R, t3d 44 | 45 | def matrix2angle_corr(R): 46 | if R[2, 0] != 1 and R[2, 0] != -1: 47 | x = asin(R[2, 0]) 48 | y = atan2(R[1, 2] / cos(x), R[2, 2] / cos(x)) 49 | z = atan2(R[0, 1] / cos(x), R[0, 0] / cos(x)) 50 | 51 | else: # Gimbal lock 52 | z = 0 53 | if R[2, 0] == -1: 54 | x = np.pi / 2 55 | y = z + atan2(R[0, 1], R[0, 2]) 56 | else: 57 | x = -np.pi / 2 58 | y = -z + atan2(-R[0, 1], -R[0, 2]) 59 | 60 | rx, ry, rz = x*180/np.pi, y*180/np.pi, z*180/np.pi 61 | 
62 | return [rx, ry, rz] 63 | 64 | def param2vert(param, dense=False, transform=True): 65 | if param.shape[0] == 62: 66 | param_ = param * param_pack.param_std[:62] + param_pack.param_mean[:62] 67 | else: 68 | raise RuntimeError('length of params mismatch') 69 | 70 | p, offset, alpha_shp, alpha_exp = parse_param(param_) 71 | 72 | if dense: 73 | vertex = p @ (param_pack.u + param_pack.w_shp @ alpha_shp + param_pack.w_exp @ alpha_exp).reshape(3, -1, order='F') + offset 74 | if transform: 75 | # transform to image coordinate space 76 | vertex[1, :] = param_pack.std_size + 1 - vertex[1, :] 77 | 78 | else: 79 | vertex = p @ (param_pack.u_base + param_pack.w_shp_base @ alpha_shp + param_pack.w_exp_base @ alpha_exp).reshape(3, -1, order='F') + offset 80 | if transform: 81 | # transform to image coordinate space 82 | vertex[1, :] = param_pack.std_size + 1 - vertex[1, :] 83 | 84 | return vertex 85 | 86 | def parse_pose(param): 87 | param = param * param_pack.param_std[:62] + param_pack.param_mean[:62] 88 | Ps = param[:12].reshape(3, -1) # camera matrix 89 | s, R, t3d = P2sRt(Ps) 90 | P = np.concatenate((R, t3d.reshape(3, -1)), axis=1) # without scale 91 | pose = matrix2angle_corr(R) # yaw, pitch, roll 92 | return P, pose, t3d 93 | 94 | 95 | def crop_img(img, roi_box): 96 | h, w = img.shape[:2] 97 | 98 | sx, sy, ex, ey, _ = [int(round(_)) for _ in roi_box] 99 | dh, dw = ey - sy, ex - sx 100 | if len(img.shape) == 3: 101 | res = np.zeros((dh, dw, 3), dtype=np.uint8) 102 | else: 103 | res = np.zeros((dh, dw), dtype=np.uint8) 104 | if sx < 0: 105 | sx, dsx = 0, -sx 106 | else: 107 | dsx = 0 108 | 109 | if ex > w: 110 | ex, dex = w, dw - (ex - w) 111 | else: 112 | dex = dw 113 | 114 | if sy < 0: 115 | sy, dsy = 0, -sy 116 | else: 117 | dsy = 0 118 | 119 | if ey > h: 120 | ey, dey = h, dh - (ey - h) 121 | else: 122 | dey = dh 123 | 124 | res[dsy:dey, dsx:dex] = img[sy:ey, sx:ex] 125 | return res 126 | 127 | def _predict_vertices(param, roi_bbox, dense, transform=True): 128 | vertex = param2vert(param, dense=dense, transform=transform) 129 | sx, sy, ex, ey, _ = roi_bbox 130 | scale_x = (ex - sx) / 120 131 | scale_y = (ey - sy) / 120 132 | vertex[0, :] = vertex[0, :] * scale_x + sx 133 | vertex[1, :] = vertex[1, :] * scale_y + sy 134 | 135 | s = (scale_x + scale_y) / 2 136 | vertex[2, :] *= s 137 | 138 | return vertex 139 | 140 | def predict_sparseVert(param, roi_box, transform=False): 141 | return _predict_vertices(param, roi_box, dense=False, transform=transform) 142 | 143 | def predict_denseVert(param, roi_box, transform=False): 144 | return _predict_vertices(param, roi_box, dense=True, transform=transform) 145 | 146 | def predict_pose(param, roi_bbox, ret_mat=False): 147 | P, angles, t3d = parse_pose(param) 148 | 149 | sx, sy, ex, ey, _ = roi_bbox 150 | scale_x = (ex - sx) / 120 151 | scale_y = (ey - sy) / 120 152 | t3d[0] = t3d[0] * scale_x + sx 153 | t3d[1] = t3d[1] * scale_y + sy 154 | 155 | if ret_mat: 156 | return P 157 | return angles, t3d 158 | 159 | def draw_landmarks(img, pts, wfp): 160 | height, width = img.shape[:2] 161 | base = 6.4 162 | plt.figure(figsize=(base, height / width * base)) 163 | plt.imshow(img[:, :, ::-1]) 164 | plt.subplots_adjust(left=0, right=1, top=1, bottom=0) 165 | plt.axis('off') 166 | 167 | if not type(pts) in [tuple, list]: 168 | pts = [pts] 169 | for i in range(len(pts)): 170 | alpha = 0.8 171 | markersize = 1.5 172 | lw = 0.7 173 | color = 'g' 174 | markeredgecolor = 'green' 175 | 176 | nums = [0, 17, 22, 27, 31, 36, 42, 48, 60, 68] 177 | 178 | # close eyes and 
mouths 179 | plot_close = lambda i1, i2: plt.plot([pts[i][0, i1], pts[i][0, i2]], [pts[i][1, i1], pts[i][1, i2]], 180 | color=color, lw=lw, alpha=alpha - 0.1) 181 | plot_close(41, 36) 182 | plot_close(47, 42) 183 | plot_close(59, 48) 184 | plot_close(67, 60) 185 | 186 | for ind in range(len(nums) - 1): 187 | l, r = nums[ind], nums[ind + 1] 188 | plt.plot(pts[i][0, l:r], pts[i][1, l:r], color=color, lw=lw, alpha=alpha - 0.1) 189 | 190 | plt.plot(pts[i][0, l:r], pts[i][1, l:r], marker='o', linestyle='None', markersize=markersize, 191 | color=color, 192 | markeredgecolor=markeredgecolor, alpha=alpha) 193 | 194 | plt.savefig(wfp, dpi=200) 195 | print('Save landmark result to {}'.format(wfp)) 196 | plt.close() 197 | 198 | 199 | def draw_axis(img, yaw, pitch, roll, tdx=None, tdy=None, size = 100, pts68=None): 200 | pitch = pitch * np.pi / 180 201 | yaw = -(yaw * np.pi / 180) 202 | roll = roll * np.pi / 180 203 | 204 | if tdx != None and tdy != None: 205 | tdx = tdx 206 | tdy = tdy 207 | else: 208 | height, width = img.shape[:2] 209 | tdx = width / 2 210 | tdy = height / 2 211 | 212 | tdx = pts68[0,30] 213 | tdy = pts68[1,30] 214 | 215 | 216 | minx, maxx = np.min(pts68[0, :]), np.max(pts68[0, :]) 217 | miny, maxy = np.min(pts68[1, :]), np.max(pts68[1, :]) 218 | llength = sqrt((maxx - minx) * (maxy - miny)) 219 | size = llength * 0.5 220 | 221 | 222 | # if pts8 != None: 223 | # tdx = 224 | 225 | # X-Axis pointing to right. drawn in red 226 | x1 = size * (cos(yaw) * cos(roll)) + tdx 227 | y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy 228 | 229 | # Y-Axis | drawn in green 230 | # v 231 | x2 = size * (-cos(yaw) * sin(roll)) + tdx 232 | y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy 233 | 234 | # Z-Axis (out of the screen) drawn in blue 235 | x3 = size * (sin(yaw)) + tdx 236 | y3 = size * (-cos(yaw) * sin(pitch)) + tdy 237 | 238 | minus=0 239 | 240 | cv2.line(img, (int(tdx), int(tdy)-minus), (int(x1),int(y1)),(0,0,255),4) 241 | cv2.line(img, (int(tdx), int(tdy)-minus), (int(x2),int(y2)),(0,255,0),4) 242 | cv2.line(img, (int(tdx), int(tdy)-minus), (int(x3),int(y3)),(255,0,0),4) 243 | 244 | return img 245 | -------------------------------------------------------------------------------- /utils/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | import pickle 5 | import scipy.io as sio 6 | 7 | 8 | def mkdir(d): 9 | """only works on *nix system""" 10 | if not os.path.isdir(d) and not os.path.exists(d): 11 | os.system('mkdir -p {}'.format(d)) 12 | 13 | 14 | def _get_suffix(filename): 15 | """a.jpg -> jpg""" 16 | pos = filename.rfind('.') 17 | if pos == -1: 18 | return '' 19 | return filename[pos + 1:] 20 | 21 | 22 | def _load(fp): 23 | suffix = _get_suffix(fp) 24 | if suffix == 'npy': 25 | return np.load(fp) 26 | elif suffix == 'pkl': 27 | return pickle.load(open(fp, 'rb')) 28 | 29 | 30 | def _dump(wfp, obj): 31 | suffix = _get_suffix(wfp) 32 | if suffix == 'npy': 33 | np.save(wfp, obj) 34 | elif suffix == 'pkl': 35 | pickle.dump(obj, open(wfp, 'wb')) 36 | else: 37 | raise Exception('Unknown Type: {}'.format(suffix)) 38 | 39 | 40 | def _load_tensor(fp, mode='cpu'): 41 | if mode.lower() == 'cpu': 42 | return torch.from_numpy(_load(fp)) 43 | elif mode.lower() == 'gpu': 44 | return torch.from_numpy(_load(fp)).cuda() 45 | 46 | 47 | def _tensor_to_cuda(x): 48 | if x.is_cuda: 49 | return x 50 | else: 51 | return x.cuda() 52 | 53 | 54 | def _load_gpu(fp): 
55 | return torch.from_numpy(_load(fp)).cuda() 56 | 57 | 58 | def load_bfm(model_path): 59 | suffix = _get_suffix(model_path) 60 | if suffix == 'mat': 61 | C = sio.loadmat(model_path) 62 | model = C['model_refine'] 63 | model = model[0, 0] 64 | 65 | model_new = {} 66 | w_shp = model['w'].astype(np.float32) 67 | model_new['w_shp_sim'] = w_shp[:, :40] 68 | w_exp = model['w_exp'].astype(np.float32) 69 | model_new['w_exp_sim'] = w_exp[:, :10] 70 | 71 | u_shp = model['mu_shape'] 72 | u_exp = model['mu_exp'] 73 | u = (u_shp + u_exp).astype(np.float32) 74 | model_new['mu'] = u 75 | model_new['tri'] = model['tri'].astype(np.int32) - 1 76 | 77 | # flatten it, pay attention to index value 78 | keypoints = model['keypoints'].astype(np.int32) - 1 79 | keypoints = np.concatenate((3 * keypoints, 3 * keypoints + 1, 3 * keypoints + 2), axis=0) 80 | 81 | model_new['keypoints'] = keypoints.T.flatten() 82 | 83 | # 84 | w = np.concatenate((w_shp, w_exp), axis=1) 85 | w_base = w[keypoints] 86 | w_norm = np.linalg.norm(w, axis=0) 87 | w_base_norm = np.linalg.norm(w_base, axis=0) 88 | 89 | dim = w_shp.shape[0] // 3 90 | u_base = u[keypoints].reshape(-1, 1) 91 | w_shp_base = w_shp[keypoints] 92 | w_exp_base = w_exp[keypoints] 93 | 94 | model_new['w_norm'] = w_norm 95 | model_new['w_base_norm'] = w_base_norm 96 | model_new['dim'] = dim 97 | model_new['u_base'] = u_base 98 | model_new['w_shp_base'] = w_shp_base 99 | model_new['w_exp_base'] = w_exp_base 100 | 101 | _dump(model_path.replace('.mat', '.pkl'), model_new) 102 | return model_new 103 | else: 104 | return _load(model_path) 105 | 106 | 107 | _load_cpu = _load 108 | _numpy_to_tensor = lambda x: torch.from_numpy(x) 109 | _tensor_to_numpy = lambda x: x.cpu() 110 | _numpy_to_cuda = lambda x: _tensor_to_cuda(torch.from_numpy(x)) 111 | _cuda_to_tensor = lambda x: x.cpu() 112 | _cuda_to_numpy = lambda x: x.cpu().numpy() 113 | -------------------------------------------------------------------------------- /utils/params.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import numpy as np 3 | from .io import _load 4 | 5 | def make_abs_path(d): 6 | return osp.join(osp.dirname(osp.realpath(__file__)), d) 7 | 8 | class ParamsPack(): 9 | """Parameter package""" 10 | def __init__(self): 11 | try: 12 | d = make_abs_path('../3dmm_data') 13 | self.keypoints = _load(osp.join(d, 'keypoints_sim.npy')) 14 | 15 | # PCA basis for shape, expression, texture 16 | self.w_shp = _load(osp.join(d, 'w_shp_sim.npy')) 17 | self.w_exp = _load(osp.join(d, 'w_exp_sim.npy')) 18 | # param_mean and param_std are used for re-whitening 19 | meta = _load(osp.join(d, 'param_whitening.pkl')) 20 | self.param_mean = meta.get('param_mean') 21 | self.param_std = meta.get('param_std') 22 | # mean values 23 | self.u_shp = _load(osp.join(d, 'u_shp.npy')) 24 | self.u_exp = _load(osp.join(d, 'u_exp.npy')) 25 | self.u = self.u_shp + self.u_exp 26 | self.w = np.concatenate((self.w_shp, self.w_exp), axis=1) 27 | # base vector for landmarks 28 | self.w_base = self.w[self.keypoints] 29 | self.w_norm = np.linalg.norm(self.w, axis=0) 30 | self.w_base_norm = np.linalg.norm(self.w_base, axis=0) 31 | self.u_base = self.u[self.keypoints].reshape(-1, 1) 32 | self.w_shp_base = self.w_shp[self.keypoints] 33 | self.w_exp_base = self.w_exp[self.keypoints] 34 | self.std_size = 120 35 | self.dim = self.w_shp.shape[0] // 3 36 | except: 37 | raise RuntimeError('Missing data') -------------------------------------------------------------------------------- 
/utils/render.py: -------------------------------------------------------------------------------- 1 | # modified from 3DDFA-V2 2 | 3 | import sys 4 | 5 | sys.path.append('..') 6 | 7 | import cv2 8 | import numpy as np 9 | import scipy.io as sio 10 | 11 | from Sim3DR import RenderPipeline 12 | 13 | def _to_ctype(arr): 14 | if not arr.flags.c_contiguous: 15 | return arr.copy(order='C') 16 | return arr 17 | 18 | cfg = { 19 | 'intensity_ambient': 0.75, 20 | 'color_ambient': (1, 1, 1), 21 | 'intensity_directional': 0.7, 22 | 'color_directional': (1, 1, 1), 23 | 'intensity_specular': 0.2, 24 | 'specular_exp': 5, 25 | 'light_pos': (0, 0, 5), 26 | 'view_pos': (0, 0, 5) 27 | } 28 | 29 | render_app = RenderPipeline(**cfg) 30 | 31 | def render(img, ver_lst, alpha=0.6, wfp=None, tex=None, connectivity=None): 32 | tri = sio.loadmat('./3dmm_data/tri.mat')['tri'] - 1 33 | tri = _to_ctype(tri.T).astype(np.int32) 34 | # save solid mesh rendering and alpha overlaying on images 35 | if not connectivity is None: 36 | tri = _to_ctype(connectivity.T).astype(np.int32) 37 | 38 | overlap = img.copy() 39 | for ver_ in ver_lst: 40 | ver_ = ver_.astype(np.float32) 41 | ver = _to_ctype(ver_.T) # transpose 42 | overlap = render_app(ver, tri, overlap, texture=tex) 43 | cv2.imwrite(wfp[:-4]+'_solid'+'.png', overlap) 44 | 45 | res = cv2.addWeighted(img, 1 - alpha, overlap, alpha, 0) 46 | if wfp is not None: 47 | cv2.imwrite(wfp, res) 48 | print(f'Save mesh result to {wfp}') 49 | 50 | return res 51 | -------------------------------------------------------------------------------- /uv_texture_realFaces.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as transforms 3 | import numpy as np 4 | import cv2 5 | from utils.ddfa import ToTensor, Normalize 6 | from model_building import SynergyNet 7 | from utils.inference import crop_img, predict_denseVert 8 | import argparse 9 | import torch.backends.cudnn as cudnn 10 | cudnn.benchmark = True 11 | import os 12 | import os.path as osp 13 | import glob 14 | from FaceBoxes import FaceBoxes 15 | from utils.render import render 16 | 17 | 18 | # Following 3DDFA-V2, we also use 120x120 resolution 19 | IMG_SIZE = 120 20 | 21 | def write_obj_with_colors(obj_name, vertices, triangles, colors): 22 | triangles = triangles.copy() 23 | 24 | if obj_name.split('.')[-1] != 'obj': 25 | obj_name = obj_name + '.obj' 26 | with open(obj_name, 'w') as f: 27 | for i in range(vertices.shape[1]): 28 | s = 'v {:.4f} {:.4f} {:.4f} {} {} {}\n'.format(vertices[0, i], vertices[1, i], vertices[2, i], colors[i, 2], 29 | colors[i, 1], colors[i, 0]) 30 | f.write(s) 31 | for i in range(triangles.shape[1]): 32 | s = 'f {} {} {}\n'.format(triangles[0, i], triangles[1, i], triangles[2, i]) 33 | f.write(s) 34 | 35 | def main(args): 36 | # load pre-trained model 37 | checkpoint_fp = 'pretrained/best.pth.tar' 38 | args.arch = 'mobilenet_v2' 39 | args.devices_id = [0] 40 | 41 | checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict'] 42 | 43 | model = SynergyNet(args) 44 | model_dict = model.state_dict() 45 | 46 | # load BFM_UV mapping and kept indices and deleted triangles 47 | uv_vert=np.load('3dmm_data/BFM_UV.npy') 48 | coord_u = (uv_vert[:,1]*255.0).astype(np.int32) 49 | coord_v = (uv_vert[:,0]*255.0).astype(np.int32) 50 | keep_ind = np.load('3dmm_data/keptInd.npy') 51 | tri_deletion = np.load('3dmm_data/deletedTri.npy') 52 | 53 | # because the model is trained by multiple gpus, prefix 'module' 
should be removed 54 | for k in checkpoint.keys(): 55 | model_dict[k.replace('module.', '')] = checkpoint[k] 56 | 57 | model.load_state_dict(model_dict, strict=False) 58 | model = model.cuda() 59 | model.eval() 60 | 61 | # face detector 62 | face_boxes = FaceBoxes() 63 | 64 | # preparation 65 | transform = transforms.Compose([ToTensor(), Normalize(mean=127.5, std=128)]) 66 | if osp.isdir(args.files): 67 | if not args.files[-1] == '/': 68 | args.files = args.files + '/' 69 | if not args.png: 70 | files = sorted(glob.glob(args.files+'*.jpg')) 71 | else: 72 | files = sorted(glob.glob(args.files+'*.png')) 73 | else: 74 | files = [args.files] 75 | 76 | for img_fp in files: 77 | print("Process the image: ", img_fp) 78 | 79 | img_ori = cv2.imread(img_fp) 80 | 81 | # crop faces 82 | rect = [0,0,256,256,1.0] # pre-cropped faces 83 | 84 | # storage 85 | vertices_lst = [] 86 | roi_box = rect 87 | img = crop_img(img_ori, roi_box) 88 | img = cv2.resize(img, dsize=(IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_LINEAR) 89 | 90 | input = transform(img).unsqueeze(0) 91 | with torch.no_grad(): 92 | input = input.cuda() 93 | param = model.forward_test(input) 94 | param = param.squeeze().cpu().numpy().flatten().astype(np.float32) 95 | 96 | # dense pts 97 | vertices = predict_denseVert(param, roi_box, transform=True) 98 | vertices = vertices[:,keep_ind] 99 | vertices_lst.append(vertices) 100 | 101 | # textured obj file output 102 | if not osp.exists(f'inference_output/obj/'): 103 | os.makedirs(f'inference_output/obj/') 104 | if not osp.exists(f'inference_output/rendering_overlay/'): 105 | os.makedirs(f'inference_output/rendering_overlay/') 106 | 107 | name = img_fp.rsplit('/',1)[-1][:-11] # drop off the postfix 108 | colors = cv2.imread(f'texture_data/uv_real/{name}_fake_B.png',-1) 109 | colors = np.flip(colors,axis=0) 110 | colors_uv = (colors[coord_u, coord_v,:]) 111 | 112 | wfp = f'inference_output/obj/{name}.obj' 113 | write_obj_with_colors(wfp, vertices, tri_deletion, colors_uv[keep_ind,:].astype(np.float32)) 114 | 115 | tex = colors_uv[keep_ind,:].astype(np.float32)/255.0 116 | render(img_ori, vertices_lst, alpha=0.6, wfp=f'inference_output/rendering_overlay/{name}.jpg', tex=tex, connectivity=tri_deletion-1) 117 | 118 | if __name__ == '__main__': 119 | parser = argparse.ArgumentParser() 120 | parser.add_argument('-f', '--files', default='', help='path to a single image or path to a folder containing multiple images') 121 | parser.add_argument("--png", action="store_true", help="if images are with .png extension") 122 | parser.add_argument('--img_size', default=120, type=int) 123 | parser.add_argument('-b', '--batch-size', default=1, type=int) 124 | 125 | args = parser.parse_args() 126 | main(args) --------------------------------------------------------------------------------