├── src ├── __init__.py ├── __pycache__ │ ├── nets.cpython-37.pyc │ ├── utils.cpython-37.pyc │ ├── detect.cpython-37.pyc │ ├── models.cpython-37.pyc │ ├── __init__.cpython-37.pyc │ ├── base_camera.cpython-37.pyc │ └── camera_opencv.cpython-37.pyc ├── camera_opencv.py ├── test.py ├── utils.py ├── base_camera.py ├── models.py └── detect.py ├── .gitignore ├── assets ├── yuanm.png ├── example.png ├── office1.jpg ├── office2.jpg ├── office3.jpg ├── office4.jpg └── office5.jpg ├── scripts ├── onet.npy ├── pnet.npy ├── rnet.npy ├── test.py └── caffemodel_to_pytorchmodel.py ├── weights ├── onet.npy ├── pnet.npy └── rnet.npy ├── caffe_models ├── det1.caffemodel ├── det2.caffemodel ├── det3.caffemodel ├── det4.caffemodel ├── det1.prototxt ├── det2.prototxt ├── det3.prototxt └── det4.prototxt └── README.md /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | -------------------------------------------------------------------------------- /assets/yuanm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mayuanjason/MTCNN_face_detection_alignment_pytorch/HEAD/assets/yuanm.png -------------------------------------------------------------------------------- /scripts/onet.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mayuanjason/MTCNN_face_detection_alignment_pytorch/HEAD/scripts/onet.npy -------------------------------------------------------------------------------- /scripts/pnet.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mayuanjason/MTCNN_face_detection_alignment_pytorch/HEAD/scripts/pnet.npy -------------------------------------------------------------------------------- /scripts/rnet.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mayuanjason/MTCNN_face_detection_alignment_pytorch/HEAD/scripts/rnet.npy -------------------------------------------------------------------------------- /weights/onet.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mayuanjason/MTCNN_face_detection_alignment_pytorch/HEAD/weights/onet.npy -------------------------------------------------------------------------------- /weights/pnet.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mayuanjason/MTCNN_face_detection_alignment_pytorch/HEAD/weights/pnet.npy -------------------------------------------------------------------------------- /weights/rnet.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mayuanjason/MTCNN_face_detection_alignment_pytorch/HEAD/weights/rnet.npy -------------------------------------------------------------------------------- /assets/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mayuanjason/MTCNN_face_detection_alignment_pytorch/HEAD/assets/example.png -------------------------------------------------------------------------------- /assets/office1.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mayuanjason/MTCNN_face_detection_alignment_pytorch/HEAD/assets/office1.jpg -------------------------------------------------------------------------------- /assets/office2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mayuanjason/MTCNN_face_detection_alignment_pytorch/HEAD/assets/office2.jpg -------------------------------------------------------------------------------- /assets/office3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mayuanjason/MTCNN_face_detection_alignment_pytorch/HEAD/assets/office3.jpg -------------------------------------------------------------------------------- /assets/office4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mayuanjason/MTCNN_face_detection_alignment_pytorch/HEAD/assets/office4.jpg -------------------------------------------------------------------------------- /assets/office5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mayuanjason/MTCNN_face_detection_alignment_pytorch/HEAD/assets/office5.jpg -------------------------------------------------------------------------------- /caffe_models/det1.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mayuanjason/MTCNN_face_detection_alignment_pytorch/HEAD/caffe_models/det1.caffemodel -------------------------------------------------------------------------------- /caffe_models/det2.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mayuanjason/MTCNN_face_detection_alignment_pytorch/HEAD/caffe_models/det2.caffemodel -------------------------------------------------------------------------------- /caffe_models/det3.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mayuanjason/MTCNN_face_detection_alignment_pytorch/HEAD/caffe_models/det3.caffemodel -------------------------------------------------------------------------------- /caffe_models/det4.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mayuanjason/MTCNN_face_detection_alignment_pytorch/HEAD/caffe_models/det4.caffemodel -------------------------------------------------------------------------------- /src/__pycache__/nets.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mayuanjason/MTCNN_face_detection_alignment_pytorch/HEAD/src/__pycache__/nets.cpython-37.pyc -------------------------------------------------------------------------------- /src/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mayuanjason/MTCNN_face_detection_alignment_pytorch/HEAD/src/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /src/__pycache__/detect.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mayuanjason/MTCNN_face_detection_alignment_pytorch/HEAD/src/__pycache__/detect.cpython-37.pyc -------------------------------------------------------------------------------- /src/__pycache__/models.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mayuanjason/MTCNN_face_detection_alignment_pytorch/HEAD/src/__pycache__/models.cpython-37.pyc -------------------------------------------------------------------------------- /src/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mayuanjason/MTCNN_face_detection_alignment_pytorch/HEAD/src/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /src/__pycache__/base_camera.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mayuanjason/MTCNN_face_detection_alignment_pytorch/HEAD/src/__pycache__/base_camera.cpython-37.pyc -------------------------------------------------------------------------------- /src/__pycache__/camera_opencv.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mayuanjason/MTCNN_face_detection_alignment_pytorch/HEAD/src/__pycache__/camera_opencv.cpython-37.pyc -------------------------------------------------------------------------------- /scripts/test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | weights = np.load('./pnet.npy', encoding='bytes', allow_pickle=True)[()] 4 | doc = open('pnet.txt', 'a') 5 | print(weights, file=doc) -------------------------------------------------------------------------------- /src/camera_opencv.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from base_camera import BaseCamera 3 | 4 | 5 | class Camera(BaseCamera): 6 | video_source = 0 7 | cap = None 8 | 9 | @staticmethod 10 | def set_video_source(source): 11 | Camera.video_source = source 12 | 13 | @staticmethod 14 | def frames(): 15 | Camera.cap = cv2.VideoCapture(Camera.video_source) 16 | if not Camera.cap.isOpened(): 17 | raise RuntimeError('Could not start camera.') 18 | 19 | while True: 20 | # read current frame 21 | _, img = Camera.cap.read() 22 | 23 | # encode as a jpeg image and return it 24 | # yield cv2.imencode('.jpg', img)[1].tobytes() 25 | yield img 26 | 27 | @staticmethod 28 | def close(): 29 | print('1release camera resource') 30 | if Camera.cap: 31 | print('2release camera resource') 32 | Camera.cap.release() -------------------------------------------------------------------------------- /src/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | import os 4 | sys.path.append(os.pardir) 5 | from importlib import import_module 6 | import cv2 7 | from src.detect import FaceDetector 8 | 9 | # import camera driver 10 | if os.environ.get('CAMERA'): 11 | Camera = import_module('camera_' + os.environ['CAMERA']).Camera 12 | else: 13 | from camera import Camera 14 | 15 | 16 | if __name__ == "__main__": 17 | detector = FaceDetector() 18 | 19 | while True: 20 | frame = Camera().get_frame() 21 | image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 22 | bounding_boxes = detector.detect(image) 23 | 24 | for i in 
range(len(bounding_boxes)): 25 | cv2.rectangle(frame, (int(bounding_boxes[i][0]), int(bounding_boxes[i][1])), 26 | (int(bounding_boxes[i][2]), int(bounding_boxes[i][3])), (255, 0, 0), 2) 27 | 28 | cv2.imshow('capture', frame) 29 | key = cv2.waitKey(1) 30 | if key & 0xFF == ord('q'): 31 | break -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from IPython import display 4 | import matplotlib.pyplot as plt 5 | import torch 6 | from PIL import ImageDraw 7 | 8 | 9 | def use_svg_display(): 10 | """用矢量图显示 11 | """ 12 | 13 | display.set_matplotlib_formats('svg') 14 | 15 | 16 | def set_figsize(figsize=(3.5, 2.5)): 17 | """Set matplotlib figure size. 18 | 19 | Keyword Arguments: 20 | figsize {tuple} -- [description] (default: {(3.5, 2.5)}) 21 | """ 22 | 23 | use_svg_display() 24 | plt.rcParams['figure.figsize'] = figsize 25 | 26 | 27 | def try_gpu(): 28 | use_cuda = torch.cuda.is_available() 29 | return torch.device("cuda" if use_cuda else "cpu") 30 | 31 | 32 | def show_bboxes(img, bboxes, facial_landmarks=[]): 33 | """Draw bounding boxes and facial landmarks. 34 | 35 | Arguments: 36 | img {[type]} -- an instance of PIL.Image. 37 | bboxes {[type]} -- a float numpy array of shape [n, 5]. 38 | 39 | Keyword Arguments: 40 | facial_landmarks {list} -- a float numpy array of shape [n, 10]. (default: {[]}) 41 | 42 | Returns: 43 | [type] -- an instance of PIL.Image. 44 | """ 45 | 46 | img_copy = img.copy() 47 | draw = ImageDraw.Draw(img_copy) 48 | 49 | for b in bboxes: 50 | draw.rectangle([ 51 | (b[0], b[1]), (b[2], b[3]) 52 | ], outline='white') 53 | 54 | for p in facial_landmarks: 55 | for i in range(5): 56 | draw.ellipse([ 57 | (p[i] - 1.0, p[i + 5] - 1.0), 58 | (p[i] + 1.0, p[i + 5] + 1.0) 59 | ], outline='blue') 60 | 61 | return img_copy 62 | -------------------------------------------------------------------------------- /caffe_models/det1.prototxt: -------------------------------------------------------------------------------- 1 | name: "PNet" 2 | input: "data" 3 | input_dim: 1 4 | input_dim: 3 5 | input_dim: 12 6 | input_dim: 12 7 | 8 | layer { 9 | name: "conv1" 10 | type: "Convolution" 11 | bottom: "data" 12 | top: "conv1" 13 | param { 14 | lr_mult: 1 15 | decay_mult: 1 16 | } 17 | param { 18 | lr_mult: 2 19 | decay_mult: 0 20 | } 21 | convolution_param { 22 | num_output: 10 23 | kernel_size: 3 24 | stride: 1 25 | weight_filler { 26 | type: "xavier" 27 | } 28 | bias_filler { 29 | type: "constant" 30 | value: 0 31 | } 32 | } 33 | } 34 | layer { 35 | name: "PReLU1" 36 | type: "PReLU" 37 | bottom: "conv1" 38 | top: "conv1" 39 | } 40 | layer { 41 | name: "pool1" 42 | type: "Pooling" 43 | bottom: "conv1" 44 | top: "pool1" 45 | pooling_param { 46 | pool: MAX 47 | kernel_size: 2 48 | stride: 2 49 | } 50 | } 51 | 52 | layer { 53 | name: "conv2" 54 | type: "Convolution" 55 | bottom: "pool1" 56 | top: "conv2" 57 | param { 58 | lr_mult: 1 59 | decay_mult: 1 60 | } 61 | param { 62 | lr_mult: 2 63 | decay_mult: 0 64 | } 65 | convolution_param { 66 | num_output: 16 67 | kernel_size: 3 68 | stride: 1 69 | weight_filler { 70 | type: "xavier" 71 | } 72 | bias_filler { 73 | type: "constant" 74 | value: 0 75 | } 76 | } 77 | } 78 | layer { 79 | name: "PReLU2" 80 | type: "PReLU" 81 | bottom: "conv2" 82 | top: "conv2" 83 | } 84 | 85 | layer { 86 | name: "conv3" 87 | type: "Convolution" 88 | bottom: "conv2" 89 | top: "conv3" 90 | param { 91 | lr_mult: 1 92 | 
decay_mult: 1 93 | } 94 | param { 95 | lr_mult: 2 96 | decay_mult: 0 97 | } 98 | convolution_param { 99 | num_output: 32 100 | kernel_size: 3 101 | stride: 1 102 | weight_filler { 103 | type: "xavier" 104 | } 105 | bias_filler { 106 | type: "constant" 107 | value: 0 108 | } 109 | } 110 | } 111 | layer { 112 | name: "PReLU3" 113 | type: "PReLU" 114 | bottom: "conv3" 115 | top: "conv3" 116 | } 117 | 118 | 119 | layer { 120 | name: "conv4-1" 121 | type: "Convolution" 122 | bottom: "conv3" 123 | top: "conv4-1" 124 | param { 125 | lr_mult: 1 126 | decay_mult: 1 127 | } 128 | param { 129 | lr_mult: 2 130 | decay_mult: 0 131 | } 132 | convolution_param { 133 | num_output: 2 134 | kernel_size: 1 135 | stride: 1 136 | weight_filler { 137 | type: "xavier" 138 | } 139 | bias_filler { 140 | type: "constant" 141 | value: 0 142 | } 143 | } 144 | } 145 | 146 | layer { 147 | name: "conv4-2" 148 | type: "Convolution" 149 | bottom: "conv3" 150 | top: "conv4-2" 151 | param { 152 | lr_mult: 1 153 | decay_mult: 1 154 | } 155 | param { 156 | lr_mult: 2 157 | decay_mult: 0 158 | } 159 | convolution_param { 160 | num_output: 4 161 | kernel_size: 1 162 | stride: 1 163 | weight_filler { 164 | type: "xavier" 165 | } 166 | bias_filler { 167 | type: "constant" 168 | value: 0 169 | } 170 | } 171 | } 172 | layer { 173 | name: "prob1" 174 | type: "Softmax" 175 | bottom: "conv4-1" 176 | top: "prob1" 177 | } 178 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MTCNN 2 | 3 | `PyTorch` implementation of the **inference stage** of the face detection algorithm described in 4 | [Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Networks](https://arxiv.org/abs/1604.02878). 5 | 6 | ![example of a face detection](assets/example.png) 7 | 8 | ## Why this project 9 | [mtcnn-pytorch](https://github.com/TropComplique/mtcnn-pytorch) is the most popular PyTorch implementation of MTCNN, but I found some disadvantages when using it for real-time detection tasks: 10 | 11 | * It mixes torch and numpy operations, which results in slow inference (it cannot run on the GPU). 12 | * It is based on an old version of PyTorch (0.2). 13 | 14 | So I created this project with the following improvements: 15 | * All numpy operations are replaced with torch operations, so inference can benefit from GPU acceleration. 16 | * Runs automatically on CPU or GPU. 17 | * Based on the latest version of PyTorch (1.3). 18 | * Real-time face tracking. 19 | 20 | ## Installation 21 | 1. Create a virtual environment 22 | ``` 23 | # conda create -n face_detection 24 | ``` 25 | 2. Activate the virtual environment 26 | ``` 27 | # source activate face_detection 28 | ``` 29 | 3. [Install PyTorch](https://pytorch.org/) 30 | ``` 31 | (face_detection)# conda install pytorch torchvision cudatoolkit=10.1 -c pytorch 32 | ``` 33 | 34 | 4. Install Jupyter Notebook 35 | ``` 36 | (face_detection)# conda install jupyter notebook 37 | ``` 38 | 39 | 5.
Install `opencv` 40 | ``` 41 | (face_detection)# pip install opencv-python 42 | ``` 43 | 44 | ## How to use it 45 | Just download the repository and then do this 46 | ``` 47 | import sys 48 | import os 49 | sys.path.append(os.pardir) 50 | from importlib import import_module 51 | import cv2 52 | from src.detect import FaceDetector 53 | 54 | # import camera driver 55 | if os.environ.get('CAMERA'): 56 | Camera = import_module('camera_' + os.environ['CAMERA']).Camera 57 | else: 58 | from camera import Camera 59 | 60 | if __name__ == "__main__": 61 | detector = FaceDetector() 62 | 63 | while True: 64 | frame = Camera().get_frame() 65 | image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 66 | bounding_boxes = detector.detect(image) 67 | 68 | for i in range(len(bounding_boxes)): 69 | cv2.rectangle(frame, (int(bounding_boxes[i][0]), int(bounding_boxes[i][1])), 70 | (int(bounding_boxes[i][2]), int(bounding_boxes[i][3])), (255, 0, 0), 2) 71 | 72 | cv2.imshow('capture', frame) 73 | key = cv2.waitKey(1) 74 | if key & 0xFF == ord('q'): 75 | break 76 | ``` 77 | or just run: 78 | ``` 79 | (face_detection)# cd MTCNN_face_detection_alignment_pytorch/src/ 80 | (face_detection)# CAMERA=opencv python test.py 81 | ``` 82 | 83 | ## Tutorial 84 | [Detect step by step](./notebooks/try_mtcnn_step_by_step.ipynb) 85 | 86 | ## Credit 87 | This implementation is heavily inspired by: 88 | * [TropComplique/mtcnn-pytorch](https://github.com/TropComplique/mtcnn-pytorch) 89 | * [faciallab/FaceDetector](https://github.com/faciallab/FaceDetector) 90 | 91 | ## Citation 92 | ``` 93 | @article{7553523, 94 | author={K. Zhang and Z. Zhang and Z. Li and Y. Qiao}, 95 | journal={IEEE Signal Processing Letters}, 96 | title={Joint Face Detection and Alignment Using Multitask Cascaded Convolutional Networks}, 97 | year={2016}, 98 | volume={23}, 99 | number={10}, 100 | pages={1499-1503}, 101 | keywords={Benchmark testing;Computer architecture;Convolution;Detectors;Face;Face detection;Training;Cascaded convolutional neural network (CNN);face alignment;face detection}, 102 | doi={10.1109/LSP.2016.2603342}, 103 | ISSN={1070-9908}, 104 | month={Oct} 105 | } 106 | ``` -------------------------------------------------------------------------------- /scripts/caffemodel_to_pytorchmodel.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | import numpy as np 3 | import torch 4 | 5 | """ 6 | # PNet 7 | # conv1.weight (10, 3, 3, 3) 8 | # conv1.bias (10,) 9 | # prelu1.weight (10,) 10 | # conv2.weight (16, 10, 3, 3) 11 | # conv2.bias (16,) 12 | # prelu2.weight (16,) 13 | # conv3.weight (32, 16, 3, 3) 14 | # conv3.bias (32,) 15 | # prelu3.weight (32,) 16 | # conv4-1.weight (2, 32, 1, 1) 17 | # conv4-1.bias (2,) 18 | # conv4-2.weight (4, 32, 1, 1) 19 | # conv4-2.bias (4,) 20 | 21 | # RNet 22 | # conv1.weight (28, 3, 3, 3) 23 | # conv1.bias (28,) 24 | # prelu1.weight (28,) 25 | # conv2.weight (48, 28, 3, 3) 26 | # conv2.bias (48,) 27 | # prelu2.weight (48,) 28 | # conv3.weight (64, 48, 2, 2) 29 | # conv3.bias (64,) 30 | # prelu3.weight (64,) 31 | # conv4.weight (128, 576) 32 | # conv4.bias (128,) 33 | # prelu4.weight (128,) 34 | # conv5-1.weight (2, 128) 35 | # conv5-1.bias (2,) 36 | # conv5-2.weight (4, 128) 37 | # conv5-2.bias (4,) 38 | 39 | # ONet 40 | # conv1.weight (32, 3, 3, 3) 41 | # conv1.bias (32,) 42 | # prelu1.weight (32,) 43 | # conv2.weight (64, 32, 3, 3) 44 | # conv2.bias (64,) 45 | # prelu2.weight (64,) 46 | # conv3.weight (64, 64, 3, 3) 47 | # conv3.bias (64,) 48 | # prelu3.weight (64,) 49 
| # conv4.weight (128, 64, 2, 2) 50 | # conv4.bias (128,) 51 | # prelu4.weight (128,) 52 | # conv5.weight (256, 1152) 53 | # conv5.bias (256,) 54 | # prelu5.weight (256,) 55 | # conv6-1.weight (2, 256) 56 | # conv6-1.bias (2,) 57 | # conv6-2.weight (4, 256) 58 | # conv6-2.bias (4,) 59 | # conv6-3.weight (10, 256) 60 | # conv6-3.bias (10,) 61 | """ 62 | 63 | def dump_layer(net): 64 | for param in net.params.keys(): 65 | print(param.lower() + '.weight', net.params[param][0].data.shape) 66 | if len(net.params[param]) == 2: 67 | print(param.lower() + '.bias', net.params[param][1].data.shape) 68 | 69 | def convert_to_pytorch_model(net, **net_info): 70 | model_state = {} 71 | 72 | for param in net.params.keys(): 73 | if net_info['cls_prob'] in param: 74 | prefix = 'cls_prob.' + param.lower().replace('-', '_') 75 | elif net_info['bbox_offset'] in param: 76 | prefix = 'bbox_offset.' + param.lower().replace('-', '_') 77 | elif net_info['landmarks'] is not None and net_info['landmarks'] in param: 78 | prefix = 'landmarks.' + param.lower().replace('-', '_') 79 | else: 80 | prefix = 'backend.' + param.lower() 81 | 82 | if 'prelu' in prefix: 83 | model_state[prefix + '.weight'] = torch.tensor(net.params[param][0].data) 84 | else: 85 | if len(net.params[param][0].data.shape) == 4: 86 | model_state[prefix + '.weight'] = torch.tensor(net.params[param][0].data.transpose((0, 1, 3, 2))) 87 | else: 88 | model_state[prefix + '.weight'] = torch.tensor(net.params[param][0].data) 89 | 90 | model_state[prefix + '.bias'] = torch.tensor(net.params[param][1].data) 91 | 92 | return model_state 93 | 94 | 95 | def covnver_pnet(): 96 | net = caffe.Net('../caffe_models/det1.prototxt', '../caffe_models/det1.caffemodel', caffe.TEST) 97 | # dump_layer(net) 98 | p = convert_to_pytorch_model(net, cls_prob='conv4-1', bbox_offset='conv4-2', landmarks=None) 99 | np.save('pnet.npy', p, allow_pickle=True) 100 | 101 | def covnver_rnet(): 102 | net = caffe.Net('../caffe_models/det2.prototxt', '../caffe_models/det2.caffemodel', caffe.TEST) 103 | # dump_layer(net) 104 | p = convert_to_pytorch_model(net, cls_prob='conv5-1', bbox_offset='conv5-2', landmarks=None) 105 | np.save('rnet.npy', p, allow_pickle=True) 106 | 107 | def covnver_onet(): 108 | net = caffe.Net('../caffe_models/det3.prototxt', '../caffe_models/det3.caffemodel', caffe.TEST) 109 | # dump_layer(net) 110 | p = convert_to_pytorch_model(net, cls_prob='conv6-1', bbox_offset='conv6-2', landmarks='conv6-3') 111 | np.save('onet.npy', p, allow_pickle=True) 112 | 113 | if __name__ == "__main__": 114 | covnver_pnet() 115 | covnver_rnet() 116 | covnver_onet() 117 | -------------------------------------------------------------------------------- /src/base_camera.py: -------------------------------------------------------------------------------- 1 | import time 2 | import threading 3 | try: 4 | from greenlet import getcurrent as get_ident 5 | except ImportError: 6 | try: 7 | from thread import get_ident 8 | except ImportError: 9 | from _thread import get_ident 10 | 11 | 12 | class CameraEvent(object): 13 | """An Event-like class that signals all active clients when a new frame is 14 | available. 
15 | """ 16 | def __init__(self): 17 | self.events = {} 18 | 19 | def wait(self): 20 | """Invoked from each client's thread to wait for the next frame.""" 21 | ident = get_ident() 22 | if ident not in self.events: 23 | # this is a new client 24 | # add an entry for it in the self.events dict 25 | # each entry has two elements, a threading.Event() and a timestamp 26 | self.events[ident] = [threading.Event(), time.time()] 27 | return self.events[ident][0].wait() 28 | 29 | def set(self): 30 | """Invoked by the camera thread when a new frame is available.""" 31 | now = time.time() 32 | remove = None 33 | for ident, event in self.events.items(): 34 | if not event[0].isSet(): 35 | # if this client's event is not set, then set it 36 | # also update the last set timestamp to now 37 | event[0].set() 38 | event[1] = now 39 | else: 40 | # if the client's event is already set, it means the client 41 | # did not process a previous frame 42 | # if the event stays set for more than 5 seconds, then assume 43 | # the client is gone and remove it 44 | if now - event[1] > 5: 45 | remove = ident 46 | if remove: 47 | del self.events[remove] 48 | 49 | def clear(self): 50 | """Invoked from each client's thread after a frame was processed.""" 51 | self.events[get_ident()][0].clear() 52 | 53 | 54 | class BaseCamera(object): 55 | thread = None # background thread that reads frames from camera 56 | frame = None # current frame is stored here by background thread 57 | last_access = 0 # time of last client access to the camera 58 | event = CameraEvent() 59 | 60 | def __init__(self): 61 | """Start the background camera thread if it isn't running yet.""" 62 | if BaseCamera.thread is None: 63 | BaseCamera.last_access = time.time() 64 | 65 | # start background frame thread 66 | BaseCamera.thread = threading.Thread(target=self._thread) 67 | BaseCamera.thread.start() 68 | 69 | # wait until frames are available 70 | while self.get_frame() is None: 71 | time.sleep(0) 72 | 73 | def get_frame(self): 74 | """Return the current camera frame.""" 75 | BaseCamera.last_access = time.time() 76 | 77 | # wait for a signal from the camera thread 78 | BaseCamera.event.wait() 79 | BaseCamera.event.clear() 80 | 81 | return BaseCamera.frame 82 | 83 | @staticmethod 84 | def frames(): 85 | """"Generator that returns frames from the camera.""" 86 | raise RuntimeError('Must be implemented by subclasses.') 87 | 88 | @staticmethod 89 | def close(): 90 | raise RuntimeError('Must be implemented by subclasses.') 91 | 92 | @classmethod 93 | def _thread(cls): 94 | """Camera background thread.""" 95 | print('Starting camera thread.') 96 | frames_iterator = cls.frames() 97 | for frame in frames_iterator: 98 | BaseCamera.frame = frame 99 | BaseCamera.event.set() # send signal to clients 100 | time.sleep(0) 101 | 102 | # if there hasn't been any clients asking for frames in 103 | # the last 10 seconds then stop the thread 104 | if time.time() - BaseCamera.last_access > 10: 105 | frames_iterator.close() 106 | cls.close() 107 | print('Stopping camera thread due to inactivity.') 108 | break 109 | BaseCamera.thread = None 110 | -------------------------------------------------------------------------------- /caffe_models/det2.prototxt: -------------------------------------------------------------------------------- 1 | name: "RNet" 2 | input: "data" 3 | input_dim: 1 4 | input_dim: 3 5 | input_dim: 24 6 | input_dim: 24 7 | 8 | 9 | ########################## 10 | ###################### 11 | layer { 12 | name: "conv1" 13 | type: "Convolution" 14 | bottom: "data" 
15 | top: "conv1" 16 | param { 17 | lr_mult: 0 18 | decay_mult: 0 19 | } 20 | param { 21 | lr_mult: 0 22 | decay_mult: 0 23 | } 24 | convolution_param { 25 | num_output: 28 26 | kernel_size: 3 27 | stride: 1 28 | weight_filler { 29 | type: "xavier" 30 | } 31 | bias_filler { 32 | type: "constant" 33 | value: 0 34 | } 35 | } 36 | } 37 | layer { 38 | name: "prelu1" 39 | type: "PReLU" 40 | bottom: "conv1" 41 | top: "conv1" 42 | propagate_down: true 43 | } 44 | layer { 45 | name: "pool1" 46 | type: "Pooling" 47 | bottom: "conv1" 48 | top: "pool1" 49 | pooling_param { 50 | pool: MAX 51 | kernel_size: 3 52 | stride: 2 53 | } 54 | } 55 | 56 | layer { 57 | name: "conv2" 58 | type: "Convolution" 59 | bottom: "pool1" 60 | top: "conv2" 61 | param { 62 | lr_mult: 0 63 | decay_mult: 0 64 | } 65 | param { 66 | lr_mult: 0 67 | decay_mult: 0 68 | } 69 | convolution_param { 70 | num_output: 48 71 | kernel_size: 3 72 | stride: 1 73 | weight_filler { 74 | type: "xavier" 75 | } 76 | bias_filler { 77 | type: "constant" 78 | value: 0 79 | } 80 | } 81 | } 82 | layer { 83 | name: "prelu2" 84 | type: "PReLU" 85 | bottom: "conv2" 86 | top: "conv2" 87 | propagate_down: true 88 | } 89 | layer { 90 | name: "pool2" 91 | type: "Pooling" 92 | bottom: "conv2" 93 | top: "pool2" 94 | pooling_param { 95 | pool: MAX 96 | kernel_size: 3 97 | stride: 2 98 | } 99 | } 100 | #################################### 101 | 102 | ################################## 103 | layer { 104 | name: "conv3" 105 | type: "Convolution" 106 | bottom: "pool2" 107 | top: "conv3" 108 | param { 109 | lr_mult: 0 110 | decay_mult: 0 111 | } 112 | param { 113 | lr_mult: 0 114 | decay_mult: 0 115 | } 116 | convolution_param { 117 | num_output: 64 118 | kernel_size: 2 119 | stride: 1 120 | weight_filler { 121 | type: "xavier" 122 | } 123 | bias_filler { 124 | type: "constant" 125 | value: 0 126 | } 127 | } 128 | } 129 | layer { 130 | name: "prelu3" 131 | type: "PReLU" 132 | bottom: "conv3" 133 | top: "conv3" 134 | propagate_down: true 135 | } 136 | ############################### 137 | 138 | ############################### 139 | 140 | layer { 141 | name: "conv4" 142 | type: "InnerProduct" 143 | bottom: "conv3" 144 | top: "conv4" 145 | param { 146 | lr_mult: 0 147 | decay_mult: 0 148 | } 149 | param { 150 | lr_mult: 0 151 | decay_mult: 0 152 | } 153 | inner_product_param { 154 | num_output: 128 155 | weight_filler { 156 | type: "xavier" 157 | } 158 | bias_filler { 159 | type: "constant" 160 | value: 0 161 | } 162 | } 163 | } 164 | layer { 165 | name: "prelu4" 166 | type: "PReLU" 167 | bottom: "conv4" 168 | top: "conv4" 169 | } 170 | 171 | layer { 172 | name: "conv5-1" 173 | type: "InnerProduct" 174 | bottom: "conv4" 175 | top: "conv5-1" 176 | param { 177 | lr_mult: 0 178 | decay_mult: 0 179 | } 180 | param { 181 | lr_mult: 0 182 | decay_mult: 0 183 | } 184 | inner_product_param { 185 | num_output: 2 186 | #kernel_size: 1 187 | #stride: 1 188 | weight_filler { 189 | type: "xavier" 190 | } 191 | bias_filler { 192 | type: "constant" 193 | value: 0 194 | } 195 | } 196 | } 197 | layer { 198 | name: "conv5-2" 199 | type: "InnerProduct" 200 | bottom: "conv4" 201 | top: "conv5-2" 202 | param { 203 | lr_mult: 1 204 | decay_mult: 1 205 | } 206 | param { 207 | lr_mult: 2 208 | decay_mult: 1 209 | } 210 | inner_product_param { 211 | num_output: 4 212 | #kernel_size: 1 213 | #stride: 1 214 | weight_filler { 215 | type: "xavier" 216 | } 217 | bias_filler { 218 | type: "constant" 219 | value: 0 220 | } 221 | } 222 | } 223 | layer { 224 | name: "prob1" 225 | type: "Softmax" 226 
| bottom: "conv5-1" 227 | top: "prob1" 228 | } -------------------------------------------------------------------------------- /caffe_models/det3.prototxt: -------------------------------------------------------------------------------- 1 | name: "ONet" 2 | input: "data" 3 | input_dim: 1 4 | input_dim: 3 5 | input_dim: 48 6 | input_dim: 48 7 | ################################## 8 | layer { 9 | name: "conv1" 10 | type: "Convolution" 11 | bottom: "data" 12 | top: "conv1" 13 | param { 14 | lr_mult: 1 15 | decay_mult: 1 16 | } 17 | param { 18 | lr_mult: 2 19 | decay_mult: 1 20 | } 21 | convolution_param { 22 | num_output: 32 23 | kernel_size: 3 24 | stride: 1 25 | weight_filler { 26 | type: "xavier" 27 | } 28 | bias_filler { 29 | type: "constant" 30 | value: 0 31 | } 32 | } 33 | } 34 | layer { 35 | name: "prelu1" 36 | type: "PReLU" 37 | bottom: "conv1" 38 | top: "conv1" 39 | } 40 | layer { 41 | name: "pool1" 42 | type: "Pooling" 43 | bottom: "conv1" 44 | top: "pool1" 45 | pooling_param { 46 | pool: MAX 47 | kernel_size: 3 48 | stride: 2 49 | } 50 | } 51 | layer { 52 | name: "conv2" 53 | type: "Convolution" 54 | bottom: "pool1" 55 | top: "conv2" 56 | param { 57 | lr_mult: 1 58 | decay_mult: 1 59 | } 60 | param { 61 | lr_mult: 2 62 | decay_mult: 1 63 | } 64 | convolution_param { 65 | num_output: 64 66 | kernel_size: 3 67 | stride: 1 68 | weight_filler { 69 | type: "xavier" 70 | } 71 | bias_filler { 72 | type: "constant" 73 | value: 0 74 | } 75 | } 76 | } 77 | 78 | layer { 79 | name: "prelu2" 80 | type: "PReLU" 81 | bottom: "conv2" 82 | top: "conv2" 83 | } 84 | layer { 85 | name: "pool2" 86 | type: "Pooling" 87 | bottom: "conv2" 88 | top: "pool2" 89 | pooling_param { 90 | pool: MAX 91 | kernel_size: 3 92 | stride: 2 93 | } 94 | } 95 | 96 | layer { 97 | name: "conv3" 98 | type: "Convolution" 99 | bottom: "pool2" 100 | top: "conv3" 101 | param { 102 | lr_mult: 1 103 | decay_mult: 1 104 | } 105 | param { 106 | lr_mult: 2 107 | decay_mult: 1 108 | } 109 | convolution_param { 110 | num_output: 64 111 | kernel_size: 3 112 | weight_filler { 113 | type: "xavier" 114 | } 115 | bias_filler { 116 | type: "constant" 117 | value: 0 118 | } 119 | } 120 | } 121 | layer { 122 | name: "prelu3" 123 | type: "PReLU" 124 | bottom: "conv3" 125 | top: "conv3" 126 | } 127 | layer { 128 | name: "pool3" 129 | type: "Pooling" 130 | bottom: "conv3" 131 | top: "pool3" 132 | pooling_param { 133 | pool: MAX 134 | kernel_size: 2 135 | stride: 2 136 | } 137 | } 138 | layer { 139 | name: "conv4" 140 | type: "Convolution" 141 | bottom: "pool3" 142 | top: "conv4" 143 | param { 144 | lr_mult: 1 145 | decay_mult: 1 146 | } 147 | param { 148 | lr_mult: 2 149 | decay_mult: 1 150 | } 151 | convolution_param { 152 | num_output: 128 153 | kernel_size: 2 154 | weight_filler { 155 | type: "xavier" 156 | } 157 | bias_filler { 158 | type: "constant" 159 | value: 0 160 | } 161 | } 162 | } 163 | layer { 164 | name: "prelu4" 165 | type: "PReLU" 166 | bottom: "conv4" 167 | top: "conv4" 168 | } 169 | 170 | 171 | layer { 172 | name: "conv5" 173 | type: "InnerProduct" 174 | bottom: "conv4" 175 | top: "conv5" 176 | param { 177 | lr_mult: 1 178 | decay_mult: 1 179 | } 180 | param { 181 | lr_mult: 2 182 | decay_mult: 1 183 | } 184 | inner_product_param { 185 | #kernel_size: 3 186 | num_output: 256 187 | weight_filler { 188 | type: "xavier" 189 | } 190 | bias_filler { 191 | type: "constant" 192 | value: 0 193 | } 194 | } 195 | } 196 | 197 | layer { 198 | name: "drop5" 199 | type: "Dropout" 200 | bottom: "conv5" 201 | top: "conv5" 202 | dropout_param { 
203 | dropout_ratio: 0.25 204 | } 205 | } 206 | layer { 207 | name: "prelu5" 208 | type: "PReLU" 209 | bottom: "conv5" 210 | top: "conv5" 211 | } 212 | 213 | 214 | layer { 215 | name: "conv6-1" 216 | type: "InnerProduct" 217 | bottom: "conv5" 218 | top: "conv6-1" 219 | param { 220 | lr_mult: 1 221 | decay_mult: 1 222 | } 223 | param { 224 | lr_mult: 2 225 | decay_mult: 1 226 | } 227 | inner_product_param { 228 | #kernel_size: 1 229 | num_output: 2 230 | weight_filler { 231 | type: "xavier" 232 | } 233 | bias_filler { 234 | type: "constant" 235 | value: 0 236 | } 237 | } 238 | } 239 | layer { 240 | name: "conv6-2" 241 | type: "InnerProduct" 242 | bottom: "conv5" 243 | top: "conv6-2" 244 | param { 245 | lr_mult: 1 246 | decay_mult: 1 247 | } 248 | param { 249 | lr_mult: 2 250 | decay_mult: 1 251 | } 252 | inner_product_param { 253 | #kernel_size: 1 254 | num_output: 4 255 | weight_filler { 256 | type: "xavier" 257 | } 258 | bias_filler { 259 | type: "constant" 260 | value: 0 261 | } 262 | } 263 | } 264 | layer { 265 | name: "conv6-3" 266 | type: "InnerProduct" 267 | bottom: "conv5" 268 | top: "conv6-3" 269 | param { 270 | lr_mult: 1 271 | decay_mult: 1 272 | } 273 | param { 274 | lr_mult: 2 275 | decay_mult: 1 276 | } 277 | inner_product_param { 278 | #kernel_size: 1 279 | num_output: 10 280 | weight_filler { 281 | type: "xavier" 282 | } 283 | bias_filler { 284 | type: "constant" 285 | value: 0 286 | } 287 | } 288 | } 289 | layer { 290 | name: "prob1" 291 | type: "Softmax" 292 | bottom: "conv6-1" 293 | top: "prob1" 294 | } 295 | -------------------------------------------------------------------------------- /src/models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | from collections import OrderedDict 6 | from src.utils import try_gpu 7 | 8 | 9 | class Flatten(nn.Module): 10 | def __init__(self): 11 | super(Flatten, self).__init__() 12 | 13 | def forward(self, x): 14 | """[summary] 15 | 16 | Arguments: 17 | x {[type]} -- a float tensor with shape [batch_size, c, h, w]. 18 | 19 | Returns: 20 | [type] -- a float tensor with shape [batch_size, c*h*w]. 
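        Example (illustrative sketch, not part of the original code): the transpose in the body
        swaps H and W before flattening, matching the W/H ordering used when the Caffe weights
        were converted (see scripts/caffemodel_to_pytorchmodel.py, which transposes conv kernels):

            >>> flatten = Flatten()
            >>> flatten(torch.zeros(2, 64, 3, 3)).shape
            torch.Size([2, 576])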
21 | """ 22 | 23 | # without this pretrained model isn't working 24 | x = x.transpose(3, 2).contiguous() 25 | 26 | # "flatten" the C * H * W values into a single vector per image 27 | return x.view(x.size(0), -1) 28 | 29 | 30 | class _Net(nn.Module): 31 | def __init__(self, is_training=False, device=try_gpu()): 32 | super(_Net, self).__init__() 33 | 34 | self._init_net() 35 | 36 | # Move tensor to target device 37 | self.to(device) 38 | 39 | self.train(is_training) 40 | 41 | def _init_net(self): 42 | raise NotImplementedError 43 | 44 | def load(self, model_path): 45 | states_to_load = np.load(model_path, allow_pickle=True)[()] 46 | model_state = self.state_dict() 47 | model_state.update(states_to_load) 48 | self.load_state_dict(model_state) 49 | 50 | 51 | class PNet(_Net): 52 | """ 53 | Model's state_dict: 54 | backend.conv1.weight torch.Size([10, 3, 3, 3]) 55 | backend.conv1.bias torch.Size([10]) 56 | backend.prelu1.weight torch.Size([10]) 57 | backend.conv2.weight torch.Size([16, 10, 3, 3]) 58 | backend.conv2.bias torch.Size([16]) 59 | backend.prelu2.weight torch.Size([16]) 60 | backend.conv3.weight torch.Size([32, 16, 3, 3]) 61 | backend.conv3.bias torch.Size([32]) 62 | backend.prelu3.weight torch.Size([32]) 63 | cls_prob.conv4_1.weight torch.Size([2, 32, 1, 1]) 64 | cls_prob.conv4_1.bias torch.Size([2]) 65 | bbox_offset.conv4_2.weight torch.Size([4, 32, 1, 1]) 66 | bbox_offset.conv4_2.bias torch.Size([4]) 67 | """ 68 | 69 | def __init__(self, **kwargs): 70 | super(PNet, self).__init__(**kwargs) 71 | 72 | def _init_net(self): 73 | self.backend = nn.Sequential(OrderedDict([ 74 | ('conv1', nn.Conv2d(3, 10, kernel_size=3, stride=1)), 75 | ('prelu1', nn.PReLU(10)), 76 | ('pool1', nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)), 77 | 78 | ('conv2', nn.Conv2d(10, 16, kernel_size=3, stride=1)), 79 | ('prelu2', nn.PReLU(16)), 80 | 81 | ('conv3', nn.Conv2d(16, 32, kernel_size=3, stride=1)), 82 | ('prelu3', nn.PReLU(32)) 83 | ])) 84 | 85 | self.cls_prob = nn.Sequential(OrderedDict([ 86 | ('conv4_1', nn.Conv2d(32, 2, 1, 1)), 87 | ('softmax', nn.Softmax(dim=1)) 88 | ])) 89 | 90 | self.bbox_offset = nn.Sequential(OrderedDict([ 91 | ('conv4_2', nn.Conv2d(32, 4, 1, 1)) 92 | ])) 93 | 94 | def forward(self, x): 95 | """[summary] 96 | 97 | Arguments: 98 | x {torch.float32} -- a float tensor with shape [batch_size, 3, h, w]. 99 | 100 | Returns: 101 | cls_probs {torch.float32} -- a float tensor with shape [batch_size, 2, h, w]. 102 | offsets {torch.float32} -- a float tensor with shape [batch_size, 4, h, w]. 
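        Example (illustrative only; the weights path is the one used in src/detect.py): P-Net is
        fully convolutional, so the smallest valid input of 12x12 gives 1x1 output maps, and
        larger inputs give proportionally larger [h, w] maps:

            >>> net = PNet()
            >>> net.load('../weights/pnet.npy')
            >>> x = torch.randn(1, 3, 12, 12, device=try_gpu())
            >>> cls_probs, offsets = net(x)
            >>> cls_probs.shape, offsets.shape
            (torch.Size([1, 2, 1, 1]), torch.Size([1, 4, 1, 1]))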
103 | """ 104 | 105 | feature_map = self.backend(x) 106 | 107 | # face classification 108 | cls_probs = self.cls_prob(feature_map) 109 | 110 | # bounding box regression 111 | offsets = self.bbox_offset(feature_map) 112 | 113 | return cls_probs, offsets 114 | 115 | 116 | class RNet(_Net): 117 | """ 118 | Model's state_dict: 119 | backend.conv1.weight torch.Size([28, 3, 3, 3]) 120 | backend.conv1.bias torch.Size([28]) 121 | backend.prelu1.weight torch.Size([28]) 122 | backend.conv2.weight torch.Size([48, 28, 3, 3]) 123 | backend.conv2.bias torch.Size([48]) 124 | backend.prelu2.weight torch.Size([48]) 125 | backend.conv3.weight torch.Size([64, 48, 2, 2]) 126 | backend.conv3.bias torch.Size([64]) 127 | backend.prelu3.weight torch.Size([64]) 128 | backend.conv4.weight torch.Size([128, 576]) 129 | backend.conv4.bias torch.Size([128]) 130 | backend.prelu4.weight torch.Size([128]) 131 | cls_prob.conv5_1.weight torch.Size([2, 128]) 132 | cls_prob.conv5_1.bias torch.Size([2]) 133 | bbox_offset.conv5_2.weight torch.Size([4, 128]) 134 | bbox_offset.conv5_2.bias torch.Size([4]) 135 | """ 136 | 137 | def __init__(self, **kwargs): 138 | super(RNet, self).__init__(**kwargs) 139 | 140 | def _init_net(self): 141 | self.backend = nn.Sequential(OrderedDict([ 142 | ('conv1', nn.Conv2d(3, 28, 3, 1)), 143 | ('prelu1', nn.PReLU(28)), 144 | ('pool1', nn.MaxPool2d(3, 2, ceil_mode=True)), 145 | 146 | ('conv2', nn.Conv2d(28, 48, 3, 1)), 147 | ('prelu2', nn.PReLU(48)), 148 | ('pool2', nn.MaxPool2d(3, 2, ceil_mode=True)), 149 | 150 | ('conv3', nn.Conv2d(48, 64, 2, 1)), 151 | ('prelu3', nn.PReLU(64)), 152 | 153 | ('flatten', Flatten()), 154 | # Linear(in_features, out_features, bias=True) 155 | ('conv4', nn.Linear(576, 128)), 156 | ('prelu4', nn.PReLU(128)) 157 | ])) 158 | 159 | self.cls_prob = nn.Sequential(OrderedDict([ 160 | ('conv5_1', nn.Linear(128, 2)), 161 | ('softmax', nn.Softmax(dim=1)) 162 | ])) 163 | 164 | self.bbox_offset = nn.Sequential(OrderedDict([ 165 | ('conv5_2', nn.Linear(128, 4)) 166 | ])) 167 | 168 | def forward(self, x): 169 | """[summary] 170 | 171 | Arguments: 172 | x {torch.float32} -- a float tensor with shape [batch_size, 3, h, w]. 173 | 174 | Returns: 175 | cls_probs {torch.float32} -- a float tensor with shape [batch_size, 2]. 176 | offsets {torch.float32} -- a float tensor with shape [batch_size, 4]. 
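        Example (illustrative only; the weights path is the one used in src/detect.py): R-Net
        expects 24x24 crops, e.g. a batch of 5 crops:

            >>> net = RNet()
            >>> net.load('../weights/rnet.npy')
            >>> x = torch.randn(5, 3, 24, 24, device=try_gpu())
            >>> cls_probs, offsets = net(x)
            >>> cls_probs.shape, offsets.shape
            (torch.Size([5, 2]), torch.Size([5, 4]))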
177 | """ 178 | 179 | feature_map = self.backend(x) 180 | 181 | # face classification 182 | cls_probs = self.cls_prob(feature_map) 183 | 184 | # bounding box regression 185 | offsets = self.bbox_offset(feature_map) 186 | 187 | return cls_probs, offsets 188 | 189 | 190 | class ONet(_Net): 191 | """ 192 | Model's state_dict: 193 | backend.conv1.weight torch.Size([32, 3, 3, 3]) 194 | backend.conv1.bias torch.Size([32]) 195 | backend.prelu1.weight torch.Size([32]) 196 | backend.conv2.weight torch.Size([64, 32, 3, 3]) 197 | backend.conv2.bias torch.Size([64]) 198 | backend.prelu2.weight torch.Size([64]) 199 | backend.conv3.weight torch.Size([64, 64, 3, 3]) 200 | backend.conv3.bias torch.Size([64]) 201 | backend.prelu3.weight torch.Size([64]) 202 | backend.conv4.weight torch.Size([128, 64, 2, 2]) 203 | backend.conv4.bias torch.Size([128]) 204 | backend.prelu4.weight torch.Size([128]) 205 | backend.conv5.weight torch.Size([256, 1152]) 206 | backend.conv5.bias torch.Size([256]) 207 | backend.prelu5.weight torch.Size([256]) 208 | cls_prob.conv6_1.weight torch.Size([2, 256]) 209 | cls_prob.conv6_1.bias torch.Size([2]) 210 | bbox_offset.conv6_2.weight torch.Size([4, 256]) 211 | bbox_offset.conv6_2.bias torch.Size([4]) 212 | landmarks.conv6_3.weight torch.Size([10, 256]) 213 | landmarks.conv6_3.bias torch.Size([10]) 214 | """ 215 | 216 | def __init__(self, **kwargs): 217 | super(ONet, self).__init__(**kwargs) 218 | 219 | def _init_net(self): 220 | self.backend = nn.Sequential(OrderedDict([ 221 | ('conv1', nn.Conv2d(3, 32, 3, 1)), 222 | ('prelu1', nn.PReLU(32)), 223 | ('pool1', nn.MaxPool2d(3, 2, ceil_mode=True)), 224 | 225 | ('conv2', nn.Conv2d(32, 64, 3, 1)), 226 | ('prelu2', nn.PReLU(64)), 227 | ('pool2', nn.MaxPool2d(3, 2, ceil_mode=True)), 228 | 229 | ('conv3', nn.Conv2d(64, 64, 3, 1)), 230 | ('prelu3', nn.PReLU(64)), 231 | ('pool3', nn.MaxPool2d(2, 2, ceil_mode=True)), 232 | 233 | ('conv4', nn.Conv2d(64, 128, 2, 1)), 234 | ('prelu4', nn.PReLU(128)), 235 | 236 | ('flatten', Flatten()), 237 | ('conv5', nn.Linear(1152, 256)), 238 | ('drop5', nn.Dropout(0.25)), 239 | ('prelu5', nn.PReLU(256)) 240 | ])) 241 | 242 | self.cls_prob = nn.Sequential(OrderedDict([ 243 | ('conv6_1', nn.Linear(256, 2)), 244 | ('softmax', nn.Softmax(dim=1)) 245 | ])) 246 | 247 | self.bbox_offset = nn.Sequential(OrderedDict([ 248 | ('conv6_2', nn.Linear(256, 4)) 249 | ])) 250 | 251 | self.landmarks = nn.Sequential(OrderedDict([ 252 | ('conv6_3', nn.Linear(256, 10)) 253 | ])) 254 | 255 | def forward(self, x): 256 | """[summary] 257 | 258 | Arguments: 259 | x {torch.float32} -- a float tensor with shape [batch_size, 3, h, w]. 260 | 261 | Returns: 262 | cls_probs {torch.float32} -- a float tensor with shape [batch_size, 2]. 263 | offsets {torch.float32} -- a float tensor with shape [batch_size, 4]. 264 | landmarks {torch.float32} -- a float tensor with shape [batch_size, 10]. 
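        Example (illustrative only; the weights path is the one used in src/detect.py): O-Net
        expects 48x48 crops and additionally outputs landmark coordinates:

            >>> net = ONet()
            >>> net.load('../weights/onet.npy')
            >>> x = torch.randn(5, 3, 48, 48, device=try_gpu())
            >>> cls_probs, offsets, landmarks = net(x)
            >>> cls_probs.shape, offsets.shape, landmarks.shape
            (torch.Size([5, 2]), torch.Size([5, 4]), torch.Size([5, 10]))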
265 | """ 266 | 267 | feature_map = self.backend(x) 268 | 269 | # face classification 270 | cls_probs = self.cls_prob(feature_map) 271 | 272 | # bounding box regression 273 | offsets = self.bbox_offset(feature_map) 274 | 275 | # Ficial landmark localization 276 | landmarks = self.landmarks(feature_map) 277 | 278 | return cls_probs, offsets, landmarks 279 | -------------------------------------------------------------------------------- /src/detect.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | sys.path.append(os.pardir) 4 | 5 | from src.utils import try_gpu, set_figsize, show_bboxes 6 | from src.models import PNet, RNet, ONet 7 | import math 8 | from PIL import Image 9 | import torchvision.transforms as transforms 10 | import torchvision 11 | import torch.nn.functional as F 12 | import torch 13 | 14 | 15 | def _no_grad(func): 16 | def wrapper(*args, **kwargs): 17 | with torch.no_grad(): 18 | return func(*args, **kwargs) 19 | 20 | return wrapper 21 | 22 | class FaceDetector(): 23 | 24 | def __init__(self): 25 | self.device = try_gpu() 26 | 27 | # LOAD MODELS 28 | self.pnet = PNet() 29 | self.rnet = RNet() 30 | self.onet = ONet() 31 | 32 | self.pnet.load('../weights/pnet.npy') 33 | self.rnet.load('../weights/rnet.npy') 34 | # TBD need to check if weight is on GPU 35 | self.onet.load('../weights/onet.npy') 36 | 37 | def _preprocess(self, img): 38 | """Preprocessing step before feeding the network. 39 | 40 | Arguments: 41 | img {PIL.Image} -- an instance of PIL.Image. 42 | or an image path 43 | 44 | Returns: 45 | {torch.float32} -- a float tensor of shape [1, C, H, W] in the range [-1.0, 1.0] 46 | """ 47 | 48 | if isinstance(img, str): 49 | img = Image.open(img) 50 | 51 | # The output of torchvision datasets are PILImage images of range [0, 1]. We transform them to Tensors of normalized range [-1, 1]. 52 | transform = transforms.Compose([ 53 | # Converts a PIL Image or numpy.ndarray (H x W x C) in the range [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0] 54 | transforms.ToTensor(), 55 | # Normalize a tensor image with mean and standard deviation 56 | # input[channel] = (input[channel] - mean[channel]) / std[channel] 57 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) 58 | ]) 59 | 60 | img = transform(img).to(self.device) 61 | img = img.unsqueeze(0) 62 | 63 | return img 64 | 65 | def detect(self, img, min_face_size=20.0, threshold=[0.6, 0.7, 0.8], factor=0.707, nms_threshold=[0.7, 0.7, 0.7]): 66 | """[summary] 67 | 68 | Arguments: 69 | img {[type]} -- an instance of PIL.Image. 70 | 71 | Keyword Arguments: 72 | min_face_size {float} -- a float number. (default: {20.0}) 73 | threshold {list} -- a list of length 3 (default: {[0.6, 0.7, 0.8]}) 74 | factor {float} -- [description] (default: {0.707}) 75 | nms_threshold {list} -- a list of length 3. 
(default: {[0.7, 0.7, 0.7]}) 76 | 77 | Returns: 78 | [type] -- [description] 79 | """ 80 | 81 | img = self._preprocess(img) 82 | 83 | scales = self.create_image_pyramid(img, min_face_size, factor) 84 | 85 | bounding_boxes = self.stage_one( 86 | img, scales, threshold[0], nms_threshold[0]) 87 | bounding_boxes = self.stage_two( 88 | img, bounding_boxes, threshold[1], nms_threshold[1]) 89 | bounding_boxes, _ = self.stage_three( 90 | img, bounding_boxes, threshold[2], nms_threshold[2]) 91 | 92 | return bounding_boxes 93 | 94 | def create_image_pyramid(self, img, min_face_size, factor): 95 | """BUILD AN IMAGE PYRAMID 96 | 97 | Arguments: 98 | img {torch.float32} -- a float tensor of shape [1, C, H, W] in the range [-1.0, 1.0] 99 | min_face_size {float} -- [description] 100 | factor {float} -- [description] 101 | 102 | Returns: 103 | {list} -- [description] 104 | """ 105 | _, _, height, width = img.shape 106 | min_length = min(height, width) 107 | 108 | min_detection_size = 12 109 | 110 | # scales for scaling the image 111 | scales = [] 112 | 113 | # scales the image so that 114 | # minimum size that we can detect equals to 115 | # minimum face size that we want to detect 116 | m = min_detection_size/min_face_size 117 | min_length *= m 118 | 119 | factor_count = 0 120 | while min_length > min_detection_size: 121 | scales.append(m*factor**factor_count) # TBD need to optimize here 122 | min_length *= factor 123 | factor_count += 1 124 | 125 | return scales 126 | 127 | def _generate_bboxes(self, cls_probs, offsets, scale, threshold): 128 | """Generate bounding boxes at places 129 | 130 | Arguments: 131 | cls_probs {[type]} -- a float tensor of shape [1, 2, n, m]. 132 | offsets {[type]} -- a float tensor of shape [1, 4, n, m]. 133 | scale {[type]} -- a float number, 134 | width and height of the image were scaled by this number. 135 | threshold {[type]} -- a float number. 136 | 137 | Returns: 138 | bounding_boxes {} -- a float tensor of shape [n_boxes, 4] 139 | scores {} -- a float tensor of shape [n_boxes] 140 | offsets {} -- a float tensor of shape [n_boxes, 4] 141 | """ 142 | 143 | # applying P-Net is equivalent, in some sense, to 144 | # moving 12x12 window with stride 2 145 | stride = 2 146 | cell_size = 12 147 | 148 | # extract positive probability and resize it as [n, m] dim tensor. 
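        # (cls_probs arrives with shape [1, 2, n, m]; channel 1 along dim 1 is the
        #  softmax "face" probability from P-Net's cls_prob head, so the slice below
        #  keeps an [n, m] map with one face probability per 12x12 window position)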
149 | cls_probs = cls_probs[0, 1, :, :] 150 | 151 | # indices of boxes where there is probably a face 152 | inds = (cls_probs > threshold).nonzero() 153 | 154 | if inds.shape[0] == 0: 155 | return torch.empty((0, 4), device=self.device), torch.empty((0), device=self.device), torch.empty((0, 4), device=self.device) 156 | 157 | # transformations of bounding boxes 158 | tx1, ty1, tx2, ty2 = [ 159 | offsets[0, i, inds[:, 0], inds[:, 1]] for i in range(4)] 160 | # they are defined as: 161 | # x1 = x * stride / scale 162 | # y1 = y * stride / scale 163 | # x2 = (x * stride + 12) / scale 164 | # y2 = (y * stride + 12) / scale 165 | # w = x2 - x1 + 1 166 | # h = y2 - y1 + 1 167 | # x1_true = x1 + tx1 * w 168 | # x2_true = x2 + tx2 * w 169 | # y1_true = y1 + ty1 * h 170 | # y2_true = y2 + ty2 * h 171 | 172 | offsets = torch.stack([tx1, ty1, tx2, ty2], dim=1) 173 | scores = cls_probs[inds[:, 0], inds[:, 1]] 174 | 175 | # P-Net is applied to scaled images 176 | # so we need to rescale bounding boxes back 177 | bounding_boxes = torch.stack([ 178 | (stride * inds[:, 1] + 1.0), 179 | (stride * inds[:, 0] + 1.0), 180 | (stride * inds[:, 1] + 1.0 + cell_size), 181 | (stride * inds[:, 0] + 1.0 + cell_size), 182 | ]).transpose(0, 1).float() 183 | # why one is added? 184 | bounding_boxes = bounding_boxes / scale 185 | 186 | return bounding_boxes, scores, offsets 187 | 188 | def _refine_boxes(self, bboxes, height, width): 189 | bboxes = torch.max(torch.zeros_like( 190 | bboxes, device=self.device), bboxes) 191 | sizes = torch.tensor([[width, height, width, height]] * 192 | bboxes.shape[0], dtype=torch.float32, device=self.device) 193 | bboxes = torch.min(bboxes, sizes) 194 | 195 | return bboxes 196 | 197 | def _get_image_boxes(self, bboxes, img, size=24): 198 | """[summary] 199 | 200 | Arguments: 201 | bboxes {torch.float32} -- a float tensor of shape [n, 4]. 202 | img {torch.float32} -- a float tensor of shape [1, C, H, W] in the range [-1.0, 1.0] 203 | 204 | Keyword Arguments: 205 | size {int} -- an integer, size of cutouts. (default: {24}) 206 | 207 | Returns: 208 | {torch.float32} -- a float tensor of shape [n, 3, size, size]. 209 | """ 210 | 211 | _, _, height, width = img.shape 212 | bboxes = self._refine_boxes(bboxes, height, width) 213 | 214 | img_boxes = [] 215 | 216 | for box in bboxes: 217 | im = img[:, :, box[1].int(): box[3].int(), 218 | box[0].int(): box[2].int()] 219 | im = F.interpolate(im, size=(size, size), 220 | mode='bilinear', align_corners=False) 221 | img_boxes.append(im) 222 | 223 | img_boxes = torch.cat(img_boxes, 0) 224 | 225 | return img_boxes 226 | 227 | def _convert_to_square(self, bboxes): 228 | """Convert bounding boxes to a square form. 229 | 230 | Arguments: 231 | bboxes {torch.float32} -- a float tensor of shape [n, 4] 232 | 233 | Returns: 234 | square_bboxes {torch.float32} -- a float tensor of shape [n, 4], 235 | squared bounding boxes. 
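        Example (hand-worked, illustrative): a box [0, 0, 9, 19] has w = 10 and h = 20,
        so max_side = 20 and the squared box becomes [-5, 0, 14, 19]; the box centre is
        preserved, and out-of-image coordinates are clipped later by _refine_boxes.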
236 | """ 237 | square_bboxes = torch.zeros_like(bboxes, device=self.device) 238 | x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] 239 | h = y2 - y1 + 1.0 240 | w = x2 - x1 + 1.0 241 | max_side = torch.max(h, w) 242 | square_bboxes[:, 0] = x1 + w*0.5 - max_side*0.5 243 | square_bboxes[:, 1] = y1 + h*0.5 - max_side*0.5 244 | square_bboxes[:, 2] = square_bboxes[:, 0] + max_side - 1.0 245 | square_bboxes[:, 3] = square_bboxes[:, 1] + max_side - 1.0 246 | square_bboxes = torch.round(square_bboxes) 247 | 248 | return square_bboxes 249 | 250 | def _calibrate_box(self, bboxes, offsets): 251 | """Transform bounding boxes to be more like true bounding boxes. 252 | 'offsets' is one of the outputs of the nets. 253 | 254 | Arguments: 255 | bboxes {torch.float32} -- a float tensor of shape [n, 4]. 256 | offsets {torch.float32} -- a float tensor of shape [n, 4]. 257 | 258 | Returns: 259 | {torch.float32} -- a float tensor of shape [n, 4]. 260 | """ 261 | x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] 262 | w = x2 - x1 + 1.0 263 | h = y2 - y1 + 1.0 264 | w = torch.unsqueeze(w, 1) 265 | h = torch.unsqueeze(h, 1) 266 | 267 | # this is what happening here: 268 | # tx1, ty1, tx2, ty2 = [offsets[:, i] for i in range(4)] 269 | # x1_true = x1 + tx1 * w 270 | # y1_true = y1 + ty1 * h 271 | # x2_true = x2 + tx2 * w 272 | # y2_true = y2 + ty2 * h 273 | # below is just more compact form of this 274 | 275 | # are offsets always such that 276 | # x1 < x2 and y1 < y2 ? 277 | translation = torch.cat([w, h, w, h], dim=1) * offsets 278 | bboxes = bboxes + translation 279 | 280 | return bboxes 281 | 282 | @_no_grad 283 | def stage_one(self, img, scales, threshold, nms_threshold): 284 | """Run P-Net, generate bounding boxes, and do NMS. 285 | 286 | Arguments: 287 | img {torch.float32} -- a float tensor of shape [1, C, H, W] in the range [-1.0, 1.0] 288 | scales {list} -- a float list, 289 | scale width and height of the image by this number. 290 | threshold {float} -- a float number, 291 | threshold on the probability of a face when generating 292 | bounding boxes from predictions of the net. 
293 | nms_threshold {float} -- [description] 294 | 295 | Returns: 296 | candidate_boxes {torch.float32} -- a float tensor of shape [n_boxes, 4] 297 | """ 298 | 299 | candidate_boxes = torch.empty((0, 4), device=self.device) 300 | candidate_scores = torch.empty((0), device=self.device) 301 | candidate_offsets = torch.empty((0, 4), device=self.device) 302 | 303 | # scale the image 304 | for scale in scales: 305 | _, _, height, width = img.shape 306 | sh, sw = math.ceil(height * scale), math.ceil(width * scale) 307 | resize_img = F.interpolate(img, size=( 308 | sh, sw), mode='bilinear', align_corners=False) 309 | 310 | # cls_probs: probability of a face at each sliding window 311 | # offsets: transformations to true bounding boxes 312 | cls_probs, offsets = self.pnet(resize_img) 313 | 314 | bboxes, scores, offsets = self._generate_bboxes( 315 | cls_probs, offsets, scale, threshold) 316 | 317 | candidate_boxes = torch.cat((candidate_boxes, bboxes)) 318 | candidate_scores = torch.cat((candidate_scores, scores)) 319 | candidate_offsets = torch.cat((candidate_offsets, offsets)) 320 | 321 | keep = torchvision.ops.nms( 322 | candidate_boxes, candidate_scores, iou_threshold=nms_threshold) 323 | candidate_boxes = candidate_boxes[keep] 324 | candidate_scores = candidate_scores[keep] 325 | candidate_offsets = candidate_offsets[keep] 326 | 327 | # use offsets predicted by pnet to transform bounding boxes 328 | candidate_boxes = self._calibrate_box( 329 | candidate_boxes, candidate_offsets) 330 | 331 | candidate_boxes = self._convert_to_square(candidate_boxes) 332 | 333 | return candidate_boxes 334 | 335 | @_no_grad 336 | def stage_two(self, img, bboxes, threshold, nms_threshold): 337 | """Run R-Net, generate bounding boxes, and do NMS. 338 | 339 | Arguments: 340 | img {torch.float32} -- a float tensor of shape [1, C, H, W] in the range [-1.0, 1.0] 341 | bboxes {torch.float32} -- [description] 342 | threshold {float} -- [description] 343 | nms_threshold {float} -- [description] 344 | 345 | Returns: 346 | {torch.float32} -- [description] 347 | """ 348 | 349 | # no candidate face found. 350 | if bboxes.shape[0] == 0: 351 | return bboxes 352 | 353 | img_boxes = self._get_image_boxes(bboxes, img, size=24) 354 | 355 | cls_probs, offsets = self.rnet(img_boxes) 356 | 357 | scores = cls_probs[:, 1] 358 | keep = (scores > threshold) 359 | bboxes = bboxes[keep] 360 | offsets = offsets[keep] 361 | scores = scores[keep] 362 | 363 | if bboxes.shape[0] == 0: # TBD return value need to be check 364 | return bboxes 365 | 366 | keep = torchvision.ops.nms(bboxes, scores, iou_threshold=nms_threshold) 367 | bboxes = bboxes[keep] 368 | offsets = offsets[keep] 369 | 370 | bboxes = self._calibrate_box(bboxes, offsets) 371 | bboxes = self._convert_to_square(bboxes) 372 | 373 | return bboxes 374 | 375 | @_no_grad 376 | def stage_three(self, img, bboxes, threshold, nms_threshold): 377 | """Run O-Net, generate bounding boxes, and do NMS. 
378 | 379 | Arguments: 380 | img {torch.float32} -- a float tensor of shape [1, C, H, W] in the range [-1.0, 1.0] 381 | bboxes {torch.float32} -- a float tensor of shape [n, 4], candidate boxes from stage two. 382 | threshold {float} -- threshold on the face probability predicted by O-Net. 383 | nms_threshold {float} -- IoU threshold for non-maximum suppression. 384 | 385 | Returns: 386 | {torch.float32} -- a float tensor of shape [n_boxes, 4], final bounding boxes, plus a placeholder tensor for facial landmarks (currently empty, TBD). 387 | """ 388 | if bboxes.shape[0] == 0: 389 | return bboxes, torch.empty(0, device=self.device) 390 | 391 | img_boxes = self._get_image_boxes(bboxes, img, size=48) 392 | cls_probs, offsets, landmarks = self.onet(img_boxes) 393 | 394 | scores = cls_probs[:, 1] 395 | keep = (scores > threshold) 396 | bboxes = bboxes[keep] 397 | offsets = offsets[keep] 398 | scores = scores[keep] 399 | landmarks = landmarks[keep] 400 | 401 | if bboxes.shape[0] == 0: 402 | return bboxes, torch.empty(0, device=self.device) # TBD: landmark output not implemented yet 403 | 404 | # compute landmark points from the O-Net landmark output 405 | # TBD -- the landmark predictions are currently discarded 406 | 407 | bboxes = self._calibrate_box(bboxes, offsets) 408 | keep = torchvision.ops.nms(bboxes, scores, iou_threshold=nms_threshold) 409 | bboxes = bboxes[keep] 410 | offsets = offsets[keep] 411 | 412 | return bboxes, torch.empty(0, device=self.device) 413 | 414 | if __name__ == '__main__': 415 | img = Image.open('../assets/office1.jpg') 416 | detector = FaceDetector() 417 | bounding_boxes = detector.detect(img) -------------------------------------------------------------------------------- /caffe_models/det4.prototxt: -------------------------------------------------------------------------------- 1 | name: "LNet" 2 | input: "data" 3 | input_dim: 1 4 | input_dim: 15 5 | input_dim: 24 6 | input_dim: 24 7 | 8 | layer { 9 | name: "slicer_data" 10 | type: "Slice" 11 | bottom: "data" 12 | top: "data241" 13 | top: "data242" 14 | top: "data243" 15 | top: "data244" 16 | top: "data245" 17 | slice_param { 18 | axis: 1 19 | slice_point: 3 20 | slice_point: 6 21 | slice_point: 9 22 | slice_point: 12 23 | } 24 | } 25 | layer { 26 | name: "conv1_1" 27 | type: "Convolution" 28 | bottom: "data241" 29 | top: "conv1_1" 30 | param { 31 | lr_mult: 1 32 | decay_mult: 1 33 | } 34 | param { 35 | lr_mult: 2 36 | decay_mult: 1 37 | } 38 | convolution_param { 39 | num_output: 28 40 | kernel_size: 3 41 | stride: 1 42 | weight_filler { 43 | type: "xavier" 44 | } 45 | bias_filler { 46 | type: "constant" 47 | value: 0 48 | } 49 | } 50 | 51 | } 52 | layer { 53 | name: "prelu1_1" 54 | type: "PReLU" 55 | bottom: "conv1_1" 56 | top: "conv1_1" 57 | 58 | } 59 | layer { 60 | name: "pool1_1" 61 | type: "Pooling" 62 | bottom: "conv1_1" 63 | top: "pool1_1" 64 | pooling_param { 65 | pool: MAX 66 | kernel_size: 3 67 | stride: 2 68 | } 69 | } 70 | 71 | layer { 72 | name: "conv2_1" 73 | type: "Convolution" 74 | bottom: "pool1_1" 75 | top: "conv2_1" 76 | param { 77 | lr_mult: 1 78 | decay_mult: 1 79 | } 80 | param { 81 | lr_mult: 2 82 | decay_mult: 1 83 | } 84 | convolution_param { 85 | num_output: 48 86 | kernel_size: 3 87 | stride: 1 88 | weight_filler { 89 | type: "xavier" 90 | } 91 | bias_filler { 92 | type: "constant" 93 | value: 0 94 | } 95 | } 96 | 97 | } 98 | layer { 99 | name: "prelu2_1" 100 | type: "PReLU" 101 | bottom: "conv2_1" 102 | top: "conv2_1" 103 | } 104 | layer { 105 | name: "pool2_1" 106 | type: "Pooling" 107 | bottom: "conv2_1" 108 | top: "pool2_1" 109 | pooling_param { 110 | pool: MAX 111 | kernel_size: 3 112 | stride: 2 113 | } 114 | 115 | } 116 | layer { 117 | name: "conv3_1" 118 | type: "Convolution" 119 | bottom: "pool2_1" 120 | top: "conv3_1" 121 | param { 122 | lr_mult: 1 123 | decay_mult: 1 124 | } 125 | param { 126 | lr_mult: 2 127 | decay_mult:
1 128 | } 129 | convolution_param { 130 | num_output: 64 131 | kernel_size: 2 132 | stride: 1 133 | weight_filler { 134 | type: "xavier" 135 | } 136 | bias_filler { 137 | type: "constant" 138 | value: 0 139 | } 140 | } 141 | 142 | } 143 | layer { 144 | name: "prelu3_1" 145 | type: "PReLU" 146 | bottom: "conv3_1" 147 | top: "conv3_1" 148 | } 149 | ########################## 150 | layer { 151 | name: "conv1_2" 152 | type: "Convolution" 153 | bottom: "data242" 154 | top: "conv1_2" 155 | param { 156 | lr_mult: 1 157 | decay_mult: 1 158 | } 159 | param { 160 | lr_mult: 2 161 | decay_mult: 1 162 | } 163 | convolution_param { 164 | num_output: 28 165 | kernel_size: 3 166 | stride: 1 167 | weight_filler { 168 | type: "xavier" 169 | } 170 | bias_filler { 171 | type: "constant" 172 | value: 0 173 | } 174 | } 175 | 176 | } 177 | layer { 178 | name: "prelu1_2" 179 | type: "PReLU" 180 | bottom: "conv1_2" 181 | top: "conv1_2" 182 | 183 | } 184 | layer { 185 | name: "pool1_2" 186 | type: "Pooling" 187 | bottom: "conv1_2" 188 | top: "pool1_2" 189 | pooling_param { 190 | pool: MAX 191 | kernel_size: 3 192 | stride: 2 193 | } 194 | } 195 | 196 | layer { 197 | name: "conv2_2" 198 | type: "Convolution" 199 | bottom: "pool1_2" 200 | top: "conv2_2" 201 | param { 202 | lr_mult: 1 203 | decay_mult: 1 204 | } 205 | param { 206 | lr_mult: 2 207 | decay_mult: 1 208 | } 209 | convolution_param { 210 | num_output: 48 211 | kernel_size: 3 212 | stride: 1 213 | weight_filler { 214 | type: "xavier" 215 | } 216 | bias_filler { 217 | type: "constant" 218 | value: 0 219 | } 220 | } 221 | 222 | } 223 | layer { 224 | name: "prelu2_2" 225 | type: "PReLU" 226 | bottom: "conv2_2" 227 | top: "conv2_2" 228 | } 229 | layer { 230 | name: "pool2_2" 231 | type: "Pooling" 232 | bottom: "conv2_2" 233 | top: "pool2_2" 234 | pooling_param { 235 | pool: MAX 236 | kernel_size: 3 237 | stride: 2 238 | } 239 | 240 | } 241 | layer { 242 | name: "conv3_2" 243 | type: "Convolution" 244 | bottom: "pool2_2" 245 | top: "conv3_2" 246 | param { 247 | lr_mult: 1 248 | decay_mult: 1 249 | } 250 | param { 251 | lr_mult: 2 252 | decay_mult: 1 253 | } 254 | convolution_param { 255 | num_output: 64 256 | kernel_size: 2 257 | stride: 1 258 | weight_filler { 259 | type: "xavier" 260 | } 261 | bias_filler { 262 | type: "constant" 263 | value: 0 264 | } 265 | } 266 | 267 | } 268 | layer { 269 | name: "prelu3_2" 270 | type: "PReLU" 271 | bottom: "conv3_2" 272 | top: "conv3_2" 273 | } 274 | ########################## 275 | ########################## 276 | layer { 277 | name: "conv1_3" 278 | type: "Convolution" 279 | bottom: "data243" 280 | top: "conv1_3" 281 | param { 282 | lr_mult: 1 283 | decay_mult: 1 284 | } 285 | param { 286 | lr_mult: 2 287 | decay_mult: 1 288 | } 289 | convolution_param { 290 | num_output: 28 291 | kernel_size: 3 292 | stride: 1 293 | weight_filler { 294 | type: "xavier" 295 | } 296 | bias_filler { 297 | type: "constant" 298 | value: 0 299 | } 300 | } 301 | 302 | } 303 | layer { 304 | name: "prelu1_3" 305 | type: "PReLU" 306 | bottom: "conv1_3" 307 | top: "conv1_3" 308 | 309 | } 310 | layer { 311 | name: "pool1_3" 312 | type: "Pooling" 313 | bottom: "conv1_3" 314 | top: "pool1_3" 315 | pooling_param { 316 | pool: MAX 317 | kernel_size: 3 318 | stride: 2 319 | } 320 | } 321 | 322 | layer { 323 | name: "conv2_3" 324 | type: "Convolution" 325 | bottom: "pool1_3" 326 | top: "conv2_3" 327 | param { 328 | lr_mult: 1 329 | decay_mult: 1 330 | } 331 | param { 332 | lr_mult: 2 333 | decay_mult: 1 334 | } 335 | convolution_param { 336 | num_output: 
48 337 | kernel_size: 3 338 | stride: 1 339 | weight_filler { 340 | type: "xavier" 341 | } 342 | bias_filler { 343 | type: "constant" 344 | value: 0 345 | } 346 | } 347 | 348 | } 349 | layer { 350 | name: "prelu2_3" 351 | type: "PReLU" 352 | bottom: "conv2_3" 353 | top: "conv2_3" 354 | } 355 | layer { 356 | name: "pool2_3" 357 | type: "Pooling" 358 | bottom: "conv2_3" 359 | top: "pool2_3" 360 | pooling_param { 361 | pool: MAX 362 | kernel_size: 3 363 | stride: 2 364 | } 365 | 366 | } 367 | layer { 368 | name: "conv3_3" 369 | type: "Convolution" 370 | bottom: "pool2_3" 371 | top: "conv3_3" 372 | param { 373 | lr_mult: 1 374 | decay_mult: 1 375 | } 376 | param { 377 | lr_mult: 2 378 | decay_mult: 1 379 | } 380 | convolution_param { 381 | num_output: 64 382 | kernel_size: 2 383 | stride: 1 384 | weight_filler { 385 | type: "xavier" 386 | } 387 | bias_filler { 388 | type: "constant" 389 | value: 0 390 | } 391 | } 392 | 393 | } 394 | layer { 395 | name: "prelu3_3" 396 | type: "PReLU" 397 | bottom: "conv3_3" 398 | top: "conv3_3" 399 | } 400 | ########################## 401 | ########################## 402 | layer { 403 | name: "conv1_4" 404 | type: "Convolution" 405 | bottom: "data244" 406 | top: "conv1_4" 407 | param { 408 | lr_mult: 1 409 | decay_mult: 1 410 | } 411 | param { 412 | lr_mult: 2 413 | decay_mult: 1 414 | } 415 | convolution_param { 416 | num_output: 28 417 | kernel_size: 3 418 | stride: 1 419 | weight_filler { 420 | type: "xavier" 421 | } 422 | bias_filler { 423 | type: "constant" 424 | value: 0 425 | } 426 | } 427 | 428 | } 429 | layer { 430 | name: "prelu1_4" 431 | type: "PReLU" 432 | bottom: "conv1_4" 433 | top: "conv1_4" 434 | 435 | } 436 | layer { 437 | name: "pool1_4" 438 | type: "Pooling" 439 | bottom: "conv1_4" 440 | top: "pool1_4" 441 | pooling_param { 442 | pool: MAX 443 | kernel_size: 3 444 | stride: 2 445 | } 446 | } 447 | 448 | layer { 449 | name: "conv2_4" 450 | type: "Convolution" 451 | bottom: "pool1_4" 452 | top: "conv2_4" 453 | param { 454 | lr_mult: 1 455 | decay_mult: 1 456 | } 457 | param { 458 | lr_mult: 2 459 | decay_mult: 1 460 | } 461 | convolution_param { 462 | num_output: 48 463 | kernel_size: 3 464 | stride: 1 465 | weight_filler { 466 | type: "xavier" 467 | } 468 | bias_filler { 469 | type: "constant" 470 | value: 0 471 | } 472 | } 473 | 474 | } 475 | layer { 476 | name: "prelu2_4" 477 | type: "PReLU" 478 | bottom: "conv2_4" 479 | top: "conv2_4" 480 | } 481 | layer { 482 | name: "pool2_4" 483 | type: "Pooling" 484 | bottom: "conv2_4" 485 | top: "pool2_4" 486 | pooling_param { 487 | pool: MAX 488 | kernel_size: 3 489 | stride: 2 490 | } 491 | 492 | } 493 | layer { 494 | name: "conv3_4" 495 | type: "Convolution" 496 | bottom: "pool2_4" 497 | top: "conv3_4" 498 | param { 499 | lr_mult: 1 500 | decay_mult: 1 501 | } 502 | param { 503 | lr_mult: 2 504 | decay_mult: 1 505 | } 506 | convolution_param { 507 | num_output: 64 508 | kernel_size: 2 509 | stride: 1 510 | weight_filler { 511 | type: "xavier" 512 | } 513 | bias_filler { 514 | type: "constant" 515 | value: 0 516 | } 517 | } 518 | 519 | } 520 | layer { 521 | name: "prelu3_4" 522 | type: "PReLU" 523 | bottom: "conv3_4" 524 | top: "conv3_4" 525 | } 526 | ########################## 527 | ########################## 528 | layer { 529 | name: "conv1_5" 530 | type: "Convolution" 531 | bottom: "data245" 532 | top: "conv1_5" 533 | param { 534 | lr_mult: 1 535 | decay_mult: 1 536 | } 537 | param { 538 | lr_mult: 2 539 | decay_mult: 1 540 | } 541 | convolution_param { 542 | num_output: 28 543 | kernel_size: 3 
544 | stride: 1 545 | weight_filler { 546 | type: "xavier" 547 | } 548 | bias_filler { 549 | type: "constant" 550 | value: 0 551 | } 552 | } 553 | 554 | } 555 | layer { 556 | name: "prelu1_5" 557 | type: "PReLU" 558 | bottom: "conv1_5" 559 | top: "conv1_5" 560 | 561 | } 562 | layer { 563 | name: "pool1_5" 564 | type: "Pooling" 565 | bottom: "conv1_5" 566 | top: "pool1_5" 567 | pooling_param { 568 | pool: MAX 569 | kernel_size: 3 570 | stride: 2 571 | } 572 | } 573 | 574 | layer { 575 | name: "conv2_5" 576 | type: "Convolution" 577 | bottom: "pool1_5" 578 | top: "conv2_5" 579 | param { 580 | lr_mult: 1 581 | decay_mult: 1 582 | } 583 | param { 584 | lr_mult: 2 585 | decay_mult: 1 586 | } 587 | convolution_param { 588 | num_output: 48 589 | kernel_size: 3 590 | stride: 1 591 | weight_filler { 592 | type: "xavier" 593 | } 594 | bias_filler { 595 | type: "constant" 596 | value: 0 597 | } 598 | } 599 | 600 | } 601 | layer { 602 | name: "prelu2_5" 603 | type: "PReLU" 604 | bottom: "conv2_5" 605 | top: "conv2_5" 606 | } 607 | layer { 608 | name: "pool2_5" 609 | type: "Pooling" 610 | bottom: "conv2_5" 611 | top: "pool2_5" 612 | pooling_param { 613 | pool: MAX 614 | kernel_size: 3 615 | stride: 2 616 | } 617 | 618 | } 619 | layer { 620 | name: "conv3_5" 621 | type: "Convolution" 622 | bottom: "pool2_5" 623 | top: "conv3_5" 624 | param { 625 | lr_mult: 1 626 | decay_mult: 1 627 | } 628 | param { 629 | lr_mult: 2 630 | decay_mult: 1 631 | } 632 | convolution_param { 633 | num_output: 64 634 | kernel_size: 2 635 | stride: 1 636 | weight_filler { 637 | type: "xavier" 638 | } 639 | bias_filler { 640 | type: "constant" 641 | value: 0 642 | } 643 | } 644 | 645 | } 646 | layer { 647 | name: "prelu3_5" 648 | type: "PReLU" 649 | bottom: "conv3_5" 650 | top: "conv3_5" 651 | } 652 | ########################## 653 | layer { 654 | name: "concat" 655 | bottom: "conv3_1" 656 | bottom: "conv3_2" 657 | bottom: "conv3_3" 658 | bottom: "conv3_4" 659 | bottom: "conv3_5" 660 | top: "conv3" 661 | type: "Concat" 662 | concat_param { 663 | axis: 1 664 | } 665 | } 666 | ########################## 667 | layer { 668 | name: "fc4" 669 | type: "InnerProduct" 670 | bottom: "conv3" 671 | top: "fc4" 672 | param { 673 | lr_mult: 1 674 | decay_mult: 1 675 | } 676 | param { 677 | lr_mult: 2 678 | decay_mult: 1 679 | } 680 | inner_product_param { 681 | num_output: 256 682 | weight_filler { 683 | type: "xavier" 684 | } 685 | bias_filler { 686 | type: "constant" 687 | value: 0 688 | } 689 | } 690 | 691 | } 692 | layer { 693 | name: "prelu4" 694 | type: "PReLU" 695 | bottom: "fc4" 696 | top: "fc4" 697 | } 698 | ############################ 699 | layer { 700 | name: "fc4_1" 701 | type: "InnerProduct" 702 | bottom: "fc4" 703 | top: "fc4_1" 704 | param { 705 | lr_mult: 1 706 | decay_mult: 1 707 | } 708 | param { 709 | lr_mult: 2 710 | decay_mult: 1 711 | } 712 | inner_product_param { 713 | num_output: 64 714 | weight_filler { 715 | type: "xavier" 716 | } 717 | bias_filler { 718 | type: "constant" 719 | value: 0 720 | } 721 | } 722 | 723 | } 724 | layer { 725 | name: "prelu4_1" 726 | type: "PReLU" 727 | bottom: "fc4_1" 728 | top: "fc4_1" 729 | } 730 | layer { 731 | name: "fc5_1" 732 | type: "InnerProduct" 733 | bottom: "fc4_1" 734 | top: "fc5_1" 735 | param { 736 | lr_mult: 1 737 | decay_mult: 1 738 | } 739 | param { 740 | lr_mult: 2 741 | decay_mult: 1 742 | } 743 | inner_product_param { 744 | num_output: 2 745 | weight_filler { 746 | type: "xavier" 747 | #type: "constant" 748 | #value: 0 749 | } 750 | bias_filler { 751 | type: "constant" 
752 | value: 0 753 | } 754 | } 755 | } 756 | 757 | 758 | ######################### 759 | layer { 760 | name: "fc4_2" 761 | type: "InnerProduct" 762 | bottom: "fc4" 763 | top: "fc4_2" 764 | param { 765 | lr_mult: 1 766 | decay_mult: 1 767 | } 768 | param { 769 | lr_mult: 2 770 | decay_mult: 1 771 | } 772 | inner_product_param { 773 | num_output: 64 774 | weight_filler { 775 | type: "xavier" 776 | } 777 | bias_filler { 778 | type: "constant" 779 | value: 0 780 | } 781 | } 782 | 783 | } 784 | layer { 785 | name: "prelu4_2" 786 | type: "PReLU" 787 | bottom: "fc4_2" 788 | top: "fc4_2" 789 | } 790 | layer { 791 | name: "fc5_2" 792 | type: "InnerProduct" 793 | bottom: "fc4_2" 794 | top: "fc5_2" 795 | param { 796 | lr_mult: 1 797 | decay_mult: 1 798 | } 799 | param { 800 | lr_mult: 2 801 | decay_mult: 1 802 | } 803 | inner_product_param { 804 | num_output: 2 805 | weight_filler { 806 | type: "xavier" 807 | #type: "constant" 808 | #value: 0 809 | } 810 | bias_filler { 811 | type: "constant" 812 | value: 0 813 | } 814 | } 815 | } 816 | 817 | ######################### 818 | layer { 819 | name: "fc4_3" 820 | type: "InnerProduct" 821 | bottom: "fc4" 822 | top: "fc4_3" 823 | param { 824 | lr_mult: 1 825 | decay_mult: 1 826 | } 827 | param { 828 | lr_mult: 2 829 | decay_mult: 1 830 | } 831 | inner_product_param { 832 | num_output: 64 833 | weight_filler { 834 | type: "xavier" 835 | } 836 | bias_filler { 837 | type: "constant" 838 | value: 0 839 | } 840 | } 841 | 842 | } 843 | layer { 844 | name: "prelu4_3" 845 | type: "PReLU" 846 | bottom: "fc4_3" 847 | top: "fc4_3" 848 | } 849 | layer { 850 | name: "fc5_3" 851 | type: "InnerProduct" 852 | bottom: "fc4_3" 853 | top: "fc5_3" 854 | param { 855 | lr_mult: 1 856 | decay_mult: 1 857 | } 858 | param { 859 | lr_mult: 2 860 | decay_mult: 1 861 | } 862 | inner_product_param { 863 | num_output: 2 864 | weight_filler { 865 | type: "xavier" 866 | #type: "constant" 867 | #value: 0 868 | } 869 | bias_filler { 870 | type: "constant" 871 | value: 0 872 | } 873 | } 874 | } 875 | 876 | ######################### 877 | layer { 878 | name: "fc4_4" 879 | type: "InnerProduct" 880 | bottom: "fc4" 881 | top: "fc4_4" 882 | param { 883 | lr_mult: 1 884 | decay_mult: 1 885 | } 886 | param { 887 | lr_mult: 2 888 | decay_mult: 1 889 | } 890 | inner_product_param { 891 | num_output: 64 892 | weight_filler { 893 | type: "xavier" 894 | } 895 | bias_filler { 896 | type: "constant" 897 | value: 0 898 | } 899 | } 900 | 901 | } 902 | layer { 903 | name: "prelu4_4" 904 | type: "PReLU" 905 | bottom: "fc4_4" 906 | top: "fc4_4" 907 | } 908 | layer { 909 | name: "fc5_4" 910 | type: "InnerProduct" 911 | bottom: "fc4_4" 912 | top: "fc5_4" 913 | param { 914 | lr_mult: 1 915 | decay_mult: 1 916 | } 917 | param { 918 | lr_mult: 2 919 | decay_mult: 1 920 | } 921 | inner_product_param { 922 | num_output: 2 923 | weight_filler { 924 | type: "xavier" 925 | #type: "constant" 926 | #value: 0 927 | } 928 | bias_filler { 929 | type: "constant" 930 | value: 0 931 | } 932 | } 933 | } 934 | 935 | ######################### 936 | layer { 937 | name: "fc4_5" 938 | type: "InnerProduct" 939 | bottom: "fc4" 940 | top: "fc4_5" 941 | param { 942 | lr_mult: 1 943 | decay_mult: 1 944 | } 945 | param { 946 | lr_mult: 2 947 | decay_mult: 1 948 | } 949 | inner_product_param { 950 | num_output: 64 951 | weight_filler { 952 | type: "xavier" 953 | } 954 | bias_filler { 955 | type: "constant" 956 | value: 0 957 | } 958 | } 959 | 960 | } 961 | layer { 962 | name: "prelu4_5" 963 | type: "PReLU" 964 | bottom: "fc4_5" 965 | top: 
"fc4_5" 966 | } 967 | layer { 968 | name: "fc5_5" 969 | type: "InnerProduct" 970 | bottom: "fc4_5" 971 | top: "fc5_5" 972 | param { 973 | lr_mult: 1 974 | decay_mult: 1 975 | } 976 | param { 977 | lr_mult: 2 978 | decay_mult: 1 979 | } 980 | inner_product_param { 981 | num_output: 2 982 | weight_filler { 983 | type: "xavier" 984 | #type: "constant" 985 | #value: 0 986 | } 987 | bias_filler { 988 | type: "constant" 989 | value: 0 990 | } 991 | } 992 | } 993 | 994 | ######################### 995 | 996 | --------------------------------------------------------------------------------