├── .gitignore ├── README.md ├── camera_demo.py ├── checkpoint ├── README.md ├── logging └── model_weights │ ├── weights.pth125.tar │ ├── weights.pth76.tar │ ├── weights.pth_epoch_195.tar │ └── weights.pth_epoch_199.tar ├── data ├── SetPreparation.py └── WFLW │ └── WFLW_annotations │ └── list_98pt_rect_attr_train_test │ ├── README │ ├── list_98pt_rect_attr_test.txt │ └── list_98pt_rect_attr_train.txt ├── dataset.py ├── euler_angles.py ├── face_detector ├── deploy.prototxt.txt ├── face_detector.py ├── haarcascade_frontalface_default.xml └── res10_300x300_ssd_iter_140000.caffemodel ├── generate_dataset.py ├── model ├── BottleneckResidual.py ├── DepthSepConv.py ├── Loss.py └── model.py ├── requirements.txt ├── test.py ├── train.py ├── utils.py └── visualization.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | train/ 6 | test/ 7 | WFLW/ 8 | tensorboard/ 9 | *.tar 10 | # C extensions 11 | *.so 12 | .vscode/ 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | pip-wheel-metadata/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Practical Facial Landmarks Detector 3 | 4 | My unofficial implementation of the [PFLD paper](https://arxiv.org/pdf/1902.10859.pdf) "PFLD: A Practical Facial Landmark Detector" using PyTorch for real-time facial landmark detection and head pose estimation.
5 | 6 | ![pfld](https://user-images.githubusercontent.com/35613645/109653302-89e1ef00-7b69-11eb-8dd7-e8810deebe44.png) 7 | 8 | 9 | #### Demo 10 | 11 | 12 | 13 | 14 | ##### How to Install 15 | ``` 16 | $ pip3 install -r requirements.txt 17 | 18 | # Note: it can run on lower versions of PyTorch, so replace the pinned versions with yours 19 | ``` 20 | 21 | ##### Install OpenCV with dnn from source (optional) 22 | Both the OpenCV dnn and Haar cascade detectors are used for face detection; if you want to use the Haar cascade, you can skip this part. 23 | ``` 24 | sudo apt update && sudo apt install -y cmake g++ wget unzip 25 | wget -O opencv.zip https://github.com/opencv/opencv/archive/master.zip 26 | wget -O opencv_contrib.zip https://github.com/opencv/opencv_contrib/archive/master.zip 27 | unzip opencv.zip 28 | unzip opencv_contrib.zip 29 | mkdir -p build && cd build 30 | cmake -DOPENCV_EXTRA_MODULES_PATH=../opencv_contrib-master/modules ../opencv-master 31 | cmake --build . 32 | ``` 33 | If you have any problems, refer to [Install opencv with dnn from source](https://docs.opencv.org/master/d7/d9f/tutorial_linux_install.html) 34 | 35 | ##### Run Camera Demo 36 | Live camera demo 37 | ```python 38 | $ python3 camera_demo.py 39 | 40 | # add '--head_pose' option to visualize head pose directions 41 | $ python3 camera_demo.py --head_pose 42 | 43 | # add '--haar' option if you want to use the Haar cascade detector instead of the OpenCV dnn face detector 44 | $ python3 camera_demo.py --haar 45 | ``` 46 | 47 | ##### WFLW Dataset 48 | [Wider Facial Landmarks in-the-wild (WFLW)](https://wywu.github.io/projects/LAB/WFLW.html) contains 10000 faces (7500 for training and 2500 for testing) with 98 fully manually annotated landmarks. 49 | 50 | **Download** the dataset & place it in the '**data/WFLW**' folder 51 | - WFLW Training and Testing Images [Google Drive](https://drive.google.com/file/d/1hzBd48JIdWTJSsATBEB_eFVvPL1bx6UC/view)
52 | - WFLW Face Annotations [Download](https://wywu.github.io/projects/LAB/support/WFLW_annotations.tar.gz)
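As an alternative to downloading manually, the following is a minimal sketch (annotations only; the images must still be fetched from the Google Drive link above) that pulls the annotations archive into the expected '**data/WFLW**' folder using only the Python standard library:

```python
# Minimal sketch: download & extract WFLW_annotations.tar.gz into data/WFLW.
# The training/testing images are hosted on Google Drive and are not handled here.
import os
import tarfile
import urllib.request

ANNOTATIONS_URL = "https://wywu.github.io/projects/LAB/support/WFLW_annotations.tar.gz"
DEST_DIR = "data/WFLW"

os.makedirs(DEST_DIR, exist_ok=True)
archive_path = os.path.join(DEST_DIR, "WFLW_annotations.tar.gz")

urllib.request.urlretrieve(ANNOTATIONS_URL, archive_path)  # fetch the tarball
with tarfile.open(archive_path, "r:gz") as tar:
    tar.extractall(DEST_DIR)  # expected to create data/WFLW/WFLW_annotations/
```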
53 | ##### Prepare the dataset 54 | Dataset augmentation & preparation
55 | (Only apply one of the 2 options) for data augmentation 56 | ```python 57 | $ python3 generate_dataset.py 58 | ``` 59 | ```python 60 | # another option to augment the dataset, from the polarisZhao/PFLD-pytorch repo 61 | $ cd data 62 | $ python3 SetPreparation.py 63 | ``` 64 | 65 | 66 | ##### Visualize dataset 67 | Visualize dataset examples with annotated landmarks & head pose 68 | ```cmd 69 | # add '--mode' option to determine the dataset to visualize 70 | $ python3 visualization.py 71 | ``` 72 | ##### Visualize Euler angles 73 | ```cmd 74 | $ python3 euler_angles.py 75 | ``` 76 | 77 | ##### Tensorboard 78 | Take a broad look at dataset examples using TensorBoard 79 | ``` 80 | $ python3 visualization.py --tensorboard 81 | $ tensorboard --logdir checkpoint/tensorboard 82 | ``` 83 | ![110810440-78e25d80-828e-11eb-9689-523c4d12b772](https://user-images.githubusercontent.com/35613645/110831295-bfda4e00-82a2-11eb-9c04-b77b7a30fc4a.png) 84 | 85 | 86 | 87 | ##### Testing on WFLW test dataset 88 | ``` 89 | $ python3 test.py 90 | ``` 91 | 92 | 93 | ##### Training 94 | Train on the augmented WFLW dataset 95 | ``` 96 | $ python3 train.py 97 | ``` 98 | 99 | 100 | 101 | #### Folder structure 102 | ├── model # model's implementation 103 | ├── data # data folder contains WFLW dataset & generated dataset 104 | ├── WFLW/ # extract WFLW images & annotations inside that folder 105 | ├── WFLW_annotations/ 106 | ├── WFLW_images/ 107 | ├── train # generated train dataset 108 | ├── test # generated test dataset 109 | 110 | 111 | #### References 112 | MobileNet 113 | - https://medium.com/analytics-vidhya/image-classification-with-mobilenet-cc6fbb2cd470 114 | - https://medium.com/datadriveninvestor/review-on-mobile-net-v2-ec5cb7946784 115 | - https://towardsdatascience.com/mobilenetv2-inverted-residuals-and-linear-bottlenecks-8a4362f4ffd5 116 | 117 | 3D-2D correspondences rotation: 118 | - https://docs.opencv.org/3.4/d9/d0c/group__calib3d.html#ga549c2075fac14829ff4a58bc931c033d 119 | - https://learnopencv.com/head-pose-estimation-using-opencv-and-dlib/ 120 | - https://medium.com/analytics-vidhya/real-time-head-pose-estimation-with-opencv-and-dlib-e8dc10d62078 121 | 122 | Other PFLD Implementations 123 | - https://github.com/polarisZhao/PFLD-pytorch 124 | - https://github.com/guoqiangqi/PFLD 125 | 126 | Survey 127 | - https://www.readcube.com/articles/10.1186%2Fs13640-018-0324-4 128 | 129 | Weak perspective projection 130 | - https://www.cse.unr.edu/~bebis/CS791E/Notes/PerspectiveProjection.pdf 131 | - https://en.wikipedia.org/wiki/3D_projection 132 | 133 | -------------------------------------------------------------------------------- /camera_demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Amr Elsersy 3 | email: amrelsersay@gmail.com 4 | ----------------------------------------------------------------------------------- 5 | Description: Live Camera Demo using opencv dnn face detection & PFLD for landmarks 6 | """ 7 | import sys 8 | import time 9 | import argparse 10 | import cv2 11 | import numpy as np 12 | from numpy.lib.type_check import imag 13 | import torch 14 | import torchvision.transforms.transforms as transforms 15 | from face_detector.face_detector import DnnDetector, HaarCascadeDetector 16 | from model.model import PFLD 17 | from euler_angles import EulerAngles 18 | 19 | sys.path.insert(1, 'face_detector') 20 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 21 | 22 | def draw_euler_angles(frame, face, axis_pts,
euler_angles): 23 | (x,y,w,h) = face 24 | top_left = (x,y) 25 | center = (x+w//2, y+h//2) 26 | 27 | axis_pts = axis_pts.astype(np.int32) 28 | pitch_point = tuple(axis_pts[0].ravel() + top_left) 29 | yaw_point = tuple(axis_pts[1].ravel() + top_left) 30 | roll_point = tuple(axis_pts[2].ravel() + top_left) 31 | 32 | width = 2 33 | cv2.line(frame, center, pitch_point, (0,255,0), width) 34 | cv2.line(frame, center, yaw_point, (255,0,0), width) 35 | cv2.line(frame, center, roll_point, (0,0,255), width) 36 | 37 | pitch, yaw, roll = euler_angles 38 | cv2.putText(frame, "Pitch:{:.2f}".format(pitch), (x,y-10), cv2.FONT_HERSHEY_PLAIN, 1, (0,255,0)) 39 | cv2.putText(frame, "Yaw:{:.2f}".format(yaw), (x,y-25), cv2.FONT_HERSHEY_PLAIN, 1, (0,255,0)) 40 | cv2.putText(frame, "Roll:{:.2f}".format(roll), (x,y-40), cv2.FONT_HERSHEY_PLAIN, 1, (0,255,0)) 41 | 42 | return frame 43 | 44 | def preprocess_rect(rect, big_img_shape): 45 | (x1, y1, w, h) = rect 46 | w_factor = 0.1 47 | h_factor = 0.1 48 | if w > h: 49 | h_factor = 0.25 50 | elif h > w: 51 | w_factor = 0.25 52 | x2 = x1 + w 53 | y2 = y1 + h 54 | rect_dw = (x2 - x1) * w_factor 55 | rect_dy = (y2 - y1) * h_factor 56 | x1 -= rect_dw/2 57 | x2 += rect_dw/2 58 | y1 -= rect_dy/2 59 | y2 += rect_dy/2 60 | x1 = max(x1, 0) 61 | y1 = max(y1, 0) 62 | h_max, w_max = big_img_shape[:2] 63 | y2 = min(y2, h_max) 64 | x2 = min(x2, w_max) 65 | return int(x1), int(y1), int(x2-x1+1), int(y2-y1+1) 66 | 67 | def main(args): 68 | # Model 69 | pfld = PFLD().to(device) 70 | pfld.eval() 71 | head_pose = EulerAngles() 72 | 73 | # Load model 74 | checkpoint = torch.load(args.pretrained, map_location=device) 75 | pfld.load_state_dict(checkpoint['pfld']) 76 | 77 | # Face detection 78 | root = 'face_detector' 79 | face_detector = None 80 | if args.haar: 81 | face_detector = HaarCascadeDetector(root) 82 | else: 83 | face_detector = DnnDetector(root) 84 | 85 | video = cv2.VideoCapture(0) # 480, 640 86 | # video = cv2.VideoCapture("../1.mp4") # (720, 1280) or (1080, 1920) 87 | t1 = 0 88 | t2 = 0 89 | print('video.isOpened:', video.isOpened()) 90 | while video.isOpened(): 91 | _, frame = video.read() 92 | 93 | # time 94 | t2 = time.time() 95 | fps = round(1/(t2-t1)) 96 | t1 = t2 97 | 98 | # faces 99 | faces = face_detector.detect_faces(frame) 100 | 101 | for face in faces: 102 | (x,y,w,h) = face 103 | 104 | x,y,w,h = preprocess_rect((x,y,w,h), frame.shape) 105 | cv2.rectangle(frame, (x,y), (x+w, y+h), (255,0,0), 3) 106 | 107 | # preprocessing 108 | t = time.time() 109 | input_face = frame[y:y+h, x:x+w] 110 | input_face = cv2.resize(input_face, (112,112)) 111 | input_face = transforms.ToTensor()(input_face).to(device) 112 | input_face = torch.unsqueeze(input_face, 0) 113 | 114 | with torch.no_grad(): 115 | # landmarks 116 | _, landmarks = pfld(input_face) 117 | # print(f'PFLD Forward time = {(time.time()-t)*1000}') 118 | 119 | # visualization 120 | landmarks = landmarks.cpu().reshape(98,2).numpy() 121 | 122 | visual_landmarks = (landmarks * (w,h) ).astype(np.int32) 123 | for (x_l, y_l) in visual_landmarks: 124 | cv2.circle(frame, (x + x_l, y + y_l), 1, (0,255,0), -1) 125 | 126 | if args.head_pose: 127 | _, _, euler_angles = head_pose.eular_angles_from_landmarks(np.copy(landmarks*(112)).astype(np.float)) 128 | 129 | # just for visualization .. 
to get rotation/translation in terms of face rect (not to the 112x112 rect) 130 | vis_rvec, vis_tvec, _ = head_pose.eular_angles_from_landmarks(np.copy(landmarks*(w,h)).astype(np.float)) 131 | 132 | axis = np.identity(3) * 7 133 | axis[2,2] = 4 134 | axis_pts = cv2.projectPoints(axis, vis_rvec, vis_tvec , head_pose.camera_intrensic_matrix, None)[0] 135 | frame = draw_euler_angles(frame, face, axis_pts, euler_angles) 136 | 137 | cv2.putText(frame, str(fps), (10,25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0)) 138 | cv2.imshow("Video", frame) 139 | # cv2.imshow("black", face_frame) 140 | if cv2.waitKey(1) & 0xff == 27: 141 | video.release() 142 | cv2.destroyAllWindows() 143 | break 144 | 145 | if __name__ == '__main__': 146 | parser = argparse.ArgumentParser() 147 | parser.add_argument('--haar', action='store_true', help='run the haar cascade face detector') 148 | parser.add_argument('--pretrained',type=str,default='checkpoint/model_weights/weights.pth76.tar' 149 | ,help='load weights') 150 | parser.add_argument('--head_pose', action='store_true', help='visualization of head pose euler angles') 151 | args = parser.parse_args() 152 | 153 | main(args) 154 | 155 | -------------------------------------------------------------------------------- /checkpoint/README.md: -------------------------------------------------------------------------------- 1 | ![Screenshot from 2021-03-11 17-25-10](https://user-images.githubusercontent.com/35613645/110810916-e8f0e380-828e-11eb-842a-63b804635ef4.png) 2 | -------------------------------------------------------------------------------- /checkpoint/logging: -------------------------------------------------------------------------------- 1 | [2021-02-21 07:50:52,878] [p5217] [train.py:188] [INFO] ********************************************************************** 2 | Evaluation average loss= 0.155 3 | 4 | [2021-02-21 07:50:53,880] [p5217] [train.py:102] [INFO] training epoch=128 .. weighted_loss= 0.526359283598202 ... loss=0.0715851572479835 5 | [2021-02-21 07:59:57,569] [p5217] [train.py:188] [INFO] ********************************************************************** 6 | Evaluation average loss= 0.156 7 | 8 | [2021-02-21 07:59:58,571] [p5217] [train.py:102] [INFO] training epoch=129 .. weighted_loss= 0.5050595510126915 ... loss=0.07586050608680664 9 | [2021-02-21 08:09:04,143] [p5217] [train.py:188] [INFO] ********************************************************************** 10 | Evaluation average loss= 0.157 11 | 12 | [2021-02-21 08:09:05,144] [p5217] [train.py:102] [INFO] training epoch=130 .. weighted_loss= 0.4639063532283466 ... loss=0.059519222478152896 13 | [2021-02-21 08:18:10,409] [p5217] [train.py:188] [INFO] ********************************************************************** 14 | Evaluation average loss= 0.157 15 | 16 | [2021-02-21 08:18:11,410] [p5217] [train.py:102] [INFO] training epoch=131 .. weighted_loss= 0.4701369117834393 ... loss=0.07163883422546917 17 | [2021-02-21 08:27:14,352] [p5217] [train.py:188] [INFO] ********************************************************************** 18 | Evaluation average loss= 0.154 19 | 20 | [2021-02-21 08:27:15,354] [p5217] [train.py:102] [INFO] training epoch=132 .. weighted_loss= 0.4365483605763339 ... loss=0.062103714148103166 21 | [2021-02-21 08:36:18,148] [p5217] [train.py:188] [INFO] ********************************************************************** 22 | Evaluation average loss= 0.153 23 | 24 | [2021-02-21 08:36:19,150] [p5217] [train.py:102] [INFO] training epoch=133 .. 
weighted_loss= 0.5643235978444539 ... loss=0.07693447643741248 25 | [2021-02-21 08:45:23,346] [p5217] [train.py:188] [INFO] ********************************************************************** 26 | Evaluation average loss= 0.154 27 | 28 | [2021-02-21 08:45:24,348] [p5217] [train.py:102] [INFO] training epoch=134 .. weighted_loss= 0.4931319925686419 ... loss=0.08124355255611453 29 | [2021-02-21 08:54:27,684] [p5217] [train.py:188] [INFO] ********************************************************************** 30 | Evaluation average loss= 0.155 31 | 32 | [2021-02-21 08:54:28,686] [p5217] [train.py:102] [INFO] training epoch=135 .. weighted_loss= 0.45444173752211287 ... loss=0.057358601020097724 33 | [2021-02-21 09:03:33,691] [p5217] [train.py:188] [INFO] ********************************************************************** 34 | Evaluation average loss= 0.156 35 | 36 | [2021-02-21 09:03:34,693] [p5217] [train.py:102] [INFO] training epoch=136 .. weighted_loss= 0.4624098848783759 ... loss=0.056029415150058916 37 | [2021-02-21 09:12:38,795] [p5217] [train.py:188] [INFO] ********************************************************************** 38 | Evaluation average loss= 0.155 39 | 40 | [2021-02-21 09:12:39,796] [p5217] [train.py:102] [INFO] training epoch=137 .. weighted_loss= 0.3695071610705561 ... loss=0.06534551697997454 41 | [2021-02-21 09:21:44,706] [p5217] [train.py:188] [INFO] ********************************************************************** 42 | Evaluation average loss= 0.154 43 | 44 | [2021-02-21 09:21:45,708] [p5217] [train.py:102] [INFO] training epoch=138 .. weighted_loss= 0.47413592080795375 ... loss=0.057611671327247156 45 | [2021-02-21 09:30:47,908] [p5217] [train.py:188] [INFO] ********************************************************************** 46 | Evaluation average loss= 0.155 47 | 48 | [2021-02-21 09:30:48,909] [p5217] [train.py:102] [INFO] training epoch=139 .. weighted_loss= 0.3813503869352062 ... loss=0.06324574862080913 49 | [2021-02-21 09:39:53,062] [p5217] [train.py:188] [INFO] ********************************************************************** 50 | Evaluation average loss= 0.154 51 | 52 | [2021-02-21 09:39:54,064] [p5217] [train.py:102] [INFO] training epoch=140 .. weighted_loss= 0.46136312699335996 ... loss=0.06283242096720744 53 | [2021-02-21 09:48:57,850] [p5217] [train.py:188] [INFO] ********************************************************************** 54 | Evaluation average loss= 0.154 55 | 56 | [2021-02-21 09:48:58,851] [p5217] [train.py:102] [INFO] training epoch=141 .. weighted_loss= 0.47359557587447243 ... loss=0.06248925259219614 57 | [2021-02-21 09:58:03,372] [p5217] [train.py:188] [INFO] ********************************************************************** 58 | Evaluation average loss= 0.154 59 | 60 | [2021-02-21 09:58:04,374] [p5217] [train.py:102] [INFO] training epoch=142 .. weighted_loss= 0.3894574875560777 ... loss=0.05730998091028125 61 | [2021-02-21 10:07:08,637] [p5217] [train.py:188] [INFO] ********************************************************************** 62 | Evaluation average loss= 0.152 63 | 64 | [2021-02-21 10:07:09,639] [p5217] [train.py:102] [INFO] training epoch=143 .. weighted_loss= 0.38334861204082643 ... loss=0.06087360909808922 65 | [2021-02-21 10:16:14,196] [p5217] [train.py:188] [INFO] ********************************************************************** 66 | Evaluation average loss= 0.157 67 | 68 | [2021-02-21 10:16:15,197] [p5217] [train.py:102] [INFO] training epoch=144 .. 
weighted_loss= 0.49432163749892893 ... loss=0.07072732749068382 69 | [2021-02-21 10:25:19,517] [p5217] [train.py:188] [INFO] ********************************************************************** 70 | Evaluation average loss= 0.158 71 | 72 | [2021-02-21 10:25:20,519] [p5217] [train.py:102] [INFO] training epoch=145 .. weighted_loss= 0.4868332211558487 ... loss=0.06327367562418107 73 | [2021-02-21 10:34:25,124] [p5217] [train.py:188] [INFO] ********************************************************************** 74 | Evaluation average loss= 0.161 75 | 76 | [2021-02-21 10:34:26,126] [p5217] [train.py:102] [INFO] training epoch=146 .. weighted_loss= 0.5552308520367758 ... loss=0.06837738686342776 77 | [2021-02-21 10:43:30,256] [p5217] [train.py:188] [INFO] ********************************************************************** 78 | Evaluation average loss= 0.161 79 | 80 | [2021-02-21 10:43:31,257] [p5217] [train.py:102] [INFO] training epoch=147 .. weighted_loss= 0.39814224159139283 ... loss=0.06776860424706951 81 | [2021-02-21 10:52:35,719] [p5217] [train.py:188] [INFO] ********************************************************************** 82 | Evaluation average loss= 0.157 83 | 84 | [2021-02-21 10:52:36,720] [p5217] [train.py:102] [INFO] training epoch=148 .. weighted_loss= 0.41261211752656085 ... loss=0.06302895824338264 85 | [2021-02-21 11:01:41,918] [p5217] [train.py:188] [INFO] ********************************************************************** 86 | Evaluation average loss= 0.156 87 | 88 | [2021-02-21 11:01:42,920] [p5217] [train.py:102] [INFO] training epoch=149 .. weighted_loss= 0.4173870320001062 ... loss=0.06221473294604286 89 | [2021-02-21 11:10:46,838] [p5217] [train.py:188] [INFO] ********************************************************************** 90 | Evaluation average loss= 0.155 91 | 92 | [2021-02-21 11:10:47,839] [p5217] [train.py:102] [INFO] training epoch=150 .. weighted_loss= 0.39058630772661457 ... loss=0.057102610351358656 93 | [2021-02-21 11:19:52,242] [p5217] [train.py:188] [INFO] ********************************************************************** 94 | Evaluation average loss= 0.155 95 | 96 | [2021-02-21 11:19:53,243] [p5217] [train.py:102] [INFO] training epoch=151 .. weighted_loss= 0.3196037534906815 ... loss=0.057976651942578936 97 | [2021-02-21 11:28:58,223] [p5217] [train.py:188] [INFO] ********************************************************************** 98 | Evaluation average loss= 0.155 99 | 100 | [2021-02-21 11:28:59,225] [p5217] [train.py:102] [INFO] training epoch=152 .. weighted_loss= 0.3127827769128545 ... loss=0.05121453034849041 101 | [2021-02-21 11:38:03,872] [p5217] [train.py:188] [INFO] ********************************************************************** 102 | Evaluation average loss= 0.153 103 | 104 | [2021-02-21 11:38:04,874] [p5217] [train.py:102] [INFO] training epoch=153 .. weighted_loss= 0.3323248827886722 ... loss=0.057961544230850155 105 | [2021-02-21 11:47:09,470] [p5217] [train.py:188] [INFO] ********************************************************************** 106 | Evaluation average loss= 0.153 107 | 108 | [2021-02-21 11:47:10,472] [p5217] [train.py:102] [INFO] training epoch=154 .. weighted_loss= 0.29602614600307314 ... loss=0.05415185272391887 109 | [2021-02-21 11:56:15,458] [p5217] [train.py:188] [INFO] ********************************************************************** 110 | Evaluation average loss= 0.154 111 | 112 | [2021-02-21 11:56:16,460] [p5217] [train.py:102] [INFO] training epoch=155 .. 
weighted_loss= 0.3147133450173504 ... loss=0.058752519594636086 113 | [2021-02-21 12:05:20,764] [p5217] [train.py:188] [INFO] ********************************************************************** 114 | Evaluation average loss= 0.153 115 | 116 | [2021-02-21 12:05:21,765] [p5217] [train.py:102] [INFO] training epoch=156 .. weighted_loss= 0.3655616832313081 ... loss=0.054503646640400256 117 | [2021-02-21 12:14:26,569] [p5217] [train.py:188] [INFO] ********************************************************************** 118 | Evaluation average loss= 0.156 119 | 120 | [2021-02-21 12:14:27,570] [p5217] [train.py:102] [INFO] training epoch=157 .. weighted_loss= 0.36032460400530164 ... loss=0.05613234142852661 121 | [2021-02-21 12:23:32,522] [p5217] [train.py:188] [INFO] ********************************************************************** 122 | Evaluation average loss= 0.153 123 | 124 | [2021-02-21 12:23:33,523] [p5217] [train.py:102] [INFO] training epoch=158 .. weighted_loss= 0.35317307405868703 ... loss=0.055008669717861364 125 | [2021-02-21 12:32:38,228] [p5217] [train.py:188] [INFO] ********************************************************************** 126 | Evaluation average loss= 0.158 127 | 128 | [2021-02-21 12:32:39,230] [p5217] [train.py:102] [INFO] training epoch=159 .. weighted_loss= 0.45785738535938847 ... loss=0.067094133648799 129 | [2021-02-21 12:41:43,784] [p5217] [train.py:188] [INFO] ********************************************************************** 130 | Evaluation average loss= 0.155 131 | 132 | [2021-02-21 12:41:44,785] [p5217] [train.py:102] [INFO] training epoch=160 .. weighted_loss= 0.3988937505277748 ... loss=0.06059209249638389 133 | [2021-02-21 12:50:49,080] [p5217] [train.py:188] [INFO] ********************************************************************** 134 | Evaluation average loss= 0.158 135 | 136 | [2021-02-21 12:50:50,082] [p5217] [train.py:102] [INFO] training epoch=161 .. weighted_loss= 0.35590103676740126 ... loss=0.05230415155900744 137 | [2021-02-21 12:59:54,038] [p5217] [train.py:188] [INFO] ********************************************************************** 138 | Evaluation average loss= 0.153 139 | 140 | [2021-02-21 12:59:55,040] [p5217] [train.py:102] [INFO] training epoch=162 .. weighted_loss= 0.3504926571185457 ... loss=0.05667258612415026 141 | [2021-02-21 13:08:59,441] [p5217] [train.py:188] [INFO] ********************************************************************** 142 | Evaluation average loss= 0.154 143 | 144 | [2021-02-21 13:09:00,443] [p5217] [train.py:102] [INFO] training epoch=163 .. weighted_loss= 0.38601378899729916 ... loss=0.07271810555420323 145 | [2021-02-21 13:18:05,511] [p5217] [train.py:188] [INFO] ********************************************************************** 146 | Evaluation average loss= 0.154 147 | 148 | [2021-02-21 13:18:06,513] [p5217] [train.py:102] [INFO] training epoch=164 .. weighted_loss= 0.28512605089947246 ... loss=0.05483989512465863 149 | [2021-02-21 13:27:11,129] [p5217] [train.py:188] [INFO] ********************************************************************** 150 | Evaluation average loss= 0.156 151 | 152 | [2021-02-21 13:27:12,131] [p5217] [train.py:102] [INFO] training epoch=165 .. weighted_loss= 0.24047668927894703 ... 
loss=0.04717388119678009 153 | [2021-02-21 13:36:15,874] [p5217] [train.py:188] [INFO] ********************************************************************** 154 | Evaluation average loss= 0.155 155 | 156 | [2021-02-21 13:36:16,876] [p5217] [train.py:102] [INFO] training epoch=166 .. weighted_loss= 0.37315462464162374 ... loss=0.05893905751655888 157 | [2021-02-21 13:45:21,645] [p5217] [train.py:188] [INFO] ********************************************************************** 158 | Evaluation average loss= 0.154 159 | 160 | [2021-02-21 13:45:22,647] [p5217] [train.py:102] [INFO] training epoch=167 .. weighted_loss= 0.3780057911157864 ... loss=0.05705317291556641 161 | [2021-02-21 13:54:26,470] [p5217] [train.py:188] [INFO] ********************************************************************** 162 | Evaluation average loss= 0.157 163 | 164 | [2021-02-21 13:54:27,472] [p5217] [train.py:102] [INFO] training epoch=168 .. weighted_loss= 0.37366930390112146 ... loss=0.06031628116056174 165 | [2021-02-21 14:03:32,383] [p5217] [train.py:188] [INFO] ********************************************************************** 166 | Evaluation average loss= 0.155 167 | 168 | [2021-02-21 14:03:33,384] [p5217] [train.py:102] [INFO] training epoch=169 .. weighted_loss= 0.4094276675321398 ... loss=0.07302647657605833 169 | [2021-02-21 14:12:38,678] [p5217] [train.py:188] [INFO] ********************************************************************** 170 | Evaluation average loss= 0.156 171 | 172 | [2021-02-21 14:12:39,680] [p5217] [train.py:102] [INFO] training epoch=170 .. weighted_loss= 0.3315893988312317 ... loss=0.04731389177066087 173 | [2021-02-21 14:21:44,084] [p5217] [train.py:188] [INFO] ********************************************************************** 174 | Evaluation average loss= 0.156 175 | 176 | [2021-02-21 14:21:45,086] [p5217] [train.py:102] [INFO] training epoch=171 .. weighted_loss= 0.32952779436014323 ... loss=0.058489670092038794 177 | [2021-02-21 14:30:49,199] [p5217] [train.py:188] [INFO] ********************************************************************** 178 | Evaluation average loss= 0.158 179 | 180 | [2021-02-21 14:30:50,201] [p5217] [train.py:102] [INFO] training epoch=172 .. weighted_loss= 0.366975812980048 ... loss=0.05379984476369218 181 | [2021-02-21 14:39:54,678] [p5217] [train.py:188] [INFO] ********************************************************************** 182 | Evaluation average loss= 0.158 183 | 184 | [2021-02-21 14:39:55,680] [p5217] [train.py:102] [INFO] training epoch=173 .. weighted_loss= 0.3529519404450851 ... loss=0.051291476515526374 185 | [2021-02-21 14:48:59,586] [p5217] [train.py:188] [INFO] ********************************************************************** 186 | Evaluation average loss= 0.16 187 | 188 | [2021-02-21 14:49:00,587] [p5217] [train.py:102] [INFO] training epoch=174 .. weighted_loss= 0.3774069191716597 ... loss=0.057134809659138516 189 | [2021-02-21 14:58:05,264] [p5217] [train.py:188] [INFO] ********************************************************************** 190 | Evaluation average loss= 0.163 191 | 192 | [2021-02-21 14:58:06,266] [p5217] [train.py:102] [INFO] training epoch=175 .. weighted_loss= 0.3319261832758687 ... loss=0.049057541634162406 193 | [2021-02-21 15:07:10,243] [p5217] [train.py:188] [INFO] ********************************************************************** 194 | Evaluation average loss= 0.162 195 | 196 | [2021-02-21 15:07:11,244] [p5217] [train.py:102] [INFO] training epoch=176 .. 
weighted_loss= 0.36302723619661376 ... loss=0.05086882418449708 197 | [2021-02-21 15:16:16,179] [p5217] [train.py:188] [INFO] ********************************************************************** 198 | Evaluation average loss= 0.16 199 | 200 | [2021-02-21 15:16:17,181] [p5217] [train.py:102] [INFO] training epoch=177 .. weighted_loss= 0.3466498247350936 ... loss=0.05233603449734074 201 | [2021-02-21 15:25:22,549] [p5217] [train.py:188] [INFO] ********************************************************************** 202 | Evaluation average loss= 0.158 203 | 204 | [2021-02-21 15:25:23,551] [p5217] [train.py:102] [INFO] training epoch=178 .. weighted_loss= 0.48009333879129096 ... loss=0.06221657782533107 205 | [2021-02-21 15:34:28,831] [p5217] [train.py:188] [INFO] ********************************************************************** 206 | Evaluation average loss= 0.154 207 | 208 | [2021-02-21 15:34:29,833] [p5217] [train.py:102] [INFO] training epoch=179 .. weighted_loss= 0.4585495527901726 ... loss=0.05532294346544937 209 | [2021-02-21 15:43:35,182] [p5217] [train.py:188] [INFO] ********************************************************************** 210 | Evaluation average loss= 0.157 211 | 212 | [2021-02-21 15:43:36,184] [p5217] [train.py:102] [INFO] training epoch=180 .. weighted_loss= 0.3590938022515255 ... loss=0.058023598136978646 213 | [2021-02-21 15:52:39,866] [p5217] [train.py:188] [INFO] ********************************************************************** 214 | Evaluation average loss= 0.156 215 | 216 | [2021-02-21 15:52:40,868] [p5217] [train.py:102] [INFO] training epoch=181 .. weighted_loss= 0.37525135539547544 ... loss=0.054304381281470936 217 | [2021-02-21 16:01:44,867] [p5217] [train.py:188] [INFO] ********************************************************************** 218 | Evaluation average loss= 0.155 219 | 220 | [2021-02-21 16:01:45,869] [p5217] [train.py:102] [INFO] training epoch=182 .. weighted_loss= 0.3934783864065811 ... loss=0.06406967057220293 221 | [2021-02-21 16:10:48,948] [p5217] [train.py:188] [INFO] ********************************************************************** 222 | Evaluation average loss= 0.153 223 | 224 | [2021-02-21 16:10:49,949] [p5217] [train.py:102] [INFO] training epoch=183 .. weighted_loss= 0.4264355251708146 ... loss=0.05580343558372237 225 | [2021-02-21 16:19:53,922] [p5217] [train.py:188] [INFO] ********************************************************************** 226 | Evaluation average loss= 0.156 227 | 228 | [2021-02-21 16:19:54,924] [p5217] [train.py:102] [INFO] training epoch=184 .. weighted_loss= 0.29054877196988516 ... loss=0.05046070970736868 229 | [2021-02-21 16:28:58,691] [p5217] [train.py:188] [INFO] ********************************************************************** 230 | Evaluation average loss= 0.153 231 | 232 | [2021-02-21 16:28:59,692] [p5217] [train.py:102] [INFO] training epoch=185 .. weighted_loss= 0.3474297538272023 ... loss=0.050256123999284176 233 | [2021-02-21 16:38:05,645] [p5217] [train.py:188] [INFO] ********************************************************************** 234 | Evaluation average loss= 0.152 235 | 236 | [2021-02-21 16:38:06,647] [p5217] [train.py:102] [INFO] training epoch=186 .. weighted_loss= 0.3664725103485166 ... 
loss=0.05382472125756788 237 | [2021-02-21 16:47:12,562] [p5217] [train.py:188] [INFO] ********************************************************************** 238 | Evaluation average loss= 0.153 239 | 240 | [2021-02-21 16:47:13,564] [p5217] [train.py:102] [INFO] training epoch=187 .. weighted_loss= 0.3590934142764026 ... loss=0.05778198717151002 241 | [2021-02-21 16:56:17,620] [p5217] [train.py:188] [INFO] ********************************************************************** 242 | Evaluation average loss= 0.153 243 | 244 | [2021-02-21 16:56:18,622] [p5217] [train.py:102] [INFO] training epoch=188 .. weighted_loss= 0.3065291238962078 ... loss=0.0509055756663739 245 | [2021-02-21 17:05:22,731] [p5217] [train.py:188] [INFO] ********************************************************************** 246 | Evaluation average loss= 0.154 247 | 248 | [2021-02-21 17:05:23,732] [p5217] [train.py:102] [INFO] training epoch=189 .. weighted_loss= 0.29871330131399804 ... loss=0.05035034189207152 249 | [2021-02-21 17:14:29,839] [p5217] [train.py:188] [INFO] ********************************************************************** 250 | Evaluation average loss= 0.155 251 | 252 | [2021-02-21 17:14:30,841] [p5217] [train.py:102] [INFO] training epoch=190 .. weighted_loss= 0.31847917282870697 ... loss=0.04839831367325428 253 | [2021-02-21 17:23:33,736] [p5217] [train.py:188] [INFO] ********************************************************************** 254 | Evaluation average loss= 0.153 255 | 256 | [2021-02-21 17:23:34,737] [p5217] [train.py:102] [INFO] training epoch=191 .. weighted_loss= 0.36037722485812723 ... loss=0.05085933701474805 257 | [2021-02-21 17:32:40,129] [p5217] [train.py:188] [INFO] ********************************************************************** 258 | Evaluation average loss= 0.154 259 | 260 | [2021-02-21 17:32:41,131] [p5217] [train.py:102] [INFO] training epoch=192 .. weighted_loss= 0.35304617720343995 ... loss=0.0580241315481722 261 | [2021-02-21 17:41:46,516] [p5217] [train.py:188] [INFO] ********************************************************************** 262 | Evaluation average loss= 0.154 263 | 264 | [2021-02-21 17:41:47,518] [p5217] [train.py:102] [INFO] training epoch=193 .. weighted_loss= 0.3122850896651734 ... loss=0.04626624202341368 265 | [2021-02-21 17:50:50,745] [p5217] [train.py:188] [INFO] ********************************************************************** 266 | Evaluation average loss= 0.154 267 | 268 | [2021-02-21 17:50:51,747] [p5217] [train.py:102] [INFO] training epoch=194 .. weighted_loss= 0.2904981937513787 ... loss=0.04545938261880461 269 | [2021-02-21 17:59:55,394] [p5217] [train.py:188] [INFO] ********************************************************************** 270 | Evaluation average loss= 0.153 271 | 272 | [2021-02-21 17:59:56,396] [p5217] [train.py:102] [INFO] training epoch=195 .. weighted_loss= 0.3235342768021777 ... loss=0.04596157511111121 273 | [2021-02-21 18:09:00,816] [p5217] [train.py:188] [INFO] ********************************************************************** 274 | Evaluation average loss= 0.154 275 | 276 | [2021-02-21 18:09:01,818] [p5217] [train.py:102] [INFO] training epoch=196 .. weighted_loss= 0.291155543677695 ... loss=0.05042012120907835 277 | [2021-02-21 18:18:04,850] [p5217] [train.py:188] [INFO] ********************************************************************** 278 | Evaluation average loss= 0.154 279 | 280 | [2021-02-21 18:18:05,852] [p5217] [train.py:102] [INFO] training epoch=197 .. weighted_loss= 0.32474776819405493 ... 
loss=0.05787040359399912 281 | [2021-02-21 18:27:09,076] [p5217] [train.py:188] [INFO] ********************************************************************** 282 | Evaluation average loss= 0.154 283 | 284 | [2021-02-21 18:27:10,078] [p5217] [train.py:102] [INFO] training epoch=198 .. weighted_loss= 0.2849737667766051 ... loss=0.04445334503059464 285 | [2021-02-21 18:36:13,154] [p5217] [train.py:188] [INFO] ********************************************************************** 286 | Evaluation average loss= 0.154 287 | 288 | [2021-02-21 18:36:14,156] [p5217] [train.py:102] [INFO] training epoch=199 .. weighted_loss= 0.3539362995353677 ... loss=0.05037977797337478 289 | -------------------------------------------------------------------------------- /checkpoint/model_weights/weights.pth125.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmrElsersy/PFLD-Pytorch-Landmarks/087886c821a98bcf454643c0861a16d86ad54cfa/checkpoint/model_weights/weights.pth125.tar -------------------------------------------------------------------------------- /checkpoint/model_weights/weights.pth76.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmrElsersy/PFLD-Pytorch-Landmarks/087886c821a98bcf454643c0861a16d86ad54cfa/checkpoint/model_weights/weights.pth76.tar -------------------------------------------------------------------------------- /checkpoint/model_weights/weights.pth_epoch_195.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmrElsersy/PFLD-Pytorch-Landmarks/087886c821a98bcf454643c0861a16d86ad54cfa/checkpoint/model_weights/weights.pth_epoch_195.tar -------------------------------------------------------------------------------- /checkpoint/model_weights/weights.pth_epoch_199.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmrElsersy/PFLD-Pytorch-Landmarks/087886c821a98bcf454643c0861a16d86ad54cfa/checkpoint/model_weights/weights.pth_epoch_199.tar -------------------------------------------------------------------------------- /data/SetPreparation.py: -------------------------------------------------------------------------------- 1 | #-*- coding: utf-8 -*- 2 | import os 3 | import numpy as np 4 | import cv2 5 | import shutil 6 | import sys 7 | sys.path.append(os.path.abspath(os.path.join(os.getcwd(), ".."))) 8 | from euler_angles import EulerAngles 9 | debug = False 10 | 11 | def rotate(angle, center, landmark): 12 | rad = angle * np.pi / 180.0 13 | alpha = np.cos(rad) 14 | beta = np.sin(rad) 15 | M = np.zeros((2,3), dtype=np.float32) 16 | M[0, 0] = alpha 17 | M[0, 1] = beta 18 | M[0, 2] = (1-alpha)*center[0] - beta*center[1] 19 | M[1, 0] = -beta 20 | M[1, 1] = alpha 21 | M[1, 2] = beta*center[0] + (1-alpha)*center[1] 22 | 23 | landmark_ = np.asarray([(M[0,0]*x+M[0,1]*y+M[0,2], 24 | M[1,0]*x+M[1,1]*y+M[1,2]) for (x,y) in landmark]) 25 | return M, landmark_ 26 | 27 | class ImageDate(): 28 | def __init__(self, line, imgDir, image_size=112): 29 | self.image_size = image_size 30 | line = line.strip().split() 31 | #0-195: landmark 坐标点 196-199: bbox 坐标点; 32 | #200: 姿态(pose) 0->正常姿态(normal pose) 1->大的姿态(large pose) 33 | #201: 表情(expression) 0->正常表情(normal expression) 1->夸张的表情(exaggerate expression) 34 | #202: 照度(illumination) 0->正常照明(normal illumination) 1->极端照明(extreme illumination) 35 | #203: 化妆(make-up) 0->无化妆(no make-up) 
1->化妆(make-up) 36 | #204: 遮挡(occlusion) 0->无遮挡(no occlusion) 1->遮挡(occlusion) 37 | #205: 模糊(blur) 0->清晰(clear) 1->模糊(blur) 38 | #206: 图片名称 39 | # print(len(line)) 40 | # assert(len(line) == 207) 41 | self.list = line 42 | self.landmark = np.asarray(list(map(float, line[:196])), dtype=np.float32).reshape(-1, 2) 43 | self.box = np.asarray(list(map(int, line[196:200])),dtype=np.int32) 44 | flag = list(map(int, line[200:206])) 45 | flag = list(map(bool, flag)) 46 | self.pose = flag[0] 47 | self.expression = flag[1] 48 | self.illumination = flag[2] 49 | self.make_up = flag[3] 50 | self.occlusion = flag[4] 51 | self.blur = flag[5] 52 | self.path = os.path.join(imgDir, line[206]) 53 | self.img = None 54 | 55 | self.imgs = [] 56 | self.landmarks = [] 57 | self.boxes = [] 58 | 59 | self.estimator = EulerAngles() 60 | 61 | def load_data(self, is_train, repeat): 62 | xy = np.min(self.landmark, axis=0).astype(np.int32) 63 | zz = np.max(self.landmark, axis=0).astype(np.int32) 64 | wh = zz - xy + 1 65 | 66 | center = (xy + wh/2).astype(np.int32) 67 | img = cv2.imread(self.path) 68 | boxsize = int(np.max(wh)*1.2) 69 | xy = center - boxsize//2 70 | x1, y1 = xy 71 | x2, y2 = xy + boxsize 72 | height, width, _ = img.shape 73 | dx = max(0, -x1) 74 | dy = max(0, -y1) 75 | x1 = max(0, x1) 76 | y1 = max(0, y1) 77 | 78 | edx = max(0, x2 - width) 79 | edy = max(0, y2 - height) 80 | x2 = min(width, x2) 81 | y2 = min(height, y2) 82 | 83 | imgT = img[y1:y2, x1:x2] 84 | if (dx > 0 or dy > 0 or edx > 0 or edy > 0): 85 | imgT = cv2.copyMakeBorder(imgT, dy, edy, dx, edx, cv2.BORDER_CONSTANT, 0) 86 | if imgT.shape[0] == 0 or imgT.shape[1] == 0: 87 | imgTT = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) 88 | for x, y in (self.landmark+0.5).astype(np.int32): 89 | cv2.circle(imgTT, (x, y), 1, (0, 0, 255)) 90 | cv2.imshow('0', imgTT) 91 | if cv2.waitKey(0) == 27: 92 | exit() 93 | imgT = cv2.resize(imgT, (self.image_size, self.image_size)) 94 | landmark = (self.landmark - xy)/boxsize 95 | assert (landmark >= 0).all(), str(landmark) + str([dx, dy]) 96 | assert (landmark <= 1).all(), str(landmark) + str([dx, dy]) 97 | self.imgs.append(imgT) 98 | self.landmarks.append(landmark) 99 | 100 | if is_train: 101 | while len(self.imgs) < repeat: 102 | angle = np.random.randint(-30, 30) 103 | cx, cy = center 104 | cx = cx + int(np.random.randint(-boxsize*0.1, boxsize*0.1)) 105 | cy = cy + int(np.random.randint(-boxsize * 0.1, boxsize * 0.1)) 106 | M, landmark = rotate(angle, (cx,cy), self.landmark) 107 | 108 | imgT = cv2.warpAffine(img, M, (int(img.shape[1]*1.1), int(img.shape[0]*1.1))) 109 | 110 | 111 | wh = np.ptp(landmark, axis=0).astype(np.int32) + 1 112 | size = np.random.randint(int(np.min(wh)), np.ceil(np.max(wh) * 1.25)) 113 | xy = np.asarray((cx - size // 2, cy - size//2), dtype=np.int32) 114 | landmark = (landmark - xy) / size 115 | if (landmark < 0).any() or (landmark > 1).any(): 116 | continue 117 | 118 | x1, y1 = xy 119 | x2, y2 = xy + size 120 | height, width, _ = imgT.shape 121 | dx = max(0, -x1) 122 | dy = max(0, -y1) 123 | x1 = max(0, x1) 124 | y1 = max(0, y1) 125 | 126 | edx = max(0, x2 - width) 127 | edy = max(0, y2 - height) 128 | x2 = min(width, x2) 129 | y2 = min(height, y2) 130 | 131 | imgT = imgT[y1:y2, x1:x2] 132 | if (dx > 0 or dy > 0 or edx >0 or edy > 0): 133 | imgT = cv2.copyMakeBorder(imgT, dy, edy, dx, edx, cv2.BORDER_CONSTANT, 0) 134 | 135 | imgT = cv2.resize(imgT, (self.image_size, self.image_size)) 136 | 137 | if np.random.choice((True, False)): 138 | mirror_idx = [ 
32,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12 139 | ,11,10,9,8,7,6,5,4,3,2,1,0,46,45,44,43,42,50,49,48,47,37,36,35, 140 | 34,33,41,40,39,38,51,52,53,54,59,58,57,56,55,72,71,70,69,68,75, 141 | 74,73,64,63,62,61,60,67,66,65,82,81,80,79,78,77,76,87,86,85,84, 142 | 83,92,91,90,89,88,95,94,93,97,96] 143 | 144 | landmark[:,0] = 1 - landmark[:,0] 145 | landmark = landmark[mirror_idx] 146 | imgT = cv2.flip(imgT, 1) 147 | self.imgs.append(imgT) 148 | self.landmarks.append(landmark) 149 | 150 | def save_data(self, path, prefix): 151 | attributes = [self.pose, self.expression, self.illumination, self.make_up, self.occlusion, self.blur] 152 | attributes = np.asarray(attributes, dtype=np.int32) 153 | attributes_str = ' '.join(list(map(str, attributes))) 154 | labels = [] 155 | 156 | for i, (img, lanmark) in enumerate(zip(self.imgs, self.landmarks)): 157 | assert lanmark.shape == (98, 2) 158 | save_path = os.path.join(path, prefix+'_'+str(i)+'.png') 159 | assert not os.path.exists(save_path), save_path 160 | cv2.imwrite(save_path, img) 161 | 162 | # euler_angles_landmark = [] 163 | # for index in TRACKED_POINTS: 164 | # euler_angles_landmark.append(lanmark[index]) 165 | # euler_angles_landmark = np.asarray(euler_angles_landmark).reshape((-1, 28)) 166 | # euler_angles_landmark = np.asarray(euler_angles_landmark) 167 | 168 | rvec, tvec, eulers = self.estimator.eular_angles_from_landmarks(lanmark) 169 | pitch, yaw, roll = eulers 170 | euler_angles = np.asarray((pitch, yaw, roll), dtype=np.float32) 171 | euler_angles_str = ' '.join(list(map(str, euler_angles))) 172 | 173 | landmark_str = ' '.join(list(map(str,lanmark.reshape(-1).tolist()))) 174 | 175 | label = '{} {} {} {}\n'.format(save_path, landmark_str, attributes_str, euler_angles_str) 176 | 177 | labels.append(label) 178 | return labels 179 | 180 | def get_dataset_list(imgDir, outDir, landmarkDir, is_train): 181 | with open(landmarkDir,'r') as f: 182 | lines = f.readlines() 183 | labels = [] 184 | save_img = os.path.join(outDir, 'images') 185 | if not os.path.exists(save_img): 186 | os.mkdir(save_img) 187 | 188 | if debug: 189 | lines = lines[:100] 190 | for i, line in enumerate(lines): 191 | Img = ImageDate(line, imgDir) 192 | img_name = Img.path 193 | Img.load_data(is_train, 10) 194 | _, filename = os.path.split(img_name) 195 | filename, _ = os.path.splitext(filename) 196 | label_txt = Img.save_data(save_img, str(i)+'_' + filename) 197 | labels.append(label_txt) 198 | if ((i + 1) % 100) == 0: 199 | print('file: {}/{}'.format(i+1, len(lines))) 200 | 201 | with open(os.path.join(outDir, 'annotations.txt'),'w') as f: 202 | for label in labels: 203 | f.writelines(label) 204 | 205 | if __name__ == '__main__': 206 | root_dir = os.path.dirname(os.path.realpath(__file__)) 207 | imageDirs = 'WFLW/WFLW_images' 208 | 209 | landmarkDirs = ['WFLW/WFLW_annotations/list_98pt_rect_attr_train_test/list_98pt_rect_attr_test.txt', 210 | 'WFLW/WFLW_annotations/list_98pt_rect_attr_train_test/list_98pt_rect_attr_train.txt'] 211 | 212 | outDirs = ['test', 'train'] 213 | for landmarkDir, outDir in zip(landmarkDirs, outDirs): 214 | outDir = os.path.join(root_dir, outDir) 215 | print(outDir) 216 | if os.path.exists(outDir): 217 | shutil.rmtree(outDir) 218 | os.mkdir(outDir) 219 | if 'list_98pt_rect_attr_test.txt' in landmarkDir: 220 | is_train = False 221 | else: 222 | is_train = True 223 | imgs = get_dataset_list(imageDirs, outDir, landmarkDir, is_train) 224 | print('end') -------------------------------------------------------------------------------- 
/data/WFLW/WFLW_annotations/list_98pt_rect_attr_train_test/README: -------------------------------------------------------------------------------- 1 | # Look at Boundary: A Boundary-Aware Face Alignment Algorithm. 2 | # Wayne Wu, Chen Qian, Shuo Yang, Quan Wang, Yici Cai, Qiang Zhou. 3 | # In CVPR 2018. 4 | 5 | 6 | 7 | The format of txt ground truth list (7,500 for training and 2,500 for testing). 8 | 9 | coordinates of 98 landmarks (196) + coordinates of upper left corner and lower right corner of detection rectangle (4) + attributes annotations (6) + image name (1) 10 | x0 y0 ... x97 y97 x_min_rect y_min_rect x_max_rect y_max_rect pose expression illumination make-up occlusion blur image_name 11 | 12 | 13 | 14 | Attached the mappings between attribute names and label values. 15 | 16 | 17 | pose: 18 | 19 | normal pose->0 20 | large pose->1 21 | 22 | 23 | expression: 24 | 25 | normal expression->0 26 | exaggerate expression->1 27 | 28 | 29 | illumination: 30 | 31 | normal illumination->0 32 | extreme illumination->1 33 | 34 | 35 | make-up: 36 | 37 | no make-up->0 38 | make-up->1 39 | 40 | 41 | occlusion: 42 | 43 | no occlusion->0 44 | occlusion->1 45 | 46 | 47 | blur: 48 | 49 | clear->0 50 | blur->1 51 | -------------------------------------------------------------------------------- /dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Amr Elsersy 3 | email: amrelsersay@gmail.com 4 | ----------------------------------------------------------------------------------- 5 | Description: WFLW Dataset module to read images with annotations 6 | """ 7 | 8 | import os, time, enum 9 | from PIL import Image 10 | import argparse 11 | import torch 12 | from torch.utils.data import Dataset, DataLoader 13 | import torchvision.transforms.transforms as transforms 14 | import numpy as np 15 | import cv2 16 | 17 | class WFLW_Dataset(Dataset): 18 | def __init__(self, root='data', mode='train', transform=None): 19 | self.root = root 20 | self.transform = transform 21 | 22 | self.mode = mode 23 | assert mode in ['train', 'test'] 24 | 25 | self.images_root = os.path.join(self.root, self.mode, "images") 26 | self.annotations_root = os.path.join(self.root, self.mode, "annotations.txt") 27 | 28 | self.annotations_file = open(self.annotations_root ,'r') 29 | self.annotations_lines = self.annotations_file.read().splitlines() 30 | 31 | def __getitem__(self, index): 32 | 33 | labels = self.read_annotations(index) 34 | image = self.read_image(labels['image_name']) 35 | labels['landmarks'] *= 112 36 | 37 | if self.transform: 38 | # to tensor 39 | # temp = np.copy(image) 40 | # temp = self.transform(temp) 41 | image = self.transform(image) 42 | # print('image after', image, '\n\n') 43 | # print('temp after', temp, '\n\n') 44 | 45 | # Noramlization Landmarks 46 | labels['landmarks'] = self.transform(labels['landmarks']) / 112 47 | # print(labels['landmarks']) 48 | 49 | # to tensor 50 | labels['attributes'] = self.transform(labels['attributes'].reshape(1,6)) 51 | labels['euler_angles'] = self.transform(labels['euler_angles'].reshape(1,3)) 52 | 53 | return image, labels 54 | 55 | def read_annotations(self, index): 56 | annotations = self.annotations_lines[index] 57 | annotations = annotations.split(' ') 58 | 59 | # 98 lanamark points 60 | # pose expression illumination make-up occlusion blur 61 | # image relative-path 62 | 63 | image_name = annotations[0] 64 | landmarks = annotations[1:197] 65 | attributes = annotations[197:203] 66 | euler_angles = 
annotations[203:206] 67 | 68 | # strings to num 69 | landmarks = [float(landmark) for landmark in landmarks] 70 | attributes = [int(attribute) for attribute in attributes] 71 | euler_angles = [float(angle) for angle in euler_angles] 72 | 73 | # list to numpy 74 | landmarks = np.array(landmarks, dtype=np.float).reshape(98,2) 75 | attributes = np.array(attributes, dtype=np.int).reshape((6,)) 76 | euler_angles = np.array(euler_angles, dtype=np.float).reshape((3,)) 77 | 78 | labels = { 79 | "landmarks" : landmarks, 80 | "attributes": attributes, 81 | "image_name": image_name, 82 | "euler_angles": euler_angles 83 | } 84 | 85 | return labels 86 | 87 | def read_image(self, path): 88 | # path = os.path.join(path) 89 | image = cv2.imread(path, cv2.IMREAD_COLOR) 90 | return image 91 | 92 | def __len__(self): 93 | return len(self.annotations_lines) 94 | 95 | 96 | def create_train_loader(root='data', batch_size = 64, transform=transforms.ToTensor()): 97 | dataset = WFLW_Dataset(root, mode='train', transform=transform) 98 | dataloader = DataLoader(dataset, shuffle=True, batch_size=batch_size) 99 | return dataloader 100 | 101 | def create_test_loader(root='data', batch_size = 1, transform=transforms.ToTensor()): 102 | dataset = WFLW_Dataset(root, mode='test', transform=transform) 103 | dataloader = DataLoader(dataset, shuffle=False, batch_size=batch_size) 104 | return dataloader 105 | 106 | 107 | if __name__ == "__main__": 108 | parser = argparse.ArgumentParser() 109 | parser.add_argument('--mode', type=str, default='train', choices=['train', 'test']) 110 | args = parser.parse_args() 111 | 112 | dataset = WFLW_Dataset(mode=args.mode, transform= transforms.ToTensor()) 113 | for i in range(len(dataset)): 114 | image, labels = dataset[i] 115 | 116 | # dataloader = create_train_loader(batch_size=1) 117 | # for image, labels in dataloader: 118 | 119 | print("image.shape",image.shape) 120 | print("landmarks.shape",labels['landmarks']) 121 | print("euler_angles.shape",labels['euler_angles']) 122 | print("attributes.shape",labels['attributes']) 123 | print('***' * 40, '\n') 124 | 125 | 126 | time.sleep(1) 127 | -------------------------------------------------------------------------------- /euler_angles.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Amr Elsersy 3 | email: amrelsersay@gmail.com 4 | ----------------------------------------------------------------------------------- 5 | Description: Head Pose Euler Angles(pitch yaw roll) estimation from 2D-3D correspondences landmarks 6 | """ 7 | 8 | import cv2 9 | import numpy as np 10 | 11 | class EulerAngles: 12 | """ 13 | Head Pose Estimation from landmarks annotations with solvePnP OpenCV 14 | Pitch, Yaw, Roll Rotation angles (eular angles) from 2D-3D Correspondences Landmarks 15 | 16 | Givin a General 3D Face Model (3D Landmarks) & annotations 2D landmarks 17 | 2D point = internsic * exterinsic * 3D point_in_world_space 18 | if we have 2D - 3D correspondences & internsic camera matrix, 19 | we can use cv2.solvPnP to get the extrensic matrix that convert the world_space to camera_3D_space 20 | this extrensic matrix is considered as the 3 eular angles & translation vector 21 | 22 | we can do that because the faces have similar 3D structure and emotions & iluminations dosn't affect the pose 23 | Notes: 24 | we can choose get any 3D coord from any 3D face model .. 
changing translation will not affect the angle 25 | it will only inroduce a bigger tvec but the same rotation matrix 26 | """ 27 | 28 | def __init__(self, img_shape=(112,112) ): 29 | # Lazy Estimation of Camera internsic Matrix Approximation 30 | self.camera_intrensic_matrix = self.estimate_camera_matrix(img_shape) 31 | # 3D Face model 3D landmarks 32 | self.landmarks_3D = self.get_face_model_3D_landmarks() 33 | 34 | def estimate_camera_matrix(self, img_shape): 35 | # Used Weak Prespective projection as we assume near object with similar depths 36 | 37 | # cx, cy the optical centres 38 | # translation to image center as image center here is top left corner 39 | # focal length is function of image size & Field of View (assumed to be 30 degree) 40 | c_x = img_shape[0] / 2 41 | c_y = img_shape[1] / 2 42 | FieldOfView = 60 43 | focal = c_x / np.tan(np.radians(FieldOfView/2)) 44 | 45 | # Approximated Camera intrensic matrix assuming weak prespective 46 | return np.float32([ 47 | [focal, 0.0, c_x], 48 | [0.0, focal, c_y], 49 | [0.0, 0.0, 1.0] 50 | ]) 51 | 52 | def set_img_shape(self, img_shape): 53 | self.camera_intrensic_matrix = self.estimate_camera_matrix(img_shape) 54 | 55 | def get_face_model_3D_landmarks(self): 56 | """ 57 | General 3D Face Model Coordinates (3D Landmarks) 58 | obtained from antrophometric measurement of the human head. 59 | 60 | Returns: 61 | ------- 62 | 3D_Landmarks: numpy array of shape(N, 3) as N = 11 point in 3D 63 | """ 64 | # X-Y-Z with X pointing forward and Y on the left and Z up (same as LIDAR) 65 | # OpenCV Coord X points to the right, Y down, Z to the front (same as 3D Camera) 66 | 67 | landmarks_3D = np.float32([ 68 | [6.825897, 6.760612, 4.402142], # LEFT_EYEBROW_LEFT, 69 | [1.330353, 7.122144, 6.903745], # LEFT_EYEBROW_RIGHT, 70 | [-1.330353, 7.122144, 6.903745], # RIGHT_EYEBROW_LEFT, 71 | [-6.825897, 6.760612, 4.402142], # RIGHT_EYEBROW_RIGHT, 72 | [5.311432, 5.485328, 3.987654], # LEFT_EYE_LEFT, 73 | [1.789930, 5.393625, 4.413414], # LEFT_EYE_RIGHT, 74 | [-1.789930, 5.393625, 4.413414], # RIGHT_EYE_LEFT, 75 | [-5.311432, 5.485328, 3.987654], # RIGHT_EYE_RIGHT, 76 | [-2.005628, 1.409845, 6.165652], # NOSE_LEFT, 77 | [-2.005628, 1.409845, 6.165652], # NOSE_RIGHT, 78 | [2.774015, -2.080775, 5.048531], # MOUTH_LEFT, 79 | [-2.774015, -2.080775, 5.048531], # MOUTH_RIGHT, 80 | [0.000000, -3.116408, 6.097667], # LOWER_LIP, 81 | [0.000000, -7.415691, 4.070434], # CHIN 82 | ]) 83 | 84 | return landmarks_3D 85 | 86 | 87 | def eular_angles_from_landmarks(self, landmarks_2D): 88 | """ 89 | Estimates Euler angles from 2D landmarks 90 | 91 | Parameters: 92 | ---------- 93 | landmarks_2D: numpy array of shape(N, 2) as N is num of landmarks (usualy 98 from WFLW) 94 | 95 | Returns: 96 | ------- 97 | rvec: rotation numpy array that transform model space to camera space (3D in both) 98 | tvec: translation numpy array that transform model space to camera space 99 | euler_angles: (pitch yaw roll) in degrees 100 | """ 101 | 102 | # WFLW(98 landmark) tracked points 103 | TRACKED_POINTS_MASK = [33, 38, 50, 46, 60, 64, 68, 72, 55, 59, 76, 82, 85, 16] 104 | landmarks_2D = landmarks_2D[TRACKED_POINTS_MASK] 105 | 106 | """ 107 | solve for extrensic matrix (rotation & translation) with 2D-3D correspondences 108 | returns: 109 | rvec: rotation vector (as rotation is 3 degree of freedom, it is represented as 3d-vector) 110 | tvec: translate vector (world origin position relative to the camera 3d coord system) 111 | _ : error -not important-. 
112 | """ 113 | _, rvec, tvec = cv2.solvePnP(self.landmarks_3D, landmarks_2D, self.camera_intrensic_matrix, distCoeffs=None) 114 | 115 | """ 116 | note: 117 | tvec is almost constant = the world origin coord with respect to the camera 118 | avarage value of tvec = [-1,-2,-21] 119 | we can use this directly without computing tvec 120 | """ 121 | 122 | # convert rotation vector to rotation matrix .. note: function is used for vice versa 123 | # rotation matrix that transform from model coord(object model 3D space) to the camera 3D coord space 124 | rotation_matrix, _ = cv2.Rodrigues(rvec) 125 | 126 | # [R T] may be used in cv2.decomposeProjectionMatrix(extrinsic)[6] 127 | extrensic_matrix = np.hstack((rotation_matrix, tvec)) 128 | 129 | # decompose the extrensic matrix to many things including the 3 eular angles 130 | # (pitch yaw roll) in degrees 131 | euler_angles = cv2.RQDecomp3x3(rotation_matrix)[0] 132 | 133 | return rvec, tvec, euler_angles 134 | 135 | if __name__ == "__main__": 136 | from dataset import WFLW_Dataset 137 | from visualization import WFLW_Visualizer 138 | 139 | dataset = WFLW_Dataset(mode='train') 140 | visualizer = WFLW_Visualizer() 141 | eular_estimator = EulerAngles() 142 | 143 | for i in range(len(dataset)): 144 | image, labels = dataset[i] 145 | landmarks = labels['landmarks'] 146 | 147 | rvec, tvec, euler_angles = eular_estimator.eular_angles_from_landmarks(landmarks) 148 | image = visualizer.draw_euler_angles(image, rvec, tvec, euler_angles, eular_estimator.camera_intrensic_matrix) 149 | 150 | print("rvec\n", rvec) 151 | print("tvec\n", tvec) 152 | print("euler ", euler_angles) 153 | print ("*" * 80, '\n\n\t press n for next example .... ESC to exit') 154 | visualizer.show(image) 155 | 156 | if visualizer.user_press == 27: 157 | cv2.destroyAllWindows() 158 | break 159 | 160 | 161 | 162 | -------------------------------------------------------------------------------- /face_detector/deploy.prototxt.txt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape { 3 | dim: 1 4 | dim: 3 5 | dim: 300 6 | dim: 300 7 | } 8 | 9 | layer { 10 | name: "data_bn" 11 | type: "BatchNorm" 12 | bottom: "data" 13 | top: "data_bn" 14 | param { 15 | lr_mult: 0.0 16 | } 17 | param { 18 | lr_mult: 0.0 19 | } 20 | param { 21 | lr_mult: 0.0 22 | } 23 | } 24 | layer { 25 | name: "data_scale" 26 | type: "Scale" 27 | bottom: "data_bn" 28 | top: "data_bn" 29 | param { 30 | lr_mult: 1.0 31 | decay_mult: 1.0 32 | } 33 | param { 34 | lr_mult: 2.0 35 | decay_mult: 1.0 36 | } 37 | scale_param { 38 | bias_term: true 39 | } 40 | } 41 | layer { 42 | name: "conv1_h" 43 | type: "Convolution" 44 | bottom: "data_bn" 45 | top: "conv1_h" 46 | param { 47 | lr_mult: 1.0 48 | decay_mult: 1.0 49 | } 50 | param { 51 | lr_mult: 2.0 52 | decay_mult: 1.0 53 | } 54 | convolution_param { 55 | num_output: 32 56 | pad: 3 57 | kernel_size: 7 58 | stride: 2 59 | weight_filler { 60 | type: "msra" 61 | variance_norm: FAN_OUT 62 | } 63 | bias_filler { 64 | type: "constant" 65 | value: 0.0 66 | } 67 | } 68 | } 69 | layer { 70 | name: "conv1_bn_h" 71 | type: "BatchNorm" 72 | bottom: "conv1_h" 73 | top: "conv1_h" 74 | param { 75 | lr_mult: 0.0 76 | } 77 | param { 78 | lr_mult: 0.0 79 | } 80 | param { 81 | lr_mult: 0.0 82 | } 83 | } 84 | layer { 85 | name: "conv1_scale_h" 86 | type: "Scale" 87 | bottom: "conv1_h" 88 | top: "conv1_h" 89 | param { 90 | lr_mult: 1.0 91 | decay_mult: 1.0 92 | } 93 | param { 94 | lr_mult: 2.0 95 | decay_mult: 1.0 96 | } 97 | scale_param { 98 
| bias_term: true 99 | } 100 | } 101 | layer { 102 | name: "conv1_relu" 103 | type: "ReLU" 104 | bottom: "conv1_h" 105 | top: "conv1_h" 106 | } 107 | layer { 108 | name: "conv1_pool" 109 | type: "Pooling" 110 | bottom: "conv1_h" 111 | top: "conv1_pool" 112 | pooling_param { 113 | kernel_size: 3 114 | stride: 2 115 | } 116 | } 117 | layer { 118 | name: "layer_64_1_conv1_h" 119 | type: "Convolution" 120 | bottom: "conv1_pool" 121 | top: "layer_64_1_conv1_h" 122 | param { 123 | lr_mult: 1.0 124 | decay_mult: 1.0 125 | } 126 | convolution_param { 127 | num_output: 32 128 | bias_term: false 129 | pad: 1 130 | kernel_size: 3 131 | stride: 1 132 | weight_filler { 133 | type: "msra" 134 | } 135 | bias_filler { 136 | type: "constant" 137 | value: 0.0 138 | } 139 | } 140 | } 141 | layer { 142 | name: "layer_64_1_bn2_h" 143 | type: "BatchNorm" 144 | bottom: "layer_64_1_conv1_h" 145 | top: "layer_64_1_conv1_h" 146 | param { 147 | lr_mult: 0.0 148 | } 149 | param { 150 | lr_mult: 0.0 151 | } 152 | param { 153 | lr_mult: 0.0 154 | } 155 | } 156 | layer { 157 | name: "layer_64_1_scale2_h" 158 | type: "Scale" 159 | bottom: "layer_64_1_conv1_h" 160 | top: "layer_64_1_conv1_h" 161 | param { 162 | lr_mult: 1.0 163 | decay_mult: 1.0 164 | } 165 | param { 166 | lr_mult: 2.0 167 | decay_mult: 1.0 168 | } 169 | scale_param { 170 | bias_term: true 171 | } 172 | } 173 | layer { 174 | name: "layer_64_1_relu2" 175 | type: "ReLU" 176 | bottom: "layer_64_1_conv1_h" 177 | top: "layer_64_1_conv1_h" 178 | } 179 | layer { 180 | name: "layer_64_1_conv2_h" 181 | type: "Convolution" 182 | bottom: "layer_64_1_conv1_h" 183 | top: "layer_64_1_conv2_h" 184 | param { 185 | lr_mult: 1.0 186 | decay_mult: 1.0 187 | } 188 | convolution_param { 189 | num_output: 32 190 | bias_term: false 191 | pad: 1 192 | kernel_size: 3 193 | stride: 1 194 | weight_filler { 195 | type: "msra" 196 | } 197 | bias_filler { 198 | type: "constant" 199 | value: 0.0 200 | } 201 | } 202 | } 203 | layer { 204 | name: "layer_64_1_sum" 205 | type: "Eltwise" 206 | bottom: "layer_64_1_conv2_h" 207 | bottom: "conv1_pool" 208 | top: "layer_64_1_sum" 209 | } 210 | layer { 211 | name: "layer_128_1_bn1_h" 212 | type: "BatchNorm" 213 | bottom: "layer_64_1_sum" 214 | top: "layer_128_1_bn1_h" 215 | param { 216 | lr_mult: 0.0 217 | } 218 | param { 219 | lr_mult: 0.0 220 | } 221 | param { 222 | lr_mult: 0.0 223 | } 224 | } 225 | layer { 226 | name: "layer_128_1_scale1_h" 227 | type: "Scale" 228 | bottom: "layer_128_1_bn1_h" 229 | top: "layer_128_1_bn1_h" 230 | param { 231 | lr_mult: 1.0 232 | decay_mult: 1.0 233 | } 234 | param { 235 | lr_mult: 2.0 236 | decay_mult: 1.0 237 | } 238 | scale_param { 239 | bias_term: true 240 | } 241 | } 242 | layer { 243 | name: "layer_128_1_relu1" 244 | type: "ReLU" 245 | bottom: "layer_128_1_bn1_h" 246 | top: "layer_128_1_bn1_h" 247 | } 248 | layer { 249 | name: "layer_128_1_conv1_h" 250 | type: "Convolution" 251 | bottom: "layer_128_1_bn1_h" 252 | top: "layer_128_1_conv1_h" 253 | param { 254 | lr_mult: 1.0 255 | decay_mult: 1.0 256 | } 257 | convolution_param { 258 | num_output: 128 259 | bias_term: false 260 | pad: 1 261 | kernel_size: 3 262 | stride: 2 263 | weight_filler { 264 | type: "msra" 265 | } 266 | bias_filler { 267 | type: "constant" 268 | value: 0.0 269 | } 270 | } 271 | } 272 | layer { 273 | name: "layer_128_1_bn2" 274 | type: "BatchNorm" 275 | bottom: "layer_128_1_conv1_h" 276 | top: "layer_128_1_conv1_h" 277 | param { 278 | lr_mult: 0.0 279 | } 280 | param { 281 | lr_mult: 0.0 282 | } 283 | param { 284 | lr_mult: 0.0 285 | 
} 286 | } 287 | layer { 288 | name: "layer_128_1_scale2" 289 | type: "Scale" 290 | bottom: "layer_128_1_conv1_h" 291 | top: "layer_128_1_conv1_h" 292 | param { 293 | lr_mult: 1.0 294 | decay_mult: 1.0 295 | } 296 | param { 297 | lr_mult: 2.0 298 | decay_mult: 1.0 299 | } 300 | scale_param { 301 | bias_term: true 302 | } 303 | } 304 | layer { 305 | name: "layer_128_1_relu2" 306 | type: "ReLU" 307 | bottom: "layer_128_1_conv1_h" 308 | top: "layer_128_1_conv1_h" 309 | } 310 | layer { 311 | name: "layer_128_1_conv2" 312 | type: "Convolution" 313 | bottom: "layer_128_1_conv1_h" 314 | top: "layer_128_1_conv2" 315 | param { 316 | lr_mult: 1.0 317 | decay_mult: 1.0 318 | } 319 | convolution_param { 320 | num_output: 128 321 | bias_term: false 322 | pad: 1 323 | kernel_size: 3 324 | stride: 1 325 | weight_filler { 326 | type: "msra" 327 | } 328 | bias_filler { 329 | type: "constant" 330 | value: 0.0 331 | } 332 | } 333 | } 334 | layer { 335 | name: "layer_128_1_conv_expand_h" 336 | type: "Convolution" 337 | bottom: "layer_128_1_bn1_h" 338 | top: "layer_128_1_conv_expand_h" 339 | param { 340 | lr_mult: 1.0 341 | decay_mult: 1.0 342 | } 343 | convolution_param { 344 | num_output: 128 345 | bias_term: false 346 | pad: 0 347 | kernel_size: 1 348 | stride: 2 349 | weight_filler { 350 | type: "msra" 351 | } 352 | bias_filler { 353 | type: "constant" 354 | value: 0.0 355 | } 356 | } 357 | } 358 | layer { 359 | name: "layer_128_1_sum" 360 | type: "Eltwise" 361 | bottom: "layer_128_1_conv2" 362 | bottom: "layer_128_1_conv_expand_h" 363 | top: "layer_128_1_sum" 364 | } 365 | layer { 366 | name: "layer_256_1_bn1" 367 | type: "BatchNorm" 368 | bottom: "layer_128_1_sum" 369 | top: "layer_256_1_bn1" 370 | param { 371 | lr_mult: 0.0 372 | } 373 | param { 374 | lr_mult: 0.0 375 | } 376 | param { 377 | lr_mult: 0.0 378 | } 379 | } 380 | layer { 381 | name: "layer_256_1_scale1" 382 | type: "Scale" 383 | bottom: "layer_256_1_bn1" 384 | top: "layer_256_1_bn1" 385 | param { 386 | lr_mult: 1.0 387 | decay_mult: 1.0 388 | } 389 | param { 390 | lr_mult: 2.0 391 | decay_mult: 1.0 392 | } 393 | scale_param { 394 | bias_term: true 395 | } 396 | } 397 | layer { 398 | name: "layer_256_1_relu1" 399 | type: "ReLU" 400 | bottom: "layer_256_1_bn1" 401 | top: "layer_256_1_bn1" 402 | } 403 | layer { 404 | name: "layer_256_1_conv1" 405 | type: "Convolution" 406 | bottom: "layer_256_1_bn1" 407 | top: "layer_256_1_conv1" 408 | param { 409 | lr_mult: 1.0 410 | decay_mult: 1.0 411 | } 412 | convolution_param { 413 | num_output: 256 414 | bias_term: false 415 | pad: 1 416 | kernel_size: 3 417 | stride: 2 418 | weight_filler { 419 | type: "msra" 420 | } 421 | bias_filler { 422 | type: "constant" 423 | value: 0.0 424 | } 425 | } 426 | } 427 | layer { 428 | name: "layer_256_1_bn2" 429 | type: "BatchNorm" 430 | bottom: "layer_256_1_conv1" 431 | top: "layer_256_1_conv1" 432 | param { 433 | lr_mult: 0.0 434 | } 435 | param { 436 | lr_mult: 0.0 437 | } 438 | param { 439 | lr_mult: 0.0 440 | } 441 | } 442 | layer { 443 | name: "layer_256_1_scale2" 444 | type: "Scale" 445 | bottom: "layer_256_1_conv1" 446 | top: "layer_256_1_conv1" 447 | param { 448 | lr_mult: 1.0 449 | decay_mult: 1.0 450 | } 451 | param { 452 | lr_mult: 2.0 453 | decay_mult: 1.0 454 | } 455 | scale_param { 456 | bias_term: true 457 | } 458 | } 459 | layer { 460 | name: "layer_256_1_relu2" 461 | type: "ReLU" 462 | bottom: "layer_256_1_conv1" 463 | top: "layer_256_1_conv1" 464 | } 465 | layer { 466 | name: "layer_256_1_conv2" 467 | type: "Convolution" 468 | bottom: 
"layer_256_1_conv1" 469 | top: "layer_256_1_conv2" 470 | param { 471 | lr_mult: 1.0 472 | decay_mult: 1.0 473 | } 474 | convolution_param { 475 | num_output: 256 476 | bias_term: false 477 | pad: 1 478 | kernel_size: 3 479 | stride: 1 480 | weight_filler { 481 | type: "msra" 482 | } 483 | bias_filler { 484 | type: "constant" 485 | value: 0.0 486 | } 487 | } 488 | } 489 | layer { 490 | name: "layer_256_1_conv_expand" 491 | type: "Convolution" 492 | bottom: "layer_256_1_bn1" 493 | top: "layer_256_1_conv_expand" 494 | param { 495 | lr_mult: 1.0 496 | decay_mult: 1.0 497 | } 498 | convolution_param { 499 | num_output: 256 500 | bias_term: false 501 | pad: 0 502 | kernel_size: 1 503 | stride: 2 504 | weight_filler { 505 | type: "msra" 506 | } 507 | bias_filler { 508 | type: "constant" 509 | value: 0.0 510 | } 511 | } 512 | } 513 | layer { 514 | name: "layer_256_1_sum" 515 | type: "Eltwise" 516 | bottom: "layer_256_1_conv2" 517 | bottom: "layer_256_1_conv_expand" 518 | top: "layer_256_1_sum" 519 | } 520 | layer { 521 | name: "layer_512_1_bn1" 522 | type: "BatchNorm" 523 | bottom: "layer_256_1_sum" 524 | top: "layer_512_1_bn1" 525 | param { 526 | lr_mult: 0.0 527 | } 528 | param { 529 | lr_mult: 0.0 530 | } 531 | param { 532 | lr_mult: 0.0 533 | } 534 | } 535 | layer { 536 | name: "layer_512_1_scale1" 537 | type: "Scale" 538 | bottom: "layer_512_1_bn1" 539 | top: "layer_512_1_bn1" 540 | param { 541 | lr_mult: 1.0 542 | decay_mult: 1.0 543 | } 544 | param { 545 | lr_mult: 2.0 546 | decay_mult: 1.0 547 | } 548 | scale_param { 549 | bias_term: true 550 | } 551 | } 552 | layer { 553 | name: "layer_512_1_relu1" 554 | type: "ReLU" 555 | bottom: "layer_512_1_bn1" 556 | top: "layer_512_1_bn1" 557 | } 558 | layer { 559 | name: "layer_512_1_conv1_h" 560 | type: "Convolution" 561 | bottom: "layer_512_1_bn1" 562 | top: "layer_512_1_conv1_h" 563 | param { 564 | lr_mult: 1.0 565 | decay_mult: 1.0 566 | } 567 | convolution_param { 568 | num_output: 128 569 | bias_term: false 570 | pad: 1 571 | kernel_size: 3 572 | stride: 1 # 2 573 | weight_filler { 574 | type: "msra" 575 | } 576 | bias_filler { 577 | type: "constant" 578 | value: 0.0 579 | } 580 | } 581 | } 582 | layer { 583 | name: "layer_512_1_bn2_h" 584 | type: "BatchNorm" 585 | bottom: "layer_512_1_conv1_h" 586 | top: "layer_512_1_conv1_h" 587 | param { 588 | lr_mult: 0.0 589 | } 590 | param { 591 | lr_mult: 0.0 592 | } 593 | param { 594 | lr_mult: 0.0 595 | } 596 | } 597 | layer { 598 | name: "layer_512_1_scale2_h" 599 | type: "Scale" 600 | bottom: "layer_512_1_conv1_h" 601 | top: "layer_512_1_conv1_h" 602 | param { 603 | lr_mult: 1.0 604 | decay_mult: 1.0 605 | } 606 | param { 607 | lr_mult: 2.0 608 | decay_mult: 1.0 609 | } 610 | scale_param { 611 | bias_term: true 612 | } 613 | } 614 | layer { 615 | name: "layer_512_1_relu2" 616 | type: "ReLU" 617 | bottom: "layer_512_1_conv1_h" 618 | top: "layer_512_1_conv1_h" 619 | } 620 | layer { 621 | name: "layer_512_1_conv2_h" 622 | type: "Convolution" 623 | bottom: "layer_512_1_conv1_h" 624 | top: "layer_512_1_conv2_h" 625 | param { 626 | lr_mult: 1.0 627 | decay_mult: 1.0 628 | } 629 | convolution_param { 630 | num_output: 256 631 | bias_term: false 632 | pad: 2 # 1 633 | kernel_size: 3 634 | stride: 1 635 | dilation: 2 636 | weight_filler { 637 | type: "msra" 638 | } 639 | bias_filler { 640 | type: "constant" 641 | value: 0.0 642 | } 643 | } 644 | } 645 | layer { 646 | name: "layer_512_1_conv_expand_h" 647 | type: "Convolution" 648 | bottom: "layer_512_1_bn1" 649 | top: "layer_512_1_conv_expand_h" 650 | param 
{ 651 | lr_mult: 1.0 652 | decay_mult: 1.0 653 | } 654 | convolution_param { 655 | num_output: 256 656 | bias_term: false 657 | pad: 0 658 | kernel_size: 1 659 | stride: 1 # 2 660 | weight_filler { 661 | type: "msra" 662 | } 663 | bias_filler { 664 | type: "constant" 665 | value: 0.0 666 | } 667 | } 668 | } 669 | layer { 670 | name: "layer_512_1_sum" 671 | type: "Eltwise" 672 | bottom: "layer_512_1_conv2_h" 673 | bottom: "layer_512_1_conv_expand_h" 674 | top: "layer_512_1_sum" 675 | } 676 | layer { 677 | name: "last_bn_h" 678 | type: "BatchNorm" 679 | bottom: "layer_512_1_sum" 680 | top: "layer_512_1_sum" 681 | param { 682 | lr_mult: 0.0 683 | } 684 | param { 685 | lr_mult: 0.0 686 | } 687 | param { 688 | lr_mult: 0.0 689 | } 690 | } 691 | layer { 692 | name: "last_scale_h" 693 | type: "Scale" 694 | bottom: "layer_512_1_sum" 695 | top: "layer_512_1_sum" 696 | param { 697 | lr_mult: 1.0 698 | decay_mult: 1.0 699 | } 700 | param { 701 | lr_mult: 2.0 702 | decay_mult: 1.0 703 | } 704 | scale_param { 705 | bias_term: true 706 | } 707 | } 708 | layer { 709 | name: "last_relu" 710 | type: "ReLU" 711 | bottom: "layer_512_1_sum" 712 | top: "fc7" 713 | } 714 | 715 | layer { 716 | name: "conv6_1_h" 717 | type: "Convolution" 718 | bottom: "fc7" 719 | top: "conv6_1_h" 720 | param { 721 | lr_mult: 1 722 | decay_mult: 1 723 | } 724 | param { 725 | lr_mult: 2 726 | decay_mult: 0 727 | } 728 | convolution_param { 729 | num_output: 128 730 | pad: 0 731 | kernel_size: 1 732 | stride: 1 733 | weight_filler { 734 | type: "xavier" 735 | } 736 | bias_filler { 737 | type: "constant" 738 | value: 0 739 | } 740 | } 741 | } 742 | layer { 743 | name: "conv6_1_relu" 744 | type: "ReLU" 745 | bottom: "conv6_1_h" 746 | top: "conv6_1_h" 747 | } 748 | layer { 749 | name: "conv6_2_h" 750 | type: "Convolution" 751 | bottom: "conv6_1_h" 752 | top: "conv6_2_h" 753 | param { 754 | lr_mult: 1 755 | decay_mult: 1 756 | } 757 | param { 758 | lr_mult: 2 759 | decay_mult: 0 760 | } 761 | convolution_param { 762 | num_output: 256 763 | pad: 1 764 | kernel_size: 3 765 | stride: 2 766 | weight_filler { 767 | type: "xavier" 768 | } 769 | bias_filler { 770 | type: "constant" 771 | value: 0 772 | } 773 | } 774 | } 775 | layer { 776 | name: "conv6_2_relu" 777 | type: "ReLU" 778 | bottom: "conv6_2_h" 779 | top: "conv6_2_h" 780 | } 781 | layer { 782 | name: "conv7_1_h" 783 | type: "Convolution" 784 | bottom: "conv6_2_h" 785 | top: "conv7_1_h" 786 | param { 787 | lr_mult: 1 788 | decay_mult: 1 789 | } 790 | param { 791 | lr_mult: 2 792 | decay_mult: 0 793 | } 794 | convolution_param { 795 | num_output: 64 796 | pad: 0 797 | kernel_size: 1 798 | stride: 1 799 | weight_filler { 800 | type: "xavier" 801 | } 802 | bias_filler { 803 | type: "constant" 804 | value: 0 805 | } 806 | } 807 | } 808 | layer { 809 | name: "conv7_1_relu" 810 | type: "ReLU" 811 | bottom: "conv7_1_h" 812 | top: "conv7_1_h" 813 | } 814 | layer { 815 | name: "conv7_2_h" 816 | type: "Convolution" 817 | bottom: "conv7_1_h" 818 | top: "conv7_2_h" 819 | param { 820 | lr_mult: 1 821 | decay_mult: 1 822 | } 823 | param { 824 | lr_mult: 2 825 | decay_mult: 0 826 | } 827 | convolution_param { 828 | num_output: 128 829 | pad: 1 830 | kernel_size: 3 831 | stride: 2 832 | weight_filler { 833 | type: "xavier" 834 | } 835 | bias_filler { 836 | type: "constant" 837 | value: 0 838 | } 839 | } 840 | } 841 | layer { 842 | name: "conv7_2_relu" 843 | type: "ReLU" 844 | bottom: "conv7_2_h" 845 | top: "conv7_2_h" 846 | } 847 | layer { 848 | name: "conv8_1_h" 849 | type: "Convolution" 850 | 
bottom: "conv7_2_h" 851 | top: "conv8_1_h" 852 | param { 853 | lr_mult: 1 854 | decay_mult: 1 855 | } 856 | param { 857 | lr_mult: 2 858 | decay_mult: 0 859 | } 860 | convolution_param { 861 | num_output: 64 862 | pad: 0 863 | kernel_size: 1 864 | stride: 1 865 | weight_filler { 866 | type: "xavier" 867 | } 868 | bias_filler { 869 | type: "constant" 870 | value: 0 871 | } 872 | } 873 | } 874 | layer { 875 | name: "conv8_1_relu" 876 | type: "ReLU" 877 | bottom: "conv8_1_h" 878 | top: "conv8_1_h" 879 | } 880 | layer { 881 | name: "conv8_2_h" 882 | type: "Convolution" 883 | bottom: "conv8_1_h" 884 | top: "conv8_2_h" 885 | param { 886 | lr_mult: 1 887 | decay_mult: 1 888 | } 889 | param { 890 | lr_mult: 2 891 | decay_mult: 0 892 | } 893 | convolution_param { 894 | num_output: 128 895 | pad: 0 896 | kernel_size: 3 897 | stride: 1 898 | weight_filler { 899 | type: "xavier" 900 | } 901 | bias_filler { 902 | type: "constant" 903 | value: 0 904 | } 905 | } 906 | } 907 | layer { 908 | name: "conv8_2_relu" 909 | type: "ReLU" 910 | bottom: "conv8_2_h" 911 | top: "conv8_2_h" 912 | } 913 | layer { 914 | name: "conv9_1_h" 915 | type: "Convolution" 916 | bottom: "conv8_2_h" 917 | top: "conv9_1_h" 918 | param { 919 | lr_mult: 1 920 | decay_mult: 1 921 | } 922 | param { 923 | lr_mult: 2 924 | decay_mult: 0 925 | } 926 | convolution_param { 927 | num_output: 64 928 | pad: 0 929 | kernel_size: 1 930 | stride: 1 931 | weight_filler { 932 | type: "xavier" 933 | } 934 | bias_filler { 935 | type: "constant" 936 | value: 0 937 | } 938 | } 939 | } 940 | layer { 941 | name: "conv9_1_relu" 942 | type: "ReLU" 943 | bottom: "conv9_1_h" 944 | top: "conv9_1_h" 945 | } 946 | layer { 947 | name: "conv9_2_h" 948 | type: "Convolution" 949 | bottom: "conv9_1_h" 950 | top: "conv9_2_h" 951 | param { 952 | lr_mult: 1 953 | decay_mult: 1 954 | } 955 | param { 956 | lr_mult: 2 957 | decay_mult: 0 958 | } 959 | convolution_param { 960 | num_output: 128 961 | pad: 0 962 | kernel_size: 3 963 | stride: 1 964 | weight_filler { 965 | type: "xavier" 966 | } 967 | bias_filler { 968 | type: "constant" 969 | value: 0 970 | } 971 | } 972 | } 973 | layer { 974 | name: "conv9_2_relu" 975 | type: "ReLU" 976 | bottom: "conv9_2_h" 977 | top: "conv9_2_h" 978 | } 979 | layer { 980 | name: "conv4_3_norm" 981 | type: "Normalize" 982 | bottom: "layer_256_1_bn1" 983 | top: "conv4_3_norm" 984 | norm_param { 985 | across_spatial: false 986 | scale_filler { 987 | type: "constant" 988 | value: 20 989 | } 990 | channel_shared: false 991 | } 992 | } 993 | layer { 994 | name: "conv4_3_norm_mbox_loc" 995 | type: "Convolution" 996 | bottom: "conv4_3_norm" 997 | top: "conv4_3_norm_mbox_loc" 998 | param { 999 | lr_mult: 1 1000 | decay_mult: 1 1001 | } 1002 | param { 1003 | lr_mult: 2 1004 | decay_mult: 0 1005 | } 1006 | convolution_param { 1007 | num_output: 16 1008 | pad: 1 1009 | kernel_size: 3 1010 | stride: 1 1011 | weight_filler { 1012 | type: "xavier" 1013 | } 1014 | bias_filler { 1015 | type: "constant" 1016 | value: 0 1017 | } 1018 | } 1019 | } 1020 | layer { 1021 | name: "conv4_3_norm_mbox_loc_perm" 1022 | type: "Permute" 1023 | bottom: "conv4_3_norm_mbox_loc" 1024 | top: "conv4_3_norm_mbox_loc_perm" 1025 | permute_param { 1026 | order: 0 1027 | order: 2 1028 | order: 3 1029 | order: 1 1030 | } 1031 | } 1032 | layer { 1033 | name: "conv4_3_norm_mbox_loc_flat" 1034 | type: "Flatten" 1035 | bottom: "conv4_3_norm_mbox_loc_perm" 1036 | top: "conv4_3_norm_mbox_loc_flat" 1037 | flatten_param { 1038 | axis: 1 1039 | } 1040 | } 1041 | layer { 1042 | name: 
"conv4_3_norm_mbox_conf" 1043 | type: "Convolution" 1044 | bottom: "conv4_3_norm" 1045 | top: "conv4_3_norm_mbox_conf" 1046 | param { 1047 | lr_mult: 1 1048 | decay_mult: 1 1049 | } 1050 | param { 1051 | lr_mult: 2 1052 | decay_mult: 0 1053 | } 1054 | convolution_param { 1055 | num_output: 8 # 84 1056 | pad: 1 1057 | kernel_size: 3 1058 | stride: 1 1059 | weight_filler { 1060 | type: "xavier" 1061 | } 1062 | bias_filler { 1063 | type: "constant" 1064 | value: 0 1065 | } 1066 | } 1067 | } 1068 | layer { 1069 | name: "conv4_3_norm_mbox_conf_perm" 1070 | type: "Permute" 1071 | bottom: "conv4_3_norm_mbox_conf" 1072 | top: "conv4_3_norm_mbox_conf_perm" 1073 | permute_param { 1074 | order: 0 1075 | order: 2 1076 | order: 3 1077 | order: 1 1078 | } 1079 | } 1080 | layer { 1081 | name: "conv4_3_norm_mbox_conf_flat" 1082 | type: "Flatten" 1083 | bottom: "conv4_3_norm_mbox_conf_perm" 1084 | top: "conv4_3_norm_mbox_conf_flat" 1085 | flatten_param { 1086 | axis: 1 1087 | } 1088 | } 1089 | layer { 1090 | name: "conv4_3_norm_mbox_priorbox" 1091 | type: "PriorBox" 1092 | bottom: "conv4_3_norm" 1093 | bottom: "data" 1094 | top: "conv4_3_norm_mbox_priorbox" 1095 | prior_box_param { 1096 | min_size: 30.0 1097 | max_size: 60.0 1098 | aspect_ratio: 2 1099 | flip: true 1100 | clip: false 1101 | variance: 0.1 1102 | variance: 0.1 1103 | variance: 0.2 1104 | variance: 0.2 1105 | step: 8 1106 | offset: 0.5 1107 | } 1108 | } 1109 | layer { 1110 | name: "fc7_mbox_loc" 1111 | type: "Convolution" 1112 | bottom: "fc7" 1113 | top: "fc7_mbox_loc" 1114 | param { 1115 | lr_mult: 1 1116 | decay_mult: 1 1117 | } 1118 | param { 1119 | lr_mult: 2 1120 | decay_mult: 0 1121 | } 1122 | convolution_param { 1123 | num_output: 24 1124 | pad: 1 1125 | kernel_size: 3 1126 | stride: 1 1127 | weight_filler { 1128 | type: "xavier" 1129 | } 1130 | bias_filler { 1131 | type: "constant" 1132 | value: 0 1133 | } 1134 | } 1135 | } 1136 | layer { 1137 | name: "fc7_mbox_loc_perm" 1138 | type: "Permute" 1139 | bottom: "fc7_mbox_loc" 1140 | top: "fc7_mbox_loc_perm" 1141 | permute_param { 1142 | order: 0 1143 | order: 2 1144 | order: 3 1145 | order: 1 1146 | } 1147 | } 1148 | layer { 1149 | name: "fc7_mbox_loc_flat" 1150 | type: "Flatten" 1151 | bottom: "fc7_mbox_loc_perm" 1152 | top: "fc7_mbox_loc_flat" 1153 | flatten_param { 1154 | axis: 1 1155 | } 1156 | } 1157 | layer { 1158 | name: "fc7_mbox_conf" 1159 | type: "Convolution" 1160 | bottom: "fc7" 1161 | top: "fc7_mbox_conf" 1162 | param { 1163 | lr_mult: 1 1164 | decay_mult: 1 1165 | } 1166 | param { 1167 | lr_mult: 2 1168 | decay_mult: 0 1169 | } 1170 | convolution_param { 1171 | num_output: 12 # 126 1172 | pad: 1 1173 | kernel_size: 3 1174 | stride: 1 1175 | weight_filler { 1176 | type: "xavier" 1177 | } 1178 | bias_filler { 1179 | type: "constant" 1180 | value: 0 1181 | } 1182 | } 1183 | } 1184 | layer { 1185 | name: "fc7_mbox_conf_perm" 1186 | type: "Permute" 1187 | bottom: "fc7_mbox_conf" 1188 | top: "fc7_mbox_conf_perm" 1189 | permute_param { 1190 | order: 0 1191 | order: 2 1192 | order: 3 1193 | order: 1 1194 | } 1195 | } 1196 | layer { 1197 | name: "fc7_mbox_conf_flat" 1198 | type: "Flatten" 1199 | bottom: "fc7_mbox_conf_perm" 1200 | top: "fc7_mbox_conf_flat" 1201 | flatten_param { 1202 | axis: 1 1203 | } 1204 | } 1205 | layer { 1206 | name: "fc7_mbox_priorbox" 1207 | type: "PriorBox" 1208 | bottom: "fc7" 1209 | bottom: "data" 1210 | top: "fc7_mbox_priorbox" 1211 | prior_box_param { 1212 | min_size: 60.0 1213 | max_size: 111.0 1214 | aspect_ratio: 2 1215 | aspect_ratio: 3 1216 | flip: 
true 1217 | clip: false 1218 | variance: 0.1 1219 | variance: 0.1 1220 | variance: 0.2 1221 | variance: 0.2 1222 | step: 16 1223 | offset: 0.5 1224 | } 1225 | } 1226 | layer { 1227 | name: "conv6_2_mbox_loc" 1228 | type: "Convolution" 1229 | bottom: "conv6_2_h" 1230 | top: "conv6_2_mbox_loc" 1231 | param { 1232 | lr_mult: 1 1233 | decay_mult: 1 1234 | } 1235 | param { 1236 | lr_mult: 2 1237 | decay_mult: 0 1238 | } 1239 | convolution_param { 1240 | num_output: 24 1241 | pad: 1 1242 | kernel_size: 3 1243 | stride: 1 1244 | weight_filler { 1245 | type: "xavier" 1246 | } 1247 | bias_filler { 1248 | type: "constant" 1249 | value: 0 1250 | } 1251 | } 1252 | } 1253 | layer { 1254 | name: "conv6_2_mbox_loc_perm" 1255 | type: "Permute" 1256 | bottom: "conv6_2_mbox_loc" 1257 | top: "conv6_2_mbox_loc_perm" 1258 | permute_param { 1259 | order: 0 1260 | order: 2 1261 | order: 3 1262 | order: 1 1263 | } 1264 | } 1265 | layer { 1266 | name: "conv6_2_mbox_loc_flat" 1267 | type: "Flatten" 1268 | bottom: "conv6_2_mbox_loc_perm" 1269 | top: "conv6_2_mbox_loc_flat" 1270 | flatten_param { 1271 | axis: 1 1272 | } 1273 | } 1274 | layer { 1275 | name: "conv6_2_mbox_conf" 1276 | type: "Convolution" 1277 | bottom: "conv6_2_h" 1278 | top: "conv6_2_mbox_conf" 1279 | param { 1280 | lr_mult: 1 1281 | decay_mult: 1 1282 | } 1283 | param { 1284 | lr_mult: 2 1285 | decay_mult: 0 1286 | } 1287 | convolution_param { 1288 | num_output: 12 # 126 1289 | pad: 1 1290 | kernel_size: 3 1291 | stride: 1 1292 | weight_filler { 1293 | type: "xavier" 1294 | } 1295 | bias_filler { 1296 | type: "constant" 1297 | value: 0 1298 | } 1299 | } 1300 | } 1301 | layer { 1302 | name: "conv6_2_mbox_conf_perm" 1303 | type: "Permute" 1304 | bottom: "conv6_2_mbox_conf" 1305 | top: "conv6_2_mbox_conf_perm" 1306 | permute_param { 1307 | order: 0 1308 | order: 2 1309 | order: 3 1310 | order: 1 1311 | } 1312 | } 1313 | layer { 1314 | name: "conv6_2_mbox_conf_flat" 1315 | type: "Flatten" 1316 | bottom: "conv6_2_mbox_conf_perm" 1317 | top: "conv6_2_mbox_conf_flat" 1318 | flatten_param { 1319 | axis: 1 1320 | } 1321 | } 1322 | layer { 1323 | name: "conv6_2_mbox_priorbox" 1324 | type: "PriorBox" 1325 | bottom: "conv6_2_h" 1326 | bottom: "data" 1327 | top: "conv6_2_mbox_priorbox" 1328 | prior_box_param { 1329 | min_size: 111.0 1330 | max_size: 162.0 1331 | aspect_ratio: 2 1332 | aspect_ratio: 3 1333 | flip: true 1334 | clip: false 1335 | variance: 0.1 1336 | variance: 0.1 1337 | variance: 0.2 1338 | variance: 0.2 1339 | step: 32 1340 | offset: 0.5 1341 | } 1342 | } 1343 | layer { 1344 | name: "conv7_2_mbox_loc" 1345 | type: "Convolution" 1346 | bottom: "conv7_2_h" 1347 | top: "conv7_2_mbox_loc" 1348 | param { 1349 | lr_mult: 1 1350 | decay_mult: 1 1351 | } 1352 | param { 1353 | lr_mult: 2 1354 | decay_mult: 0 1355 | } 1356 | convolution_param { 1357 | num_output: 24 1358 | pad: 1 1359 | kernel_size: 3 1360 | stride: 1 1361 | weight_filler { 1362 | type: "xavier" 1363 | } 1364 | bias_filler { 1365 | type: "constant" 1366 | value: 0 1367 | } 1368 | } 1369 | } 1370 | layer { 1371 | name: "conv7_2_mbox_loc_perm" 1372 | type: "Permute" 1373 | bottom: "conv7_2_mbox_loc" 1374 | top: "conv7_2_mbox_loc_perm" 1375 | permute_param { 1376 | order: 0 1377 | order: 2 1378 | order: 3 1379 | order: 1 1380 | } 1381 | } 1382 | layer { 1383 | name: "conv7_2_mbox_loc_flat" 1384 | type: "Flatten" 1385 | bottom: "conv7_2_mbox_loc_perm" 1386 | top: "conv7_2_mbox_loc_flat" 1387 | flatten_param { 1388 | axis: 1 1389 | } 1390 | } 1391 | layer { 1392 | name: "conv7_2_mbox_conf" 1393 
| type: "Convolution" 1394 | bottom: "conv7_2_h" 1395 | top: "conv7_2_mbox_conf" 1396 | param { 1397 | lr_mult: 1 1398 | decay_mult: 1 1399 | } 1400 | param { 1401 | lr_mult: 2 1402 | decay_mult: 0 1403 | } 1404 | convolution_param { 1405 | num_output: 12 # 126 1406 | pad: 1 1407 | kernel_size: 3 1408 | stride: 1 1409 | weight_filler { 1410 | type: "xavier" 1411 | } 1412 | bias_filler { 1413 | type: "constant" 1414 | value: 0 1415 | } 1416 | } 1417 | } 1418 | layer { 1419 | name: "conv7_2_mbox_conf_perm" 1420 | type: "Permute" 1421 | bottom: "conv7_2_mbox_conf" 1422 | top: "conv7_2_mbox_conf_perm" 1423 | permute_param { 1424 | order: 0 1425 | order: 2 1426 | order: 3 1427 | order: 1 1428 | } 1429 | } 1430 | layer { 1431 | name: "conv7_2_mbox_conf_flat" 1432 | type: "Flatten" 1433 | bottom: "conv7_2_mbox_conf_perm" 1434 | top: "conv7_2_mbox_conf_flat" 1435 | flatten_param { 1436 | axis: 1 1437 | } 1438 | } 1439 | layer { 1440 | name: "conv7_2_mbox_priorbox" 1441 | type: "PriorBox" 1442 | bottom: "conv7_2_h" 1443 | bottom: "data" 1444 | top: "conv7_2_mbox_priorbox" 1445 | prior_box_param { 1446 | min_size: 162.0 1447 | max_size: 213.0 1448 | aspect_ratio: 2 1449 | aspect_ratio: 3 1450 | flip: true 1451 | clip: false 1452 | variance: 0.1 1453 | variance: 0.1 1454 | variance: 0.2 1455 | variance: 0.2 1456 | step: 64 1457 | offset: 0.5 1458 | } 1459 | } 1460 | layer { 1461 | name: "conv8_2_mbox_loc" 1462 | type: "Convolution" 1463 | bottom: "conv8_2_h" 1464 | top: "conv8_2_mbox_loc" 1465 | param { 1466 | lr_mult: 1 1467 | decay_mult: 1 1468 | } 1469 | param { 1470 | lr_mult: 2 1471 | decay_mult: 0 1472 | } 1473 | convolution_param { 1474 | num_output: 16 1475 | pad: 1 1476 | kernel_size: 3 1477 | stride: 1 1478 | weight_filler { 1479 | type: "xavier" 1480 | } 1481 | bias_filler { 1482 | type: "constant" 1483 | value: 0 1484 | } 1485 | } 1486 | } 1487 | layer { 1488 | name: "conv8_2_mbox_loc_perm" 1489 | type: "Permute" 1490 | bottom: "conv8_2_mbox_loc" 1491 | top: "conv8_2_mbox_loc_perm" 1492 | permute_param { 1493 | order: 0 1494 | order: 2 1495 | order: 3 1496 | order: 1 1497 | } 1498 | } 1499 | layer { 1500 | name: "conv8_2_mbox_loc_flat" 1501 | type: "Flatten" 1502 | bottom: "conv8_2_mbox_loc_perm" 1503 | top: "conv8_2_mbox_loc_flat" 1504 | flatten_param { 1505 | axis: 1 1506 | } 1507 | } 1508 | layer { 1509 | name: "conv8_2_mbox_conf" 1510 | type: "Convolution" 1511 | bottom: "conv8_2_h" 1512 | top: "conv8_2_mbox_conf" 1513 | param { 1514 | lr_mult: 1 1515 | decay_mult: 1 1516 | } 1517 | param { 1518 | lr_mult: 2 1519 | decay_mult: 0 1520 | } 1521 | convolution_param { 1522 | num_output: 8 # 84 1523 | pad: 1 1524 | kernel_size: 3 1525 | stride: 1 1526 | weight_filler { 1527 | type: "xavier" 1528 | } 1529 | bias_filler { 1530 | type: "constant" 1531 | value: 0 1532 | } 1533 | } 1534 | } 1535 | layer { 1536 | name: "conv8_2_mbox_conf_perm" 1537 | type: "Permute" 1538 | bottom: "conv8_2_mbox_conf" 1539 | top: "conv8_2_mbox_conf_perm" 1540 | permute_param { 1541 | order: 0 1542 | order: 2 1543 | order: 3 1544 | order: 1 1545 | } 1546 | } 1547 | layer { 1548 | name: "conv8_2_mbox_conf_flat" 1549 | type: "Flatten" 1550 | bottom: "conv8_2_mbox_conf_perm" 1551 | top: "conv8_2_mbox_conf_flat" 1552 | flatten_param { 1553 | axis: 1 1554 | } 1555 | } 1556 | layer { 1557 | name: "conv8_2_mbox_priorbox" 1558 | type: "PriorBox" 1559 | bottom: "conv8_2_h" 1560 | bottom: "data" 1561 | top: "conv8_2_mbox_priorbox" 1562 | prior_box_param { 1563 | min_size: 213.0 1564 | max_size: 264.0 1565 | aspect_ratio: 2 
1566 | flip: true 1567 | clip: false 1568 | variance: 0.1 1569 | variance: 0.1 1570 | variance: 0.2 1571 | variance: 0.2 1572 | step: 100 1573 | offset: 0.5 1574 | } 1575 | } 1576 | layer { 1577 | name: "conv9_2_mbox_loc" 1578 | type: "Convolution" 1579 | bottom: "conv9_2_h" 1580 | top: "conv9_2_mbox_loc" 1581 | param { 1582 | lr_mult: 1 1583 | decay_mult: 1 1584 | } 1585 | param { 1586 | lr_mult: 2 1587 | decay_mult: 0 1588 | } 1589 | convolution_param { 1590 | num_output: 16 1591 | pad: 1 1592 | kernel_size: 3 1593 | stride: 1 1594 | weight_filler { 1595 | type: "xavier" 1596 | } 1597 | bias_filler { 1598 | type: "constant" 1599 | value: 0 1600 | } 1601 | } 1602 | } 1603 | layer { 1604 | name: "conv9_2_mbox_loc_perm" 1605 | type: "Permute" 1606 | bottom: "conv9_2_mbox_loc" 1607 | top: "conv9_2_mbox_loc_perm" 1608 | permute_param { 1609 | order: 0 1610 | order: 2 1611 | order: 3 1612 | order: 1 1613 | } 1614 | } 1615 | layer { 1616 | name: "conv9_2_mbox_loc_flat" 1617 | type: "Flatten" 1618 | bottom: "conv9_2_mbox_loc_perm" 1619 | top: "conv9_2_mbox_loc_flat" 1620 | flatten_param { 1621 | axis: 1 1622 | } 1623 | } 1624 | layer { 1625 | name: "conv9_2_mbox_conf" 1626 | type: "Convolution" 1627 | bottom: "conv9_2_h" 1628 | top: "conv9_2_mbox_conf" 1629 | param { 1630 | lr_mult: 1 1631 | decay_mult: 1 1632 | } 1633 | param { 1634 | lr_mult: 2 1635 | decay_mult: 0 1636 | } 1637 | convolution_param { 1638 | num_output: 8 # 84 1639 | pad: 1 1640 | kernel_size: 3 1641 | stride: 1 1642 | weight_filler { 1643 | type: "xavier" 1644 | } 1645 | bias_filler { 1646 | type: "constant" 1647 | value: 0 1648 | } 1649 | } 1650 | } 1651 | layer { 1652 | name: "conv9_2_mbox_conf_perm" 1653 | type: "Permute" 1654 | bottom: "conv9_2_mbox_conf" 1655 | top: "conv9_2_mbox_conf_perm" 1656 | permute_param { 1657 | order: 0 1658 | order: 2 1659 | order: 3 1660 | order: 1 1661 | } 1662 | } 1663 | layer { 1664 | name: "conv9_2_mbox_conf_flat" 1665 | type: "Flatten" 1666 | bottom: "conv9_2_mbox_conf_perm" 1667 | top: "conv9_2_mbox_conf_flat" 1668 | flatten_param { 1669 | axis: 1 1670 | } 1671 | } 1672 | layer { 1673 | name: "conv9_2_mbox_priorbox" 1674 | type: "PriorBox" 1675 | bottom: "conv9_2_h" 1676 | bottom: "data" 1677 | top: "conv9_2_mbox_priorbox" 1678 | prior_box_param { 1679 | min_size: 264.0 1680 | max_size: 315.0 1681 | aspect_ratio: 2 1682 | flip: true 1683 | clip: false 1684 | variance: 0.1 1685 | variance: 0.1 1686 | variance: 0.2 1687 | variance: 0.2 1688 | step: 300 1689 | offset: 0.5 1690 | } 1691 | } 1692 | layer { 1693 | name: "mbox_loc" 1694 | type: "Concat" 1695 | bottom: "conv4_3_norm_mbox_loc_flat" 1696 | bottom: "fc7_mbox_loc_flat" 1697 | bottom: "conv6_2_mbox_loc_flat" 1698 | bottom: "conv7_2_mbox_loc_flat" 1699 | bottom: "conv8_2_mbox_loc_flat" 1700 | bottom: "conv9_2_mbox_loc_flat" 1701 | top: "mbox_loc" 1702 | concat_param { 1703 | axis: 1 1704 | } 1705 | } 1706 | layer { 1707 | name: "mbox_conf" 1708 | type: "Concat" 1709 | bottom: "conv4_3_norm_mbox_conf_flat" 1710 | bottom: "fc7_mbox_conf_flat" 1711 | bottom: "conv6_2_mbox_conf_flat" 1712 | bottom: "conv7_2_mbox_conf_flat" 1713 | bottom: "conv8_2_mbox_conf_flat" 1714 | bottom: "conv9_2_mbox_conf_flat" 1715 | top: "mbox_conf" 1716 | concat_param { 1717 | axis: 1 1718 | } 1719 | } 1720 | layer { 1721 | name: "mbox_priorbox" 1722 | type: "Concat" 1723 | bottom: "conv4_3_norm_mbox_priorbox" 1724 | bottom: "fc7_mbox_priorbox" 1725 | bottom: "conv6_2_mbox_priorbox" 1726 | bottom: "conv7_2_mbox_priorbox" 1727 | bottom: "conv8_2_mbox_priorbox" 
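  # note: the six per-head priorbox outputs are concatenated along axis 2 below, while mbox_loc / mbox_conf
  # above are concatenated along axis 1; detection_out then combines mbox_loc, the softmaxed
  # mbox_conf_flatten and mbox_priorbox to produce the final detections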
1728 | bottom: "conv9_2_mbox_priorbox" 1729 | top: "mbox_priorbox" 1730 | concat_param { 1731 | axis: 2 1732 | } 1733 | } 1734 | 1735 | layer { 1736 | name: "mbox_conf_reshape" 1737 | type: "Reshape" 1738 | bottom: "mbox_conf" 1739 | top: "mbox_conf_reshape" 1740 | reshape_param { 1741 | shape { 1742 | dim: 0 1743 | dim: -1 1744 | dim: 2 1745 | } 1746 | } 1747 | } 1748 | layer { 1749 | name: "mbox_conf_softmax" 1750 | type: "Softmax" 1751 | bottom: "mbox_conf_reshape" 1752 | top: "mbox_conf_softmax" 1753 | softmax_param { 1754 | axis: 2 1755 | } 1756 | } 1757 | layer { 1758 | name: "mbox_conf_flatten" 1759 | type: "Flatten" 1760 | bottom: "mbox_conf_softmax" 1761 | top: "mbox_conf_flatten" 1762 | flatten_param { 1763 | axis: 1 1764 | } 1765 | } 1766 | 1767 | layer { 1768 | name: "detection_out" 1769 | type: "DetectionOutput" 1770 | bottom: "mbox_loc" 1771 | bottom: "mbox_conf_flatten" 1772 | bottom: "mbox_priorbox" 1773 | top: "detection_out" 1774 | include { 1775 | phase: TEST 1776 | } 1777 | detection_output_param { 1778 | num_classes: 2 1779 | share_location: true 1780 | background_label_id: 0 1781 | nms_param { 1782 | nms_threshold: 0.45 1783 | top_k: 400 1784 | } 1785 | code_type: CENTER_SIZE 1786 | keep_top_k: 200 1787 | confidence_threshold: 0.01 1788 | clip: 1 1789 | } 1790 | } 1791 | -------------------------------------------------------------------------------- /face_detector/face_detector.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import os 4 | 5 | # Abstract class / Interface 6 | class FaceDetectorIface: 7 | def detect_faces(self, frame): 8 | raise NotImplementedError 9 | 10 | class HaarCascadeDetector(FaceDetectorIface): 11 | def __init__(self, root=None): 12 | self.path = "haarcascade_frontalface_default.xml" 13 | if root: 14 | self.path = os.path.join(root, self.path) 15 | 16 | self.detector = cv2.CascadeClassifier(self.path) 17 | 18 | def detect_faces(self, frame): 19 | faces = self.detector.detectMultiScale(frame) 20 | return faces 21 | 22 | class DnnDetector(FaceDetectorIface): 23 | """ 24 | SSD (Single Shot Detectors) based face detection (ResNet-18 backbone(light feature extractor)) 25 | """ 26 | def __init__(self, root=None): 27 | self.prototxt = "deploy.prototxt.txt" 28 | self.model_weights = "res10_300x300_ssd_iter_140000.caffemodel" 29 | 30 | if root: 31 | self.prototxt = os.path.join(root, self.prototxt) 32 | self.model_weights = os.path.join(root, self.model_weights) 33 | 34 | self.detector = cv2.dnn.readNetFromCaffe(prototxt=self.prototxt, caffeModel=self.model_weights) 35 | self.threshold = 0.5 # to remove weak detections 36 | 37 | def detect_faces(self,frame): 38 | h = frame.shape[0] 39 | w = frame.shape[1] 40 | 41 | # required preprocessing(mean & variance(scale) & size) to use the dnn model 42 | """ 43 | Problem of not detecting small faces if the image is big (720p or 1080p) 44 | because we resize to 300,300 ... 
but if we use the original size it will detect right but so slow 45 | """ 46 | resized_frame = cv2.resize(frame, (300, 300)) 47 | blob = cv2.dnn.blobFromImage(resized_frame, 1.0, resized_frame.shape[0:2], (104.0, 177.0, 123.0)) 48 | # detect 49 | self.detector.setInput(blob) 50 | detections = self.detector.forward() 51 | faces = [] 52 | # shape 2 is num of detections 53 | for i in range(detections.shape[2]): 54 | confidence = detections[0,0,i,2] 55 | if confidence < self.threshold: 56 | continue 57 | 58 | # model output is percentage of bbox dims 59 | box = detections[0, 0, i, 3:7] * np.array([w, h, w, h]) 60 | box = box.astype("int") 61 | (x1,y1, x2,y2) = box 62 | 63 | # x,y,w,h 64 | faces.append((x1,y1,x2-x1,y2-y1)) 65 | # print(confidence) 66 | return faces 67 | -------------------------------------------------------------------------------- /face_detector/res10_300x300_ssd_iter_140000.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmrElsersy/PFLD-Pytorch-Landmarks/087886c821a98bcf454643c0861a16d86ad54cfa/face_detector/res10_300x300_ssd_iter_140000.caffemodel -------------------------------------------------------------------------------- /generate_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Amr Elsersy 3 | email: amrelsersay@gmail.com 4 | ----------------------------------------------------------------------------------- 5 | Description: Dataset Augumentation & Generation 6 | """ 7 | 8 | import os, time 9 | import argparse 10 | from numpy.lib.type_check import imag 11 | import math 12 | import numpy as np 13 | import cv2 14 | from euler_angles import EulerAngles 15 | 16 | # ============= Data Augumentation ============= 17 | from utils import flip, resize 18 | 19 | class Data_Augumentor: 20 | """ 21 | Data Augumentation 22 | - reads dataset annotations and preprocess data & augument it 23 | - generates new & clean dataset ready to be used. 24 | """ 25 | def __init__(self, n_augumentation=5): 26 | self.n_augumentation = n_augumentation 27 | self.face_shape = (112,112) 28 | self.theta1 = 15 29 | self.theta2 = 30 30 | self.euler_estimator = EulerAngles() 31 | 32 | self.root = 'data' 33 | self.train_path = os.path.join(self.root, 'train') 34 | self.test_path = os.path.join(self.root, 'test') 35 | 36 | self.images_root = os.path.join(self.root, 'WFLW', "WFLW_images") 37 | train_test_root = os.path.join(self.root, 'WFLW', "WFLW_annotations", "list_98pt_rect_attr_train_test") 38 | train_name = os.path.join(train_test_root, "list_98pt_rect_attr_train.txt") 39 | test_name = os.path.join(train_test_root, "list_98pt_rect_attr_test.txt") 40 | test_file = open(test_name ,'r') 41 | train_file = open(train_name,'r') 42 | # the important ones 43 | self.test_lines = test_file.read().splitlines() 44 | self.train_lines = train_file.read().splitlines() 45 | 46 | def generate_dataset(self, mode='train'): 47 | assert mode in ['train', 'test'] 48 | try: 49 | if mode == 'train': 50 | os.mkdir(self.train_path) 51 | os.mkdir(os.path.join(self.train_path, 'images')) 52 | else: 53 | os.mkdir(self.test_path) 54 | os.mkdir(os.path.join(self.test_path, 'images')) 55 | print(f'created data/{mode} folder') 56 | except: 57 | print(f"data/{mode} folder already exist .. 
delete it to generate a new dataset") 58 | return 59 | 60 | lines = self.train_lines if mode == 'train' else self.test_lines 61 | save_path = self.train_path if mode == 'train' else self.test_path 62 | 63 | # annotation for all train/test dataset strings 64 | all_annotations_str = [] 65 | 66 | k = 0 67 | for annotations_line in lines: 68 | # read annotations 69 | annotations = self.read_annotations(annotations_line) 70 | image_full_path = annotations['path'] 71 | image = self.read_image(image_full_path) 72 | rect = annotations['rect'] 73 | landmarks = annotations['landmarks'] 74 | attributes = annotations['attributes'] 75 | 76 | # ============= Data Augumentation ================= 77 | all_images = [] 78 | all_landmarks = [] 79 | 80 | if mode == 'test': 81 | image, landmarks, skip = self.crop_face_landmarks(image, landmarks, False) 82 | if skip: 83 | continue 84 | all_images = [image] 85 | all_landmarks = [landmarks] 86 | else: 87 | for i in range(self.n_augumentation): 88 | angle = np.random.randint(-30, 30) 89 | 90 | augument_image, augument_landmarks = self.rotate(np.copy(image), np.copy(landmarks), angle) 91 | augument_image, augument_landmarks, skip = self.crop_face_landmarks(augument_image, augument_landmarks) 92 | if skip: 93 | continue 94 | 95 | if np.random.choice((True, False)): 96 | augument_image, augument_landmarks = flip(augument_image, augument_landmarks) 97 | 98 | # # visualize 99 | # img = np.copy(augument_image) 100 | # for point in augument_landmarks: 101 | # point = (int(point[0]), int(point[1])) 102 | # cv2.circle(img, point, 1, (0,255,0), -1) 103 | # # img = cv2.resize(img, (300,300)) 104 | # cv2.imshow("image", img) 105 | # if cv2.waitKey(0) == 27: 106 | # exit(0) 107 | # print("*"*80) 108 | 109 | all_images.append(augument_image) 110 | all_landmarks.append(augument_landmarks) 111 | 112 | # for every augumented image 113 | for i, img in enumerate(all_images): 114 | img = all_images[i] 115 | landmark = all_landmarks[i] / 112 116 | 117 | # generate euler angles from landmarks 118 | _, _, euler_angles = self.euler_estimator.eular_angles_from_landmarks(landmark) 119 | euler_str = ' '.join([str(round(angle,2)) for angle in euler_angles]) 120 | 121 | # get image name 122 | new_image_path = self.save_image(img, image_full_path, k, i, save_path) # id should be unique for every img 123 | 124 | # convert landmarks to string 125 | landmarks_list = landmark.reshape(196,).tolist() 126 | landmarks_str = ' '.join([str(l) for l in landmarks_list]) 127 | 128 | # attributes list to string 129 | attributes_str = ' '.join([str(attribute) for attribute in attributes]) 130 | 131 | # annotation string = image_name + 98 landmarks + attributes + euler 132 | new_annotation = ' '.join([new_image_path, landmarks_str, attributes_str, euler_str]) 133 | all_annotations_str.append(new_annotation) 134 | # print(new_annotation) 135 | 136 | k += 1 137 | if k % 100 == 0: 138 | print(f'{mode} dataset: {k} generated data') 139 | 140 | # ========= Save annotations =============== 141 | one_annotation_str = '\n'.join([annotation for annotation in all_annotations_str]) 142 | annotations_path = os.path.join(save_path, 'annotations.txt') 143 | annotations_file = open(annotations_path, 'w') 144 | annotations_file.write(one_annotation_str) 145 | annotations_file.close() 146 | print('*'*60,f'\n\t {mode} annotations is saved @ data/{mode}/annotations.txt') 147 | time.sleep(2) 148 | 149 | def rotate(self, image, landmarks, theta): 150 | top_left = np.min(landmarks, axis=0).astype(np.int32) 151 | bottom_right = 
np.max(landmarks, axis=0).astype(np.int32) 152 | wh = bottom_right - top_left + 1 153 | center = (top_left + wh/2).astype(np.int32) 154 | boxsize = int(np.max(wh)*1.2) 155 | cx, cy = center 156 | 157 | # random shift 158 | cx += int(np.random.randint(-boxsize*0.1, boxsize*0.1)) 159 | cy += int(np.random.randint(-boxsize*0.1, boxsize*0.1)) 160 | 161 | center = (cx, cy) 162 | 163 | # get translation-rotation matrix numpy array shape (2,3) has rotation and last column is translation 164 | # note that it translate the coord to the origin apply the rotation then translate it again to cente 165 | rotation_matrix = cv2.getRotationMatrix2D(center, theta, 1) 166 | # to keep all the boxes is visible as some boundary boxes may dissapear during rotation 167 | shape_factor = 1.1 168 | h, w = image.shape[:2] 169 | new_shape = (int(w*shape_factor), int(h*shape_factor)) 170 | image = cv2.warpAffine(image, rotation_matrix, new_shape) 171 | 172 | # add homoginous 1 to 2D landmarks to be able to use the same translation-rotation matrix 173 | landmarks =np.hstack((landmarks, np.ones((98, 1)))) 174 | landmarks = (rotation_matrix @ landmarks.T).T 175 | 176 | # for point in landmarks: 177 | # point = (int(point[0]), int(point[1])) 178 | # cv2.circle(image, point, 0, (0,0,255), -1) 179 | # ima = cv2.resize(image, (500,500)) 180 | # cv2.imshow("image", ima) 181 | # if cv2.waitKey(0) == 27: 182 | # exit(0) 183 | 184 | return image, landmarks 185 | 186 | def crop_face_landmarks(self, image, landmarks, is_scaled=True): 187 | # max (x,y) together & the min is the boundary of bbox 188 | top_left = np.min(landmarks, axis=0).astype(np.int32) 189 | bottom_right = np.max(landmarks, axis=0).astype(np.int32) 190 | 191 | x1,y1 = top_left 192 | x2,y2 = bottom_right 193 | rect = [(x1, y1), (x2, y2)] 194 | 195 | if is_scaled: 196 | wh = np.ptp(landmarks, axis=0).astype(np.int32) + 1 197 | scaled_size = np.random.randint(int(np.min(wh)), np.ceil(np.max(wh) * 1.25)) 198 | 199 | (x1, y1), (x2, y2) = self.scale_rect(rect, scaled_size, image.shape) 200 | else: 201 | (x1, y1), (x2, y2) = self.scale_rect2(rect, 0.2, image.shape) 202 | 203 | 204 | if x1 == x2 or y1 == y2: 205 | return None, None, True 206 | 207 | # landmarks normalization 208 | landmarks -= (x1,y1) 209 | landmarks = landmarks / [x2-x1, y2-y1] 210 | 211 | # when rotation is applied, boundary parts of image may disapear & landmarks will be out of the image shape 212 | if (landmarks < 0).any() or (landmarks > 1).any() : 213 | return None, None, True 214 | 215 | # crop 216 | image = image[int(y1):int(y2), int(x1):int(x2)] 217 | # resize 218 | image = cv2.resize(image, self.face_shape) 219 | landmarks *= 112 220 | 221 | # skip this image if any of top left coord has a big -ve 222 | # because this will lead to a big shift to landmarks & wrong annotations 223 | skip = False 224 | min_neg = min(x1,y1) 225 | if min_neg < -5: 226 | skip = True 227 | 228 | return image, landmarks, skip 229 | 230 | def scale_rect(self, rect, factor, big_img_shape): 231 | (x1, y1), (x2, y2) = rect 232 | cx = (x1+x2) // 2 233 | cy = (y1+y2) // 2 234 | 235 | top_left = np.asarray((cx - factor // 2, cy - factor//2), dtype=np.int32) 236 | bottom_right = top_left + (factor, factor) 237 | 238 | (x1,y1) = top_left 239 | (x2,y2) = bottom_right 240 | 241 | x1 = max(x1, 0) 242 | y1 = max(y1, 0) 243 | h_max, w_max = big_img_shape[:2] 244 | y2 = min(y2, h_max) 245 | x2 = min(x2, w_max) 246 | rect[0] = (x1,y1) 247 | rect[1] = (x2,y2) 248 | return np.array(rect).astype(np.int32) 249 | 250 | def scale_rect2(self, 
rect, factor, big_img_shape): 251 | (x1, y1), (x2, y2) = rect 252 | rect_dw = (x2 - x1) * factor 253 | rect_dy = (y2 - y1) * factor 254 | x1 -= rect_dw/2 255 | x2 += rect_dw/2 256 | y1 -= rect_dy/2 257 | y2 += rect_dy/2 258 | x1 = max(x1, 0) 259 | y1 = max(y1, 0) 260 | h_max, w_max = big_img_shape[:2] 261 | y2 = min(y2, h_max) 262 | x2 = min(x2, w_max) 263 | rect[0] = (x1,y1) 264 | rect[1] = (x2,y2) 265 | return np.array(rect).astype(np.int32) 266 | 267 | def crop_face_rect(self, image, rect, landmarks): 268 | (x1, y1), (x2, y2) = rect 269 | image = image[int(y1):int(y2), int(x1):int(x2)] 270 | 271 | # resize the image & store the dims to resize landmarks 272 | h, w = image.shape[:2] 273 | image = cv2.resize(image, self.face_shape) 274 | 275 | # scale factor in x & y to scale the landmarks 276 | new_h, new_w = self.face_shape 277 | fx = new_w / w 278 | fy = new_h / h 279 | # translate the landmarks then scale them 280 | landmarks -= rect[0] 281 | for landmark in landmarks: 282 | landmark[0] *= fx 283 | landmark[1] *= fy 284 | 285 | # face rect 286 | rect[0] = (0,0) 287 | rect[1] = (x2-x1, y2-y1) 288 | 289 | return image, rect, landmarks 290 | 291 | def save_image(self, img, full_name, k, id, save_path): 292 | full_name = full_name.split('/') 293 | image_name = full_name[-1][:-4] + '_' + str(k) + '_' + str(id) + '.jpg' 294 | image_path = os.path.join(save_path, 'images', image_name) 295 | cv2.imwrite(image_path, img) 296 | return image_path 297 | 298 | def read_annotations(self, annotations): 299 | annotations = annotations.split(' ') 300 | 301 | landmarks = annotations[0:196] 302 | rect = annotations[196:200] 303 | attributes = annotations[200:206] 304 | image_path = annotations[206] 305 | 306 | landmarks = [float(landmark) for landmark in landmarks] 307 | landmarks = np.array(landmarks, dtype=np.float).reshape(98,2) 308 | rect = [float(coord) for coord in rect] 309 | rect = np.array(rect, dtype=np.float).reshape((2,2)) 310 | 311 | return { 312 | 'landmarks': landmarks, 313 | 'rect' : rect, 314 | 'attributes': attributes, 315 | 'path': image_path 316 | } 317 | 318 | def read_image(self, name): 319 | path = os.path.join(self.images_root, name) 320 | image = cv2.imread(path, cv2.IMREAD_COLOR) 321 | return image 322 | 323 | def main(): 324 | parser = argparse.ArgumentParser() 325 | parser.add_argument('--mode', type=str, choices=['train', 'test'], default='train') 326 | parser.add_argument('--n', type=int, default=5, help='number of augumented images per image') 327 | args = parser.parse_args() 328 | 329 | augumentor = Data_Augumentor(args.n) 330 | augumentor.generate_dataset(args.mode) 331 | 332 | if __name__ == "__main__": 333 | main() 334 | -------------------------------------------------------------------------------- /model/BottleneckResidual.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Amr Elsersy 3 | email: amrelsersay@gmail.com 4 | ----------------------------------------------------------------------------------- 5 | Description: Inverted Resuedial Linear Block used in PFLD backbone 6 | """ 7 | import torch 8 | import torch.nn as nn 9 | 10 | class BottleneckResidualBlock(nn.Module): 11 | """ 12 | Inverted Resuedial Linear Block from MobileNetv2 paper 13 | Uses Depth wise Conv & Residual & Expant-Squeeze 14 | """ 15 | def __init__(self, in_channels, out_channels, expand_factor=2, stride = 1, padding=1, force_residual=False): 16 | super(BottleneckResidualBlock, self).__init__() 17 | 18 | # residual component is not used in 
case of stride = 1 & in_n = out_n 19 | assert stride in [1,2] 20 | self.use_residual_component = True if stride == 1 and in_channels == out_channels else False 21 | 22 | # Expantion 1x1 Conv to increase num of channels 23 | expand_channels = in_channels * expand_factor 24 | self.expantion_pointwise_conv = nn.Conv2d(in_channels, expand_channels, kernel_size=1, stride=1, bias=False) 25 | self.bn1 = nn.BatchNorm2d(expand_channels) 26 | # ============================= we modified it from ReLU6 to normal ReLU =================== 27 | self.relu = nn.ReLU(inplace=True) 28 | self.relu2 = nn.ReLU(inplace=True) 29 | 30 | # Depth wise 3x3 Conv 31 | self.depth_wise_conv = nn.Conv2d(expand_channels, expand_channels, kernel_size=3, stride=stride, 32 | groups=expand_channels, padding=padding, bias=False) 33 | self.bn2 = nn.BatchNorm2d(expand_channels) 34 | 35 | # Squeeze (Projection) 1x1 Conv to reduce n_channels to match the initial number of channels 36 | self.squeeze_pointwise_conv = nn.Conv2d(expand_channels, out_channels, kernel_size=1, stride=1, bias=False) 37 | self.bn3 = nn.BatchNorm2d(out_channels) 38 | 39 | """ Notes: 40 | - if expand factor = 1, we will skip the expantion layer, but in PFLD it is never = 1 41 | - Bottleneck Residual solves the problem of low quality feature extraction in low resolution (small dim tensors), 42 | as it expands the dims to a higher resolution then apply depth conv then squeeze it again. 43 | - it also solves the problem of very deep networks using residual. 44 | - it also have small size of parameters as it seperate the depth wise conv from point wise 45 | - it is called inverted residual as it follow the approach of narrow-wide-narrow instead of wide-narrow-wide 46 | - it is called linear because it has linear activation(identity) at the last layer(squeeze_pointwise) 47 | """ 48 | def forward_without_res(self, x): 49 | # expantion 1x1 conv 50 | x = self.expantion_pointwise_conv(x) 51 | x = self.bn1(x) 52 | x = self.relu(x) 53 | # depth wise 3x3 conv 54 | x = self.depth_wise_conv(x) 55 | x = self.bn2(x) 56 | x = self.relu2(x) 57 | # squeeze 1x1 conv 58 | x = self.squeeze_pointwise_conv(x) 59 | x = self.bn3(x) 60 | return x 61 | 62 | def forward(self, x): 63 | if self.use_residual_component: 64 | return x + self.forward_without_res(x) 65 | else: 66 | return self.forward_without_res(x) 67 | 68 | 69 | if __name__ == "__main__": 70 | bottleneck = BottleneckResidualBlock(4, 4, expand_factor=2, stride=1, padding=1) 71 | x = torch.randn((2,4,1280,720)) # batch, channels, W, H 72 | print("input_shape:", x.shape) 73 | y = bottleneck(x) 74 | print("output_shape:", y.shape) 75 | -------------------------------------------------------------------------------- /model/DepthSepConv.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Amr Elsersy 3 | email: amrelsersay@gmail.com 4 | ----------------------------------------------------------------------------------- 5 | Description: Depth Wise Separable Conv2D used in PFLD 6 | """ 7 | import torch 8 | import torch.nn as nn 9 | 10 | class DepthSepConvBlock(nn.Module): 11 | """ 12 | Depth wise Separable Convolution Block proposed in MobileNet 13 | Used in PFLD backbone 14 | """ 15 | def __init__(self, in_channels, out_channels): 16 | super(DepthSepConvBlock, self).__init__() 17 | """ 18 | Notes: 19 | - groups parameter: perform a groups of conv 20 | https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html 21 | - Separates channel expantion from depth wise conv 22 | """ 23 | 
self.depth_wise_conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, groups=in_channels,padding=1) 24 | self.bn1 = nn.BatchNorm2d(in_channels) 25 | self.relu = nn.ReLU(inplace=True) 26 | self.point_wise_conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1) 27 | self.bn2 = nn.BatchNorm2d(out_channels) 28 | 29 | def forward(self, x): 30 | # depth wise 31 | x = self.depth_wise_conv(x) 32 | x = self.bn1(x) 33 | x = self.relu(x) 34 | # point wise 35 | x = self.point_wise_conv(x) 36 | x = self.bn2(x) 37 | x = self.relu(x) 38 | 39 | return x 40 | 41 | 42 | if __name__ == "__main__": 43 | depth_conv = DepthSepConvBlock(3,10) 44 | print(depth_conv) 45 | x = torch.randn((4, 3,112,112)) # batch, Channels, W, H 46 | print(x.shape) 47 | y = depth_conv(x) 48 | print(y.shape) 49 | -------------------------------------------------------------------------------- /model/Loss.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Amr Elsersy 3 | email: amrelsersay@gmail.com 4 | ----------------------------------------------------------------------------------- 5 | Description: PFLD Loss 6 | """ 7 | import torch 8 | import torch.nn as nn 9 | import numpy as np 10 | 11 | """ 12 | Weighted L2 Loss Function that computes the Sum(weight * |y` - y|^2) 13 | Sum is denoted for landmarks num 14 | then it is avaraged over the batch examples 15 | 16 | weight is function of euler angles & attributes of each example 17 | """ 18 | 19 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 20 | 21 | class PFLD_L2Loss(nn.Module): 22 | def __init__(self): 23 | super(PFLD_L2Loss, self).__init__() 24 | self.l2_loss = nn.MSELoss(reduction='sum') 25 | 26 | 27 | def forward(self, landmarks, gt_landmarks, angles, gt_angles, attributes): 28 | 29 | diff = (angles-gt_angles) 30 | # it should be converted to radians .. but since diff is small the weight will be allways samll, so it is better to deal with degrees 31 | # to_radians = 0.0174532925 32 | # diff *= to_radians 33 | angles_weight = torch.sum(1-torch.cos(diff), axis=1) 34 | 35 | # attributes weight .... v2 36 | attributes = attributes.float() 37 | attributes_weight = torch.sum(attributes, axis=1) 38 | 39 | # if we don't get the max .. all attributes =0 so weight will be 0 even if there is an error in 40 | # landmarks & angle, so we add a hing 1 to that weight to limit that .. same for angles 41 | attributes_weight += 1 42 | angles_weight += 1 43 | 44 | # L2 Landmarks Loss 45 | # shape (batch_size, 1) ... 
mean on both axes(1,2) to sum all x & all y seperatly them sum them 46 | landmarks_loss = torch.sum((landmarks-gt_landmarks)**2, 1) 47 | 48 | # print("landmakrs loss", landmarks_loss) 49 | # print(f"\nangles_weight: {angles_weight}") 50 | # print(f"\nattributes_weight: {attributes_weight}") 51 | # mean on batch size 52 | return torch.mean(attributes_weight * angles_weight * landmarks_loss) , torch.mean(landmarks_loss) 53 | 54 | if __name__ == "__main__": 55 | batch_size= 1 56 | landmarks = torch.randn((batch_size, 196)) * 255 57 | gt_landmarks = torch.randn((batch_size, 196)) * 255 58 | 59 | angles = torch.randn((batch_size, 3)) * 360 60 | gt_angles = torch.randn((batch_size, 3)) * 360 61 | # attributes = torch.randn((batch_size,6)) 62 | attributes = torch.zeros((batch_size, 6)) 63 | # attributes[:, 1] = 0 64 | # attributes[0:2, 3:5] = 0 65 | # attributes[:,3:5] = 1 66 | loss = PFLD_L2Loss() 67 | print(loss(landmarks, gt_landmarks, angles, gt_angles, attributes)) 68 | -------------------------------------------------------------------------------- /model/model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Amr Elsersy 3 | email: amrelsersay@gmail.com 4 | ----------------------------------------------------------------------------------- 5 | Description: PFLD & Auxiliary Models for landmarks detection & head pose estimation 6 | """ 7 | import torch 8 | import torch.nn as nn 9 | 10 | import sys 11 | sys.path.insert(1, 'model') 12 | 13 | from DepthSepConv import DepthSepConvBlock 14 | from BottleneckResidual import BottleneckResidualBlock 15 | 16 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 17 | 18 | def ConvBlock(in_channels, out_channels, kernel_size=3, stride=1, padding=0): 19 | return nn.Sequential( 20 | nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=False), 21 | nn.BatchNorm2d(out_channels), 22 | nn.ReLU(inplace=True) 23 | ) 24 | 25 | def ConvRelu(in_channels, out_channels, kernel_size=3, stride=1, padding=0): 26 | return nn.Sequential( 27 | nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=False), 28 | nn.ReLU(inplace=True) 29 | ) 30 | 31 | class PFLD(nn.Module): 32 | def __init__(self, device_cpu = False): 33 | super(PFLD, self).__init__() 34 | self.device = torch.device('cpu') if device_cpu else device 35 | 36 | self.conv = ConvBlock(in_channels=3, out_channels=64, stride=2, padding=1) 37 | self.depth_wise_conv = DepthSepConvBlock(in_channels=64, out_channels=64).to(device) 38 | 39 | # 1 Bottlenck Non-Resiudal(as stride =2) used for reducing tensor dim size 40 | self.bottleneck_1_first = BottleneckResidualBlock(64, 64, expand_factor=2, stride=2).to(device) 41 | # 4 Bottleneck Residual Blocks with the same in/out channel size 42 | self.bottleneck_1 = nn.ModuleList([BottleneckResidualBlock(64, 64, expand_factor=2, stride=1).to(device) for i in range(3)]) 43 | self.bottleneck_1_last = BottleneckResidualBlock(64, 64, expand_factor=2, stride=1).to(device) 44 | 45 | # 1 Bottleneck to expand channel size 46 | self.bottleneck_2 = BottleneckResidualBlock(64, 128, expand_factor=2, stride=2).to(device) 47 | 48 | # 6 Bottleneck Resiudal Blocks with the same in/out channel size 49 | self.bottleneck_3 = nn.ModuleList([BottleneckResidualBlock(128,128, expand_factor=4, stride=1).to(device) for i in range(6)]) 50 | self.bottleneck_3[0].use_residual_component = False 51 | 52 | # last Bottleneck to reduce channel size 53 
| self.bottleneck_4 = BottleneckResidualBlock(128, 16, expand_factor=2, stride=1).to(device) #16x 14x14 54 | 55 | # last layers S1 & S2 & S3 used together as input to the head as a multi scale features 56 | self.conv1 = ConvBlock(in_channels=16, out_channels=32, stride=2, padding=1) # 16x 14x14 -> 32x 7x7 57 | self.conv2 = ConvRelu(in_channels=32, out_channels=128, kernel_size=7) # 32x 7x7 -> 128x 1x1 58 | 59 | # avg pooling is used to flatten the output of the last conv layers 60 | self.avg_pool1 = nn.AvgPool2d(kernel_size=14) 61 | self.avg_pool2 = nn.AvgPool2d(kernel_size=7) 62 | # input = 16(flatten of bottleneck4) + 32(flatten of conv1) + 128(flatten of conv2) 63 | self.fc = nn.Linear(176, 196) 64 | 65 | def forward(self, x): 66 | x = self.conv(x) 67 | x = self.depth_wise_conv(x) 68 | 69 | # ======== bottleneck 1 ======== 70 | x = self.bottleneck_1_first(x) 71 | for block1 in self.bottleneck_1: 72 | x = block1(x) 73 | 74 | # Auxiliary Network takes that branch as its input 75 | features_auxiliary = self.bottleneck_1_last(x) 76 | 77 | # ======== bottleneck 2 ======== 78 | x = self.bottleneck_2(features_auxiliary) 79 | 80 | # bottleneck 3 81 | for block3 in self.bottleneck_3: 82 | x = block3(x) 83 | 84 | # ======== bottleneck 4 ======== 85 | x = self.bottleneck_4(x) 86 | 87 | # ======== S1 & S2 & S3 ======== 88 | s1 = self.avg_pool1(x) 89 | s1 = s1.view(s1.shape[0], -1) 90 | 91 | x = self.conv1(x) 92 | s2 = self.avg_pool2(x) 93 | s2 = s2.view(s2.shape[0], -1) 94 | 95 | s3 = self.conv2(x) 96 | s3 = s3.view(s3.shape[0], -1) 97 | 98 | # 176 = 16 + 32 + 128 of s1 + s2 + s3 concatination 99 | multi_scale_features = torch.cat([s1,s2,s3], dim=1) 100 | landmarks = self.fc(multi_scale_features) 101 | return features_auxiliary, landmarks 102 | 103 | 104 | class AuxiliaryNet(nn.Module): 105 | """ 106 | Head Pose Estimation Net 107 | Euler Angles Regression 108 | """ 109 | def __init__(self): 110 | super(AuxiliaryNet, self).__init__() 111 | 112 | self.conv1 = nn.Conv2d(in_channels=64, out_channels= 128, kernel_size=3, stride=2, padding=1) 113 | self.conv2 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1) 114 | self.conv3 = nn.Conv2d(in_channels=128, out_channels=32, kernel_size=3, stride=2, padding=1) 115 | self.conv4 = nn.Conv2d(in_channels=32, out_channels=128, kernel_size=7, stride=1, padding=1) 116 | self.max_pool = nn.MaxPool2d(3) 117 | self.fc1 = nn.Linear(128, 32) 118 | self.fc2 = nn.Linear(32, 3) 119 | 120 | def forward(self, x): 121 | """ 122 | Computes Euler angles 123 | Parameters: 124 | x: shape = 64 channel x 28x28 125 | Returns: 126 | tensor(3,1) euler angles 127 | """ 128 | x = self.conv1(x) 129 | x = self.conv2(x) 130 | x = self.conv3(x) 131 | x = self.conv4(x) 132 | x = self.max_pool(x) 133 | # Flatten 134 | x = x.view(x.shape[0], -1) 135 | 136 | x = self.fc1(x) 137 | x = self.fc2(x) 138 | 139 | return x 140 | 141 | if __name__ == "__main__": 142 | auxiliary = AuxiliaryNet().to(device) 143 | pfld = PFLD().to(device) 144 | 145 | x = torch.randn((10, 3,112,112)).to(device) 146 | print("x shape:",x.shape) 147 | features, landmarks = pfld(x) 148 | print("features:",features.shape) 149 | print("landmarks:",landmarks.shape) 150 | 151 | euler_angles = auxiliary(features) 152 | print("euler_angles", euler_angles.shape) 153 | 154 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | 2 | torchvision==0.8.2 3 | numpy==1.19.4 4 | 
torch==1.7.1 5 | tensorboardX==2.0.1 6 | tqdm==4.55.0 7 | opencv_python==4.1.2.30 8 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Amr Elsersy 3 | email: amrelsersay@gmail.com 4 | ----------------------------------------------------------------------------------- 5 | Description: Training & Validation 6 | """ 7 | import numpy as np 8 | import argparse 9 | import logging 10 | import time 11 | import os 12 | from tqdm import tqdm 13 | import cv2 14 | 15 | import torch 16 | import torch.nn as nn 17 | import torch.optim 18 | import torch.utils.tensorboard as tensorboard 19 | 20 | from dataset import WFLW_Dataset 21 | from dataset import create_test_loader, create_train_loader 22 | from visualization import WFLW_Visualizer 23 | import torchvision.transforms.transforms as transforms 24 | 25 | from model.Loss import PFLD_L2Loss 26 | from model.model import PFLD, AuxiliaryNet 27 | from model.DepthSepConv import DepthSepConvBlock 28 | from model.BottleneckResidual import BottleneckResidualBlock 29 | 30 | from utils import to_numpy_image 31 | import torch.backends.cudnn as cudnn 32 | 33 | cudnn.benchmark = True 34 | cudnn.determinstic = True 35 | cudnn.enabled = True 36 | 37 | 38 | def parse_args(): 39 | parser = argparse.ArgumentParser() 40 | parser.add_argument('--datapath', type=str, default='data', help='root path of augumented WFLW dataset') 41 | parser.add_argument('--pretrained',type=str,default='checkpoint/model_weights/weights.pth.tar',help='load weights') 42 | parser.add_argument('--mode', type=str, default='test', choices=['train', 'test']) 43 | args = parser.parse_args() 44 | return args 45 | 46 | # ====================================================================== 47 | 48 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 49 | args = parse_args() 50 | 51 | def main(): 52 | # ========= dataset =========== 53 | dataset = WFLW_Dataset(root=args.datapath, mode=args.mode, transform=transforms.ToTensor()) 54 | visualizer = WFLW_Visualizer() 55 | # =========== models ============= 56 | pfld = PFLD().to(device) 57 | auxiliarynet = AuxiliaryNet().to(device) 58 | print(pfld) 59 | # ========= load weights =========== 60 | checkpoint = torch.load(args.pretrained, map_location=device) 61 | # print(pfld.load_state_dict(checkpoint["pfld"]).keys()) 62 | # return 63 | pfld.load_state_dict(checkpoint["pfld"], strict=False) 64 | auxiliarynet.load_state_dict(checkpoint["auxiliary"]) 65 | print(f'\n\tLoaded checkpoint from {args.pretrained}\n') 66 | time.sleep(1) 67 | 68 | pfld.eval() 69 | auxiliarynet.eval() 70 | 71 | with torch.no_grad(): 72 | for i in range(len(dataset)): 73 | image, labels = dataset[i] 74 | 75 | image = image.unsqueeze(0) 76 | image = image.to(device) 77 | landmarks = labels['landmarks'].squeeze() # shape (batch, 98, 2) 78 | 79 | pfld = pfld.to(device) 80 | auxiliarynet = auxiliarynet.to(device) 81 | t1 = time.time() 82 | featrues, pred_landmarks = pfld(image) 83 | t2 = time.time() 84 | pred_angles = auxiliarynet(featrues) 85 | print('pred_angles',pred_angles) 86 | print('gt_angles',labels['euler_angles']) 87 | print(f'gt_landmarks shape {landmarks.shape}, \n {landmarks[:5]}') 88 | print(f'landmarks shape {pred_landmarks.shape}, \n {pred_landmarks.reshape(98,2)[:5]}') 89 | 90 | t3 = time.time() 91 | # print(f"\ttime PFLD landmarks= {round((t2-t1)*1000,3)} ms") 92 | # print(f"\ttime auxiliary euler= 
{round((t3-t2)*1000,3)} ms") 93 | # print(f"\ttotal time= {round((t3-t1)*1000,3)} ms\n") 94 | 95 | landmarks = landmarks.cpu().reshape(98,2).numpy() 96 | landmarks = (landmarks*112.0).astype(np.int32) 97 | 98 | pred_landmarks = pred_landmarks.cpu().reshape(98,2).numpy() 99 | pred_landmarks = (pred_landmarks*112.0).astype(np.int32) 100 | 101 | image = to_numpy_image(image[0].cpu()) 102 | image = (image*255).astype(np.uint8) 103 | image = np.clip(image, 0, 255) 104 | 105 | cv2.imwrite("ray2.jpg", image) 106 | img = cv2.imread("ray2.jpg") 107 | img2 = np.copy(img) 108 | img2[:,:] = 0 109 | 110 | visualizer.landmarks_radius = 1 111 | visualizer.landmarks_color = (0,255,0) 112 | img = visualizer.draw_landmarks(img, pred_landmarks) 113 | img2 = visualizer.draw_landmarks(img2, pred_landmarks) 114 | visualizer.landmarks_color = (0,0,255) 115 | # img = visualizer.draw_landmarks(img, landmarks) 116 | visualizer.show(img2, wait=False, winname="black") 117 | visualizer.show(img) 118 | print('*'*70,'\n') 119 | 120 | if visualizer.user_press == 27: 121 | break 122 | 123 | def overfit_one_mini_batch(): 124 | 125 | # ========= dataset =========== 126 | dataloader = create_test_loader(batch_size=20) 127 | # =========== models ============= 128 | pfld_model = PFLD().to(device) 129 | auxiliary_model = AuxiliaryNet().to(device) 130 | 131 | pfld_model.train() 132 | auxiliary_model.train() 133 | criterion = PFLD_L2Loss().to(device) 134 | parameters = list(pfld_model.parameters()) + list(auxiliary_model.parameters()) 135 | optimizer = torch.optim.Adam(parameters, lr=0.0001, weight_decay=1e-6) 136 | 137 | image, labels = next(iter(dataloader)) 138 | print(image.shape) 139 | time.sleep(5) 140 | for i in range(6000): 141 | euler_angles = labels['euler_angles'].squeeze() # shape (batch, 3) 142 | attributes = labels['attributes'].squeeze() # shape (batch, 6) 143 | landmarks = labels['landmarks'].squeeze() # shape (batch, 98, 2) 144 | landmarks = landmarks.reshape((landmarks.shape[0], 196)) # reshape landmarks to match loss function 145 | 146 | image = image.to(device) 147 | landmarks = landmarks.to(device) 148 | euler_angles = euler_angles.to(device) 149 | attributes = attributes.to(device) 150 | pfld_model = pfld_model.to(device) 151 | auxiliary_model = auxiliary_model.to(device) 152 | 153 | featrues, pred_landmarks = pfld_model(image) 154 | pred_angles = auxiliary_model(featrues) 155 | weighted_loss, loss = criterion(pred_landmarks, landmarks, pred_angles, euler_angles, attributes) 156 | 157 | train_w_loss = round(weighted_loss.item(),3) 158 | train_loss = round(loss.item(),3) 159 | print(f"\t.. weighted_loss= {train_w_loss} ... 
loss={train_loss}") 160 | 161 | optimizer.zero_grad() 162 | weighted_loss.backward() 163 | optimizer.step() 164 | 165 | def show_model_tensorboard(): 166 | import torch.utils.tensorboard as tensorboard 167 | 168 | pfld = PFLD() 169 | auxiliarynet = AuxiliaryNet() 170 | 171 | dataloader = create_test_loader(batch_size=20, transform=True) 172 | dataitr = iter(dataloader) 173 | image, labels = dataitr.next() 174 | print("ray2") 175 | writer = tensorboard.SummaryWriter("checkpoint/ray22") 176 | writer.add_graph(pfld, image) 177 | print("added model to tensorboard") 178 | time.sleep(4) 179 | writer.close() 180 | 181 | 182 | if __name__ == "__main__": 183 | main() 184 | # overfit_one_mini_batch() 185 | # show_model_tensorboard() -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Amr Elsersy 3 | email: amrelsersay@gmail.com 4 | ----------------------------------------------------------------------------------- 5 | Description: Training & Validation 6 | """ 7 | import numpy as np 8 | import argparse 9 | import logging 10 | import time 11 | import os 12 | from tqdm import tqdm 13 | 14 | import torch 15 | import torch.nn as nn 16 | import torch.optim 17 | import torch.utils.tensorboard as tensorboard 18 | 19 | from dataset import WFLW_Dataset 20 | from dataset import create_test_loader, create_train_loader 21 | from visualization import WFLW_Visualizer 22 | 23 | from model.Loss import PFLD_L2Loss 24 | from model.model import PFLD, AuxiliaryNet 25 | from model.DepthSepConv import DepthSepConvBlock 26 | from model.BottleneckResidual import BottleneckResidualBlock 27 | 28 | from utils import to_numpy_image 29 | import torch.backends.cudnn as cudnn 30 | 31 | cudnn.benchmark = True 32 | cudnn.determinstic = True 33 | cudnn.enabled = True 34 | 35 | def parse_args(): 36 | parser = argparse.ArgumentParser() 37 | parser.add_argument('--epochs', type=int, default=800, help='num of training epochs') 38 | parser.add_argument('--batch_size', type=int, default=24, help="training batch size") 39 | parser.add_argument('--tensorboard', type=str, default='checkpoint/tensorboard', help='path log dir of tensorboard') 40 | parser.add_argument('--logging', type=str, default='checkpoint/logging', help='path of logging') 41 | parser.add_argument('--lr', type=float, default=0.00007, help='learning rate') 42 | parser.add_argument('--weight_decay', type=float, default=1e-6, help='optimizer weight decay') 43 | parser.add_argument('--datapath', type=str, default='data', help='root path of augumented WFLW dataset') 44 | parser.add_argument('--pretrained', type=str,default='checkpoint/model_weights/weights.pth1.tar',help='load checkpoint') 45 | parser.add_argument('--resume', action='store_true', help='resume from pretrained path specified in prev arg') 46 | parser.add_argument('--savepath', type=str, default='checkpoint/model_weights', help='save checkpoint path') 47 | parser.add_argument('--savefreq', type=int, default=1, help="save weights each freq num of epochs") 48 | parser.add_argument('--logdir', type=str, default='checkpoint/logging', help='logging') 49 | parser.add_argument("--lr_patience", default=40, type=int) 50 | args = parser.parse_args() 51 | return args 52 | # ====================================================================== 53 | 54 | # device 55 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 56 | # args 57 | args = parse_args() 58 | # logging 
59 | logging.basicConfig( 60 | format='[%(asctime)s] [p%(process)s] [%(pathname)s:%(lineno)d] [%(levelname)s] %(message)s', 61 | level=logging.INFO, 62 | handlers=[logging.FileHandler(args.logdir, mode='w'), logging.StreamHandler()]) 63 | # tensorboard 64 | writer = tensorboard.SummaryWriter(args.tensorboard) 65 | 66 | 67 | def main(): 68 | # ========= dataloaders =========== 69 | train_dataloader = create_train_loader(root=args.datapath,batch_size=args.batch_size) 70 | test_dataloader = create_test_loader(root=args.datapath, batch_size=args.batch_size) 71 | start_epoch = 0 72 | # ======== models & loss ========== 73 | pfld = PFLD().to(device) 74 | auxiliarynet = AuxiliaryNet().to(device) 75 | loss = PFLD_L2Loss().to(device) 76 | # ========= load weights =========== 77 | if args.resume: 78 | checkpoint = torch.load(args.pretrained) 79 | pfld.load_state_dict(checkpoint["pfld"], strict=False) 80 | auxiliarynet.load_state_dict(checkpoint["auxiliary"]) 81 | start_epoch = checkpoint['epoch'] + 1 82 | print(f'\tLoaded checkpoint from {args.pretrained}\n') 83 | # logging.info(f'\tLoaded checkpoint from {args.pretrained}\n') 84 | time.sleep(1) 85 | else: 86 | print("******************* Start training from scratch *******************\n") 87 | time.sleep(5) 88 | # =========== optimizer =========== 89 | # parameters = list(pfld.parameters()) + list(auxiliarynet.parameters()) 90 | parameters = [ 91 | { 'params': pfld.parameters() }, 92 | { 'params': auxiliarynet.parameters() } 93 | ] 94 | optimizer = torch.optim.Adam(parameters, lr=args.lr, weight_decay=args.weight_decay) 95 | scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=args.lr_patience, verbose=True) 96 | # ======================================================================== 97 | for epoch in range(start_epoch, args.epochs): 98 | # =========== train / validate =========== 99 | w_train_loss, train_loss = train_one_epoch(pfld, auxiliarynet, loss, optimizer, train_dataloader, epoch) 100 | val_loss = validate(pfld, auxiliarynet, loss, test_dataloader, epoch) 101 | scheduler.step(val_loss) 102 | logging.info(f"\ttraining epoch={epoch} .. weighted_loss= {w_train_loss} ... 
loss={train_loss}") 103 | # ============= tensorboard ============= 104 | # writer.add_scalar('train_weighted_loss',w_train_loss, epoch) 105 | writer.add_scalar('train_loss',train_loss, epoch) 106 | writer.add_scalar('val_loss',val_loss, epoch) 107 | # ============== save model ============= 108 | if epoch % args.savefreq == 0: 109 | checkpoint_state = { 110 | "pfld": pfld.state_dict(), 111 | "auxiliary": auxiliarynet.state_dict(), 112 | "epoch": epoch 113 | } 114 | savepath = os.path.join(args.savepath, f'weights.pth_epoch_{epoch}.tar') 115 | torch.save(checkpoint_state, savepath) 116 | print(f'\n\t*** Saved checkpoint in {savepath} ***\n') 117 | time.sleep(2) 118 | writer.close() 119 | 120 | def train_one_epoch(pfld_model, auxiliary_model, criterion, optimizer, dataloader, epoch): 121 | weighted_loss = 0 122 | loss = 0 123 | pfld_model.train() 124 | auxiliary_model.train() 125 | 126 | for image, labels in tqdm(dataloader): 127 | euler_angles = labels['euler_angles'].squeeze() # shape (batch, 3) 128 | attributes = labels['attributes'].squeeze() # shape (batch, 6) 129 | landmarks = labels['landmarks'].squeeze() # shape (batch, 98, 2) 130 | landmarks = landmarks.reshape((landmarks.shape[0], 196)) # reshape landmarks to match loss function 131 | 132 | image = image.to(device) 133 | landmarks = landmarks.to(device) 134 | euler_angles = euler_angles.to(device) 135 | attributes = attributes.to(device) 136 | pfld_model = pfld_model.to(device) 137 | auxiliary_model = auxiliary_model.to(device) 138 | 139 | featrues, pred_landmarks = pfld_model(image) 140 | pred_angles = auxiliary_model(featrues) 141 | weighted_loss, loss = criterion(pred_landmarks, landmarks, pred_angles, euler_angles, attributes) 142 | 143 | train_w_loss = round(weighted_loss.item(),3) 144 | train_loss = round(loss.item(),3) 145 | print(f"training epoch={epoch} .. weighted_loss= {train_w_loss} ... loss={train_loss}\n") 146 | 147 | optimizer.zero_grad() 148 | weighted_loss.backward() 149 | optimizer.step() 150 | 151 | return weighted_loss.item(), loss.item() 152 | 153 | 154 | def validate(pfld_model, auxiliary_model, criterion, dataloader, epoch): 155 | validation_losses = [] 156 | pfld_model.eval() 157 | auxiliary_model.eval() 158 | 159 | with torch.no_grad(): 160 | for image, labels in tqdm(dataloader): 161 | 162 | euler_angles = labels['euler_angles'].squeeze() # shape (batch, 3) 163 | attributes = labels['attributes'].squeeze() # shape (batch, 6) 164 | landmarks = labels['landmarks'].squeeze() # shape (batch, 98, 2) 165 | landmarks = landmarks.reshape((landmarks.shape[0], 196)) # reshape landmarks to match loss function 166 | 167 | image = image.to(device) 168 | landmarks = landmarks.to(device) 169 | euler_angles = euler_angles.to(device) 170 | attributes = attributes.to(device) 171 | pfld_model = pfld_model.to(device) 172 | auxiliary_model = auxiliary_model.to(device) 173 | 174 | featrues, pred_landmarks = pfld_model(image) 175 | pred_angles = auxiliary_model(featrues) 176 | weighted_loss, loss = criterion(pred_landmarks, landmarks, pred_angles, euler_angles, attributes) 177 | 178 | weighted_loss = round(weighted_loss.item(),3) 179 | loss = round(loss.item(),3) 180 | print(f"\tval epoch={epoch} .. val_weighted_loss= {weighted_loss} ... val_loss={loss}\n") 181 | # logging.info(f"\tval epoch={epoch} .. val_weighted_loss= {weighted_loss} ... 
val_loss={loss}\n") 182 | 183 | validation_losses.append(loss) 184 | 185 | avg_val_loss = round(np.mean(validation_losses).item(),3) 186 | 187 | print('*'*70,f'\n\tEvaluation average loss= {avg_val_loss}\n') 188 | logging.info('*'*70 + f'\n\tEvaluation average loss= {avg_val_loss}\n') 189 | time.sleep(1) 190 | return avg_val_loss 191 | 192 | 193 | if __name__ == "__main__": 194 | main() 195 | 196 | # import torchvision.transforms.transforms as transforms 197 | # transform = transforms.Compose([transforms.ToTensor()]) 198 | # dataset = WFLW_Dataset(mode='train', transform=True) 199 | # # ============ From tensor to image ... crahses in any function in cv2 ================== 200 | # dataloader = create_train_loader(transform=True) 201 | # for images, labels in dataloader: 202 | # print(images.shape) 203 | # image = images[0] 204 | # print(image.shape) 205 | # image = to_numpy_image(images[0]) 206 | # print(image.shape) 207 | # import cv2 208 | # landmarks = labels['landmarks'].squeeze()[0] 209 | # euler_angles = labels['euler_angles'].squeeze()[0] 210 | # attributes = labels['attributes'].squeeze()[0] 211 | # l = {} 212 | # l['landmarks'] = landmarks.numpy() 213 | # l['euler_angles'] = euler_angles.numpy() 214 | # l['attributes'] = attributes.numpy() 215 | # visualizer = WFLW_Visualizer() 216 | # visualizer.visualize(image, l) 217 | 218 | # print(image.shape, image) 219 | # ==================================================== 220 | # ======= Habd 221 | # cv2.circle(image, (40,50), 30, (245,0,0), -1) 222 | # cv2.imshow("I", image) 223 | # cv2.waitKey(0) 224 | 225 | # datase2 = WFLW_Dataset(transform=False, mode='val') 226 | # image2, labels2 = datase2[0] 227 | # print(image.shape, image2.shape, type(image), type(image2)) 228 | 229 | # ============= Test reshape and back reshape (works well) ====== 230 | # x = np.array([[ 231 | # [1,2], 232 | # [3,4], 233 | # [5,6] 234 | # ]]) 235 | # print(x.shape) 236 | # xx = transform(x) 237 | # print("transform",xx,'\n') 238 | # xx = xx.reshape((1,1,6)) 239 | # print("flatten",xx,'\n') 240 | # xx = xx.reshape((1,3,2)) 241 | # print("reshape",xx,'\n') 242 | 243 | # ======== Test Landmarks reshape ========= 244 | # x = torch.tensor([ 245 | # 1,2,3,4,5,6 246 | # ]) 247 | # print(x.shape) 248 | # x = x.reshape((1,3,2)) 249 | # print("reshape",x,'\n') 250 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Amr Elsersy 3 | email: amrelsersay@gmail.com 4 | ----------------------------------------------------------------------------------- 5 | Description: utils functions for Data Augumentation & euler utils 6 | """ 7 | import cv2 8 | import numpy as np 9 | import torch 10 | from euler_angles import EulerAngles 11 | import math 12 | 13 | def to_numpy_image(tensor): 14 | return np.transpose(tensor.numpy(), (1, 2, 0)) 15 | 16 | # =========== Data Augumentation =================== 17 | 18 | def rotatedRectWithMaxArea(side, angle): 19 | """ 20 | Given a square image of size side x side that has been rotated by 'angle' 21 | (in degree), computes the new side of the largest possible 22 | axis-aligned square (maximal area) within the rotated image. 
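    e.g. a 112x112 crop rotated by 30 degrees keeps an axis-aligned square of side roughly
    112*(cos30 - sin30)/cos60 = 112/(cos30 + sin30) ~ 82 px (81 after the int() truncation below).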
23 | """ 24 | # convert to radians 25 | angle = angle * math.pi/180 26 | # since the solutions for angle, -angle and 180-angle are all the same, 27 | # if suffices to look at the first quadrant and the absolute values of sin,cos: 28 | sin_a, cos_a = abs(math.sin(angle)), abs(math.cos(angle)) 29 | 30 | if side <= 2.*sin_a*cos_a*side or abs(sin_a-cos_a) < 1e-10: 31 | # half constrained case: two crop corners touch the longer side, 32 | # the other two corners are on the mid-line parallel to the longer line 33 | x = 0.5*side 34 | new_side = x/sin_a,x/cos_a 35 | else: 36 | # fully constrained case: crop touches all 4 sides 37 | cos_2a = cos_a*cos_a - sin_a*sin_a 38 | new_side = side*(cos_a -sin_a)/cos_2a 39 | 40 | return int(new_side) 41 | 42 | def flip(image, landmarks): 43 | # horizontal flip 44 | image = cv2.flip(image, 1) 45 | 46 | w,h = image.shape[:2] 47 | center = (w//2, h//2) 48 | 49 | # translate it to origin 50 | landmarks -= center 51 | # apply reflection(flip) matrix 52 | flip_matrix = np.array([ 53 | [-1, 0], 54 | [0 , 1] 55 | ]) 56 | landmarks = (flip_matrix @ landmarks.T).T 57 | # translate again to its position 58 | landmarks += center 59 | 60 | # just flip the order of landmarks points .. mask is from https://github.com/polarisZhao/PFLD-pytorch/blob/master/data/Mirror98.txt 61 | flip_mask = [ 32,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12 62 | ,11,10,9,8,7,6,5,4,3,2,1,0,46,45,44,43,42,50,49,48,47,37,36,35, 63 | 34,33,41,40,39,38,51,52,53,54,59,58,57,56,55,72,71,70,69,68,75, 64 | 74,73,64,63,62,61,60,67,66,65,82,81,80,79,78,77,76,87,86,85,84, 65 | 83,92,91,90,89,88,95,94,93,97,96] 66 | 67 | landmarks = landmarks[flip_mask] 68 | 69 | return image, landmarks 70 | 71 | def resize(image, landmarks, size=(112,112)): 72 | side = image.shape[0] 73 | scale = size[0] / side 74 | image = cv2.resize(image, size) 75 | landmarks *= scale 76 | return image, landmarks 77 | 78 | 79 | # ============= Euler ================== 80 | def euler_to_rotation(euler_angles) : 81 | R_x = np.array([[1, 0, 0 ], 82 | [0, np.cos(np.radians(euler_angles[0])), -np.sin(np.radians(euler_angles[0]))], 83 | [0, np.sin(np.radians(euler_angles[0])), np.cos(np.radians(euler_angles[0]))] 84 | ]) 85 | 86 | R_y = np.array([[np.cos(np.radians(euler_angles[1])), 0, np.sin(np.radians(euler_angles[1])) ], 87 | [0, 1, 0 ], 88 | [-np.sin(np.radians(euler_angles[1])), 0, np.cos(np.radians(euler_angles[1])) ] 89 | ]) 90 | 91 | R_z = np.array([[np.cos(np.radians(euler_angles[2])), -np.sin(np.radians(euler_angles[2])), 0], 92 | [np.sin(np.radians(euler_angles[2])), np.cos(np.radians(euler_angles[2])), 0], 93 | [0, 0, 1] 94 | ]) 95 | 96 | 97 | R = R_x @ R_y @ R_z 98 | return R 99 | 100 | def get_intrensic_matrix(image): 101 | e = EulerAngles((image.shape[0], image.shape[1])) 102 | return e.camera_intrensic_matrix 103 | 104 | -------------------------------------------------------------------------------- /visualization.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Amr Elsersy 3 | email: amrelsersay@gmail.com 4 | ----------------------------------------------------------------------------------- 5 | Description: Visualization of dataset with annotations in cv2 & tensorboard 6 | """ 7 | 8 | import numpy as np 9 | import cv2 10 | import argparse 11 | from dataset import WFLW_Dataset 12 | from dataset import create_train_loader, create_test_loader 13 | 14 | import torch 15 | from torchvision.utils import make_grid 16 | import torch.utils.tensorboard as 
tensorboard 17 | 18 | from utils import * 19 | 20 | class WFLW_Visualizer: 21 | def __init__(self): 22 | self.writer = tensorboard.SummaryWriter("checkpoint/tensorboard") 23 | 24 | self.rect_color = (0,255,255) 25 | self.landmarks_color = (0,255,0) 26 | self.rect_width = 3 27 | self.landmarks_radius = 1 28 | self.winname = "image" 29 | self.crop_resize_shape = (400, 400) 30 | self.user_press = None 31 | 32 | def visualize(self, image, labels, draw_eulers = False): 33 | landmarks = labels['landmarks'].astype(np.int32) 34 | euler_angles = labels['euler_angles'] 35 | 36 | image = self.draw_landmarks(image, landmarks) 37 | if draw_eulers: 38 | image = self.draw_euler_angles_approximation(image, euler_angles) 39 | self.show(image) 40 | 41 | def show(self, image, size = None, wait = True, winname="image"): 42 | if size: 43 | image = cv2.resize(image, size) 44 | else: 45 | image = cv2.resize(image, self.crop_resize_shape) 46 | 47 | cv2.imshow(winname, image) 48 | if wait: 49 | self.user_press = cv2.waitKey(0) & 0xff 50 | 51 | def draw_landmarks(self, image, landmarks): 52 | for (x,y) in landmarks: 53 | cv2.circle(image, (x,y), self.landmarks_radius, self.landmarks_color, -1) 54 | return image 55 | 56 | def batch_draw_landmarks(self, images, labels): 57 | n_batches = images.shape[0] 58 | for i in range(n_batches): 59 | image = images[i] 60 | 61 | landmarks = labels['landmarks'].type(torch.IntTensor) 62 | landmarks = landmarks[i] 63 | 64 | image = self.draw_landmarks(image.numpy(), landmarks) 65 | images[i] = torch.from_numpy(image) 66 | 67 | return images 68 | 69 | def draw_euler_angles(self, image, rvec, tvec, euler_angles, intrensic_matrix): 70 | # i, j, k axes in world 3D coord. 71 | axis = np.identity(3) * 5 72 | # axis_img_pts = intrensic * exstrinsic * axis 73 | axis_pts = cv2.projectPoints(axis, rvec, tvec, intrensic_matrix, None)[0] 74 | image = self.draw_euler_axis(image, axis_pts, euler_angles) 75 | 76 | return image 77 | 78 | def draw_euler_angles_approximation(self, image, euler_angles): 79 | axis = np.identity(3) * 5 80 | 81 | rotation = euler_to_rotation(euler_angles) 82 | # for just visualization we will use the avarage value of tvec 83 | tvec = np.array([ 84 | [-1], 85 | [-2], 86 | [-21] 87 | ], dtype=np.float) 88 | 89 | intrensic = get_intrensic_matrix(image) 90 | 91 | # from world space to 3D cam space 92 | axis_pts = rotation @ axis + tvec 93 | # project to image 94 | axis_pts = intrensic @ axis_pts 95 | # convert from homoginous to image plane 96 | axis_pts /= axis_pts[2] 97 | # don't need the z component 98 | axis_pts = np.delete(axis_pts, 2, axis=0).T 99 | 100 | image = self.draw_euler_axis(image, axis_pts, euler_angles) 101 | return image 102 | 103 | def draw_euler_axis(self, image, axis_pts, euler_angles): 104 | """ 105 | draw euler axes in the image center 106 | """ 107 | center = (image.shape[1]//2, image.shape[0]//2) 108 | 109 | axis_pts = axis_pts.astype(np.int32) 110 | pitch_point = tuple(axis_pts[0].ravel()) 111 | yaw_point = tuple(axis_pts[1].ravel()) 112 | roll_point = tuple(axis_pts[2].ravel()) 113 | 114 | pitch_color = (255,255,0) 115 | yaw_color = (0,255,0) 116 | roll_color = (0,0,255) 117 | 118 | pitch, yaw, roll = euler_angles 119 | 120 | cv2.line(image, center, pitch_point, pitch_color, 2) 121 | cv2.line(image, center, yaw_point, yaw_color, 2) 122 | cv2.line(image, center, roll_point, roll_color, 2) 123 | cv2.putText(image, "Pitch:{:.2f}".format(pitch), (0,10), cv2.FONT_HERSHEY_PLAIN, 1, pitch_color) 124 | cv2.putText(image, "Yaw:{:.2f}".format(yaw), (0,20), 
cv2.FONT_HERSHEY_PLAIN, 1, yaw_color)
125 |         cv2.putText(image, "Roll:{:.2f}".format(roll), (0,30), cv2.FONT_HERSHEY_PLAIN, 1, roll_color)
126 | 
127 |         # origin
128 |         cv2.circle(image, center, 2, (255,255,255), -1)
129 |         return image
130 | 
131 |     def visualize_tensorboard(self, images, labels, step=0):
132 |         images = self.batch_draw_landmarks(images, labels)
133 |         # format must be specified (N, H, W, C)
134 |         self.writer.add_images("images", images, global_step=step, dataformats="NHWC")
135 | 
136 | 
137 | 
138 | if __name__ == "__main__":
139 |     # ======== Argparser ===========
140 |     parser = argparse.ArgumentParser()
141 |     parser.add_argument('--mode', default='train', choices=['train', 'test'], help="choose which dataset to visualize")
142 |     parser.add_argument('--tensorboard', action='store_true', help="visualize images to tensorboard")
143 |     parser.add_argument('--stop_batch', type=int, default=5, help="tensorboard batch index to stop at")
144 |     args = parser.parse_args()
145 |     # ================================
146 | 
147 |     visualizer = WFLW_Visualizer()
148 | 
149 |     # Visualize the dataset (train or test) with landmarks
150 |     if not args.tensorboard:
151 |         dataset = WFLW_Dataset(mode=args.mode)
152 |         for i in range(len(dataset)):
153 |             image, labels = dataset[i]
154 |             print('landmarks', labels['landmarks'])
155 | 
156 |             print ("*" * 80, '\n\n\t press any key for the next example .... ESC to exit')
157 |             print('\tcurrent image: ', labels['image_name'])
158 | 
159 |             visualizer.visualize(image, labels)
160 |             if visualizer.user_press == 27:
161 |                 break
162 | 
163 | 
164 |     # Tensorboard visualization of the first args.stop_batch batches, 64 images each
165 |     else:
166 |         batch_size = 64
167 |         dataloader = create_test_loader(batch_size=batch_size, transform=None)
168 | 
169 |         batch = 0
170 |         for (images, labels) in dataloader:
171 |             batch += 1
172 | 
173 |             visualizer.visualize_tensorboard(images, labels, batch)
174 |             print ("*" * 60, f'\n\n\t Saved {batch_size} images at step {batch}. Run tensorboard from the project root')
175 | 
176 |             if batch == args.stop_batch:
177 |                 break
178 | 
179 |         visualizer.writer.close()
180 | 
181 | 
182 | 
--------------------------------------------------------------------------------