├── __init__.py
├── tools
    ├── __init__.py
    ├── image_tools.py
    ├── utils.py
    ├── vision.py
    ├── imagedb.py
    ├── image_reader.py
    └── detect.py
├── train_net
    ├── __init__.py
    ├── train_p_net.py
    ├── train_r_net.py
    ├── train_o_net.py
    └── models.py
├── prepare_data
    ├── __init__.py
    ├── assemble_onet_imglist.py
    ├── assemble_rnet_imglist.py
    ├── assemble_pnet_imglist.py
    ├── assemble.py
    ├── gen_landmark_12.py
    ├── gen_landmark_48.py
    ├── gen_landmark_24.py
    ├── gen_Pnet_train_data.py
    ├── gen_Rnet_train_data.py
    └── gen_Onet_train_data.py
├── test.jpg
├── result.png
├── test2.jpg
├── training_data
    └── readme.md
├── model_store
    ├── onet_model_final.pt
    ├── pnet_model_final.pt
    └── rnet_model_final.pt
├── anno_store
    └── readme.md
├── test_image.py
├── config.py
├── .gitignore
└── README.md


/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tools/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/train_net/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/prepare_data/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/test.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wujiyang/MTCNN_TRAIN/HEAD/test.jpg


--------------------------------------------------------------------------------
/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wujiyang/MTCNN_TRAIN/HEAD/result.png


--------------------------------------------------------------------------------
/test2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wujiyang/MTCNN_TRAIN/HEAD/test2.jpg


--------------------------------------------------------------------------------
/training_data/readme.md:
--------------------------------------------------------------------------------
1 | this folder contains training samples for each training stage
2 | 


--------------------------------------------------------------------------------
/model_store/onet_model_final.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wujiyang/MTCNN_TRAIN/HEAD/model_store/onet_model_final.pt


--------------------------------------------------------------------------------
/model_store/pnet_model_final.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wujiyang/MTCNN_TRAIN/HEAD/model_store/pnet_model_final.pt


--------------------------------------------------------------------------------
/model_store/rnet_model_final.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wujiyang/MTCNN_TRAIN/HEAD/model_store/rnet_model_final.pt


--------------------------------------------------------------------------------
/anno_store/readme.md:
--------------------------------------------------------------------------------
1 | this folder saves image list for all three stage.  
2 | 
3 | I didn't push them to github because of the file size.
4 | 
5 | 
6 | for training pnet and rnet, I use wider face dataset for sampling   
7 | for training onet, I use a third part data for landmark sampling: [Training set](http://mmlab.ie.cuhk.edu.hk/archive/CNN_FacePoint.htm)
8 | 


--------------------------------------------------------------------------------
/test_image.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python2
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Created on Sat May 26 13:57:59 2018
 5 | 
 6 | @author: wujiyang
 7 | """
 8 | 
 9 | import sys
10 | sys.path.append("/home/wujiyang/FaceProjects/MTCNN_TRAIN")
11 | 
12 | import cv2
13 | from tools.detect import create_mtcnn_net, MtcnnDetector
14 | import tools.vision as vision
15 | 
16 | 
17 | if __name__ == '__main__':
18 | 
19 |     pnet, rnet, onet = create_mtcnn_net(p_model_path="./model_store/pnet_model_final.pt",
20 |                                         r_model_path="./model_store/rnet_model_final.pt",
21 |                                         o_model_path="./model_store/onet_model_final.pt", 
22 |                                         use_cuda=False)
23 |     
24 |     mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, onet=onet, min_face_size=24)
25 | 
26 |     img = cv2.imread("./test2.jpg")
27 |     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
28 | 
29 |     bboxs, landmarks = mtcnn_detector.detect_face(img)
30 |     
31 |     #print bboxs.shape[0]
32 |     #print landmarks.shape[0]
33 | 
34 |     vision.vis_face(img, bboxs, landmarks)


--------------------------------------------------------------------------------
/prepare_data/assemble_onet_imglist.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python2
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Created on Sat May 26 12:21:58 2018
 5 | 
 6 | @author: wujiyang
 7 | """
 8 | 
 9 | 
10 | import sys
11 | sys.path.append("/home/wujiyang/FaceProjects/MTCNN_TRAIN")
12 | 
13 | import os
14 | import config
15 | import prepare_data.assemble as assemble
16 | 
17 | 
18 | if __name__ == '__main__':
19 | 
20 |     anno_list = []
21 | 
22 |     net_landmark_file = os.path.join(config.ANNO_STORE_DIR,config.ONET_LANDMARK_ANNO_FILENAME)
23 |     net_postive_file = os.path.join(config.ANNO_STORE_DIR,config.ONET_POSTIVE_ANNO_FILENAME)
24 |     net_part_file = os.path.join(config.ANNO_STORE_DIR,config.ONET_PART_ANNO_FILENAME)
25 |     net_neg_file = os.path.join(config.ANNO_STORE_DIR,config.ONET_NEGATIVE_ANNO_FILENAME)
26 | 
27 |     anno_list.append(net_postive_file)
28 |     anno_list.append(net_part_file)
29 |     anno_list.append(net_neg_file)
30 |     anno_list.append(net_landmark_file)
31 | 
32 |     imglist_file = os.path.join(config.ANNO_STORE_DIR, config.ONET_TRAIN_IMGLIST_FILENAME)
33 | 
34 |     chose_count = assemble.assemble_data(imglist_file ,anno_list)
35 |     print("PNet train annotation result file path:%s" % imglist_file)
36 | 


--------------------------------------------------------------------------------
/prepare_data/assemble_rnet_imglist.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python2
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Created on Thu May 24 19:22:32 2018
 5 | 
 6 | @author: wujiyang
 7 | """
 8 | import sys
 9 | sys.path.append("/home/wujiyang/FaceProjects/MTCNN_TRAIN")
10 | 
11 | import os
12 | import config
13 | import prepare_data.assemble as assemble
14 | 
15 | 
16 | if __name__ == '__main__':
17 | 
18 |     anno_list = []
19 | 
20 |     # rnet_landmark_file = os.path.join(config.ANNO_STORE_DIR,config.RNET_LANDMARK_ANNO_FILENAME)
21 |     rnet_postive_file = os.path.join(config.ANNO_STORE_DIR,config.RNET_POSTIVE_ANNO_FILENAME)
22 |     rnet_part_file = os.path.join(config.ANNO_STORE_DIR,config.RNET_PART_ANNO_FILENAME)
23 |     rnet_neg_file = os.path.join(config.ANNO_STORE_DIR,config.RNET_NEGATIVE_ANNO_FILENAME)
24 | 
25 |     anno_list.append(rnet_postive_file)
26 |     anno_list.append(rnet_part_file)
27 |     anno_list.append(rnet_neg_file)
28 |     # anno_list.append(rnet_landmark_file)
29 | 
30 |     imglist_file = os.path.join(config.ANNO_STORE_DIR, config.RNET_TRAIN_IMGLIST_FILENAME)
31 |     
32 |     chose_count = assemble.assemble_data(imglist_file ,anno_list)
33 |     print("PNet train annotation result file path:%s, total num of imgs: %d" % (imglist_file, chose_count))
34 |     


--------------------------------------------------------------------------------
/prepare_data/assemble_pnet_imglist.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python2
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Created on Tue May 15 15:52:24 2018
 5 | 
 6 | @author: wujiyang
 7 | """
 8 | 
 9 | import sys
10 | sys.path.append("/home/wujiyang/FaceProjects/MTCNN_TRAIN")
11 | 
12 | import os
13 | 
14 | import config
15 | import prepare_data.assemble as assemble
16 | 
17 | 
18 | if __name__ == '__main__':
19 |     
20 |     anno_list = []
21 |     
22 |     # pnet_landmark_file = os.path.join(config.ANNO_STORE_DIR,config.PNET_LANDMARK_ANNO_FILENAME)
23 |     pnet_postive_file = os.path.join(config.ANNO_STORE_DIR, config.PNET_POSTIVE_ANNO_FILENAME)
24 |     pnet_part_file = os.path.join(config.ANNO_STORE_DIR, config.PNET_PART_ANNO_FILENAME)
25 |     pnet_neg_file = os.path.join(config.ANNO_STORE_DIR, config.PNET_NEGATIVE_ANNO_FILENAME)
26 | 
27 |     anno_list.append(pnet_postive_file)
28 |     anno_list.append(pnet_part_file)
29 |     anno_list.append(pnet_neg_file)
30 |     # anno_list.append(pnet_landmark_file)
31 |     
32 |     imglist_file = os.path.join(config.ANNO_STORE_DIR, config.PNET_TRAIN_IMGLIST_FILENAME)
33 |     
34 |     chose_count = assemble.assemble_data(imglist_file ,anno_list)
35 |     print("PNet train annotation result file path:%s, total num of imgs: %d" % (imglist_file, chose_count))
36 |     
37 |     
38 | 


--------------------------------------------------------------------------------
/tools/image_tools.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python2
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Created on Tue May 15 19:02:24 2018
 5 | 
 6 | @author: wujiyang
 7 | """
 8 | 
 9 | import torch
10 | import torchvision.transforms as transforms
11 | import numpy as np
12 | 
13 | 
14 | transform = transforms.ToTensor()
15 | 
16 | 
17 | def convert_image_to_tensor(image):
18 |     """convert an image to pytorch tensor
19 | 
20 |         Parameters:
21 |         ----------
22 |         image: numpy array , h * w * c
23 | 
24 |         Returns:
25 |         -------
26 |         image_tensor: pytorch.FloatTensor, c * h * w
27 |         """
28 |     image = image.astype(np.float32)
29 |     return transform(image)
30 | 
31 | 
32 | def convert_chwTensor_to_hwcNumpy(tensor):
33 |     """convert a group images pytorch tensor(count * c * h * w) to numpy array images(count * h * w * c)
34 |             Parameters:
35 |             ----------
36 |             tensor: numpy array , count * c * h * w
37 | 
38 |             Returns:
39 |             -------
40 |             numpy array images: count * h * w * c
41 |             """
42 | 
43 |     if isinstance(tensor, torch.FloatTensor):
44 |         return np.transpose(tensor.detach().numpy(), (0,2,3,1))
45 |     else:
46 |         raise Exception("covert b*c*h*w tensor to b*h*w*c numpy error.This tensor must have 4 dimension of float data type.")
47 | 


--------------------------------------------------------------------------------
/prepare_data/assemble.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python2
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Created on Tue May 15 15:52:53 2018
 5 | 
 6 | @author: wujiyang
 7 | """
 8 | 
 9 | import os
10 | import numpy.random as npr
11 | import numpy as np
12 | 
13 | def assemble_data(output_file, anno_file_list = []):
14 |     # assemble the annotations to one file
15 |     size = 12
16 |     
17 |     if len(anno_file_list) == 0:
18 |         return 0
19 |     
20 |     if os.path.exists(output_file):
21 |         os.remove(output_file)
22 |      
23 |     chose_count = 0
24 |     for anno_file in anno_file_list:
25 |         with open(anno_file, 'r') as f:
26 |             anno_lines = f.readlines()
27 |             
28 |         base_num = 250000
29 |         # choose the examples in random style
30 |         if len(anno_lines) > base_num * 3:
31 |             idx_keep = npr.choice(len(anno_lines), size=base_num * 3, replace=True)
32 |         elif len(anno_lines) > 100000:
33 |             idx_keep = npr.choice(len(anno_lines), size=len(anno_lines), replace=True)
34 |         else:
35 |             idx_keep = np.arange(len(anno_lines))
36 |             np.random.shuffle(idx_keep)
37 |             
38 |         with open(output_file, 'a+') as f:
39 |             for idx in idx_keep:
40 |                 f.write(anno_lines[idx])
41 |                 chose_count += 1
42 |                 
43 |     return chose_count
44 |             
45 |         


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python2
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Created on Tue May 15 09:37:39 2018
 5 | 
 6 | @author: wujiyang
 7 | """
 8 | 
 9 | import os
10 | 
11 | 
12 | MODLE_STORE_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + "/MTCNN_TRAIN/model_store"
13 | 
14 | ANNO_STORE_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + "/MTCNN_TRAIN/anno_store"
15 | 
16 | TRAIN_DATA_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + "/MTCNN_TRAIN/training_data"
17 | 
18 | LOG_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + "/MTCNN_TRAIN/log"
19 | 
20 | USE_CUDA = True
21 | 
22 | TRAIN_BATCH_SIZE = 1024
23 | 
24 | TRAIN_LR = 0.01
25 | 
26 | END_EPOCH = 50
27 | 
28 | PNET_POSTIVE_ANNO_FILENAME = "pos_12.txt"
29 | PNET_NEGATIVE_ANNO_FILENAME = "neg_12.txt"
30 | PNET_PART_ANNO_FILENAME = "part_12.txt"
31 | PNET_LANDMARK_ANNO_FILENAME = "landmark_12.txt"
32 | 
33 | 
34 | RNET_POSTIVE_ANNO_FILENAME = "pos_24.txt"
35 | RNET_NEGATIVE_ANNO_FILENAME = "neg_24.txt"
36 | RNET_PART_ANNO_FILENAME = "part_24.txt"
37 | RNET_LANDMARK_ANNO_FILENAME = "landmark_24.txt"
38 | 
39 | 
40 | ONET_POSTIVE_ANNO_FILENAME = "pos_48.txt"
41 | ONET_NEGATIVE_ANNO_FILENAME = "neg_48.txt"
42 | ONET_PART_ANNO_FILENAME = "part_48.txt"
43 | ONET_LANDMARK_ANNO_FILENAME = "landmark_48.txt"
44 | 
45 | PNET_TRAIN_IMGLIST_FILENAME = "imglist_anno_12.txt"
46 | RNET_TRAIN_IMGLIST_FILENAME = "imglist_anno_24.txt"
47 | ONET_TRAIN_IMGLIST_FILENAME = "imglist_anno_48.txt"


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
106 | 
107 | # add by wujiyang
108 | training_data/*
109 | !training_data/readme.md
110 | model_store/*
111 | !model_store/pnet_model_final.pt
112 | !model_store/rnet_model_final.pt
113 | !model_store/onet_model_final.pt
114 | anno_store/*
115 | !anno_store/landmark_imagelist.txt
116 | !anno_store/wider_origin_anno.txt
117 | !anno_store/readme.md
118 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # MTCNN_TRAIN
 2 | MTCNN_Train Scripts with PyTorch 0.4.0  
 3 | 
 4 | ## Declaration
 5 | **The source code in this repository is mainly from [kuaikuaikim/DFace](https://github.com/kuaikuaikim/DFace).** 
 6 | **I reimplemented the part of MTCNN with PyTorch 0.4.0 and made some optimizations but most remains unchanged. If you want to know more details, please go to [kuaikuaikim/DFace](https://github.com/kuaikuaikim/DFace)** 
 7 | 
 8 | ---
 9 | ## Introduction 
10 | 
11 | ~~This project is still in progess, I will finish it in my spare time as soon as possible !~~  
12 | 
13 | This project is a reimplementation version of mtcnn face detection, most of the source code is from [kuaikuaikim/DFace](https://github.com/kuaikuaikim/DFace), I restructed the source code with Pytorch 0.4.0 and made some modifications and optimizations. All the contributions I have made is listed below.
14 | 
15 | ## The Contributions
16 | 1. restruct the source code with PyTorch 0.4.0.
17 | 2. avoid some unnecessary image data copy operation in training data preparation, for example, ./prepare_data/gen_Pnet_data.py and so on.
18 | 3. remove some meaningless operation in traing process, and format the output information during training.
19 | 4. fix the bug that data_loader can't load the last mini_batch when the last minibatch'size is less than the batch_size in ./tools/image_reader.py. 
20 | 5. to be continue.  
21 | 
22 | ## How to use  
23 | For training PNet and RNet, I only use the [Widerface](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/) for face classification and face bounding box regression. For training ONet, I use [Widerface](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/) for face classification and face bounding box regression and use [Training Dataset](http://mmlab.ie.cuhk.edu.hk/archive/CNN_FacePoint.htm) for face landmark regression.
24 | 
25 | 1. Train PNet  
26 | ``` python  
27 | cd MTCNN_TRAIN
28 | python prepare_data/gen_Pnet_train_data.py
29 | python prepare_data/assemble_pnet_imglist.py 
30 | python train_net/train_p_net.py
31 | ```   
32 | 2. Train RNet 
33 | ``` python
34 | cd MTCNN_TRAIN
35 | python prepare_data/gen_Rnet_train_data.py
36 | python prepare_data/assemble_rnet_imglist.py 
37 | python train_net/train_r_net.py
38 | ```  
39 | 3. Train ONet   
40 | ``` python
41 | cd MTCNN_TRAIN
42 | python prepare_data/gen_landmark_48.py
43 | python prepare_data/gen_Onet_train_data.py
44 | python prepare_data/assemble_onet_imglist.py 
45 | python train_net/train_o_net.py
46 | ```   
47 | 4. Test Image
48 | ``` python
49 | cd MTCNN_TRAIN
50 | python test_image.py
51 | ```
52 | 
53 | ## Results  
54 | Because I didn't use much data to train, the detection results is not at the best.  
55 | ![avatar](result.png)   
56 | 
57 | ## Problems  
58 | There still remains a problem to solve: When starting to train each stage network, the first batch will last for a long time about 30 minutes and I don't know why.
59 | 
60 | 
61 | 
62 | 


--------------------------------------------------------------------------------
/tools/utils.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Tue May 15 09:37:59 2018
  5 | 
  6 | @author: wujiyang
  7 | """
  8 | 
  9 | import numpy as np
 10 | 
 11 | def IoU(box, boxes):
 12 |     """Compute IoU between detect box and gt boxes
 13 | 
 14 |     Parameters:
 15 |     ----------
 16 |     box: numpy array , shape (5, ): x1, y1, x2, y2, score
 17 |         input box
 18 |     boxes: numpy array, shape (n, 4): x1, y1, x2, y2
 19 |         input ground truth boxes
 20 | 
 21 |     Returns:
 22 |     -------
 23 |     ovr: numpy.array, shape (n, )
 24 |         IoU
 25 |     """
 26 |     box_area = (box[2] - box[0]) * (box[3] - box[1])
 27 |     area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
 28 |     
 29 |     xx1 = np.maximum(box[0], boxes[:, 0])
 30 |     yy1 = np.maximum(box[1], boxes[:, 1])
 31 |     xx2 = np.minimum(box[2], boxes[:, 2])
 32 |     yy2 = np.minimum(box[3], boxes[:, 3])
 33 |     
 34 |     # compute the width and height of the inter box 
 35 |     w = np.maximum(0, xx2 - xx1)
 36 |     h = np.maximum(0, yy2 - yy1)
 37 |     
 38 |     inter = w * h
 39 |     ovr = np.true_divide(inter, (box_area + area - inter))
 40 |     
 41 |     
 42 |     return ovr
 43 | 
 44 | 
 45 | def convert_to_square(bbox):
 46 |     ''' Convert bbox to a square which it can include the bbox
 47 |     Parameters:
 48 |         bbox: numpy array, shape n x 5
 49 |         
 50 |     returns:
 51 |         square box
 52 |     '''
 53 |     
 54 |     square_bbox = bbox.copy()
 55 |     h = bbox[:, 3] - bbox[:, 1]
 56 |     w = bbox[:, 2] - bbox[:, 0]
 57 |     max_side = np.maximum(h, w)
 58 |     square_bbox[:, 0] = bbox[:, 0] + w*0.5 - max_side*0.5
 59 |     square_bbox[:, 1] = bbox[:, 1] + h*0.5 - max_side*0.5
 60 |     square_bbox[:, 2] = square_bbox[:, 0] + max_side
 61 |     square_bbox[:, 3] = square_bbox[:, 1] + max_side
 62 |     
 63 |     return square_bbox
 64 | 
 65 | 
 66 | def nms(dets, thresh, mode='Union'):
 67 |     ''' greedily select bboxes with high confidence,if an box overlap with the highest score box > thres, rule it out
 68 |     
 69 |     params:
 70 |         dets: [[x1, y1, x2, y2, score]]
 71 |         thresh: retain overlap <= thresh
 72 |     return:
 73 |         indexes to keep
 74 |     '''
 75 |     x1 = dets[:, 0]
 76 |     y1 = dets[:, 1]
 77 |     x2 = dets[:, 2]
 78 |     y2 = dets[:, 3]
 79 |     scores = dets[:, 4]
 80 |     
 81 |     areas = (x2 - x1) * (y2 - y1)
 82 |     order = scores.argsort()[::-1] # the index of scores by desc
 83 |     
 84 |     keep = []
 85 |     while order.size > 0:
 86 |         i = order[0]
 87 |         keep.append(i)
 88 |         xx1 = np.maximum(x1[i], x1[order[1:]])
 89 |         yy1 = np.maximum(y1[i], y1[order[1:]])
 90 |         xx2 = np.minimum(x2[i], x2[order[1:]])
 91 |         yy2 = np.minimum(y2[i], y2[order[1:]])
 92 |         
 93 |         w = np.maximum(0.0, xx2 - xx1)
 94 |         h = np.maximum(0.0, yy2 - yy1)
 95 |         inter = w * h
 96 |         inter = w * h
 97 |         if mode == "Union":
 98 |             ovr = inter / (areas[i] + areas[order[1:]] - inter)
 99 |         elif mode == "Minimum":
100 |             ovr = inter / np.minimum(areas[i], areas[order[1:]])
101 |             
102 |         inds = np.where(ovr <= thresh)[0]
103 |         order = order[inds + 1]
104 |     
105 |     return keep
106 |         
107 |         
108 | 
109 | 
110 | 


--------------------------------------------------------------------------------
/tools/vision.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Sat May 19 15:14:18 2018
  5 | 
  6 | @author: wujiyang
  7 | """
  8 | import matplotlib.pyplot as plt
  9 | import pylab
 10 | 
 11 | def vis_two(im_array, dest1, dest2, thresh=0.9):
 12 |     """Visualize detection results before and after calibration
 13 |     
 14 |     Parameters:
 15 |     ----------
 16 |     im_array: numpy.ndarray, shape(1, c, h, w)
 17 |         test image in rgb
 18 |     dets1: numpy.ndarray([[x1 y1 x2 y2 score]])
 19 |         detection results before calibration
 20 |     dets2: numpy.ndarray([[x1 y1 x2 y2 score]])
 21 |         detection results after calibration
 22 |     thresh: float
 23 |         boxes with scores > thresh will be drawn in red 
 24 | 
 25 |     Returns:
 26 |     -------
 27 |     """
 28 | 
 29 |     figure = plt.figure()
 30 |     plt.subplot(121)
 31 |     plt.imshow(im_array)
 32 |     figure.suptitle('Face Detector', fontsize=12, color='r')
 33 |     for i in range(dest1.shape[0]):
 34 |         bbox = dest1[i, 0:4]
 35 |         score = dest1[i, 4]
 36 |         landmarks = dest1[i, 5:]
 37 |         if score > thresh:
 38 |             rect = plt.Rectangle((bbox[0], bbox[1]), 
 39 |                                  bbox[2] - bbox[0],
 40 |                                  bbox[3] - bbox[1], fill=False,
 41 |                                  edgecolor='red', linewidth=0.7)
 42 |             plt.gca().add_patch(rect) # get current Axes and do some modification on it
 43 |             landmarks = landmarks.reshape((5, 2))
 44 |             for j in range(5):
 45 |                 plt.scatter(landmarks[j, 0], landmarks[j, 1], c='yellow', linewidth=1, marker='x', s = 20)
 46 |                 
 47 |     plt.subplot(122)
 48 |     plt.imshow(im_array)
 49 |     for i in range(dest2.shape[0]):
 50 |         bbox = dest2[i, 0:4]
 51 |         score = dest2[i, 4]
 52 |         landmarks = dest2[i, 5:]
 53 |         if score > thresh:
 54 |             rect = plt.Rectangle((bbox[0], bbox[1]),
 55 |                                  bbox[2] - bbox[0],
 56 |                                  bbox[3] - bbox[1], fill=False,
 57 |                                  edgecolor='red', linewidth=0.7)
 58 |             plt.gca().add_patch(rect)
 59 | 
 60 |             landmarks = landmarks.reshape((5, 2))
 61 |             for j in range(5):
 62 |                 plt.scatter(landmarks[j, 0], landmarks[j, 1], c='yellow', linewidths=1, marker='x', s=20)
 63 |     
 64 |     plt.show()
 65 |     
 66 |     
 67 | 
 68 | def vis_face(im_array, dets, landmarks=None):
 69 |     """Visualize detection results of an image
 70 | 
 71 |     Parameters:
 72 |     ----------
 73 |     im_array: numpy.ndarray, shape(1, c, h, w)
 74 |         test image in rgb
 75 |     dets: numpy.ndarray([[x1 y1 x2 y2 score landmarks]])
 76 |         detection results before calibration
 77 |     landmarks: numpy.ndarray([landmarks for five facial landmarks])
 78 | 
 79 |     Returns:
 80 |     -------
 81 |     """
 82 |     figure = plt.figure()
 83 |     plt.imshow(im_array)
 84 |     figure.suptitle('Face Detector', fontsize=12, color='r')
 85 |     
 86 |     for i in range(dets.shape[0]):
 87 |         bbox = dets[i, 0:4]
 88 |         rect = plt.Rectangle((bbox[0], bbox[1]),
 89 |                              bbox[2] - bbox[0],
 90 |                              bbox[3] - bbox[1], fill=False,
 91 |                              edgecolor='yellow', linewidth=0.9)
 92 |         plt.gca().add_patch(rect)
 93 |         
 94 |     if landmarks is not None:
 95 |         for i in range(landmarks.shape[0]):
 96 |             landmarks_one = landmarks[i, :]
 97 |             landmarks_one = landmarks_one.reshape((5, 2))
 98 |             for j in range(5):
 99 |                 plt.scatter(landmarks_one[j, 0], landmarks_one[j, 1], c='red',linewidths=1, marker='x', s=5)
100 |     
101 |     plt.show()
102 |        


--------------------------------------------------------------------------------
/tools/imagedb.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Tue May 15 19:09:40 2018
  5 | 
  6 | @author: wujiyang
  7 | """
  8 | 
  9 | import os
 10 | import numpy as np
 11 | 
 12 | class ImageDB(object):
 13 |     def __init__(self, image_annotation_file, prefix_path='', mode='train'):
 14 |         self.prefix_path = prefix_path
 15 |         self.image_annotation_file = image_annotation_file
 16 |         self.classes = ['__background__', 'face']
 17 |         self.num_classes = 2
 18 |         self.image_set_index = self.load_image_set_index()
 19 |         self.num_images = len(self.image_set_index)
 20 |         self.mode = mode
 21 |         
 22 |         
 23 |     def load_image_set_index(self):
 24 |         ''' Get image index 
 25 |         
 26 |         Returns:
 27 |             image_set_index: str, relative path of image
 28 |         '''
 29 |         assert os.path.exists(self.image_annotation_file), 'Path does not exist: {}'.format(self.image_annotation_file)
 30 |         with open(self.image_annotation_file, 'r') as f:
 31 |             image_set_index = [x.strip().split(' ')[0] for x in f.readlines()]
 32 |         return image_set_index
 33 |     
 34 |     
 35 |     def load_imdb(self):
 36 |         ''' Get and save ground truth image database 
 37 |         
 38 |         Returns:
 39 |             gt_imdb: dict, image database with annotations
 40 |         '''
 41 |         
 42 |         gt_imdb = self.load_annotations()
 43 |         
 44 |         return gt_imdb
 45 |     
 46 |     def real_image_path(self, index):
 47 |         ''' Given image's relative index, return full path of image '''
 48 |         
 49 |         index = index.replace("\\", "/")
 50 | 
 51 |         if not os.path.exists(index):
 52 |             image_file = os.path.join(self.prefix_path, index)
 53 |         else:
 54 |             image_file=index
 55 |         if not image_file.endswith('.jpg'):
 56 |             image_file = image_file + '.jpg'
 57 |         assert os.path.exists(image_file), 'Path does not exist: {}'.format(image_file)
 58 |         
 59 |         return image_file
 60 |     
 61 |     def load_annotations(self, annotation_type=1):
 62 |         ''' Load annotations
 63 |         
 64 |         what's the meaning of annotation_type ? I don't know !
 65 |         Returns:
 66 |             imdb: dict, image database with annotations
 67 |         '''
 68 |         
 69 |         assert os.path.exists(self.image_annotation_file), 'annotations not found at {}'.format(self.image_annotation_file)
 70 |         with open(self.image_annotation_file, 'r') as f:
 71 |             annotations = f.readlines()
 72 | 
 73 | 
 74 |         imdb = []
 75 |         for i in range(self.num_images):
 76 |             annotation = annotations[i].strip().split(' ')
 77 |             index = annotation[0]
 78 |             im_path = self.real_image_path(index)
 79 |             imdb_ = dict()
 80 |             imdb_['image'] = im_path
 81 | 
 82 |             if self.mode == 'test':
 83 |                 pass
 84 |             else:
 85 |                 label = annotation[1]
 86 |                 imdb_['label'] = int(label)
 87 |                 imdb_['flipped'] = False
 88 |                 imdb_['bbox_target'] = np.zeros((4,))
 89 |                 imdb_['landmark_target'] = np.zeros((10,))
 90 |                 if len(annotation[2:])==4:
 91 |                     bbox_target = annotation[2:6]
 92 |                     imdb_['bbox_target'] = np.array(bbox_target).astype(float)
 93 |                 if len(annotation[2:])==14:
 94 |                     bbox_target = annotation[2:6]
 95 |                     imdb_['bbox_target'] = np.array(bbox_target).astype(float)
 96 |                     landmark = annotation[6:]
 97 |                     imdb_['landmark_target'] = np.array(landmark).astype(float)
 98 |             imdb.append(imdb_)
 99 |         return imdb
100 |     
101 |     
102 |     def append_flipped_images(self, imdb):
103 |         ''' append flipped images to imdb 
104 |         
105 |         Returns:
106 |             imdb: dict, image database with flipped image annotations
107 |         '''
108 |         print 'append flipped images to imdb ', len(imdb)
109 |         for i in range(len(imdb)):
110 |             imdb_ = imdb[i]
111 |             m_bbox = imdb_['bbox_target'].copy()
112 |             m_bbox[0], m_bbox[2] = -m_bbox[2], -m_bbox[0]
113 |             
114 |             landmark_ = imdb_['landmark_target'].copy()
115 |             landmark_ = landmark_.reshape((5, 2))
116 |             landmark_ = np.asarray([(1 - x, y) for (x, y) in landmark_])
117 |             landmark_[[0, 1]] = landmark_[[1, 0]]
118 |             landmark_[[3, 4]] = landmark_[[4, 3]]
119 | 
120 |             item = {'image': imdb_['image'],
121 |                      'label': imdb_['label'],
122 |                      'bbox_target': m_bbox,
123 |                      'landmark_target': landmark_.reshape((10)),
124 |                      'flipped': True}
125 | 
126 |             imdb.append(item)
127 |         self.image_set_index *= 2
128 |         print 'after flipped images appended to imdb ', len(imdb)
129 |         
130 |         return imdb
131 |         
132 |         


--------------------------------------------------------------------------------
/tools/image_reader.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Tue May 15 22:02:40 2018
  5 | 
  6 | @author: wujiyang
  7 | """
  8 | 
  9 | import numpy as np
 10 | import cv2
 11 | 
 12 | class TrainImageReader:
 13 |     def __init__(self, imdb, im_size, batch_size=256, shuffle=False):
 14 | 
 15 |         self.imdb = imdb
 16 |         self.batch_size = batch_size
 17 |         self.im_size = im_size
 18 |         self.shuffle = shuffle
 19 | 
 20 |         self.cur = 0
 21 |         self.size = len(imdb)
 22 |         self.index = np.arange(self.size)
 23 |         self.num_classes = 2
 24 | 
 25 |         self.batch = None
 26 |         self.data = None
 27 |         self.label = None
 28 | 
 29 |         self.label_names = ['label', 'bbox_target', 'landmark_target']
 30 |         self.reset()
 31 |         self.get_batch()
 32 | 
 33 |     def reset(self):
 34 |         self.cur = 0
 35 |         if self.shuffle:
 36 |             np.random.shuffle(self.index)
 37 | 
 38 |     def iter_next(self):
 39 |         #return self.cur + self.batch_size <= self.size #   can't load the last epoch in the condition
 40 |         return self.cur < self.size
 41 | 
 42 |     def __iter__(self):
 43 |         return self
 44 | 
 45 |     def __next__(self):
 46 |         return self.next()
 47 | 
 48 |     def next(self):
 49 |         if self.iter_next():
 50 |             self.get_batch()
 51 |             self.cur = min(self.cur + self.batch_size, self.size)
 52 |             return self.data, self.label
 53 |         else:
 54 |             raise StopIteration
 55 | 
 56 |     def getindex(self):
 57 |         return self.cur / self.batch_size
 58 | 
 59 |     def getpad(self):
 60 |         ''' pad for the last batch '''
 61 |         if self.cur + self.batch_size > self.size:
 62 |             return self.cur + self.batch_size - self.size
 63 |         else:
 64 |             return 0
 65 | 
 66 |     def get_batch(self):
 67 |         cur_from = self.cur
 68 |         cur_to = min(cur_from + self.batch_size, self.size)
 69 |         imdb = [self.imdb[self.index[i]] for i in range(cur_from, cur_to)]
 70 |         data, label = get_minibatch(imdb)
 71 |         self.data = data['data']
 72 |         self.label = [label[name] for name in self.label_names]
 73 |         
 74 |         
 75 | 
 76 | def get_minibatch(imdb):
 77 |     num_images = len(imdb)
 78 |     processed_ims = list()
 79 |     cls_label = list()
 80 |     bbox_reg_target = list()
 81 |     landmark_reg_target = list()
 82 | 
 83 |     for i in range(num_images):
 84 |         im = cv2.imread(imdb[i]['image'])
 85 | 
 86 |         if imdb[i]['flipped']:
 87 |             im = im[:, ::-1, :]
 88 |             #im = im.transpose(Image.FLIP_LEFT_RIGHT)
 89 | 
 90 |         cls = imdb[i]['label']
 91 |         bbox_target = imdb[i]['bbox_target']
 92 |         landmark = imdb[i]['landmark_target']
 93 | 
 94 |         processed_ims.append(im)
 95 |         cls_label.append(cls)
 96 |         bbox_reg_target.append(bbox_target)
 97 |         landmark_reg_target.append(landmark)
 98 | 
 99 |     im_array = np.asarray(processed_ims)
100 |     label_array = np.array(cls_label)
101 |     bbox_target_array = np.vstack(bbox_reg_target)
102 |     landmark_target_array = np.vstack(landmark_reg_target)
103 | 
104 |     data = {'data': im_array}
105 |     label = {'label': label_array,
106 |              'bbox_target': bbox_target_array,
107 |              'landmark_target': landmark_target_array
108 |              }
109 | 
110 |     return data, label
111 | 
112 | 
113 | 
114 | class TestImageLoader:
115 |     def __init__(self, imdb, batch_size=1, shuffle=False):
116 |         self.imdb = imdb
117 |         self.batch_size = batch_size
118 |         self.shuffle = shuffle
119 |         self.size = len(imdb)
120 |         self.index = np.arange(self.size)
121 | 
122 |         self.cur = 0
123 |         self.data = None
124 |         self.label = None
125 | 
126 |         self.reset()
127 |         self.get_batch()
128 | 
129 |     def reset(self):
130 |         self.cur = 0
131 |         if self.shuffle:
132 |             np.random.shuffle(self.index)
133 | 
134 |     def iter_next(self):
135 |         return self.cur + self.batch_size <= self.size
136 | 
137 |     def __iter__(self):
138 |         return self
139 | 
140 |     def __next__(self):
141 |         return self.next()
142 | 
143 |     def next(self):
144 |         if self.iter_next():
145 |             self.get_batch()
146 |             self.cur += self.batch_size
147 |             return self.data
148 |         else:
149 |             raise StopIteration
150 | 
151 |     def getindex(self):
152 |         return self.cur / self.batch_size
153 | 
154 |     def getpad(self):
155 |         if self.cur + self.batch_size > self.size:
156 |             return self.cur + self.batch_size - self.size
157 |         else:
158 |             return 0
159 | 
160 |     def get_batch(self):
161 |         cur_from = self.cur
162 |         cur_to = min(cur_from + self.batch_size, self.size)
163 |         imdb = [self.imdb[self.index[i]] for i in range(cur_from, cur_to)]
164 |         data = get_testbatch(imdb)
165 |         self.data = data['data']
166 |         
167 |         
168 | def get_testbatch(imdb):
169 |     assert len(imdb) == 1, "Single batch only"
170 |     im = cv2.imread(imdb[0]['image'])
171 |     data = {'data': im}
172 |     return data
173 |     


--------------------------------------------------------------------------------
/train_net/train_p_net.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Tue May 15 18:48:03 2018
  5 | 
  6 | @author: wujiyang
  7 | """
  8 | 
  9 | import sys
 10 | sys.path.append("/home/wujiyang/FaceProjects/MTCNN_TRAIN")
 11 | 
 12 | import os
 13 | import argparse
 14 | import datetime
 15 | import torch
 16 | import config
 17 | from tools.image_reader import TrainImageReader
 18 | from train_net.models import PNet, LossFn
 19 | from train_net.models import compute_accuracy
 20 | import tools.image_tools as image_tools
 21 | from tools.imagedb import ImageDB
 22 | 
 23 | 
 24 | def train_p_net(annotation_file, model_store_path, end_epoch=50, frequent=200, base_lr=0.01, batch_size=256, use_cuda=True):
 25 |     
 26 |     # initialize the PNet ,loss function and set optimization for this network
 27 |     if not os.path.exists(model_store_path):
 28 |         os.makedirs(model_store_path)
 29 |     net = PNet(is_train=True, use_cuda=use_cuda)
 30 |     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 31 |     if use_cuda:
 32 |         net.to(device)
 33 |     lossfn = LossFn()
 34 |     optimizer = torch.optim.Adam(net.parameters(), lr=base_lr)
 35 |     scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 25, 40], gamma=0.1)
 36 |     # load training image
 37 |     imagedb = ImageDB(annotation_file)
 38 |     gt_imdb = imagedb.load_imdb()
 39 |     gt_imdb = imagedb.append_flipped_images(gt_imdb)
 40 |     train_data = TrainImageReader(gt_imdb, 12, batch_size, shuffle=True)
 41 |     
 42 |     # train net 
 43 |     net.train()
 44 |     for cur_epoch in range(end_epoch):
 45 |         scheduler.step()
 46 |         train_data.reset() # shuffle the data for this epoch
 47 |         for batch_idx, (image, (gt_label, gt_bbox, gt_landmark)) in enumerate(train_data):
 48 |             im_tensor = [image_tools.convert_image_to_tensor(image[i,:,:,:]) for i in range(image.shape[0])]
 49 |             im_tensor = torch.stack(im_tensor)
 50 |             
 51 |             gt_label = torch.from_numpy(gt_label).float()
 52 |             gt_bbox = torch.from_numpy(gt_bbox).float()
 53 |             # gt_landmark = torch.from_numpy(gt_landmark).float()
 54 |             if use_cuda:
 55 |                 im_tensor = im_tensor.to(device)
 56 |                 gt_label = gt_label.to(device)
 57 |                 gt_bbox = gt_bbox.to(device)
 58 |             
 59 |             cls_pred, box_offset_pred = net(im_tensor)
 60 |             cls_loss = lossfn.cls_loss(gt_label, cls_pred)
 61 |             box_offset_loss = lossfn.box_loss(gt_label, gt_bbox, box_offset_pred)
 62 |             all_loss = cls_loss * 1.0 + box_offset_loss * 0.5
 63 |             
 64 |             if batch_idx % frequent == 0:
 65 |                 accuracy = compute_accuracy(cls_pred, gt_label)
 66 |                 print("[%s, Epoch: %d, Step: %d] accuracy: %.6f, all_loss: %.6f, cls_loss: %.6f, bbox_reg_loss: %.6f, lr: %.6f" % 
 67 |                       (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), cur_epoch + 1, batch_idx, accuracy.data.tolist(), 
 68 |                        all_loss.data.tolist(), cls_loss.data.tolist(), box_offset_loss.data.tolist(), scheduler.get_lr()[0]))
 69 |             
 70 |             optimizer.zero_grad()
 71 |             all_loss.backward()
 72 |             optimizer.step()
 73 |         
 74 |         # TODO: add validation set for trained model   
 75 |         
 76 |         if (cur_epoch + 1) % 10 == 0:
 77 |             torch.save(net.state_dict(), os.path.join(model_store_path,"pnet_model_epoch_%d.pt" % (cur_epoch + 1)))
 78 | 
 79 |     torch.save(net.state_dict(), os.path.join(model_store_path, 'pnet_nodel_final.pt'))
 80 | 
 81 | 
 82 | 
 83 | 
 84 | def parse_args():
 85 |     parser = argparse.ArgumentParser(description='Train PNet',
 86 |                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 87 | 
 88 | 
 89 |     parser.add_argument('--anno_file', dest='annotation_file', help='training data annotation file', 
 90 |                         default=os.path.join(config.ANNO_STORE_DIR,config.PNET_TRAIN_IMGLIST_FILENAME), type=str)
 91 |     parser.add_argument('--model_path', dest='model_store_path', help='training model store directory',
 92 |                         default=config.MODLE_STORE_DIR, type=str)
 93 |     parser.add_argument('--end_epoch', dest='end_epoch', help='end epoch of training',
 94 |                         default=config.END_EPOCH, type=int)
 95 |     parser.add_argument('--frequent', dest='frequent', help='frequency of logging',
 96 |                         default=200, type=int)
 97 |     parser.add_argument('--base_lr', dest='base_lr', help='learning rate',
 98 |                         default=config.TRAIN_LR, type=float)
 99 |     parser.add_argument('--batch_size', dest='batch_size', help='train batch size',
100 |                         default=config.TRAIN_BATCH_SIZE, type=int)
101 |     parser.add_argument('--gpu', dest='use_cuda', help='train with gpu',
102 |                         default=config.USE_CUDA, type=bool)
103 | 
104 |     args = parser.parse_args()
105 |     return args
106 | 
107 | if __name__ == '__main__':
108 |     args = parse_args()
109 |     # print('train Pnet argument:')
110 |     # print(args)
111 | 
112 |     train_p_net(annotation_file=args.annotation_file, model_store_path=args.model_store_path,
113 |                 end_epoch=args.end_epoch, frequent=args.frequent, base_lr=args.base_lr, batch_size=args.batch_size, use_cuda=args.use_cuda)
114 | 


--------------------------------------------------------------------------------
/train_net/train_r_net.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Thu May 24 19:27:03 2018
  5 | 
  6 | @author: wujiyang
  7 | """
  8 | import sys
  9 | sys.path.append("/home/wujiyang/FaceProjects/MTCNN_TRAIN")
 10 | 
 11 | import os
 12 | import argparse
 13 | import datetime
 14 | import torch
 15 | import config
 16 | from tools.image_reader import TrainImageReader
 17 | from train_net.models import RNet, LossFn
 18 | from train_net.models import compute_accuracy
 19 | import tools.image_tools as image_tools
 20 | from tools.imagedb import ImageDB
 21 | 
 22 | 
 23 | def train_r_net(annotation_file, model_store_path, end_epoch=50, frequent=200, base_lr=0.01, batch_size=256, use_cuda=True):
 24 |     
 25 |     # initialize the RNet ,loss function and set optimization for this network
 26 |     if not os.path.exists(model_store_path):
 27 |         os.makedirs(model_store_path)
 28 |         
 29 |     net = RNet(is_train=True, use_cuda=use_cuda)
 30 |     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 31 |     if use_cuda:
 32 |         net.to(device)
 33 |     lossfn = LossFn()
 34 |     optimizer = torch.optim.Adam(net.parameters(), lr=base_lr)
 35 |     scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 25, 40], gamma=0.1)
 36 |     # load training image
 37 |     imagedb = ImageDB(annotation_file)
 38 |     gt_imdb = imagedb.load_imdb()
 39 |     gt_imdb = imagedb.append_flipped_images(gt_imdb)
 40 |     train_data = TrainImageReader(gt_imdb, 24, batch_size, shuffle=True)
 41 |     
 42 |     # train net 
 43 |     net.train()
 44 |     for cur_epoch in range(end_epoch):
 45 |         scheduler.step()
 46 |         train_data.reset() # shuffle the data for this epoch
 47 |         for batch_idx, (image, (gt_label, gt_bbox, gt_landmark)) in enumerate(train_data):
 48 |             im_tensor = [image_tools.convert_image_to_tensor(image[i,:,:,:]) for i in range(image.shape[0])]
 49 |             im_tensor = torch.stack(im_tensor)
 50 |             
 51 |             gt_label = torch.from_numpy(gt_label).float()
 52 |             gt_bbox = torch.from_numpy(gt_bbox).float()
 53 |             # gt_landmark = torch.from_numpy(gt_landmark).float()
 54 |             if use_cuda:
 55 |                 im_tensor = im_tensor.to(device)
 56 |                 gt_label = gt_label.to(device)
 57 |                 gt_bbox = gt_bbox.to(device)
 58 |             
 59 |             cls_pred, box_offset_pred = net(im_tensor)
 60 |             cls_loss = lossfn.cls_loss(gt_label, cls_pred)
 61 |             box_offset_loss = lossfn.box_loss(gt_label, gt_bbox, box_offset_pred)
 62 |             all_loss = cls_loss * 1.0 + box_offset_loss * 0.5
 63 |             
 64 |             if batch_idx % frequent == 0:
 65 |                 accuracy = compute_accuracy(cls_pred, gt_label)
 66 |                 print("[%s, Epoch: %d, Step: %d] accuracy: %.6f, all_loss: %.6f, cls_loss: %.6f, bbox_reg_loss: %.6f, lr: %.6f" % 
 67 |                       (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), cur_epoch + 1, batch_idx, accuracy.data.tolist(), 
 68 |                        all_loss.data.tolist(), cls_loss.data.tolist(), box_offset_loss.data.tolist(), scheduler.get_lr()[0]))
 69 |             
 70 |             optimizer.zero_grad()
 71 |             all_loss.backward()
 72 |             optimizer.step()
 73 |         
 74 |         # TODO: add validation set for trained model   
 75 |         
 76 |         if (cur_epoch + 1) % 10 == 0:
 77 |             torch.save(net.state_dict(), os.path.join(model_store_path,"rnet_model_epoch_%d.pt" % (cur_epoch + 1)))
 78 | 
 79 |     torch.save(net.state_dict(), os.path.join(model_store_path, 'rnet_model_final.pt'))
 80 | 
 81 | 
 82 | 
 83 | 
 84 | def parse_args():
 85 |     parser = argparse.ArgumentParser(description='Train RNet',
 86 |                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 87 | 
 88 | 
 89 |     parser.add_argument('--anno_file', dest='annotation_file',help='training data annotation file', 
 90 |                         default=os.path.join(config.ANNO_STORE_DIR,config.RNET_TRAIN_IMGLIST_FILENAME), type=str)
 91 |     parser.add_argument('--model_path', dest='model_store_path', help='training model store directory',
 92 |                         default=config.MODLE_STORE_DIR, type=str)
 93 |     parser.add_argument('--end_epoch', dest='end_epoch', help='end epoch of training',
 94 |                         default=config.END_EPOCH, type=int)
 95 |     parser.add_argument('--frequent', dest='frequent', help='frequency of logging',
 96 |                         default=200, type=int)
 97 |     parser.add_argument('--base_lr', dest='base_lr', help='learning rate',
 98 |                         default=config.TRAIN_LR, type=float)
 99 |     parser.add_argument('--batch_size', dest='batch_size', help='train batch size',
100 |                         default=config.TRAIN_BATCH_SIZE, type=int)
101 |     parser.add_argument('--gpu', dest='use_cuda', help='train with gpu',
102 |                         default=config.USE_CUDA, type=bool)
103 | 
104 |     args = parser.parse_args()
105 |     return args
106 | 
107 | if __name__ == '__main__':
108 |     args = parse_args()
109 |     # print('train Rnet argument:')
110 |     # print(args)
111 | 
112 |     train_r_net(annotation_file=args.annotation_file, model_store_path=args.model_store_path,
113 |                 end_epoch=args.end_epoch, frequent=args.frequent, base_lr=args.base_lr, batch_size=args.batch_size, use_cuda=args.use_cuda)
114 | 


--------------------------------------------------------------------------------
/train_net/train_o_net.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Fri May 25 21:29:56 2018
  5 | 
  6 | @author: wujiyang
  7 | """
  8 | 
  9 | import sys
 10 | sys.path.append("/home/wujiyang/FaceProjects/MTCNN_TRAIN")
 11 | 
 12 | import os
 13 | import argparse
 14 | import datetime
 15 | import torch
 16 | import config
 17 | from tools.image_reader import TrainImageReader
 18 | from train_net.models import ONet, LossFn
 19 | from train_net.models import compute_accuracy
 20 | import tools.image_tools as image_tools
 21 | from tools.imagedb import ImageDB
 22 | 
 23 | def train_o_net(annotation_file, model_store_path, end_epoch=50, frequent=200, base_lr=0.01, batch_size=256, use_cuda=True):
 24 |     
 25 |     # initialize the ONet ,loss function and set optimization for this network
 26 |     if not os.path.exists(model_store_path):
 27 |         os.makedirs(model_store_path)
 28 |         
 29 |     net = ONet(is_train=True, use_cuda=use_cuda)
 30 |     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 31 |     if use_cuda:
 32 |         net.to(device)
 33 |     lossfn = LossFn()
 34 |     optimizer = torch.optim.Adam(net.parameters(), lr=base_lr)
 35 |     scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 25, 40], gamma=0.1)
 36 |     # load training image
 37 |     imagedb = ImageDB(annotation_file)
 38 |     gt_imdb = imagedb.load_imdb()
 39 |     gt_imdb = imagedb.append_flipped_images(gt_imdb)
 40 |     train_data = TrainImageReader(gt_imdb, 48, batch_size, shuffle=True)
 41 | 
 42 |     # train net 
 43 |     net.train()
 44 |     for cur_epoch in range(end_epoch):
 45 |         scheduler.step()
 46 |         train_data.reset() # shuffle the data for this epoch
 47 |         for batch_idx, (image, (gt_label, gt_bbox, gt_landmark)) in enumerate(train_data):
 48 |             im_tensor = [image_tools.convert_image_to_tensor(image[i,:,:,:]) for i in range(image.shape[0])]
 49 |             im_tensor = torch.stack(im_tensor)
 50 |             
 51 |             gt_label = torch.from_numpy(gt_label).float()
 52 |             gt_bbox = torch.from_numpy(gt_bbox).float()
 53 |             gt_landmark = torch.from_numpy(gt_landmark).float()
 54 |             if use_cuda:
 55 |                 im_tensor = im_tensor.to(device)
 56 |                 gt_label = gt_label.to(device)
 57 |                 gt_bbox = gt_bbox.to(device)
 58 |                 gt_landmark = gt_landmark.to(device)
 59 |             
 60 |             cls_pred, box_offset_pred, landmark_offset_pred = net(im_tensor)
 61 |             cls_loss = lossfn.cls_loss(gt_label, cls_pred)
 62 |             box_offset_loss = lossfn.box_loss(gt_label, gt_bbox, box_offset_pred)
 63 |             landmark_loss = lossfn.landmark_loss(gt_label, gt_landmark, landmark_offset_pred)
 64 |             all_loss = cls_loss * 0.8 + box_offset_loss * 0.6 + landmark_loss * 1.5
 65 |             
 66 |             if batch_idx % frequent == 0:
 67 |                 accuracy = compute_accuracy(cls_pred, gt_label)
 68 |                 print("[%s, Epoch: %d, Step: %d] accuracy: %.6f, all_loss: %.6f, cls_loss: %.6f, bbox_reg_loss: %.6f, landmark_loss: %.6f, lr: %.6f" % 
 69 |                       (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), cur_epoch + 1, batch_idx, accuracy.data.tolist(), 
 70 |                        all_loss.data.tolist(), cls_loss.data.tolist(), box_offset_loss.data.tolist(), landmark_loss.data.tolist(), scheduler.get_lr()[0]))
 71 |             
 72 |             optimizer.zero_grad()
 73 |             all_loss.backward()
 74 |             optimizer.step()
 75 |         
 76 |         # TODO: add validation set for trained model   
 77 |         
 78 |         if (cur_epoch + 1) % 10 == 0:
 79 |             torch.save(net.state_dict(), os.path.join(model_store_path,"onet_model_epoch_%d.pt" % (cur_epoch + 1)))
 80 | 
 81 |     torch.save(net.state_dict(), os.path.join(model_store_path, 'onet_model_final.pt'))
 82 |     
 83 |     
 84 |     
 85 | def parse_args():
 86 |     parser = argparse.ArgumentParser(description='Train ONet',
 87 |                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 88 | 
 89 | 
 90 |     parser.add_argument('--anno_file', dest='annotation_file', help='training data annotation file',
 91 |                         default=os.path.join(config.ANNO_STORE_DIR,config.ONET_TRAIN_IMGLIST_FILENAME), type=str)
 92 |     parser.add_argument('--model_path', dest='model_store_path', help='training model store directory',
 93 |                         default=config.MODLE_STORE_DIR, type=str)
 94 |     parser.add_argument('--end_epoch', dest='end_epoch', help='end epoch of training',
 95 |                         default=config.END_EPOCH, type=int)
 96 |     parser.add_argument('--frequent', dest='frequent', help='frequency of logging',
 97 |                         default=200, type=int)
 98 |     parser.add_argument('--base_lr', dest='base_lr', help='learning rate',
 99 |                         default=config.TRAIN_LR, type=float)
100 |     parser.add_argument('--batch_size', dest='batch_size', help='train batch size',
101 |                         default=config.TRAIN_BATCH_SIZE, type=int)
102 |     parser.add_argument('--gpu', dest='use_cuda', help='train with gpu',
103 |                         default=config.USE_CUDA, type=bool)
104 | 
105 |     args = parser.parse_args()
106 |     return args
107 | 
108 | if __name__ == '__main__':
109 |     args = parse_args()
110 |     # print('train Onet argument:')
111 |     # print(args)
112 | 
113 |     train_o_net(annotation_file=args.annotation_file, model_store_path=args.model_store_path,
114 |                 end_epoch=args.end_epoch, frequent=args.frequent, base_lr=args.base_lr, batch_size=args.batch_size, use_cuda=args.use_cuda)


--------------------------------------------------------------------------------
/prepare_data/gen_landmark_12.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Sat May 26 14:38:20 2018
  5 | 
  6 | @author: wujiyang
  7 | """
  8 | 
  9 | import sys
 10 | sys.path.append('/home/wujiyang/FaceProjects/MTCNN_TRAIN')
 11 | 
 12 | import os
 13 | import cv2
 14 | import numpy as np
 15 | import sys
 16 | import numpy.random as npr
 17 | import argparse
 18 | import config
 19 | import tools.utils as utils
 20 | 
 21 | 
 22 | def gen_data(anno_file, data_dir, prefix):
 23 | 
 24 |     size = 12
 25 |     
 26 |     landmark_imgs_save_dir = os.path.join(data_dir,"12/landmark")
 27 |     if not os.path.exists(landmark_imgs_save_dir):
 28 |         os.makedirs(landmark_imgs_save_dir)
 29 | 
 30 |     anno_dir = config.ANNO_STORE_DIR
 31 |     if not os.path.exists(anno_dir):
 32 |         os.makedirs(anno_dir)
 33 | 
 34 |     landmark_anno_filename = config.PNET_LANDMARK_ANNO_FILENAME
 35 |     save_landmark_anno = os.path.join(anno_dir,landmark_anno_filename)
 36 | 
 37 |     f = open(save_landmark_anno, 'w')
 38 | 
 39 |     with open(anno_file, 'r') as f2:
 40 |         annotations = f2.readlines()
 41 | 
 42 |     num = len(annotations)
 43 |     print("%d total images" % num)
 44 | 
 45 |     l_idx =0
 46 |     idx = 0
 47 |     # image_path bbox landmark(5*2)
 48 |     for annotation in annotations:
 49 |         # print imgPath
 50 | 
 51 |         annotation = annotation.strip().split(' ')
 52 |         assert len(annotation)==15,"each line should have 15 element"
 53 |         im_path = os.path.join(prefix,annotation[0].replace("\\", "/"))
 54 | 
 55 |         gt_box = map(float, annotation[1:5])
 56 |         # the bounging box in original anno_file is [left, right, top, bottom]
 57 |         gt_box = [gt_box[0], gt_box[2], gt_box[1], gt_box[3]] #[left, top, right, bottom] 
 58 |         gt_box = np.array(gt_box, dtype=np.int32)
 59 |         
 60 |         landmark = map(float, annotation[5:])
 61 |         landmark = np.array(landmark, dtype=np.float)
 62 | 
 63 |         img = cv2.imread(im_path)
 64 |         assert (img is not None)
 65 | 
 66 |         height, width, channel = img.shape
 67 |         # crop_face = img[gt_box[1]:gt_box[3]+1, gt_box[0]:gt_box[2]+1]
 68 |         # crop_face = cv2.resize(crop_face,(size,size))
 69 | 
 70 |         idx = idx + 1
 71 |         if idx % 100 == 0:
 72 |             print("%d images done, landmark images: %d"%(idx,l_idx))
 73 | 
 74 |         x1, y1, x2, y2 = gt_box
 75 | 
 76 |         # gt's width
 77 |         w = x2 - x1
 78 |         # gt's height
 79 |         h = y2 - y1
 80 |         if max(w, h) < 40 or x1 < 0 or y1 < 0:
 81 |             continue
 82 |         # random shift
 83 |         for i in range(20):
 84 |             bbox_size = npr.randint(int(min(w, h) * 0.8), np.ceil(1.25 * max(w, h)))
 85 |             delta_x = npr.randint(-w * 0.2, w * 0.2)
 86 |             delta_y = npr.randint(-h * 0.2, h * 0.2)
 87 |             nx1 = max(x1 + w / 2 - bbox_size / 2 + delta_x, 0)
 88 |             ny1 = max(y1 + h / 2 - bbox_size / 2 + delta_y, 0)
 89 | 
 90 |             nx2 = nx1 + bbox_size
 91 |             ny2 = ny1 + bbox_size
 92 |             if nx2 > width or ny2 > height:
 93 |                 continue
 94 |             crop_box = np.array([nx1, ny1, nx2, ny2])
 95 |             cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :]
 96 |             resized_im = cv2.resize(cropped_im, (size, size),interpolation=cv2.INTER_LINEAR)
 97 | 
 98 |             offset_x1 = (x1 - nx1) / float(bbox_size)
 99 |             offset_y1 = (y1 - ny1) / float(bbox_size)
100 |             offset_x2 = (x2 - nx2) / float(bbox_size)
101 |             offset_y2 = (y2 - ny2) / float(bbox_size)
102 | 
103 |             offset_left_eye_x = (landmark[0] - nx1) / float(bbox_size)
104 |             offset_left_eye_y = (landmark[1] - ny1) / float(bbox_size)
105 | 
106 |             offset_right_eye_x = (landmark[2] - nx1) / float(bbox_size)
107 |             offset_right_eye_y = (landmark[3] - ny1) / float(bbox_size)
108 | 
109 |             offset_nose_x = (landmark[4] - nx1) / float(bbox_size)
110 |             offset_nose_y = (landmark[5] - ny1) / float(bbox_size)
111 | 
112 |             offset_left_mouth_x = (landmark[6] - nx1) / float(bbox_size)
113 |             offset_left_mouth_y = (landmark[7] - ny1) / float(bbox_size)
114 | 
115 |             offset_right_mouth_x = (landmark[8] - nx1) / float(bbox_size)
116 |             offset_right_mouth_y = (landmark[9] - ny1) / float(bbox_size)
117 | 
118 | 
119 |             # cal iou
120 |             iou = utils.IoU(crop_box.astype(np.float), np.expand_dims(gt_box.astype(np.float), 0))
121 |             if iou > 0.65:
122 |                 save_file = os.path.join(landmark_imgs_save_dir, "%s.jpg" % l_idx)
123 |                 cv2.imwrite(save_file, resized_im)
124 | 
125 |                 f.write(save_file + ' -2 %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f \n' % \
126 |                 (offset_x1, offset_y1, offset_x2, offset_y2, \
127 |                 offset_left_eye_x,offset_left_eye_y,offset_right_eye_x,offset_right_eye_y,offset_nose_x,offset_nose_y,offset_left_mouth_x,offset_left_mouth_y,offset_right_mouth_x,offset_right_mouth_y))
128 | 
129 |                 l_idx += 1
130 | 
131 |     f.close()
132 | 
133 | 
134 | 
135 | 
136 | def parse_args():
137 |     parser = argparse.ArgumentParser(description='Test mtcnn',
138 |                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
139 | 
140 |     parser.add_argument('--traindata_store', dest='traindata_store', help='dface train data temporary folder',
141 |                         default=config.TRAIN_DATA_DIR, type=str)
142 |     parser.add_argument('--anno_file', dest='annotation_file', help='landmark dataset original annotation file',
143 |                         default=os.path.join(config.ANNO_STORE_DIR,"landmark_imagelist.txt"), type=str)
144 |     parser.add_argument('--prefix_path', dest='prefix_path', help='annotation file image prefix root path',
145 |                         default='/home/wujiyang/FaceProjects/MTCNN_TRAIN/training_data/landmark_train', type=str)
146 | 
147 | 
148 |     args = parser.parse_args()
149 |     return args
150 | 
151 | if __name__ == '__main__':
152 |     args = parse_args()
153 | 
154 |     gen_data(args.annotation_file, args.traindata_store, args.prefix_path)
155 | 
156 | 


--------------------------------------------------------------------------------
/prepare_data/gen_landmark_48.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Fri May 25 09:05:31 2018
  5 | 
  6 | @author: wujiyang
  7 | """
  8 | import sys
  9 | sys.path.append('/home/wujiyang/FaceProjects/MTCNN_TRAIN')
 10 | 
 11 | import os
 12 | import cv2
 13 | import numpy as np
 14 | import sys
 15 | import numpy.random as npr
 16 | import argparse
 17 | import config
 18 | import tools.utils as utils
 19 | 
 20 | 
 21 | def gen_data(anno_file, data_dir, prefix):
 22 | 
 23 |     size = 48
 24 |     
 25 |     landmark_imgs_save_dir = os.path.join(data_dir,"48/landmark")
 26 |     if not os.path.exists(landmark_imgs_save_dir):
 27 |         os.makedirs(landmark_imgs_save_dir)
 28 | 
 29 |     anno_dir = config.ANNO_STORE_DIR
 30 |     if not os.path.exists(anno_dir):
 31 |         os.makedirs(anno_dir)
 32 | 
 33 |     landmark_anno_filename = config.ONET_LANDMARK_ANNO_FILENAME
 34 |     save_landmark_anno = os.path.join(anno_dir,landmark_anno_filename)
 35 | 
 36 |     f = open(save_landmark_anno, 'w')
 37 | 
 38 |     with open(anno_file, 'r') as f2:
 39 |         annotations = f2.readlines()
 40 | 
 41 |     num = len(annotations)
 42 |     print("%d total images" % num)
 43 | 
 44 |     l_idx =0
 45 |     idx = 0
 46 |     # image_path bbox landmark(5*2)
 47 |     for annotation in annotations:
 48 |         # print imgPath
 49 | 
 50 |         annotation = annotation.strip().split(' ')
 51 |         assert len(annotation)==15,"each line should have 15 element"
 52 |         im_path = os.path.join(prefix,annotation[0].replace("\\", "/"))
 53 | 
 54 |         gt_box = map(float, annotation[1:5])
 55 |         # the bounging box in original anno_file is [left, right, top, bottom]
 56 |         gt_box = [gt_box[0], gt_box[2], gt_box[1], gt_box[3]] #[left, top, right, bottom] 
 57 |         gt_box = np.array(gt_box, dtype=np.int32)
 58 |         
 59 |         landmark = map(float, annotation[5:])
 60 |         landmark = np.array(landmark, dtype=np.float)
 61 | 
 62 |         img = cv2.imread(im_path)
 63 |         assert (img is not None)
 64 | 
 65 |         height, width, channel = img.shape
 66 |         # crop_face = img[gt_box[1]:gt_box[3]+1, gt_box[0]:gt_box[2]+1]
 67 |         # crop_face = cv2.resize(crop_face,(size,size))
 68 | 
 69 |         idx = idx + 1
 70 |         if idx % 100 == 0:
 71 |             print("%d images done, landmark images: %d"%(idx,l_idx))
 72 | 
 73 |         x1, y1, x2, y2 = gt_box
 74 | 
 75 |         # gt's width
 76 |         w = x2 - x1
 77 |         # gt's height
 78 |         h = y2 - y1
 79 |         if max(w, h) < 40 or x1 < 0 or y1 < 0:
 80 |             continue
 81 |         # random shift
 82 |         for i in range(20):
 83 |             bbox_size = npr.randint(int(min(w, h) * 0.8), np.ceil(1.25 * max(w, h)))
 84 |             delta_x = npr.randint(-w * 0.2, w * 0.2)
 85 |             delta_y = npr.randint(-h * 0.2, h * 0.2)
 86 |             nx1 = max(x1 + w / 2 - bbox_size / 2 + delta_x, 0)
 87 |             ny1 = max(y1 + h / 2 - bbox_size / 2 + delta_y, 0)
 88 | 
 89 |             nx2 = nx1 + bbox_size
 90 |             ny2 = ny1 + bbox_size
 91 |             if nx2 > width or ny2 > height:
 92 |                 continue
 93 |             crop_box = np.array([nx1, ny1, nx2, ny2])
 94 |             cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :]
 95 |             resized_im = cv2.resize(cropped_im, (size, size),interpolation=cv2.INTER_LINEAR)
 96 | 
 97 |             offset_x1 = (x1 - nx1) / float(bbox_size)
 98 |             offset_y1 = (y1 - ny1) / float(bbox_size)
 99 |             offset_x2 = (x2 - nx2) / float(bbox_size)
100 |             offset_y2 = (y2 - ny2) / float(bbox_size)
101 | 
102 |             offset_left_eye_x = (landmark[0] - nx1) / float(bbox_size)
103 |             offset_left_eye_y = (landmark[1] - ny1) / float(bbox_size)
104 | 
105 |             offset_right_eye_x = (landmark[2] - nx1) / float(bbox_size)
106 |             offset_right_eye_y = (landmark[3] - ny1) / float(bbox_size)
107 | 
108 |             offset_nose_x = (landmark[4] - nx1) / float(bbox_size)
109 |             offset_nose_y = (landmark[5] - ny1) / float(bbox_size)
110 | 
111 |             offset_left_mouth_x = (landmark[6] - nx1) / float(bbox_size)
112 |             offset_left_mouth_y = (landmark[7] - ny1) / float(bbox_size)
113 | 
114 |             offset_right_mouth_x = (landmark[8] - nx1) / float(bbox_size)
115 |             offset_right_mouth_y = (landmark[9] - ny1) / float(bbox_size)
116 | 
117 | 
118 |             # cal iou
119 |             iou = utils.IoU(crop_box.astype(np.float), np.expand_dims(gt_box.astype(np.float), 0))
120 |             if iou > 0.65:
121 |                 save_file = os.path.join(landmark_imgs_save_dir, "%s.jpg" % l_idx)
122 |                 cv2.imwrite(save_file, resized_im)
123 | 
124 |                 f.write(save_file + ' -2 %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f \n' % \
125 |                 (offset_x1, offset_y1, offset_x2, offset_y2, \
126 |                 offset_left_eye_x,offset_left_eye_y,offset_right_eye_x,offset_right_eye_y,offset_nose_x,offset_nose_y,offset_left_mouth_x,offset_left_mouth_y,offset_right_mouth_x,offset_right_mouth_y))
127 | 
128 |                 l_idx += 1
129 | 
130 |     f.close()
131 | 
132 | 
133 | 
134 | 
135 | def parse_args():
136 |     parser = argparse.ArgumentParser(description='Test mtcnn',
137 |                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
138 | 
139 |     parser.add_argument('--traindata_store', dest='traindata_store', help='dface train data temporary folder',
140 |                         default=config.TRAIN_DATA_DIR, type=str)
141 |     parser.add_argument('--anno_file', dest='annotation_file', help='landmark dataset original annotation file',
142 |                         default=os.path.join(config.ANNO_STORE_DIR,"landmark_imagelist.txt"), type=str)
143 |     parser.add_argument('--prefix_path', dest='prefix_path', help='annotation file image prefix root path',
144 |                         default='/home/wujiyang/FaceProjects/MTCNN_TRAIN/training_data/landmark_train', type=str)
145 | 
146 | 
147 |     args = parser.parse_args()
148 |     return args
149 | 
150 | if __name__ == '__main__':
151 |     args = parse_args()
152 | 
153 |     gen_data(args.annotation_file, args.traindata_store, args.prefix_path)
154 | 
155 | 
156 | 


--------------------------------------------------------------------------------
/prepare_data/gen_landmark_24.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Sat May 26 14:38:08 2018
  5 | 
  6 | @author: wujiyang
  7 | """
  8 | 
  9 | import sys
 10 | sys.path.append('/home/wujiyang/FaceProjects/MTCNN_TRAIN')
 11 | 
 12 | import os
 13 | import cv2
 14 | import numpy as np
 15 | import sys
 16 | import numpy.random as npr
 17 | import argparse
 18 | import config
 19 | import tools.utils as utils
 20 | 
 21 | 
 22 | def gen_data(anno_file, data_dir, prefix):
 23 | 
 24 |     size = 24
 25 |     
 26 |     landmark_imgs_save_dir = os.path.join(data_dir,"24/landmark")
 27 |     if not os.path.exists(landmark_imgs_save_dir):
 28 |         os.makedirs(landmark_imgs_save_dir)
 29 | 
 30 |     anno_dir = config.ANNO_STORE_DIR
 31 |     if not os.path.exists(anno_dir):
 32 |         os.makedirs(anno_dir)
 33 | 
 34 |     landmark_anno_filename = config.RNET_LANDMARK_ANNO_FILENAME
 35 |     save_landmark_anno = os.path.join(anno_dir,landmark_anno_filename)
 36 | 
 37 |     f = open(save_landmark_anno, 'w')
 38 | 
 39 |     with open(anno_file, 'r') as f2:
 40 |         annotations = f2.readlines()
 41 | 
 42 |     num = len(annotations)
 43 |     print("%d total images" % num)
 44 | 
 45 |     l_idx =0
 46 |     idx = 0
 47 |     # image_path bbox landmark(5*2)
 48 |     for annotation in annotations:
 49 |         # print imgPath
 50 | 
 51 |         annotation = annotation.strip().split(' ')
 52 |         assert len(annotation)==15,"each line should have 15 element"
 53 |         im_path = os.path.join(prefix,annotation[0].replace("\\", "/"))
 54 | 
 55 |         gt_box = map(float, annotation[1:5])
 56 |         # the bounging box in original anno_file is [left, right, top, bottom]
 57 |         gt_box = [gt_box[0], gt_box[2], gt_box[1], gt_box[3]] #[left, top, right, bottom] 
 58 |         gt_box = np.array(gt_box, dtype=np.int32)
 59 |         
 60 |         landmark = map(float, annotation[5:])
 61 |         landmark = np.array(landmark, dtype=np.float)
 62 | 
 63 |         img = cv2.imread(im_path)
 64 |         assert (img is not None)
 65 | 
 66 |         height, width, channel = img.shape
 67 |         # crop_face = img[gt_box[1]:gt_box[3]+1, gt_box[0]:gt_box[2]+1]
 68 |         # crop_face = cv2.resize(crop_face,(size,size))
 69 | 
 70 |         idx = idx + 1
 71 |         if idx % 100 == 0:
 72 |             print("%d images done, landmark images: %d"%(idx,l_idx))
 73 | 
 74 |         x1, y1, x2, y2 = gt_box
 75 | 
 76 |         # gt's width
 77 |         w = x2 - x1
 78 |         # gt's height
 79 |         h = y2 - y1
 80 |         if max(w, h) < 40 or x1 < 0 or y1 < 0:
 81 |             continue
 82 |         # random shift
 83 |         for i in range(20):
 84 |             bbox_size = npr.randint(int(min(w, h) * 0.8), np.ceil(1.25 * max(w, h)))
 85 |             delta_x = npr.randint(-w * 0.2, w * 0.2)
 86 |             delta_y = npr.randint(-h * 0.2, h * 0.2)
 87 |             nx1 = max(x1 + w / 2 - bbox_size / 2 + delta_x, 0)
 88 |             ny1 = max(y1 + h / 2 - bbox_size / 2 + delta_y, 0)
 89 | 
 90 |             nx2 = nx1 + bbox_size
 91 |             ny2 = ny1 + bbox_size
 92 |             if nx2 > width or ny2 > height:
 93 |                 continue
 94 |             crop_box = np.array([nx1, ny1, nx2, ny2])
 95 |             cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :]
 96 |             resized_im = cv2.resize(cropped_im, (size, size),interpolation=cv2.INTER_LINEAR)
 97 | 
 98 |             offset_x1 = (x1 - nx1) / float(bbox_size)
 99 |             offset_y1 = (y1 - ny1) / float(bbox_size)
100 |             offset_x2 = (x2 - nx2) / float(bbox_size)
101 |             offset_y2 = (y2 - ny2) / float(bbox_size)
102 | 
103 |             offset_left_eye_x = (landmark[0] - nx1) / float(bbox_size)
104 |             offset_left_eye_y = (landmark[1] - ny1) / float(bbox_size)
105 | 
106 |             offset_right_eye_x = (landmark[2] - nx1) / float(bbox_size)
107 |             offset_right_eye_y = (landmark[3] - ny1) / float(bbox_size)
108 | 
109 |             offset_nose_x = (landmark[4] - nx1) / float(bbox_size)
110 |             offset_nose_y = (landmark[5] - ny1) / float(bbox_size)
111 | 
112 |             offset_left_mouth_x = (landmark[6] - nx1) / float(bbox_size)
113 |             offset_left_mouth_y = (landmark[7] - ny1) / float(bbox_size)
114 | 
115 |             offset_right_mouth_x = (landmark[8] - nx1) / float(bbox_size)
116 |             offset_right_mouth_y = (landmark[9] - ny1) / float(bbox_size)
117 | 
118 | 
119 |             # cal iou
120 |             iou = utils.IoU(crop_box.astype(np.float), np.expand_dims(gt_box.astype(np.float), 0))
121 |             if iou > 0.65:
122 |                 save_file = os.path.join(landmark_imgs_save_dir, "%s.jpg" % l_idx)
123 |                 cv2.imwrite(save_file, resized_im)
124 | 
125 |                 f.write(save_file + ' -2 %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f \n' % \
126 |                 (offset_x1, offset_y1, offset_x2, offset_y2, \
127 |                 offset_left_eye_x,offset_left_eye_y,offset_right_eye_x,offset_right_eye_y,offset_nose_x,offset_nose_y,offset_left_mouth_x,offset_left_mouth_y,offset_right_mouth_x,offset_right_mouth_y))
128 | 
129 |                 l_idx += 1
130 | 
131 |     f.close()
132 | 
133 | 
134 | 
135 | 
136 | def parse_args():
137 |     parser = argparse.ArgumentParser(description='Test mtcnn',
138 |                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
139 | 
140 |     parser.add_argument('--traindata_store', dest='traindata_store', help='dface train data temporary folder',
141 |                         default=config.TRAIN_DATA_DIR, type=str)
142 |     parser.add_argument('--anno_file', dest='annotation_file', help='landmark dataset original annotation file',
143 |                         default=os.path.join(config.ANNO_STORE_DIR,"landmark_imagelist.txt"), type=str)
144 |     parser.add_argument('--prefix_path', dest='prefix_path', help='annotation file image prefix root path',
145 |                         default='/home/wujiyang/FaceProjects/MTCNN_TRAIN/training_data/landmark_train', type=str)
146 | 
147 | 
148 |     args = parser.parse_args()
149 |     return args
150 | 
151 | if __name__ == '__main__':
152 |     args = parse_args()
153 | 
154 |     gen_data(args.annotation_file, args.traindata_store, args.prefix_path)
155 | 
156 | 
157 | 


--------------------------------------------------------------------------------
/prepare_data/gen_Pnet_train_data.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Tue May 15 09:35:11 2018
  5 | 
  6 | @author: wujiyang
  7 | """
  8 | import sys
  9 | sys.path.append("/home/wujiyang/FaceProjects/MTCNN_TRAIN")
 10 | 
 11 | import argparse
 12 | import numpy as np
 13 | import cv2
 14 | import os
 15 | import numpy.random as npr
 16 | 
 17 | import config
 18 | from tools.utils import IoU
 19 | 
 20 | 
 21 | def gen_pnet_data(data_dir, anno_file, prefix):
 22 |     neg_save_dir =  os.path.join(data_dir, "12/negative")
 23 |     pos_save_dir =  os.path.join(data_dir, "12/positive")
 24 |     part_save_dir = os.path.join(data_dir, "12/part")
 25 | 
 26 |     for dir_path in [neg_save_dir, pos_save_dir, part_save_dir]:
 27 |         if not os.path.exists(dir_path):
 28 |             os.makedirs(dir_path)
 29 | 
 30 |     save_dir = os.path.join(data_dir,"pnet")
 31 |     if not os.path.exists(save_dir):
 32 |         os.mkdir(save_dir)
 33 | 
 34 |     post_save_file = os.path.join(config.ANNO_STORE_DIR, config.PNET_POSTIVE_ANNO_FILENAME)
 35 |     neg_save_file = os.path.join(config.ANNO_STORE_DIR, config.PNET_NEGATIVE_ANNO_FILENAME)
 36 |     part_save_file = os.path.join(config.ANNO_STORE_DIR, config.PNET_PART_ANNO_FILENAME)
 37 |     
 38 |     f1 = open(post_save_file, 'w')
 39 |     f2 = open(neg_save_file, 'w')
 40 |     f3 = open(part_save_file, 'w')
 41 | 
 42 |     with open(anno_file, 'r') as f:
 43 |         annotations = f.readlines()
 44 | 
 45 |     num = len(annotations)
 46 |     print("%d pics in total" % num)
 47 |     
 48 |     p_idx = 0   # positive examples index
 49 |     n_idx = 0   # negative examples index
 50 |     d_idx = 0   # partface examples index
 51 |     idx = 0     # pics index
 52 |     box_idx = 0 # boxes index
 53 |     
 54 |     for annotation in annotations:
 55 |     # for i in range(100):
 56 |         annotation = annotation.strip().split(' ')
 57 |         # annotation = annotations[i].strip().split(' ')
 58 |         im_path = os.path.join(prefix,annotation[0])
 59 |         bbox = list(map(float, annotation[1:]))
 60 |         boxes = np.array(bbox, dtype=np.int32).reshape(-1, 4)
 61 |         img = cv2.imread(im_path)
 62 |         idx += 1
 63 | 
 64 |         height, width, channel = img.shape
 65 | 
 66 |         neg_num = 0
 67 |         while neg_num < 50:
 68 |             size = npr.randint(12, min(width, height) / 2)
 69 |             nx = npr.randint(0, width - size)
 70 |             ny = npr.randint(0, height - size)
 71 |             crop_box = np.array([nx, ny, nx + size, ny + size])
 72 | 
 73 |             Iou = IoU(crop_box, boxes)
 74 |             
 75 |             if np.max(Iou) < 0.3:
 76 |                 # Iou with all gts must below 0.3
 77 |                 save_file = os.path.join(neg_save_dir, "%s.jpg"%n_idx)
 78 |                 f2.write(save_file + ' 0\n')
 79 |                 cropped_im = img[ny : ny + size, nx : nx + size, :]
 80 |                 resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)
 81 |                 cv2.imwrite(save_file, resized_im)
 82 |                 n_idx += 1
 83 |                 neg_num += 1
 84 | 
 85 |         for box in boxes:
 86 |             # box (x_left, y_top, x_right, y_bottom)
 87 |             x1, y1, x2, y2 = box
 88 |             w = x2 - x1
 89 |             h = y2 - y1
 90 | 
 91 |             # ignore small faces
 92 |             # in case the ground truth boxes of small faces are not accurate
 93 |             if max(w, h) < 40 or x1 < 0 or y1 < 0:
 94 |                 continue
 95 | 
 96 |             # generate negative examples that have overlap with gt
 97 |             for i in range(5):
 98 |                 size = npr.randint(12,  min(width, height) / 2)
 99 |                 # delta_x and delta_y are offsets of (x1, y1)
100 |                 delta_x = npr.randint(max(-size, -x1), w)
101 |                 delta_y = npr.randint(max(-size, -y1), h)
102 |                 nx1 = max(0, x1 + delta_x)
103 |                 ny1 = max(0, y1 + delta_y)
104 | 
105 |                 if nx1 + size > width or ny1 + size > height:
106 |                     continue
107 |                 crop_box = np.array([nx1, ny1, nx1 + size, ny1 + size])
108 |                 Iou = IoU(crop_box, boxes)
109 | 
110 |                 if np.max(Iou) < 0.3:
111 |                     # Iou with all gts must below 0.3
112 |                     save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
113 |                     cropped_im = img[ny1 : ny1 + size, nx1 : nx1 + size, :]
114 |                     resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)
115 |                     f2.write(save_file + ' 0\n') # neg samples with label 0
116 |                     cv2.imwrite(save_file, resized_im)
117 |                     n_idx += 1
118 | 
119 |             # generate positive examples and part faces
120 |             for i in range(20):
121 |                 size = npr.randint(int(min(w, h) * 0.8), np.ceil(1.25 * max(w, h)))
122 | 
123 |                 # delta here is the offset of box center
124 |                 delta_x = npr.randint(-w * 0.2, w * 0.2)
125 |                 delta_y = npr.randint(-h * 0.2, h * 0.2)
126 | 
127 |                 nx1 = int(max(x1 + w / 2 + delta_x - size / 2, 0))
128 |                 ny1 = int(max(y1 + h / 2 + delta_y - size / 2, 0))
129 |                 nx2 = nx1 + size
130 |                 ny2 = ny1 + size
131 | 
132 |                 if nx2 > width or ny2 > height:
133 |                     continue
134 |                 crop_box = np.array([nx1, ny1, nx2, ny2])
135 | 
136 |                 offset_x1 = (x1 - nx1) / float(size)
137 |                 offset_y1 = (y1 - ny1) / float(size)
138 |                 offset_x2 = (x2 - nx2) / float(size)
139 |                 offset_y2 = (y2 - ny2) / float(size)
140 | 
141 |                 cropped_im = img[ny1 : ny2, nx1 : nx2, :]
142 |                 resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)
143 | 
144 |                 box_ = box.reshape(1, -1)
145 |                 if IoU(crop_box, box_) >= 0.65:
146 |                     save_file = os.path.join(pos_save_dir, "%s.jpg"%p_idx)  # positive samples with label 1 and offset
147 |                     f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n'%(offset_x1, offset_y1, offset_x2, offset_y2))
148 |                     cv2.imwrite(save_file, resized_im)
149 |                     p_idx += 1
150 |                 elif IoU(crop_box, box_) >= 0.4:
151 |                     save_file = os.path.join(part_save_dir, "%s.jpg"%d_idx)
152 |                     f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n'%(offset_x1, offset_y1, offset_x2, offset_y2))
153 |                     cv2.imwrite(save_file, resized_im)
154 |                     d_idx += 1
155 |             box_idx += 1
156 |             print("%s images done, pos: %s part: %s neg: %s"%(idx, p_idx, d_idx, n_idx))
157 | 
158 |     f1.close()
159 |     f2.close()
160 |     f3.close()
161 |         
162 |         
163 |         
164 | def parse_args():
165 |     parser = argparse.ArgumentParser(description='Test mtcnn',
166 |                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
167 | 
168 |     parser.add_argument('--face_traindata_store', dest='traindata_store', help='face train data temporary folder',
169 |                         default=config.TRAIN_DATA_DIR, type=str)
170 |     parser.add_argument('--anno_file', dest='annotation_file', help='wider face original annotation file',
171 |                         default=os.path.join(config.ANNO_STORE_DIR,"wider_origin_anno.txt"), type=str)
172 |     parser.add_argument('--prefix_path', dest='prefix_path', help='annotation file image prefix root path',
173 |                         default='/home/wujiyang/data/Widerface/WIDER_train/images', type=str)
174 | 
175 | 
176 | 
177 | 
178 |     args = parser.parse_args()
179 |     return args
180 | 
181 | if __name__ == '__main__':
182 |     args = parse_args()
183 |     gen_pnet_data(args.traindata_store,args.annotation_file,args.prefix_path)
184 |     # gen_pnet_data('training_data', 'anno_store/wider_origin_anno.txt',  '/home/wujiyang/data/Widerface/WIDER_train/images')
185 |         
186 | 


--------------------------------------------------------------------------------
/train_net/models.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Tue May 15 16:41:52 2018
  5 | 
  6 | @author: wujiyang
  7 | """
  8 | 
  9 | import torch
 10 | import torch.nn as nn
 11 | import torch.nn.functional as F
 12 | 
 13 | def weights_init(m):
 14 |     if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
 15 |         nn.init.xavier_uniform_(m.weight.data)
 16 |         nn.init.constant_(m.bias, 0.1)
 17 |         
 18 | 
 19 | def compute_accuracy(prob_cls, gt_cls):
 20 |     '''return a tensor which contains predicted accuracy'''
 21 |     prob_cls = torch.squeeze(prob_cls)
 22 |     gt_cls = torch.squeeze(gt_cls)
 23 |     
 24 |     # only positive and negative examples has the classification loss which labels 1 and 0
 25 |     mask = torch.ge(gt_cls, 0)
 26 |     valid_gt_cls = torch.masked_select(gt_cls,mask)
 27 |     valid_prob_cls = torch.masked_select(prob_cls,mask)
 28 |     # computer predicted accuracy
 29 |     size = min(valid_gt_cls.size()[0], valid_prob_cls.size()[0])
 30 |     prob_ones = torch.ge(valid_prob_cls,0.6).float()
 31 |     right_ones = torch.eq(prob_ones,valid_gt_cls).float()
 32 | 
 33 |     return torch.div(torch.mul(torch.sum(right_ones),float(1.0)),float(size))
 34 | 
 35 |         
 36 | class LossFn:
 37 |     def __init__(self, cls_factor=1, box_factor=1, landmark_factor=1):
 38 |         # loss function
 39 |         self.cls_factor = cls_factor
 40 |         self.box_factor = box_factor
 41 |         self.land_factor = landmark_factor
 42 |         self.loss_cls = nn.BCELoss()
 43 |         self.loss_box = nn.MSELoss()
 44 |         self.loss_landmark = nn.MSELoss()
 45 |         
 46 |     def cls_loss(self, gt_label, pred_label):
 47 |         pred_label = torch.squeeze(pred_label)
 48 |         gt_label = torch.squeeze(gt_label)
 49 |         # only use negative samples and positive samples for classification which labels 0 and 1
 50 |         mask = torch.ge(gt_label, 0)
 51 |         valid_gt_label = torch.masked_select(gt_label, mask)
 52 |         valid_pred_label = torch.masked_select(pred_label, mask)
 53 |         return self.loss_cls(valid_pred_label, valid_gt_label) * self.cls_factor
 54 |     
 55 |     def box_loss(self, gt_label, gt_offset, pred_offset):
 56 |         pred_offset = torch.squeeze(pred_offset)
 57 |         gt_offset = torch.squeeze(gt_offset)
 58 |         gt_label = torch.squeeze(gt_label)
 59 |         # only use positive samples and partface samples for bounding box regression which labels 1 and -1
 60 |         unmask = torch.eq(gt_label,0)
 61 |         mask = torch.eq(unmask,0)
 62 |         #convert mask to dim index
 63 |         chose_index = torch.nonzero(mask.data)
 64 |         chose_index = torch.squeeze(chose_index)
 65 |         #only valid element can effect the loss
 66 |         valid_gt_offset = gt_offset[chose_index, :]
 67 |         valid_pred_offset = pred_offset[chose_index, :]
 68 |         return self.loss_box(valid_pred_offset, valid_gt_offset) * self.box_factor
 69 |     
 70 |     def landmark_loss(self, gt_label, gt_landmark, pred_landmark):
 71 |         pred_landmark = torch.squeeze(pred_landmark)
 72 |         gt_landmark = torch.squeeze(gt_landmark)
 73 |         gt_label = torch.squeeze(gt_label)
 74 |         # only CelebA data been used in landmark regression
 75 |         mask = torch.eq(gt_label, -2)
 76 |         
 77 |         chose_index = torch.nonzero(mask.data)
 78 |         chose_index = torch.squeeze(chose_index)
 79 | 
 80 |         valid_gt_landmark = gt_landmark[chose_index, :]
 81 |         valid_pred_landmark = pred_landmark[chose_index, :]
 82 |         return self.loss_landmark(valid_pred_landmark,valid_gt_landmark) * self.land_factor
 83 |     
 84 |     
 85 | 
 86 | class PNet(nn.Module):
 87 |     '''PNet'''
 88 |     def __init__(self, is_train=False, use_cuda=True):
 89 |         super(PNet, self).__init__()
 90 |         self.is_train = is_train
 91 |         self.use_cuda = use_cuda
 92 |         
 93 |         # backend
 94 |         self.pre_layer = nn.Sequential(
 95 |             nn.Conv2d(3, 10, kernel_size=3, stride=1),
 96 |             nn.PReLU(), 
 97 |             nn.MaxPool2d(kernel_size=2, stride=2),
 98 |             nn.Conv2d(10, 16, kernel_size=3, stride=1), 
 99 |             nn.PReLU(), 
100 |             nn.Conv2d(16, 32, kernel_size=3, stride=1),
101 |             nn.PReLU()
102 |         )
103 |         
104 |         # face classification 
105 |         self.conv4_1 = nn.Conv2d(32, 1, kernel_size=1, stride=1)
106 |         # bounding box regresion
107 |         self.conv4_2 = nn.Conv2d(32, 4, kernel_size=1, stride=1)
108 |         # landmark localization
109 |         self.conv4_3 = nn.Conv2d(32, 10, kernel_size=1, stride=1)
110 | 
111 |         # weight initiation with xavier
112 |         self.apply(weights_init)
113 |         
114 |     def forward(self, x):
115 |         x = self.pre_layer(x)
116 |         label = F.sigmoid(self.conv4_1(x))
117 |         offset = self.conv4_2(x)
118 |         # landmark = self.conv4_3(x)
119 | 
120 |         if self.is_train is True:
121 |             return label,offset
122 |         
123 |         return label, offset
124 |         
125 |     
126 | class RNet(nn.Module):
127 |     ''' RNet '''
128 | 
129 |     def __init__(self,is_train=False, use_cuda=True):
130 |         super(RNet, self).__init__()
131 |         self.is_train = is_train
132 |         self.use_cuda = use_cuda
133 |         # backend
134 |         self.pre_layer = nn.Sequential(
135 |             nn.Conv2d(3, 28, kernel_size=3, stride=1),
136 |             nn.PReLU(),
137 |             nn.MaxPool2d(kernel_size=3, stride=2), 
138 |             nn.Conv2d(28, 48, kernel_size=3, stride=1), 
139 |             nn.PReLU(), 
140 |             nn.MaxPool2d(kernel_size=3, stride=2), 
141 |             nn.Conv2d(48, 64, kernel_size=2, stride=1), 
142 |             nn.PReLU()  
143 | 
144 |         )
145 |         # this is little different from MTCNN paper, cause in pytroch, pooliing is calculated by floor()
146 |         self.conv4 = nn.Linear(64*2*2, 128)  
147 |         self.prelu4 = nn.PReLU() 
148 |         # face calssification
149 |         self.conv5_1 = nn.Linear(128, 1)
150 |         # bounding box regression
151 |         self.conv5_2 = nn.Linear(128, 4)
152 |         # lanbmark localization
153 |         self.conv5_3 = nn.Linear(128, 10)
154 |         # weight initiation weih xavier
155 |         self.apply(weights_init)
156 | 
157 |     def forward(self, x):
158 |         # backend
159 |         x = self.pre_layer(x)
160 |         #x = x.view(-1, x.size(0))
161 |         x = x.view(-1, 64 * 2 * 2)
162 |         x = self.conv4(x)
163 |         x = self.prelu4(x)
164 |         # detection
165 |         det = torch.sigmoid(self.conv5_1(x))
166 |         box = self.conv5_2(x)
167 | 
168 |         if self.is_train is True:
169 |             return det, box
170 |         
171 |         return det, box
172 |     
173 | 
174 | class ONet(nn.Module):
175 |     ''' ONet '''
176 |     def __init__(self, is_train=False, use_cuda=True):
177 |         super(ONet, self).__init__()
178 |         self.is_train = is_train
179 |         self.use_cuda = use_cuda
180 |         # backend
181 |         self.pre_layer = nn.Sequential(
182 |             nn.Conv2d(3, 32, kernel_size=3, stride=1),
183 |             nn.PReLU(), 
184 |             nn.MaxPool2d(kernel_size=3, stride=2), 
185 |             nn.Conv2d(32, 64, kernel_size=3, stride=1),
186 |             nn.PReLU(), 
187 |             nn.MaxPool2d(kernel_size=3, stride=2),
188 |             nn.Conv2d(64, 64, kernel_size=3, stride=1), 
189 |             nn.PReLU(),
190 |             nn.MaxPool2d(kernel_size=2,stride=2), 
191 |             nn.Conv2d(64,128,kernel_size=2,stride=1), 
192 |             nn.PReLU() 
193 |         )
194 |         self.conv5 = nn.Linear(128*2*2, 256) 
195 |         self.prelu5 = nn.PReLU() 
196 |         # face classification
197 |         self.conv6_1 = nn.Linear(256, 1)
198 |         # bounding box regression
199 |         self.conv6_2 = nn.Linear(256, 4)
200 |         # lanbmark localization
201 |         self.conv6_3 = nn.Linear(256, 10)
202 |         # weight initiation weih xavier
203 |         self.apply(weights_init)
204 | 
205 |     def forward(self, x):
206 |         # backend
207 |         x = self.pre_layer(x)
208 |         x = x.view(-1, 128*2*2)
209 |         x = self.conv5(x)
210 |         x = self.prelu5(x)
211 |         # detection
212 |         det = torch.sigmoid(self.conv6_1(x))
213 |         box = self.conv6_2(x)
214 |         landmark = self.conv6_3(x)
215 |         if self.is_train is True:
216 |             return det, box, landmark
217 |         
218 |         return det, box, landmark
219 |     
220 |         
221 |         


--------------------------------------------------------------------------------
/prepare_data/gen_Rnet_train_data.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Mon May 21 22:17:05 2018
  5 | 
  6 | @author: wujiyang
  7 | """
  8 | import sys
  9 | sys.path.append("/home/wujiyang/FaceProjects/MTCNN_TRAIN")
 10 | 
 11 | import cv2
 12 | import argparse
 13 | import numpy as np
 14 | from tools.detect import MtcnnDetector, create_mtcnn_net
 15 | from tools.imagedb import ImageDB
 16 | from tools.image_reader import TestImageLoader
 17 | import time
 18 | import os
 19 | import cPickle
 20 | 
 21 | from tools.utils import convert_to_square, IoU
 22 | import config
 23 | import tools.vision as vision
 24 | 
 25 | 
 26 | def gen_rnet_data(data_dir, anno_file, pnet_model_file, prefix_path='', use_cuda=True, vis=False):
 27 |     # load the pnet and pnet_detector
 28 |     
 29 |     pnet, _, _ = create_mtcnn_net(p_model_path=pnet_model_file, use_cuda=use_cuda)
 30 |     mtcnn_detector = MtcnnDetector(pnet=pnet, min_face_size=12)
 31 |     
 32 |     imagedb = ImageDB(anno_file, mode="test", prefix_path=prefix_path)
 33 |     imdb = imagedb.load_imdb()
 34 |     image_reader = TestImageLoader(imdb, 1, False)
 35 |     
 36 |     all_boxes = list()
 37 |     batch_idx = 0
 38 |     
 39 |     for databatch in image_reader:
 40 |         if batch_idx % 100 == 0:
 41 |             print ("%d images done" % batch_idx)
 42 |         im = databatch
 43 |         t = time.time()
 44 |         boxes, boxes_align = mtcnn_detector.detect_pnet(im=im)
 45 |         if boxes_align is None:
 46 |             all_boxes.append(np.array([]))
 47 |             batch_idx += 1
 48 |             continue
 49 |         if vis:
 50 |             rgb_im = cv2.cvtColor(np.asarray(im), cv2.COLOR_BGR2RGB)
 51 |             vision.vis_two(rgb_im, boxes, boxes_align)
 52 |             
 53 |         t1 = time.time() - t
 54 |         print 'time cost for image ', batch_idx, '/', image_reader.size, ': ', t1
 55 |         all_boxes.append(boxes_align)
 56 |         batch_idx += 1
 57 |         
 58 |     save_path = config.MODLE_STORE_DIR
 59 |     if not os.path.exists(save_path):
 60 |         os.mkdir(save_path)
 61 |     
 62 |     save_file = os.path.join(save_path, "pnet_detections_%d.pkl" % int(time.time()))
 63 |     
 64 |     with open(save_file, 'wb') as f:
 65 |         cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)
 66 |     
 67 |     
 68 |     #save_file = '/home/wujiyang/FaceProjects/MTCNN_TRAIN/model_store/pnet_detections_1527162101.pkl'
 69 |     get_rnet_sample_data(data_dir, anno_file, save_file, prefix_path)
 70 |     
 71 | def get_rnet_sample_data(data_dir, anno_file, det_boxes_file, prefix_path):
 72 |     
 73 |     neg_save_dir = os.path.join(data_dir, "24/negative")
 74 |     pos_save_dir = os.path.join(data_dir, "24/positive")
 75 |     part_save_dir = os.path.join(data_dir, "24/part")
 76 | 
 77 |     for dir_path in [neg_save_dir, pos_save_dir, part_save_dir]:
 78 |         if not os.path.exists(dir_path):
 79 |             os.makedirs(dir_path)
 80 |     
 81 |     # load ground truth from annotation file
 82 |     # format of each line: image/path [x1, y1, x2, y2] for each gt_box in this image
 83 |     with open(anno_file, 'r') as f:
 84 |         annotations = f.readlines()
 85 |         
 86 |     image_size = 24
 87 |     im_idx_list = list()
 88 |     gt_boxes_list = list()
 89 |     num_of_images = len(annotations)
 90 |     print ("processing %d images in total" % num_of_images)
 91 |     for annotation in annotations:
 92 |     #for i in range(10):
 93 |         annotation = annotation.strip().split(' ')
 94 |         #annotation = annotations[i].strip().split(' ')
 95 |         im_idx = os.path.join(prefix_path, annotation[0])
 96 |         boxes = map(float, annotation[1:])
 97 |         boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4)
 98 |         im_idx_list.append(im_idx)
 99 |         gt_boxes_list.append(boxes)
100 |     
101 |     save_path = config.ANNO_STORE_DIR
102 |     if not os.path.exists(save_path):
103 |         os.makedirs(save_path)
104 |         
105 |     f1 = open(os.path.join(save_path, 'pos_%d.txt' % image_size), 'w')
106 |     f2 = open(os.path.join(save_path, 'neg_%d.txt' % image_size), 'w')
107 |     f3 = open(os.path.join(save_path, 'part_%d.txt' % image_size), 'w')
108 |     
109 |     det_handle = open(det_boxes_file, 'r')
110 |     det_boxes = cPickle.load(det_handle)
111 |     print(len(det_boxes), num_of_images)
112 |     assert len(det_boxes) == num_of_images, "incorrect detections or ground truths"
113 |     
114 |     # index of neg, pos and part face, used as their image names
115 |     n_idx = 0
116 |     p_idx = 0
117 |     d_idx = 0
118 |     image_done = 0
119 |     for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list):
120 |         image_done += 1
121 |         if image_done % 100 == 0:
122 |             print("%d images done" % image_done)
123 |         if dets.shape[0] == 0:
124 |             continue
125 |         img = cv2.imread(im_idx)
126 |         dets = convert_to_square(dets)
127 |         dets[:, 0:4] = np.round(dets[:, 0:4])
128 |         
129 |         # each image have at most 50 neg_samples
130 |         cur_n_idx = 0
131 |         for box in dets:
132 |             x_left, y_top, x_right, y_bottom = box[0:4].astype(int)
133 |             width = x_right - x_left
134 |             height = y_bottom - y_top
135 |             # ignore box that is too small or beyond image border
136 |             if width < 20 or x_left <= 0 or y_top <= 0 or x_right >= img.shape[1] or y_bottom >= img.shape[0]:
137 |                 continue
138 |             # compute intersection over union(IoU) between current box and all gt boxes
139 |             Iou = IoU(box, gts)
140 |             cropped_im = img[y_top:y_bottom, x_left:x_right, :]
141 |             resized_im = cv2.resize(cropped_im, (image_size, image_size),
142 |                                     interpolation=cv2.INTER_LINEAR)
143 |             # save negative images and write label
144 |             
145 |             if np.max(Iou) < 0.3:
146 |                 # Iou with all gts must below 0.3
147 |                 cur_n_idx += 1
148 |                 if cur_n_idx <= 50:
149 |                     save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
150 |                     f2.write(save_file + ' 0\n')
151 |                     cv2.imwrite(save_file, resized_im)
152 |                     n_idx += 1
153 |             else:
154 |                 # find gt_box with the highest iou
155 |                 idx = np.argmax(Iou)
156 |                 assigned_gt = gts[idx]
157 |                 x1, y1, x2, y2 = assigned_gt
158 | 
159 |                 # compute bbox reg label
160 |                 offset_x1 = (x1 - x_left) / float(width)
161 |                 offset_y1 = (y1 - y_top) / float(height)
162 |                 offset_x2 = (x2 - x_right) / float(width)
163 |                 offset_y2 = (y2 - y_bottom) / float(height)
164 | 
165 |                 # save positive and part-face images and write labels
166 |                 if np.max(Iou) >= 0.65:
167 |                     save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
168 |                     f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' % (
169 |                     offset_x1, offset_y1, offset_x2, offset_y2))
170 |                     cv2.imwrite(save_file, resized_im)
171 |                     p_idx += 1
172 | 
173 |                 elif np.max(Iou) >= 0.4:
174 |                     save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
175 |                     f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' % (
176 |                     offset_x1, offset_y1, offset_x2, offset_y2))
177 |                     cv2.imwrite(save_file, resized_im)
178 |                     d_idx += 1
179 |                     
180 |     f1.close()
181 |     f2.close()
182 |     f3.close()
183 |     
184 | 
185 | def model_store_path():
186 |     return os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))+"/model_store"
187 | 
188 | 
189 | 
190 | def parse_args():
191 |     parser = argparse.ArgumentParser(description='Test mtcnn',
192 |                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
193 | 
194 |     parser.add_argument('--face_traindata_store', dest='traindata_store', help='dface train data temporary folder',
195 |                         default=config.TRAIN_DATA_DIR, type=str)
196 |     parser.add_argument('--anno_file', dest='annotation_file', help='wider face original annotation file',
197 |                         default=os.path.join(config.ANNO_STORE_DIR,"wider_origin_anno.txt"), type=str)
198 |     parser.add_argument('--pmodel_file', dest='pnet_model_file', help='PNet model file path',
199 |                         default='/home/wujiyang/FaceProjects/MTCNN_TRAIN/model_store/pnet_model_final.pt', type=str)
200 |     parser.add_argument('--gpu', dest='use_cuda', help='with gpu',
201 |                         default=config.USE_CUDA, type=bool)
202 |     parser.add_argument('--prefix_path', dest='prefix_path', help='annotation file image prefix root path',
203 |                         default='/home/wujiyang/data/Widerface/WIDER_train/images', type=str)
204 | 
205 | 
206 |     args = parser.parse_args()
207 |     return args
208 | 
209 | 
210 | 
211 | if __name__ == '__main__':
212 |     args = parse_args()
213 |     gen_rnet_data(args.traindata_store, args.annotation_file, args.pnet_model_file, args.prefix_path, args.use_cuda)
214 |         
215 |     
216 |     
217 |     
218 |         


--------------------------------------------------------------------------------
/prepare_data/gen_Onet_train_data.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Fri May 25 20:12:13 2018
  5 | 
  6 | @author: wujiyang
  7 | """
  8 | 
  9 | import sys
 10 | sys.path.append("/home/wujiyang/FaceProjects/MTCNN_TRAIN")
 11 | 
 12 | import cv2
 13 | import argparse
 14 | import numpy as np
 15 | from tools.detect import MtcnnDetector, create_mtcnn_net
 16 | from tools.imagedb import ImageDB
 17 | from tools.image_reader import TestImageLoader
 18 | import time
 19 | import os
 20 | import cPickle
 21 | 
 22 | from tools.utils import convert_to_square, IoU
 23 | import config
 24 | import tools.vision as vision
 25 | 
 26 | def gen_onet_data(data_dir, anno_file, pnet_model_file, rnet_model_file, prefix_path='', use_cuda=True, vis=False):
 27 |     
 28 |     pnet, rnet, _ = create_mtcnn_net(p_model_path=pnet_model_file, r_model_path=rnet_model_file, use_cuda=use_cuda)
 29 |     mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, min_face_size=12)
 30 | 
 31 |     imagedb = ImageDB(anno_file, mode="test", prefix_path=prefix_path)
 32 |     imdb = imagedb.load_imdb()
 33 |     image_reader = TestImageLoader(imdb,1,False)
 34 | 
 35 |     all_boxes = list()
 36 |     batch_idx = 0
 37 |     
 38 |     for databatch in image_reader:
 39 |         if batch_idx % 100 == 0:
 40 |             print("%d images done" % batch_idx)
 41 |         im = databatch
 42 |         t = time.time()
 43 |         #detect an image by pnet and rnet
 44 |         p_boxes, p_boxes_align = mtcnn_detector.detect_pnet(im=im)
 45 |         boxes, boxes_align = mtcnn_detector.detect_rnet(im=im, dets=p_boxes_align)
 46 |         if boxes_align is None:
 47 |             all_boxes.append(np.array([]))
 48 |             batch_idx += 1
 49 |             continue
 50 |         if vis:
 51 |             rgb_im = cv2.cvtColor(np.asarray(im), cv2.COLOR_BGR2RGB)
 52 |             vision.vis_two(rgb_im, boxes, boxes_align)
 53 | 
 54 |         t1 = time.time() - t
 55 |         print 'time cost for image ', batch_idx, '/', image_reader.size, ': ', t1
 56 |         all_boxes.append(boxes_align)
 57 |         batch_idx += 1
 58 |         
 59 |     save_path = config.MODLE_STORE_DIR
 60 |     if not os.path.exists(save_path):
 61 |         os.mkdir(save_path)
 62 |     
 63 |     save_file = os.path.join(save_path, "rnet_detections_%d.pkl" % int(time.time()))
 64 |     
 65 |     with open(save_file, 'wb') as f:
 66 |         cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)
 67 |     
 68 |     
 69 |     #save_file = '/home/wujiyang/FaceProjects/MTCNN_TRAIN/model_store/rnet_detections_1527304558.pkl'
 70 |     get_onet_sample_data(data_dir, anno_file, save_file, prefix_path)
 71 |     
 72 |     
 73 | def get_onet_sample_data(data_dir, anno_file, det_boxs_file, prefix):
 74 |     
 75 |     neg_save_dir = os.path.join(data_dir, "48/negative")
 76 |     pos_save_dir = os.path.join(data_dir, "48/positive")
 77 |     part_save_dir = os.path.join(data_dir, "48/part")
 78 | 
 79 |     for dir_path in [neg_save_dir, pos_save_dir, part_save_dir]:
 80 |         if not os.path.exists(dir_path):
 81 |             os.makedirs(dir_path)
 82 | 
 83 |     # load ground truth from annotation file
 84 |     # format of each line: image/path [x1,y1,x2,y2] for each gt_box in this image
 85 | 
 86 |     with open(anno_file, 'r') as f:
 87 |         annotations = f.readlines()
 88 | 
 89 |     image_size = 48
 90 |     im_idx_list = list()
 91 |     gt_boxes_list = list()
 92 |     num_of_images = len(annotations)
 93 |     print("processing %d images in total" % num_of_images)
 94 |     
 95 |     for annotation in annotations:
 96 |         annotation = annotation.strip().split(' ')
 97 |         im_idx = os.path.join(prefix,annotation[0])
 98 | 
 99 |         boxes = map(float, annotation[1:])
100 |         boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4)
101 |         im_idx_list.append(im_idx)
102 |         gt_boxes_list.append(boxes)
103 | 
104 | 
105 |     save_path = config.ANNO_STORE_DIR
106 |     if not os.path.exists(save_path):
107 |         os.makedirs(save_path)
108 | 
109 |     f1 = open(os.path.join(save_path, 'pos_%d.txt' % image_size), 'w')
110 |     f2 = open(os.path.join(save_path, 'neg_%d.txt' % image_size), 'w')
111 |     f3 = open(os.path.join(save_path, 'part_%d.txt' % image_size), 'w')
112 | 
113 |     det_handle = open(det_boxs_file, 'r')
114 | 
115 |     det_boxes = cPickle.load(det_handle)
116 |     print(len(det_boxes), num_of_images)
117 |     assert len(det_boxes) == num_of_images, "incorrect detections or ground truths"
118 |     
119 |     # index of neg, pos and part face, used as their image names
120 |     n_idx = 0
121 |     p_idx = 0
122 |     d_idx = 0
123 |     image_done = 0
124 |     for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list):
125 |         image_done += 1
126 |         if image_done % 100 == 0:
127 |             print("%d images done" % image_done)
128 |         if dets.shape[0] == 0:
129 |             continue
130 |         img = cv2.imread(im_idx)
131 |         dets = convert_to_square(dets)
132 |         dets[:, 0:4] = np.round(dets[:, 0:4])
133 | 
134 |         # each image have at most 50 neg_samples
135 |         cur_n_idx = 0
136 |         for box in dets:
137 |             x_left, y_top, x_right, y_bottom = box[0:4].astype(int)
138 |             width = x_right - x_left
139 |             height = y_bottom - y_top
140 |             # ignore box that is too small or beyond image border
141 |             if width < 20 or x_left < 0 or y_top < 0 or x_right > img.shape[1] - 1 or y_bottom > img.shape[0] - 1:
142 |                 continue
143 |             # compute intersection over union(IoU) between current box and all gt boxes
144 |             Iou = IoU(box, gts)
145 |             cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
146 |             resized_im = cv2.resize(cropped_im, (image_size, image_size),
147 |                                     interpolation=cv2.INTER_LINEAR)
148 | 
149 |             # save negative images and write label
150 |             if np.max(Iou) < 0.3:
151 |                 # Iou with all gts must below 0.3
152 |                 cur_n_idx += 1
153 |                 if cur_n_idx <= 50:
154 |                     save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
155 |                     f2.write(save_file + ' 0\n')
156 |                     cv2.imwrite(save_file, resized_im)
157 |                     n_idx += 1
158 |             else:
159 |                 # find gt_box with the highest iou
160 |                 idx = np.argmax(Iou)
161 |                 assigned_gt = gts[idx]
162 |                 x1, y1, x2, y2 = assigned_gt
163 | 
164 |                 # compute bbox reg label
165 |                 offset_x1 = (x1 - x_left) / float(width)
166 |                 offset_y1 = (y1 - y_top) / float(height)
167 |                 offset_x2 = (x2 - x_right) / float(width)
168 |                 offset_y2 = (y2 - y_bottom) / float(height)
169 | 
170 |                 # save positive and part-face images and write labels
171 |                 if np.max(Iou) >= 0.65:
172 |                     save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
173 |                     f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' % (
174 |                     offset_x1, offset_y1, offset_x2, offset_y2))
175 |                     cv2.imwrite(save_file, resized_im)
176 |                     p_idx += 1
177 | 
178 |                 elif np.max(Iou) >= 0.4:
179 |                     save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
180 |                     f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' % (
181 |                     offset_x1, offset_y1, offset_x2, offset_y2))
182 |                     cv2.imwrite(save_file, resized_im)
183 |                     d_idx += 1
184 |     f1.close()
185 |     f2.close()
186 |     f3.close()
187 |     
188 |     
189 |     
190 | def model_store_path():
191 |     return os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))+"/model_store"
192 | 
193 | def parse_args():
194 |     parser = argparse.ArgumentParser(description='Test mtcnn',
195 |                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
196 | 
197 |     parser.add_argument('--face_traindata_store', dest='traindata_store', help='face train data temporary folder',
198 |                         default=config.TRAIN_DATA_DIR, type=str)
199 |     parser.add_argument('--anno_file', dest='annotation_file', help='wider face original annotation file',
200 |                         default=os.path.join(config.ANNO_STORE_DIR,"wider_origin_anno.txt"), type=str)
201 |     parser.add_argument('--pmodel_file', dest='pnet_model_file', help='PNet model file path',
202 |                         default=os.path.join(config.MODLE_STORE_DIR,"pnet_model_final.pt"), type=str)
203 |     parser.add_argument('--rmodel_file', dest='rnet_model_file', help='RNet model file path',
204 |                         default=os.path.join(config.MODLE_STORE_DIR,"rnet_model_final.pt"), type=str)
205 |     parser.add_argument('--gpu', dest='use_cuda', help='with gpu',
206 |                         default=config.USE_CUDA, type=bool)
207 |     parser.add_argument('--prefix_path', dest='prefix_path', help='annotation file image prefix root path',
208 |                         default='/home/wujiyang/data/Widerface/WIDER_train/images', type=str)
209 | 
210 |     args = parser.parse_args()
211 |     return args
212 | 
213 | 
214 | 
215 | if __name__ == '__main__':
216 |     args = parse_args()
217 |     gen_onet_data(args.traindata_store, args.annotation_file, args.pnet_model_file, args.rnet_model_file, args.prefix_path, args.use_cuda)
218 | 
219 | 


--------------------------------------------------------------------------------
/tools/detect.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Sat May 19 20:55:10 2018
  5 | 
  6 | @author: wujiyang
  7 | """
  8 | 
  9 | import sys
 10 | sys.path.append("/home/wujiyang/FaceProjects/MTCNN_TRAIN")
 11 | 
 12 | import cv2
 13 | import time
 14 | import numpy as np
 15 | import torch
 16 | from train_net.models import PNet, RNet, ONet
 17 | import tools.utils as utils
 18 | import tools.image_tools as image_tools
 19 | 
 20 | 
 21 | def create_mtcnn_net(p_model_path=None, r_model_path=None, o_model_path=None, use_cuda=True):
 22 |     
 23 |     pnet, rnet, onet = None, None, None
 24 |     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 25 |     
 26 |     if p_model_path is not None:
 27 |         pnet = PNet(use_cuda=use_cuda)
 28 |         pnet.load_state_dict(torch.load(p_model_path))
 29 |         if(use_cuda):
 30 |             pnet.to(device)
 31 |         
 32 |         pnet.eval()
 33 |     
 34 |     if r_model_path is not None:
 35 |         rnet = RNet(use_cuda=use_cuda)
 36 |         rnet.load_state_dict(torch.load(r_model_path))
 37 |         if(use_cuda):
 38 |             rnet.to(device)
 39 |         
 40 |         rnet.eval()
 41 |         
 42 |     if o_model_path is not None:
 43 |         onet = ONet(use_cuda=use_cuda)
 44 |         onet.load_state_dict(torch.load(o_model_path))
 45 |         if(use_cuda):
 46 |             onet.to(device)
 47 |         
 48 |         onet.eval()
 49 |         
 50 |     return pnet, rnet, onet
 51 | 
 52 | 
 53 | 
 54 | class MtcnnDetector(object):
 55 |     ''' P, R, O net for face detection and landmark alignment'''
 56 |     def __init__(self, 
 57 |                  pnet=None, 
 58 |                  rnet=None, 
 59 |                  onet=None, 
 60 |                  min_face_size=12, 
 61 |                  stride=2, 
 62 |                  threshold=[0.6, 0.7, 0.7],
 63 |                  scale_factor=0.709):
 64 |         self.pnet_detector = pnet
 65 |         self.rnet_detector = rnet
 66 |         self.onet_detector = onet
 67 |         self.min_face_size = min_face_size
 68 |         self.stride=stride
 69 |         self.thresh = threshold
 70 |         self.scale_factor = scale_factor
 71 |     
 72 |     def unique_image_format(self, im):
 73 |          if not isinstance(im,np.ndarray):
 74 |             if im.mode == 'I':
 75 |                 im = np.array(im, np.int32, copy=False)
 76 |             elif im.mode == 'I;16':
 77 |                 im = np.array(im, np.int16, copy=False)
 78 |             else:
 79 |                 im = np.asarray(im)
 80 |                 
 81 |          return im
 82 |     
 83 |     def square_bbox(self, bbox):
 84 |         '''
 85 |         convert bbox to square
 86 |         Parameters:
 87 |             bbox: numpy array, shape n x m
 88 |         Returns:
 89 |             square bbox
 90 |         '''
 91 |         square_bbox = bbox.copy()
 92 | 
 93 |         h = bbox[:, 3] - bbox[:, 1] + 1
 94 |         w = bbox[:, 2] - bbox[:, 0] + 1
 95 |         l = np.maximum(h,w)
 96 |         square_bbox[:, 0] = bbox[:, 0] + w*0.5 - l*0.5
 97 |         square_bbox[:, 1] = bbox[:, 1] + h*0.5 - l*0.5
 98 | 
 99 |         square_bbox[:, 2] = square_bbox[:, 0] + l - 1
100 |         square_bbox[:, 3] = square_bbox[:, 1] + l - 1
101 |         return square_bbox
102 |     
103 |     
104 |     def generate_bounding_box(self, map, reg, scale, threshold):
105 |         """ TODO： 这个函数没看懂 """
106 |         '''
107 |         generate bbox from feature map
108 |         for PNet, there exists no fc layer, only convolution layer ,so feature map n x m x 1/4
109 |         Parameters:
110 |             map: numpy array , n x m x 1, detect score for each position
111 |             reg: numpy array , n x m x 4, bbox
112 |             scale: float number, scale of this detection
113 |             threshold: float number, detect threshold
114 |         Returns:
115 |             bbox array
116 |         '''
117 |         stride = 2
118 |         cellsize = 12
119 | 
120 |         t_index = np.where(map > threshold)
121 |         # find nothing
122 |         if t_index[0].size == 0:
123 |             return np.array([])
124 | 
125 |         dx1, dy1, dx2, dy2 = [reg[0, t_index[0], t_index[1], i] for i in range(4)]
126 |         reg = np.array([dx1, dy1, dx2, dy2])
127 |         
128 |         score = map[t_index[0], t_index[1], 0]
129 |         boundingbox = np.vstack([np.round((stride * t_index[1]) / scale),
130 |                                  np.round((stride * t_index[0]) / scale),
131 |                                  np.round((stride * t_index[1] + cellsize) / scale),
132 |                                  np.round((stride * t_index[0] + cellsize) / scale),
133 |                                  score,
134 |                                  reg,
135 |                                  # landmarks
136 |                                  ])
137 | 
138 |         return boundingbox.T
139 |     
140 |     
141 |     def resize_image(self, img, scale):
142 |         """
143 |             resize image and transform dimention to [batchsize, channel, height, width]
144 |         Parameters:
145 |         ----------
146 |             img: numpy array , height x width x channel,input image, channels in BGR order here
147 |             scale: float number, scale factor of resize operation
148 |         Returns:
149 |         -------
150 |             transformed image tensor , 1 x channel x height x width
151 |         """
152 |         height, width, channels = img.shape
153 |         new_height = int(height * scale)     # resized new height
154 |         new_width = int(width * scale)       # resized new width
155 |         new_dim = (new_width, new_height)
156 |         img_resized = cv2.resize(img, new_dim, interpolation=cv2.INTER_LINEAR)      # resized image
157 |         
158 |         return img_resized
159 |     
160 |     def pad(self, bboxes, w, h):
161 |         """
162 |             pad the the boxes
163 |         Parameters:
164 |         ----------
165 |             bboxes: numpy array, n x 5, input bboxes
166 |             w: float number, width of the input image
167 |             h: float number, height of the input image
168 |         Returns :
169 |         ------
170 |             dy, dx : numpy array, n x 1, start point of the bbox in target image
171 |             edy, edx : numpy array, n x 1, end point of the bbox in target image
172 |             y, x : numpy array, n x 1, start point of the bbox in original image
173 |             ey, ex : numpy array, n x 1, end point of the bbox in original image
174 |             tmph, tmpw: numpy array, n x 1, height and width of the bbox
175 |         """
176 |         
177 |         tmpw = (bboxes[:, 2] - bboxes[:, 0] + 1).astype(np.int32)
178 |         tmph = (bboxes[:, 3] - bboxes[:, 1] + 1).astype(np.int32)
179 |         numbox = bboxes.shape[0]
180 | 
181 |         dx = np.zeros((numbox, ))
182 |         dy = np.zeros((numbox, ))
183 |         edx, edy  = tmpw.copy()-1, tmph.copy()-1
184 | 
185 |         x, y, ex, ey = bboxes[:, 0], bboxes[:, 1], bboxes[:, 2], bboxes[:, 3]
186 | 
187 |         tmp_index = np.where(ex > w-1)
188 |         edx[tmp_index] = tmpw[tmp_index] + w - 2 - ex[tmp_index]
189 |         ex[tmp_index] = w - 1
190 | 
191 |         tmp_index = np.where(ey > h-1)
192 |         edy[tmp_index] = tmph[tmp_index] + h - 2 - ey[tmp_index]
193 |         ey[tmp_index] = h - 1
194 | 
195 |         tmp_index = np.where(x < 0)
196 |         dx[tmp_index] = 0 - x[tmp_index]
197 |         x[tmp_index] = 0
198 | 
199 |         tmp_index = np.where(y < 0)
200 |         dy[tmp_index] = 0 - y[tmp_index]
201 |         y[tmp_index] = 0
202 | 
203 |         return_list = [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]
204 |         return_list = [item.astype(np.int32) for item in return_list]
205 | 
206 |         return return_list
207 |         
208 |     
209 |     def detect_pnet(self, im):
210 |         """Get face candidates through pnet
211 | 
212 |         Parameters:
213 |         ----------
214 |         im: numpy array, input image array
215 | 
216 |         Returns:
217 |         -------
218 |         boxes: numpy array
219 |             detected boxes before calibration
220 |         boxes_align: numpy array
221 |             boxes after calibration
222 |         """
223 |         h, w, c = im.shape
224 |         net_size = 12
225 |         current_scale = float(net_size) / self.min_face_size    # find initial scale
226 |         im_resized = self.resize_image(im, current_scale)
227 |         current_height, current_width, _ = im_resized.shape
228 |         
229 |         device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
230 |         
231 |         # fcn for pnet
232 |         all_boxes = list()
233 |         while min(current_height, current_width) > net_size:
234 |             feed_imgs = []
235 |             image_tensor = image_tools.convert_image_to_tensor(im_resized)
236 |             feed_imgs.append(image_tensor)
237 |             feed_imgs = torch.stack(feed_imgs)
238 |             
239 |             if self.pnet_detector.use_cuda:
240 |                 feed_imgs = feed_imgs.to(device)
241 |                 
242 |             cls_map, reg = self.pnet_detector(feed_imgs)
243 |             cls_map_np = image_tools.convert_chwTensor_to_hwcNumpy(cls_map.cpu())
244 |             reg_np = image_tools.convert_chwTensor_to_hwcNumpy(reg.cpu())
245 |             
246 |             boxes = self.generate_bounding_box(cls_map_np[ 0, :, :], reg_np, current_scale, self.thresh[0])
247 | 
248 |             current_scale *= self.scale_factor
249 |             im_resized = self.resize_image(im, current_scale)
250 |             current_height, current_width, _ = im_resized.shape
251 | 
252 |             if boxes.size == 0:
253 |                 continue
254 |             keep = utils.nms(boxes[:, :5], 0.5, 'Union')
255 |             boxes = boxes[keep]
256 |             all_boxes.append(boxes)
257 |             
258 |         if len(all_boxes) == 0:
259 |             return None, None
260 |         
261 |         all_boxes = np.vstack(all_boxes)
262 | 
263 |         # merge the detection from first stage
264 |         keep = utils.nms(all_boxes[:, 0:5], 0.7, 'Union')
265 |         all_boxes = all_boxes[keep]
266 | 
267 |         bw = all_boxes[:, 2] - all_boxes[:, 0] + 1
268 |         bh = all_boxes[:, 3] - all_boxes[:, 1] + 1
269 |             
270 |         boxes = np.vstack([all_boxes[:,0],
271 |                    all_boxes[:,1],
272 |                    all_boxes[:,2],
273 |                    all_boxes[:,3],
274 |                    all_boxes[:,4]
275 |                   ])
276 | 
277 |         boxes = boxes.T
278 | 
279 |         align_topx = all_boxes[:, 0] + all_boxes[:, 5] * bw
280 |         align_topy = all_boxes[:, 1] + all_boxes[:, 6] * bh
281 |         align_bottomx = all_boxes[:, 2] + all_boxes[:, 7] * bw
282 |         align_bottomy = all_boxes[:, 3] + all_boxes[:, 8] * bh
283 | 
284 |         # refine the boxes
285 |         boxes_align = np.vstack([align_topx,
286 |                               align_topy,
287 |                               align_bottomx,
288 |                               align_bottomy,
289 |                               all_boxes[:, 4]
290 |                               ])
291 |         boxes_align = boxes_align.T
292 | 
293 |         return boxes, boxes_align
294 |     
295 |     
296 |     def detect_rnet(self, im, dets):
297 |         """Get face candidates using rnet
298 | 
299 |         Parameters:
300 |         ----------
301 |         im: numpy array
302 |             input image array
303 |         dets: numpy array
304 |             detection results of pnet
305 | 
306 |         Returns:
307 |         -------
308 |         boxes: numpy array
309 |             detected boxes before calibration
310 |         boxes_align: numpy array
311 |             boxes after calibration
312 |         """
313 |         h, w, c = im.shape
314 |         if dets is None:
315 |             return None, None
316 | 
317 |         dets = self.square_bbox(dets)
318 |         dets[:, 0:4] = np.round(dets[:, 0:4])
319 | 
320 |         [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
321 |         num_boxes = dets.shape[0]
322 |         
323 |         cropped_ims_tensors = []
324 |         for i in range(num_boxes):
325 |             try:
326 |                 if tmph[i] > 0 and tmpw[i] > 0:
327 |                     tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
328 |                     tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
329 |                     crop_im = cv2.resize(tmp, (24, 24))
330 |                     crop_im_tensor = image_tools.convert_image_to_tensor(crop_im)
331 |                     # cropped_ims_tensors[i, :, :, :] = crop_im_tensor
332 |                     cropped_ims_tensors.append(crop_im_tensor)
333 |             except ValueError, e:
334 |                 print e.message
335 |             
336 |         feed_imgs = torch.stack(cropped_ims_tensors)
337 |         
338 |         device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
339 |         if self.rnet_detector.use_cuda:
340 |             feed_imgs = feed_imgs.to(device)
341 |             
342 |         cls_map, reg = self.rnet_detector(feed_imgs)
343 |         cls_map = cls_map.cpu().data.numpy()
344 |         reg = reg.cpu().data.numpy()
345 |         
346 |         keep_inds = np.where(cls_map > self.thresh[1])[0]
347 | 
348 |         if len(keep_inds) > 0:
349 |             boxes = dets[keep_inds]
350 |             cls = cls_map[keep_inds]
351 |             reg = reg[keep_inds]
352 |         else:
353 |             return None, None
354 | 
355 |         keep = utils.nms(boxes, 0.7)
356 |         if len(keep) == 0:
357 |             return None, None
358 | 
359 |         keep_cls = cls[keep]
360 |         keep_boxes = boxes[keep]
361 |         keep_reg = reg[keep]
362 |         bw = keep_boxes[:, 2] - keep_boxes[:, 0]
363 |         bh = keep_boxes[:, 3] - keep_boxes[:, 1]
364 |         boxes = np.vstack([ keep_boxes[:,0],
365 |                               keep_boxes[:,1],
366 |                               keep_boxes[:,2],
367 |                               keep_boxes[:,3],
368 |                               keep_cls[:,0]
369 |                             ])
370 |         align_topx = keep_boxes[:,0] + keep_reg[:,0] * bw
371 |         align_topy = keep_boxes[:,1] + keep_reg[:,1] * bh
372 |         align_bottomx = keep_boxes[:,2] + keep_reg[:,2] * bw
373 |         align_bottomy = keep_boxes[:,3] + keep_reg[:,3] * bh
374 |         
375 |         boxes_align = np.vstack([align_topx,
376 |                                align_topy,
377 |                                align_bottomx,
378 |                                align_bottomy,
379 |                                keep_cls[:, 0]
380 |                              ])
381 |         boxes = boxes.T
382 |         boxes_align = boxes_align.T
383 | 
384 |         return boxes, boxes_align
385 |     
386 |     
387 |     def detect_onet(self, im, dets):
388 |         """Get face candidates using onet
389 | 
390 |         Parameters:
391 |         ----------
392 |         im: numpy array
393 |             input image array
394 |         dets: numpy array
395 |             detection results of rnet
396 | 
397 |         Returns:
398 |         -------
399 |         boxes_align: numpy array
400 |             boxes after calibration
401 |         landmarks_align: numpy array
402 |             landmarks after calibration
403 | 
404 |         """
405 |         h, w, c = im.shape
406 |         if dets is None:
407 |             return None, None
408 | 
409 |         dets = self.square_bbox(dets)
410 |         dets[:, 0:4] = np.round(dets[:, 0:4])
411 |         
412 |         [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
413 |         num_boxes = dets.shape[0]
414 |         
415 |         cropped_ims_tensors = []
416 |         for i in range(num_boxes):
417 |             try:
418 |                 if tmph[i] > 0 and tmpw[i] > 0:
419 |                     tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
420 |                     tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
421 |                     crop_im = cv2.resize(tmp, (48, 48))
422 |                     crop_im_tensor = image_tools.convert_image_to_tensor(crop_im)
423 |                     # cropped_ims_tensors[i, :, :, :] = crop_im_tensor
424 |                     cropped_ims_tensors.append(crop_im_tensor)
425 |             except ValueError, e:
426 |                 print e.message
427 |                 
428 |         feed_imgs = torch.stack(cropped_ims_tensors)
429 | 
430 |         device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
431 |         if self.rnet_detector.use_cuda:
432 |             feed_imgs = feed_imgs.to(device)
433 |             
434 |         cls_map, reg, landmark = self.onet_detector(feed_imgs)
435 | 
436 |         cls_map = cls_map.cpu().data.numpy()
437 |         reg = reg.cpu().data.numpy()
438 |         landmark = landmark.cpu().data.numpy()
439 | 
440 |         keep_inds = np.where(cls_map > self.thresh[2])[0]
441 |         
442 |         if len(keep_inds) > 0:
443 |             boxes = dets[keep_inds]
444 |             cls = cls_map[keep_inds]
445 |             reg = reg[keep_inds]
446 |             landmark = landmark[keep_inds]
447 |         else:
448 |             return None, None
449 |         
450 |         keep = utils.nms(boxes, 0.7, mode="Minimum")
451 | 
452 |         if len(keep) == 0:
453 |             return None, None
454 | 
455 |         keep_cls = cls[keep]
456 |         keep_boxes = boxes[keep]
457 |         keep_reg = reg[keep]
458 |         keep_landmark = landmark[keep]
459 | 
460 |         bw = keep_boxes[:, 2] - keep_boxes[:, 0]
461 |         bh = keep_boxes[:, 3] - keep_boxes[:, 1]
462 | 
463 | 
464 |         align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
465 |         align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
466 |         align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
467 |         align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh
468 | 
469 |         align_landmark_topx = keep_boxes[:, 0]
470 |         align_landmark_topy = keep_boxes[:, 1]
471 |         
472 |         
473 |         
474 |         boxes_align = np.vstack([align_topx,
475 |                                  align_topy,
476 |                                  align_bottomx,
477 |                                  align_bottomy,
478 |                                  keep_cls[:, 0]
479 |                                  ])
480 | 
481 |         boxes_align = boxes_align.T
482 | 
483 |         landmark =  np.vstack([
484 |                                  align_landmark_topx + keep_landmark[:, 0] * bw,
485 |                                  align_landmark_topy + keep_landmark[:, 1] * bh,
486 |                                  align_landmark_topx + keep_landmark[:, 2] * bw,
487 |                                  align_landmark_topy + keep_landmark[:, 3] * bh,
488 |                                  align_landmark_topx + keep_landmark[:, 4] * bw,
489 |                                  align_landmark_topy + keep_landmark[:, 5] * bh,
490 |                                  align_landmark_topx + keep_landmark[:, 6] * bw,
491 |                                  align_landmark_topy + keep_landmark[:, 7] * bh,
492 |                                  align_landmark_topx + keep_landmark[:, 8] * bw,
493 |                                  align_landmark_topy + keep_landmark[:, 9] * bh,
494 |                                  ])
495 | 
496 |         landmark_align = landmark.T
497 | 
498 |         return boxes_align, landmark_align
499 |     
500 |     
501 |     
502 |     def detect_face(self, img):
503 |         """Detect face over image
504 |         """
505 |         boxes_align = np.array([])
506 |         landmark_align = np.array([])
507 | 
508 |         t = time.time()
509 | 
510 |         # pnet
511 |         if self.pnet_detector:
512 |             boxes, boxes_align = self.detect_pnet(img)
513 |             if boxes_align is None:
514 |                 return np.array([]), np.array([])
515 | 
516 |             t1 = time.time() - t
517 |             t = time.time()
518 | 
519 |         # rnet
520 |         if self.rnet_detector:
521 |             boxes, boxes_align = self.detect_rnet(img, boxes_align)
522 |             if boxes_align is None:
523 |                 return np.array([]), np.array([])
524 | 
525 |             t2 = time.time() - t
526 |             t = time.time()
527 | 
528 |         # onet
529 |         if self.onet_detector:
530 |             boxes_align, landmark_align = self.detect_onet(img, boxes_align)
531 |             if boxes_align is None:
532 |                 return np.array([]), np.array([])
533 | 
534 |             t3 = time.time() - t
535 |             t = time.time()
536 |             print("time cost " + '{:.3f}'.format(t1+t2+t3) + '  pnet {:.3f}  rnet {:.3f}  onet {:.3f}'.format(t1, t2, t3))
537 | 
538 |         return boxes_align, landmark_align
539 |     
540 |     
541 | 


--------------------------------------------------------------------------------