├── __init__.py ├── tools ├── __init__.py ├── image_tools.py ├── utils.py ├── vision.py ├── imagedb.py ├── image_reader.py └── detect.py ├── train_net ├── __init__.py ├── train_p_net.py ├── train_r_net.py ├── train_o_net.py └── models.py ├── prepare_data ├── __init__.py ├── assemble_onet_imglist.py ├── assemble_rnet_imglist.py ├── assemble_pnet_imglist.py ├── assemble.py ├── gen_landmark_12.py ├── gen_landmark_48.py ├── gen_landmark_24.py ├── gen_Pnet_train_data.py ├── gen_Rnet_train_data.py └── gen_Onet_train_data.py ├── test.jpg ├── result.png ├── test2.jpg ├── training_data └── readme.md ├── model_store ├── onet_model_final.pt ├── pnet_model_final.pt └── rnet_model_final.pt ├── anno_store └── readme.md ├── test_image.py ├── config.py ├── .gitignore └── README.md /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /train_net/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /prepare_data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wujiyang/MTCNN_TRAIN/HEAD/test.jpg -------------------------------------------------------------------------------- /result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wujiyang/MTCNN_TRAIN/HEAD/result.png -------------------------------------------------------------------------------- /test2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wujiyang/MTCNN_TRAIN/HEAD/test2.jpg -------------------------------------------------------------------------------- /training_data/readme.md: -------------------------------------------------------------------------------- 1 | this folder contains training samples for each training stage 2 | -------------------------------------------------------------------------------- /model_store/onet_model_final.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wujiyang/MTCNN_TRAIN/HEAD/model_store/onet_model_final.pt -------------------------------------------------------------------------------- /model_store/pnet_model_final.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wujiyang/MTCNN_TRAIN/HEAD/model_store/pnet_model_final.pt -------------------------------------------------------------------------------- /model_store/rnet_model_final.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wujiyang/MTCNN_TRAIN/HEAD/model_store/rnet_model_final.pt -------------------------------------------------------------------------------- /anno_store/readme.md: -------------------------------------------------------------------------------- 1 | this folder saves image list for all three stage. 2 | 3 | I didn't push them to github because of the file size. 4 | 5 | 6 | for training pnet and rnet, I use wider face dataset for sampling 7 | for training onet, I use a third part data for landmark sampling: [Training set](http://mmlab.ie.cuhk.edu.hk/archive/CNN_FacePoint.htm) 8 | -------------------------------------------------------------------------------- /test_image.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat May 26 13:57:59 2018 5 | 6 | @author: wujiyang 7 | """ 8 | 9 | import sys 10 | sys.path.append("/home/wujiyang/FaceProjects/MTCNN_TRAIN") 11 | 12 | import cv2 13 | from tools.detect import create_mtcnn_net, MtcnnDetector 14 | import tools.vision as vision 15 | 16 | 17 | if __name__ == '__main__': 18 | 19 | pnet, rnet, onet = create_mtcnn_net(p_model_path="./model_store/pnet_model_final.pt", 20 | r_model_path="./model_store/rnet_model_final.pt", 21 | o_model_path="./model_store/onet_model_final.pt", 22 | use_cuda=False) 23 | 24 | mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, onet=onet, min_face_size=24) 25 | 26 | img = cv2.imread("./test2.jpg") 27 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 28 | 29 | bboxs, landmarks = mtcnn_detector.detect_face(img) 30 | 31 | #print bboxs.shape[0] 32 | #print landmarks.shape[0] 33 | 34 | vision.vis_face(img, bboxs, landmarks) -------------------------------------------------------------------------------- /prepare_data/assemble_onet_imglist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat May 26 12:21:58 2018 5 | 6 | @author: wujiyang 7 | """ 8 | 9 | 10 | import sys 11 | sys.path.append("/home/wujiyang/FaceProjects/MTCNN_TRAIN") 12 | 13 | import os 14 | import config 15 | import prepare_data.assemble as assemble 16 | 17 | 18 | if __name__ == '__main__': 19 | 20 | anno_list = [] 21 | 22 | net_landmark_file = os.path.join(config.ANNO_STORE_DIR,config.ONET_LANDMARK_ANNO_FILENAME) 23 | net_postive_file = os.path.join(config.ANNO_STORE_DIR,config.ONET_POSTIVE_ANNO_FILENAME) 24 | net_part_file = os.path.join(config.ANNO_STORE_DIR,config.ONET_PART_ANNO_FILENAME) 25 | net_neg_file = os.path.join(config.ANNO_STORE_DIR,config.ONET_NEGATIVE_ANNO_FILENAME) 26 | 27 | anno_list.append(net_postive_file) 28 | anno_list.append(net_part_file) 29 | anno_list.append(net_neg_file) 30 | anno_list.append(net_landmark_file) 31 | 32 | imglist_file = os.path.join(config.ANNO_STORE_DIR, config.ONET_TRAIN_IMGLIST_FILENAME) 33 | 34 | chose_count = assemble.assemble_data(imglist_file ,anno_list) 35 | print("PNet train annotation result file path:%s" % imglist_file) 36 | -------------------------------------------------------------------------------- /prepare_data/assemble_rnet_imglist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu May 24 19:22:32 2018 5 | 6 | @author: wujiyang 7 | """ 8 | import sys 9 | sys.path.append("/home/wujiyang/FaceProjects/MTCNN_TRAIN") 10 | 11 | import os 12 | import config 13 | import prepare_data.assemble as assemble 14 | 15 | 16 | if __name__ == '__main__': 17 | 18 | anno_list = [] 19 | 20 | # rnet_landmark_file = os.path.join(config.ANNO_STORE_DIR,config.RNET_LANDMARK_ANNO_FILENAME) 21 | rnet_postive_file = os.path.join(config.ANNO_STORE_DIR,config.RNET_POSTIVE_ANNO_FILENAME) 22 | rnet_part_file = os.path.join(config.ANNO_STORE_DIR,config.RNET_PART_ANNO_FILENAME) 23 | rnet_neg_file = os.path.join(config.ANNO_STORE_DIR,config.RNET_NEGATIVE_ANNO_FILENAME) 24 | 25 | anno_list.append(rnet_postive_file) 26 | anno_list.append(rnet_part_file) 27 | anno_list.append(rnet_neg_file) 28 | # anno_list.append(rnet_landmark_file) 29 | 30 | imglist_file = os.path.join(config.ANNO_STORE_DIR, config.RNET_TRAIN_IMGLIST_FILENAME) 31 | 32 | chose_count = assemble.assemble_data(imglist_file ,anno_list) 33 | print("PNet train annotation result file path:%s, total num of imgs: %d" % (imglist_file, chose_count)) 34 | -------------------------------------------------------------------------------- /prepare_data/assemble_pnet_imglist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue May 15 15:52:24 2018 5 | 6 | @author: wujiyang 7 | """ 8 | 9 | import sys 10 | sys.path.append("/home/wujiyang/FaceProjects/MTCNN_TRAIN") 11 | 12 | import os 13 | 14 | import config 15 | import prepare_data.assemble as assemble 16 | 17 | 18 | if __name__ == '__main__': 19 | 20 | anno_list = [] 21 | 22 | # pnet_landmark_file = os.path.join(config.ANNO_STORE_DIR,config.PNET_LANDMARK_ANNO_FILENAME) 23 | pnet_postive_file = os.path.join(config.ANNO_STORE_DIR, config.PNET_POSTIVE_ANNO_FILENAME) 24 | pnet_part_file = os.path.join(config.ANNO_STORE_DIR, config.PNET_PART_ANNO_FILENAME) 25 | pnet_neg_file = os.path.join(config.ANNO_STORE_DIR, config.PNET_NEGATIVE_ANNO_FILENAME) 26 | 27 | anno_list.append(pnet_postive_file) 28 | anno_list.append(pnet_part_file) 29 | anno_list.append(pnet_neg_file) 30 | # anno_list.append(pnet_landmark_file) 31 | 32 | imglist_file = os.path.join(config.ANNO_STORE_DIR, config.PNET_TRAIN_IMGLIST_FILENAME) 33 | 34 | chose_count = assemble.assemble_data(imglist_file ,anno_list) 35 | print("PNet train annotation result file path:%s, total num of imgs: %d" % (imglist_file, chose_count)) 36 | 37 | 38 | -------------------------------------------------------------------------------- /tools/image_tools.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue May 15 19:02:24 2018 5 | 6 | @author: wujiyang 7 | """ 8 | 9 | import torch 10 | import torchvision.transforms as transforms 11 | import numpy as np 12 | 13 | 14 | transform = transforms.ToTensor() 15 | 16 | 17 | def convert_image_to_tensor(image): 18 | """convert an image to pytorch tensor 19 | 20 | Parameters: 21 | ---------- 22 | image: numpy array , h * w * c 23 | 24 | Returns: 25 | ------- 26 | image_tensor: pytorch.FloatTensor, c * h * w 27 | """ 28 | image = image.astype(np.float32) 29 | return transform(image) 30 | 31 | 32 | def convert_chwTensor_to_hwcNumpy(tensor): 33 | """convert a group images pytorch tensor(count * c * h * w) to numpy array images(count * h * w * c) 34 | Parameters: 35 | ---------- 36 | tensor: numpy array , count * c * h * w 37 | 38 | Returns: 39 | ------- 40 | numpy array images: count * h * w * c 41 | """ 42 | 43 | if isinstance(tensor, torch.FloatTensor): 44 | return np.transpose(tensor.detach().numpy(), (0,2,3,1)) 45 | else: 46 | raise Exception("covert b*c*h*w tensor to b*h*w*c numpy error.This tensor must have 4 dimension of float data type.") 47 | -------------------------------------------------------------------------------- /prepare_data/assemble.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue May 15 15:52:53 2018 5 | 6 | @author: wujiyang 7 | """ 8 | 9 | import os 10 | import numpy.random as npr 11 | import numpy as np 12 | 13 | def assemble_data(output_file, anno_file_list = []): 14 | # assemble the annotations to one file 15 | size = 12 16 | 17 | if len(anno_file_list) == 0: 18 | return 0 19 | 20 | if os.path.exists(output_file): 21 | os.remove(output_file) 22 | 23 | chose_count = 0 24 | for anno_file in anno_file_list: 25 | with open(anno_file, 'r') as f: 26 | anno_lines = f.readlines() 27 | 28 | base_num = 250000 29 | # choose the examples in random style 30 | if len(anno_lines) > base_num * 3: 31 | idx_keep = npr.choice(len(anno_lines), size=base_num * 3, replace=True) 32 | elif len(anno_lines) > 100000: 33 | idx_keep = npr.choice(len(anno_lines), size=len(anno_lines), replace=True) 34 | else: 35 | idx_keep = np.arange(len(anno_lines)) 36 | np.random.shuffle(idx_keep) 37 | 38 | with open(output_file, 'a+') as f: 39 | for idx in idx_keep: 40 | f.write(anno_lines[idx]) 41 | chose_count += 1 42 | 43 | return chose_count 44 | 45 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue May 15 09:37:39 2018 5 | 6 | @author: wujiyang 7 | """ 8 | 9 | import os 10 | 11 | 12 | MODLE_STORE_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + "/MTCNN_TRAIN/model_store" 13 | 14 | ANNO_STORE_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + "/MTCNN_TRAIN/anno_store" 15 | 16 | TRAIN_DATA_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + "/MTCNN_TRAIN/training_data" 17 | 18 | LOG_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + "/MTCNN_TRAIN/log" 19 | 20 | USE_CUDA = True 21 | 22 | TRAIN_BATCH_SIZE = 1024 23 | 24 | TRAIN_LR = 0.01 25 | 26 | END_EPOCH = 50 27 | 28 | PNET_POSTIVE_ANNO_FILENAME = "pos_12.txt" 29 | PNET_NEGATIVE_ANNO_FILENAME = "neg_12.txt" 30 | PNET_PART_ANNO_FILENAME = "part_12.txt" 31 | PNET_LANDMARK_ANNO_FILENAME = "landmark_12.txt" 32 | 33 | 34 | RNET_POSTIVE_ANNO_FILENAME = "pos_24.txt" 35 | RNET_NEGATIVE_ANNO_FILENAME = "neg_24.txt" 36 | RNET_PART_ANNO_FILENAME = "part_24.txt" 37 | RNET_LANDMARK_ANNO_FILENAME = "landmark_24.txt" 38 | 39 | 40 | ONET_POSTIVE_ANNO_FILENAME = "pos_48.txt" 41 | ONET_NEGATIVE_ANNO_FILENAME = "neg_48.txt" 42 | ONET_PART_ANNO_FILENAME = "part_48.txt" 43 | ONET_LANDMARK_ANNO_FILENAME = "landmark_48.txt" 44 | 45 | PNET_TRAIN_IMGLIST_FILENAME = "imglist_anno_12.txt" 46 | RNET_TRAIN_IMGLIST_FILENAME = "imglist_anno_24.txt" 47 | ONET_TRAIN_IMGLIST_FILENAME = "imglist_anno_48.txt" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | 107 | # add by wujiyang 108 | training_data/* 109 | !training_data/readme.md 110 | model_store/* 111 | !model_store/pnet_model_final.pt 112 | !model_store/rnet_model_final.pt 113 | !model_store/onet_model_final.pt 114 | anno_store/* 115 | !anno_store/landmark_imagelist.txt 116 | !anno_store/wider_origin_anno.txt 117 | !anno_store/readme.md 118 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MTCNN_TRAIN 2 | MTCNN_Train Scripts with PyTorch 0.4.0 3 | 4 | ## Declaration 5 | **The source code in this repository is mainly from [kuaikuaikim/DFace](https://github.com/kuaikuaikim/DFace).** 6 | **I reimplemented the part of MTCNN with PyTorch 0.4.0 and made some optimizations but most remains unchanged. If you want to know more details, please go to [kuaikuaikim/DFace](https://github.com/kuaikuaikim/DFace)** 7 | 8 | --- 9 | ## Introduction 10 | 11 | ~~This project is still in progess, I will finish it in my spare time as soon as possible !~~ 12 | 13 | This project is a reimplementation version of mtcnn face detection, most of the source code is from [kuaikuaikim/DFace](https://github.com/kuaikuaikim/DFace), I restructed the source code with Pytorch 0.4.0 and made some modifications and optimizations. All the contributions I have made is listed below. 14 | 15 | ## The Contributions 16 | 1. restruct the source code with PyTorch 0.4.0. 17 | 2. avoid some unnecessary image data copy operation in training data preparation, for example, ./prepare_data/gen_Pnet_data.py and so on. 18 | 3. remove some meaningless operation in traing process, and format the output information during training. 19 | 4. fix the bug that data_loader can't load the last mini_batch when the last minibatch'size is less than the batch_size in ./tools/image_reader.py. 20 | 5. to be continue. 21 | 22 | ## How to use 23 | For training PNet and RNet, I only use the [Widerface](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/) for face classification and face bounding box regression. For training ONet, I use [Widerface](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/) for face classification and face bounding box regression and use [Training Dataset](http://mmlab.ie.cuhk.edu.hk/archive/CNN_FacePoint.htm) for face landmark regression. 24 | 25 | 1. Train PNet 26 | ``` python 27 | cd MTCNN_TRAIN 28 | python prepare_data/gen_Pnet_train_data.py 29 | python prepare_data/assemble_pnet_imglist.py 30 | python train_net/train_p_net.py 31 | ``` 32 | 2. Train RNet 33 | ``` python 34 | cd MTCNN_TRAIN 35 | python prepare_data/gen_Rnet_train_data.py 36 | python prepare_data/assemble_rnet_imglist.py 37 | python train_net/train_r_net.py 38 | ``` 39 | 3. Train ONet 40 | ``` python 41 | cd MTCNN_TRAIN 42 | python prepare_data/gen_landmark_48.py 43 | python prepare_data/gen_Onet_train_data.py 44 | python prepare_data/assemble_onet_imglist.py 45 | python train_net/train_o_net.py 46 | ``` 47 | 4. Test Image 48 | ``` python 49 | cd MTCNN_TRAIN 50 | python test_image.py 51 | ``` 52 | 53 | ## Results 54 | Because I didn't use much data to train, the detection results is not at the best. 55 | ![avatar](result.png) 56 | 57 | ## Problems 58 | There still remains a problem to solve: When starting to train each stage network, the first batch will last for a long time about 30 minutes and I don't know why. 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /tools/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue May 15 09:37:59 2018 5 | 6 | @author: wujiyang 7 | """ 8 | 9 | import numpy as np 10 | 11 | def IoU(box, boxes): 12 | """Compute IoU between detect box and gt boxes 13 | 14 | Parameters: 15 | ---------- 16 | box: numpy array , shape (5, ): x1, y1, x2, y2, score 17 | input box 18 | boxes: numpy array, shape (n, 4): x1, y1, x2, y2 19 | input ground truth boxes 20 | 21 | Returns: 22 | ------- 23 | ovr: numpy.array, shape (n, ) 24 | IoU 25 | """ 26 | box_area = (box[2] - box[0]) * (box[3] - box[1]) 27 | area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) 28 | 29 | xx1 = np.maximum(box[0], boxes[:, 0]) 30 | yy1 = np.maximum(box[1], boxes[:, 1]) 31 | xx2 = np.minimum(box[2], boxes[:, 2]) 32 | yy2 = np.minimum(box[3], boxes[:, 3]) 33 | 34 | # compute the width and height of the inter box 35 | w = np.maximum(0, xx2 - xx1) 36 | h = np.maximum(0, yy2 - yy1) 37 | 38 | inter = w * h 39 | ovr = np.true_divide(inter, (box_area + area - inter)) 40 | 41 | 42 | return ovr 43 | 44 | 45 | def convert_to_square(bbox): 46 | ''' Convert bbox to a square which it can include the bbox 47 | Parameters: 48 | bbox: numpy array, shape n x 5 49 | 50 | returns: 51 | square box 52 | ''' 53 | 54 | square_bbox = bbox.copy() 55 | h = bbox[:, 3] - bbox[:, 1] 56 | w = bbox[:, 2] - bbox[:, 0] 57 | max_side = np.maximum(h, w) 58 | square_bbox[:, 0] = bbox[:, 0] + w*0.5 - max_side*0.5 59 | square_bbox[:, 1] = bbox[:, 1] + h*0.5 - max_side*0.5 60 | square_bbox[:, 2] = square_bbox[:, 0] + max_side 61 | square_bbox[:, 3] = square_bbox[:, 1] + max_side 62 | 63 | return square_bbox 64 | 65 | 66 | def nms(dets, thresh, mode='Union'): 67 | ''' greedily select bboxes with high confidence,if an box overlap with the highest score box > thres, rule it out 68 | 69 | params: 70 | dets: [[x1, y1, x2, y2, score]] 71 | thresh: retain overlap <= thresh 72 | return: 73 | indexes to keep 74 | ''' 75 | x1 = dets[:, 0] 76 | y1 = dets[:, 1] 77 | x2 = dets[:, 2] 78 | y2 = dets[:, 3] 79 | scores = dets[:, 4] 80 | 81 | areas = (x2 - x1) * (y2 - y1) 82 | order = scores.argsort()[::-1] # the index of scores by desc 83 | 84 | keep = [] 85 | while order.size > 0: 86 | i = order[0] 87 | keep.append(i) 88 | xx1 = np.maximum(x1[i], x1[order[1:]]) 89 | yy1 = np.maximum(y1[i], y1[order[1:]]) 90 | xx2 = np.minimum(x2[i], x2[order[1:]]) 91 | yy2 = np.minimum(y2[i], y2[order[1:]]) 92 | 93 | w = np.maximum(0.0, xx2 - xx1) 94 | h = np.maximum(0.0, yy2 - yy1) 95 | inter = w * h 96 | inter = w * h 97 | if mode == "Union": 98 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 99 | elif mode == "Minimum": 100 | ovr = inter / np.minimum(areas[i], areas[order[1:]]) 101 | 102 | inds = np.where(ovr <= thresh)[0] 103 | order = order[inds + 1] 104 | 105 | return keep 106 | 107 | 108 | 109 | 110 | -------------------------------------------------------------------------------- /tools/vision.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat May 19 15:14:18 2018 5 | 6 | @author: wujiyang 7 | """ 8 | import matplotlib.pyplot as plt 9 | import pylab 10 | 11 | def vis_two(im_array, dest1, dest2, thresh=0.9): 12 | """Visualize detection results before and after calibration 13 | 14 | Parameters: 15 | ---------- 16 | im_array: numpy.ndarray, shape(1, c, h, w) 17 | test image in rgb 18 | dets1: numpy.ndarray([[x1 y1 x2 y2 score]]) 19 | detection results before calibration 20 | dets2: numpy.ndarray([[x1 y1 x2 y2 score]]) 21 | detection results after calibration 22 | thresh: float 23 | boxes with scores > thresh will be drawn in red 24 | 25 | Returns: 26 | ------- 27 | """ 28 | 29 | figure = plt.figure() 30 | plt.subplot(121) 31 | plt.imshow(im_array) 32 | figure.suptitle('Face Detector', fontsize=12, color='r') 33 | for i in range(dest1.shape[0]): 34 | bbox = dest1[i, 0:4] 35 | score = dest1[i, 4] 36 | landmarks = dest1[i, 5:] 37 | if score > thresh: 38 | rect = plt.Rectangle((bbox[0], bbox[1]), 39 | bbox[2] - bbox[0], 40 | bbox[3] - bbox[1], fill=False, 41 | edgecolor='red', linewidth=0.7) 42 | plt.gca().add_patch(rect) # get current Axes and do some modification on it 43 | landmarks = landmarks.reshape((5, 2)) 44 | for j in range(5): 45 | plt.scatter(landmarks[j, 0], landmarks[j, 1], c='yellow', linewidth=1, marker='x', s = 20) 46 | 47 | plt.subplot(122) 48 | plt.imshow(im_array) 49 | for i in range(dest2.shape[0]): 50 | bbox = dest2[i, 0:4] 51 | score = dest2[i, 4] 52 | landmarks = dest2[i, 5:] 53 | if score > thresh: 54 | rect = plt.Rectangle((bbox[0], bbox[1]), 55 | bbox[2] - bbox[0], 56 | bbox[3] - bbox[1], fill=False, 57 | edgecolor='red', linewidth=0.7) 58 | plt.gca().add_patch(rect) 59 | 60 | landmarks = landmarks.reshape((5, 2)) 61 | for j in range(5): 62 | plt.scatter(landmarks[j, 0], landmarks[j, 1], c='yellow', linewidths=1, marker='x', s=20) 63 | 64 | plt.show() 65 | 66 | 67 | 68 | def vis_face(im_array, dets, landmarks=None): 69 | """Visualize detection results of an image 70 | 71 | Parameters: 72 | ---------- 73 | im_array: numpy.ndarray, shape(1, c, h, w) 74 | test image in rgb 75 | dets: numpy.ndarray([[x1 y1 x2 y2 score landmarks]]) 76 | detection results before calibration 77 | landmarks: numpy.ndarray([landmarks for five facial landmarks]) 78 | 79 | Returns: 80 | ------- 81 | """ 82 | figure = plt.figure() 83 | plt.imshow(im_array) 84 | figure.suptitle('Face Detector', fontsize=12, color='r') 85 | 86 | for i in range(dets.shape[0]): 87 | bbox = dets[i, 0:4] 88 | rect = plt.Rectangle((bbox[0], bbox[1]), 89 | bbox[2] - bbox[0], 90 | bbox[3] - bbox[1], fill=False, 91 | edgecolor='yellow', linewidth=0.9) 92 | plt.gca().add_patch(rect) 93 | 94 | if landmarks is not None: 95 | for i in range(landmarks.shape[0]): 96 | landmarks_one = landmarks[i, :] 97 | landmarks_one = landmarks_one.reshape((5, 2)) 98 | for j in range(5): 99 | plt.scatter(landmarks_one[j, 0], landmarks_one[j, 1], c='red',linewidths=1, marker='x', s=5) 100 | 101 | plt.show() 102 | -------------------------------------------------------------------------------- /tools/imagedb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue May 15 19:09:40 2018 5 | 6 | @author: wujiyang 7 | """ 8 | 9 | import os 10 | import numpy as np 11 | 12 | class ImageDB(object): 13 | def __init__(self, image_annotation_file, prefix_path='', mode='train'): 14 | self.prefix_path = prefix_path 15 | self.image_annotation_file = image_annotation_file 16 | self.classes = ['__background__', 'face'] 17 | self.num_classes = 2 18 | self.image_set_index = self.load_image_set_index() 19 | self.num_images = len(self.image_set_index) 20 | self.mode = mode 21 | 22 | 23 | def load_image_set_index(self): 24 | ''' Get image index 25 | 26 | Returns: 27 | image_set_index: str, relative path of image 28 | ''' 29 | assert os.path.exists(self.image_annotation_file), 'Path does not exist: {}'.format(self.image_annotation_file) 30 | with open(self.image_annotation_file, 'r') as f: 31 | image_set_index = [x.strip().split(' ')[0] for x in f.readlines()] 32 | return image_set_index 33 | 34 | 35 | def load_imdb(self): 36 | ''' Get and save ground truth image database 37 | 38 | Returns: 39 | gt_imdb: dict, image database with annotations 40 | ''' 41 | 42 | gt_imdb = self.load_annotations() 43 | 44 | return gt_imdb 45 | 46 | def real_image_path(self, index): 47 | ''' Given image's relative index, return full path of image ''' 48 | 49 | index = index.replace("\\", "/") 50 | 51 | if not os.path.exists(index): 52 | image_file = os.path.join(self.prefix_path, index) 53 | else: 54 | image_file=index 55 | if not image_file.endswith('.jpg'): 56 | image_file = image_file + '.jpg' 57 | assert os.path.exists(image_file), 'Path does not exist: {}'.format(image_file) 58 | 59 | return image_file 60 | 61 | def load_annotations(self, annotation_type=1): 62 | ''' Load annotations 63 | 64 | what's the meaning of annotation_type ? I don't know ! 65 | Returns: 66 | imdb: dict, image database with annotations 67 | ''' 68 | 69 | assert os.path.exists(self.image_annotation_file), 'annotations not found at {}'.format(self.image_annotation_file) 70 | with open(self.image_annotation_file, 'r') as f: 71 | annotations = f.readlines() 72 | 73 | 74 | imdb = [] 75 | for i in range(self.num_images): 76 | annotation = annotations[i].strip().split(' ') 77 | index = annotation[0] 78 | im_path = self.real_image_path(index) 79 | imdb_ = dict() 80 | imdb_['image'] = im_path 81 | 82 | if self.mode == 'test': 83 | pass 84 | else: 85 | label = annotation[1] 86 | imdb_['label'] = int(label) 87 | imdb_['flipped'] = False 88 | imdb_['bbox_target'] = np.zeros((4,)) 89 | imdb_['landmark_target'] = np.zeros((10,)) 90 | if len(annotation[2:])==4: 91 | bbox_target = annotation[2:6] 92 | imdb_['bbox_target'] = np.array(bbox_target).astype(float) 93 | if len(annotation[2:])==14: 94 | bbox_target = annotation[2:6] 95 | imdb_['bbox_target'] = np.array(bbox_target).astype(float) 96 | landmark = annotation[6:] 97 | imdb_['landmark_target'] = np.array(landmark).astype(float) 98 | imdb.append(imdb_) 99 | return imdb 100 | 101 | 102 | def append_flipped_images(self, imdb): 103 | ''' append flipped images to imdb 104 | 105 | Returns: 106 | imdb: dict, image database with flipped image annotations 107 | ''' 108 | print 'append flipped images to imdb ', len(imdb) 109 | for i in range(len(imdb)): 110 | imdb_ = imdb[i] 111 | m_bbox = imdb_['bbox_target'].copy() 112 | m_bbox[0], m_bbox[2] = -m_bbox[2], -m_bbox[0] 113 | 114 | landmark_ = imdb_['landmark_target'].copy() 115 | landmark_ = landmark_.reshape((5, 2)) 116 | landmark_ = np.asarray([(1 - x, y) for (x, y) in landmark_]) 117 | landmark_[[0, 1]] = landmark_[[1, 0]] 118 | landmark_[[3, 4]] = landmark_[[4, 3]] 119 | 120 | item = {'image': imdb_['image'], 121 | 'label': imdb_['label'], 122 | 'bbox_target': m_bbox, 123 | 'landmark_target': landmark_.reshape((10)), 124 | 'flipped': True} 125 | 126 | imdb.append(item) 127 | self.image_set_index *= 2 128 | print 'after flipped images appended to imdb ', len(imdb) 129 | 130 | return imdb 131 | 132 | -------------------------------------------------------------------------------- /tools/image_reader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue May 15 22:02:40 2018 5 | 6 | @author: wujiyang 7 | """ 8 | 9 | import numpy as np 10 | import cv2 11 | 12 | class TrainImageReader: 13 | def __init__(self, imdb, im_size, batch_size=256, shuffle=False): 14 | 15 | self.imdb = imdb 16 | self.batch_size = batch_size 17 | self.im_size = im_size 18 | self.shuffle = shuffle 19 | 20 | self.cur = 0 21 | self.size = len(imdb) 22 | self.index = np.arange(self.size) 23 | self.num_classes = 2 24 | 25 | self.batch = None 26 | self.data = None 27 | self.label = None 28 | 29 | self.label_names = ['label', 'bbox_target', 'landmark_target'] 30 | self.reset() 31 | self.get_batch() 32 | 33 | def reset(self): 34 | self.cur = 0 35 | if self.shuffle: 36 | np.random.shuffle(self.index) 37 | 38 | def iter_next(self): 39 | #return self.cur + self.batch_size <= self.size # can't load the last epoch in the condition 40 | return self.cur < self.size 41 | 42 | def __iter__(self): 43 | return self 44 | 45 | def __next__(self): 46 | return self.next() 47 | 48 | def next(self): 49 | if self.iter_next(): 50 | self.get_batch() 51 | self.cur = min(self.cur + self.batch_size, self.size) 52 | return self.data, self.label 53 | else: 54 | raise StopIteration 55 | 56 | def getindex(self): 57 | return self.cur / self.batch_size 58 | 59 | def getpad(self): 60 | ''' pad for the last batch ''' 61 | if self.cur + self.batch_size > self.size: 62 | return self.cur + self.batch_size - self.size 63 | else: 64 | return 0 65 | 66 | def get_batch(self): 67 | cur_from = self.cur 68 | cur_to = min(cur_from + self.batch_size, self.size) 69 | imdb = [self.imdb[self.index[i]] for i in range(cur_from, cur_to)] 70 | data, label = get_minibatch(imdb) 71 | self.data = data['data'] 72 | self.label = [label[name] for name in self.label_names] 73 | 74 | 75 | 76 | def get_minibatch(imdb): 77 | num_images = len(imdb) 78 | processed_ims = list() 79 | cls_label = list() 80 | bbox_reg_target = list() 81 | landmark_reg_target = list() 82 | 83 | for i in range(num_images): 84 | im = cv2.imread(imdb[i]['image']) 85 | 86 | if imdb[i]['flipped']: 87 | im = im[:, ::-1, :] 88 | #im = im.transpose(Image.FLIP_LEFT_RIGHT) 89 | 90 | cls = imdb[i]['label'] 91 | bbox_target = imdb[i]['bbox_target'] 92 | landmark = imdb[i]['landmark_target'] 93 | 94 | processed_ims.append(im) 95 | cls_label.append(cls) 96 | bbox_reg_target.append(bbox_target) 97 | landmark_reg_target.append(landmark) 98 | 99 | im_array = np.asarray(processed_ims) 100 | label_array = np.array(cls_label) 101 | bbox_target_array = np.vstack(bbox_reg_target) 102 | landmark_target_array = np.vstack(landmark_reg_target) 103 | 104 | data = {'data': im_array} 105 | label = {'label': label_array, 106 | 'bbox_target': bbox_target_array, 107 | 'landmark_target': landmark_target_array 108 | } 109 | 110 | return data, label 111 | 112 | 113 | 114 | class TestImageLoader: 115 | def __init__(self, imdb, batch_size=1, shuffle=False): 116 | self.imdb = imdb 117 | self.batch_size = batch_size 118 | self.shuffle = shuffle 119 | self.size = len(imdb) 120 | self.index = np.arange(self.size) 121 | 122 | self.cur = 0 123 | self.data = None 124 | self.label = None 125 | 126 | self.reset() 127 | self.get_batch() 128 | 129 | def reset(self): 130 | self.cur = 0 131 | if self.shuffle: 132 | np.random.shuffle(self.index) 133 | 134 | def iter_next(self): 135 | return self.cur + self.batch_size <= self.size 136 | 137 | def __iter__(self): 138 | return self 139 | 140 | def __next__(self): 141 | return self.next() 142 | 143 | def next(self): 144 | if self.iter_next(): 145 | self.get_batch() 146 | self.cur += self.batch_size 147 | return self.data 148 | else: 149 | raise StopIteration 150 | 151 | def getindex(self): 152 | return self.cur / self.batch_size 153 | 154 | def getpad(self): 155 | if self.cur + self.batch_size > self.size: 156 | return self.cur + self.batch_size - self.size 157 | else: 158 | return 0 159 | 160 | def get_batch(self): 161 | cur_from = self.cur 162 | cur_to = min(cur_from + self.batch_size, self.size) 163 | imdb = [self.imdb[self.index[i]] for i in range(cur_from, cur_to)] 164 | data = get_testbatch(imdb) 165 | self.data = data['data'] 166 | 167 | 168 | def get_testbatch(imdb): 169 | assert len(imdb) == 1, "Single batch only" 170 | im = cv2.imread(imdb[0]['image']) 171 | data = {'data': im} 172 | return data 173 | -------------------------------------------------------------------------------- /train_net/train_p_net.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue May 15 18:48:03 2018 5 | 6 | @author: wujiyang 7 | """ 8 | 9 | import sys 10 | sys.path.append("/home/wujiyang/FaceProjects/MTCNN_TRAIN") 11 | 12 | import os 13 | import argparse 14 | import datetime 15 | import torch 16 | import config 17 | from tools.image_reader import TrainImageReader 18 | from train_net.models import PNet, LossFn 19 | from train_net.models import compute_accuracy 20 | import tools.image_tools as image_tools 21 | from tools.imagedb import ImageDB 22 | 23 | 24 | def train_p_net(annotation_file, model_store_path, end_epoch=50, frequent=200, base_lr=0.01, batch_size=256, use_cuda=True): 25 | 26 | # initialize the PNet ,loss function and set optimization for this network 27 | if not os.path.exists(model_store_path): 28 | os.makedirs(model_store_path) 29 | net = PNet(is_train=True, use_cuda=use_cuda) 30 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 31 | if use_cuda: 32 | net.to(device) 33 | lossfn = LossFn() 34 | optimizer = torch.optim.Adam(net.parameters(), lr=base_lr) 35 | scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 25, 40], gamma=0.1) 36 | # load training image 37 | imagedb = ImageDB(annotation_file) 38 | gt_imdb = imagedb.load_imdb() 39 | gt_imdb = imagedb.append_flipped_images(gt_imdb) 40 | train_data = TrainImageReader(gt_imdb, 12, batch_size, shuffle=True) 41 | 42 | # train net 43 | net.train() 44 | for cur_epoch in range(end_epoch): 45 | scheduler.step() 46 | train_data.reset() # shuffle the data for this epoch 47 | for batch_idx, (image, (gt_label, gt_bbox, gt_landmark)) in enumerate(train_data): 48 | im_tensor = [image_tools.convert_image_to_tensor(image[i,:,:,:]) for i in range(image.shape[0])] 49 | im_tensor = torch.stack(im_tensor) 50 | 51 | gt_label = torch.from_numpy(gt_label).float() 52 | gt_bbox = torch.from_numpy(gt_bbox).float() 53 | # gt_landmark = torch.from_numpy(gt_landmark).float() 54 | if use_cuda: 55 | im_tensor = im_tensor.to(device) 56 | gt_label = gt_label.to(device) 57 | gt_bbox = gt_bbox.to(device) 58 | 59 | cls_pred, box_offset_pred = net(im_tensor) 60 | cls_loss = lossfn.cls_loss(gt_label, cls_pred) 61 | box_offset_loss = lossfn.box_loss(gt_label, gt_bbox, box_offset_pred) 62 | all_loss = cls_loss * 1.0 + box_offset_loss * 0.5 63 | 64 | if batch_idx % frequent == 0: 65 | accuracy = compute_accuracy(cls_pred, gt_label) 66 | print("[%s, Epoch: %d, Step: %d] accuracy: %.6f, all_loss: %.6f, cls_loss: %.6f, bbox_reg_loss: %.6f, lr: %.6f" % 67 | (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), cur_epoch + 1, batch_idx, accuracy.data.tolist(), 68 | all_loss.data.tolist(), cls_loss.data.tolist(), box_offset_loss.data.tolist(), scheduler.get_lr()[0])) 69 | 70 | optimizer.zero_grad() 71 | all_loss.backward() 72 | optimizer.step() 73 | 74 | # TODO: add validation set for trained model 75 | 76 | if (cur_epoch + 1) % 10 == 0: 77 | torch.save(net.state_dict(), os.path.join(model_store_path,"pnet_model_epoch_%d.pt" % (cur_epoch + 1))) 78 | 79 | torch.save(net.state_dict(), os.path.join(model_store_path, 'pnet_nodel_final.pt')) 80 | 81 | 82 | 83 | 84 | def parse_args(): 85 | parser = argparse.ArgumentParser(description='Train PNet', 86 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 87 | 88 | 89 | parser.add_argument('--anno_file', dest='annotation_file', help='training data annotation file', 90 | default=os.path.join(config.ANNO_STORE_DIR,config.PNET_TRAIN_IMGLIST_FILENAME), type=str) 91 | parser.add_argument('--model_path', dest='model_store_path', help='training model store directory', 92 | default=config.MODLE_STORE_DIR, type=str) 93 | parser.add_argument('--end_epoch', dest='end_epoch', help='end epoch of training', 94 | default=config.END_EPOCH, type=int) 95 | parser.add_argument('--frequent', dest='frequent', help='frequency of logging', 96 | default=200, type=int) 97 | parser.add_argument('--base_lr', dest='base_lr', help='learning rate', 98 | default=config.TRAIN_LR, type=float) 99 | parser.add_argument('--batch_size', dest='batch_size', help='train batch size', 100 | default=config.TRAIN_BATCH_SIZE, type=int) 101 | parser.add_argument('--gpu', dest='use_cuda', help='train with gpu', 102 | default=config.USE_CUDA, type=bool) 103 | 104 | args = parser.parse_args() 105 | return args 106 | 107 | if __name__ == '__main__': 108 | args = parse_args() 109 | # print('train Pnet argument:') 110 | # print(args) 111 | 112 | train_p_net(annotation_file=args.annotation_file, model_store_path=args.model_store_path, 113 | end_epoch=args.end_epoch, frequent=args.frequent, base_lr=args.base_lr, batch_size=args.batch_size, use_cuda=args.use_cuda) 114 | -------------------------------------------------------------------------------- /train_net/train_r_net.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu May 24 19:27:03 2018 5 | 6 | @author: wujiyang 7 | """ 8 | import sys 9 | sys.path.append("/home/wujiyang/FaceProjects/MTCNN_TRAIN") 10 | 11 | import os 12 | import argparse 13 | import datetime 14 | import torch 15 | import config 16 | from tools.image_reader import TrainImageReader 17 | from train_net.models import RNet, LossFn 18 | from train_net.models import compute_accuracy 19 | import tools.image_tools as image_tools 20 | from tools.imagedb import ImageDB 21 | 22 | 23 | def train_r_net(annotation_file, model_store_path, end_epoch=50, frequent=200, base_lr=0.01, batch_size=256, use_cuda=True): 24 | 25 | # initialize the RNet ,loss function and set optimization for this network 26 | if not os.path.exists(model_store_path): 27 | os.makedirs(model_store_path) 28 | 29 | net = RNet(is_train=True, use_cuda=use_cuda) 30 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 31 | if use_cuda: 32 | net.to(device) 33 | lossfn = LossFn() 34 | optimizer = torch.optim.Adam(net.parameters(), lr=base_lr) 35 | scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 25, 40], gamma=0.1) 36 | # load training image 37 | imagedb = ImageDB(annotation_file) 38 | gt_imdb = imagedb.load_imdb() 39 | gt_imdb = imagedb.append_flipped_images(gt_imdb) 40 | train_data = TrainImageReader(gt_imdb, 24, batch_size, shuffle=True) 41 | 42 | # train net 43 | net.train() 44 | for cur_epoch in range(end_epoch): 45 | scheduler.step() 46 | train_data.reset() # shuffle the data for this epoch 47 | for batch_idx, (image, (gt_label, gt_bbox, gt_landmark)) in enumerate(train_data): 48 | im_tensor = [image_tools.convert_image_to_tensor(image[i,:,:,:]) for i in range(image.shape[0])] 49 | im_tensor = torch.stack(im_tensor) 50 | 51 | gt_label = torch.from_numpy(gt_label).float() 52 | gt_bbox = torch.from_numpy(gt_bbox).float() 53 | # gt_landmark = torch.from_numpy(gt_landmark).float() 54 | if use_cuda: 55 | im_tensor = im_tensor.to(device) 56 | gt_label = gt_label.to(device) 57 | gt_bbox = gt_bbox.to(device) 58 | 59 | cls_pred, box_offset_pred = net(im_tensor) 60 | cls_loss = lossfn.cls_loss(gt_label, cls_pred) 61 | box_offset_loss = lossfn.box_loss(gt_label, gt_bbox, box_offset_pred) 62 | all_loss = cls_loss * 1.0 + box_offset_loss * 0.5 63 | 64 | if batch_idx % frequent == 0: 65 | accuracy = compute_accuracy(cls_pred, gt_label) 66 | print("[%s, Epoch: %d, Step: %d] accuracy: %.6f, all_loss: %.6f, cls_loss: %.6f, bbox_reg_loss: %.6f, lr: %.6f" % 67 | (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), cur_epoch + 1, batch_idx, accuracy.data.tolist(), 68 | all_loss.data.tolist(), cls_loss.data.tolist(), box_offset_loss.data.tolist(), scheduler.get_lr()[0])) 69 | 70 | optimizer.zero_grad() 71 | all_loss.backward() 72 | optimizer.step() 73 | 74 | # TODO: add validation set for trained model 75 | 76 | if (cur_epoch + 1) % 10 == 0: 77 | torch.save(net.state_dict(), os.path.join(model_store_path,"rnet_model_epoch_%d.pt" % (cur_epoch + 1))) 78 | 79 | torch.save(net.state_dict(), os.path.join(model_store_path, 'rnet_model_final.pt')) 80 | 81 | 82 | 83 | 84 | def parse_args(): 85 | parser = argparse.ArgumentParser(description='Train RNet', 86 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 87 | 88 | 89 | parser.add_argument('--anno_file', dest='annotation_file',help='training data annotation file', 90 | default=os.path.join(config.ANNO_STORE_DIR,config.RNET_TRAIN_IMGLIST_FILENAME), type=str) 91 | parser.add_argument('--model_path', dest='model_store_path', help='training model store directory', 92 | default=config.MODLE_STORE_DIR, type=str) 93 | parser.add_argument('--end_epoch', dest='end_epoch', help='end epoch of training', 94 | default=config.END_EPOCH, type=int) 95 | parser.add_argument('--frequent', dest='frequent', help='frequency of logging', 96 | default=200, type=int) 97 | parser.add_argument('--base_lr', dest='base_lr', help='learning rate', 98 | default=config.TRAIN_LR, type=float) 99 | parser.add_argument('--batch_size', dest='batch_size', help='train batch size', 100 | default=config.TRAIN_BATCH_SIZE, type=int) 101 | parser.add_argument('--gpu', dest='use_cuda', help='train with gpu', 102 | default=config.USE_CUDA, type=bool) 103 | 104 | args = parser.parse_args() 105 | return args 106 | 107 | if __name__ == '__main__': 108 | args = parse_args() 109 | # print('train Rnet argument:') 110 | # print(args) 111 | 112 | train_r_net(annotation_file=args.annotation_file, model_store_path=args.model_store_path, 113 | end_epoch=args.end_epoch, frequent=args.frequent, base_lr=args.base_lr, batch_size=args.batch_size, use_cuda=args.use_cuda) 114 | -------------------------------------------------------------------------------- /train_net/train_o_net.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri May 25 21:29:56 2018 5 | 6 | @author: wujiyang 7 | """ 8 | 9 | import sys 10 | sys.path.append("/home/wujiyang/FaceProjects/MTCNN_TRAIN") 11 | 12 | import os 13 | import argparse 14 | import datetime 15 | import torch 16 | import config 17 | from tools.image_reader import TrainImageReader 18 | from train_net.models import ONet, LossFn 19 | from train_net.models import compute_accuracy 20 | import tools.image_tools as image_tools 21 | from tools.imagedb import ImageDB 22 | 23 | def train_o_net(annotation_file, model_store_path, end_epoch=50, frequent=200, base_lr=0.01, batch_size=256, use_cuda=True): 24 | 25 | # initialize the ONet ,loss function and set optimization for this network 26 | if not os.path.exists(model_store_path): 27 | os.makedirs(model_store_path) 28 | 29 | net = ONet(is_train=True, use_cuda=use_cuda) 30 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 31 | if use_cuda: 32 | net.to(device) 33 | lossfn = LossFn() 34 | optimizer = torch.optim.Adam(net.parameters(), lr=base_lr) 35 | scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 25, 40], gamma=0.1) 36 | # load training image 37 | imagedb = ImageDB(annotation_file) 38 | gt_imdb = imagedb.load_imdb() 39 | gt_imdb = imagedb.append_flipped_images(gt_imdb) 40 | train_data = TrainImageReader(gt_imdb, 48, batch_size, shuffle=True) 41 | 42 | # train net 43 | net.train() 44 | for cur_epoch in range(end_epoch): 45 | scheduler.step() 46 | train_data.reset() # shuffle the data for this epoch 47 | for batch_idx, (image, (gt_label, gt_bbox, gt_landmark)) in enumerate(train_data): 48 | im_tensor = [image_tools.convert_image_to_tensor(image[i,:,:,:]) for i in range(image.shape[0])] 49 | im_tensor = torch.stack(im_tensor) 50 | 51 | gt_label = torch.from_numpy(gt_label).float() 52 | gt_bbox = torch.from_numpy(gt_bbox).float() 53 | gt_landmark = torch.from_numpy(gt_landmark).float() 54 | if use_cuda: 55 | im_tensor = im_tensor.to(device) 56 | gt_label = gt_label.to(device) 57 | gt_bbox = gt_bbox.to(device) 58 | gt_landmark = gt_landmark.to(device) 59 | 60 | cls_pred, box_offset_pred, landmark_offset_pred = net(im_tensor) 61 | cls_loss = lossfn.cls_loss(gt_label, cls_pred) 62 | box_offset_loss = lossfn.box_loss(gt_label, gt_bbox, box_offset_pred) 63 | landmark_loss = lossfn.landmark_loss(gt_label, gt_landmark, landmark_offset_pred) 64 | all_loss = cls_loss * 0.8 + box_offset_loss * 0.6 + landmark_loss * 1.5 65 | 66 | if batch_idx % frequent == 0: 67 | accuracy = compute_accuracy(cls_pred, gt_label) 68 | print("[%s, Epoch: %d, Step: %d] accuracy: %.6f, all_loss: %.6f, cls_loss: %.6f, bbox_reg_loss: %.6f, landmark_loss: %.6f, lr: %.6f" % 69 | (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), cur_epoch + 1, batch_idx, accuracy.data.tolist(), 70 | all_loss.data.tolist(), cls_loss.data.tolist(), box_offset_loss.data.tolist(), landmark_loss.data.tolist(), scheduler.get_lr()[0])) 71 | 72 | optimizer.zero_grad() 73 | all_loss.backward() 74 | optimizer.step() 75 | 76 | # TODO: add validation set for trained model 77 | 78 | if (cur_epoch + 1) % 10 == 0: 79 | torch.save(net.state_dict(), os.path.join(model_store_path,"onet_model_epoch_%d.pt" % (cur_epoch + 1))) 80 | 81 | torch.save(net.state_dict(), os.path.join(model_store_path, 'onet_model_final.pt')) 82 | 83 | 84 | 85 | def parse_args(): 86 | parser = argparse.ArgumentParser(description='Train ONet', 87 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 88 | 89 | 90 | parser.add_argument('--anno_file', dest='annotation_file', help='training data annotation file', 91 | default=os.path.join(config.ANNO_STORE_DIR,config.ONET_TRAIN_IMGLIST_FILENAME), type=str) 92 | parser.add_argument('--model_path', dest='model_store_path', help='training model store directory', 93 | default=config.MODLE_STORE_DIR, type=str) 94 | parser.add_argument('--end_epoch', dest='end_epoch', help='end epoch of training', 95 | default=config.END_EPOCH, type=int) 96 | parser.add_argument('--frequent', dest='frequent', help='frequency of logging', 97 | default=200, type=int) 98 | parser.add_argument('--base_lr', dest='base_lr', help='learning rate', 99 | default=config.TRAIN_LR, type=float) 100 | parser.add_argument('--batch_size', dest='batch_size', help='train batch size', 101 | default=config.TRAIN_BATCH_SIZE, type=int) 102 | parser.add_argument('--gpu', dest='use_cuda', help='train with gpu', 103 | default=config.USE_CUDA, type=bool) 104 | 105 | args = parser.parse_args() 106 | return args 107 | 108 | if __name__ == '__main__': 109 | args = parse_args() 110 | # print('train Onet argument:') 111 | # print(args) 112 | 113 | train_o_net(annotation_file=args.annotation_file, model_store_path=args.model_store_path, 114 | end_epoch=args.end_epoch, frequent=args.frequent, base_lr=args.base_lr, batch_size=args.batch_size, use_cuda=args.use_cuda) -------------------------------------------------------------------------------- /prepare_data/gen_landmark_12.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat May 26 14:38:20 2018 5 | 6 | @author: wujiyang 7 | """ 8 | 9 | import sys 10 | sys.path.append('/home/wujiyang/FaceProjects/MTCNN_TRAIN') 11 | 12 | import os 13 | import cv2 14 | import numpy as np 15 | import sys 16 | import numpy.random as npr 17 | import argparse 18 | import config 19 | import tools.utils as utils 20 | 21 | 22 | def gen_data(anno_file, data_dir, prefix): 23 | 24 | size = 12 25 | 26 | landmark_imgs_save_dir = os.path.join(data_dir,"12/landmark") 27 | if not os.path.exists(landmark_imgs_save_dir): 28 | os.makedirs(landmark_imgs_save_dir) 29 | 30 | anno_dir = config.ANNO_STORE_DIR 31 | if not os.path.exists(anno_dir): 32 | os.makedirs(anno_dir) 33 | 34 | landmark_anno_filename = config.PNET_LANDMARK_ANNO_FILENAME 35 | save_landmark_anno = os.path.join(anno_dir,landmark_anno_filename) 36 | 37 | f = open(save_landmark_anno, 'w') 38 | 39 | with open(anno_file, 'r') as f2: 40 | annotations = f2.readlines() 41 | 42 | num = len(annotations) 43 | print("%d total images" % num) 44 | 45 | l_idx =0 46 | idx = 0 47 | # image_path bbox landmark(5*2) 48 | for annotation in annotations: 49 | # print imgPath 50 | 51 | annotation = annotation.strip().split(' ') 52 | assert len(annotation)==15,"each line should have 15 element" 53 | im_path = os.path.join(prefix,annotation[0].replace("\\", "/")) 54 | 55 | gt_box = map(float, annotation[1:5]) 56 | # the bounging box in original anno_file is [left, right, top, bottom] 57 | gt_box = [gt_box[0], gt_box[2], gt_box[1], gt_box[3]] #[left, top, right, bottom] 58 | gt_box = np.array(gt_box, dtype=np.int32) 59 | 60 | landmark = map(float, annotation[5:]) 61 | landmark = np.array(landmark, dtype=np.float) 62 | 63 | img = cv2.imread(im_path) 64 | assert (img is not None) 65 | 66 | height, width, channel = img.shape 67 | # crop_face = img[gt_box[1]:gt_box[3]+1, gt_box[0]:gt_box[2]+1] 68 | # crop_face = cv2.resize(crop_face,(size,size)) 69 | 70 | idx = idx + 1 71 | if idx % 100 == 0: 72 | print("%d images done, landmark images: %d"%(idx,l_idx)) 73 | 74 | x1, y1, x2, y2 = gt_box 75 | 76 | # gt's width 77 | w = x2 - x1 78 | # gt's height 79 | h = y2 - y1 80 | if max(w, h) < 40 or x1 < 0 or y1 < 0: 81 | continue 82 | # random shift 83 | for i in range(20): 84 | bbox_size = npr.randint(int(min(w, h) * 0.8), np.ceil(1.25 * max(w, h))) 85 | delta_x = npr.randint(-w * 0.2, w * 0.2) 86 | delta_y = npr.randint(-h * 0.2, h * 0.2) 87 | nx1 = max(x1 + w / 2 - bbox_size / 2 + delta_x, 0) 88 | ny1 = max(y1 + h / 2 - bbox_size / 2 + delta_y, 0) 89 | 90 | nx2 = nx1 + bbox_size 91 | ny2 = ny1 + bbox_size 92 | if nx2 > width or ny2 > height: 93 | continue 94 | crop_box = np.array([nx1, ny1, nx2, ny2]) 95 | cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :] 96 | resized_im = cv2.resize(cropped_im, (size, size),interpolation=cv2.INTER_LINEAR) 97 | 98 | offset_x1 = (x1 - nx1) / float(bbox_size) 99 | offset_y1 = (y1 - ny1) / float(bbox_size) 100 | offset_x2 = (x2 - nx2) / float(bbox_size) 101 | offset_y2 = (y2 - ny2) / float(bbox_size) 102 | 103 | offset_left_eye_x = (landmark[0] - nx1) / float(bbox_size) 104 | offset_left_eye_y = (landmark[1] - ny1) / float(bbox_size) 105 | 106 | offset_right_eye_x = (landmark[2] - nx1) / float(bbox_size) 107 | offset_right_eye_y = (landmark[3] - ny1) / float(bbox_size) 108 | 109 | offset_nose_x = (landmark[4] - nx1) / float(bbox_size) 110 | offset_nose_y = (landmark[5] - ny1) / float(bbox_size) 111 | 112 | offset_left_mouth_x = (landmark[6] - nx1) / float(bbox_size) 113 | offset_left_mouth_y = (landmark[7] - ny1) / float(bbox_size) 114 | 115 | offset_right_mouth_x = (landmark[8] - nx1) / float(bbox_size) 116 | offset_right_mouth_y = (landmark[9] - ny1) / float(bbox_size) 117 | 118 | 119 | # cal iou 120 | iou = utils.IoU(crop_box.astype(np.float), np.expand_dims(gt_box.astype(np.float), 0)) 121 | if iou > 0.65: 122 | save_file = os.path.join(landmark_imgs_save_dir, "%s.jpg" % l_idx) 123 | cv2.imwrite(save_file, resized_im) 124 | 125 | f.write(save_file + ' -2 %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f \n' % \ 126 | (offset_x1, offset_y1, offset_x2, offset_y2, \ 127 | offset_left_eye_x,offset_left_eye_y,offset_right_eye_x,offset_right_eye_y,offset_nose_x,offset_nose_y,offset_left_mouth_x,offset_left_mouth_y,offset_right_mouth_x,offset_right_mouth_y)) 128 | 129 | l_idx += 1 130 | 131 | f.close() 132 | 133 | 134 | 135 | 136 | def parse_args(): 137 | parser = argparse.ArgumentParser(description='Test mtcnn', 138 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 139 | 140 | parser.add_argument('--traindata_store', dest='traindata_store', help='dface train data temporary folder', 141 | default=config.TRAIN_DATA_DIR, type=str) 142 | parser.add_argument('--anno_file', dest='annotation_file', help='landmark dataset original annotation file', 143 | default=os.path.join(config.ANNO_STORE_DIR,"landmark_imagelist.txt"), type=str) 144 | parser.add_argument('--prefix_path', dest='prefix_path', help='annotation file image prefix root path', 145 | default='/home/wujiyang/FaceProjects/MTCNN_TRAIN/training_data/landmark_train', type=str) 146 | 147 | 148 | args = parser.parse_args() 149 | return args 150 | 151 | if __name__ == '__main__': 152 | args = parse_args() 153 | 154 | gen_data(args.annotation_file, args.traindata_store, args.prefix_path) 155 | 156 | -------------------------------------------------------------------------------- /prepare_data/gen_landmark_48.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri May 25 09:05:31 2018 5 | 6 | @author: wujiyang 7 | """ 8 | import sys 9 | sys.path.append('/home/wujiyang/FaceProjects/MTCNN_TRAIN') 10 | 11 | import os 12 | import cv2 13 | import numpy as np 14 | import sys 15 | import numpy.random as npr 16 | import argparse 17 | import config 18 | import tools.utils as utils 19 | 20 | 21 | def gen_data(anno_file, data_dir, prefix): 22 | 23 | size = 48 24 | 25 | landmark_imgs_save_dir = os.path.join(data_dir,"48/landmark") 26 | if not os.path.exists(landmark_imgs_save_dir): 27 | os.makedirs(landmark_imgs_save_dir) 28 | 29 | anno_dir = config.ANNO_STORE_DIR 30 | if not os.path.exists(anno_dir): 31 | os.makedirs(anno_dir) 32 | 33 | landmark_anno_filename = config.ONET_LANDMARK_ANNO_FILENAME 34 | save_landmark_anno = os.path.join(anno_dir,landmark_anno_filename) 35 | 36 | f = open(save_landmark_anno, 'w') 37 | 38 | with open(anno_file, 'r') as f2: 39 | annotations = f2.readlines() 40 | 41 | num = len(annotations) 42 | print("%d total images" % num) 43 | 44 | l_idx =0 45 | idx = 0 46 | # image_path bbox landmark(5*2) 47 | for annotation in annotations: 48 | # print imgPath 49 | 50 | annotation = annotation.strip().split(' ') 51 | assert len(annotation)==15,"each line should have 15 element" 52 | im_path = os.path.join(prefix,annotation[0].replace("\\", "/")) 53 | 54 | gt_box = map(float, annotation[1:5]) 55 | # the bounging box in original anno_file is [left, right, top, bottom] 56 | gt_box = [gt_box[0], gt_box[2], gt_box[1], gt_box[3]] #[left, top, right, bottom] 57 | gt_box = np.array(gt_box, dtype=np.int32) 58 | 59 | landmark = map(float, annotation[5:]) 60 | landmark = np.array(landmark, dtype=np.float) 61 | 62 | img = cv2.imread(im_path) 63 | assert (img is not None) 64 | 65 | height, width, channel = img.shape 66 | # crop_face = img[gt_box[1]:gt_box[3]+1, gt_box[0]:gt_box[2]+1] 67 | # crop_face = cv2.resize(crop_face,(size,size)) 68 | 69 | idx = idx + 1 70 | if idx % 100 == 0: 71 | print("%d images done, landmark images: %d"%(idx,l_idx)) 72 | 73 | x1, y1, x2, y2 = gt_box 74 | 75 | # gt's width 76 | w = x2 - x1 77 | # gt's height 78 | h = y2 - y1 79 | if max(w, h) < 40 or x1 < 0 or y1 < 0: 80 | continue 81 | # random shift 82 | for i in range(20): 83 | bbox_size = npr.randint(int(min(w, h) * 0.8), np.ceil(1.25 * max(w, h))) 84 | delta_x = npr.randint(-w * 0.2, w * 0.2) 85 | delta_y = npr.randint(-h * 0.2, h * 0.2) 86 | nx1 = max(x1 + w / 2 - bbox_size / 2 + delta_x, 0) 87 | ny1 = max(y1 + h / 2 - bbox_size / 2 + delta_y, 0) 88 | 89 | nx2 = nx1 + bbox_size 90 | ny2 = ny1 + bbox_size 91 | if nx2 > width or ny2 > height: 92 | continue 93 | crop_box = np.array([nx1, ny1, nx2, ny2]) 94 | cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :] 95 | resized_im = cv2.resize(cropped_im, (size, size),interpolation=cv2.INTER_LINEAR) 96 | 97 | offset_x1 = (x1 - nx1) / float(bbox_size) 98 | offset_y1 = (y1 - ny1) / float(bbox_size) 99 | offset_x2 = (x2 - nx2) / float(bbox_size) 100 | offset_y2 = (y2 - ny2) / float(bbox_size) 101 | 102 | offset_left_eye_x = (landmark[0] - nx1) / float(bbox_size) 103 | offset_left_eye_y = (landmark[1] - ny1) / float(bbox_size) 104 | 105 | offset_right_eye_x = (landmark[2] - nx1) / float(bbox_size) 106 | offset_right_eye_y = (landmark[3] - ny1) / float(bbox_size) 107 | 108 | offset_nose_x = (landmark[4] - nx1) / float(bbox_size) 109 | offset_nose_y = (landmark[5] - ny1) / float(bbox_size) 110 | 111 | offset_left_mouth_x = (landmark[6] - nx1) / float(bbox_size) 112 | offset_left_mouth_y = (landmark[7] - ny1) / float(bbox_size) 113 | 114 | offset_right_mouth_x = (landmark[8] - nx1) / float(bbox_size) 115 | offset_right_mouth_y = (landmark[9] - ny1) / float(bbox_size) 116 | 117 | 118 | # cal iou 119 | iou = utils.IoU(crop_box.astype(np.float), np.expand_dims(gt_box.astype(np.float), 0)) 120 | if iou > 0.65: 121 | save_file = os.path.join(landmark_imgs_save_dir, "%s.jpg" % l_idx) 122 | cv2.imwrite(save_file, resized_im) 123 | 124 | f.write(save_file + ' -2 %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f \n' % \ 125 | (offset_x1, offset_y1, offset_x2, offset_y2, \ 126 | offset_left_eye_x,offset_left_eye_y,offset_right_eye_x,offset_right_eye_y,offset_nose_x,offset_nose_y,offset_left_mouth_x,offset_left_mouth_y,offset_right_mouth_x,offset_right_mouth_y)) 127 | 128 | l_idx += 1 129 | 130 | f.close() 131 | 132 | 133 | 134 | 135 | def parse_args(): 136 | parser = argparse.ArgumentParser(description='Test mtcnn', 137 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 138 | 139 | parser.add_argument('--traindata_store', dest='traindata_store', help='dface train data temporary folder', 140 | default=config.TRAIN_DATA_DIR, type=str) 141 | parser.add_argument('--anno_file', dest='annotation_file', help='landmark dataset original annotation file', 142 | default=os.path.join(config.ANNO_STORE_DIR,"landmark_imagelist.txt"), type=str) 143 | parser.add_argument('--prefix_path', dest='prefix_path', help='annotation file image prefix root path', 144 | default='/home/wujiyang/FaceProjects/MTCNN_TRAIN/training_data/landmark_train', type=str) 145 | 146 | 147 | args = parser.parse_args() 148 | return args 149 | 150 | if __name__ == '__main__': 151 | args = parse_args() 152 | 153 | gen_data(args.annotation_file, args.traindata_store, args.prefix_path) 154 | 155 | 156 | -------------------------------------------------------------------------------- /prepare_data/gen_landmark_24.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat May 26 14:38:08 2018 5 | 6 | @author: wujiyang 7 | """ 8 | 9 | import sys 10 | sys.path.append('/home/wujiyang/FaceProjects/MTCNN_TRAIN') 11 | 12 | import os 13 | import cv2 14 | import numpy as np 15 | import sys 16 | import numpy.random as npr 17 | import argparse 18 | import config 19 | import tools.utils as utils 20 | 21 | 22 | def gen_data(anno_file, data_dir, prefix): 23 | 24 | size = 24 25 | 26 | landmark_imgs_save_dir = os.path.join(data_dir,"24/landmark") 27 | if not os.path.exists(landmark_imgs_save_dir): 28 | os.makedirs(landmark_imgs_save_dir) 29 | 30 | anno_dir = config.ANNO_STORE_DIR 31 | if not os.path.exists(anno_dir): 32 | os.makedirs(anno_dir) 33 | 34 | landmark_anno_filename = config.RNET_LANDMARK_ANNO_FILENAME 35 | save_landmark_anno = os.path.join(anno_dir,landmark_anno_filename) 36 | 37 | f = open(save_landmark_anno, 'w') 38 | 39 | with open(anno_file, 'r') as f2: 40 | annotations = f2.readlines() 41 | 42 | num = len(annotations) 43 | print("%d total images" % num) 44 | 45 | l_idx =0 46 | idx = 0 47 | # image_path bbox landmark(5*2) 48 | for annotation in annotations: 49 | # print imgPath 50 | 51 | annotation = annotation.strip().split(' ') 52 | assert len(annotation)==15,"each line should have 15 element" 53 | im_path = os.path.join(prefix,annotation[0].replace("\\", "/")) 54 | 55 | gt_box = map(float, annotation[1:5]) 56 | # the bounging box in original anno_file is [left, right, top, bottom] 57 | gt_box = [gt_box[0], gt_box[2], gt_box[1], gt_box[3]] #[left, top, right, bottom] 58 | gt_box = np.array(gt_box, dtype=np.int32) 59 | 60 | landmark = map(float, annotation[5:]) 61 | landmark = np.array(landmark, dtype=np.float) 62 | 63 | img = cv2.imread(im_path) 64 | assert (img is not None) 65 | 66 | height, width, channel = img.shape 67 | # crop_face = img[gt_box[1]:gt_box[3]+1, gt_box[0]:gt_box[2]+1] 68 | # crop_face = cv2.resize(crop_face,(size,size)) 69 | 70 | idx = idx + 1 71 | if idx % 100 == 0: 72 | print("%d images done, landmark images: %d"%(idx,l_idx)) 73 | 74 | x1, y1, x2, y2 = gt_box 75 | 76 | # gt's width 77 | w = x2 - x1 78 | # gt's height 79 | h = y2 - y1 80 | if max(w, h) < 40 or x1 < 0 or y1 < 0: 81 | continue 82 | # random shift 83 | for i in range(20): 84 | bbox_size = npr.randint(int(min(w, h) * 0.8), np.ceil(1.25 * max(w, h))) 85 | delta_x = npr.randint(-w * 0.2, w * 0.2) 86 | delta_y = npr.randint(-h * 0.2, h * 0.2) 87 | nx1 = max(x1 + w / 2 - bbox_size / 2 + delta_x, 0) 88 | ny1 = max(y1 + h / 2 - bbox_size / 2 + delta_y, 0) 89 | 90 | nx2 = nx1 + bbox_size 91 | ny2 = ny1 + bbox_size 92 | if nx2 > width or ny2 > height: 93 | continue 94 | crop_box = np.array([nx1, ny1, nx2, ny2]) 95 | cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :] 96 | resized_im = cv2.resize(cropped_im, (size, size),interpolation=cv2.INTER_LINEAR) 97 | 98 | offset_x1 = (x1 - nx1) / float(bbox_size) 99 | offset_y1 = (y1 - ny1) / float(bbox_size) 100 | offset_x2 = (x2 - nx2) / float(bbox_size) 101 | offset_y2 = (y2 - ny2) / float(bbox_size) 102 | 103 | offset_left_eye_x = (landmark[0] - nx1) / float(bbox_size) 104 | offset_left_eye_y = (landmark[1] - ny1) / float(bbox_size) 105 | 106 | offset_right_eye_x = (landmark[2] - nx1) / float(bbox_size) 107 | offset_right_eye_y = (landmark[3] - ny1) / float(bbox_size) 108 | 109 | offset_nose_x = (landmark[4] - nx1) / float(bbox_size) 110 | offset_nose_y = (landmark[5] - ny1) / float(bbox_size) 111 | 112 | offset_left_mouth_x = (landmark[6] - nx1) / float(bbox_size) 113 | offset_left_mouth_y = (landmark[7] - ny1) / float(bbox_size) 114 | 115 | offset_right_mouth_x = (landmark[8] - nx1) / float(bbox_size) 116 | offset_right_mouth_y = (landmark[9] - ny1) / float(bbox_size) 117 | 118 | 119 | # cal iou 120 | iou = utils.IoU(crop_box.astype(np.float), np.expand_dims(gt_box.astype(np.float), 0)) 121 | if iou > 0.65: 122 | save_file = os.path.join(landmark_imgs_save_dir, "%s.jpg" % l_idx) 123 | cv2.imwrite(save_file, resized_im) 124 | 125 | f.write(save_file + ' -2 %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f \n' % \ 126 | (offset_x1, offset_y1, offset_x2, offset_y2, \ 127 | offset_left_eye_x,offset_left_eye_y,offset_right_eye_x,offset_right_eye_y,offset_nose_x,offset_nose_y,offset_left_mouth_x,offset_left_mouth_y,offset_right_mouth_x,offset_right_mouth_y)) 128 | 129 | l_idx += 1 130 | 131 | f.close() 132 | 133 | 134 | 135 | 136 | def parse_args(): 137 | parser = argparse.ArgumentParser(description='Test mtcnn', 138 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 139 | 140 | parser.add_argument('--traindata_store', dest='traindata_store', help='dface train data temporary folder', 141 | default=config.TRAIN_DATA_DIR, type=str) 142 | parser.add_argument('--anno_file', dest='annotation_file', help='landmark dataset original annotation file', 143 | default=os.path.join(config.ANNO_STORE_DIR,"landmark_imagelist.txt"), type=str) 144 | parser.add_argument('--prefix_path', dest='prefix_path', help='annotation file image prefix root path', 145 | default='/home/wujiyang/FaceProjects/MTCNN_TRAIN/training_data/landmark_train', type=str) 146 | 147 | 148 | args = parser.parse_args() 149 | return args 150 | 151 | if __name__ == '__main__': 152 | args = parse_args() 153 | 154 | gen_data(args.annotation_file, args.traindata_store, args.prefix_path) 155 | 156 | 157 | -------------------------------------------------------------------------------- /prepare_data/gen_Pnet_train_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue May 15 09:35:11 2018 5 | 6 | @author: wujiyang 7 | """ 8 | import sys 9 | sys.path.append("/home/wujiyang/FaceProjects/MTCNN_TRAIN") 10 | 11 | import argparse 12 | import numpy as np 13 | import cv2 14 | import os 15 | import numpy.random as npr 16 | 17 | import config 18 | from tools.utils import IoU 19 | 20 | 21 | def gen_pnet_data(data_dir, anno_file, prefix): 22 | neg_save_dir = os.path.join(data_dir, "12/negative") 23 | pos_save_dir = os.path.join(data_dir, "12/positive") 24 | part_save_dir = os.path.join(data_dir, "12/part") 25 | 26 | for dir_path in [neg_save_dir, pos_save_dir, part_save_dir]: 27 | if not os.path.exists(dir_path): 28 | os.makedirs(dir_path) 29 | 30 | save_dir = os.path.join(data_dir,"pnet") 31 | if not os.path.exists(save_dir): 32 | os.mkdir(save_dir) 33 | 34 | post_save_file = os.path.join(config.ANNO_STORE_DIR, config.PNET_POSTIVE_ANNO_FILENAME) 35 | neg_save_file = os.path.join(config.ANNO_STORE_DIR, config.PNET_NEGATIVE_ANNO_FILENAME) 36 | part_save_file = os.path.join(config.ANNO_STORE_DIR, config.PNET_PART_ANNO_FILENAME) 37 | 38 | f1 = open(post_save_file, 'w') 39 | f2 = open(neg_save_file, 'w') 40 | f3 = open(part_save_file, 'w') 41 | 42 | with open(anno_file, 'r') as f: 43 | annotations = f.readlines() 44 | 45 | num = len(annotations) 46 | print("%d pics in total" % num) 47 | 48 | p_idx = 0 # positive examples index 49 | n_idx = 0 # negative examples index 50 | d_idx = 0 # partface examples index 51 | idx = 0 # pics index 52 | box_idx = 0 # boxes index 53 | 54 | for annotation in annotations: 55 | # for i in range(100): 56 | annotation = annotation.strip().split(' ') 57 | # annotation = annotations[i].strip().split(' ') 58 | im_path = os.path.join(prefix,annotation[0]) 59 | bbox = list(map(float, annotation[1:])) 60 | boxes = np.array(bbox, dtype=np.int32).reshape(-1, 4) 61 | img = cv2.imread(im_path) 62 | idx += 1 63 | 64 | height, width, channel = img.shape 65 | 66 | neg_num = 0 67 | while neg_num < 50: 68 | size = npr.randint(12, min(width, height) / 2) 69 | nx = npr.randint(0, width - size) 70 | ny = npr.randint(0, height - size) 71 | crop_box = np.array([nx, ny, nx + size, ny + size]) 72 | 73 | Iou = IoU(crop_box, boxes) 74 | 75 | if np.max(Iou) < 0.3: 76 | # Iou with all gts must below 0.3 77 | save_file = os.path.join(neg_save_dir, "%s.jpg"%n_idx) 78 | f2.write(save_file + ' 0\n') 79 | cropped_im = img[ny : ny + size, nx : nx + size, :] 80 | resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR) 81 | cv2.imwrite(save_file, resized_im) 82 | n_idx += 1 83 | neg_num += 1 84 | 85 | for box in boxes: 86 | # box (x_left, y_top, x_right, y_bottom) 87 | x1, y1, x2, y2 = box 88 | w = x2 - x1 89 | h = y2 - y1 90 | 91 | # ignore small faces 92 | # in case the ground truth boxes of small faces are not accurate 93 | if max(w, h) < 40 or x1 < 0 or y1 < 0: 94 | continue 95 | 96 | # generate negative examples that have overlap with gt 97 | for i in range(5): 98 | size = npr.randint(12, min(width, height) / 2) 99 | # delta_x and delta_y are offsets of (x1, y1) 100 | delta_x = npr.randint(max(-size, -x1), w) 101 | delta_y = npr.randint(max(-size, -y1), h) 102 | nx1 = max(0, x1 + delta_x) 103 | ny1 = max(0, y1 + delta_y) 104 | 105 | if nx1 + size > width or ny1 + size > height: 106 | continue 107 | crop_box = np.array([nx1, ny1, nx1 + size, ny1 + size]) 108 | Iou = IoU(crop_box, boxes) 109 | 110 | if np.max(Iou) < 0.3: 111 | # Iou with all gts must below 0.3 112 | save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx) 113 | cropped_im = img[ny1 : ny1 + size, nx1 : nx1 + size, :] 114 | resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR) 115 | f2.write(save_file + ' 0\n') # neg samples with label 0 116 | cv2.imwrite(save_file, resized_im) 117 | n_idx += 1 118 | 119 | # generate positive examples and part faces 120 | for i in range(20): 121 | size = npr.randint(int(min(w, h) * 0.8), np.ceil(1.25 * max(w, h))) 122 | 123 | # delta here is the offset of box center 124 | delta_x = npr.randint(-w * 0.2, w * 0.2) 125 | delta_y = npr.randint(-h * 0.2, h * 0.2) 126 | 127 | nx1 = int(max(x1 + w / 2 + delta_x - size / 2, 0)) 128 | ny1 = int(max(y1 + h / 2 + delta_y - size / 2, 0)) 129 | nx2 = nx1 + size 130 | ny2 = ny1 + size 131 | 132 | if nx2 > width or ny2 > height: 133 | continue 134 | crop_box = np.array([nx1, ny1, nx2, ny2]) 135 | 136 | offset_x1 = (x1 - nx1) / float(size) 137 | offset_y1 = (y1 - ny1) / float(size) 138 | offset_x2 = (x2 - nx2) / float(size) 139 | offset_y2 = (y2 - ny2) / float(size) 140 | 141 | cropped_im = img[ny1 : ny2, nx1 : nx2, :] 142 | resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR) 143 | 144 | box_ = box.reshape(1, -1) 145 | if IoU(crop_box, box_) >= 0.65: 146 | save_file = os.path.join(pos_save_dir, "%s.jpg"%p_idx) # positive samples with label 1 and offset 147 | f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n'%(offset_x1, offset_y1, offset_x2, offset_y2)) 148 | cv2.imwrite(save_file, resized_im) 149 | p_idx += 1 150 | elif IoU(crop_box, box_) >= 0.4: 151 | save_file = os.path.join(part_save_dir, "%s.jpg"%d_idx) 152 | f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n'%(offset_x1, offset_y1, offset_x2, offset_y2)) 153 | cv2.imwrite(save_file, resized_im) 154 | d_idx += 1 155 | box_idx += 1 156 | print("%s images done, pos: %s part: %s neg: %s"%(idx, p_idx, d_idx, n_idx)) 157 | 158 | f1.close() 159 | f2.close() 160 | f3.close() 161 | 162 | 163 | 164 | def parse_args(): 165 | parser = argparse.ArgumentParser(description='Test mtcnn', 166 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 167 | 168 | parser.add_argument('--face_traindata_store', dest='traindata_store', help='face train data temporary folder', 169 | default=config.TRAIN_DATA_DIR, type=str) 170 | parser.add_argument('--anno_file', dest='annotation_file', help='wider face original annotation file', 171 | default=os.path.join(config.ANNO_STORE_DIR,"wider_origin_anno.txt"), type=str) 172 | parser.add_argument('--prefix_path', dest='prefix_path', help='annotation file image prefix root path', 173 | default='/home/wujiyang/data/Widerface/WIDER_train/images', type=str) 174 | 175 | 176 | 177 | 178 | args = parser.parse_args() 179 | return args 180 | 181 | if __name__ == '__main__': 182 | args = parse_args() 183 | gen_pnet_data(args.traindata_store,args.annotation_file,args.prefix_path) 184 | # gen_pnet_data('training_data', 'anno_store/wider_origin_anno.txt', '/home/wujiyang/data/Widerface/WIDER_train/images') 185 | 186 | -------------------------------------------------------------------------------- /train_net/models.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue May 15 16:41:52 2018 5 | 6 | @author: wujiyang 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | def weights_init(m): 14 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): 15 | nn.init.xavier_uniform_(m.weight.data) 16 | nn.init.constant_(m.bias, 0.1) 17 | 18 | 19 | def compute_accuracy(prob_cls, gt_cls): 20 | '''return a tensor which contains predicted accuracy''' 21 | prob_cls = torch.squeeze(prob_cls) 22 | gt_cls = torch.squeeze(gt_cls) 23 | 24 | # only positive and negative examples has the classification loss which labels 1 and 0 25 | mask = torch.ge(gt_cls, 0) 26 | valid_gt_cls = torch.masked_select(gt_cls,mask) 27 | valid_prob_cls = torch.masked_select(prob_cls,mask) 28 | # computer predicted accuracy 29 | size = min(valid_gt_cls.size()[0], valid_prob_cls.size()[0]) 30 | prob_ones = torch.ge(valid_prob_cls,0.6).float() 31 | right_ones = torch.eq(prob_ones,valid_gt_cls).float() 32 | 33 | return torch.div(torch.mul(torch.sum(right_ones),float(1.0)),float(size)) 34 | 35 | 36 | class LossFn: 37 | def __init__(self, cls_factor=1, box_factor=1, landmark_factor=1): 38 | # loss function 39 | self.cls_factor = cls_factor 40 | self.box_factor = box_factor 41 | self.land_factor = landmark_factor 42 | self.loss_cls = nn.BCELoss() 43 | self.loss_box = nn.MSELoss() 44 | self.loss_landmark = nn.MSELoss() 45 | 46 | def cls_loss(self, gt_label, pred_label): 47 | pred_label = torch.squeeze(pred_label) 48 | gt_label = torch.squeeze(gt_label) 49 | # only use negative samples and positive samples for classification which labels 0 and 1 50 | mask = torch.ge(gt_label, 0) 51 | valid_gt_label = torch.masked_select(gt_label, mask) 52 | valid_pred_label = torch.masked_select(pred_label, mask) 53 | return self.loss_cls(valid_pred_label, valid_gt_label) * self.cls_factor 54 | 55 | def box_loss(self, gt_label, gt_offset, pred_offset): 56 | pred_offset = torch.squeeze(pred_offset) 57 | gt_offset = torch.squeeze(gt_offset) 58 | gt_label = torch.squeeze(gt_label) 59 | # only use positive samples and partface samples for bounding box regression which labels 1 and -1 60 | unmask = torch.eq(gt_label,0) 61 | mask = torch.eq(unmask,0) 62 | #convert mask to dim index 63 | chose_index = torch.nonzero(mask.data) 64 | chose_index = torch.squeeze(chose_index) 65 | #only valid element can effect the loss 66 | valid_gt_offset = gt_offset[chose_index, :] 67 | valid_pred_offset = pred_offset[chose_index, :] 68 | return self.loss_box(valid_pred_offset, valid_gt_offset) * self.box_factor 69 | 70 | def landmark_loss(self, gt_label, gt_landmark, pred_landmark): 71 | pred_landmark = torch.squeeze(pred_landmark) 72 | gt_landmark = torch.squeeze(gt_landmark) 73 | gt_label = torch.squeeze(gt_label) 74 | # only CelebA data been used in landmark regression 75 | mask = torch.eq(gt_label, -2) 76 | 77 | chose_index = torch.nonzero(mask.data) 78 | chose_index = torch.squeeze(chose_index) 79 | 80 | valid_gt_landmark = gt_landmark[chose_index, :] 81 | valid_pred_landmark = pred_landmark[chose_index, :] 82 | return self.loss_landmark(valid_pred_landmark,valid_gt_landmark) * self.land_factor 83 | 84 | 85 | 86 | class PNet(nn.Module): 87 | '''PNet''' 88 | def __init__(self, is_train=False, use_cuda=True): 89 | super(PNet, self).__init__() 90 | self.is_train = is_train 91 | self.use_cuda = use_cuda 92 | 93 | # backend 94 | self.pre_layer = nn.Sequential( 95 | nn.Conv2d(3, 10, kernel_size=3, stride=1), 96 | nn.PReLU(), 97 | nn.MaxPool2d(kernel_size=2, stride=2), 98 | nn.Conv2d(10, 16, kernel_size=3, stride=1), 99 | nn.PReLU(), 100 | nn.Conv2d(16, 32, kernel_size=3, stride=1), 101 | nn.PReLU() 102 | ) 103 | 104 | # face classification 105 | self.conv4_1 = nn.Conv2d(32, 1, kernel_size=1, stride=1) 106 | # bounding box regresion 107 | self.conv4_2 = nn.Conv2d(32, 4, kernel_size=1, stride=1) 108 | # landmark localization 109 | self.conv4_3 = nn.Conv2d(32, 10, kernel_size=1, stride=1) 110 | 111 | # weight initiation with xavier 112 | self.apply(weights_init) 113 | 114 | def forward(self, x): 115 | x = self.pre_layer(x) 116 | label = F.sigmoid(self.conv4_1(x)) 117 | offset = self.conv4_2(x) 118 | # landmark = self.conv4_3(x) 119 | 120 | if self.is_train is True: 121 | return label,offset 122 | 123 | return label, offset 124 | 125 | 126 | class RNet(nn.Module): 127 | ''' RNet ''' 128 | 129 | def __init__(self,is_train=False, use_cuda=True): 130 | super(RNet, self).__init__() 131 | self.is_train = is_train 132 | self.use_cuda = use_cuda 133 | # backend 134 | self.pre_layer = nn.Sequential( 135 | nn.Conv2d(3, 28, kernel_size=3, stride=1), 136 | nn.PReLU(), 137 | nn.MaxPool2d(kernel_size=3, stride=2), 138 | nn.Conv2d(28, 48, kernel_size=3, stride=1), 139 | nn.PReLU(), 140 | nn.MaxPool2d(kernel_size=3, stride=2), 141 | nn.Conv2d(48, 64, kernel_size=2, stride=1), 142 | nn.PReLU() 143 | 144 | ) 145 | # this is little different from MTCNN paper, cause in pytroch, pooliing is calculated by floor() 146 | self.conv4 = nn.Linear(64*2*2, 128) 147 | self.prelu4 = nn.PReLU() 148 | # face calssification 149 | self.conv5_1 = nn.Linear(128, 1) 150 | # bounding box regression 151 | self.conv5_2 = nn.Linear(128, 4) 152 | # lanbmark localization 153 | self.conv5_3 = nn.Linear(128, 10) 154 | # weight initiation weih xavier 155 | self.apply(weights_init) 156 | 157 | def forward(self, x): 158 | # backend 159 | x = self.pre_layer(x) 160 | #x = x.view(-1, x.size(0)) 161 | x = x.view(-1, 64 * 2 * 2) 162 | x = self.conv4(x) 163 | x = self.prelu4(x) 164 | # detection 165 | det = torch.sigmoid(self.conv5_1(x)) 166 | box = self.conv5_2(x) 167 | 168 | if self.is_train is True: 169 | return det, box 170 | 171 | return det, box 172 | 173 | 174 | class ONet(nn.Module): 175 | ''' ONet ''' 176 | def __init__(self, is_train=False, use_cuda=True): 177 | super(ONet, self).__init__() 178 | self.is_train = is_train 179 | self.use_cuda = use_cuda 180 | # backend 181 | self.pre_layer = nn.Sequential( 182 | nn.Conv2d(3, 32, kernel_size=3, stride=1), 183 | nn.PReLU(), 184 | nn.MaxPool2d(kernel_size=3, stride=2), 185 | nn.Conv2d(32, 64, kernel_size=3, stride=1), 186 | nn.PReLU(), 187 | nn.MaxPool2d(kernel_size=3, stride=2), 188 | nn.Conv2d(64, 64, kernel_size=3, stride=1), 189 | nn.PReLU(), 190 | nn.MaxPool2d(kernel_size=2,stride=2), 191 | nn.Conv2d(64,128,kernel_size=2,stride=1), 192 | nn.PReLU() 193 | ) 194 | self.conv5 = nn.Linear(128*2*2, 256) 195 | self.prelu5 = nn.PReLU() 196 | # face classification 197 | self.conv6_1 = nn.Linear(256, 1) 198 | # bounding box regression 199 | self.conv6_2 = nn.Linear(256, 4) 200 | # lanbmark localization 201 | self.conv6_3 = nn.Linear(256, 10) 202 | # weight initiation weih xavier 203 | self.apply(weights_init) 204 | 205 | def forward(self, x): 206 | # backend 207 | x = self.pre_layer(x) 208 | x = x.view(-1, 128*2*2) 209 | x = self.conv5(x) 210 | x = self.prelu5(x) 211 | # detection 212 | det = torch.sigmoid(self.conv6_1(x)) 213 | box = self.conv6_2(x) 214 | landmark = self.conv6_3(x) 215 | if self.is_train is True: 216 | return det, box, landmark 217 | 218 | return det, box, landmark 219 | 220 | 221 | -------------------------------------------------------------------------------- /prepare_data/gen_Rnet_train_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Mon May 21 22:17:05 2018 5 | 6 | @author: wujiyang 7 | """ 8 | import sys 9 | sys.path.append("/home/wujiyang/FaceProjects/MTCNN_TRAIN") 10 | 11 | import cv2 12 | import argparse 13 | import numpy as np 14 | from tools.detect import MtcnnDetector, create_mtcnn_net 15 | from tools.imagedb import ImageDB 16 | from tools.image_reader import TestImageLoader 17 | import time 18 | import os 19 | import cPickle 20 | 21 | from tools.utils import convert_to_square, IoU 22 | import config 23 | import tools.vision as vision 24 | 25 | 26 | def gen_rnet_data(data_dir, anno_file, pnet_model_file, prefix_path='', use_cuda=True, vis=False): 27 | # load the pnet and pnet_detector 28 | 29 | pnet, _, _ = create_mtcnn_net(p_model_path=pnet_model_file, use_cuda=use_cuda) 30 | mtcnn_detector = MtcnnDetector(pnet=pnet, min_face_size=12) 31 | 32 | imagedb = ImageDB(anno_file, mode="test", prefix_path=prefix_path) 33 | imdb = imagedb.load_imdb() 34 | image_reader = TestImageLoader(imdb, 1, False) 35 | 36 | all_boxes = list() 37 | batch_idx = 0 38 | 39 | for databatch in image_reader: 40 | if batch_idx % 100 == 0: 41 | print ("%d images done" % batch_idx) 42 | im = databatch 43 | t = time.time() 44 | boxes, boxes_align = mtcnn_detector.detect_pnet(im=im) 45 | if boxes_align is None: 46 | all_boxes.append(np.array([])) 47 | batch_idx += 1 48 | continue 49 | if vis: 50 | rgb_im = cv2.cvtColor(np.asarray(im), cv2.COLOR_BGR2RGB) 51 | vision.vis_two(rgb_im, boxes, boxes_align) 52 | 53 | t1 = time.time() - t 54 | print 'time cost for image ', batch_idx, '/', image_reader.size, ': ', t1 55 | all_boxes.append(boxes_align) 56 | batch_idx += 1 57 | 58 | save_path = config.MODLE_STORE_DIR 59 | if not os.path.exists(save_path): 60 | os.mkdir(save_path) 61 | 62 | save_file = os.path.join(save_path, "pnet_detections_%d.pkl" % int(time.time())) 63 | 64 | with open(save_file, 'wb') as f: 65 | cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) 66 | 67 | 68 | #save_file = '/home/wujiyang/FaceProjects/MTCNN_TRAIN/model_store/pnet_detections_1527162101.pkl' 69 | get_rnet_sample_data(data_dir, anno_file, save_file, prefix_path) 70 | 71 | def get_rnet_sample_data(data_dir, anno_file, det_boxes_file, prefix_path): 72 | 73 | neg_save_dir = os.path.join(data_dir, "24/negative") 74 | pos_save_dir = os.path.join(data_dir, "24/positive") 75 | part_save_dir = os.path.join(data_dir, "24/part") 76 | 77 | for dir_path in [neg_save_dir, pos_save_dir, part_save_dir]: 78 | if not os.path.exists(dir_path): 79 | os.makedirs(dir_path) 80 | 81 | # load ground truth from annotation file 82 | # format of each line: image/path [x1, y1, x2, y2] for each gt_box in this image 83 | with open(anno_file, 'r') as f: 84 | annotations = f.readlines() 85 | 86 | image_size = 24 87 | im_idx_list = list() 88 | gt_boxes_list = list() 89 | num_of_images = len(annotations) 90 | print ("processing %d images in total" % num_of_images) 91 | for annotation in annotations: 92 | #for i in range(10): 93 | annotation = annotation.strip().split(' ') 94 | #annotation = annotations[i].strip().split(' ') 95 | im_idx = os.path.join(prefix_path, annotation[0]) 96 | boxes = map(float, annotation[1:]) 97 | boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4) 98 | im_idx_list.append(im_idx) 99 | gt_boxes_list.append(boxes) 100 | 101 | save_path = config.ANNO_STORE_DIR 102 | if not os.path.exists(save_path): 103 | os.makedirs(save_path) 104 | 105 | f1 = open(os.path.join(save_path, 'pos_%d.txt' % image_size), 'w') 106 | f2 = open(os.path.join(save_path, 'neg_%d.txt' % image_size), 'w') 107 | f3 = open(os.path.join(save_path, 'part_%d.txt' % image_size), 'w') 108 | 109 | det_handle = open(det_boxes_file, 'r') 110 | det_boxes = cPickle.load(det_handle) 111 | print(len(det_boxes), num_of_images) 112 | assert len(det_boxes) == num_of_images, "incorrect detections or ground truths" 113 | 114 | # index of neg, pos and part face, used as their image names 115 | n_idx = 0 116 | p_idx = 0 117 | d_idx = 0 118 | image_done = 0 119 | for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list): 120 | image_done += 1 121 | if image_done % 100 == 0: 122 | print("%d images done" % image_done) 123 | if dets.shape[0] == 0: 124 | continue 125 | img = cv2.imread(im_idx) 126 | dets = convert_to_square(dets) 127 | dets[:, 0:4] = np.round(dets[:, 0:4]) 128 | 129 | # each image have at most 50 neg_samples 130 | cur_n_idx = 0 131 | for box in dets: 132 | x_left, y_top, x_right, y_bottom = box[0:4].astype(int) 133 | width = x_right - x_left 134 | height = y_bottom - y_top 135 | # ignore box that is too small or beyond image border 136 | if width < 20 or x_left <= 0 or y_top <= 0 or x_right >= img.shape[1] or y_bottom >= img.shape[0]: 137 | continue 138 | # compute intersection over union(IoU) between current box and all gt boxes 139 | Iou = IoU(box, gts) 140 | cropped_im = img[y_top:y_bottom, x_left:x_right, :] 141 | resized_im = cv2.resize(cropped_im, (image_size, image_size), 142 | interpolation=cv2.INTER_LINEAR) 143 | # save negative images and write label 144 | 145 | if np.max(Iou) < 0.3: 146 | # Iou with all gts must below 0.3 147 | cur_n_idx += 1 148 | if cur_n_idx <= 50: 149 | save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx) 150 | f2.write(save_file + ' 0\n') 151 | cv2.imwrite(save_file, resized_im) 152 | n_idx += 1 153 | else: 154 | # find gt_box with the highest iou 155 | idx = np.argmax(Iou) 156 | assigned_gt = gts[idx] 157 | x1, y1, x2, y2 = assigned_gt 158 | 159 | # compute bbox reg label 160 | offset_x1 = (x1 - x_left) / float(width) 161 | offset_y1 = (y1 - y_top) / float(height) 162 | offset_x2 = (x2 - x_right) / float(width) 163 | offset_y2 = (y2 - y_bottom) / float(height) 164 | 165 | # save positive and part-face images and write labels 166 | if np.max(Iou) >= 0.65: 167 | save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx) 168 | f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' % ( 169 | offset_x1, offset_y1, offset_x2, offset_y2)) 170 | cv2.imwrite(save_file, resized_im) 171 | p_idx += 1 172 | 173 | elif np.max(Iou) >= 0.4: 174 | save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx) 175 | f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' % ( 176 | offset_x1, offset_y1, offset_x2, offset_y2)) 177 | cv2.imwrite(save_file, resized_im) 178 | d_idx += 1 179 | 180 | f1.close() 181 | f2.close() 182 | f3.close() 183 | 184 | 185 | def model_store_path(): 186 | return os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))+"/model_store" 187 | 188 | 189 | 190 | def parse_args(): 191 | parser = argparse.ArgumentParser(description='Test mtcnn', 192 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 193 | 194 | parser.add_argument('--face_traindata_store', dest='traindata_store', help='dface train data temporary folder', 195 | default=config.TRAIN_DATA_DIR, type=str) 196 | parser.add_argument('--anno_file', dest='annotation_file', help='wider face original annotation file', 197 | default=os.path.join(config.ANNO_STORE_DIR,"wider_origin_anno.txt"), type=str) 198 | parser.add_argument('--pmodel_file', dest='pnet_model_file', help='PNet model file path', 199 | default='/home/wujiyang/FaceProjects/MTCNN_TRAIN/model_store/pnet_model_final.pt', type=str) 200 | parser.add_argument('--gpu', dest='use_cuda', help='with gpu', 201 | default=config.USE_CUDA, type=bool) 202 | parser.add_argument('--prefix_path', dest='prefix_path', help='annotation file image prefix root path', 203 | default='/home/wujiyang/data/Widerface/WIDER_train/images', type=str) 204 | 205 | 206 | args = parser.parse_args() 207 | return args 208 | 209 | 210 | 211 | if __name__ == '__main__': 212 | args = parse_args() 213 | gen_rnet_data(args.traindata_store, args.annotation_file, args.pnet_model_file, args.prefix_path, args.use_cuda) 214 | 215 | 216 | 217 | 218 | -------------------------------------------------------------------------------- /prepare_data/gen_Onet_train_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri May 25 20:12:13 2018 5 | 6 | @author: wujiyang 7 | """ 8 | 9 | import sys 10 | sys.path.append("/home/wujiyang/FaceProjects/MTCNN_TRAIN") 11 | 12 | import cv2 13 | import argparse 14 | import numpy as np 15 | from tools.detect import MtcnnDetector, create_mtcnn_net 16 | from tools.imagedb import ImageDB 17 | from tools.image_reader import TestImageLoader 18 | import time 19 | import os 20 | import cPickle 21 | 22 | from tools.utils import convert_to_square, IoU 23 | import config 24 | import tools.vision as vision 25 | 26 | def gen_onet_data(data_dir, anno_file, pnet_model_file, rnet_model_file, prefix_path='', use_cuda=True, vis=False): 27 | 28 | pnet, rnet, _ = create_mtcnn_net(p_model_path=pnet_model_file, r_model_path=rnet_model_file, use_cuda=use_cuda) 29 | mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, min_face_size=12) 30 | 31 | imagedb = ImageDB(anno_file, mode="test", prefix_path=prefix_path) 32 | imdb = imagedb.load_imdb() 33 | image_reader = TestImageLoader(imdb,1,False) 34 | 35 | all_boxes = list() 36 | batch_idx = 0 37 | 38 | for databatch in image_reader: 39 | if batch_idx % 100 == 0: 40 | print("%d images done" % batch_idx) 41 | im = databatch 42 | t = time.time() 43 | #detect an image by pnet and rnet 44 | p_boxes, p_boxes_align = mtcnn_detector.detect_pnet(im=im) 45 | boxes, boxes_align = mtcnn_detector.detect_rnet(im=im, dets=p_boxes_align) 46 | if boxes_align is None: 47 | all_boxes.append(np.array([])) 48 | batch_idx += 1 49 | continue 50 | if vis: 51 | rgb_im = cv2.cvtColor(np.asarray(im), cv2.COLOR_BGR2RGB) 52 | vision.vis_two(rgb_im, boxes, boxes_align) 53 | 54 | t1 = time.time() - t 55 | print 'time cost for image ', batch_idx, '/', image_reader.size, ': ', t1 56 | all_boxes.append(boxes_align) 57 | batch_idx += 1 58 | 59 | save_path = config.MODLE_STORE_DIR 60 | if not os.path.exists(save_path): 61 | os.mkdir(save_path) 62 | 63 | save_file = os.path.join(save_path, "rnet_detections_%d.pkl" % int(time.time())) 64 | 65 | with open(save_file, 'wb') as f: 66 | cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) 67 | 68 | 69 | #save_file = '/home/wujiyang/FaceProjects/MTCNN_TRAIN/model_store/rnet_detections_1527304558.pkl' 70 | get_onet_sample_data(data_dir, anno_file, save_file, prefix_path) 71 | 72 | 73 | def get_onet_sample_data(data_dir, anno_file, det_boxs_file, prefix): 74 | 75 | neg_save_dir = os.path.join(data_dir, "48/negative") 76 | pos_save_dir = os.path.join(data_dir, "48/positive") 77 | part_save_dir = os.path.join(data_dir, "48/part") 78 | 79 | for dir_path in [neg_save_dir, pos_save_dir, part_save_dir]: 80 | if not os.path.exists(dir_path): 81 | os.makedirs(dir_path) 82 | 83 | # load ground truth from annotation file 84 | # format of each line: image/path [x1,y1,x2,y2] for each gt_box in this image 85 | 86 | with open(anno_file, 'r') as f: 87 | annotations = f.readlines() 88 | 89 | image_size = 48 90 | im_idx_list = list() 91 | gt_boxes_list = list() 92 | num_of_images = len(annotations) 93 | print("processing %d images in total" % num_of_images) 94 | 95 | for annotation in annotations: 96 | annotation = annotation.strip().split(' ') 97 | im_idx = os.path.join(prefix,annotation[0]) 98 | 99 | boxes = map(float, annotation[1:]) 100 | boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4) 101 | im_idx_list.append(im_idx) 102 | gt_boxes_list.append(boxes) 103 | 104 | 105 | save_path = config.ANNO_STORE_DIR 106 | if not os.path.exists(save_path): 107 | os.makedirs(save_path) 108 | 109 | f1 = open(os.path.join(save_path, 'pos_%d.txt' % image_size), 'w') 110 | f2 = open(os.path.join(save_path, 'neg_%d.txt' % image_size), 'w') 111 | f3 = open(os.path.join(save_path, 'part_%d.txt' % image_size), 'w') 112 | 113 | det_handle = open(det_boxs_file, 'r') 114 | 115 | det_boxes = cPickle.load(det_handle) 116 | print(len(det_boxes), num_of_images) 117 | assert len(det_boxes) == num_of_images, "incorrect detections or ground truths" 118 | 119 | # index of neg, pos and part face, used as their image names 120 | n_idx = 0 121 | p_idx = 0 122 | d_idx = 0 123 | image_done = 0 124 | for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list): 125 | image_done += 1 126 | if image_done % 100 == 0: 127 | print("%d images done" % image_done) 128 | if dets.shape[0] == 0: 129 | continue 130 | img = cv2.imread(im_idx) 131 | dets = convert_to_square(dets) 132 | dets[:, 0:4] = np.round(dets[:, 0:4]) 133 | 134 | # each image have at most 50 neg_samples 135 | cur_n_idx = 0 136 | for box in dets: 137 | x_left, y_top, x_right, y_bottom = box[0:4].astype(int) 138 | width = x_right - x_left 139 | height = y_bottom - y_top 140 | # ignore box that is too small or beyond image border 141 | if width < 20 or x_left < 0 or y_top < 0 or x_right > img.shape[1] - 1 or y_bottom > img.shape[0] - 1: 142 | continue 143 | # compute intersection over union(IoU) between current box and all gt boxes 144 | Iou = IoU(box, gts) 145 | cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :] 146 | resized_im = cv2.resize(cropped_im, (image_size, image_size), 147 | interpolation=cv2.INTER_LINEAR) 148 | 149 | # save negative images and write label 150 | if np.max(Iou) < 0.3: 151 | # Iou with all gts must below 0.3 152 | cur_n_idx += 1 153 | if cur_n_idx <= 50: 154 | save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx) 155 | f2.write(save_file + ' 0\n') 156 | cv2.imwrite(save_file, resized_im) 157 | n_idx += 1 158 | else: 159 | # find gt_box with the highest iou 160 | idx = np.argmax(Iou) 161 | assigned_gt = gts[idx] 162 | x1, y1, x2, y2 = assigned_gt 163 | 164 | # compute bbox reg label 165 | offset_x1 = (x1 - x_left) / float(width) 166 | offset_y1 = (y1 - y_top) / float(height) 167 | offset_x2 = (x2 - x_right) / float(width) 168 | offset_y2 = (y2 - y_bottom) / float(height) 169 | 170 | # save positive and part-face images and write labels 171 | if np.max(Iou) >= 0.65: 172 | save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx) 173 | f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' % ( 174 | offset_x1, offset_y1, offset_x2, offset_y2)) 175 | cv2.imwrite(save_file, resized_im) 176 | p_idx += 1 177 | 178 | elif np.max(Iou) >= 0.4: 179 | save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx) 180 | f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' % ( 181 | offset_x1, offset_y1, offset_x2, offset_y2)) 182 | cv2.imwrite(save_file, resized_im) 183 | d_idx += 1 184 | f1.close() 185 | f2.close() 186 | f3.close() 187 | 188 | 189 | 190 | def model_store_path(): 191 | return os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))+"/model_store" 192 | 193 | def parse_args(): 194 | parser = argparse.ArgumentParser(description='Test mtcnn', 195 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 196 | 197 | parser.add_argument('--face_traindata_store', dest='traindata_store', help='face train data temporary folder', 198 | default=config.TRAIN_DATA_DIR, type=str) 199 | parser.add_argument('--anno_file', dest='annotation_file', help='wider face original annotation file', 200 | default=os.path.join(config.ANNO_STORE_DIR,"wider_origin_anno.txt"), type=str) 201 | parser.add_argument('--pmodel_file', dest='pnet_model_file', help='PNet model file path', 202 | default=os.path.join(config.MODLE_STORE_DIR,"pnet_model_final.pt"), type=str) 203 | parser.add_argument('--rmodel_file', dest='rnet_model_file', help='RNet model file path', 204 | default=os.path.join(config.MODLE_STORE_DIR,"rnet_model_final.pt"), type=str) 205 | parser.add_argument('--gpu', dest='use_cuda', help='with gpu', 206 | default=config.USE_CUDA, type=bool) 207 | parser.add_argument('--prefix_path', dest='prefix_path', help='annotation file image prefix root path', 208 | default='/home/wujiyang/data/Widerface/WIDER_train/images', type=str) 209 | 210 | args = parser.parse_args() 211 | return args 212 | 213 | 214 | 215 | if __name__ == '__main__': 216 | args = parse_args() 217 | gen_onet_data(args.traindata_store, args.annotation_file, args.pnet_model_file, args.rnet_model_file, args.prefix_path, args.use_cuda) 218 | 219 | -------------------------------------------------------------------------------- /tools/detect.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat May 19 20:55:10 2018 5 | 6 | @author: wujiyang 7 | """ 8 | 9 | import sys 10 | sys.path.append("/home/wujiyang/FaceProjects/MTCNN_TRAIN") 11 | 12 | import cv2 13 | import time 14 | import numpy as np 15 | import torch 16 | from train_net.models import PNet, RNet, ONet 17 | import tools.utils as utils 18 | import tools.image_tools as image_tools 19 | 20 | 21 | def create_mtcnn_net(p_model_path=None, r_model_path=None, o_model_path=None, use_cuda=True): 22 | 23 | pnet, rnet, onet = None, None, None 24 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 25 | 26 | if p_model_path is not None: 27 | pnet = PNet(use_cuda=use_cuda) 28 | pnet.load_state_dict(torch.load(p_model_path)) 29 | if(use_cuda): 30 | pnet.to(device) 31 | 32 | pnet.eval() 33 | 34 | if r_model_path is not None: 35 | rnet = RNet(use_cuda=use_cuda) 36 | rnet.load_state_dict(torch.load(r_model_path)) 37 | if(use_cuda): 38 | rnet.to(device) 39 | 40 | rnet.eval() 41 | 42 | if o_model_path is not None: 43 | onet = ONet(use_cuda=use_cuda) 44 | onet.load_state_dict(torch.load(o_model_path)) 45 | if(use_cuda): 46 | onet.to(device) 47 | 48 | onet.eval() 49 | 50 | return pnet, rnet, onet 51 | 52 | 53 | 54 | class MtcnnDetector(object): 55 | ''' P, R, O net for face detection and landmark alignment''' 56 | def __init__(self, 57 | pnet=None, 58 | rnet=None, 59 | onet=None, 60 | min_face_size=12, 61 | stride=2, 62 | threshold=[0.6, 0.7, 0.7], 63 | scale_factor=0.709): 64 | self.pnet_detector = pnet 65 | self.rnet_detector = rnet 66 | self.onet_detector = onet 67 | self.min_face_size = min_face_size 68 | self.stride=stride 69 | self.thresh = threshold 70 | self.scale_factor = scale_factor 71 | 72 | def unique_image_format(self, im): 73 | if not isinstance(im,np.ndarray): 74 | if im.mode == 'I': 75 | im = np.array(im, np.int32, copy=False) 76 | elif im.mode == 'I;16': 77 | im = np.array(im, np.int16, copy=False) 78 | else: 79 | im = np.asarray(im) 80 | 81 | return im 82 | 83 | def square_bbox(self, bbox): 84 | ''' 85 | convert bbox to square 86 | Parameters: 87 | bbox: numpy array, shape n x m 88 | Returns: 89 | square bbox 90 | ''' 91 | square_bbox = bbox.copy() 92 | 93 | h = bbox[:, 3] - bbox[:, 1] + 1 94 | w = bbox[:, 2] - bbox[:, 0] + 1 95 | l = np.maximum(h,w) 96 | square_bbox[:, 0] = bbox[:, 0] + w*0.5 - l*0.5 97 | square_bbox[:, 1] = bbox[:, 1] + h*0.5 - l*0.5 98 | 99 | square_bbox[:, 2] = square_bbox[:, 0] + l - 1 100 | square_bbox[:, 3] = square_bbox[:, 1] + l - 1 101 | return square_bbox 102 | 103 | 104 | def generate_bounding_box(self, map, reg, scale, threshold): 105 | """ TODO: 这个函数没看懂 """ 106 | ''' 107 | generate bbox from feature map 108 | for PNet, there exists no fc layer, only convolution layer ,so feature map n x m x 1/4 109 | Parameters: 110 | map: numpy array , n x m x 1, detect score for each position 111 | reg: numpy array , n x m x 4, bbox 112 | scale: float number, scale of this detection 113 | threshold: float number, detect threshold 114 | Returns: 115 | bbox array 116 | ''' 117 | stride = 2 118 | cellsize = 12 119 | 120 | t_index = np.where(map > threshold) 121 | # find nothing 122 | if t_index[0].size == 0: 123 | return np.array([]) 124 | 125 | dx1, dy1, dx2, dy2 = [reg[0, t_index[0], t_index[1], i] for i in range(4)] 126 | reg = np.array([dx1, dy1, dx2, dy2]) 127 | 128 | score = map[t_index[0], t_index[1], 0] 129 | boundingbox = np.vstack([np.round((stride * t_index[1]) / scale), 130 | np.round((stride * t_index[0]) / scale), 131 | np.round((stride * t_index[1] + cellsize) / scale), 132 | np.round((stride * t_index[0] + cellsize) / scale), 133 | score, 134 | reg, 135 | # landmarks 136 | ]) 137 | 138 | return boundingbox.T 139 | 140 | 141 | def resize_image(self, img, scale): 142 | """ 143 | resize image and transform dimention to [batchsize, channel, height, width] 144 | Parameters: 145 | ---------- 146 | img: numpy array , height x width x channel,input image, channels in BGR order here 147 | scale: float number, scale factor of resize operation 148 | Returns: 149 | ------- 150 | transformed image tensor , 1 x channel x height x width 151 | """ 152 | height, width, channels = img.shape 153 | new_height = int(height * scale) # resized new height 154 | new_width = int(width * scale) # resized new width 155 | new_dim = (new_width, new_height) 156 | img_resized = cv2.resize(img, new_dim, interpolation=cv2.INTER_LINEAR) # resized image 157 | 158 | return img_resized 159 | 160 | def pad(self, bboxes, w, h): 161 | """ 162 | pad the the boxes 163 | Parameters: 164 | ---------- 165 | bboxes: numpy array, n x 5, input bboxes 166 | w: float number, width of the input image 167 | h: float number, height of the input image 168 | Returns : 169 | ------ 170 | dy, dx : numpy array, n x 1, start point of the bbox in target image 171 | edy, edx : numpy array, n x 1, end point of the bbox in target image 172 | y, x : numpy array, n x 1, start point of the bbox in original image 173 | ey, ex : numpy array, n x 1, end point of the bbox in original image 174 | tmph, tmpw: numpy array, n x 1, height and width of the bbox 175 | """ 176 | 177 | tmpw = (bboxes[:, 2] - bboxes[:, 0] + 1).astype(np.int32) 178 | tmph = (bboxes[:, 3] - bboxes[:, 1] + 1).astype(np.int32) 179 | numbox = bboxes.shape[0] 180 | 181 | dx = np.zeros((numbox, )) 182 | dy = np.zeros((numbox, )) 183 | edx, edy = tmpw.copy()-1, tmph.copy()-1 184 | 185 | x, y, ex, ey = bboxes[:, 0], bboxes[:, 1], bboxes[:, 2], bboxes[:, 3] 186 | 187 | tmp_index = np.where(ex > w-1) 188 | edx[tmp_index] = tmpw[tmp_index] + w - 2 - ex[tmp_index] 189 | ex[tmp_index] = w - 1 190 | 191 | tmp_index = np.where(ey > h-1) 192 | edy[tmp_index] = tmph[tmp_index] + h - 2 - ey[tmp_index] 193 | ey[tmp_index] = h - 1 194 | 195 | tmp_index = np.where(x < 0) 196 | dx[tmp_index] = 0 - x[tmp_index] 197 | x[tmp_index] = 0 198 | 199 | tmp_index = np.where(y < 0) 200 | dy[tmp_index] = 0 - y[tmp_index] 201 | y[tmp_index] = 0 202 | 203 | return_list = [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] 204 | return_list = [item.astype(np.int32) for item in return_list] 205 | 206 | return return_list 207 | 208 | 209 | def detect_pnet(self, im): 210 | """Get face candidates through pnet 211 | 212 | Parameters: 213 | ---------- 214 | im: numpy array, input image array 215 | 216 | Returns: 217 | ------- 218 | boxes: numpy array 219 | detected boxes before calibration 220 | boxes_align: numpy array 221 | boxes after calibration 222 | """ 223 | h, w, c = im.shape 224 | net_size = 12 225 | current_scale = float(net_size) / self.min_face_size # find initial scale 226 | im_resized = self.resize_image(im, current_scale) 227 | current_height, current_width, _ = im_resized.shape 228 | 229 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 230 | 231 | # fcn for pnet 232 | all_boxes = list() 233 | while min(current_height, current_width) > net_size: 234 | feed_imgs = [] 235 | image_tensor = image_tools.convert_image_to_tensor(im_resized) 236 | feed_imgs.append(image_tensor) 237 | feed_imgs = torch.stack(feed_imgs) 238 | 239 | if self.pnet_detector.use_cuda: 240 | feed_imgs = feed_imgs.to(device) 241 | 242 | cls_map, reg = self.pnet_detector(feed_imgs) 243 | cls_map_np = image_tools.convert_chwTensor_to_hwcNumpy(cls_map.cpu()) 244 | reg_np = image_tools.convert_chwTensor_to_hwcNumpy(reg.cpu()) 245 | 246 | boxes = self.generate_bounding_box(cls_map_np[ 0, :, :], reg_np, current_scale, self.thresh[0]) 247 | 248 | current_scale *= self.scale_factor 249 | im_resized = self.resize_image(im, current_scale) 250 | current_height, current_width, _ = im_resized.shape 251 | 252 | if boxes.size == 0: 253 | continue 254 | keep = utils.nms(boxes[:, :5], 0.5, 'Union') 255 | boxes = boxes[keep] 256 | all_boxes.append(boxes) 257 | 258 | if len(all_boxes) == 0: 259 | return None, None 260 | 261 | all_boxes = np.vstack(all_boxes) 262 | 263 | # merge the detection from first stage 264 | keep = utils.nms(all_boxes[:, 0:5], 0.7, 'Union') 265 | all_boxes = all_boxes[keep] 266 | 267 | bw = all_boxes[:, 2] - all_boxes[:, 0] + 1 268 | bh = all_boxes[:, 3] - all_boxes[:, 1] + 1 269 | 270 | boxes = np.vstack([all_boxes[:,0], 271 | all_boxes[:,1], 272 | all_boxes[:,2], 273 | all_boxes[:,3], 274 | all_boxes[:,4] 275 | ]) 276 | 277 | boxes = boxes.T 278 | 279 | align_topx = all_boxes[:, 0] + all_boxes[:, 5] * bw 280 | align_topy = all_boxes[:, 1] + all_boxes[:, 6] * bh 281 | align_bottomx = all_boxes[:, 2] + all_boxes[:, 7] * bw 282 | align_bottomy = all_boxes[:, 3] + all_boxes[:, 8] * bh 283 | 284 | # refine the boxes 285 | boxes_align = np.vstack([align_topx, 286 | align_topy, 287 | align_bottomx, 288 | align_bottomy, 289 | all_boxes[:, 4] 290 | ]) 291 | boxes_align = boxes_align.T 292 | 293 | return boxes, boxes_align 294 | 295 | 296 | def detect_rnet(self, im, dets): 297 | """Get face candidates using rnet 298 | 299 | Parameters: 300 | ---------- 301 | im: numpy array 302 | input image array 303 | dets: numpy array 304 | detection results of pnet 305 | 306 | Returns: 307 | ------- 308 | boxes: numpy array 309 | detected boxes before calibration 310 | boxes_align: numpy array 311 | boxes after calibration 312 | """ 313 | h, w, c = im.shape 314 | if dets is None: 315 | return None, None 316 | 317 | dets = self.square_bbox(dets) 318 | dets[:, 0:4] = np.round(dets[:, 0:4]) 319 | 320 | [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h) 321 | num_boxes = dets.shape[0] 322 | 323 | cropped_ims_tensors = [] 324 | for i in range(num_boxes): 325 | try: 326 | if tmph[i] > 0 and tmpw[i] > 0: 327 | tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8) 328 | tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :] 329 | crop_im = cv2.resize(tmp, (24, 24)) 330 | crop_im_tensor = image_tools.convert_image_to_tensor(crop_im) 331 | # cropped_ims_tensors[i, :, :, :] = crop_im_tensor 332 | cropped_ims_tensors.append(crop_im_tensor) 333 | except ValueError, e: 334 | print e.message 335 | 336 | feed_imgs = torch.stack(cropped_ims_tensors) 337 | 338 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 339 | if self.rnet_detector.use_cuda: 340 | feed_imgs = feed_imgs.to(device) 341 | 342 | cls_map, reg = self.rnet_detector(feed_imgs) 343 | cls_map = cls_map.cpu().data.numpy() 344 | reg = reg.cpu().data.numpy() 345 | 346 | keep_inds = np.where(cls_map > self.thresh[1])[0] 347 | 348 | if len(keep_inds) > 0: 349 | boxes = dets[keep_inds] 350 | cls = cls_map[keep_inds] 351 | reg = reg[keep_inds] 352 | else: 353 | return None, None 354 | 355 | keep = utils.nms(boxes, 0.7) 356 | if len(keep) == 0: 357 | return None, None 358 | 359 | keep_cls = cls[keep] 360 | keep_boxes = boxes[keep] 361 | keep_reg = reg[keep] 362 | bw = keep_boxes[:, 2] - keep_boxes[:, 0] 363 | bh = keep_boxes[:, 3] - keep_boxes[:, 1] 364 | boxes = np.vstack([ keep_boxes[:,0], 365 | keep_boxes[:,1], 366 | keep_boxes[:,2], 367 | keep_boxes[:,3], 368 | keep_cls[:,0] 369 | ]) 370 | align_topx = keep_boxes[:,0] + keep_reg[:,0] * bw 371 | align_topy = keep_boxes[:,1] + keep_reg[:,1] * bh 372 | align_bottomx = keep_boxes[:,2] + keep_reg[:,2] * bw 373 | align_bottomy = keep_boxes[:,3] + keep_reg[:,3] * bh 374 | 375 | boxes_align = np.vstack([align_topx, 376 | align_topy, 377 | align_bottomx, 378 | align_bottomy, 379 | keep_cls[:, 0] 380 | ]) 381 | boxes = boxes.T 382 | boxes_align = boxes_align.T 383 | 384 | return boxes, boxes_align 385 | 386 | 387 | def detect_onet(self, im, dets): 388 | """Get face candidates using onet 389 | 390 | Parameters: 391 | ---------- 392 | im: numpy array 393 | input image array 394 | dets: numpy array 395 | detection results of rnet 396 | 397 | Returns: 398 | ------- 399 | boxes_align: numpy array 400 | boxes after calibration 401 | landmarks_align: numpy array 402 | landmarks after calibration 403 | 404 | """ 405 | h, w, c = im.shape 406 | if dets is None: 407 | return None, None 408 | 409 | dets = self.square_bbox(dets) 410 | dets[:, 0:4] = np.round(dets[:, 0:4]) 411 | 412 | [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h) 413 | num_boxes = dets.shape[0] 414 | 415 | cropped_ims_tensors = [] 416 | for i in range(num_boxes): 417 | try: 418 | if tmph[i] > 0 and tmpw[i] > 0: 419 | tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8) 420 | tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :] 421 | crop_im = cv2.resize(tmp, (48, 48)) 422 | crop_im_tensor = image_tools.convert_image_to_tensor(crop_im) 423 | # cropped_ims_tensors[i, :, :, :] = crop_im_tensor 424 | cropped_ims_tensors.append(crop_im_tensor) 425 | except ValueError, e: 426 | print e.message 427 | 428 | feed_imgs = torch.stack(cropped_ims_tensors) 429 | 430 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 431 | if self.rnet_detector.use_cuda: 432 | feed_imgs = feed_imgs.to(device) 433 | 434 | cls_map, reg, landmark = self.onet_detector(feed_imgs) 435 | 436 | cls_map = cls_map.cpu().data.numpy() 437 | reg = reg.cpu().data.numpy() 438 | landmark = landmark.cpu().data.numpy() 439 | 440 | keep_inds = np.where(cls_map > self.thresh[2])[0] 441 | 442 | if len(keep_inds) > 0: 443 | boxes = dets[keep_inds] 444 | cls = cls_map[keep_inds] 445 | reg = reg[keep_inds] 446 | landmark = landmark[keep_inds] 447 | else: 448 | return None, None 449 | 450 | keep = utils.nms(boxes, 0.7, mode="Minimum") 451 | 452 | if len(keep) == 0: 453 | return None, None 454 | 455 | keep_cls = cls[keep] 456 | keep_boxes = boxes[keep] 457 | keep_reg = reg[keep] 458 | keep_landmark = landmark[keep] 459 | 460 | bw = keep_boxes[:, 2] - keep_boxes[:, 0] 461 | bh = keep_boxes[:, 3] - keep_boxes[:, 1] 462 | 463 | 464 | align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw 465 | align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh 466 | align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw 467 | align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh 468 | 469 | align_landmark_topx = keep_boxes[:, 0] 470 | align_landmark_topy = keep_boxes[:, 1] 471 | 472 | 473 | 474 | boxes_align = np.vstack([align_topx, 475 | align_topy, 476 | align_bottomx, 477 | align_bottomy, 478 | keep_cls[:, 0] 479 | ]) 480 | 481 | boxes_align = boxes_align.T 482 | 483 | landmark = np.vstack([ 484 | align_landmark_topx + keep_landmark[:, 0] * bw, 485 | align_landmark_topy + keep_landmark[:, 1] * bh, 486 | align_landmark_topx + keep_landmark[:, 2] * bw, 487 | align_landmark_topy + keep_landmark[:, 3] * bh, 488 | align_landmark_topx + keep_landmark[:, 4] * bw, 489 | align_landmark_topy + keep_landmark[:, 5] * bh, 490 | align_landmark_topx + keep_landmark[:, 6] * bw, 491 | align_landmark_topy + keep_landmark[:, 7] * bh, 492 | align_landmark_topx + keep_landmark[:, 8] * bw, 493 | align_landmark_topy + keep_landmark[:, 9] * bh, 494 | ]) 495 | 496 | landmark_align = landmark.T 497 | 498 | return boxes_align, landmark_align 499 | 500 | 501 | 502 | def detect_face(self, img): 503 | """Detect face over image 504 | """ 505 | boxes_align = np.array([]) 506 | landmark_align = np.array([]) 507 | 508 | t = time.time() 509 | 510 | # pnet 511 | if self.pnet_detector: 512 | boxes, boxes_align = self.detect_pnet(img) 513 | if boxes_align is None: 514 | return np.array([]), np.array([]) 515 | 516 | t1 = time.time() - t 517 | t = time.time() 518 | 519 | # rnet 520 | if self.rnet_detector: 521 | boxes, boxes_align = self.detect_rnet(img, boxes_align) 522 | if boxes_align is None: 523 | return np.array([]), np.array([]) 524 | 525 | t2 = time.time() - t 526 | t = time.time() 527 | 528 | # onet 529 | if self.onet_detector: 530 | boxes_align, landmark_align = self.detect_onet(img, boxes_align) 531 | if boxes_align is None: 532 | return np.array([]), np.array([]) 533 | 534 | t3 = time.time() - t 535 | t = time.time() 536 | print("time cost " + '{:.3f}'.format(t1+t2+t3) + ' pnet {:.3f} rnet {:.3f} onet {:.3f}'.format(t1, t2, t3)) 537 | 538 | return boxes_align, landmark_align 539 | 540 | 541 | --------------------------------------------------------------------------------