├── .gitignore ├── README.md ├── doc └── TRAIN.md ├── mtcnn ├── __init__.py ├── datasets │ ├── CelebA │ │ └── .gitignore │ ├── FacePoint │ │ └── .gitignore │ ├── WIDER_FACE │ │ └── .gitignore │ ├── __init__.py │ ├── celeba.py │ └── wider_face.py ├── deploy │ ├── __init__.py │ ├── align.py │ ├── batch_detect.py │ ├── detect.py │ ├── models │ │ ├── onet.npy │ │ ├── pnet.npy │ │ └── rnet.npy │ └── tracker.py ├── network │ ├── __init__.py │ └── mtcnn_pytorch.py ├── train │ ├── __init__.py │ ├── data.py │ ├── gen_landmark.py │ ├── gen_onet_train.py │ ├── gen_pnet_train.py │ ├── gen_rnet_train.py │ └── train_net.py └── utils │ ├── __init__.py │ ├── align_trans.py │ ├── cp2tform.py │ ├── draw.py │ ├── functional.py │ └── nms │ ├── __init__.py │ ├── cpu_nms.c │ ├── cpu_nms.pyx │ ├── gpu_nms.cpp │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms_kernel.cu │ └── py_cpu_nms.py ├── output └── caffe_models │ ├── det1.caffemodel │ ├── det1.prototxt │ ├── det2.caffemodel │ ├── det2.prototxt │ ├── det3.caffemodel │ ├── det3.prototxt │ ├── det4.caffemodel │ └── det4.prototxt ├── scripts ├── convert_caffe_model.py ├── detect_on_video.py ├── gen_onet_train.py ├── gen_pnet_train.py ├── gen_rnet_train.py ├── track_on_video.py ├── train_onet.py ├── train_pnet.py └── train_rnet.py ├── setup.py ├── tests ├── __init__.py ├── asset │ ├── images │ │ ├── audrey.jpg │ │ ├── bksomels.jpg │ │ ├── gpripe.jpg │ │ ├── office5.jpg │ │ └── roate.jpg │ └── video │ │ └── track.mp4 ├── test_align.py ├── test_batch_detection.py ├── test_data.py ├── test_datasets.py ├── test_detection.py ├── test_functional.py ├── test_gptd.py ├── test_landmarks.py ├── test_net_jit.py ├── test_net_pytorch.py ├── test_tracker.py └── test_train.py └── tutorial ├── README.md ├── detect_step_by_step.ipynb └── face_align.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | db.sqlite3 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # IPython 78 | profile_default/ 79 | ipython_config.py 80 | 81 | # pyenv 82 | .python-version 83 | 84 | # celery beat schedule file 85 | celerybeat-schedule 86 | 87 | # SageMath parsed files 88 | *.sage.py 89 | 90 | # Environments 91 | .env 92 | .venv 93 | env/ 94 | venv/ 95 | ENV/ 96 | env.bak/ 97 | venv.bak/ 98 | 99 | # Spyder project settings 100 | .spyderproject 101 | .spyproject 102 | 103 | # Rope project settings 104 | .ropeproject 105 | 106 | # mkdocs documentation 107 | /site 108 | 109 | # mypy 110 | .mypy_cache/ 111 | .dmypy.json 112 | dmypy.json 113 | 114 | # Pyre type checker 115 | .pyre/ 116 | 117 | # ide 118 | .vscode 119 | .idea 120 | 121 | # derivative files 122 | output/* 123 | !output/caffe_models/ 124 | !output/converted/ 125 | mtcnn/utils/nms.c 126 | runs/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MTCNN 2 | 3 | pytorch implementation of **inference and training stage** of face detection algorithm described in 4 | [Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Networks](https://arxiv.org/abs/1604.02878). 5 | 6 | ## Why this projects 7 | 8 | [mtcnn-pytorch](https://github.com/TropComplique/mtcnn-pytorch) This is the most popular pytorch implementation of mtcnn. There are some disadvantages we found when using it for real-time detection task. 9 | 10 | - No training code. 11 | - Mix torch operation and numpy operation together, which resulting in slow inference speed. 12 | - No unified interface for setting computation device. ('cpu' or 'gpu') 13 | - Based on the old version of pytorch (0.2). 14 | 15 | So we create this project and add these features: 16 | 17 | - Add code for training stage, you can train model by your own datasets. 18 | - Transfer all numpy operation to torch operation, so that it can benefit from gpu acceleration. It's 10 times faster than the original repo [mtcnn-pytorch](https://github.com/TropComplique/mtcnn-pytorch). 19 | - Provide unified interface to assign 'cpu' or 'gpu'. 20 | - Based on the latest version of pytorch (1.0) and we will provide long-term support. 21 | - It's is a component of our [FaceLab](https://github.com/faciallab) ecosystem. 22 | - Real-time face tracking. 23 | - Friendly tutorial for beginner. 24 | 25 | ## Installation 26 | 27 | ### Create virtual env use conda (recommend) 28 | 29 | ``` 30 | conda create -n face_detection python=3 31 | source activate face_detection 32 | ``` 33 | 34 | ### Installation dependency package 35 | 36 | ```bash 37 | pip install opencv-python numpy easydict Cython progressbar2 torch tensorboardX 38 | ``` 39 | 40 | If you have gpu on your mechine, you can follow the [official instruction](https://pytorch.org/) and install pytorch gpu version. 
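For example, with conda and CUDA 10.0, the matching GPU build (PyTorch 1.0 era) could be installed like this (the exact versions are only an illustration; check the official selector for the command matching your CUDA version):

```bash
conda install pytorch torchvision cudatoolkit=10.0 -c pytorch
```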
41 | 42 | ### Compile the cython code 43 | Compile with gpu support 44 | ```bash 45 | python setup.py build_ext --inplace 46 | ``` 47 | Compile with cpu only 48 | ```bash 49 | python setup.py build_ext --inplace --disable_gpu 50 | ``` 51 | 52 | ### Also, you can install mtcnn as a package 53 | ``` 54 | python setup.py install 55 | ``` 56 | 57 | ## Test the code by example 58 | 59 | We assume all these command running in the $SOURCE_ROOT directory. 60 | 61 | #### Detect on example picture 62 | 63 | ```bash 64 | python -m unittest tests.test_detection.TestDetection.test_detection 65 | ``` 66 | 67 | #### Detect on video 68 | 69 | ```bash 70 | python scripts/detect_on_video.py --video_path ./tests/asset/video/school.avi --device cuda:0 --minsize 24 71 | ``` 72 | 73 | you can set device to 'cpu' if you have no valid gpu on your machine 74 | 75 | ## Basic Usage 76 | 77 | ```python 78 | import cv2 79 | import mtcnn 80 | 81 | # First we create pnet, rnet, onet, and load weights from caffe model. 82 | pnet, rnet, onet = mtcnn.get_net_caffe('output/converted') 83 | 84 | # Then we create a detector 85 | detector = mtcnn.FaceDetector(pnet, rnet, onet, device='cuda:0') 86 | 87 | # Then we can detect faces from image 88 | img = 'tests/asset/images/office5.jpg' 89 | boxes, landmarks = detector.detect(img) 90 | 91 | # Then we draw bounding boxes and landmarks on image 92 | image = cv2.imread(img) 93 | image = mtcnn.utils.draw.draw_boxes2(image, boxes) 94 | image = mtcnn.utils.draw.batch_draw_landmarks(image, landmarks) 95 | 96 | # Show the result 97 | cv2.imshwow("Detected image.", image) 98 | cv2.waitKey(0) 99 | ``` 100 | 101 | ## Doc 102 | [Train your own model from scratch](./doc/TRAIN.md) 103 | 104 | ## Tutorial 105 | 106 | [Detect step by step](./tutorial/detect_step_by_step.ipynb). 107 | 108 | [face_alignment step by step](./tutorial/face_align.ipynb) 109 | -------------------------------------------------------------------------------- /doc/TRAIN.md: -------------------------------------------------------------------------------- 1 | # Train Your Own Model From Scratch 2 | 3 | ## Download the training data 4 | For classification and bounding box regression tasks, download [WiderFace](http://shuoyang1213.me/WIDERFACE/) 5 | 6 | For facial landmark regression task, download [CelebA](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) 7 | 8 | After download these two dataset, you will get file structure like this 9 | 10 | **WIDER_FACE** 11 | ``` 12 | ├── WIDER_FACE 13 | │ ├── wider_face_split 14 | │ ├── WIDER_test 15 | │ ├── WIDER_train 16 | │ └── WIDER_val 17 | ``` 18 | 19 | **CelebA** 20 | ``` 21 | ├── CelebA 22 | │ ├── Anno 23 | │ ├── Eval 24 | │ ├── img_align_celeba 25 | │ ├── img_celeba 26 | │ └── README.txt 27 | ``` 28 | Then, link these folders to "mtcnn/datasets". 29 | ``` 30 | ln -s /path/to/WIDER_FACE/* mtcnn/datasets/WIDER_FACE/ 31 | ln -s /path/to/CelebA/* mtcnn/datasets/CelebA/ 32 | ``` 33 | 34 | ## Train 35 | First, we generate training data for pnet. 36 | ```bash 37 | python scripts/gen_pnet_train.py 38 | ``` 39 | Train pnet with epoch 10, batchsize 256 and gpu:0. 
40 | ```bash 41 | python scripts/train_pnet.py -e 10 -b 256 -o output/pnet.torchm -dv cuda:0 -r True 42 | ``` 43 | Generate training data for rnet 44 | ```bash 45 | python scripts/gen_pnet_train.py -m output/pnet.torchm 46 | ``` 47 | Train rnet 48 | ```bash 49 | python scripts/train_rnet.py -e 10 -b 256 -o output/rnet.torchm -dv cuda:0 50 | ``` 51 | Generate training data for onet 52 | ```bash 53 | python scripts/gen_onet_train.py -pm output/pnet.torchm -rm output/rnet.torchm 54 | ``` 55 | Train onet 56 | ```bash 57 | python scripts/train_onet.py -e 9 -b 256 -o output/onet.torchm -dv cuda:1 -r True 58 | ``` 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /mtcnn/__init__.py: -------------------------------------------------------------------------------- 1 | from mtcnn.deploy import get_net, get_net_caffe, get_default_detector 2 | from mtcnn.deploy.detect import FaceDetector 3 | from mtcnn.deploy.batch_detect import BatchImageDetector 4 | from mtcnn.deploy.tracker import FaceTracker 5 | from mtcnn.deploy.align import align_multi 6 | -------------------------------------------------------------------------------- /mtcnn/datasets/CelebA/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /mtcnn/datasets/FacePoint/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /mtcnn/datasets/WIDER_FACE/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /mtcnn/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .wider_face import WiderFace 2 | from .celeba import CelebA 3 | 4 | import sys 5 | def get_by_name(name, *args, **kwargs): 6 | """Get Dataset instance by name 7 | 8 | Args: 9 | name (str): Name of a avaliable dataset class 10 | """ 11 | this_module = sys.modules[__name__] 12 | if hasattr(this_module, name): 13 | return getattr(this_module, name)(*args, **kwargs) 14 | 15 | else: 16 | raise AttributeError("No queue named %s." % name) -------------------------------------------------------------------------------- /mtcnn/datasets/celeba.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import cv2 4 | 5 | import numpy as np 6 | 7 | here = os.path.dirname(__file__) 8 | 9 | 10 | class CelebA(object): 11 | 12 | def __init__(self, dataset_folder=os.path.join(here, 'CelebA')): 13 | """ 14 | Make sure the WIDER_FACE dataset saved in $SOURCE_ROOT/datasets/CelebA folder. 15 | """ 16 | self.dataset_folder = dataset_folder 17 | 18 | def _load_all(self): 19 | """Load metadata of CelebA dataset. 20 | 21 | Returns: 22 | list: Each item contains a dict with file_name, num_bb (Number of bounding box.), meta_data(x1, y1, w, h), landmarks(lefteye_x lefteye_y righteye_x righteye_y nose_x nose_y leftmouth_x leftmouth_y rightmouth_x rightmouth_y). 
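            Example item (hypothetical values, shown only to illustrate the structure):
                {
                    'file_name': '.../img_celeba/000001.jpg',
                    'num_bb': 1,
                    'meta_data': [array([ 95,  71, 226, 313])],    # x1, y1, w, h
                    'landmarks': [array([165, 184, 244, 176, 196, 249, 194, 271, 266, 260])]
                }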
23 | """ 24 | ret = [] 25 | 26 | anno_folder = os.path.join(self.dataset_folder, 'Anno') 27 | image_folder = os.path.join(self.dataset_folder, 'img_celeba') 28 | box_anno = os.path.join(anno_folder, 'list_bbox_celeba.txt') 29 | landmarks_algn_anno = os.path.join( 30 | anno_folder, 'list_landmarks_celeba.txt') 31 | 32 | f_box_anno = open(box_anno) 33 | f_landmarks_anno = open(landmarks_algn_anno) 34 | 35 | for i, (f_box_line, f_landmarks_line) in enumerate(zip(f_box_anno, f_landmarks_anno)): 36 | if i < 2: # skip the top two lines in anno files 37 | continue 38 | image_name = f_box_line.strip().split(' ')[0] 39 | 40 | boxes = f_box_line.strip().split(' ')[1:] 41 | boxes = list(filter(lambda x: x != '', boxes)) 42 | boxes = np.array(boxes).astype(int) 43 | 44 | landmarks = f_landmarks_line.strip().split(' ')[1:] 45 | landmarks = list(filter(lambda x: x != '', landmarks)) 46 | landmarks = np.array(landmarks).astype(int) 47 | 48 | img_path = os.path.join(image_folder, image_name) 49 | 50 | item = { 51 | 'file_name': img_path, 52 | 'num_bb': 1, 53 | 'meta_data': [boxes], 54 | 'landmarks': [landmarks] 55 | } 56 | ret.append(item) 57 | 58 | f_box_anno.close() 59 | f_landmarks_anno.close() 60 | 61 | return ret 62 | 63 | def _split(self): 64 | """ 65 | Split all_data into train, dev, test parts. 66 | """ 67 | ret = self._load_all() 68 | partition_file = os.path.join( 69 | self.dataset_folder, 'Eval', 'list_eval_partition.txt') 70 | f_partition = open(partition_file) 71 | 72 | train = [] 73 | dev = [] 74 | test = [] 75 | 76 | for line, item in zip(f_partition, ret): 77 | dtype = int(line.split(' ')[1]) 78 | if dtype == 0: 79 | train.append(item) 80 | elif dtype == 1: 81 | dev.append(item) 82 | elif dtype == 2: 83 | test.append(item) 84 | 85 | return train, dev, test 86 | 87 | def get_train_meta(self): 88 | train, _, _ = self._split() 89 | return train 90 | 91 | def get_val_meta(self): 92 | _, dev, _ = self._split() 93 | return dev 94 | 95 | def get_test_meta(self): 96 | _, _, test = self._split() 97 | return test 98 | -------------------------------------------------------------------------------- /mtcnn/datasets/wider_face.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import cv2 4 | 5 | here = os.path.dirname(__file__) 6 | 7 | 8 | class WiderFace(object): 9 | 10 | def __init__(self, dataset_folder=os.path.join(here, 'WIDER_FACE')): 11 | """ 12 | Make sure the WIDER_FACE dataset saved in $SOURCE_ROOT/datasets/WIDER_FACE folder. 13 | """ 14 | self.dataset_folder = dataset_folder 15 | 16 | def _load_meta_data(self, meta_file, target_folder=''): 17 | """Load metadata of WIDER_FACE dataset. 18 | 19 | Args: 20 | meta_file (str): E.g. WIDER_FACE/wider_face_split 21 | target_folder (str): E.g. WIDER_FACE/WIDER_train/images 22 | 23 | Returns: 24 | list: Each item contains a dict with file_name, num_bb (Number of bounding box), meta_data(x1, y1, w, h, blur, expression, illumination, invalid, occlusion, pose). 
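        The bbx_gt annotation file groups entries as: an image path, the number
        of boxes, then one line per box carrying the ten attributes listed above.
        An entry looks roughly like this (illustrative):

            0--Parade/0_Parade_marchingband_1_849.jpg
            1
            449 330 122 149 0 0 0 0 0 0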
25 | """ 26 | f = open(meta_file) 27 | 28 | ret = [] 29 | 30 | flag = 0 31 | num = 0 32 | current_num = 0 33 | current_dict = {} 34 | 35 | for line in f: 36 | if flag == 0: 37 | current_dict = {'file_name': os.path.join( 38 | target_folder, line.strip())} 39 | flag = 1 40 | 41 | elif flag == 1: 42 | current_dict['num_bb'] = int(line.strip()) 43 | num = int(line.strip()) 44 | current_dict['meta_data'] = list() 45 | flag = 2 46 | 47 | elif flag == 2: 48 | cur = [int(i) for i in line.strip().split(' ')] 49 | 50 | # Append the boxes whoes attribute 'invalid' is 'True'. 51 | if cur[7] == 0: 52 | current_dict['meta_data'].append(cur) 53 | else: 54 | current_dict['num_bb'] -= 1 55 | current_num += 1 56 | 57 | if current_num == num: 58 | ret.append(current_dict) 59 | flag = 0 60 | current_num = 0 61 | 62 | f.close() 63 | 64 | return ret 65 | 66 | def get_train_meta(self): 67 | meta_file = os.path.join(self.dataset_folder, "wider_face_split/wider_face_train_bbx_gt.txt") 68 | target_folder = os.path.join(self.dataset_folder, 'WIDER_train/images') 69 | 70 | return self._load_meta_data(meta_file, target_folder) 71 | 72 | def get_val_meta(self): 73 | meta_file = os.path.join(self.dataset_folder, "wider_face_split/wider_face_val_bbx_gt.txt") 74 | target_folder = os.path.join(self.dataset_folder, 'WIDER_val/images') 75 | 76 | return self._load_meta_data(meta_file, target_folder) 77 | 78 | 79 | def get_test_meta(self): 80 | """Use for load test meta_file without label. 81 | 82 | Returns: 83 | list: Each item is a file name 84 | """ 85 | meta_file = os.path.join(self.dataset_folder, 'wider_face_split/wider_face_test_filelist.txt') 86 | target_folder = os.path.join(self.dataset_folder, 'WIDER_test/images') 87 | f = open(meta_file) 88 | ret = list() 89 | 90 | for line in f: 91 | ret.append(os.path.join(target_folder, line.strip())) 92 | 93 | f.close() 94 | return ret 95 | -------------------------------------------------------------------------------- /mtcnn/deploy/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | import mtcnn.network.mtcnn_pytorch as mtcnn_pytorch 5 | 6 | from .detect import FaceDetector 7 | 8 | here = os.path.dirname(os.path.abspath(__file__)) 9 | 10 | def get_net(weight_folder=None): 11 | """ 12 | Create pnet, rnet, onet for detector. 13 | """ 14 | 15 | pnet = mtcnn_pytorch.PNet() 16 | rnet = mtcnn_pytorch.RNet() 17 | onet = mtcnn_pytorch.ONet() 18 | 19 | if weight_folder is not None: 20 | pnet.load(os.path.join(weight_folder, 'pnet')) 21 | rnet.load(os.path.join(weight_folder, 'rnet')) 22 | onet.load(os.path.join(weight_folder, 'onet')) 23 | 24 | return pnet, rnet, onet 25 | 26 | 27 | def get_net_caffe(weight_folder): 28 | """ 29 | Create pnet, rnet, onet for detector. And init weights with caffe model from original mtcnn repo. 30 | """ 31 | weight_folder = os.path.join(here, "models") 32 | pnet, rnet, onet = get_net() 33 | pnet.load_caffe_model( 34 | np.load(os.path.join(weight_folder, 'pnet.npy'), allow_pickle=True)[()]) 35 | rnet.load_caffe_model( 36 | np.load(os.path.join(weight_folder, 'rnet.npy'), allow_pickle=True)[()]) 37 | onet.load_caffe_model( 38 | np.load(os.path.join(weight_folder, 'onet.npy'), allow_pickle=True)[()]) 39 | 40 | return pnet, rnet, onet 41 | 42 | def get_default_detector(device=None): 43 | """ 44 | Get the default face detector with pnet, rnet, onet trained by original mtcnn author. 
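    Example (a minimal sketch):
        >>> detector = get_default_detector()
        >>> boxes, landmarks = detector.detect('tests/asset/images/office5.jpg')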
45 | """ 46 | pnet, rnet, onet = get_net_caffe(os.path.join(here, "models")) 47 | if device is None: 48 | device = "cuda:0" if torch.cuda.is_available() else "cpu" 49 | detector = FaceDetector(pnet, rnet, onet, device) 50 | return detector 51 | -------------------------------------------------------------------------------- /mtcnn/deploy/align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from mtcnn.utils.align_trans import get_reference_facial_points, warp_and_crop_face 4 | 5 | refrence = get_reference_facial_points(default_square= True) 6 | 7 | def align_multi(img, boxes, landmarks, crop_size=(112, 112)): 8 | """Align muti-faces in a image 9 | 10 | Args: 11 | img (np.ndarray or torch.Tensor): Image matrix returned by cv2.imread() 12 | boxes (np.ndarray or torch.IntTensor): Bounding boxes with shape [n, 4] 13 | landmarks (np.ndarray or torch.IntTensor): Facial landmarks points with shape [n, 5, 2] 14 | """ 15 | 16 | if isinstance(boxes, torch.Tensor): 17 | boxes = boxes.cpu().numpy() 18 | 19 | if isinstance(landmarks, torch.Tensor): 20 | landmarks = landmarks.cpu().numpy() 21 | 22 | faces = [] 23 | for landmark in landmarks: 24 | warped_face = warp_and_crop_face(img, landmark, refrence, crop_size=crop_size) 25 | faces.append(warped_face) 26 | if len(faces) > 0: 27 | faces = np.stack(faces) 28 | return boxes, faces 29 | 30 | def filter_side_face(boxes, landmarks): 31 | """Mask all side face judged through facial landmark points. 32 | 33 | Args: 34 | boxes (torch.IntTensor): Bounding boxes with shape [n, 4] 35 | landmarks (or torch.IntTensor): Facial landmarks points with shape [n, 5, 2] 36 | 37 | Returns: 38 | torch.Tensor: Tensor mask. 39 | """ 40 | mid = (boxes[:, 2] + boxes[:, 0]).float() / 2 41 | mask = (landmarks[:, 0, 0].float() - mid) * (landmarks[:, 1, 0].float() - mid) <= 0 42 | 43 | return mask 44 | 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /mtcnn/deploy/batch_detect.py: -------------------------------------------------------------------------------- 1 | import math 2 | import cv2 3 | import torch 4 | import numpy as np 5 | import time 6 | 7 | import mtcnn.utils.functional as func 8 | 9 | 10 | def _no_grad(func): 11 | 12 | def wrapper(*args, **kwargs): 13 | with torch.no_grad(): 14 | ret = func(*args, **kwargs) 15 | return ret 16 | 17 | return wrapper 18 | 19 | 20 | class BatchImageDetector(object): 21 | 22 | def __init__(self, pnet, rnet, onet, device='cpu'): 23 | 24 | self.device = torch.device(device) 25 | 26 | self.pnet = pnet.to(self.device) 27 | self.rnet = rnet.to(self.device) 28 | self.onet = onet.to(self.device) 29 | 30 | self.onet.eval() # Onet has dropout layer. 31 | 32 | def _preprocess(self, imgs): 33 | 34 | # Convert image from rgb to bgr for Compatible with original caffe model. 
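        # Note: all images in `imgs` are expected to share the same height and
        # width, since they are stacked into a single batch tensor below.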
35 | tmp = [] 36 | for i, img in enumerate(imgs): 37 | tmp.append(cv2.cvtColor(img, cv2.COLOR_RGB2BGR)) 38 | 39 | imgs = np.stack(tmp) 40 | imgs = imgs.transpose(0, 3, 1, 2) 41 | imgs = torch.FloatTensor(imgs).to(self.device) 42 | imgs = func.imnormalize(imgs) 43 | 44 | return imgs 45 | 46 | def detect(self, imgs, threshold=[0.6, 0.7, 0.9], factor=0.7, minsize=12, nms_threshold=[0.7, 0.7, 0.3]): 47 | 48 | imgs = self._preprocess(imgs) 49 | stage_one_boxes = self.stage_one( 50 | imgs, threshold[0], factor, minsize, nms_threshold[0]) 51 | stage_two_boxes = self.stage_two( 52 | imgs, stage_one_boxes, threshold[1], nms_threshold[1]) 53 | stage_three_boxes, landmarks = self.stage_three( 54 | imgs, stage_two_boxes, threshold[2], nms_threshold[2]) 55 | 56 | return stage_three_boxes, landmarks 57 | 58 | def _generate_bboxes(self, probs, offsets, scale, threshold): 59 | """Generate bounding boxes at places 60 | where there is probably a face. 61 | 62 | Arguments: 63 | probs: a FloatTensor of shape [n, 2, h, w]. 64 | offsets: a FloatTensor array of shape [n, 4, h, w]. 65 | scale: a float number, 66 | width and height of the image were scaled by this number. 67 | threshold: a float number. 68 | 69 | Returns: 70 | boxes: LongTensor with shape [x, 4]. 71 | score: FloatTensor with shape [x]. 72 | offses: FloatTensor with shape [x, 4] 73 | img_label: IntTensor with shape [x] 74 | """ 75 | # applying P-Net is equivalent, in some sense, to 76 | # moving 12x12 window with stride 2 77 | stride = 2 78 | cell_size = 12 79 | 80 | # extract positive probability and resize it as [n, m] dim tensor. 81 | probs = probs[:, 1, :, :] 82 | 83 | # indices of boxes where there is probably a face 84 | mask = probs > threshold 85 | inds = mask.nonzero() 86 | 87 | if inds.shape[0] == 0: 88 | return torch.empty(0, dtype=torch.int32, device=self.device), \ 89 | torch.empty(0, dtype=torch.float32, device=self.device), \ 90 | torch.empty(0, dtype=torch.float32, device=self.device), \ 91 | torch.empty(0, dtype=torch.int32, device=self.device) 92 | 93 | # transformations of bounding boxes 94 | tx1, ty1, tx2, ty2 = [offsets[inds[:, 0], i, inds[:, 1], inds[:, 2]] 95 | for i in range(4)] 96 | 97 | offsets = torch.stack([tx1, ty1, tx2, ty2], 1) 98 | score = probs[inds[:, 0], inds[:, 1], inds[:, 2]] 99 | 100 | # P-Net is applied to scaled images 101 | # so we need to rescale bounding boxes back 102 | bounding_boxes = torch.stack([ 103 | stride*inds[:, -1] + 1.0, 104 | stride*inds[:, -2] + 1.0, 105 | stride*inds[:, -1] + 1.0 + cell_size, 106 | (stride*inds[:, -2] + 1.0 + cell_size), 107 | ], 0).transpose(0, 1).float() 108 | 109 | bounding_boxes = torch.round(bounding_boxes / scale).int() 110 | return bounding_boxes, score, offsets, inds[:, 0].int() 111 | 112 | def _calibrate_box(self, bboxes, offsets): 113 | """Transform bounding boxes to be more like true bounding boxes. 114 | 'offsets' is one of the outputs of the nets. 115 | 116 | Arguments: 117 | bboxes: a IntTensor of shape [n, 4]. 118 | offsets: a IntTensor of shape [n, 4]. 119 | 120 | Returns: 121 | a IntTensor of shape [n, 4]. 
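            Example: for a box (0, 0, 9, 19) (so w = 10, h = 20) and offsets
            (0.1, -0.05, 0.0, 0.0), the translation is (1, -1, 0, 0) and the
            calibrated box becomes (1, -1, 9, 19).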
122 | """ 123 | x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] 124 | w = x2 - x1 + 1.0 125 | h = y2 - y1 + 1.0 126 | w = torch.unsqueeze(w, 1) 127 | h = torch.unsqueeze(h, 1) 128 | 129 | # this is what happening here: 130 | # tx1, ty1, tx2, ty2 = [offsets[:, i] for i in range(4)] 131 | # x1_true = x1 + tx1*w 132 | # y1_true = y1 + ty1*h 133 | # x2_true = x2 + tx2*w 134 | # y2_true = y2 + ty2*h 135 | # below is just more compact form of this 136 | 137 | # are offsets always such that 138 | # x1 < x2 and y1 < y2 ? 139 | 140 | translation = torch.cat([w, h, w, h], 1).float() * offsets 141 | bboxes += torch.round(translation).int() 142 | return bboxes 143 | 144 | def _convert_to_square(self, bboxes): 145 | """Convert bounding boxes to a square form. 146 | 147 | Arguments: 148 | bboxes: a IntTensor of shape [n, 4]. 149 | 150 | Returns: 151 | a IntTensor of shape [n, 4], 152 | squared bounding boxes. 153 | """ 154 | 155 | square_bboxes = torch.zeros_like(bboxes, device=self.device, dtype=torch.float32) 156 | x1, y1, x2, y2 = [bboxes[:, i].float() for i in range(4)] 157 | h = y2 - y1 + 1.0 158 | w = x2 - x1 + 1.0 159 | max_side = torch.max(h, w) 160 | square_bboxes[:, 0] = x1 + w*0.5 - max_side*0.5 161 | square_bboxes[:, 1] = y1 + h*0.5 - max_side*0.5 162 | square_bboxes[:, 2] = square_bboxes[:, 0] + max_side - 1.0 163 | square_bboxes[:, 3] = square_bboxes[:, 1] + max_side - 1.0 164 | 165 | square_bboxes = torch.ceil(square_bboxes + 1).int() 166 | return square_bboxes 167 | 168 | def _refine_boxes(self, bboxes, w, h): 169 | 170 | bboxes = torch.max(torch.zeros_like(bboxes, device=self.device), bboxes) 171 | sizes = torch.IntTensor([[h, w, h, w]] * bboxes.shape[0]).to(self.device) 172 | bboxes = torch.min(bboxes, sizes) 173 | return bboxes 174 | 175 | def _calibrate_landmarks(self, bboxes, landmarks, align=False): 176 | """Compute the face landmarks coordinates 177 | 178 | Args: 179 | bboxes (torch.IntTensor): bounding boxes of shape [n, 4] 180 | landmarks (torch.floatTensor): landmarks regression output of shape [n, 10] 181 | align (bool, optional): Defaults to False. If "False", return the coordinates related to the origin image. Else, return the coordinates related to alinged faces. 182 | 183 | Returns: 184 | torch.IntTensor: face landmarks coordinates of shape [n, 10] 185 | """ 186 | 187 | x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] 188 | w = x2 - x1 + 1.0 189 | h = y2 - y1 + 1.0 190 | w = torch.unsqueeze(w, 1) 191 | h = torch.unsqueeze(h, 1) 192 | 193 | translation = torch.cat([w]*5 + [h]* 5, 1).float() * landmarks 194 | if align: 195 | landmarks = torch.ceil(translation).int() 196 | else: 197 | landmarks = torch.stack([bboxes[:, 0]] * 5 + [bboxes[:, 1]] * 5, 1) + torch.round(translation).int() 198 | return landmarks 199 | 200 | @_no_grad 201 | def stage_one(self, imgs, threshold, factor, minsize, nms_threshold): 202 | """Stage one of mtcnn detection. 203 | 204 | Args: 205 | imgs (torch.FloatTensro): Output of "_preprocess" method. 206 | threshold (float): The minimum probability of reserve bounding boxes. 207 | factor (float): Image pyramid scaling ratio. 208 | minsize (int): The minimum size of reserve bounding boxes. 209 | nms_threshold (float): retain boxes that satisfy overlap <= thresh 210 | 211 | Returns: 212 | torch.IntTensor: All bounding boxes with image label output by stage one detection. 
[n, 5] 213 | """ 214 | 215 | width = imgs.shape[-2] 216 | height = imgs.shape[-1] 217 | num_img = imgs.shape[0] 218 | 219 | # Compute valid scales 220 | scales = [] 221 | cur_width = width 222 | cur_height = height 223 | cur_factor = 1 224 | while cur_width >= 12 and cur_height >= 12: 225 | if 12 / cur_factor >= minsize: # Ignore boxes that smaller than minsize 226 | 227 | w = cur_width 228 | h = cur_height 229 | scales.append((w, h, cur_factor)) 230 | 231 | cur_factor *= factor 232 | cur_width = math.ceil(cur_width * factor) 233 | cur_height = math.ceil(cur_height * factor) 234 | 235 | # Get candidate boxesi ph 236 | candidate_boxes = torch.empty(0, dtype=torch.int32, device=self.device) 237 | candidate_scores = torch.empty(0, device=self.device) 238 | candidate_offsets = torch.empty( 239 | 0, dtype=torch.float32, device=self.device) 240 | all_img_labels = torch.empty(0, dtype=torch.int32, device=self.device) 241 | for w, h, f in scales: 242 | resize_img = torch.nn.functional.interpolate( 243 | imgs, size=(w, h), mode='bilinear') 244 | p_distribution, box_regs, _ = self.pnet(resize_img) 245 | 246 | candidate, scores, offsets, img_labels = self._generate_bboxes( 247 | p_distribution, box_regs, f, threshold) 248 | 249 | candidate_boxes = torch.cat([candidate_boxes, candidate]) 250 | candidate_scores = torch.cat([candidate_scores, scores]) 251 | candidate_offsets = torch.cat([candidate_offsets, offsets]) 252 | all_img_labels = torch.cat([all_img_labels, img_labels]) 253 | 254 | 255 | if candidate_boxes.shape[0] != 0: 256 | candidate_boxes = self._calibrate_box( 257 | candidate_boxes, candidate_offsets) 258 | candidate_boxes = self._convert_to_square(candidate_boxes) 259 | candidate_boxes = self._refine_boxes( 260 | candidate_boxes, width, height) 261 | 262 | final_boxes = torch.empty(0, dtype=torch.int32, device=self.device) 263 | final_img_labels = torch.empty(0, dtype=torch.int32, device=self.device) 264 | for i in range(num_img): 265 | mask = all_img_labels == i 266 | keep = func.nms(candidate_boxes[mask].cpu().numpy(), 267 | candidate_scores[mask].cpu().numpy(), nms_threshold) 268 | final_boxes = torch.cat([final_boxes, candidate_boxes[mask][keep]]) 269 | final_img_labels = torch.cat([final_img_labels, all_img_labels[mask][keep]]) 270 | 271 | return torch.cat([final_boxes, final_img_labels.unsqueeze(1 )], -1) 272 | else: 273 | return candidate_boxes 274 | 275 | 276 | @_no_grad 277 | def stage_two(self, imgs, boxes, threshold, nms_threshold): 278 | 279 | # no candidate face found. 
280 | if boxes.shape[0] == 0: 281 | return boxes 282 | 283 | width = imgs.shape[2] 284 | height = imgs.shape[3] 285 | lablels = boxes[:, -1] 286 | boxes = boxes[:, :4] 287 | 288 | num_img = imgs.shape[0] 289 | 290 | # get candidate faces 291 | candidate_faces = list() 292 | 293 | for box, label in zip(boxes, lablels): 294 | im = imgs[label, :, box[1]: box[3], box[0]: box[2]].unsqueeze(0) 295 | im = torch.nn.functional.interpolate( 296 | im, size=(24, 24), mode='bilinear') 297 | candidate_faces.append(im) 298 | 299 | candidate_faces = torch.cat(candidate_faces, 0) 300 | 301 | # rnet forward pass 302 | p_distribution, box_regs, _ = self.rnet(candidate_faces) 303 | 304 | # filter negative boxes 305 | scores = p_distribution[:, 1] 306 | mask = (scores >= threshold) 307 | boxes = boxes[mask] 308 | box_regs = box_regs[mask] 309 | scores = scores[mask] 310 | labels = lablels[mask] 311 | 312 | if boxes.shape[0] != 0: 313 | boxes = self._calibrate_box(boxes, box_regs) 314 | boxes = self._convert_to_square(boxes) 315 | boxes = self._refine_boxes(boxes, width, height) 316 | 317 | final_boxes = torch.empty(0, dtype=torch.int32, device=self.device) 318 | final_img_labels = torch.empty(0, dtype=torch.int32, device=self.device) 319 | for i in range(num_img): 320 | mask = labels == i 321 | keep = func.nms(boxes[mask].cpu().numpy(), 322 | scores[mask].cpu().numpy(), nms_threshold) 323 | final_boxes = torch.cat([final_boxes, boxes[mask][keep]]) 324 | final_img_labels = torch.cat([final_img_labels, labels[mask][keep]]) 325 | 326 | return torch.cat([final_boxes, final_img_labels.unsqueeze(1 )], -1) 327 | 328 | else: 329 | 330 | return boxes 331 | 332 | 333 | @_no_grad 334 | def stage_three(self, imgs, boxes, threshold, nms_threshold): 335 | # no candidate face found. 336 | if boxes.shape[0] == 0: 337 | return boxes, torch.empty(0, device=self.device, dtype=torch.int32) 338 | 339 | width = imgs.shape[2] 340 | height = imgs.shape[3] 341 | 342 | labels = boxes[:, -1] 343 | boxes = boxes[:, :4] 344 | 345 | num_img = imgs.shape[0] 346 | 347 | # get candidate faces 348 | candidate_faces = list() 349 | 350 | for box, label in zip(boxes, labels): 351 | im = imgs[label, :, box[1]: box[3], box[0]: box[2]].unsqueeze(0) 352 | im = torch.nn.functional.interpolate( 353 | im, size=(48, 48), mode='bilinear') 354 | candidate_faces.append(im) 355 | 356 | candidate_faces = torch.cat(candidate_faces, 0) 357 | 358 | p_distribution, box_regs, landmarks = self.onet(candidate_faces) 359 | 360 | # filter negative boxes 361 | scores = p_distribution[:, 1] 362 | mask = (scores >= threshold) 363 | boxes = boxes[mask] 364 | box_regs = box_regs[mask] 365 | scores = scores[mask] 366 | landmarks = landmarks[mask] 367 | labels =labels[mask] 368 | 369 | if boxes.shape[0] != 0: 370 | 371 | # compute face landmark points 372 | landmarks = self._calibrate_landmarks(boxes, landmarks) 373 | landmarks = torch.stack([landmarks[:, :5], landmarks[:, 5:10]], 2) 374 | 375 | boxes = self._calibrate_box(boxes, box_regs) 376 | boxes = self._refine_boxes(boxes, width, height) 377 | 378 | final_boxes = torch.empty(0, dtype=torch.int32, device=self.device) 379 | final_img_labels = torch.empty(0, dtype=torch.int32, device=self.device) 380 | final_landmarks = torch.empty(0, dtype=torch.int32, device=self.device) 381 | for i in range(num_img): 382 | 383 | # nms 384 | mask = labels == i 385 | keep = func.nms(boxes[mask].cpu().numpy(), 386 | scores[mask].cpu().numpy(), nms_threshold) 387 | final_boxes = torch.cat([final_boxes, boxes[mask][keep]]) 388 | 
final_img_labels = torch.cat([final_img_labels, labels[mask][keep]]) 389 | 390 | # compute face landmark points 391 | landm = landmarks [mask][keep] 392 | final_landmarks = torch.cat([final_landmarks, landm]) 393 | 394 | return torch.cat([final_boxes, final_img_labels.unsqueeze(1 )], -1), final_landmarks 395 | 396 | else: 397 | return boxes, landmarks -------------------------------------------------------------------------------- /mtcnn/deploy/detect.py: -------------------------------------------------------------------------------- 1 | import math 2 | import cv2 3 | import torch 4 | import time 5 | 6 | import mtcnn.utils.functional as func 7 | 8 | def _no_grad(func): 9 | 10 | def wrapper(*args, **kwargs): 11 | with torch.no_grad(): 12 | ret = func(*args, **kwargs) 13 | return ret 14 | 15 | return wrapper 16 | 17 | 18 | class FaceDetector(object): 19 | 20 | def __init__(self, pnet, rnet, onet, device='cpu'): 21 | 22 | self.device = torch.device(device) 23 | 24 | self.pnet = pnet.to(self.device) 25 | self.rnet = rnet.to(self.device) 26 | self.onet = onet.to(self.device) 27 | 28 | self.onet.eval() # Onet has dropout layer. 29 | 30 | def to_script(self): 31 | if isinstance(self.pnet, torch.nn.Module): 32 | self.pnet.to_script() 33 | 34 | if isinstance(self.rnet, torch.nn.Module): 35 | self.rnet.to_script() 36 | 37 | if isinstance(self.onet, torch.nn.Module): 38 | self.onet.to_script() 39 | return self 40 | 41 | def _preprocess(self, img): 42 | 43 | if isinstance(img, str): 44 | img = cv2.imread(img) 45 | 46 | # Convert image from rgb to bgr for Compatible with original caffe model. 47 | img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) 48 | img = img.transpose(2, 0, 1) 49 | img = torch.FloatTensor(img).to(self.device) 50 | img = func.imnormalize(img) 51 | img = img.unsqueeze(0) 52 | 53 | return img 54 | 55 | def detect(self, img, threshold=[0.6, 0.7, 0.85], factor=0.7, minsize=12, nms_threshold=[0.7, 0.7, 0.3]): 56 | 57 | img = self._preprocess(img) 58 | stage_one_boxes = self.stage_one(img, threshold[0], factor, minsize, nms_threshold[0]) 59 | stage_two_boxes = self.stage_two(img, stage_one_boxes, threshold[1], nms_threshold[1]) 60 | stage_three_boxes, landmarks = self.stage_three( 61 | img, stage_two_boxes, threshold[2], nms_threshold[2]) 62 | 63 | return stage_three_boxes, landmarks 64 | 65 | def _generate_bboxes(self, probs, offsets, scale, threshold): 66 | """Generate bounding boxes at places 67 | where there is probably a face. 68 | 69 | Arguments: 70 | probs: a FloatTensor of shape [1, 2, n, m]. 71 | offsets: a FloatTensor array of shape [1, 4, n, m]. 72 | scale: a float number, 73 | width and height of the image were scaled by this number. 74 | threshold: a float number. 75 | 76 | Returns: 77 | boxes: LongTensor with shape [x, 4]. 78 | score: FloatTensor with shape [x]. 79 | """ 80 | 81 | # applying P-Net is equivalent, in some sense, to 82 | # moving 12x12 window with stride 2 83 | stride = 2 84 | cell_size = 12 85 | 86 | # extract positive probability and resize it as [n, m] dim tensor. 
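        # Each retained position (i, j) of this map corresponds to a 12x12
        # window whose top-left corner is (stride*j + 1, stride*i + 1) in the
        # scaled image; the boxes built below are divided by `scale` to map
        # them back to the original image coordinates.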
87 | probs = probs[0, 1, :, :] 88 | 89 | # indices of boxes where there is probably a face 90 | inds = (probs > threshold).nonzero() 91 | 92 | if inds.shape[0] == 0: 93 | return torch.empty((0, 4), dtype=torch.int32, device=self.device), torch.empty(0, dtype=torch.float32, device=self.device), torch.empty((0, 4), dtype=torch.float32, device=self.device) 94 | 95 | # transformations of bounding boxes 96 | tx1, ty1, tx2, ty2 = [offsets[0, i, inds[:, 0], inds[:, 1]] 97 | for i in range(4)] 98 | # they are defined as: 99 | # w = x2 - x1 + 1 100 | # h = y2 - y1 + 1 101 | # x1_true = x1 + tx1*w 102 | # x2_true = x2 + tx2*w 103 | # y1_true = y1 + ty1*h 104 | # y2_true = y2 + ty2*h 105 | 106 | offsets = torch.stack([tx1, ty1, tx2, ty2], 1) 107 | score = probs[inds[:, 0], inds[:, 1]] 108 | 109 | # P-Net is applied to scaled images 110 | # so we need to rescale bounding boxes back 111 | bounding_boxes = torch.stack([ 112 | stride*inds[:, 1] + 1.0, 113 | stride*inds[:, 0] + 1.0, 114 | stride*inds[:, 1] + 1.0 + cell_size, 115 | (stride*inds[:, 0] + 1.0 + cell_size), 116 | ], 0).transpose(0, 1).float() 117 | 118 | bounding_boxes = torch.round(bounding_boxes / scale).int() 119 | return bounding_boxes, score, offsets 120 | 121 | def _calibrate_box(self, bboxes, offsets): 122 | """Transform bounding boxes to be more like true bounding boxes. 123 | 'offsets' is one of the outputs of the nets. 124 | 125 | Arguments: 126 | bboxes: a IntTensor of shape [n, 4]. 127 | offsets: a IntTensor of shape [n, 4]. 128 | 129 | Returns: 130 | a IntTensor of shape [n, 4]. 131 | """ 132 | x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] 133 | w = x2 - x1 + 1.0 134 | h = y2 - y1 + 1.0 135 | w = torch.unsqueeze(w, 1) 136 | h = torch.unsqueeze(h, 1) 137 | 138 | # this is what happening here: 139 | # tx1, ty1, tx2, ty2 = [offsets[:, i] for i in range(4)] 140 | # x1_true = x1 + tx1*w 141 | # y1_true = y1 + ty1*h 142 | # x2_true = x2 + tx2*w 143 | # y2_true = y2 + ty2*h 144 | # below is just more compact form of this 145 | 146 | # are offsets always such that 147 | # x1 < x2 and y1 < y2 ? 148 | 149 | translation = torch.cat([w, h, w, h], 1).float() * offsets 150 | bboxes += torch.round(translation).int() 151 | return bboxes 152 | 153 | def _convert_to_square(self, bboxes): 154 | """Convert bounding boxes to a square form. 155 | 156 | Arguments: 157 | bboxes: a IntTensor of shape [n, 4]. 158 | 159 | Returns: 160 | a IntTensor of shape [n, 4], 161 | squared bounding boxes. 
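            Example: the box (0, 0, 9, 19) (w = 10, h = 20) is centred inside a
            20x20 square, giving (-5, 0, 14, 19); after the final
            `torch.ceil(. + 1)` this becomes (-4, 1, 15, 20).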
162 | """ 163 | 164 | square_bboxes = torch.zeros_like(bboxes, device=self.device, dtype=torch.float32) 165 | x1, y1, x2, y2 = [bboxes[:, i].float() for i in range(4)] 166 | h = y2 - y1 + 1.0 167 | w = x2 - x1 + 1.0 168 | max_side = torch.max(h, w) 169 | square_bboxes[:, 0] = x1 + w*0.5 - max_side*0.5 170 | square_bboxes[:, 1] = y1 + h*0.5 - max_side*0.5 171 | square_bboxes[:, 2] = square_bboxes[:, 0] + max_side - 1.0 172 | square_bboxes[:, 3] = square_bboxes[:, 1] + max_side - 1.0 173 | 174 | square_bboxes = torch.ceil(square_bboxes + 1).int() 175 | return square_bboxes 176 | 177 | def _refine_boxes(self, bboxes, w, h): 178 | 179 | bboxes = torch.max(torch.zeros_like(bboxes, device=self.device), bboxes) 180 | sizes = torch.IntTensor([[h, w, h, w]] * bboxes.shape[0]).to(self.device) 181 | bboxes = torch.min(bboxes, sizes) 182 | return bboxes 183 | 184 | def _calibrate_landmarks(self, bboxes, landmarks, align=False): 185 | """Compute the face landmarks coordinates 186 | 187 | Args: 188 | bboxes (torch.IntTensor): bounding boxes of shape [n, 4] 189 | landmarks (torch.floatTensor): landmarks regression output of shape [n, 10] 190 | align (bool, optional): Defaults to False. If "False", return the coordinates related to the origin image. Else, return the coordinates related to alinged faces. 191 | 192 | Returns: 193 | torch.IntTensor: face landmarks coordinates of shape [n, 10] 194 | """ 195 | 196 | x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] 197 | w = x2 - x1 + 1.0 198 | h = y2 - y1 + 1.0 199 | w = torch.unsqueeze(w, 1) 200 | h = torch.unsqueeze(h, 1) 201 | 202 | translation = torch.cat([w]*5 + [h]* 5, 1).float() * landmarks 203 | if align: 204 | landmarks = torch.ceil(translation).int() 205 | else: 206 | landmarks = torch.stack([bboxes[:, 0]] * 5 + [bboxes[:, 1]] * 5, 1) + torch.round(translation).int() 207 | return landmarks 208 | 209 | @_no_grad 210 | def stage_one(self, img, threshold, factor, minsize, nms_threshold): 211 | width = img.shape[2] 212 | height = img.shape[3] 213 | 214 | # Compute valid scales 215 | scales = [] 216 | cur_width = width 217 | cur_height = height 218 | cur_factor = 1 219 | while cur_width >= 12 and cur_height >= 12: 220 | if 12 / cur_factor >= minsize: # Ignore boxes that smaller than minsize 221 | w = cur_width 222 | h = cur_height 223 | scales.append((w, h, cur_factor)) 224 | 225 | cur_factor *= factor 226 | cur_width = math.ceil(cur_width * factor) 227 | cur_height = math.ceil(cur_height * factor) 228 | 229 | # Get candidate boxesi ph 230 | candidate_boxes = torch.empty((0, 4), dtype=torch.int32, device=self.device) 231 | candidate_scores = torch.empty((0), device=self.device) 232 | candidate_offsets = torch.empty((0, 4), dtype=torch.float32, device=self.device) 233 | for w, h, f in scales: 234 | resize_img = torch.nn.functional.interpolate( 235 | img, size=(w, h), mode='bilinear') 236 | p_distribution, box_regs, _ = self.pnet(resize_img) 237 | 238 | candidate, scores, offsets = self._generate_bboxes( 239 | p_distribution, box_regs, f, threshold) 240 | 241 | candidate_boxes = torch.cat([candidate_boxes, candidate]) 242 | candidate_scores = torch.cat([candidate_scores, scores]) 243 | candidate_offsets = torch.cat([candidate_offsets, offsets]) 244 | 245 | # nms 246 | if candidate_boxes.shape[0] != 0: 247 | candidate_boxes = self._calibrate_box(candidate_boxes, candidate_offsets) 248 | keep = func.nms(candidate_boxes.cpu().numpy(), candidate_scores.cpu().numpy(), nms_threshold, device=self.device) 249 | return candidate_boxes[keep] 250 | else: 251 | return 
candidate_boxes 252 | 253 | @_no_grad 254 | def stage_two(self, img, boxes, threshold, nms_threshold): 255 | 256 | # no candidate face found. 257 | if boxes.shape[0] == 0: 258 | return boxes 259 | 260 | width = img.shape[2] 261 | height = img.shape[3] 262 | 263 | boxes = self._convert_to_square(boxes) 264 | boxes = self._refine_boxes(boxes, width, height) 265 | 266 | # get candidate faces 267 | candidate_faces = list() 268 | 269 | for box in boxes: 270 | im = img[:, :, box[1]: box[3], box[0]: box[2]] 271 | im = torch.nn.functional.interpolate( 272 | im, size=(24, 24), mode='bilinear') 273 | candidate_faces.append(im) 274 | 275 | candidate_faces = torch.cat(candidate_faces, 0) 276 | 277 | # rnet forward pass 278 | p_distribution, box_regs, _ = self.rnet(candidate_faces) 279 | 280 | # filter negative boxes 281 | scores = p_distribution[:, 1] 282 | mask = (scores >= threshold) 283 | boxes = boxes[mask] 284 | box_regs = box_regs[mask] 285 | scores = scores[mask] 286 | 287 | if boxes.shape[0] > 0: 288 | boxes = self._calibrate_box(boxes, box_regs) 289 | # nms 290 | keep = func.nms(boxes.cpu().numpy(), scores.cpu().numpy(), nms_threshold, device=self.device) 291 | boxes = boxes[keep] 292 | return boxes 293 | 294 | @_no_grad 295 | def stage_three(self, img, boxes, threshold, nms_threshold): 296 | # no candidate face found. 297 | if boxes.shape[0] == 0: 298 | return boxes, torch.empty(0, device=self.device, dtype=torch.int32) 299 | 300 | width = img.shape[2] 301 | height = img.shape[3] 302 | 303 | boxes = self._convert_to_square(boxes) 304 | boxes = self._refine_boxes(boxes, width, height) 305 | 306 | # get candidate faces 307 | candidate_faces = list() 308 | 309 | for box in boxes: 310 | im = img[:, :, box[1]: box[3], box[0]: box[2]] 311 | im = torch.nn.functional.interpolate( 312 | im, size=(48, 48), mode='bilinear') 313 | candidate_faces.append(im) 314 | 315 | candidate_faces = torch.cat(candidate_faces, 0) 316 | 317 | p_distribution, box_regs, landmarks = self.onet(candidate_faces) 318 | 319 | # filter negative boxes 320 | scores = p_distribution[:, 1] 321 | mask = (scores >= threshold) 322 | boxes = boxes[mask] 323 | box_regs = box_regs[mask] 324 | scores = scores[mask] 325 | landmarks = landmarks[mask] 326 | 327 | if boxes.shape[0] > 0: 328 | 329 | # compute face landmark points 330 | landmarks = self._calibrate_landmarks(boxes, landmarks) 331 | landmarks = torch.stack([landmarks[:, :5], landmarks[:, 5:10]], 2) 332 | boxes = self._calibrate_box(boxes, box_regs) 333 | boxes = self._refine_boxes(boxes, width, height) 334 | 335 | # nms 336 | keep = func.nms(boxes.cpu().numpy(), scores.cpu().numpy(), nms_threshold, device=self.device) 337 | boxes = boxes[keep] 338 | landmarks = landmarks[keep] 339 | 340 | return boxes, landmarks 341 | -------------------------------------------------------------------------------- /mtcnn/deploy/models/onet.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BrightXiaoHan/FaceDetector/6ea4510411689a437bc380967d4a0cc086381777/mtcnn/deploy/models/onet.npy -------------------------------------------------------------------------------- /mtcnn/deploy/models/pnet.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BrightXiaoHan/FaceDetector/6ea4510411689a437bc380967d4a0cc086381777/mtcnn/deploy/models/pnet.npy -------------------------------------------------------------------------------- /mtcnn/deploy/models/rnet.npy: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/BrightXiaoHan/FaceDetector/6ea4510411689a437bc380967d4a0cc086381777/mtcnn/deploy/models/rnet.npy -------------------------------------------------------------------------------- /mtcnn/deploy/tracker.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import uuid 3 | import numpy as np 4 | 5 | import mtcnn.deploy.detect as detect 6 | import mtcnn.utils.functional as func 7 | 8 | from collections import defaultdict 9 | from easydict import EasyDict 10 | 11 | class FaceTracker(object): 12 | 13 | def __init__(self, detector, re_detect_every=10, min_interval=2, iou_thres=0.3): 14 | """Set hyper parameters for FaceTracker 15 | 16 | Keyword Arguments: 17 | detector {mtcnn.deploy.detect.FaceDetector} -- FaceDetector object. 18 | re_detect_every {int} -- How often does this tracker do full mtcnn detection.(default: {10}) 19 | min_interval {int} -- If we can't detect any face in some areas, we drop these boexs out. (default: {2}) 20 | iou_thres (float) -- Iou < iou_thres is regard as the same person. 21 | """ 22 | 23 | self.detector = detector 24 | self.re_detect_every = re_detect_every 25 | self.min_interval = min_interval 26 | self.iou_thres = iou_thres 27 | 28 | self.reset() 29 | self.image_cache = defaultdict(list) 30 | 31 | # Set params for detector. This can be modefied by "set_detect_params" 32 | self.default_detect_params = EasyDict( 33 | threshold=[0.6, 0.7, 0.9], 34 | factor=0.7, 35 | minsize=12, 36 | nms_threshold=[0.7, 0.7, 0.3] 37 | ) 38 | 39 | def set_detect_params(self, **kwargs): 40 | self.default_detect_params.update(kwargs) 41 | 42 | 43 | def track(self, frame): 44 | if self.cur_count % self.re_detect_every == 0 or len(self.boxes_cache) == 0: 45 | boxes, landmarks = self.detector.detect(frame, **self.default_detect_params) 46 | 47 | update_cache = {} 48 | if boxes.shape[0] != 0: 49 | 50 | for i, b in enumerate(self.boxes_cache): 51 | b = torch.tensor(b, dtype=torch.int32, device=self.detector.device) 52 | ovr = func.iou_torch(b, boxes) 53 | max_ovr = ovr.max() 54 | max_index = ovr.argmax() 55 | if max_ovr >= self.iou_thres: 56 | update_cache[max_index] = self.label_cache[i] 57 | 58 | self.reset() 59 | for b in boxes: 60 | self.label_cache.append(uuid.uuid1()) 61 | self.interval_cache.append(0) 62 | self.boxes_cache.append(b.cpu().numpy().tolist()) 63 | 64 | for k, v in update_cache.items(): 65 | self.label_cache[k] = v 66 | 67 | for b, label in zip(self.boxes_cache, self.label_cache): 68 | self.image_cache[label].append(frame[b[1]: b[3], b[0]: b[2]]) 69 | 70 | self.cur_count += 1 71 | 72 | else: 73 | torch_img = self.detector._preprocess(frame) 74 | boxes_cache = torch.tensor(self.boxes_cache, dtype=torch.int32, device=self.detector.device) 75 | boxes, landmarks = self.detector.stage_three(torch_img, boxes_cache, self.default_detect_params.threshold[2], self.default_detect_params.nms_threshold[2]) 76 | update_cache = {} 77 | for b in boxes: 78 | ovr = func.iou_torch(b, boxes_cache) 79 | max_index = int(ovr.argmax()) 80 | update_cache[max_index] = b.cpu().numpy().tolist() 81 | 82 | revome_list = [] 83 | for i, b in enumerate(self.boxes_cache): 84 | if i in update_cache: 85 | self.boxes_cache[i] = update_cache[i] 86 | b = update_cache[i] 87 | self.interval_cache[i] = 0 88 | self.image_cache[self.label_cache[i]].append(frame[b[1]: b[3], b[0]: b[2]]) 89 | else: 90 | if self.interval_cache[i] <= self.min_interval: 91 | 
self.interval_cache[i] += 1 92 | else: 93 | revome_list.append(i) 94 | 95 | self.label_cache = [value for i, value in enumerate(self.label_cache) if i not in revome_list] 96 | self.interval_cache = [value for i, value in enumerate(self.interval_cache) if i not in revome_list] 97 | self.boxes_cache = [value for i, value in enumerate(self.boxes_cache) if i not in revome_list] 98 | 99 | self.cur_count += 1 100 | 101 | return boxes, landmarks 102 | 103 | 104 | def reset(self): 105 | self.cur_count = 0 106 | self.boxes_cache = [] 107 | self.label_cache = [] 108 | self.interval_cache = [] 109 | 110 | def get_cache(self): 111 | """ 112 | Get the images in image_cache and clear the images in cache. 113 | """ 114 | tmp = self.image_cache 115 | self.image_cache = defaultdict(list) 116 | return tmp 117 | 118 | 119 | -------------------------------------------------------------------------------- /mtcnn/network/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BrightXiaoHan/FaceDetector/6ea4510411689a437bc380967d4a0cc086381777/mtcnn/network/__init__.py -------------------------------------------------------------------------------- /mtcnn/network/mtcnn_pytorch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from collections import OrderedDict 5 | 6 | 7 | def weights_init(m): 8 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): 9 | nn.init.xavier_uniform(m.weight.data) 10 | nn.init.constant(m.bias, 0.1) 11 | 12 | 13 | class Flatten(nn.Module): 14 | 15 | def __init__(self): 16 | super(Flatten, self).__init__() 17 | 18 | def forward(self, x): 19 | """ 20 | Arguments: 21 | x: a float tensor with shape [batch_size, c, h, w]. 22 | Returns: 23 | a float tensor with shape [batch_size, c*h*w]. 24 | """ 25 | 26 | # without this pretrained model isn't working 27 | x = x.transpose(3, 2).contiguous() 28 | 29 | return x.view(x.size(0), -1) 30 | 31 | 32 | class _Net(nn.Module): 33 | def __init__(self, cls_factor=1, box_factor=1, landmark_factor=1, is_train=False, device='cpu'): 34 | super(_Net, self).__init__() 35 | 36 | self.is_train = is_train 37 | self.device = torch.device(device) 38 | 39 | self._init_net() 40 | 41 | if is_train: 42 | # loss function 43 | self.cls_factor = cls_factor 44 | self.box_factor = box_factor 45 | self.land_factor = landmark_factor 46 | self.loss_cls = nn.NLLLoss(reduction='none') 47 | self.loss_box = nn.MSELoss() 48 | self.loss_landmark = nn.MSELoss() 49 | 50 | # weight initiation with xavier 51 | self.apply(weights_init) 52 | 53 | # Move tensor to target device 54 | self.to(self.device) 55 | 56 | if not self.is_train: 57 | self.eval() 58 | 59 | def get_loss(self, x, gt_label, gt_boxes, gt_landmarks): 60 | """ 61 | Get total loss. 62 | Arguments: 63 | x {Tensor} -- Input normalized images. (Note here: rnet, onet only support fix size images.) 64 | gt_label {Tensor} -- Ground truth label. 65 | gt_boxes {Tensor} -- Ground truth boxes coordinate. 
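            gt_landmarks {Tensor} -- Ground truth facial landmark coordinates.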
66 | 67 | Returns: 68 | Tensor -- classification loss + box regression loss + landmark loss 69 | """ 70 | if not self.is_train: 71 | raise AssertionError( 72 | "Method 'get_loss' is avaliable only when 'is_train' is True.") 73 | 74 | # Forward pass 75 | pred_label, pred_offset, pred_landmarks = self.forward(x) 76 | 77 | # Reshape the tensor 78 | pred_label = pred_label.view(-1, 2) 79 | pred_offset = pred_offset.view(-1, 4) 80 | pred_landmarks = pred_landmarks.view(-1, 10) 81 | 82 | # Compute the loss 83 | cls_loss = self.cls_loss(gt_label, pred_label) 84 | box_loss = self.box_loss(gt_label, gt_boxes, pred_offset) 85 | landmark_loss = self.landmark_loss( 86 | gt_label, gt_landmarks, pred_landmarks) 87 | 88 | return cls_loss + box_loss + landmark_loss 89 | 90 | def _init_net(self): 91 | raise NotImplementedError 92 | 93 | def cls_loss(self, gt_label, pred_label): 94 | """Classification loss 95 | 96 | Args: 97 | gt_label (Tensor): Pobability distribution with shape (batch_size, 2) 98 | pred_label (Tensor): Ground truth lables with shape (batch_size) 99 | 100 | Returns: 101 | Tensor: Cross-Entropy loss multiply by cls_factor 102 | """ 103 | 104 | pred_label = torch.squeeze(pred_label) 105 | gt_label = torch.squeeze(gt_label) 106 | 107 | # Online hard sample mining 108 | 109 | mask = torch.eq(gt_label, 0) | torch.eq(gt_label, 1) 110 | valid_gt_label = torch.masked_select(gt_label, mask) 111 | mask = torch.stack([mask] * 2, dim=1) 112 | valid_pred_label = torch.masked_select(pred_label, mask).reshape(-1, 2) 113 | 114 | # compute log-softmax 115 | valid_pred_label = torch.log(valid_pred_label) 116 | 117 | loss = self.loss_cls(valid_pred_label, valid_gt_label) 118 | 119 | pos_mask = torch.eq(valid_gt_label, 1) 120 | neg_mask = torch.eq(valid_gt_label, 0) 121 | 122 | neg_loss = loss.masked_select(neg_mask) 123 | pos_loss = loss.masked_select(pos_mask) 124 | 125 | if neg_loss.shape[0] > pos_loss.shape[0]: 126 | neg_loss, _ = neg_loss.topk(pos_loss.shape[0]) 127 | loss = torch.cat([pos_loss, neg_loss]) 128 | loss = torch.mean(loss) 129 | 130 | return loss * self.cls_factor 131 | 132 | def box_loss(self, gt_label, gt_offset, pred_offset): 133 | pred_offset = torch.squeeze(pred_offset) 134 | gt_offset = torch.squeeze(gt_offset) 135 | gt_label = torch.squeeze(gt_label) 136 | 137 | mask = torch.eq(gt_label, 1) | torch.eq(gt_label, 2) 138 | # broadcast mask 139 | mask = torch.stack([mask] * 4, dim=1) 140 | 141 | # only valid element can effect the loss 142 | valid_gt_offset = torch.masked_select(gt_offset, mask).reshape(-1, 4) 143 | valid_pred_offset = torch.masked_select( 144 | pred_offset, mask).reshape(-1, 4) 145 | return self.loss_box(valid_pred_offset, valid_gt_offset)*self.box_factor 146 | 147 | def landmark_loss(self, gt_label, gt_landmark, pred_landmark): 148 | pred_landmark = torch.squeeze(pred_landmark) 149 | gt_landmark = torch.squeeze(gt_landmark) 150 | gt_label = torch.squeeze(gt_label) 151 | mask = torch.eq(gt_label, 3) 152 | # broadcast mask 153 | mask = torch.stack([mask] * 10, dim=1) 154 | 155 | valid_gt_landmark = torch.masked_select( 156 | gt_landmark, mask).reshape(-1, 10) 157 | valid_pred_landmark = torch.masked_select( 158 | pred_landmark, mask).reshape(-1, 10) 159 | return self.loss_landmark(valid_pred_landmark, valid_gt_landmark)*self.land_factor 160 | 161 | def load_caffe_model(self, weights): 162 | if self.is_train: 163 | raise AssertionError("This method is avaliable only when 'is_train' is false.") 164 | for n, p in self.named_parameters(): 165 | p.data = 
torch.FloatTensor(weights[n], device="cpu") 166 | 167 | def load(self, model_file): 168 | state_dict = torch.load(model_file, map_location=self.device) 169 | self.load_state_dict(state_dict, strict=False) 170 | 171 | def to_script(self): 172 | raise NotImplementedError 173 | 174 | class PNet(_Net): 175 | 176 | def __init__(self, **kwargs): 177 | # Hyper-parameter from original papaer 178 | param = [1, 0.5, 0.5] 179 | super(PNet, self).__init__(*param, **kwargs) 180 | 181 | def _init_net(self): 182 | 183 | # backend 184 | self.body = nn.Sequential(OrderedDict([ 185 | ('conv1', nn.Conv2d(3, 10, kernel_size=3, stride=1)), 186 | ('prelu1', nn.PReLU(10)), 187 | ('pool1', nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)), 188 | ('conv2', nn.Conv2d(10, 16, 3, 1)), 189 | ('prelu2', nn.PReLU(16)), 190 | ('conv3', nn.Conv2d(16, 32, kernel_size=3, stride=1)), 191 | ('prelu3', nn.PReLU(32)) 192 | ])) 193 | 194 | # detection 195 | self.cls = nn.Sequential(OrderedDict([ 196 | ('conv4-1', nn.Conv2d(32, 2, kernel_size=1, stride=1)), 197 | ('softmax', nn.Softmax(1)) 198 | ])) 199 | # bounding box regresion 200 | self.box_offset = nn.Sequential(OrderedDict([ 201 | ('conv4-2', nn.Conv2d(32, 4, kernel_size=1, stride=1)), 202 | ])) 203 | 204 | if self.is_train: 205 | # landmark regression 206 | self.landmarks = nn.Sequential(OrderedDict([ 207 | ('conv4-2', nn.Conv2d(32, 10, kernel_size=1, stride=1)) 208 | ])) 209 | 210 | def forward(self, x): 211 | feature_map = self.body(x) 212 | label = self.cls(feature_map) 213 | offset = self.box_offset(feature_map) 214 | landmarks = self.landmarks(feature_map) if self.is_train else torch.empty(0, device=self.device) 215 | 216 | return label, offset, landmarks 217 | 218 | def to_script(self): 219 | data = torch.randn((100, 3, 12, 12), device=self.device) 220 | script_module = torch.jit.trace(self, data) 221 | return script_module 222 | 223 | 224 | class RNet(_Net): 225 | 226 | def __init__(self, **kwargs): 227 | # Hyper-parameter from original papaer 228 | param = [1, 0.5, 0.5] 229 | super(RNet, self).__init__(*param, **kwargs) 230 | 231 | def _init_net(self): 232 | 233 | self.body = nn.Sequential(OrderedDict([ 234 | ('conv1', nn.Conv2d(3, 28, kernel_size=3, stride=1)), 235 | ('prelu1', nn.PReLU(28)), 236 | ('pool1', nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)), 237 | 238 | ('conv2', nn.Conv2d(28, 48, kernel_size=3, stride=1)), 239 | ('prelu2', nn.PReLU(48)), 240 | ('pool2', nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)), 241 | 242 | ('conv3', nn.Conv2d(48, 64, kernel_size=2, stride=1)), 243 | ('prelu3', nn.PReLU(64)), 244 | 245 | ('flatten', Flatten()), 246 | ('conv4', nn.Linear(576, 128)), 247 | ('prelu4', nn.PReLU(128)) 248 | ])) 249 | 250 | # detection 251 | self.cls = nn.Sequential(OrderedDict([ 252 | ('conv5-1', nn.Linear(128, 2)), 253 | ('softmax', nn.Softmax(1)) 254 | ])) 255 | # bounding box regression 256 | self.box_offset = nn.Sequential(OrderedDict([ 257 | ('conv5-2', nn.Linear(128, 4)) 258 | ])) 259 | 260 | if self.is_train: 261 | # lanbmark localization 262 | self.landmarks = nn.Sequential(OrderedDict([ 263 | ('conv5-3', nn.Linear(128, 10)) 264 | ])) 265 | 266 | def forward(self, x): 267 | # backend 268 | x = self.body(x) 269 | 270 | # detection 271 | det = self.cls(x) 272 | box = self.box_offset(x) 273 | landmarks = self.landmarks(x) if self.is_train else torch.empty(0, device=self.device) 274 | 275 | return det, box, landmarks 276 | 277 | def to_script(self): 278 | data = torch.randn((100, 3, 24, 24), device=self.device) 279 | 
script_module = torch.jit.trace(self, data) 280 | return script_module 281 | 282 | 283 | class ONet(_Net): 284 | 285 | def __init__(self, **kwargs): 286 | # Hyper-parameter from original papaer 287 | param = [1, 5, 50] 288 | super(ONet, self).__init__(*param, **kwargs) 289 | 290 | def _init_net(self): 291 | # backend 292 | 293 | self.body = nn.Sequential(OrderedDict([ 294 | ('conv1', nn.Conv2d(3, 32, kernel_size=3, stride=1)), 295 | ('prelu1', nn.PReLU(32)), 296 | ('pool1', nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)), 297 | 298 | ('conv2', nn.Conv2d(32, 64, kernel_size=3, stride=1)), 299 | ('prelu2', nn.PReLU(64)), 300 | ('pool2', nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)), 301 | 302 | ('conv3', nn.Conv2d(64, 64, kernel_size=3, stride=1)), 303 | ('prelu3', nn.PReLU(64)), 304 | ('pool3', nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)), 305 | 306 | ('conv4', nn.Conv2d(64, 128, kernel_size=2, stride=1)), 307 | ('prelu4', nn.PReLU(128)), 308 | 309 | ('flatten', Flatten()), 310 | ('conv5', nn.Linear(1152, 256)), 311 | ('drop5', nn.Dropout(0.25)), 312 | ('prelu5', nn.PReLU(256)), 313 | ])) 314 | 315 | # detection 316 | self.cls = nn.Sequential(OrderedDict([ 317 | ('conv6-1', nn.Linear(256, 2)), 318 | ('softmax', nn.Softmax(1)) 319 | ])) 320 | # bounding box regression 321 | self.box_offset = nn.Sequential(OrderedDict([ 322 | ('conv6-2', nn.Linear(256, 4)) 323 | ])) 324 | # lanbmark localization 325 | self.landmarks = nn.Sequential(OrderedDict([ 326 | ('conv6-3', nn.Linear(256, 10)) 327 | ])) 328 | 329 | def forward(self, x): 330 | # backend 331 | x = self.body(x) 332 | 333 | # detection 334 | det = self.cls(x) 335 | 336 | # box regression 337 | box = self.box_offset(x) 338 | 339 | # landmarks regresion 340 | landmarks = self.landmarks(x) 341 | 342 | return det, box, landmarks 343 | 344 | def to_script(self): 345 | data = torch.randn((100, 3, 48, 48), device=self.device) 346 | script_module = torch.jit.trace(self, data) 347 | return script_module 348 | -------------------------------------------------------------------------------- /mtcnn/train/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BrightXiaoHan/FaceDetector/6ea4510411689a437bc380967d4a0cc086381777/mtcnn/train/__init__.py -------------------------------------------------------------------------------- /mtcnn/train/data.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import random 4 | import pandas as pd 5 | import torch 6 | import numpy as np 7 | import mtcnn.train.gen_landmark as landm 8 | import mtcnn.train.gen_pnet_train as pnet 9 | import mtcnn.train.gen_rnet_train as rnet 10 | import mtcnn.utils.functional as func 11 | 12 | from torch.utils.data import Dataset, DataLoader 13 | 14 | class ClsBoxData(object): 15 | 16 | """ 17 | Define a custom data structure training classification task and bounding box regression task. 18 | """ 19 | 20 | def __init__(self, pos, part, neg, pos_reg, part_reg): 21 | 22 | self.pos = pos 23 | self.part = part 24 | self.neg = neg 25 | self.pos_reg = pos_reg 26 | self.part_reg = part_reg 27 | 28 | def get_training_data(output_folder, suffix): 29 | """Get training data for classification and bounding box regression tasks 30 | 31 | Arguments: 32 | output_folder {str} -- Consistent with parameter 'output_folder' passed in method "generate_training_data_for...". 33 | suffix {str} -- Create a folder called $suffix in $output_folder. 
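        Note: each *_meta.csv is expected to hold the crop file name in its first column; positive_meta.csv and part_meta.csv carry the four bounding-box regression offsets in the remaining columns, while negative_meta.csv lists file names only.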
34 | Returns: 35 | {PnetData} -- 'PnetData' object. 36 | """ 37 | 38 | positive_dest = os.path.join(output_folder, suffix, 'positive') 39 | negative_dest = os.path.join(output_folder, suffix, 'negative') 40 | part_dest = os.path.join(output_folder, suffix, 'part') 41 | 42 | positive_meta_file = os.path.join(output_folder, suffix, 'positive_meta.csv') 43 | part_meta_file = os.path.join(output_folder, suffix, 'part_meta.csv') 44 | negative_meta_file = os.path.join(output_folder, suffix, 'negative_meta.csv') 45 | 46 | # load from disk to menmory 47 | positive_meta = pd.read_csv(positive_meta_file) 48 | pos = [os.path.join(part_dest, i) for i in positive_meta.iloc[:, 0]] 49 | pos_reg = np.array(positive_meta.iloc[:, 1:]) 50 | 51 | part_meta = pd.read_csv(part_meta_file) 52 | part = [os.path.join(part_dest, i) for i in part_meta.iloc[:, 0]] 53 | part_reg = np.array(part_meta.iloc[:, 1:]) 54 | 55 | negative_meta = pd.read_csv(negative_meta_file) 56 | neg = [os.path.join(negative_dest, i) for i in negative_meta.iloc[:, 0]] 57 | 58 | return ClsBoxData(pos, part, neg, pos_reg, part_reg) 59 | 60 | 61 | class LandmarkData(object): 62 | """ 63 | Custom data structure for storing facial landmark points training data. 64 | """ 65 | def __init__(self, images, landmarks): 66 | self.images = images 67 | self.landmarks = landmarks 68 | 69 | def __len__(self): 70 | return len(self.images) 71 | 72 | 73 | def get_landmark_data(output_folder, suffix=''): 74 | 75 | image_file_folder = os.path.join(output_folder, suffix, 'landmarks') 76 | meta_file = os.path.join(output_folder, suffix, 'landmarks_meta.csv') 77 | 78 | meta = pd.read_csv(meta_file) 79 | images = [os.path.join(image_file_folder, i) for i in meta.iloc[:, 0]] 80 | landmarks = np.array(meta.iloc[:, 1:]).astype(float) 81 | 82 | return LandmarkData(images, landmarks) 83 | 84 | class ToTensor(object): 85 | 86 | def __call__(self, sample): 87 | sample[0] = cv2.imread(sample[0]) 88 | # Convert image from rgb to bgr for Compatible with original caffe model. 89 | sample[0] = cv2.cvtColor(sample[0], cv2.COLOR_RGB2BGR) 90 | sample[0] = sample[0].transpose((2, 0, 1)) 91 | sample[0] = func.imnormalize(sample[0]) 92 | sample[0] = torch.tensor(sample[0], dtype=torch.float) 93 | 94 | sample[1] = torch.tensor(sample[1], dtype=torch.float) 95 | 96 | return sample 97 | 98 | class ImageMetaDataset(Dataset): 99 | 100 | def __init__(self, image, meta=None, max_len=-1): 101 | if max_len >0 and max_len < len(image): 102 | if meta is None: 103 | image = random.sample(image, max_len) 104 | else: 105 | image, meta = zip(*random.sample(list(zip(image, meta)), max_len)) 106 | 107 | self.image = image 108 | self.meta = meta 109 | self.transform = ToTensor() 110 | 111 | def __len__(self): 112 | return len(self.image) 113 | 114 | def __getitem__(self, index): 115 | 116 | if self.meta is None: 117 | # negative data has no bounding box regression labels. 118 | sample = [self.image[index], np.zeros((4,))] 119 | 120 | else: 121 | sample = [self.image[index], self.meta[index]] 122 | 123 | return self.transform(sample) 124 | 125 | class MtcnnDataset(object): 126 | """ 127 | Dataset for training MTCNN. 128 | """ 129 | 130 | def __init__(self, output_folder, net_stage, batch_size, suffix): 131 | """ 132 | Put things together. The structure of 'output_folder' looks like this: 133 | 134 | output_folder/ 135 | ├── landmarks (generate by 'gen_landmark_data' method.) 
136 | │ ├── 1.jpg 137 | │ └── 2.jpg 138 | ├── negative (neg, part, pos generate by 'generate_training_data_for_pnet' method) 139 | │ ├── 1.jpg 140 | │ └── 2.jpg 141 | ├── part 142 | │ ├── 1.jpg 143 | │ └── 2.jpg 144 | ├── positive 145 | ├── ├── 1.jpg 146 | ├── └── 2.jpg 147 | ├── pnet_negative_meta.csv 148 | ├── pnet_part_meta.csv 149 | └── pnet_positive_meta.csv 150 | 151 | net_stage is one of 'pnet', 'rnet' and 'onet' 152 | """ 153 | 154 | self.batch_size = batch_size 155 | # get classification and regression tasks data 156 | if net_stage == 'pnet': 157 | # get landmarks data 158 | self.landmark_data = get_landmark_data( 159 | output_folder, suffix=suffix) 160 | self.data = get_training_data(output_folder, suffix=suffix) 161 | elif net_stage == 'rnet': 162 | self.landmark_data = get_landmark_data(output_folder, suffix=suffix) 163 | self.data = get_training_data(output_folder, suffix=suffix) 164 | elif net_stage == 'onet': 165 | self.landmark_data = get_landmark_data(output_folder, suffix=suffix) 166 | self.data = get_training_data(output_folder, suffix=suffix) 167 | else: 168 | raise AttributeError( 169 | "Parameter 'net_stage' must be one of 'pnet', 'rnet' and 'onet' instead of %s." % net_stage) 170 | 171 | # Ensure the ratio of four kinds of data (pos, part, landmark, neg) is 1:1:1:3. (Follow the original paper) 172 | min_len = int(min([len(self.data.pos), len(self.data.part), len(self.landmark_data.landmarks), len(self.data.neg) / 3])) 173 | 174 | self.pos = ImageMetaDataset(self.data.pos, self.data.pos_reg, max_len=min_len) 175 | self.part = ImageMetaDataset(self.data.part, self.data.part_reg, max_len=min_len) 176 | self.neg = ImageMetaDataset(self.data.neg, max_len=min_len * 3) 177 | self.landm = ImageMetaDataset(self.landmark_data.images, self.landmark_data.landmarks, max_len=min_len) 178 | 179 | pos_len = len(self.pos) 180 | part_len = len(self.part) 181 | neg_len = len(self.neg) 182 | landm_len = len(self.landm) 183 | 184 | total_len = pos_len + part_len + neg_len + landm_len 185 | 186 | self.pos_batch = int(batch_size * (pos_len / total_len)) 187 | self.part_batch = int(batch_size * (part_len / total_len)) 188 | self.neg_batch = int(batch_size * (neg_len / total_len)) 189 | self.landm_batch = int(batch_size * (landm_len / total_len)) 190 | 191 | def get_iter(self): 192 | pos_loader = DataLoader(self.pos, self.pos_batch, shuffle=True) 193 | part_loader = DataLoader(self.part, self.part_batch, shuffle=True) 194 | neg_loader = DataLoader(self.neg, self.neg_batch, shuffle=True) 195 | landm_loader = DataLoader(self.landm, self.landm_batch, shuffle=True) 196 | 197 | transform = ToTensor() 198 | 199 | def generator(): 200 | 201 | for i in zip(pos_loader, part_loader, neg_loader, landm_loader): 202 | yield i 203 | 204 | total_batch = min([len(pos_loader), len(part_loader), len(neg_loader), len(landm_loader)]) 205 | 206 | return generator(), total_batch 207 | -------------------------------------------------------------------------------- /mtcnn/train/gen_landmark.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import shutil 4 | 5 | import cv2 6 | import progressbar 7 | import numpy as np 8 | import pandas as pd 9 | import numpy.random as npr 10 | 11 | from mtcnn.utils.functional import IoU 12 | 13 | 14 | def gen_landmark_data(meta, size, output_folder, argument=False, suffix=''): 15 | """For training MTCNN, generate data for facial landmark localization task. 
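    Each face crop is resized to (size, size) and its five landmark points are normalized by the crop width and height before being written to landmarks_meta.csv.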
16 | The Generated file will be saved in "output_folder" 17 | 18 | Args: 19 | meta (list): Each item contains a dict with file_name, num_bb (Number of bounding box), meta_data (x1, y1, w, h), landmarks (lefteye_x lefteye_y righteye_x righteye_y nose_x nose_y leftmouth_x leftmouth_y rightmouth_x rightmouth_y). 20 | size (int): The size of the saved image. 21 | output_folder (str): Directory to save the result. 22 | argument (bool, optional): Defaults to False. Apply augmentation or not. 23 | """ 24 | total_num = 0 25 | 26 | image_output_folder = os.path.join(output_folder, suffix, 'landmarks') 27 | if os.path.exists(image_output_folder): 28 | shutil.rmtree(image_output_folder) 29 | 30 | os.makedirs(image_output_folder) 31 | landmark_meta_file = open(os.path.join(output_folder, suffix, "landmarks_meta.csv"), 'w') 32 | 33 | bar = progressbar.ProgressBar(max_value=len(meta) - 1) 34 | 35 | for index, item in enumerate(meta): 36 | bar.update(index) 37 | image_path = item['file_name'] 38 | boxes = item['meta_data'] 39 | landmarks = item['landmarks'] 40 | 41 | img = cv2.imread(image_path) 42 | cv2.cvtColor(img, cv2.COLOR_RGB2BGR) # Do this for compatible with caffe model 43 | img_w = img.shape[0] 44 | img_h = img.shape[1] 45 | 46 | for bbox, landmark in zip(boxes, landmarks): 47 | left = bbox[0] 48 | top = bbox[1] 49 | w = bbox[2] 50 | h = bbox[3] 51 | 52 | # there is error data in datasets 53 | if w <= 0 or h <= 0: 54 | continue 55 | 56 | right = bbox[0]+w+1 57 | bottom = bbox[1]+h+1 58 | 59 | # Crop the face image. 60 | face_img = img[top: bottom, left: right] 61 | 62 | # Resize the image 63 | face_img = cv2.resize(face_img, (size, size)) 64 | 65 | # Resize landmark as (5, 2) 66 | landmark = np.array(landmark) 67 | landmark.resize(5, 2) 68 | 69 | # (( x - bbox.left)/ width of bounding box, (y - bbox.top)/ height of bounding box 70 | landmark_gtx = (landmark[:, 0] - left) / w 71 | landmark_gty = (landmark[:, 1] - top) / h 72 | landmark_gt = np.concatenate([landmark_gtx, landmark_gty]).tolist() 73 | 74 | total_num += 1 75 | cv2.imwrite(os.path.join(image_output_folder, str(total_num) + '.jpg'), face_img) 76 | landmark_meta_file.write(str(total_num) + '.jpg, ') 77 | landmark_meta_file.write(','.join([str(i) for i in landmark_gt])) 78 | landmark_meta_file.write('\n') 79 | 80 | if not argument: 81 | continue 82 | 83 | if max(w, h) < 40 or left < 0 or right < 0 or min(w, h) < 0: 84 | continue 85 | 86 | # random shift 87 | for i in range(5): 88 | bbox_size = npr.randint( 89 | int(min(w, h) * 0.8), np.ceil(1.25 * max(w, h))) 90 | delta_x = npr.randint(-w * 0.2, w * 0.2) 91 | delta_y = npr.randint(-h * 0.2, h * 0.2) 92 | nx1 = int(max(left+w/2-bbox_size/2+delta_x, 0)) 93 | ny1 = int(max(top+h/2-bbox_size/2+delta_y, 0)) 94 | 95 | nx2 = nx1 + bbox_size 96 | ny2 = ny1 + bbox_size 97 | if nx2 > img_w or ny2 > img_h: 98 | continue 99 | 100 | crop_box = np.array([nx1, ny1, nx2, ny2]) 101 | gt_box = np.array([left, top, right, bottom]) 102 | 103 | iou = IoU(crop_box, np.expand_dims(gt_box, 0)) 104 | 105 | if iou > 0.65: 106 | landmark_croppedx = (landmark[:, 0] - nx1) / bbox_size 107 | landmark_croppedy = (landmark[:, 1] - ny1) / bbox_size 108 | landmark_gt = np.concatenate( 109 | [landmark_croppedx, landmark_croppedy]).tolist() 110 | cropped_img = img[ny1: ny2, nx1: nx2] 111 | cropped_img = cv2.resize(cropped_img, (size, size)) 112 | 113 | total_num += 1 114 | cv2.imwrite(os.path.join(image_output_folder, str(total_num) + '.jpg'), cropped_img) 115 | landmark_meta_file.write(str(total_num) + '.jpg, ') 116 | 
landmark_meta_file.write(','.join([str(i) for i in landmark_gt])) 117 | landmark_meta_file.write('\n') 118 | 119 | bar.update() 120 | 121 | landmark_meta_file.close() 122 | -------------------------------------------------------------------------------- /mtcnn/train/gen_onet_train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import cv2 4 | import random 5 | import shutil 6 | import progressbar 7 | 8 | import numpy as np 9 | import numpy.random as npr 10 | import pandas as pd 11 | 12 | from mtcnn.deploy.detect import FaceDetector 13 | from mtcnn.utils.functional import IoU 14 | 15 | here = os.path.dirname(__file__) 16 | 17 | def generate_training_data_for_onet(pnet, rnet, meta_data, output_folder, crop_size=48, suffix='onet'): 18 | """ 19 | For training P-net, crop positive(0), negative(1) and partface(2) from original images. 20 | The Generated file will be saved in "output_folder" 21 | 22 | Args: 23 | pnet (Pnet): Pre-trained pnet network. 24 | rnet (Rnet): Pre-trained rnet network. 25 | meta_data (list): Each item contains a dict with file_name, num_bb (Number of bounding box), meta_data(x1, y1, w, h, **). 26 | output_folder (str): Directory to save the result. 27 | crop_size (int): image size to crop. 28 | suffix (str): Create a folder named $suffix in $output_folder to save the result. 29 | """ 30 | 31 | # Construct FaceDetector manually 32 | detector = FaceDetector.__new__(FaceDetector) 33 | detector.pnet = pnet 34 | detector.rnet = rnet 35 | detector.device = pnet.device 36 | 37 | # Prepare for output folder. 38 | rnet_output_folder = os.path.join(output_folder, suffix) 39 | 40 | positive_dest = os.path.join(rnet_output_folder, 'positive') 41 | negative_dest = os.path.join(rnet_output_folder, 'negative') 42 | part_dest = os.path.join(rnet_output_folder, 'part') 43 | 44 | [shutil.rmtree(x) for x in (positive_dest, negative_dest, 45 | part_dest) if os.path.exists(x)] 46 | 47 | # Make dest dir recursively 48 | [os.makedirs(x) for x in (positive_dest, negative_dest, 49 | part_dest) if not os.path.exists(x)] 50 | 51 | positive_meta_file = open(os.path.join( 52 | rnet_output_folder, 'positive_meta.csv'), 'w') 53 | part_meta_file = open(os.path.join(rnet_output_folder, 'part_meta.csv'), 'w') 54 | negative_meta_file = open(os.path.join( 55 | rnet_output_folder, 'negative_meta.csv'), 'w') 56 | 57 | # print("Start generate training data for pnet.") 58 | bar = progressbar.ProgressBar(max_value=len(meta_data) - 1) 59 | 60 | total_pos_num = 0 61 | total_neg_num = 0 62 | total_part_num = 0 63 | 64 | # Traverse all images in training set. 65 | for index, item in enumerate(meta_data): 66 | bar.update(index) 67 | # Read the image 68 | file_name = item['file_name'] 69 | img = cv2.imread(file_name) 70 | 71 | # Get boxes. 
(x1, y1, w, h) -> (x1, y1, x2, y2) 72 | boxes = np.array(item['meta_data'])[:, :4] 73 | boxes = boxes[boxes[:,2] >= 0] # filter error box (w <0) 74 | boxes = boxes[boxes[:,3] >= 0] # filter error box (h <0) 75 | 76 | boxes[:, 2] += boxes[:, 0] 77 | boxes[:, 3] += boxes[:, 1] 78 | 79 | # Origin image height and width 80 | height, width, _ = img.shape 81 | 82 | processed_img = detector._preprocess(img) 83 | candidate_boxes = detector.stage_one(processed_img, 0.5, 0.707, 12, 0.7) 84 | try: 85 | candidate_boxes = detector.stage_two(processed_img, candidate_boxes, 0.5, 0.7) 86 | except RuntimeError: 87 | print("Out of memory on process img '%s.'" % file_name) 88 | continue 89 | candidate_boxes = detector._convert_to_square(candidate_boxes).cpu().numpy() 90 | 91 | neg_examples = [] 92 | part_examples = [] 93 | part_offsets = [] 94 | pos_num = 0 95 | part_num = 0 96 | neg_num = 0 97 | 98 | for c_box in candidate_boxes: 99 | nx1 = c_box[0] 100 | ny1 = c_box[1] 101 | nx2 = c_box[2] 102 | ny2 = c_box[3] 103 | 104 | w = nx2 - nx1 + 1 105 | h = ny2 - ny1 + 1 106 | 107 | if nx2 > width or ny2 > height or nx1 < 0 or ny1<0: 108 | continue 109 | 110 | cropped_im = img[c_box[1]: c_box[3], c_box[0]: c_box[2], :] 111 | resized_im = cv2.resize( 112 | cropped_im, (crop_size, crop_size), interpolation=cv2.INTER_LINEAR) 113 | 114 | iou = IoU(c_box, boxes) 115 | max_iou = iou.max() 116 | 117 | if max_iou < 0.3: 118 | neg_num += 1 119 | neg_examples.append(resized_im) 120 | continue 121 | 122 | max_index = iou.argmax() 123 | 124 | x1, y1, x2, y2 = boxes[max_index] 125 | 126 | offset_x1 = (x1 - nx1) / float(w) 127 | offset_y1 = (y1 - ny1) / float(h) 128 | offset_x2 = (x2 - nx2) / float(w) 129 | offset_y2 = (y2 - ny2) / float(h) 130 | 131 | if max_iou >= 0.65: 132 | pos_num += 1 133 | total_pos_num += 1 134 | positive_meta_file.write( 135 | ','.join([str(total_pos_num) + '.jpg', str(offset_x1), str(offset_y1), str(offset_x2), str(offset_y2)]) + '\n') 136 | 137 | cv2.imwrite(os.path.join(positive_dest, str(total_pos_num) + '.jpg'), resized_im) 138 | 139 | 140 | elif max_iou >= 0.4: 141 | part_num += 1 142 | part_examples.append(resized_im) 143 | part_offsets.append([str(offset_x1), str(offset_y1), str(offset_x2), str(offset_y2)]) 144 | 145 | # Prevent excessive negative samples 146 | if neg_num > 4 * pos_num: 147 | neg_examples = random.sample(neg_examples, k=3*pos_num) 148 | 149 | for i in neg_examples: 150 | total_neg_num += 1 151 | negative_meta_file.write(','.join([str(total_neg_num) + '.jpg']) + '\n') 152 | cv2.imwrite(os.path.join(negative_dest, str(total_neg_num) + '.jpg'), i) 153 | 154 | # Prevent excessive part samples 155 | if part_num > 2 * pos_num: 156 | choiced_index = random.sample(list(range(part_num)), k=2*pos_num) 157 | part_examples = [part_examples[i] for i in choiced_index] 158 | part_offsets = [part_offsets[i] for i in choiced_index] 159 | 160 | for i, offsets in zip(part_examples, part_offsets): 161 | total_part_num += 1 162 | part_meta_file.write(str(total_part_num) + '.jpg,' + ','.join(offsets) + '\n') 163 | 164 | cv2.imwrite(os.path.join(part_dest, str(total_part_num) + '.jpg'), i) 165 | 166 | bar.update() 167 | 168 | # Close the meta data files 169 | [x.close() for x in (positive_meta_file, part_meta_file, negative_meta_file)] 170 | -------------------------------------------------------------------------------- /mtcnn/train/gen_pnet_train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import cv2 4 | import shutil 5 | 
import progressbar 6 | 7 | import numpy as np 8 | import numpy.random as npr 9 | import pandas as pd 10 | 11 | from mtcnn.utils.functional import IoU 12 | 13 | here = os.path.dirname(__file__) 14 | 15 | 16 | def generate_training_data_for_pnet(meta_data, output_folder, crop_size=12, suffix='pnet'): 17 | """ 18 | For training P-net, crop positive(0), negative(1) and partface(2) from original images. 19 | The Generated file will be saved in "output_folder" 20 | 21 | Args: 22 | meta_data (list): Each item contains a dict with file_name, num_bb (Number of bounding box), meta_data(x1, y1, w, h, **). 23 | output_folder (str): Directory to save the result. 24 | crop_size (int): image size to crop. 25 | """ 26 | pnet_data_folder = os.path.join(output_folder, suffix) 27 | 28 | positive_dest = os.path.join(pnet_data_folder, 'positive') 29 | negative_dest = os.path.join(pnet_data_folder, 'negative') 30 | part_dest = os.path.join(pnet_data_folder, 'part') 31 | 32 | [shutil.rmtree(x) for x in (positive_dest, negative_dest, 33 | part_dest) if os.path.exists(x)] 34 | 35 | # Make dest dir recursively 36 | [os.makedirs(x) for x in (positive_dest, negative_dest, 37 | part_dest) if not os.path.exists(x)] 38 | 39 | positive_meta_file = open(os.path.join( 40 | pnet_data_folder, 'positive_meta.csv'), 'w') 41 | part_meta_file = open(os.path.join(pnet_data_folder, 'part_meta.csv'), 'w') 42 | negative_meta_file = open(os.path.join( 43 | pnet_data_folder, 'negative_meta.csv'), 'w') 44 | 45 | # print("Start generate training data for pnet.") 46 | bar = progressbar.ProgressBar(max_value=len(meta_data) - 1) 47 | 48 | total_pos_num = 0 49 | total_neg_num = 0 50 | total_part_num = 0 51 | 52 | # Traverse all images in training set. 53 | for index, item in enumerate(meta_data): 54 | bar.update(index) 55 | # Read the image 56 | file_name = item['file_name'] 57 | img = cv2.imread(file_name) 58 | cv2.cvtColor(img, cv2.COLOR_RGB2BGR) # Do this for compatible with caffe model 59 | 60 | # Get boxes. (x1, y1, w, h) -> (x1, y1, x2, y2) 61 | boxes = np.array(item['meta_data'])[:, :4] 62 | boxes = boxes[boxes[:,2] >= 0] # filter error box (w <0) 63 | boxes = boxes[boxes[:,3] >= 0] # filter error box (h <0) 64 | 65 | boxes[:, 2] += boxes[:, 0] 66 | boxes[:, 3] += boxes[:, 1] 67 | 68 | # Origin image height and width 69 | height, width, _ = img.shape 70 | 71 | # Record the total number of positive, negative and part examples. 
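        # Sampling scheme used below (per image): random square crops are drawn until 50 negatives
        # (IoU < 0.3 with every ground-truth box) are collected; then, for each ground-truth box,
        # 5 shifted crops are sampled as extra negative candidates and 20 crops around the box are
        # kept as positives (IoU >= 0.65) or part faces (IoU >= 0.4), with bounding-box regression
        # offsets normalized by the crop size.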
72 | neg_num = 0 73 | pos_num = 0 74 | part_num = 0 75 | 76 | # Random pick 50 negative examples 77 | while neg_num < 50: 78 | 79 | size = npr.randint(crop_size, min(width, height) / 2) 80 | 81 | nx = npr.randint(0, width - size) 82 | ny = npr.randint(0, height - size) 83 | 84 | crop_box = np.array([nx, ny, nx + size, ny + size]) 85 | 86 | iou = IoU(crop_box, boxes) 87 | 88 | if np.max(iou) < 0.3: 89 | # Iou with all gts must below 0.3 90 | cropped_im = img[ny: ny + size, nx: nx + size, :] 91 | resized_im = cv2.resize(cropped_im, (crop_size, crop_size), 92 | interpolation=cv2.INTER_LINEAR) 93 | 94 | total_neg_num += 1 95 | neg_num += 1 96 | 97 | negative_meta_file.write(','.join([str(total_neg_num) + '.jpg']) + '\n') 98 | cv2.imwrite(os.path.join(negative_dest, str(total_neg_num) + '.jpg'), resized_im) 99 | 100 | for box in boxes: 101 | # box (x_left, y_top, x_right, y_bottom) 102 | x1, y1, x2, y2 = box 103 | w = x2 - x1 + 1 104 | h = y2 - y1 + 1 105 | 106 | # ignore small faces 107 | # in case the ground truth boxes of small faces are not accurate 108 | if max(w, h) < 40 or x1 < 0 or y1 < 0: 109 | continue 110 | 111 | # generate negative examples that have overlap with gt 112 | for i in range(5): 113 | size = npr.randint(crop_size, min(width, height) / 2) 114 | # delta_x and delta_y are offsets of (x1, y1) 115 | delta_x = npr.randint(max(-size, -x1), w) 116 | delta_y = npr.randint(max(-size, -y1), h) 117 | 118 | nx1 = max(0, x1 + delta_x) 119 | ny1 = max(0, y1 + delta_y) 120 | 121 | if nx1 + size > width or ny1 + size > height: 122 | continue 123 | crop_box = np.array([nx1, ny1, nx1 + size, ny1 + size]) 124 | Iou = IoU(crop_box, boxes) 125 | 126 | cropped_im = img[ny1: ny1 + size, nx1: nx1 + size, :] 127 | resized_im = cv2.resize( 128 | cropped_im, (crop_size, crop_size), interpolation=cv2.INTER_LINEAR) 129 | 130 | if np.max(Iou) < 0.3: 131 | # Iou with all gts must below 0.3 132 | neg_num += 1 133 | total_neg_num += 1 134 | negative_meta_file.write(','.join([str(total_neg_num) + '.jpg']) + '\n') 135 | cv2.imwrite(os.path.join(negative_dest, str(total_neg_num) + '.jpg'), resized_im) 136 | 137 | # generate positive examples and part faces 138 | for i in range(20): 139 | size = npr.randint(int(min(w, h) * 0.8), 140 | np.ceil(1.25 * max(w, h))) 141 | 142 | # delta here is the offset of box center 143 | delta_x = npr.randint(-w * 0.2, w * 0.2) 144 | delta_y = npr.randint(-h * 0.2, h * 0.2) 145 | 146 | nx1 = int(max(x1 + w / 2 + delta_x - size / 2, 0)) 147 | ny1 = int(max(y1 + h / 2 + delta_y - size / 2, 0)) 148 | nx2 = nx1 + size 149 | ny2 = ny1 + size 150 | 151 | if nx2 > width or ny2 > height: 152 | continue 153 | crop_box = np.array([nx1, ny1, nx2, ny2]) 154 | 155 | offset_x1 = (x1 - nx1) / float(size) 156 | offset_y1 = (y1 - ny1) / float(size) 157 | offset_x2 = (x2 - nx2) / float(size) 158 | offset_y2 = (y2 - ny2) / float(size) 159 | 160 | cropped_im = img[ny1: ny2, nx1: nx2, :] 161 | resized_im = cv2.resize( 162 | cropped_im, (crop_size, crop_size), interpolation=cv2.INTER_LINEAR) 163 | 164 | box_ = box.reshape(1, -1) 165 | if IoU(crop_box, box_) >= 0.65: 166 | pos_num += 1 167 | total_pos_num += 1 168 | positive_meta_file.write( 169 | ','.join([str(total_pos_num) + '.jpg', str(offset_x1), str(offset_y1), str(offset_x2), str(offset_y2)]) + '\n') 170 | 171 | cv2.imwrite(os.path.join(positive_dest, str(total_pos_num) + '.jpg'), resized_im) 172 | 173 | 174 | elif IoU(crop_box, box_) >= 0.4: 175 | part_num += 1 176 | total_part_num += 1 177 | part_meta_file.write( 178 | 
','.join([str(total_part_num) + '.jpg', str(offset_x1), str(offset_y1), str(offset_x2), str(offset_y2)]) + '\n') 179 | 180 | cv2.imwrite(os.path.join(part_dest, str(total_part_num) + '.jpg'), resized_im) 181 | bar.update() 182 | # print("\nDone") 183 | 184 | # Close the meta data files 185 | [x.close() for x in (positive_meta_file, part_meta_file, negative_meta_file)] 186 | -------------------------------------------------------------------------------- /mtcnn/train/gen_rnet_train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import cv2 4 | import random 5 | import shutil 6 | import progressbar 7 | 8 | import numpy as np 9 | import numpy.random as npr 10 | import pandas as pd 11 | 12 | from mtcnn.deploy.detect import FaceDetector 13 | from mtcnn.utils.functional import IoU 14 | 15 | here = os.path.dirname(__file__) 16 | 17 | def generate_training_data_for_rnet(pnet, meta_data, output_folder, crop_size=24, suffix='rnet'): 18 | """ 19 | For training P-net, crop positive(0), negative(1) and partface(2) from original images. 20 | The Generated file will be saved in "output_folder" 21 | 22 | Args: 23 | pnet (Pnet): Pre-trained pnet network. 24 | meta_data (list): Each item contains a dict with file_name, num_bb (Number of bounding box), meta_data(x1, y1, w, h, **). 25 | output_folder (str): Directory to save the result. 26 | crop_size (int): image size to crop. 27 | suffix (str): Create a folder named $suffix in $output_folder to save the result. 28 | """ 29 | 30 | # Construct FaceDetector manually 31 | detector = FaceDetector.__new__(FaceDetector) 32 | detector.pnet = pnet 33 | detector.device = pnet.device 34 | 35 | # Prepare for output folder. 36 | rnet_output_folder = os.path.join(output_folder, suffix) 37 | 38 | positive_dest = os.path.join(rnet_output_folder, 'positive') 39 | negative_dest = os.path.join(rnet_output_folder, 'negative') 40 | part_dest = os.path.join(rnet_output_folder, 'part') 41 | 42 | [shutil.rmtree(x) for x in (positive_dest, negative_dest, 43 | part_dest) if os.path.exists(x)] 44 | 45 | # Make dest dir recursively 46 | [os.makedirs(x) for x in (positive_dest, negative_dest, 47 | part_dest) if not os.path.exists(x)] 48 | 49 | positive_meta_file = open(os.path.join( 50 | rnet_output_folder, 'positive_meta.csv'), 'w') 51 | part_meta_file = open(os.path.join(rnet_output_folder, 'part_meta.csv'), 'w') 52 | negative_meta_file = open(os.path.join( 53 | rnet_output_folder, 'negative_meta.csv'), 'w') 54 | 55 | # print("Start generate training data for pnet.") 56 | bar = progressbar.ProgressBar(max_value=len(meta_data) - 1) 57 | 58 | total_pos_num = 0 59 | total_neg_num = 0 60 | total_part_num = 0 61 | 62 | # Traverse all images in training set. 63 | for index, item in enumerate(meta_data): 64 | bar.update(index) 65 | # Read the image 66 | file_name = item['file_name'] 67 | img = cv2.imread(file_name) 68 | 69 | # Get boxes. 
(x1, y1, w, h) -> (x1, y1, x2, y2) 70 | boxes = np.array(item['meta_data'])[:, :4] 71 | boxes = boxes[boxes[:,2] >= 0] # filter error box (w <0) 72 | boxes = boxes[boxes[:,3] >= 0] # filter error box (h <0) 73 | 74 | boxes[:, 2] += boxes[:, 0] 75 | boxes[:, 3] += boxes[:, 1] 76 | 77 | # Origin image height and width 78 | height, width, _ = img.shape 79 | 80 | processed_img = detector._preprocess(img) 81 | candidate_boxes = detector.stage_one(processed_img, 0.5, 0.707, 12, 0.7) 82 | candidate_boxes = detector._convert_to_square(candidate_boxes).cpu().numpy() 83 | 84 | neg_examples = [] 85 | part_examples = [] 86 | part_offsets = [] 87 | pos_num = 0 88 | part_num = 0 89 | neg_num = 0 90 | 91 | for c_box in candidate_boxes: 92 | nx1 = c_box[0] 93 | ny1 = c_box[1] 94 | nx2 = c_box[2] 95 | ny2 = c_box[3] 96 | 97 | w = nx2 - nx1 + 1 98 | h = ny2 - ny1 + 1 99 | 100 | if nx2 > width or ny2 > height or nx1 < 0 or ny1<0: 101 | continue 102 | 103 | cropped_im = img[c_box[1]: c_box[3], c_box[0]: c_box[2], :] 104 | resized_im = cv2.resize( 105 | cropped_im, (crop_size, crop_size), interpolation=cv2.INTER_LINEAR) 106 | 107 | iou = IoU(c_box, boxes) 108 | max_iou = iou.max() 109 | 110 | if max_iou < 0.3: 111 | neg_num += 1 112 | neg_examples.append(resized_im) 113 | continue 114 | 115 | max_index = iou.argmax() 116 | 117 | x1, y1, x2, y2 = boxes[max_index] 118 | 119 | offset_x1 = (x1 - nx1) / float(w) 120 | offset_y1 = (y1 - ny1) / float(h) 121 | offset_x2 = (x2 - nx2) / float(w) 122 | offset_y2 = (y2 - ny2) / float(h) 123 | 124 | if max_iou >= 0.65: 125 | pos_num += 1 126 | total_pos_num += 1 127 | positive_meta_file.write( 128 | ','.join([str(total_pos_num) + '.jpg', str(offset_x1), str(offset_y1), str(offset_x2), str(offset_y2)]) + '\n') 129 | 130 | cv2.imwrite(os.path.join(positive_dest, str(total_pos_num) + '.jpg'), resized_im) 131 | 132 | 133 | elif max_iou >= 0.4: 134 | part_num += 1 135 | part_examples.append(resized_im) 136 | part_offsets.append([str(offset_x1), str(offset_y1), str(offset_x2), str(offset_y2)]) 137 | 138 | # Prevent excessive negative samples 139 | if neg_num > 4 * pos_num: 140 | neg_examples = random.sample(neg_examples, k=3*pos_num) 141 | 142 | for i in neg_examples: 143 | total_neg_num += 1 144 | negative_meta_file.write(','.join([str(total_neg_num) + '.jpg']) + '\n') 145 | cv2.imwrite(os.path.join(negative_dest, str(total_neg_num) + '.jpg'), i) 146 | 147 | # Prevent excessive part samples 148 | if part_num > 2 * pos_num: 149 | choiced_index = random.sample(list(range(part_num)), k=2*pos_num) 150 | part_examples = [part_examples[i] for i in choiced_index] 151 | part_offsets = [part_offsets[i] for i in choiced_index] 152 | 153 | for i, offsets in zip(part_examples, part_offsets): 154 | total_part_num += 1 155 | part_meta_file.write(str(total_part_num) + '.jpg,' + ','.join(offsets) + '\n') 156 | 157 | cv2.imwrite(os.path.join(part_dest, str(total_part_num) + '.jpg'), i) 158 | 159 | bar.update() 160 | 161 | # Close the meta data files 162 | [x.close() for x in (positive_meta_file, part_meta_file, negative_meta_file)] 163 | -------------------------------------------------------------------------------- /mtcnn/train/train_net.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import glob 4 | import progressbar 5 | 6 | from mtcnn.network.mtcnn_pytorch import PNet, RNet, ONet 7 | from mtcnn.train.data import MtcnnDataset 8 | from tensorboardX import SummaryWriter 9 | 10 | class Trainer(object): 11 | 12 | def 
__init__(self, net_stage, optimizer="SGD", device='cpu', log_dir='./runs', output_folder='./runs', resume=False): 13 | 14 | self.net_stage = net_stage 15 | self.device = device 16 | self.output_folder = output_folder 17 | 18 | if net_stage == 'pnet': 19 | self.net = PNet(is_train=True, device=self.device) 20 | 21 | elif net_stage == 'rnet': 22 | self.net = RNet(is_train=True, device=self.device) 23 | 24 | elif net_stage == 'onet': 25 | self.net = ONet(is_train=True, device=self.device) 26 | 27 | if optimizer is "SGD": 28 | # self.optimizer = torch.optim.SGD(self.net.parameters(), lr=0.01, momentum=0.9) 29 | self.optimizer = torch.optim.Adam(self.net.parameters()) 30 | else: 31 | raise AttributeError("Don't support optimizer named %s." % optimizer) 32 | 33 | self.globle_step = 1 34 | self.epoch_num = 1 35 | 36 | if resume: 37 | self.load_state_dict() 38 | 39 | self.writer = SummaryWriter(log_dir=log_dir, purge_step=self.epoch_num) 40 | 41 | 42 | def train(self, num_epoch, batch_size, data_folder): 43 | dataset = MtcnnDataset(data_folder, self.net_stage, batch_size, suffix=self.net_stage) 44 | eval_dataset = MtcnnDataset(data_folder, self.net_stage, batch_size, suffix=self.net_stage+'_eval') 45 | 46 | for i in range(num_epoch - self.epoch_num + 1): 47 | print("Training epoch %d ......" % self.epoch_num) 48 | data_iter, total_batch = dataset.get_iter() 49 | self._train_epoch(data_iter, total_batch) 50 | print("Training epoch %d done." % self.epoch_num) 51 | 52 | print("Evaluate on training data...") 53 | data_iter, total_batch = dataset.get_iter() 54 | result = self.eval(data_iter, total_batch) 55 | print("Epoch %d, " % self.epoch_num, "result on training set: acc %f, precision %f, recall %f, f1 %f, avg_cls_loss %f, avg_box_loss %f, avg_landmark_loss %f" % result) 56 | 57 | self.writer.add_scalars("training_set", { 58 | i: j for i, j in 59 | zip(["acc", "precision", "recall", "f1", "avg_cls_loss", "avg_box_loss", "avg_landmark_loss"], result) 60 | }, global_step=self.epoch_num) 61 | 62 | print("Evaluate on eval data...") 63 | data_iter, total_batch = eval_dataset.get_iter() 64 | result = self.eval(data_iter, total_batch) 65 | 66 | self.writer.add_scalars("eval_set", { 67 | i: j for i, j in 68 | zip(["acc", "precision", "recall", "f1", "avg_cls_loss", "avg_box_loss", "avg_landmark_loss"], result) 69 | }, global_step=self.epoch_num) 70 | print("Epoch %d, " % self.epoch_num, "result on eval set: acc %f, precision %f, recall %f, f1 %f, avg_cls_loss %f, avg_box_loss %f, avg_landmark_loss %f" % result) 71 | 72 | self.save_state_dict() 73 | 74 | self.epoch_num += 1 75 | 76 | def _train_epoch(self, data_iter, total_batch): 77 | 78 | bar = progressbar.ProgressBar(max_value=total_batch) 79 | 80 | for i, batch in enumerate(data_iter): 81 | bar.update(i) 82 | 83 | loss = self._train_batch(batch) 84 | self.writer.add_scalar('train/batch_loss', loss, global_step=self.epoch_num) 85 | self.globle_step += 1 86 | 87 | bar.update(total_batch) 88 | 89 | def _train_batch(self, batch): 90 | 91 | # assemble batch 92 | images, labels, boxes_reg, landmarks = self._assemble_batch(batch) 93 | 94 | # train step 95 | self.optimizer.zero_grad() 96 | loss = self.net.get_loss(images, labels, boxes_reg, landmarks) 97 | loss.backward() 98 | self.optimizer.step() 99 | 100 | return loss 101 | 102 | 103 | def _assemble_batch(self, batch): 104 | # assemble batch 105 | (pos_img, pos_reg), (part_img, part_reg), (neg_img, neg_reg), (landm_img, landm_reg) = batch 106 | 107 | # stack all images together 108 | images = 
torch.cat([pos_img, part_img, neg_img, landm_img]).to(self.device) 109 | 110 | # create labels for each image. 0 (neg), 1 (pos), 2 (part), 3(landmark) 111 | pos_label = torch.ones(pos_img.shape[0], dtype=torch.long) 112 | part_label = torch.ones(part_img.shape[0], dtype=torch.long) * 2 113 | neg_label = torch.zeros(neg_img.shape[0], dtype=torch.long) 114 | landm_label = torch.ones(landm_img.shape[0], dtype=torch.long) * 3 115 | 116 | labels = torch.cat([pos_label, part_label, neg_label, landm_label]).to(self.device) 117 | 118 | # stack boxes reg 119 | fake_landm_data_box_reg = torch.zeros((landm_img.shape[0], 4), dtype=torch.float) 120 | boxes_reg = torch.cat([pos_reg, part_reg, neg_reg, fake_landm_data_box_reg]).to(self.device) 121 | 122 | # stack landmarks reg 123 | fake_data_landm_reg = torch.zeros((pos_label.shape[0] + part_label.shape[0] + neg_label.shape[0], 10), dtype=torch.float) 124 | landmarks = torch.cat([fake_data_landm_reg, landm_reg]).to(self.device) 125 | 126 | return images, labels, boxes_reg, landmarks 127 | 128 | def eval(self, data_iter, total_batch): 129 | total = 0 130 | right = 0 131 | tp = 0 # True positive 132 | fp = 0 # False positive 133 | fn = 0 # False negative 134 | tn = 0 # True negative 135 | 136 | total_cls_loss = 0 137 | total_box_loss = 0 138 | total_landmark_loss = 0 139 | 140 | bar = progressbar.ProgressBar(max_value=total_batch) 141 | 142 | for i, batch in enumerate(data_iter): 143 | bar.update(i) 144 | 145 | # assemble batch 146 | images, gt_label, gt_boxes, gt_landmarks = self._assemble_batch(batch) 147 | 148 | # Forward pass 149 | with torch.no_grad(): 150 | pred_label, pred_offset, pred_landmarks = self.net.forward(images) 151 | 152 | # Reshape the tensor 153 | pred_label = pred_label.view(-1, 2) 154 | pred_offset = pred_offset.view(-1, 4) 155 | pred_landmarks = pred_landmarks.view(-1, 10) 156 | 157 | # Compute the loss 158 | total_cls_loss += self.net.cls_loss(gt_label, pred_label) 159 | total_box_loss += self.net.box_loss(gt_label, gt_boxes, pred_offset) 160 | total_landmark_loss += self.net.landmark_loss( 161 | gt_label, gt_landmarks, pred_landmarks) 162 | 163 | # compute the classification acc 164 | pred_label = torch.argmax(pred_label, dim=1) 165 | 166 | mask = gt_label <= 1 167 | right += torch.sum(gt_label[mask] == pred_label[mask]) 168 | total += gt_label[mask].shape[0] 169 | 170 | p_mask = gt_label == 1 171 | tp += torch.sum(gt_label[p_mask] == pred_label[p_mask]) 172 | fp += torch.sum(gt_label[p_mask] != pred_label[p_mask]) 173 | 174 | n_mask = gt_label == 0 175 | tn += torch.sum(gt_label[n_mask] == pred_label[n_mask]) 176 | fn += torch.sum(gt_label[n_mask] != pred_label[n_mask]) 177 | 178 | bar.update(total_batch) 179 | 180 | acc = right.float() / total 181 | precision = tp.float() / (tp + fp) 182 | recall = tp.float() / (tp + fn) 183 | f1 = 2 * precision * recall / (precision + recall) 184 | 185 | avg_cls_loss = total_cls_loss / i 186 | avg_box_loss = total_box_loss / i 187 | avg_landmark_loss = total_landmark_loss / i 188 | 189 | return acc, precision, recall, f1, avg_cls_loss, avg_box_loss, avg_landmark_loss 190 | 191 | 192 | def save_state_dict(self): 193 | checkpoint_name = "checkpoint_epoch_%d" % self.epoch_num 194 | file_path = os.path.join(self.output_folder, checkpoint_name) 195 | 196 | state = { 197 | 'epoch_num': self.epoch_num, 198 | 'state_dict': self.net.state_dict(), 199 | 'optimizer': self.optimizer.state_dict(), 200 | } 201 | torch.save(state, file_path) 202 | 203 | def export_model(self, filename): 204 | 
torch.save(self.net.state_dict(), filename) 205 | 206 | def load_state_dict(self): 207 | 208 | # Get the latest checkpoint in output_folder 209 | all_checkpoints = glob.glob(os.path.join(self.output_folder, 'checkpoint_epoch_*')) 210 | 211 | if len(all_checkpoints) >= 1: 212 | epoch_nums = [int(i.split('_')[-1]) for i in all_checkpoints] 213 | max_index = epoch_nums.index(max(epoch_nums)) 214 | latest_checkpoint = all_checkpoints[max_index] 215 | 216 | state = torch.load(latest_checkpoint) 217 | self.epoch_num = state['epoch_num'] + 1 218 | self.net.load_state_dict(state['state_dict']) 219 | self.optimizer.load_state_dict(state['optimizer']) 220 | -------------------------------------------------------------------------------- /mtcnn/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from . import draw -------------------------------------------------------------------------------- /mtcnn/utils/align_trans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | from mtcnn.utils.cp2tform import get_similarity_transform_for_cv2 5 | 6 | # reference facial points, a list of coordinates (x,y) 7 | REFERENCE_FACIAL_POINTS = [ 8 | [30.29459953, 51.69630051], 9 | [65.53179932, 51.50139999], 10 | [48.02519989, 71.73660278], 11 | [33.54930115, 92.3655014], 12 | [62.72990036, 92.20410156] 13 | ] 14 | 15 | DEFAULT_CROP_SIZE = (96, 112) 16 | 17 | 18 | class FaceWarpException(Exception): 19 | def __str__(self): 20 | return 'In File {}:{}'.format( 21 | __file__, super().__str__()) 22 | 23 | 24 | def get_reference_facial_points(output_size=None, 25 | inner_padding_factor=0.0, 26 | outer_padding=(0, 0), 27 | default_square=False): 28 | """ 29 | Function: 30 | ---------- 31 | get reference 5 key points according to crop settings: 32 | 0. Set default crop_size: 33 | if default_square: 34 | crop_size = (112, 112) 35 | else: 36 | crop_size = (96, 112) 37 | 1. Pad the crop_size by inner_padding_factor on each side; 38 | 2. Resize crop_size into (output_size - outer_padding*2), 39 | pad into output_size with outer_padding; 40 | 3. Output reference_5point; 41 | Parameters: 42 | ---------- 43 | @output_size: (w, h) or None 44 | size of aligned face image 45 | @inner_padding_factor: (w_factor, h_factor) 46 | padding factor for inner (w, h) 47 | @outer_padding: (w_pad, h_pad) 48 | each row is a pair of coordinates (x, y) 49 | @default_square: True or False 50 | if True: 51 | default crop_size = (112, 112) 52 | else: 53 | default crop_size = (96, 112); 54 | !!!
make sure, if output_size is not None: 55 | (output_size - outer_padding) 56 | = some_scale * (default crop_size * (1.0 + inner_padding_factor)) 57 | Returns: 58 | ---------- 59 | @reference_5point: 5x2 np.array 60 | each row is a pair of transformed coordinates (x, y) 61 | """ 62 | #print('\n===> get_reference_facial_points():') 63 | 64 | #print('---> Params:') 65 | #print(' output_size: ', output_size) 66 | #print(' inner_padding_factor: ', inner_padding_factor) 67 | #print(' outer_padding:', outer_padding) 68 | #print(' default_square: ', default_square) 69 | 70 | tmp_5pts = np.array(REFERENCE_FACIAL_POINTS) 71 | tmp_crop_size = np.array(DEFAULT_CROP_SIZE) 72 | 73 | # 0) make the inner region a square 74 | if default_square: 75 | size_diff = max(tmp_crop_size) - tmp_crop_size 76 | tmp_5pts += size_diff / 2 77 | tmp_crop_size += size_diff 78 | 79 | #print('---> default:') 80 | #print(' crop_size = ', tmp_crop_size) 81 | #print(' reference_5pts = ', tmp_5pts) 82 | 83 | if (output_size and 84 | output_size[0] == tmp_crop_size[0] and 85 | output_size[1] == tmp_crop_size[1]): 86 | #print('output_size == DEFAULT_CROP_SIZE {}: return default reference points'.format(tmp_crop_size)) 87 | return tmp_5pts 88 | 89 | if (inner_padding_factor == 0 and 90 | outer_padding == (0, 0)): 91 | if output_size is None: 92 | #print('No paddings to do: return default reference points') 93 | return tmp_5pts 94 | else: 95 | raise FaceWarpException( 96 | 'No paddings to do, output_size must be None or {}'.format(tmp_crop_size)) 97 | 98 | # check output size 99 | if not (0 <= inner_padding_factor <= 1.0): 100 | raise FaceWarpException('Not (0 <= inner_padding_factor <= 1.0)') 101 | 102 | if ((inner_padding_factor > 0 or outer_padding[0] > 0 or outer_padding[1] > 0) 103 | and output_size is None): 104 | output_size = tmp_crop_size * \ 105 | (1 + inner_padding_factor * 2).astype(np.int32) 106 | output_size += np.array(outer_padding) 107 | #print(' deduced from paddings, output_size = ', output_size) 108 | 109 | if not (outer_padding[0] < output_size[0] 110 | and outer_padding[1] < output_size[1]): 111 | raise FaceWarpException('Not (outer_padding[0] < output_size[0]' 112 | 'and outer_padding[1] < output_size[1])') 113 | 114 | # 1) pad the inner region according inner_padding_factor 115 | #print('---> STEP1: pad the inner region according inner_padding_factor') 116 | if inner_padding_factor > 0: 117 | size_diff = tmp_crop_size * inner_padding_factor * 2 118 | tmp_5pts += size_diff / 2 119 | tmp_crop_size += np.round(size_diff).astype(np.int32) 120 | 121 | #print(' crop_size = ', tmp_crop_size) 122 | #print(' reference_5pts = ', tmp_5pts) 123 | 124 | # 2) resize the padded inner region 125 | #print('---> STEP2: resize the padded inner region') 126 | size_bf_outer_pad = np.array(output_size) - np.array(outer_padding) * 2 127 | #print(' crop_size = ', tmp_crop_size) 128 | #print(' size_bf_outer_pad = ', size_bf_outer_pad) 129 | 130 | if size_bf_outer_pad[0] * tmp_crop_size[1] != size_bf_outer_pad[1] * tmp_crop_size[0]: 131 | raise FaceWarpException('Must have (output_size - outer_padding)' 132 | '= some_scale * (crop_size * (1.0 + inner_padding_factor)') 133 | 134 | scale_factor = size_bf_outer_pad[0].astype(np.float32) / tmp_crop_size[0] 135 | #print(' resize scale_factor = ', scale_factor) 136 | tmp_5pts = tmp_5pts * scale_factor 137 | # size_diff = tmp_crop_size * (scale_factor - min(scale_factor)) 138 | # tmp_5pts = tmp_5pts + size_diff / 2 139 | tmp_crop_size = size_bf_outer_pad 140 | #print(' crop_size = ', 
tmp_crop_size) 141 | #print(' reference_5pts = ', tmp_5pts) 142 | 143 | # 3) add outer_padding to make output_size 144 | reference_5point = tmp_5pts + np.array(outer_padding) 145 | tmp_crop_size = output_size 146 | #print('---> STEP3: add outer_padding to make output_size') 147 | #print(' crop_size = ', tmp_crop_size) 148 | #print(' reference_5pts = ', tmp_5pts) 149 | 150 | #print('===> end get_reference_facial_points\n') 151 | 152 | return reference_5point 153 | 154 | 155 | def get_affine_transform_matrix(src_pts, dst_pts): 156 | """ 157 | Function: 158 | ---------- 159 | get affine transform matrix 'tfm' from src_pts to dst_pts 160 | Parameters: 161 | ---------- 162 | @src_pts: Kx2 np.array 163 | source points matrix, each row is a pair of coordinates (x, y) 164 | @dst_pts: Kx2 np.array 165 | destination points matrix, each row is a pair of coordinates (x, y) 166 | Returns: 167 | ---------- 168 | @tfm: 2x3 np.array 169 | transform matrix from src_pts to dst_pts 170 | """ 171 | 172 | tfm = np.float32([[1, 0, 0], [0, 1, 0]]) 173 | n_pts = src_pts.shape[0] 174 | ones = np.ones((n_pts, 1), src_pts.dtype) 175 | src_pts_ = np.hstack([src_pts, ones]) 176 | dst_pts_ = np.hstack([dst_pts, ones]) 177 | 178 | # #print(('src_pts_:\n' + str(src_pts_)) 179 | # #print(('dst_pts_:\n' + str(dst_pts_)) 180 | 181 | A, res, rank, s = np.linalg.lstsq(src_pts_, dst_pts_) 182 | 183 | # #print(('np.linalg.lstsq return A: \n' + str(A)) 184 | # #print(('np.linalg.lstsq return res: \n' + str(res)) 185 | # #print(('np.linalg.lstsq return rank: \n' + str(rank)) 186 | # #print(('np.linalg.lstsq return s: \n' + str(s)) 187 | 188 | if rank == 3: 189 | tfm = np.float32([ 190 | [A[0, 0], A[1, 0], A[2, 0]], 191 | [A[0, 1], A[1, 1], A[2, 1]] 192 | ]) 193 | elif rank == 2: 194 | tfm = np.float32([ 195 | [A[0, 0], A[1, 0], 0], 196 | [A[0, 1], A[1, 1], 0] 197 | ]) 198 | 199 | return tfm 200 | 201 | 202 | def warp_and_crop_face(src_img, 203 | facial_pts, 204 | reference_pts=None, 205 | crop_size=(96, 112), 206 | align_type='smilarity'): 207 | """ 208 | Function: 209 | ---------- 210 | apply affine transform 'trans' to uv 211 | Parameters: 212 | ---------- 213 | @src_img: 3x3 np.array 214 | input image 215 | @facial_pts: could be 216 | 1)a list of K coordinates (x,y) 217 | or 218 | 2) Kx2 or 2xK np.array 219 | each row or col is a pair of coordinates (x, y) 220 | @reference_pts: could be 221 | 1) a list of K coordinates (x,y) 222 | or 223 | 2) Kx2 or 2xK np.array 224 | each row or col is a pair of coordinates (x, y) 225 | or 226 | 3) None 227 | if None, use default reference facial points 228 | @crop_size: (w, h) 229 | output face image size 230 | @align_type: transform type, could be one of 231 | 1) 'similarity': use similarity transform 232 | 2) 'cv2_affine': use the first 3 points to do affine transform, 233 | by calling cv2.getAffineTransform() 234 | 3) 'affine': use all points to do affine transform 235 | Returns: 236 | ---------- 237 | @face_img: output face image with size (w, h) = @crop_size 238 | """ 239 | 240 | if reference_pts is None: 241 | if crop_size[0] == 96 and crop_size[1] == 112: 242 | reference_pts = REFERENCE_FACIAL_POINTS 243 | else: 244 | default_square = False 245 | inner_padding_factor = 0 246 | outer_padding = (0, 0) 247 | output_size = crop_size 248 | 249 | reference_pts = get_reference_facial_points(output_size, 250 | inner_padding_factor, 251 | outer_padding, 252 | default_square) 253 | 254 | ref_pts = np.float32(reference_pts) 255 | ref_pts_shp = ref_pts.shape 256 | if max(ref_pts_shp) < 3 or 
min(ref_pts_shp) != 2: 257 | raise FaceWarpException( 258 | 'reference_pts.shape must be (K,2) or (2,K) and K>2') 259 | 260 | if ref_pts_shp[0] == 2: 261 | ref_pts = ref_pts.T 262 | 263 | src_pts = np.float32(facial_pts) 264 | src_pts_shp = src_pts.shape 265 | if max(src_pts_shp) < 3 or min(src_pts_shp) != 2: 266 | raise FaceWarpException( 267 | 'facial_pts.shape must be (K,2) or (2,K) and K>2') 268 | 269 | if src_pts_shp[0] == 2: 270 | src_pts = src_pts.T 271 | 272 | # #print('--->src_pts:\n', src_pts 273 | # #print('--->ref_pts\n', ref_pts 274 | 275 | if src_pts.shape != ref_pts.shape: 276 | raise FaceWarpException( 277 | 'facial_pts and reference_pts must have the same shape') 278 | 279 | if align_type is 'cv2_affine': 280 | tfm = cv2.getAffineTransform(src_pts[0:3], ref_pts[0:3]) 281 | # #print(('cv2.getAffineTransform() returns tfm=\n' + str(tfm)) 282 | elif align_type is 'affine': 283 | tfm = get_affine_transform_matrix(src_pts, ref_pts) 284 | # #print(('get_affine_transform_matrix() returns tfm=\n' + str(tfm)) 285 | else: 286 | tfm = get_similarity_transform_for_cv2(src_pts, ref_pts) 287 | # #print(('get_similarity_transform_for_cv2() returns tfm=\n' + str(tfm)) 288 | 289 | # #print('--->Transform matrix: ' 290 | # #print(('type(tfm):' + str(type(tfm))) 291 | # #print(('tfm.dtype:' + str(tfm.dtype)) 292 | # #print( tfm 293 | 294 | face_img = cv2.warpAffine(src_img, tfm, (crop_size[0], crop_size[1])) 295 | 296 | return face_img -------------------------------------------------------------------------------- /mtcnn/utils/cp2tform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.linalg import inv, norm, lstsq 3 | from numpy.linalg import matrix_rank as rank 4 | 5 | class MatlabCp2tormException(Exception): 6 | def __str__(self): 7 | return 'In File {}:{}'.format( 8 | __file__, super.__str__(self)) 9 | 10 | def tformfwd(trans, uv): 11 | """ 12 | Function: 13 | ---------- 14 | apply affine transform 'trans' to uv 15 | Parameters: 16 | ---------- 17 | @trans: 3x3 np.array 18 | transform matrix 19 | @uv: Kx2 np.array 20 | each row is a pair of coordinates (x, y) 21 | Returns: 22 | ---------- 23 | @xy: Kx2 np.array 24 | each row is a pair of transformed coordinates (x, y) 25 | """ 26 | uv = np.hstack(( 27 | uv, np.ones((uv.shape[0], 1)) 28 | )) 29 | xy = np.dot(uv, trans) 30 | xy = xy[:, 0:-1] 31 | return xy 32 | 33 | 34 | def tforminv(trans, uv): 35 | """ 36 | Function: 37 | ---------- 38 | apply the inverse of affine transform 'trans' to uv 39 | Parameters: 40 | ---------- 41 | @trans: 3x3 np.array 42 | transform matrix 43 | @uv: Kx2 np.array 44 | each row is a pair of coordinates (x, y) 45 | Returns: 46 | ---------- 47 | @xy: Kx2 np.array 48 | each row is a pair of inverse-transformed coordinates (x, y) 49 | """ 50 | Tinv = inv(trans) 51 | xy = tformfwd(Tinv, uv) 52 | return xy 53 | 54 | 55 | def findNonreflectiveSimilarity(uv, xy, options=None): 56 | 57 | options = {'K': 2} 58 | 59 | K = options['K'] 60 | M = xy.shape[0] 61 | x = xy[:, 0].reshape((-1, 1)) # use reshape to keep a column vector 62 | y = xy[:, 1].reshape((-1, 1)) # use reshape to keep a column vector 63 | # print('--->x, y:\n', x, y 64 | 65 | tmp1 = np.hstack((x, y, np.ones((M, 1)), np.zeros((M, 1)))) 66 | tmp2 = np.hstack((y, -x, np.zeros((M, 1)), np.ones((M, 1)))) 67 | X = np.vstack((tmp1, tmp2)) 68 | # print('--->X.shape: ', X.shape 69 | # print('X:\n', X 70 | 71 | u = uv[:, 0].reshape((-1, 1)) # use reshape to keep a column vector 72 | v = uv[:, 
1].reshape((-1, 1)) # use reshape to keep a column vector 73 | U = np.vstack((u, v)) 74 | # print('--->U.shape: ', U.shape 75 | # print('U:\n', U 76 | 77 | # We know that X * r = U 78 | if rank(X) >= 2 * K: 79 | r, _, _, _ = lstsq(X, U) 80 | r = np.squeeze(r) 81 | else: 82 | raise Exception('cp2tform:twoUniquePointsReq') 83 | 84 | # print('--->r:\n', r 85 | 86 | sc = r[0] 87 | ss = r[1] 88 | tx = r[2] 89 | ty = r[3] 90 | 91 | Tinv = np.array([ 92 | [sc, -ss, 0], 93 | [ss, sc, 0], 94 | [tx, ty, 1] 95 | ]) 96 | 97 | # print('--->Tinv:\n', Tinv 98 | 99 | T = inv(Tinv) 100 | # print('--->T:\n', T 101 | 102 | T[:, 2] = np.array([0, 0, 1]) 103 | 104 | return T, Tinv 105 | 106 | 107 | def findSimilarity(uv, xy, options=None): 108 | 109 | options = {'K': 2} 110 | 111 | # Solve for trans1 112 | trans1, trans1_inv = findNonreflectiveSimilarity(uv, xy, options) 113 | 114 | # Solve for trans2 115 | 116 | # manually reflect the xy data across the Y-axis 117 | xyR = xy 118 | xyR[:, 0] = -1 * xyR[:, 0] 119 | 120 | trans2r, trans2r_inv = findNonreflectiveSimilarity(uv, xyR, options) 121 | 122 | # manually reflect the tform to undo the reflection done on xyR 123 | TreflectY = np.array([ 124 | [-1, 0, 0], 125 | [0, 1, 0], 126 | [0, 0, 1] 127 | ]) 128 | 129 | trans2 = np.dot(trans2r, TreflectY) 130 | 131 | # Figure out if trans1 or trans2 is better 132 | xy1 = tformfwd(trans1, uv) 133 | norm1 = norm(xy1 - xy) 134 | 135 | xy2 = tformfwd(trans2, uv) 136 | norm2 = norm(xy2 - xy) 137 | 138 | if norm1 <= norm2: 139 | return trans1, trans1_inv 140 | else: 141 | trans2_inv = inv(trans2) 142 | return trans2, trans2_inv 143 | 144 | 145 | def get_similarity_transform(src_pts, dst_pts, reflective=True): 146 | """ 147 | Function: 148 | ---------- 149 | Find Similarity Transform Matrix 'trans': 150 | u = src_pts[:, 0] 151 | v = src_pts[:, 1] 152 | x = dst_pts[:, 0] 153 | y = dst_pts[:, 1] 154 | [x, y, 1] = [u, v, 1] * trans 155 | Parameters: 156 | ---------- 157 | @src_pts: Kx2 np.array 158 | source points, each row is a pair of coordinates (x, y) 159 | @dst_pts: Kx2 np.array 160 | destination points, each row is a pair of transformed 161 | coordinates (x, y) 162 | @reflective: True or False 163 | if True: 164 | use reflective similarity transform 165 | else: 166 | use non-reflective similarity transform 167 | Returns: 168 | ---------- 169 | @trans: 3x3 np.array 170 | transform matrix from uv to xy 171 | trans_inv: 3x3 np.array 172 | inverse of trans, transform matrix from xy to uv 173 | """ 174 | 175 | if reflective: 176 | trans, trans_inv = findSimilarity(src_pts, dst_pts) 177 | else: 178 | trans, trans_inv = findNonreflectiveSimilarity(src_pts, dst_pts) 179 | 180 | return trans, trans_inv 181 | 182 | 183 | def cvt_tform_mat_for_cv2(trans): 184 | """ 185 | Function: 186 | ---------- 187 | Convert Transform Matrix 'trans' into 'cv2_trans' which could be 188 | directly used by cv2.warpAffine(): 189 | u = src_pts[:, 0] 190 | v = src_pts[:, 1] 191 | x = dst_pts[:, 0] 192 | y = dst_pts[:, 1] 193 | [x, y].T = cv_trans * [u, v, 1].T 194 | Parameters: 195 | ---------- 196 | @trans: 3x3 np.array 197 | transform matrix from uv to xy 198 | Returns: 199 | ---------- 200 | @cv2_trans: 2x3 np.array 201 | transform matrix from src_pts to dst_pts, could be directly used 202 | for cv2.warpAffine() 203 | """ 204 | cv2_trans = trans[:, 0:2].T 205 | 206 | return cv2_trans 207 | 208 | 209 | def get_similarity_transform_for_cv2(src_pts, dst_pts, reflective=True): 210 | """ 211 | Function: 212 | ---------- 213 | Find Similarity Transform 
Matrix 'cv2_trans' which could be 214 | directly used by cv2.warpAffine(): 215 | u = src_pts[:, 0] 216 | v = src_pts[:, 1] 217 | x = dst_pts[:, 0] 218 | y = dst_pts[:, 1] 219 | [x, y].T = cv_trans * [u, v, 1].T 220 | Parameters: 221 | ---------- 222 | @src_pts: Kx2 np.array 223 | source points, each row is a pair of coordinates (x, y) 224 | @dst_pts: Kx2 np.array 225 | destination points, each row is a pair of transformed 226 | coordinates (x, y) 227 | reflective: True or False 228 | if True: 229 | use reflective similarity transform 230 | else: 231 | use non-reflective similarity transform 232 | Returns: 233 | ---------- 234 | @cv2_trans: 2x3 np.array 235 | transform matrix from src_pts to dst_pts, could be directly used 236 | for cv2.warpAffine() 237 | """ 238 | trans, trans_inv = get_similarity_transform(src_pts, dst_pts, reflective) 239 | cv2_trans = cvt_tform_mat_for_cv2(trans) 240 | 241 | return cv2_trans 242 | -------------------------------------------------------------------------------- /mtcnn/utils/draw.py: -------------------------------------------------------------------------------- 1 | """ 2 | Some useful function for visualize bounding box and face landmarks. 3 | """ 4 | import cv2 5 | 6 | 7 | def draw_boxes(img, boxes, color=(255, 0, 0)): 8 | """Draw bounding boxes on original image. 9 | 10 | Args: 11 | img (np.array): image matrix returned by cv2.imread 12 | boxes (list): Each item contrains a bounding box (x1, y1, w, h). (List like objects are all ok. "np.array" for example.) 13 | """ 14 | for box in boxes: 15 | # Default draw red box on it. 16 | cv2.rectangle(img, (box[0], box[1]), (box[0]+box[2], box[1]+box[3]), color) 17 | 18 | return img 19 | 20 | def draw_boxes2(img, boxes, color=(255, 0, 0)): 21 | """Draw bounding boxes on original image. 22 | 23 | Args: 24 | img (np.array): image matrix returned by cv2.imread 25 | boxes (list): Each item contrains a bounding box (x1, y1, x2, y2). (List like objects are all ok. "np.array" for example.) 26 | """ 27 | for box in boxes: 28 | # Default draw red box on it. 29 | cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), color) 30 | 31 | return img 32 | 33 | def crop(img, boxes, landmarks=None): 34 | """Cut region from origin image 35 | 36 | Args: 37 | img (np.array): image matrix returned by cv2.imread 38 | boxes (list): Each item contrains a bounding box (x1, y1, x2, y2). (List like objects are all ok. "np.array" for example.) 39 | """ 40 | if landmarks is not None: 41 | img = img.copy() 42 | batch_draw_landmarks(img, landmarks) 43 | 44 | img_list = [] 45 | for box in boxes: 46 | i = img[box[1]:box[3], box[0]:box[2]] 47 | img_list.append(i) 48 | 49 | return img_list 50 | 51 | 52 | def draw_landmarks(img, landmarks, color=(0, 0, 255)): 53 | """Draw points on original image. 54 | 55 | Args: 56 | img (np.array): image matrix returned by cv2.imread 57 | landmarks (list): Each item contains a point coordinates (x, y). (List like objects are all ok. "np.array" for example.) 
58 | """ 59 | for point in landmarks: 60 | 61 | # Default draw blue point on it 62 | cv2.circle(img, tuple(point), 2, color) 63 | 64 | 65 | return img 66 | 67 | def batch_draw_landmarks(img, batch_landmarks, color=(0, 0, 255)): 68 | 69 | for landmarks in batch_landmarks: 70 | draw_landmarks(img, landmarks, color) 71 | 72 | return img 73 | -------------------------------------------------------------------------------- /mtcnn/utils/functional.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import torch 3 | import numpy as np 4 | 5 | from mtcnn.utils.nms.cpu_nms import cpu_nms 6 | try: 7 | from mtcnn.utils.nms.gpu_nms import gpu_nms 8 | except: 9 | gpu_nms = cpu_nms 10 | 11 | 12 | def IoU(box, boxes): 13 | """Compute IoU between detect box and gt boxes 14 | Parameters: 15 | ---------- 16 | box: numpy array , shape (5, ): x1, y1, x2, y2, score 17 | input box 18 | boxes: numpy array, shape (n, 4): x1, y1, x2, y2 19 | input ground truth boxes 20 | Returns: 21 | ------- 22 | ovr: numpy.array, shape (n, ) 23 | IoU 24 | """ 25 | box_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1) 26 | area = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1) 27 | xx1 = np.maximum(box[0], boxes[:, 0]) 28 | yy1 = np.maximum(box[1], boxes[:, 1]) 29 | xx2 = np.minimum(box[2], boxes[:, 2]) 30 | yy2 = np.minimum(box[3], boxes[:, 3]) 31 | 32 | # compute the width and height of the bounding box 33 | w = np.maximum(0, xx2 - xx1 + 1) 34 | h = np.maximum(0, yy2 - yy1 + 1) 35 | 36 | inter = w * h 37 | ovr = np.true_divide(inter, (box_area + area - inter)) 38 | #ovr = inter / (box_area + area - inter) 39 | return ovr 40 | 41 | 42 | def nms(dets, scores, thresh, device="cpu"): 43 | """ 44 | greedily select boxes with high confidence 45 | keep boxes overlap <= thresh 46 | rule out overlap > thresh 47 | :param dets: [[x1, y1, x2, y2 score]] 48 | :param thresh: retain overlap <= thresh 49 | :return: indexes to keep 50 | """ 51 | if isinstance(device, str): 52 | device = torch.device(device) 53 | 54 | if device.type == 'cpu': 55 | ret = cpu_nms(dets.astype(np.float32), scores.astype(np.float32), 56 | thresh) 57 | 58 | else: 59 | dets = np.concatenate([ 60 | dets.astype(np.float32), 61 | scores.astype(np.float32).reshape(-1, 1) 62 | ], 1) 63 | ret = gpu_nms(dets, thresh, device_id=device.index) 64 | 65 | return ret 66 | 67 | 68 | def imnormalize(img): 69 | """ 70 | Normalize pixel value from (0, 255) to (-1, 1) 71 | """ 72 | 73 | img = (img - 127.5) * 0.0078125 74 | return img 75 | 76 | 77 | def iou_torch(box, boxes): 78 | """Compute IoU between detect box and gt boxes 79 | 80 | Args: 81 | box (torch.IntTensor): shape (4, ) 82 | boxes (torch.IntTensor): shape (n, 4) 83 | 84 | Returns: 85 | torch.FloatTensor: [description] 86 | """ 87 | 88 | box_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1) 89 | area = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1) 90 | xx1 = torch.max(box[0], boxes[:, 0]) 91 | yy1 = torch.max(box[1], boxes[:, 1]) 92 | xx2 = torch.min(box[2], boxes[:, 2]) 93 | yy2 = torch.min(box[3], boxes[:, 3]) 94 | 95 | # compute the width and height of the bounding box 96 | w = xx2 - xx1 + 1 97 | h = yy2 - yy1 + 1 98 | w = torch.max(torch.zeros_like(w), w) 99 | h = torch.max(torch.zeros_like(h), h) 100 | 101 | inter = w * h 102 | ovr = inter.float() / (box_area + area - inter).float() 103 | 104 | return ovr 105 | -------------------------------------------------------------------------------- /mtcnn/utils/nms/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/BrightXiaoHan/FaceDetector/6ea4510411689a437bc380967d4a0cc086381777/mtcnn/utils/nms/__init__.py -------------------------------------------------------------------------------- /mtcnn/utils/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.ndarray[np.float32_t, ndim=1] scores, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | 23 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 24 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 25 | 26 | cdef int ndets = dets.shape[0] 27 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 28 | np.zeros((ndets), dtype=np.int) 29 | 30 | # nominal indices 31 | cdef int _i, _j 32 | # sorted indices 33 | cdef int i, j 34 | # temp variables for box i's (the box currently under consideration) 35 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 36 | # variables for computing overlap with box j (lower scoring box) 37 | cdef np.float32_t xx1, yy1, xx2, yy2 38 | cdef np.float32_t w, h 39 | cdef np.float32_t inter, ovr 40 | 41 | keep = [] 42 | for _i in range(ndets): 43 | i = order[_i] 44 | if suppressed[i] == 1: 45 | continue 46 | keep.append(i) 47 | ix1 = x1[i] 48 | iy1 = y1[i] 49 | ix2 = x2[i] 50 | iy2 = y2[i] 51 | iarea = areas[i] 52 | for _j in range(_i + 1, ndets): 53 | j = order[_j] 54 | if suppressed[j] == 1: 55 | continue 56 | xx1 = max(ix1, x1[j]) 57 | yy1 = max(iy1, y1[j]) 58 | xx2 = min(ix2, x2[j]) 59 | yy2 = min(iy2, y2[j]) 60 | w = max(0.0, xx2 - xx1 + 1) 61 | h = max(0.0, yy2 - yy1 + 1) 62 | inter = w * h 63 | ovr = inter / (iarea + areas[j] - inter) 64 | if ovr >= thresh: 65 | suppressed[j] = 1 66 | 67 | return keep 68 | 69 | def cpu_soft_nms(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0): 70 | cdef unsigned int N = boxes.shape[0] 71 | cdef float iw, ih, box_area 72 | cdef float ua 73 | cdef int pos = 0 74 | cdef float maxscore = 0 75 | cdef int maxpos = 0 76 | cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov 77 | 78 | for i in range(N): 79 | maxscore = boxes[i, 4] 80 | maxpos = i 81 | 82 | tx1 = boxes[i,0] 83 | ty1 = boxes[i,1] 84 | tx2 = boxes[i,2] 85 | ty2 = boxes[i,3] 86 | ts = boxes[i,4] 87 | 88 | pos = i + 1 89 | # get max box 90 | while pos < N: 91 | if maxscore < boxes[pos, 4]: 92 | maxscore = boxes[pos, 4] 93 | maxpos = pos 94 | pos = pos + 1 95 | 96 | # add max box as a detection 97 | boxes[i,0] = boxes[maxpos,0] 98 | boxes[i,1] = boxes[maxpos,1] 99 | boxes[i,2] = boxes[maxpos,2] 100 | boxes[i,3] = boxes[maxpos,3] 101 | boxes[i,4] = boxes[maxpos,4] 102 | 103 | # swap ith box with 
position of max box 104 | boxes[maxpos,0] = tx1 105 | boxes[maxpos,1] = ty1 106 | boxes[maxpos,2] = tx2 107 | boxes[maxpos,3] = ty2 108 | boxes[maxpos,4] = ts 109 | 110 | tx1 = boxes[i,0] 111 | ty1 = boxes[i,1] 112 | tx2 = boxes[i,2] 113 | ty2 = boxes[i,3] 114 | ts = boxes[i,4] 115 | 116 | pos = i + 1 117 | # NMS iterations, note that N changes if detection boxes fall below threshold 118 | while pos < N: 119 | x1 = boxes[pos, 0] 120 | y1 = boxes[pos, 1] 121 | x2 = boxes[pos, 2] 122 | y2 = boxes[pos, 3] 123 | s = boxes[pos, 4] 124 | 125 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 126 | iw = (min(tx2, x2) - max(tx1, x1) + 1) 127 | if iw > 0: 128 | ih = (min(ty2, y2) - max(ty1, y1) + 1) 129 | if ih > 0: 130 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) 131 | ov = iw * ih / ua #iou between max box and detection box 132 | 133 | if method == 1: # linear 134 | if ov > Nt: 135 | weight = 1 - ov 136 | else: 137 | weight = 1 138 | elif method == 2: # gaussian 139 | weight = np.exp(-(ov * ov)/sigma) 140 | else: # original NMS 141 | if ov > Nt: 142 | weight = 0 143 | else: 144 | weight = 1 145 | 146 | boxes[pos, 4] = weight*boxes[pos, 4] 147 | 148 | # if box score falls below threshold, discard the box by swapping with last box 149 | # update N 150 | if boxes[pos, 4] < threshold: 151 | boxes[pos,0] = boxes[N-1, 0] 152 | boxes[pos,1] = boxes[N-1, 1] 153 | boxes[pos,2] = boxes[N-1, 2] 154 | boxes[pos,3] = boxes[N-1, 3] 155 | boxes[pos,4] = boxes[N-1, 4] 156 | N = N - 1 157 | pos = pos - 1 158 | 159 | pos = pos + 1 160 | 161 | keep = [i for i in range(N)] 162 | return keep 163 | -------------------------------------------------------------------------------- /mtcnn/utils/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /mtcnn/utils/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /mtcnn/utils/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // 
------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include 10 | #include 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(¤t_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 
88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | -------------------------------------------------------------------------------- /mtcnn/utils/nms/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def py_cpu_nms(dets, thresh): 11 | """Pure Python NMS baseline.""" 12 | x1 = dets[:, 0] 13 | y1 = dets[:, 1] 14 | x2 = dets[:, 2] 15 | y2 = dets[:, 3] 16 | scores = dets[:, 4] 17 | 18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 19 | order = scores.argsort()[::-1] 20 | 21 | keep = [] 22 | while order.size > 0: 23 | i = order[0] 24 | keep.append(i) 25 | xx1 = np.maximum(x1[i], x1[order[1:]]) 26 | yy1 = np.maximum(y1[i], y1[order[1:]]) 27 | xx2 = np.minimum(x2[i], x2[order[1:]]) 28 | yy2 = np.minimum(y2[i], y2[order[1:]]) 29 | 30 | w = np.maximum(0.0, xx2 - xx1 + 1) 31 | h = np.maximum(0.0, yy2 - yy1 + 1) 32 | inter = w * h 33 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 34 | 35 | inds = np.where(ovr <= thresh)[0] 36 | order = order[inds + 1] 37 | 38 | return keep 39 | -------------------------------------------------------------------------------- /output/caffe_models/det1.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BrightXiaoHan/FaceDetector/6ea4510411689a437bc380967d4a0cc086381777/output/caffe_models/det1.caffemodel -------------------------------------------------------------------------------- /output/caffe_models/det1.prototxt: 
-------------------------------------------------------------------------------- 1 | name: "PNet" 2 | input: "data" 3 | input_dim: 1 4 | input_dim: 3 5 | input_dim: 12 6 | input_dim: 12 7 | 8 | layer { 9 | name: "conv1" 10 | type: "Convolution" 11 | bottom: "data" 12 | top: "conv1" 13 | param { 14 | lr_mult: 1 15 | decay_mult: 1 16 | } 17 | param { 18 | lr_mult: 2 19 | decay_mult: 0 20 | } 21 | convolution_param { 22 | num_output: 10 23 | kernel_size: 3 24 | stride: 1 25 | weight_filler { 26 | type: "xavier" 27 | } 28 | bias_filler { 29 | type: "constant" 30 | value: 0 31 | } 32 | } 33 | } 34 | layer { 35 | name: "PReLU1" 36 | type: "PReLU" 37 | bottom: "conv1" 38 | top: "conv1" 39 | } 40 | layer { 41 | name: "pool1" 42 | type: "Pooling" 43 | bottom: "conv1" 44 | top: "pool1" 45 | pooling_param { 46 | pool: MAX 47 | kernel_size: 2 48 | stride: 2 49 | } 50 | } 51 | 52 | layer { 53 | name: "conv2" 54 | type: "Convolution" 55 | bottom: "pool1" 56 | top: "conv2" 57 | param { 58 | lr_mult: 1 59 | decay_mult: 1 60 | } 61 | param { 62 | lr_mult: 2 63 | decay_mult: 0 64 | } 65 | convolution_param { 66 | num_output: 16 67 | kernel_size: 3 68 | stride: 1 69 | weight_filler { 70 | type: "xavier" 71 | } 72 | bias_filler { 73 | type: "constant" 74 | value: 0 75 | } 76 | } 77 | } 78 | layer { 79 | name: "PReLU2" 80 | type: "PReLU" 81 | bottom: "conv2" 82 | top: "conv2" 83 | } 84 | 85 | layer { 86 | name: "conv3" 87 | type: "Convolution" 88 | bottom: "conv2" 89 | top: "conv3" 90 | param { 91 | lr_mult: 1 92 | decay_mult: 1 93 | } 94 | param { 95 | lr_mult: 2 96 | decay_mult: 0 97 | } 98 | convolution_param { 99 | num_output: 32 100 | kernel_size: 3 101 | stride: 1 102 | weight_filler { 103 | type: "xavier" 104 | } 105 | bias_filler { 106 | type: "constant" 107 | value: 0 108 | } 109 | } 110 | } 111 | layer { 112 | name: "PReLU3" 113 | type: "PReLU" 114 | bottom: "conv3" 115 | top: "conv3" 116 | } 117 | 118 | 119 | layer { 120 | name: "conv4-1" 121 | type: "Convolution" 122 | bottom: "conv3" 123 | top: "conv4-1" 124 | param { 125 | lr_mult: 1 126 | decay_mult: 1 127 | } 128 | param { 129 | lr_mult: 2 130 | decay_mult: 0 131 | } 132 | convolution_param { 133 | num_output: 2 134 | kernel_size: 1 135 | stride: 1 136 | weight_filler { 137 | type: "xavier" 138 | } 139 | bias_filler { 140 | type: "constant" 141 | value: 0 142 | } 143 | } 144 | } 145 | 146 | layer { 147 | name: "conv4-2" 148 | type: "Convolution" 149 | bottom: "conv3" 150 | top: "conv4-2" 151 | param { 152 | lr_mult: 1 153 | decay_mult: 1 154 | } 155 | param { 156 | lr_mult: 2 157 | decay_mult: 0 158 | } 159 | convolution_param { 160 | num_output: 4 161 | kernel_size: 1 162 | stride: 1 163 | weight_filler { 164 | type: "xavier" 165 | } 166 | bias_filler { 167 | type: "constant" 168 | value: 0 169 | } 170 | } 171 | } 172 | layer { 173 | name: "prob1" 174 | type: "Softmax" 175 | bottom: "conv4-1" 176 | top: "prob1" 177 | } 178 | -------------------------------------------------------------------------------- /output/caffe_models/det2.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BrightXiaoHan/FaceDetector/6ea4510411689a437bc380967d4a0cc086381777/output/caffe_models/det2.caffemodel -------------------------------------------------------------------------------- /output/caffe_models/det2.prototxt: -------------------------------------------------------------------------------- 1 | name: "RNet" 2 | input: "data" 3 | input_dim: 1 4 | input_dim: 3 5 | input_dim: 24 6 | 
input_dim: 24 7 | 8 | 9 | ########################## 10 | ###################### 11 | layer { 12 | name: "conv1" 13 | type: "Convolution" 14 | bottom: "data" 15 | top: "conv1" 16 | param { 17 | lr_mult: 0 18 | decay_mult: 0 19 | } 20 | param { 21 | lr_mult: 0 22 | decay_mult: 0 23 | } 24 | convolution_param { 25 | num_output: 28 26 | kernel_size: 3 27 | stride: 1 28 | weight_filler { 29 | type: "xavier" 30 | } 31 | bias_filler { 32 | type: "constant" 33 | value: 0 34 | } 35 | } 36 | } 37 | layer { 38 | name: "prelu1" 39 | type: "PReLU" 40 | bottom: "conv1" 41 | top: "conv1" 42 | propagate_down: true 43 | } 44 | layer { 45 | name: "pool1" 46 | type: "Pooling" 47 | bottom: "conv1" 48 | top: "pool1" 49 | pooling_param { 50 | pool: MAX 51 | kernel_size: 3 52 | stride: 2 53 | } 54 | } 55 | 56 | layer { 57 | name: "conv2" 58 | type: "Convolution" 59 | bottom: "pool1" 60 | top: "conv2" 61 | param { 62 | lr_mult: 0 63 | decay_mult: 0 64 | } 65 | param { 66 | lr_mult: 0 67 | decay_mult: 0 68 | } 69 | convolution_param { 70 | num_output: 48 71 | kernel_size: 3 72 | stride: 1 73 | weight_filler { 74 | type: "xavier" 75 | } 76 | bias_filler { 77 | type: "constant" 78 | value: 0 79 | } 80 | } 81 | } 82 | layer { 83 | name: "prelu2" 84 | type: "PReLU" 85 | bottom: "conv2" 86 | top: "conv2" 87 | propagate_down: true 88 | } 89 | layer { 90 | name: "pool2" 91 | type: "Pooling" 92 | bottom: "conv2" 93 | top: "pool2" 94 | pooling_param { 95 | pool: MAX 96 | kernel_size: 3 97 | stride: 2 98 | } 99 | } 100 | #################################### 101 | 102 | ################################## 103 | layer { 104 | name: "conv3" 105 | type: "Convolution" 106 | bottom: "pool2" 107 | top: "conv3" 108 | param { 109 | lr_mult: 0 110 | decay_mult: 0 111 | } 112 | param { 113 | lr_mult: 0 114 | decay_mult: 0 115 | } 116 | convolution_param { 117 | num_output: 64 118 | kernel_size: 2 119 | stride: 1 120 | weight_filler { 121 | type: "xavier" 122 | } 123 | bias_filler { 124 | type: "constant" 125 | value: 0 126 | } 127 | } 128 | } 129 | layer { 130 | name: "prelu3" 131 | type: "PReLU" 132 | bottom: "conv3" 133 | top: "conv3" 134 | propagate_down: true 135 | } 136 | ############################### 137 | 138 | ############################### 139 | 140 | layer { 141 | name: "conv4" 142 | type: "InnerProduct" 143 | bottom: "conv3" 144 | top: "conv4" 145 | param { 146 | lr_mult: 0 147 | decay_mult: 0 148 | } 149 | param { 150 | lr_mult: 0 151 | decay_mult: 0 152 | } 153 | inner_product_param { 154 | num_output: 128 155 | weight_filler { 156 | type: "xavier" 157 | } 158 | bias_filler { 159 | type: "constant" 160 | value: 0 161 | } 162 | } 163 | } 164 | layer { 165 | name: "prelu4" 166 | type: "PReLU" 167 | bottom: "conv4" 168 | top: "conv4" 169 | } 170 | 171 | layer { 172 | name: "conv5-1" 173 | type: "InnerProduct" 174 | bottom: "conv4" 175 | top: "conv5-1" 176 | param { 177 | lr_mult: 0 178 | decay_mult: 0 179 | } 180 | param { 181 | lr_mult: 0 182 | decay_mult: 0 183 | } 184 | inner_product_param { 185 | num_output: 2 186 | #kernel_size: 1 187 | #stride: 1 188 | weight_filler { 189 | type: "xavier" 190 | } 191 | bias_filler { 192 | type: "constant" 193 | value: 0 194 | } 195 | } 196 | } 197 | layer { 198 | name: "conv5-2" 199 | type: "InnerProduct" 200 | bottom: "conv4" 201 | top: "conv5-2" 202 | param { 203 | lr_mult: 1 204 | decay_mult: 1 205 | } 206 | param { 207 | lr_mult: 2 208 | decay_mult: 1 209 | } 210 | inner_product_param { 211 | num_output: 4 212 | #kernel_size: 1 213 | #stride: 1 214 | weight_filler { 215 | type: 
"xavier" 216 | } 217 | bias_filler { 218 | type: "constant" 219 | value: 0 220 | } 221 | } 222 | } 223 | layer { 224 | name: "prob1" 225 | type: "Softmax" 226 | bottom: "conv5-1" 227 | top: "prob1" 228 | } -------------------------------------------------------------------------------- /output/caffe_models/det3.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BrightXiaoHan/FaceDetector/6ea4510411689a437bc380967d4a0cc086381777/output/caffe_models/det3.caffemodel -------------------------------------------------------------------------------- /output/caffe_models/det3.prototxt: -------------------------------------------------------------------------------- 1 | name: "ONet" 2 | input: "data" 3 | input_dim: 1 4 | input_dim: 3 5 | input_dim: 48 6 | input_dim: 48 7 | ################################## 8 | layer { 9 | name: "conv1" 10 | type: "Convolution" 11 | bottom: "data" 12 | top: "conv1" 13 | param { 14 | lr_mult: 1 15 | decay_mult: 1 16 | } 17 | param { 18 | lr_mult: 2 19 | decay_mult: 1 20 | } 21 | convolution_param { 22 | num_output: 32 23 | kernel_size: 3 24 | stride: 1 25 | weight_filler { 26 | type: "xavier" 27 | } 28 | bias_filler { 29 | type: "constant" 30 | value: 0 31 | } 32 | } 33 | } 34 | layer { 35 | name: "prelu1" 36 | type: "PReLU" 37 | bottom: "conv1" 38 | top: "conv1" 39 | } 40 | layer { 41 | name: "pool1" 42 | type: "Pooling" 43 | bottom: "conv1" 44 | top: "pool1" 45 | pooling_param { 46 | pool: MAX 47 | kernel_size: 3 48 | stride: 2 49 | } 50 | } 51 | layer { 52 | name: "conv2" 53 | type: "Convolution" 54 | bottom: "pool1" 55 | top: "conv2" 56 | param { 57 | lr_mult: 1 58 | decay_mult: 1 59 | } 60 | param { 61 | lr_mult: 2 62 | decay_mult: 1 63 | } 64 | convolution_param { 65 | num_output: 64 66 | kernel_size: 3 67 | stride: 1 68 | weight_filler { 69 | type: "xavier" 70 | } 71 | bias_filler { 72 | type: "constant" 73 | value: 0 74 | } 75 | } 76 | } 77 | 78 | layer { 79 | name: "prelu2" 80 | type: "PReLU" 81 | bottom: "conv2" 82 | top: "conv2" 83 | } 84 | layer { 85 | name: "pool2" 86 | type: "Pooling" 87 | bottom: "conv2" 88 | top: "pool2" 89 | pooling_param { 90 | pool: MAX 91 | kernel_size: 3 92 | stride: 2 93 | } 94 | } 95 | 96 | layer { 97 | name: "conv3" 98 | type: "Convolution" 99 | bottom: "pool2" 100 | top: "conv3" 101 | param { 102 | lr_mult: 1 103 | decay_mult: 1 104 | } 105 | param { 106 | lr_mult: 2 107 | decay_mult: 1 108 | } 109 | convolution_param { 110 | num_output: 64 111 | kernel_size: 3 112 | weight_filler { 113 | type: "xavier" 114 | } 115 | bias_filler { 116 | type: "constant" 117 | value: 0 118 | } 119 | } 120 | } 121 | layer { 122 | name: "prelu3" 123 | type: "PReLU" 124 | bottom: "conv3" 125 | top: "conv3" 126 | } 127 | layer { 128 | name: "pool3" 129 | type: "Pooling" 130 | bottom: "conv3" 131 | top: "pool3" 132 | pooling_param { 133 | pool: MAX 134 | kernel_size: 2 135 | stride: 2 136 | } 137 | } 138 | layer { 139 | name: "conv4" 140 | type: "Convolution" 141 | bottom: "pool3" 142 | top: "conv4" 143 | param { 144 | lr_mult: 1 145 | decay_mult: 1 146 | } 147 | param { 148 | lr_mult: 2 149 | decay_mult: 1 150 | } 151 | convolution_param { 152 | num_output: 128 153 | kernel_size: 2 154 | weight_filler { 155 | type: "xavier" 156 | } 157 | bias_filler { 158 | type: "constant" 159 | value: 0 160 | } 161 | } 162 | } 163 | layer { 164 | name: "prelu4" 165 | type: "PReLU" 166 | bottom: "conv4" 167 | top: "conv4" 168 | } 169 | 170 | 171 | layer { 172 | name: "conv5" 173 | type: 
"InnerProduct" 174 | bottom: "conv4" 175 | top: "conv5" 176 | param { 177 | lr_mult: 1 178 | decay_mult: 1 179 | } 180 | param { 181 | lr_mult: 2 182 | decay_mult: 1 183 | } 184 | inner_product_param { 185 | #kernel_size: 3 186 | num_output: 256 187 | weight_filler { 188 | type: "xavier" 189 | } 190 | bias_filler { 191 | type: "constant" 192 | value: 0 193 | } 194 | } 195 | } 196 | 197 | layer { 198 | name: "drop5" 199 | type: "Dropout" 200 | bottom: "conv5" 201 | top: "conv5" 202 | dropout_param { 203 | dropout_ratio: 0.25 204 | } 205 | } 206 | layer { 207 | name: "prelu5" 208 | type: "PReLU" 209 | bottom: "conv5" 210 | top: "conv5" 211 | } 212 | 213 | 214 | layer { 215 | name: "conv6-1" 216 | type: "InnerProduct" 217 | bottom: "conv5" 218 | top: "conv6-1" 219 | param { 220 | lr_mult: 1 221 | decay_mult: 1 222 | } 223 | param { 224 | lr_mult: 2 225 | decay_mult: 1 226 | } 227 | inner_product_param { 228 | #kernel_size: 1 229 | num_output: 2 230 | weight_filler { 231 | type: "xavier" 232 | } 233 | bias_filler { 234 | type: "constant" 235 | value: 0 236 | } 237 | } 238 | } 239 | layer { 240 | name: "conv6-2" 241 | type: "InnerProduct" 242 | bottom: "conv5" 243 | top: "conv6-2" 244 | param { 245 | lr_mult: 1 246 | decay_mult: 1 247 | } 248 | param { 249 | lr_mult: 2 250 | decay_mult: 1 251 | } 252 | inner_product_param { 253 | #kernel_size: 1 254 | num_output: 4 255 | weight_filler { 256 | type: "xavier" 257 | } 258 | bias_filler { 259 | type: "constant" 260 | value: 0 261 | } 262 | } 263 | } 264 | layer { 265 | name: "conv6-3" 266 | type: "InnerProduct" 267 | bottom: "conv5" 268 | top: "conv6-3" 269 | param { 270 | lr_mult: 1 271 | decay_mult: 1 272 | } 273 | param { 274 | lr_mult: 2 275 | decay_mult: 1 276 | } 277 | inner_product_param { 278 | #kernel_size: 1 279 | num_output: 10 280 | weight_filler { 281 | type: "xavier" 282 | } 283 | bias_filler { 284 | type: "constant" 285 | value: 0 286 | } 287 | } 288 | } 289 | layer { 290 | name: "prob1" 291 | type: "Softmax" 292 | bottom: "conv6-1" 293 | top: "prob1" 294 | } 295 | -------------------------------------------------------------------------------- /output/caffe_models/det4.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BrightXiaoHan/FaceDetector/6ea4510411689a437bc380967d4a0cc086381777/output/caffe_models/det4.caffemodel -------------------------------------------------------------------------------- /scripts/convert_caffe_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | The purpose of this script is to convert pretrained weights taken from 3 | official implementation here: 4 | https://github.com/kpzhang93/MTCNN_face_detection_alignment/tree/master/code/codes/MTCNNv2 5 | to required format. 6 | In a nutshell, it just renames and transposes some of the weights. 7 | You don't have to use this script because weights are already in `src/weights`. 8 | """ 9 | import os 10 | import argparse 11 | import caffe 12 | import numpy as np 13 | 14 | def get_all_weights(net): 15 | all_weights = {} 16 | for p in net.params: 17 | if 'conv' in p: 18 | name = 'body.' + p 19 | if '-' in p: 20 | if '-1' in p: 21 | s = 'cls.' + p 22 | elif '-2' in p: 23 | s = 'box_offset.' + p 24 | else: 25 | s = 'landmarks.' 
+ p 26 | 27 | all_weights[s + '.weight'] = net.params[p][0].data 28 | all_weights[s + '.bias'] = net.params[p][1].data 29 | elif len(net.params[p][0].data.shape) == 4: # Conv layers in body 30 | all_weights[name + '.weight'] = net.params[p][0].data.transpose((0, 1, 3, 2)) 31 | all_weights[name + '.bias'] = net.params[p][1].data 32 | else: # Linear layer in body 33 | all_weights[name + '.weight'] = net.params[p][0].data 34 | all_weights[name + '.bias'] = net.params[p][1].data 35 | elif 'prelu' in p.lower(): # Prelu layers 36 | all_weights['body.' + p.lower() + '.weight'] = net.params[p][0].data 37 | return all_weights 38 | 39 | 40 | def convert(caffe_model_folder, output_folder): 41 | # P-Net 42 | net = caffe.Net(os.path.join(caffe_model_folder, 'det1.prototxt'), os.path.join(caffe_model_folder, 'det1.caffemodel'), caffe.TEST) 43 | np.save(os.path.join(output_folder, 'pnet.npy'), get_all_weights(net)) 44 | 45 | # R-Net 46 | net = caffe.Net(os.path.join(caffe_model_folder, 'det2.prototxt'), os.path.join(caffe_model_folder, 'det2.caffemodel'), caffe.TEST) 47 | np.save(os.path.join(output_folder, 'rnet.npy'), get_all_weights(net)) 48 | 49 | # O-Net 50 | net = caffe.Net(os.path.join(caffe_model_folder, 'det3.prototxt'), os.path.join(caffe_model_folder, 'det3.caffemodel'), caffe.TEST) 51 | np.save(os.path.join(output_folder, 'onet.npy'), get_all_weights(net)) 52 | 53 | if __name__ == "__main__": 54 | parser = argparse.ArgumentParser(description='Extract weight from caffe model.') 55 | parser.add_argument('--caffe_model_folder', help='Path to caffe model (det1, det2, det3, det4)') 56 | parser.add_argument('--output_folder', help='Path to storing extracted weights.') 57 | args = parser.parse_args() 58 | convert(args.caffe_model_folder, args.output_folder) -------------------------------------------------------------------------------- /scripts/detect_on_video.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import cv2 4 | import numpy as np 5 | import argparse 6 | import mtcnn 7 | 8 | 9 | parser = argparse.ArgumentParser(description='this is a description') 10 | parser.add_argument('--video_path', type=str, 11 | default=None, help="Read from video.") 12 | parser.add_argument('--saved_path', type=str, default=None, 13 | help="If set, Save as video. Or show it on screen.") 14 | parser.add_argument("--minsize", type=int, default=24, 15 | help="Min size of faces you want to detect. 
Larger number will speed up detect method.") 16 | parser.add_argument("--device", type=str, default='cpu', 17 | help="Target device to process video.") 18 | parser.add_argument("--model_dir", type=str, default="", help="There are pre-trained pnet, rnet, onet in this folder.") 19 | 20 | args = parser.parse_args() 21 | 22 | if args.model_dir == '': 23 | pnet, rnet, onet = mtcnn.get_net_caffe('output/converted') 24 | else: 25 | pnet, rnet, onet = mtcnn.get_net(args.model_dir) 26 | 27 | detector = mtcnn.FaceDetector(pnet, rnet, onet, device=args.device) 28 | 29 | fourcc = cv2.VideoWriter_fourcc(*"XVID") 30 | 31 | cap = cv2.VideoCapture(args.video_path) 32 | fps = cap.get(cv2.CAP_PROP_FPS) 33 | size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), 34 | int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))) 35 | 36 | if args.saved_path is not None: 37 | out = cv2.VideoWriter(args.saved_path, fourcc, fps, size) 38 | 39 | while True: 40 | 41 | res, image = cap.read() 42 | if not res: 43 | break 44 | 45 | boxes, landmarks = detector.detect(image, minsize=args.minsize) 46 | 47 | image = mtcnn.utils.draw.draw_boxes2(image, boxes) 48 | image = mtcnn.utils.draw.batch_draw_landmarks(image, landmarks) 49 | 50 | if args.saved_path is None: 51 | cv2.imshow("asdfas", image) 52 | cv2.waitKey(1) 53 | else: 54 | out.write(image) 55 | 56 | if args.saved_path is not None: 57 | out.release() 58 | -------------------------------------------------------------------------------- /scripts/gen_onet_train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | 4 | import mtcnn.train.gen_onet_train as gotd 5 | import mtcnn.train.gen_landmark as gl 6 | from mtcnn.datasets import get_by_name 7 | from mtcnn.network.mtcnn_pytorch import PNet, RNet 8 | 9 | 10 | parser = argparse.ArgumentParser( 11 | description='Generate training data for onet.') 12 | parser.add_argument('-pm', type=str, dest="pnet_model_file", help="Pre-trained pnet model file.") 13 | parser.add_argument('-rm', type=str, dest="rnet_model_file", help="Pre-trained rnet model file.") 14 | parser.add_argument('-o', dest="output_folder", default="output/data_train", type=str, help="Folder to save training data for onet.") 15 | parser.add_argument("-d", dest="detection_dataset",type=str, default="WiderFace", 16 | help="Face Detection dataset name.") 17 | parser.add_argument("-l", type=str, dest="landmarks_dataset", default="CelebA", 18 | help="Landmark localization dataset name.") 19 | args = parser.parse_args() 20 | 21 | landmarks_dataset = get_by_name(args.landmarks_dataset) 22 | landmarks_meta = landmarks_dataset.get_train_meta() 23 | landmarks_eval_meta = landmarks_dataset.get_val_meta() 24 | 25 | print("Start generate landmarks training data for onet.") 26 | gl.gen_landmark_data(landmarks_meta, 48, args.output_folder, argument=True, suffix='onet') 27 | print("Done") 28 | print("Start generate landmarks eval data for onet.") 29 | gl.gen_landmark_data(landmarks_eval_meta, 48, args.output_folder, argument=True, suffix='onet_eval') 30 | print("Done") 31 | 32 | # load pre-trained pnet 33 | print("Loading pre-trained pnet.") 34 | device = 'cuda:0' if torch.cuda.is_available() else 'cpu' 35 | pnet = PNet(device=device, is_train=False) 36 | pnet.load(args.pnet_model_file) 37 | rnet = RNet(device=device, is_train=False) 38 | rnet.load(args.rnet_model_file) 39 | 40 | detection_dataset = get_by_name(args.detection_dataset) 41 | detection_meta = detection_dataset.get_train_meta() 42 | detection_eval_meta = 
detection_dataset.get_val_meta() 43 | print("Start generate classification and bounding box regression training data.") 44 | gotd.generate_training_data_for_onet(pnet, rnet, detection_meta, args.output_folder, crop_size=48, suffix='onet') 45 | print("Done") 46 | 47 | print("Start generate classification and bounding box regression eval data.") 48 | gotd.generate_training_data_for_onet(pnet, rnet, detection_eval_meta, args.output_folder, crop_size=48, suffix='onet_eval') 49 | print("Done") 50 | -------------------------------------------------------------------------------- /scripts/gen_pnet_train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import mtcnn.train.gen_pnet_train as gptd 4 | import mtcnn.train.gen_landmark as gl 5 | from mtcnn.datasets import get_by_name 6 | 7 | 8 | parser = argparse.ArgumentParser( 9 | description='Generate training data for pnet.') 10 | parser.add_argument('-o', dest="output_folder", default="output/data_train", type=str, help="Folder to save training data for pnet.") 11 | parser.add_argument("-d", dest="detection_dataset",type=str, default="WiderFace", 12 | help="Face Detection dataset name.") 13 | parser.add_argument("-l", type=str, dest="landmarks_dataset", default="CelebA", 14 | help="Landmark localization dataset name.") 15 | args = parser.parse_args() 16 | 17 | landmarks_dataset = get_by_name(args.landmarks_dataset) 18 | landmarks_meta = landmarks_dataset.get_train_meta() 19 | landmarks_eval_meta = landmarks_dataset.get_val_meta() 20 | 21 | print("Start generate landmarks training data for pnet.") 22 | gl.gen_landmark_data(landmarks_meta, 12, args.output_folder, argument=False, suffix='pnet') 23 | print("Done") 24 | print("Start generate landmarks eval data for pnet.") 25 | gl.gen_landmark_data(landmarks_eval_meta, 12, args.output_folder, argument=False, suffix='pnet_eval') 26 | print("Done") 27 | 28 | detection_dataset = get_by_name(args.detection_dataset) 29 | detection_meta = detection_dataset.get_train_meta() 30 | detection_eval_meta = detection_dataset.get_val_meta() 31 | print("Start generate classification and bounding box regression training data.") 32 | gptd.generate_training_data_for_pnet(detection_meta, output_folder=args.output_folder, suffix='pnet') 33 | print("Done") 34 | 35 | print("Start generate classification and bounding box regression eval data.") 36 | gptd.generate_training_data_for_pnet(detection_eval_meta, output_folder=args.output_folder, suffix='pnet_eval') 37 | print("Done") 38 | -------------------------------------------------------------------------------- /scripts/gen_rnet_train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | 4 | import mtcnn.train.gen_rnet_train as grtd 5 | import mtcnn.train.gen_landmark as gl 6 | from mtcnn.datasets import get_by_name 7 | from mtcnn.network.mtcnn_pytorch import PNet 8 | 9 | 10 | parser = argparse.ArgumentParser( 11 | description='Generate training data for rnet.') 12 | parser.add_argument('-m', type=str, dest="model_file", help="Pre-trained model file.") 13 | parser.add_argument('-o', dest="output_folder", default="output/data_train", type=str, help="Folder to save training data for rnet.") 14 | parser.add_argument("-d", dest="detection_dataset",type=str, default="WiderFace", 15 | help="Face Detection dataset name.") 16 | parser.add_argument("-l", type=str, dest="landmarks_dataset", default="CelebA", 17 | help="Landmark localization dataset 
name.") 18 | args = parser.parse_args() 19 | 20 | landmarks_dataset = get_by_name(args.landmarks_dataset) 21 | landmarks_meta = landmarks_dataset.get_train_meta() 22 | landmarks_eval_meta = landmarks_dataset.get_val_meta() 23 | 24 | print("Start generate landmarks training data for rnet.") 25 | gl.gen_landmark_data(landmarks_meta, 24, args.output_folder, argument=False, suffix='rnet') 26 | print("Done") 27 | print("Start generate landmarks eval data for rnet.") 28 | gl.gen_landmark_data(landmarks_eval_meta, 24, args.output_folder, argument=False, suffix='rnet_eval') 29 | print("Done") 30 | 31 | # load pre-trained pnet 32 | print("Loading pre-trained pnet.") 33 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 34 | pnet = PNet(device=device, is_train=False) 35 | pnet.load(args.model_file) 36 | 37 | detection_dataset = get_by_name(args.detection_dataset) 38 | detection_meta = detection_dataset.get_train_meta() 39 | detection_eval_meta = detection_dataset.get_val_meta() 40 | print("Start generate classification and bounding box regression training data.") 41 | grtd.generate_training_data_for_rnet(pnet, detection_meta, args.output_folder, crop_size=24, suffix='rnet') 42 | print("Done") 43 | 44 | print("Start generate classification and bounding box regression eval data.") 45 | grtd.generate_training_data_for_rnet(pnet, detection_eval_meta, args.output_folder, crop_size=24, suffix='rnet_eval') 46 | print("Done") 47 | -------------------------------------------------------------------------------- /scripts/track_on_video.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import cv2 4 | import numpy as np 5 | import argparse 6 | import mtcnn 7 | 8 | 9 | parser = argparse.ArgumentParser(description='this is a description') 10 | parser.add_argument('--video_path', type=str, help="Read from video.") 11 | parser.add_argument('--output_folder', type=str, help="Save the tracking result.") 12 | parser.add_argument('--saved_path', type=str, default=None, 13 | help="If set, Save as video. Or show it on screen.") 14 | parser.add_argument("--minsize", type=int, default=24, 15 | help="Min size of faces you want to detect. 
Larger number will speed up detect method.") 16 | parser.add_argument('--min_interval', type=int, default=3, help="See FaceTracker.") 17 | parser.add_argument("--device", type=str, default='cpu', 18 | help="Target device to process video.") 19 | 20 | args = parser.parse_args() 21 | 22 | pnet, rnet, onet = mtcnn.get_net_caffe('output/converted') 23 | detector = mtcnn.FaceDetector(pnet, rnet, onet, device=args.device) 24 | tracker = mtcnn.FaceTracker(detector, min_interval=args.min_interval) 25 | tracker.set_detect_params(minsize=args.minsize) 26 | 27 | fourcc = cv2.VideoWriter_fourcc(*"XVID") 28 | 29 | cap = cv2.VideoCapture(args.video_path) 30 | fps = cap.get(cv2.CAP_PROP_FPS) 31 | size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), 32 | int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))) 33 | 34 | if args.saved_path is not None: 35 | out = cv2.VideoWriter(args.saved_path, fourcc, fps, size) 36 | 37 | while True: 38 | 39 | res, image = cap.read() 40 | if not res: 41 | break 42 | 43 | boxes, landmarks = tracker.track(image) 44 | 45 | image = mtcnn.utils.draw.draw_boxes2(image, boxes) 46 | # image = mtcnn.utils.draw.draw_boxes2(image, tracker.boxes_cache) 47 | image = mtcnn.utils.draw.batch_draw_landmarks(image, landmarks) 48 | 49 | if args.saved_path is None: 50 | cv2.imshow("asdfas", image) 51 | cv2.waitKey(1) 52 | else: 53 | out.write(image) 54 | 55 | for k, v in tracker.get_cache().items(): 56 | if len(v) < 5: 57 | continue 58 | saved_dir = os.path.join(args.output_folder, str(k)) 59 | if not os.path.isdir(saved_dir): 60 | os.makedirs(saved_dir) 61 | for i, img in enumerate(v): 62 | cv2.imwrite(os.path.join(saved_dir, "%d.jpg" % i), img) 63 | 64 | 65 | if args.saved_path is not None: 66 | out.release() 67 | -------------------------------------------------------------------------------- /scripts/train_onet.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from mtcnn.train.train_net import Trainer 3 | 4 | parser = argparse.ArgumentParser( 5 | description='Generate training data for onet.') 6 | parser.add_argument('-e', dest='epoch', type=int) 7 | parser.add_argument('-b', dest='batch_size', type=int) 8 | parser.add_argument('-o', dest="output_filename", help="Path to save the model.") 9 | parser.add_argument('-d', dest="data_train", default="output/data_train", type=str, help="Folder that save training data for onet.") 10 | parser.add_argument('-dv', dest="device", default='cpu', type=str, help="'gpu', 'cuda:0' and so on.") 11 | parser.add_argument('-r', dest="resume", default=False, type=bool, help="If resume from latest checkpoint.") 12 | 13 | 14 | args = parser.parse_args() 15 | 16 | trainer = Trainer('onet', device=args.device, log_dir='./runs/onet/', resume=args.resume, output_folder='./runs/onet') 17 | trainer.train(args.epoch, args.batch_size, args.data_train) 18 | trainer.export_model(args.output_filename) -------------------------------------------------------------------------------- /scripts/train_pnet.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from mtcnn.train.train_net import Trainer 3 | 4 | parser = argparse.ArgumentParser( 5 | description='Generate training data for pnet.') 6 | parser.add_argument('-e', dest='epoch', type=int) 7 | parser.add_argument('-b', dest='batch_size', type=int) 8 | parser.add_argument('-o', dest="output_filename", help="Path to save the model.") 9 | parser.add_argument('-d', dest="data_train", default="output/data_train", type=str, 
help="Folder that save training data for pnet.") 10 | parser.add_argument('-dv', dest="device", default='cpu', type=str, help="'gpu', 'cuda:0' and so on.") 11 | parser.add_argument('-r', dest="resume", default=False, type=bool, help="If resume from latest checkpoint.") 12 | 13 | 14 | args = parser.parse_args() 15 | 16 | trainer = Trainer('pnet', device='cuda:0', log_dir='./runs/pnet/', resume=args.resume) 17 | trainer.train(args.epoch, args.batch_size, args.data_train) 18 | trainer.export_model(args.output_filename) 19 | -------------------------------------------------------------------------------- /scripts/train_rnet.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from mtcnn.train.train_net import Trainer 3 | 4 | parser = argparse.ArgumentParser( 5 | description='Generate training data for rnet.') 6 | parser.add_argument('-e', dest='epoch', type=int) 7 | parser.add_argument('-b', dest='batch_size', type=int) 8 | parser.add_argument('-o', dest="output_filename", help="Path to save the model.") 9 | parser.add_argument('-d', dest="data_train", default="output/data_train", type=str, help="Folder that save training data for rnet.") 10 | parser.add_argument('-dv', dest="device", default='cpu', type=str, help="'gpu', 'cuda:0' and so on.") 11 | parser.add_argument('-r', dest="resume", default=False, type=bool, help="If resume from latest checkpoint.") 12 | 13 | 14 | args = parser.parse_args() 15 | 16 | trainer = Trainer('rnet', device=args.device, log_dir='./runs/rnet/', resume=args.resume, output_folder='./runs/rnet') 17 | trainer.train(args.epoch, args.batch_size, args.data_train) 18 | trainer.export_model(args.output_filename) 19 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | from os.path import join as pjoin 4 | import numpy as np 5 | from distutils.core import setup 6 | from distutils.extension import Extension 7 | from Cython.Distutils import build_ext 8 | from Cython.Build import cythonize 9 | 10 | USE_GPU = True 11 | 12 | if "--disable_gpu" in sys.argv: 13 | USE_GPU = False 14 | sys.argv.remove("--disable_gpu") 15 | 16 | 17 | def find_in_path(name, path): 18 | "Find a file in a search path" 19 | # adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 20 | for dir in path.split(os.pathsep): 21 | binpath = pjoin(dir, name) 22 | if os.path.exists(binpath): 23 | return os.path.abspath(binpath) 24 | return None 25 | 26 | 27 | def locate_cuda(): 28 | """Locate the CUDA environment on the system 29 | 30 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 31 | and values giving the absolute path to each directory. 32 | 33 | Starts by looking for the CUDAHOME env variable. If not found, everything 34 | is based on finding 'nvcc' in the PATH. 35 | """ 36 | 37 | # first check if the CUDAHOME env variable is in use 38 | if 'CUDAHOME' in os.environ: 39 | home = os.environ['CUDAHOME'] 40 | nvcc = pjoin(home, 'bin', 'nvcc') 41 | else: 42 | # otherwise, search the PATH for NVCC 43 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 44 | nvcc = find_in_path('nvcc', 45 | os.environ['PATH'] + os.pathsep + default_path) 46 | if nvcc is None: 47 | raise EnvironmentError( 48 | 'The nvcc binary could not be ' 49 | 'located in your $PATH. 
Either add it to your path, or set $CUDAHOME' 50 | ) 51 | home = os.path.dirname(os.path.dirname(nvcc)) 52 | 53 | cudaconfig = { 54 | 'home': home, 55 | 'nvcc': nvcc, 56 | 'include': pjoin(home, 'include'), 57 | 'lib64': pjoin(home, 'lib64') 58 | } 59 | for k, v in cudaconfig.items(): 60 | if not os.path.exists(v): 61 | raise EnvironmentError( 62 | 'The CUDA %s path could not be located in %s' % (k, v)) 63 | 64 | return cudaconfig 65 | 66 | 67 | CUDA = locate_cuda() if USE_GPU else None 68 | 69 | # Obtain the numpy include directory. This logic works across numpy versions. 70 | try: 71 | numpy_include = np.get_include() 72 | except AttributeError: 73 | numpy_include = np.get_numpy_include() 74 | 75 | 76 | def customize_compiler_for_nvcc(self): 77 | """inject deep into distutils to customize how the dispatch 78 | to gcc/nvcc works. 79 | 80 | If you subclass UnixCCompiler, it's not trivial to get your subclass 81 | injected in, and still have the right customizations (i.e. 82 | distutils.sysconfig.customize_compiler) run on it. So instead of going 83 | the OO route, I have this. Note, it's kindof like a wierd functional 84 | subclassing going on.""" 85 | 86 | # tell the compiler it can processes .cu 87 | self.src_extensions.append('.cu') 88 | 89 | # save references to the default compiler_so and _comple methods 90 | default_compiler_so = self.compiler_so 91 | super = self._compile 92 | 93 | # now redefine the _compile method. This gets executed for each 94 | # object but distutils doesn't have the ability to change compilers 95 | # based on source extension: we add it. 96 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 97 | print(extra_postargs) 98 | if os.path.splitext(src)[1] == '.cu': 99 | # use the cuda for .cu files 100 | self.set_executable('compiler_so', CUDA['nvcc']) 101 | # use only a subset of the extra_postargs, which are 1-1 translated 102 | # from the extra_compile_args in the Extension class 103 | postargs = extra_postargs['nvcc'] 104 | else: 105 | postargs = extra_postargs['gcc'] 106 | 107 | super(obj, src, ext, cc_args, postargs, pp_opts) 108 | # reset the default compiler_so, which we might have changed for cuda 109 | self.compiler_so = default_compiler_so 110 | 111 | # inject our redefined _compile method into the class 112 | self._compile = _compile 113 | 114 | 115 | # run the customize_compiler 116 | class custom_build_ext(build_ext): 117 | def build_extensions(self): 118 | customize_compiler_for_nvcc(self.compiler) 119 | build_ext.build_extensions(self) 120 | 121 | 122 | ext_modules = [ 123 | Extension("mtcnn.utils.nms.cpu_nms", ["mtcnn/utils/nms/cpu_nms.pyx"], 124 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 125 | include_dirs=[numpy_include]), 126 | ] 127 | 128 | if USE_GPU: 129 | gpu_extention = Extension( 130 | 'mtcnn.utils.nms.gpu_nms', 131 | ['mtcnn/utils/nms/nms_kernel.cu', 'mtcnn/utils/nms/gpu_nms.pyx'], 132 | library_dirs=[CUDA['lib64']], 133 | libraries=['cudart'], 134 | language='c++', 135 | runtime_library_dirs=[CUDA['lib64']], 136 | # this syntax is specific to this build system 137 | # we're only going to use certain compiler args with nvcc and not with gcc 138 | # the implementation of this trick is in customize_compiler() below 139 | extra_compile_args={ 140 | 'gcc': ["-Wno-unused-function"], 141 | 'nvcc': [ 142 | '-arch=sm_52', '--ptxas-options=-v', '-c', 143 | '--compiler-options', "'-fPIC'" 144 | ] 145 | }, 146 | include_dirs=[numpy_include, CUDA['include']]) 147 | 148 | ext_modules.append(gpu_extention) 149 | 
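# The extensions declared above provide mtcnn.utils.nms.cpu_nms and, when CUDA
# is available, mtcnn.utils.nms.gpu_nms, which mtcnn/utils/functional.py imports
# for box suppression. As a rough sketch of a typical workflow (the exact
# command is an assumption, not something this repo prescribes), they are
# compiled in place before running the tests or scripts with:
#     python setup.py build_ext --inplace
# or, on a machine without nvcc, with the --disable_gpu flag handled at the top
# of this file:
#     python setup.py build_ext --inplace --disable_gpu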
150 | 151 | def package_files(directory): 152 | paths = [] 153 | for (path, directories, filenames) in os.walk(directory): 154 | for filename in filenames: 155 | paths.append(os.path.join('..', path, filename)) 156 | return paths 157 | 158 | 159 | extra_files = package_files('mtcnn') 160 | 161 | setup( 162 | name="torch_mtcnn", 163 | version="0.1", 164 | description= 165 | 'MTCNN pytorch implementation. Joint training and detecting together.', 166 | author='HanBing', 167 | author_email='beatmight@gmail.com', 168 | packages=[ 169 | 'mtcnn', 'mtcnn.datasets', 'mtcnn.deploy', 'mtcnn.network', 170 | 'mtcnn.train', 'mtcnn.utils', 'mtcnn.utils.nms' 171 | ], 172 | ext_modules=ext_modules, 173 | # inject our custom trigger 174 | cmdclass={'build_ext': custom_build_ext}, 175 | package_data={'': extra_files}) 176 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BrightXiaoHan/FaceDetector/6ea4510411689a437bc380967d4a0cc086381777/tests/__init__.py -------------------------------------------------------------------------------- /tests/asset/images/audrey.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BrightXiaoHan/FaceDetector/6ea4510411689a437bc380967d4a0cc086381777/tests/asset/images/audrey.jpg -------------------------------------------------------------------------------- /tests/asset/images/bksomels.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BrightXiaoHan/FaceDetector/6ea4510411689a437bc380967d4a0cc086381777/tests/asset/images/bksomels.jpg -------------------------------------------------------------------------------- /tests/asset/images/gpripe.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BrightXiaoHan/FaceDetector/6ea4510411689a437bc380967d4a0cc086381777/tests/asset/images/gpripe.jpg -------------------------------------------------------------------------------- /tests/asset/images/office5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BrightXiaoHan/FaceDetector/6ea4510411689a437bc380967d4a0cc086381777/tests/asset/images/office5.jpg -------------------------------------------------------------------------------- /tests/asset/images/roate.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BrightXiaoHan/FaceDetector/6ea4510411689a437bc380967d4a0cc086381777/tests/asset/images/roate.jpg -------------------------------------------------------------------------------- /tests/asset/video/track.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BrightXiaoHan/FaceDetector/6ea4510411689a437bc380967d4a0cc086381777/tests/asset/video/track.mp4 -------------------------------------------------------------------------------- /tests/test_align.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test cases for mtcnn.deploy.detect.py 3 | """ 4 | import os 5 | import time 6 | import cv2 7 | import unittest 8 | 9 | import numpy as np 10 | import mtcnn.deploy.detect as detect 11 | import mtcnn.utils.draw as draw 12 | 13 | from mtcnn.deploy import get_net_caffe 14 | from 
mtcnn.deploy.align import align_multi, filter_side_face 15 | 16 | here = os.path.dirname(__file__) 17 | 18 | 19 | class TestDetection(unittest.TestCase): 20 | 21 | def setUp(self): 22 | pnet, rnet, onet = get_net_caffe("output/converted") 23 | 24 | self.detector = detect.FaceDetector(pnet, rnet, onet) 25 | self.test_img = os.path.join(here, 'asset/images/office5.jpg') 26 | 27 | def test_detection(self): 28 | img = cv2.imread(self.test_img) 29 | boxes, landmarks = self.detector.detect(self.test_img) 30 | boxes, faces = align_multi(img, boxes, landmarks, (92, 112)) 31 | 32 | for face in faces: 33 | cv2.imshow("Aligned Faces", face) 34 | cv2.waitKey(0) 35 | 36 | def test_filter_side_face(self): 37 | img = cv2.imread(self.test_img) 38 | boxes, landmarks = self.detector.detect(self.test_img) 39 | 40 | mask = filter_side_face(boxes, landmarks) 41 | boxes, faces = align_multi(img, boxes, landmarks, (92, 112)) 42 | 43 | for m, face in zip(mask, faces): 44 | if m == 0: 45 | cv2.imshow("Aligned Faces", face) 46 | cv2.waitKey(0) 47 | 48 | -------------------------------------------------------------------------------- /tests/test_batch_detection.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test cases for mtcnn.deploy.detect.py 3 | """ 4 | import os 5 | import time 6 | import cv2 7 | import torch 8 | import unittest 9 | 10 | import numpy as np 11 | import mtcnn.deploy.batch_detect as detect 12 | import mtcnn.network.mtcnn_pytorch as mtcnn 13 | import mtcnn.utils.draw as draw 14 | 15 | here = os.path.dirname(__file__) 16 | 17 | 18 | class TestDetection(unittest.TestCase): 19 | 20 | def setUp(self): 21 | weight_folder = os.path.join(here, '../output/converted') 22 | 23 | pnet = mtcnn.PNet() 24 | rnet = mtcnn.RNet() 25 | onet = mtcnn.ONet() 26 | 27 | pnet.load_caffe_model( 28 | np.load(os.path.join(weight_folder, 'pnet.npy'))[()]) 29 | rnet.load_caffe_model( 30 | np.load(os.path.join(weight_folder, 'rnet.npy'))[()]) 31 | onet.load_caffe_model( 32 | np.load(os.path.join(weight_folder, 'onet.npy'))[()]) 33 | 34 | self.detector = detect.BatchImageDetector(pnet, rnet, onet) 35 | self.batch_imgs = [os.path.join(here, 'asset/images/office5.jpg')] * 10 36 | 37 | def test_stage_one(self): 38 | imgs = [cv2.imread(img) for img in self.batch_imgs] 39 | 40 | norm_imgs = self.detector._preprocess(imgs) 41 | stage_one_boxes = self.detector.stage_one( 42 | norm_imgs, 0.6, 0.707, 12, 0.7) 43 | img_labels = stage_one_boxes[:, -1] 44 | 45 | for i, img in enumerate(imgs): 46 | mask = img_labels == i 47 | boxes = stage_one_boxes[mask] 48 | draw.draw_boxes2(img, boxes) 49 | cv2.imshow('Stage One Boxes', img) 50 | cv2.waitKey(0) 51 | 52 | def test_stage_two(self): 53 | # Running this test case after 'test_stage_one' passed. 54 | imgs = [cv2.imread(img) for img in self.batch_imgs] 55 | 56 | norm_imgs = self.detector._preprocess(imgs) 57 | stage_one_boxes = self.detector.stage_one(norm_imgs, 0.6, 0.707, 12, 0.7) 58 | stage_two_boxes = self.detector.stage_two(norm_imgs, stage_one_boxes, 0.7, 0.7) 59 | img_labels = stage_two_boxes[:, -1] 60 | 61 | for i, img in enumerate(imgs): 62 | mask = img_labels == i 63 | boxes = stage_two_boxes[mask] 64 | draw.draw_boxes2(img, boxes) 65 | cv2.imshow('Stage One Boxes', img) 66 | cv2.waitKey(0) 67 | 68 | def test_stage_three(self): 69 | # Running this test case after 'test_stage_one' passed. 
70 | imgs = [cv2.imread(img) for img in self.batch_imgs] 71 | 72 | norm_imgs = self.detector._preprocess(imgs) 73 | stage_one_boxes = self.detector.stage_one(norm_imgs, 0.6, 0.707, 12, 0.7) 74 | stage_two_boxes = self.detector.stage_two(norm_imgs, stage_one_boxes, 0.7, 0.7) 75 | stage_three_boxes, landmarks = self.detector.stage_three(norm_imgs, stage_two_boxes, 0.7, 0.3) 76 | img_labels = stage_three_boxes[:, -1] 77 | 78 | for i, img in enumerate(imgs): 79 | mask = img_labels == i 80 | boxes = stage_three_boxes[mask] 81 | draw.draw_boxes2(img, boxes) 82 | cv2.imshow('Stage One Boxes', img) 83 | cv2.waitKey(0) 84 | 85 | def test_performance(self): 86 | imgs = [cv2.imread(img) for img in self.batch_imgs] 87 | start = time.time() 88 | self.detector.detect(imgs) 89 | end = time.time() 90 | avg_time = (end - start) / len(imgs) 91 | print("Average time per image is %fs." % avg_time) -------------------------------------------------------------------------------- /tests/test_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test case for data.py 3 | """ 4 | import os 5 | import unittest 6 | 7 | import mtcnn.train.data as data 8 | 9 | here = os.path.dirname(__file__) 10 | 11 | 12 | class TestData(unittest.TestCase): 13 | 14 | def setUp(self): 15 | self.output_folder = os.path.join(here, '../output/test/') 16 | self.net_stage = 'pnet' 17 | self.batch_size = 128 18 | 19 | def test_data(self): 20 | dataset = data.MtcnnDataset(self.output_folder, self.net_stage, self.batch_size) 21 | 22 | for batch in dataset.get_iter(): 23 | self.assertEqual(len(batch), 4) 24 | print(1) 25 | -------------------------------------------------------------------------------- /tests/test_datasets.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test cases for datasets module. 
3 | """ 4 | import os 5 | import cv2 6 | import sys 7 | import unittest 8 | import random 9 | import numpy as np 10 | 11 | from mtcnn.datasets import get_by_name 12 | from mtcnn.utils import draw 13 | 14 | here = os.path.dirname(__file__) 15 | 16 | 17 | class TestWiderFace(unittest.TestCase): 18 | def setUp(self): 19 | self.datasets = get_by_name("WiderFace") 20 | self.output_folder = os.path.join(here, '../output/test/wider_face') 21 | 22 | def test_get_widerface_train(self): 23 | ret = self.datasets.get_train_meta() 24 | 25 | for item in ret: 26 | self.assertIn('file_name', item) 27 | self.assertTrue(os.path.exists(item.get('file_name'))) 28 | self.assertIn('num_bb', item) 29 | self.assertIn('meta_data', item) 30 | self.assertEqual(len(item.get('meta_data')), item.get('num_bb')) 31 | 32 | # Sameple 10 image and save them in output/test/wider_face folder 33 | examples = random.choices(ret, k=10) 34 | if not os.path.isdir(self.output_folder): 35 | os.makedirs(self.output_folder) 36 | 37 | for i, item in enumerate(examples): 38 | img = cv2.imread(item['file_name']) 39 | boxes = np.array(item['meta_data'])[:, :4] 40 | draw.draw_boxes(img, boxes) 41 | saved_path = os.path.join(self.output_folder, "%d.jpg" % i) 42 | cv2.imwrite(saved_path, img) 43 | 44 | def test_get_widerface_test(self): 45 | ret = self.datasets.get_test_meta() 46 | 47 | for item in ret: 48 | self.assertTrue(os.path.exists(item)) 49 | 50 | 51 | class TestCelebA(unittest.TestCase): 52 | 53 | def setUp(self): 54 | self.datasets = get_by_name("CelebA") 55 | self.output_folder = os.path.join(here, '../output/test/celeba') 56 | 57 | def test_get_celeba_train(self): 58 | ret = self.datasets.get_train_meta() 59 | 60 | for item in ret: 61 | self.assertIn('file_name', item) 62 | if not os.path.exists(item['file_name']): 63 | print(1) 64 | self.assertTrue(os.path.exists(item['file_name'])) 65 | self.assertIn('num_bb', item) 66 | self.assertIn('meta_data', item) 67 | self.assertEqual(len(item.get('meta_data')), item.get('num_bb')) 68 | 69 | # Sameple 10 image and save them in output/test/wider_face folder 70 | examples = random.choices(ret, k=10) 71 | if not os.path.isdir(self.output_folder): 72 | os.makedirs(self.output_folder) 73 | 74 | for i, item in enumerate(examples): 75 | img = cv2.imread(item['file_name']) 76 | 77 | boxes = np.array(item['meta_data'])[:, :4] 78 | draw.draw_boxes(img, boxes) 79 | 80 | landmarks = np.reshape(np.array(item['landmarks']), (-1, 2)) 81 | draw.draw_landmarks(img, landmarks) 82 | 83 | saved_path = os.path.join(self.output_folder, "%d.jpg" % i) 84 | cv2.imwrite(saved_path, img) 85 | -------------------------------------------------------------------------------- /tests/test_detection.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test cases for mtcnn.deploy.detect.py 3 | """ 4 | import os 5 | import time 6 | import cv2 7 | import unittest 8 | 9 | import numpy as np 10 | import mtcnn.deploy.detect as detect 11 | import mtcnn.network.mtcnn_pytorch as mtcnn 12 | import mtcnn.utils.draw as draw 13 | 14 | here = os.path.dirname(__file__) 15 | 16 | 17 | class TestDetection(unittest.TestCase): 18 | 19 | def setUp(self): 20 | weight_folder = os.path.join(here, '../output/converted') 21 | 22 | pnet = mtcnn.PNet() 23 | rnet = mtcnn.RNet() 24 | onet = mtcnn.ONet() 25 | 26 | pnet.load_caffe_model( 27 | np.load(os.path.join(weight_folder, 'pnet.npy'))[()]) 28 | rnet.load_caffe_model( 29 | np.load(os.path.join(weight_folder, 'rnet.npy'))[()]) 30 | 
onet.load_caffe_model( 31 | np.load(os.path.join(weight_folder, 'onet.npy'))[()]) 32 | 33 | self.detector = detect.FaceDetector(pnet, rnet, onet, "cuda:0") 34 | self.test_img = os.path.join(here, 'asset/images/office5.jpg') 35 | 36 | def test_detection(self): 37 | img = cv2.imread(self.test_img) 38 | boxes, landmarks = self.detector.detect(self.test_img) 39 | draw.draw_boxes2(img, boxes 40 | ) 41 | draw.batch_draw_landmarks(img, landmarks) 42 | cv2.imshow('Stage One Boxes', img) 43 | cv2.waitKey(0) 44 | 45 | def test_stage_one(self): 46 | img = cv2.imread(self.test_img) 47 | norm_img = self.detector._preprocess(self.test_img) 48 | stage_one_boxes = self.detector.stage_one(norm_img, 0.6 , 0.707, 12, 0.7) 49 | draw.draw_boxes2(img, stage_one_boxes) 50 | cv2.imshow('Stage One Boxes', img) 51 | cv2.waitKey(0) 52 | 53 | def test_stage_two(self): 54 | # Running this test case after 'test_stage_one' passed. 55 | img = cv2.imread(self.test_img) 56 | norm_img = self.detector._preprocess(self.test_img) 57 | stage_one_boxes = self.detector.stage_one(norm_img, 0.6, 0.707, 12, 0.7) 58 | stage_two_boxes = self.detector.stage_two(norm_img, stage_one_boxes, 0.7, 0.7) 59 | draw.draw_boxes2(img, stage_two_boxes) 60 | cv2.imshow('Stage One Boxes', img) 61 | cv2.waitKey(0) 62 | 63 | def test_stage_three(self): 64 | # Running this test case after 'test_stage_three' passed. 65 | img = cv2.imread(self.test_img) 66 | norm_img = self.detector._preprocess(self.test_img) 67 | stage_one_boxes = self.detector.stage_one(norm_img, 0.6, 0.707, 12, 0.7) 68 | stage_two_boxes = self.detector.stage_two(norm_img, stage_one_boxes, 0.7, 0.7) 69 | stage_three_boxes, landmarks = self.detector.stage_three(norm_img, stage_two_boxes, 0.7, 0.3) 70 | draw.draw_boxes2(img, stage_three_boxes) 71 | draw.batch_draw_landmarks(img, landmarks) 72 | cv2.imshow('Stage One Boxes', img) 73 | cv2.waitKey(0) 74 | 75 | def test_performance(self): 76 | start = time.time() 77 | for i in range(10): 78 | self.detector.detect(self.test_img) 79 | end = time.time() 80 | avg_time = (end - start) / 10 81 | print("Average time cost is %f. 
" % avg_time) 82 | -------------------------------------------------------------------------------- /tests/test_functional.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test Cases mtcnn.utils.functional 3 | """ 4 | 5 | import os 6 | import torch 7 | import sys 8 | import unittest 9 | import cv2 10 | import numpy as np 11 | 12 | import mtcnn.utils.functional as func 13 | 14 | here = os.path.dirname(__file__) 15 | 16 | class TestFunctional(unittest.TestCase): 17 | 18 | def setUp(self): 19 | # construct a list containing the images that will be examined 20 | # along with their respective bounding boxes 21 | self.images = [ 22 | (os.path.join(here, 'asset/images/audrey.jpg'), np.array([ 23 | (12, 84, 140, 212, 1), 24 | (24, 84, 152, 212, 1), 25 | (36, 84, 164, 212, 1), 26 | (12, 96, 140, 224, 1), 27 | (24, 96, 152, 224, 1), 28 | (24, 108, 152, 236, 1)]), 1), 29 | (os.path.join(here, 'asset/images/bksomels.jpg'), np.array([ 30 | (114, 60, 178, 124, 1), 31 | (120, 60, 184, 124, 1), 32 | (114, 66, 178, 130, 1)]), 1), 33 | (os.path.join(here, 'asset/images/gpripe.jpg'), np.array([ 34 | (12, 30, 76, 94, 1), 35 | (12, 36, 76, 100, 1), 36 | (72, 36, 200, 164, 1), 37 | (84, 48, 212, 176, 1)]), 2)] 38 | 39 | def test_iou(self): 40 | pass 41 | 42 | def test_nms(self): 43 | 44 | # loop over the images 45 | for (imagePath, boundingBoxes, num_face) in self.images: 46 | # load the image and clone it 47 | print("[x] %d initial bounding boxes" % (len(boundingBoxes))) 48 | image = cv2.imread(imagePath) 49 | orig = image.copy() 50 | 51 | # loop over the bounding boxes for each image and draw them 52 | for (startX, startY, endX, endY, _) in boundingBoxes: 53 | cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 0, 255), 2) 54 | 55 | # perform non-maximum suppression on the bounding boxes 56 | pick = func.nms(boundingBoxes[:, :4], boundingBoxes[:, 4], 0.3) 57 | print("[x] after applying non-maximum, %d bounding boxes" % (len(pick))) 58 | 59 | # loop over the picked bounding boxes and draw them 60 | for i in pick: 61 | (startX, startY, endX, endY) = boundingBoxes[i][:4] 62 | cv2.rectangle(image, (startX, startY), (endX, endY), (0, 255, 0), 2) 63 | 64 | # # display the images 65 | # cv2.imshow("Original", orig) 66 | # cv2.imshow("After NMS", image) 67 | # cv2.waitKey(0) 68 | self.assertEqual(len(pick), num_face) 69 | 70 | def test_iou_torch(self): 71 | boxes = self.images[0][1][:, :4] 72 | b = torch.IntTensor(boxes[0]) 73 | boxes = torch.IntTensor(boxes) 74 | 75 | over = func.iou_torch(b, boxes).numpy().tolist() 76 | self.assertEqual(over[0], 1) 77 | -------------------------------------------------------------------------------- /tests/test_gptd.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test Cases for generating pnet training data. 
3 | """ 4 | 5 | import os 6 | import sys 7 | import random 8 | import unittest 9 | import cv2 10 | 11 | from mtcnn.datasets import get_by_name 12 | from mtcnn import get_net_caffe 13 | import mtcnn.train.gen_pnet_train as gptd 14 | import mtcnn.train.gen_rnet_train as grtd 15 | import mtcnn.train.gen_onet_train as gotd 16 | from mtcnn.train.data import get_training_data 17 | 18 | 19 | DEFAULT_DATASET = 'WiderFace' 20 | 21 | here = os.path.dirname(__file__) 22 | 23 | 24 | class TestGenTrain(unittest.TestCase): 25 | 26 | def setUp(self): 27 | self.dataset = get_by_name(DEFAULT_DATASET) 28 | self.output_folder = os.path.join(here, '../output/test') 29 | self.top = 100 30 | self.pnet, self.rnet, _ = get_net_caffe(os.path.join(here, '../output/converted')) 31 | 32 | def test_gen_pnet_train(self): 33 | meta = self.dataset.get_train_meta() 34 | meta = random.choices(meta, k=self.top) 35 | gptd.generate_training_data_for_pnet( 36 | meta, output_folder=self.output_folder, crop_size=12) 37 | eval_meta = self.dataset.get_val_meta() 38 | eval_meta = random.choices(eval_meta, k=self.top) 39 | gptd.generate_training_data_for_pnet(eval_meta, output_folder=self.output_folder, crop_size=12, suffix='pnet_eval') 40 | 41 | def test_get_pnet_train(self): 42 | pnet_data = get_training_data(self.output_folder, suffix='pnet') 43 | pnet_eval_data = get_training_data(self.output_folder, suffix='pnet_eval') 44 | 45 | def test_gen_rnet_train(self): 46 | meta = self.dataset.get_train_meta() 47 | meta = random.choices(meta, k=self.top) 48 | grtd.generate_training_data_for_rnet(self.pnet, meta, self.output_folder, suffix='rnet') 49 | eval_meta = self.dataset.get_val_meta() 50 | eval_meta = random.choices(eval_meta, k=self.top) 51 | grtd.generate_training_data_for_rnet(self.pnet, eval_meta, self.output_folder, suffix='rnet_eval') 52 | 53 | def test_get_rnet_train(self): 54 | rnet_data = get_training_data(self.output_folder, suffix='rnet') 55 | rnet_eval = get_training_data(self.output_folder, suffix='rnet_eval') 56 | 57 | def test_gen_onet_train(self): 58 | meta = self.dataset.get_train_meta() 59 | meta = random.choices(meta, k=self.top) 60 | gotd.generate_training_data_for_onet(self.pnet, self.rnet, meta, self.output_folder, suffix='onet') 61 | eval_meta = self.dataset.get_val_meta() 62 | eval_meta = random.choices(eval_meta, k=self.top) 63 | gotd.generate_training_data_for_onet(self.pnet, self.rnet, eval_meta, self.output_folder, suffix='onet_eval') 64 | 65 | def test_get_onet_train(self): 66 | rnet_data = get_training_data(self.output_folder, suffix='onet') 67 | rnet_eval = get_training_data(self.output_folder, suffix='onet_eval') -------------------------------------------------------------------------------- /tests/test_landmarks.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test cases for generate facial landmark localization training data 3 | """ 4 | 5 | import os 6 | import sys 7 | import random 8 | import unittest 9 | import cv2 10 | 11 | from mtcnn.datasets import get_by_name 12 | import mtcnn.train.gen_landmark as gl 13 | from mtcnn.train.data import get_landmark_data 14 | from mtcnn.utils import draw 15 | 16 | DEFAULT_DATASET = 'CelebA' 17 | 18 | here = os.path.dirname(__file__) 19 | 20 | 21 | class TestGenLandmarks(unittest.TestCase): 22 | 23 | def setUp(self): 24 | self.datasets = get_by_name(DEFAULT_DATASET) 25 | self.output_folder = os.path.join(here, '../output/test') 26 | self.top = 1000 27 | self.crop_size = 24 28 | self.suffix = 'rnet' 29 | 
self.argument = False 30 | 31 | def test_gen_landmark_data(self): 32 | meta = self.datasets.get_train_meta() 33 | meta = random.choices(meta, k=self.top) 34 | eval_meta = self.datasets.get_val_meta() 35 | eval_meta = random.choices(eval_meta, k=self.top) 36 | gl.gen_landmark_data(eval_meta, self.crop_size, self.output_folder, argument=self.argument, suffix=self.suffix + '_eval') 37 | gl.gen_landmark_data(meta, self.crop_size, self.output_folder, argument=self.argument, suffix=self.suffix) 38 | 39 | def test_get_landmark_data(self): 40 | data = get_landmark_data(self.output_folder, suffix=self.suffix) 41 | 42 | images, landmarks = data.images, data.landmarks 43 | 44 | self.assertEqual(len(images), len(landmarks)) 45 | 46 | # Random sampling 10 pictures and draw landmark points on them. 47 | output_folder = os.path.join(self.output_folder, 'sample_images', 'landmarks') 48 | if not os.path.isdir(output_folder): 49 | os.makedirs(output_folder) 50 | 51 | # convert from (n, 10) to (n, 5, 2) 52 | landmarks = landmarks.reshape(-1, 2, 5).transpose(0, 2, 1) 53 | 54 | for i, (im, lm) in enumerate(zip(images[:10], landmarks[:10])): 55 | im = cv2.imread(im) 56 | w = im.shape[0] 57 | h = im.shape[1] 58 | 59 | lm[:, 0] *= w 60 | lm[:, 1] *= h 61 | 62 | lm = lm.astype(int) 63 | 64 | draw.draw_landmarks(im, lm) 65 | cv2.imwrite(os.path.join(output_folder, '%d.jpg' % i), im) 66 | -------------------------------------------------------------------------------- /tests/test_net_jit.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | import torch 4 | 5 | import numpy as np 6 | import mtcnn.network.mtcnn_pytorch as mtcnn 7 | 8 | here = os.path.dirname(__file__) 9 | 10 | class TestMtcnnPytorch(unittest.TestCase): 11 | 12 | def test_pnet(self): 13 | pnet = mtcnn.PNet().to_script() 14 | data = torch.randn(100, 3, 12, 12) 15 | 16 | det, box, _ = pnet(data) 17 | self.assertEqual(list(det.shape), [100, 2, 1, 1]) 18 | self.assertEqual(list(box.shape), [100, 4, 1, 1]) 19 | 20 | def test_rnet(self): 21 | rnet = mtcnn.RNet().to_script() 22 | data = torch.randn(100, 3, 24, 24) 23 | 24 | det, box, _ = rnet(data) 25 | self.assertEqual(list(det.shape), [100, 2]) 26 | self.assertEqual(list(box.shape), [100, 4]) 27 | 28 | def test_onet(self): 29 | onet = mtcnn.ONet().to_script() 30 | data = torch.randn(100, 3, 48, 48) 31 | 32 | det, box, landmarks = onet(data) 33 | self.assertEqual(list(det.shape), [100, 2]) 34 | self.assertEqual(list(box.shape), [100, 4]) 35 | self.assertEqual(list(landmarks.shape), [100, 10]) 36 | -------------------------------------------------------------------------------- /tests/test_net_pytorch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | import torch 4 | 5 | import numpy as np 6 | import mtcnn.network.mtcnn_pytorch as mtcnn 7 | 8 | here = os.path.dirname(__file__) 9 | 10 | class TestMtcnnPytorch(unittest.TestCase): 11 | 12 | def test_pnet(self): 13 | pnet = mtcnn.PNet(is_train=True) 14 | data = torch.randn(100, 3, 12, 12) 15 | 16 | det, box, landmarks = pnet(data) 17 | self.assertEqual(list(det.shape), [100, 2, 1, 1]) 18 | self.assertEqual(list(box.shape), [100, 4, 1, 1]) 19 | self.assertEqual(list(landmarks.shape), [100, 10, 1, 1]) 20 | 21 | pnet.get_loss(data, torch.randint(-1, 3, (100, ), dtype=torch.int64), torch.randn(100, 4), torch.randn(100, 10)) 22 | 23 | def test_rnet(self): 24 | rnet = mtcnn.RNet(is_train=True) 25 | data = torch.randn(100, 3, 24, 
24) 26 | 27 | det, box, landmarks = rnet(data) 28 | self.assertEqual(list(det.shape), [100, 2]) 29 | self.assertEqual(list(box.shape), [100, 4]) 30 | self.assertEqual(list(landmarks.shape), [100, 10]) 31 | 32 | rnet.get_loss(data, torch.ones(100, dtype=torch.int64), torch.randn(100, 4), torch.randn(100, 10)) 33 | 34 | def test_onet(self): 35 | onet = mtcnn.ONet(is_train=True) 36 | data = torch.randn(100, 3, 48, 48) 37 | 38 | det, box, landmarks = onet(data) 39 | self.assertEqual(list(det.shape), [100, 2]) 40 | self.assertEqual(list(box.shape), [100, 4]) 41 | self.assertEqual(list(landmarks.shape), [100, 10]) 42 | 43 | onet.get_loss(data, torch.ones(100, dtype=torch.int64), torch.randn(100, 4), torch.randn(100, 10)) 44 | 45 | def test_load_caffe_model(self): 46 | pnet = mtcnn.PNet() 47 | rnet = mtcnn.RNet() 48 | onet = mtcnn.ONet() 49 | weight_folder = os.path.join(here, '../output/converted') 50 | pnet.load_caffe_model(np.load(os.path.join(weight_folder, 'pnet.npy'))[()]) 51 | rnet.load_caffe_model(np.load(os.path.join(weight_folder, 'rnet.npy'))[()]) 52 | onet.load_caffe_model(np.load(os.path.join(weight_folder, 'onet.npy'))[()]) 53 | 54 | if __name__ == "__main__": 55 | unittest.main() -------------------------------------------------------------------------------- /tests/test_tracker.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import unittest 4 | 5 | import numpy as np 6 | import mtcnn.network.mtcnn_pytorch as mtcnn 7 | 8 | from mtcnn.deploy.detect import FaceDetector 9 | from mtcnn.deploy.tracker import FaceTracker 10 | 11 | here = os.path.dirname(__file__) 12 | 13 | class TestTracker(unittest.TestCase): 14 | 15 | 16 | def setUp(self): 17 | self.test_video = os.path.join(here, './asset/video/school.avi') 18 | weight_folder = os.path.join(here, '../output/converted') 19 | 20 | pnet = mtcnn.PNet() 21 | rnet = mtcnn.RNet() 22 | onet = mtcnn.ONet() 23 | 24 | pnet.load_caffe_model( 25 | np.load(os.path.join(weight_folder, 'pnet.npy'))[()]) 26 | rnet.load_caffe_model( 27 | np.load(os.path.join(weight_folder, 'rnet.npy'))[()]) 28 | onet.load_caffe_model( 29 | np.load(os.path.join(weight_folder, 'onet.npy'))[()]) 30 | 31 | self.detector = FaceDetector(pnet, rnet, onet) 32 | self.tracker = FaceTracker(self.detector) 33 | 34 | def test_video(self): 35 | cap = cv2.VideoCapture(self.test_video) 36 | 37 | res, frame = cap.read() 38 | 39 | i = 0 40 | while res: 41 | self.tracker.track(frame) 42 | res, frame = cap.read() 43 | i += 1 44 | print("The %sth frame has been processed." 
% i) 45 | if i > 50: 46 | break 47 | cache = self.tracker.get_cache() 48 | 49 | saved_folder = "/home/hanbing/Desktop/image_folder" 50 | for key, images in cache.items(): 51 | if len(images) < 5: 52 | continue 53 | dir_name = os.path.join(saved_folder, str(key)) 54 | if not os.path.isdir(dir_name): 55 | os.makedirs(dir_name) 56 | 57 | for i, img in enumerate(images): 58 | file_name = os.path.join(dir_name, str(i) + '.jpg') 59 | cv2.imwrite(file_name, img) 60 | 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /tests/test_train.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import torch 3 | from mtcnn.train.train_net import Trainer 4 | 5 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 6 | 7 | class TestTrain(unittest.TestCase): 8 | 9 | def test_train_pnet(self): 10 | 11 | trainer = Trainer('pnet', device=device, log_dir='./runs/test/', output_folder='./runs/test/') 12 | trainer.train(20, 256, './output/test') 13 | 14 | def test_train_rnet(self): 15 | trainer = Trainer('rnet', device=device, log_dir='./runs/test/', output_folder='./runs/test/') 16 | trainer.train(3, 256, './output/test') 17 | 18 | def test_train_onet(self): 19 | trainer = Trainer('onet', device=device, log_dir='./runs/test/', output_folder='./runs/test') 20 | trainer.train(3, 256, './output/test') 21 | 22 | if __name__ == "__main__": 23 | unittest.main() 24 | 25 | -------------------------------------------------------------------------------- /tutorial/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BrightXiaoHan/FaceDetector/6ea4510411689a437bc380967d4a0cc086381777/tutorial/README.md --------------------------------------------------------------------------------
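The unit tests above are, in effect, the usage documentation for the deploy API. The sketch below is a minimal standalone detection example distilled from tests/test_detection.py; it is illustrative only and assumes the converted Caffe weights already exist under output/converted (for example, produced beforehand with scripts/convert_caffe_model.py) and that the bundled test images are available.

```python
import os

import cv2
import numpy as np

import mtcnn.deploy.detect as detect
import mtcnn.network.mtcnn_pytorch as mtcnn
import mtcnn.utils.draw as draw

# Load the three cascade stages from the converted Caffe weights,
# mirroring the setUp() of tests/test_detection.py.
weight_folder = 'output/converted'  # assumed to exist already
pnet, rnet, onet = mtcnn.PNet(), mtcnn.RNet(), mtcnn.ONet()
for net, name in ((pnet, 'pnet'), (rnet, 'rnet'), (onet, 'onet')):
    net.load_caffe_model(np.load(os.path.join(weight_folder, '%s.npy' % name))[()])

# A device string such as "cuda:0" can be passed as a fourth argument,
# as done in tests/test_detection.py; without it the detector's default is used.
detector = detect.FaceDetector(pnet, rnet, onet)

img_path = 'tests/asset/images/office5.jpg'  # any of the bundled test images
img = cv2.imread(img_path)

# detect() takes the image path (as in the tests) and returns boxes and landmarks.
boxes, landmarks = detector.detect(img_path)

draw.draw_boxes2(img, boxes)
draw.batch_draw_landmarks(img, landmarks)
cv2.imwrite('detected.jpg', img)
```

With the NMS extensions from setup.py built and the weights and datasets that the individual tests expect in place, the suite can then be run in the usual way, for example `python -m unittest discover -s tests` from the repository root.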