├── README.txt ├── models ├── __init__.py ├── BASE.py ├── MSBASE.py ├── CPFLD.py ├── M1BASE.py └── NPFLD.py ├── .gitignore ├── train_base_7.5w.sh ├── train_pfld_7.5w.sh ├── train_m1base_7.5w.sh ├── train_msbase_7.5w.sh ├── train_pfld_0.25_7.5w.sh ├── export_onnx.py ├── load_pfld_data.py └── train.py /README.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | datas 2 | outputs 3 | 4 | *.out 5 | *.pyc 6 | *.swp 7 | -------------------------------------------------------------------------------- /train_base_7.5w.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | OUTPUT_DIR='./outputs/base_7.5w' 4 | TRAIN_DATA_PATH='./datas/7.5w/pfld_train_data.rec' 5 | VALID_DATA_PATH='./datas/7.5w/pfld_valid_data.rec' 6 | LEARNING_RATE=0.0005 7 | BATCH_SIZE=128 8 | EPOCHES=1000 9 | GPU_IDS='0,1' 10 | IMAGE_SIZE=96 11 | MODEL_TYPE='BASE' 12 | WITH_ANGLE=1 13 | WITH_CATEGORY=0 14 | ALPHA=1.0 15 | 16 | python train.py --output_dir=${OUTPUT_DIR} --train_data_path=${TRAIN_DATA_PATH} --valid_data_path=${VALID_DATA_PATH} --learning_rate=${LEARNING_RATE} --batch_size=${BATCH_SIZE} --epoches=${EPOCHES} --gpu_ids=${GPU_IDS} --image_size=${IMAGE_SIZE} --model_type=${MODEL_TYPE} --with_angle_loss=${WITH_ANGLE} --with_category_loss=${WITH_CATEGORY} --alpha=${ALPHA} 17 | -------------------------------------------------------------------------------- /train_pfld_7.5w.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | OUTPUT_DIR='./outputs/pfld_7.5w' 4 | TRAIN_DATA_PATH='./datas/7.5w/pfld_train_data.rec' 5 | VALID_DATA_PATH='./datas/7.5w/pfld_valid_data.rec' 6 | LEARNING_RATE=0.0001 7 | BATCH_SIZE=128 8 | EPOCHES=1000 9 | GPU_IDS='0,1' 10 | IMAGE_SIZE=96 11 | MODEL_TYPE='CPFLD' 12 | WITH_ANGLE=1 13 | WITH_CATEGORY=0 14 | ALPHA=1.0 15 | 16 | python train.py --output_dir=${OUTPUT_DIR} --train_data_path=${TRAIN_DATA_PATH} --valid_data_path=${VALID_DATA_PATH} --learning_rate=${LEARNING_RATE} --batch_size=${BATCH_SIZE} --epoches=${EPOCHES} --gpu_ids=${GPU_IDS} --image_size=${IMAGE_SIZE} --model_type=${MODEL_TYPE} --with_angle_loss=${WITH_ANGLE} --with_category_loss=${WITH_CATEGORY} --alpha=${ALPHA} 17 | -------------------------------------------------------------------------------- /train_m1base_7.5w.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | OUTPUT_DIR='./outputs/m1base_7.5w' 4 | TRAIN_DATA_PATH='./datas/7.5w/pfld_train_data.rec' 5 | VALID_DATA_PATH='./datas/7.5w/pfld_valid_data.rec' 6 | LEARNING_RATE=0.0001 7 | BATCH_SIZE=128 8 | EPOCHES=1000 9 | GPU_IDS='0,1' 10 | IMAGE_SIZE=96 11 | MODEL_TYPE='M1BASE' 12 | WITH_ANGLE=1 13 | WITH_CATEGORY=0 14 | ALPHA=1.0 15 | 16 | python train.py --output_dir=${OUTPUT_DIR} --train_data_path=${TRAIN_DATA_PATH} --valid_data_path=${VALID_DATA_PATH} --learning_rate=${LEARNING_RATE} --batch_size=${BATCH_SIZE} --epoches=${EPOCHES} --gpu_ids=${GPU_IDS} --image_size=${IMAGE_SIZE} --model_type=${MODEL_TYPE} --with_angle_loss=${WITH_ANGLE} --with_category_loss=${WITH_CATEGORY} --alpha=${ALPHA} 17 | -------------------------------------------------------------------------------- /train_msbase_7.5w.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | OUTPUT_DIR='./outputs/msbase_7.5w' 4 | TRAIN_DATA_PATH='./datas/7.5w/pfld_train_data.rec' 5 | VALID_DATA_PATH='./datas/7.5w/pfld_valid_data.rec' 6 | LEARNING_RATE=0.0005 7 | BATCH_SIZE=128 8 | EPOCHES=1000 9 | GPU_IDS='0,1' 10 | IMAGE_SIZE=96 11 | MODEL_TYPE='MSBASE' 12 | WITH_ANGLE=1 13 | WITH_CATEGORY=0 14 | ALPHA=1.0 15 | 16 | python train.py --output_dir=${OUTPUT_DIR} --train_data_path=${TRAIN_DATA_PATH} --valid_data_path=${VALID_DATA_PATH} --learning_rate=${LEARNING_RATE} --batch_size=${BATCH_SIZE} --epoches=${EPOCHES} --gpu_ids=${GPU_IDS} --image_size=${IMAGE_SIZE} --model_type=${MODEL_TYPE} --with_angle_loss=${WITH_ANGLE} --with_category_loss=${WITH_CATEGORY} --alpha=${ALPHA} 17 | -------------------------------------------------------------------------------- /train_pfld_0.25_7.5w.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | OUTPUT_DIR='./outputs/pfld_0.25_7.5w' 4 | TRAIN_DATA_PATH='./datas/7.5w/pfld_train_data.rec' 5 | VALID_DATA_PATH='./datas/7.5w/pfld_valid_data.rec' 6 | LEARNING_RATE=0.0001 7 | BATCH_SIZE=128 8 | EPOCHES=1000 9 | GPU_IDS='0,1' 10 | IMAGE_SIZE=96 11 | MODEL_TYPE='CPFLD' 12 | WITH_ANGLE=1 13 | WITH_CATEGORY=0 14 | ALPHA=0.25 15 | 16 | python train.py --output_dir=${OUTPUT_DIR} --train_data_path=${TRAIN_DATA_PATH} --valid_data_path=${VALID_DATA_PATH} --learning_rate=${LEARNING_RATE} --batch_size=${BATCH_SIZE} --epoches=${EPOCHES} --gpu_ids=${GPU_IDS} --image_size=${IMAGE_SIZE} --model_type=${MODEL_TYPE} --with_angle_loss=${WITH_ANGLE} --with_category_loss=${WITH_CATEGORY} --alpha=${ALPHA} 17 | -------------------------------------------------------------------------------- /export_onnx.py: -------------------------------------------------------------------------------- 1 | 2 | import mxnet as mx 3 | import numpy as np 4 | from mxnet.contrib import onnx as onnx_mxnet 5 | import logging 6 | logging.basicConfig(level=logging.INFO) 7 | from onnx import checker 8 | import onnx 9 | 10 | syms = './outputs/pfld_7.5w/lmks_detector-symbol.json' 11 | params = './outputs/pfld_7.5w/lmks_detector-0400.params' 12 | 13 | input_shape = (1,3,96,96) 14 | 15 | onnx_file = './outputs/pfld_7.5w/pfld-lite.onnx' 16 | 17 | # Invoke export model API. It returns path of the converted onnx model 18 | converted_model_path = onnx_mxnet.export_model(syms, params, [input_shape], np.float32, onnx_file) 19 | 20 | # Load onnx model 21 | model_proto = onnx.load_model(converted_model_path) 22 | 23 | # Check if converted ONNX protobuf is valid 24 | checker.check_graph(model_proto.graph) 25 | -------------------------------------------------------------------------------- /load_pfld_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import mxnet as mx 4 | import numpy as np 5 | 6 | def make_pfld_record(output=None, listName=None, imageFolder=None, dest_size=98): 7 | record = mx.recordio.MXRecordIO(output, 'w') 8 | File = open(listName, 'r') 9 | line = File.readline() 10 | idx = 0 11 | while line: 12 | idx += 1 13 | print(idx) 14 | info = line.split(' ') 15 | filename = info[0].split('/')[-1] 16 | image = cv2.imread(os.path.join(imageFolder, filename)) 17 | image = cv2.resize(image, (dest_size, dest_size)) 18 | lmks = [] 19 | for i in range(0, 98): 20 | x = float(info[i*2 + 1]) 21 | y = float(info[i*2 + 2]) 22 | lmks.append(x) 23 | lmks.append(y) 24 | categories = [] 25 | for i in range(0, 6): 26 | categories.append( 27 | float(info[1 + 98*2 + i]) 28 | ) 29 | angles = [] 30 | for i in range(0, 3): 31 | angles.append( 32 | float(info[1 + 98*2 + 6 + i]) 33 | ) 34 | label = lmks + categories + angles 35 | 36 | header = mx.recordio.IRHeader(0, label, i, 0) 37 | packed_s = mx.recordio.pack_img(header, image) 38 | record.write(packed_s) 39 | 40 | line = File.readline() 41 | 42 | if File is not None: 43 | File.close() 44 | record.close() 45 | 46 | if __name__ == '__main__': 47 | train_record_name = './datas/pfld_train_data.rec' 48 | valid_record_name = './datas/pfld_valid_data.rec' 49 | 50 | train_file = './datas/train_data/list.txt' 51 | train_folder = './datas/train_data/imgs/' 52 | valid_file = './datas/test_data/list.txt' 53 | valid_folder = './datas/test_data/imgs/' 54 | 55 | image_size = 96 56 | make_pfld_record(train_record_name, train_file, train_folder, image_size) 57 | make_pfld_record(valid_record_name, valid_file, valid_folder, image_size) 58 | -------------------------------------------------------------------------------- /models/BASE.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | from mxnet.gluon import nn 3 | from mxnet.gluon.model_zoo import vision 4 | from mxnet import gluon 5 | 6 | 7 | 8 | class BASE(mx.gluon.HybridBlock): 9 | def __init__(self, num_of_pts=98, **kwargs): 10 | super(BASE, self).__init__(**kwargs) 11 | self.pts_num = num_of_pts 12 | self.lmks_net = mx.gluon.nn.HybridSequential() 13 | self.lmks_net.add( 14 | 15 | nn.Conv2D(channels=16, kernel_size=(3,3), strides=(2,2), padding=(1,1)), 16 | nn.BatchNorm(), 17 | nn.Activation('relu'), 18 | 19 | nn.Conv2D(channels=32, kernel_size=(3,3), strides=(1,1), padding=(1,1)), 20 | nn.BatchNorm(), 21 | nn.Activation('relu'), 22 | 23 | nn.Conv2D(channels=32, kernel_size=(3,3), strides=(2,2), padding=(1,1)), 24 | nn.BatchNorm(), 25 | nn.Activation('relu'), 26 | 27 | nn.Conv2D(channels=32, kernel_size=(3,3), strides=(1,1), padding=(1,1)), 28 | nn.BatchNorm(), 29 | nn.Activation('relu'), 30 | 31 | nn.Conv2D(channels=32, kernel_size=(3,3), strides=(2,2), padding=(1,1)), 32 | nn.BatchNorm(), 33 | nn.Activation('relu'), 34 | 35 | nn.Conv2D(channels=64, kernel_size=(3,3), strides=(1,1), padding=(1,1)), 36 | nn.BatchNorm(), 37 | nn.Activation('relu'), 38 | 39 | nn.Conv2D(channels=64, kernel_size=(3,3), strides=(2,2), padding=(1,1)), 40 | nn.BatchNorm(), 41 | nn.Activation('relu'), 42 | 43 | nn.Conv2D(channels=128, kernel_size=(3,3), strides=(1,1), padding=(1,1)), 44 | nn.BatchNorm(), 45 | nn.Activation('relu'), 46 | 47 | nn.MaxPool2D(pool_size=(2,2), strides=(2,2)), 48 | 49 | nn.Conv2D(channels=num_of_pts*2, kernel_size=(3,3), strides=(1,1), padding=(0,0)) 50 | ) 51 | 52 | def hybrid_forward(self, F, x): 53 | return self.lmks_net(x) 54 | 55 | 56 | if __name__ == '__main__': 57 | x = mx.nd.random.uniform(0.0, 1.0, shape=(1, 3, 96, 96)) 58 | net = BASE(num_of_pts=98) 59 | net.initialize(init=mx.initializer.Xavier()) 60 | net.summary(x) 61 | -------------------------------------------------------------------------------- /models/MSBASE.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | from mxnet.gluon import nn 3 | from mxnet.gluon.model_zoo import vision 4 | from mxnet import gluon 5 | 6 | 7 | 8 | class MSBASE(mx.gluon.HybridBlock): 9 | def __init__(self, num_of_pts=98, **kwargs): 10 | super(MSBASE, self).__init__(**kwargs) 11 | self.pts_num = num_of_pts 12 | 13 | self.s1_feature = nn.HybridSequential() 14 | self.s2_feature = nn.HybridSequential() 15 | self.s3_feature = nn.HybridSequential() 16 | 17 | self.s1_post = nn.HybridSequential() 18 | self.s2_post = nn.HybridSequential() 19 | self.s3_post = nn.HybridSequential() 20 | 21 | self.lmks_out = mx.gluon.nn.HybridSequential() 22 | 23 | self.s1_feature.add( 24 | 25 | nn.Conv2D(channels=16, kernel_size=(3,3), strides=(2,2), padding=(1,1)), 26 | nn.BatchNorm(), 27 | nn.Activation('relu'), 28 | 29 | nn.Conv2D(channels=32, kernel_size=(3,3), strides=(1,1), padding=(1,1)), 30 | nn.BatchNorm(), 31 | nn.Activation('relu'), 32 | 33 | nn.Conv2D(channels=32, kernel_size=(3,3), strides=(2,2), padding=(1,1)), 34 | nn.BatchNorm(), 35 | nn.Activation('relu'), 36 | 37 | nn.Conv2D(channels=32, kernel_size=(3,3), strides=(1,1), padding=(1,1)), 38 | nn.BatchNorm(), 39 | nn.Activation('relu'), 40 | 41 | nn.Conv2D(channels=32, kernel_size=(3,3), strides=(2,2), padding=(1,1)), 42 | nn.BatchNorm(), 43 | nn.Activation('relu'), 44 | 45 | nn.Conv2D(channels=64, kernel_size=(3,3), strides=(1,1), padding=(1,1)), 46 | nn.BatchNorm(), 47 | nn.Activation('relu'), 48 | ) 49 | 50 | self.s2_feature.add( 51 | nn.Conv2D(channels=64, kernel_size=(3,3), strides=(2,2), padding=(1,1)), 52 | nn.BatchNorm(), 53 | nn.Activation('relu'), 54 | ) 55 | 56 | self.s3_feature.add( 57 | nn.Conv2D(channels=64, kernel_size=(3,3), strides=(2,2), padding=(1,1)), 58 | nn.BatchNorm(), 59 | nn.Activation('relu'), 60 | ) 61 | 62 | self.s1_avg = nn.AvgPool2D(pool_size=(2,2), strides=(2,2)) 63 | 64 | self.s2_avg = nn.AvgPool2D(pool_size=(2,2), strides=(2,2)) 65 | 66 | self.lmks_out.add( 67 | nn.Conv2D(channels=num_of_pts*2, kernel_size=(3,3), strides=(1,1), padding=(0,0)) 68 | ) 69 | 70 | def hybrid_forward(self, F, x): 71 | 72 | s1_f = self.s1_feature(x) 73 | s2_f = mx.sym.add_n( 74 | self.s2_feature(s1_f), self.s1_avg(s1_f) 75 | ) 76 | s3_f = mx.sym.add_n( 77 | self.s3_feature(s2_f), self.s2_avg(s2_f) 78 | ) 79 | lmks = self.lmks_out(s3_f) 80 | 81 | return lmks 82 | 83 | 84 | if __name__ == '__main__': 85 | x = mx.nd.random.uniform(0.0, 1.0, shape=(1, 3, 96, 96)) 86 | net = MSBASE(num_of_pts=98) 87 | net.initialize(init=mx.initializer.Xavier()) 88 | net.summary(x) 89 | -------------------------------------------------------------------------------- /models/CPFLD.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | from mxnet.gluon import nn 3 | from mxnet.gluon.model_zoo import vision 4 | from mxnet import gluon 5 | 6 | def _add_conv(out, channels=1, kernel=1, stride=1, pad=0, num_group=1, relu=True): 7 | out.add(nn.Conv2D(channels, kernel, stride, pad, groups=num_group, use_bias=True)) 8 | out.add(nn.BatchNorm(scale=True)) 9 | if relu: 10 | out.add(nn.Activation('relu')) 11 | 12 | 13 | def _add_conv_dw(out, dw_channels, channels, stride, relu=True): 14 | _add_conv(out, channels=dw_channels, kernel=3, stride=stride, pad=1, num_group=dw_channels, relu=relu) 15 | _add_conv(out, channels=channels, relu=relu) 16 | 17 | 18 | class LinearBottleneck(nn.HybridBlock): 19 | def __init__(self, in_channels, channels, t, alpha, stride, **kwargs): 20 | super(LinearBottleneck, self).__init__(**kwargs) 21 | 22 | self.use_shortcut = stride == 1 and in_channels == channels 23 | 24 | expand_channels = int(in_channels * t * alpha) 25 | with self.name_scope(): 26 | self.out = nn.HybridSequential() 27 | _add_conv(self.out, expand_channels, relu=True) 28 | _add_conv(self.out, expand_channels, kernel=3, stride=stride, pad=1, num_group=expand_channels,relu=True) 29 | _add_conv(self.out, channels, relu=True) 30 | 31 | def hybrid_forward(self, F, x): 32 | out = self.out(x) 33 | #if self.use_shortcut: 34 | # out = F.elemwise_add(out, x) 35 | return out 36 | 37 | 38 | class CPFLD(mx.gluon.HybridBlock): 39 | def __init__(self, num_of_pts=98, alpha=1.0, **kwargs): 40 | super(CPFLD, self).__init__(**kwargs) 41 | self.pts_num = num_of_pts 42 | self.feature_shared = mx.gluon.nn.HybridSequential() 43 | self.lmks_net = mx.gluon.nn.HybridSequential() 44 | self.angs_net = mx.gluon.nn.HybridSequential() 45 | 46 | ##------shared feature-----## 47 | ## shadow feature extraction 48 | _add_conv(self.feature_shared, channels=64, kernel=3, stride=2, pad=1, num_group=1) 49 | _add_conv_dw(self.feature_shared, dw_channels=64, channels=64, stride=1) 50 | ## mobilenet-v2, t=2, c=64, n=5, s=2 51 | self.feature_shared.add( 52 | LinearBottleneck(in_channels=64, channels=64, t=2, alpha=alpha, stride=2), 53 | LinearBottleneck(in_channels=64, channels=64, t=2, alpha=alpha, stride=1), 54 | LinearBottleneck(in_channels=64, channels=64, t=2, alpha=alpha, stride=1), 55 | LinearBottleneck(in_channels=64, channels=64, t=2, alpha=alpha, stride=1), 56 | LinearBottleneck(in_channels=64, channels=64, t=2, alpha=alpha, stride=1) 57 | ) 58 | 59 | ##------landmark regression-----## 60 | ## mobilenet-v2, t=2, c=128, n=1, s=2 61 | self.lmks_net.add( 62 | LinearBottleneck(in_channels=64, channels=128, t=2, alpha=alpha, stride=2) 63 | ) 64 | ## mobilenet-v2, t=4, c=128, n=6, s=1 65 | self.lmks_net.add( 66 | LinearBottleneck(in_channels=128, channels=128, t=4, alpha=alpha, stride=1), 67 | LinearBottleneck(in_channels=128, channels=128, t=4, alpha=alpha, stride=1), 68 | LinearBottleneck(in_channels=128, channels=128, t=4, alpha=alpha, stride=1), 69 | LinearBottleneck(in_channels=128, channels=128, t=4, alpha=alpha, stride=1), 70 | LinearBottleneck(in_channels=128, channels=128, t=4, alpha=alpha, stride=1), 71 | LinearBottleneck(in_channels=128, channels=128, t=4, alpha=alpha, stride=1) 72 | ) 73 | ## mobilenet-v2, t=2, c=16, n=1, s=1 74 | self.lmks_net.add( 75 | LinearBottleneck(in_channels=128, channels=16, t=2, alpha=alpha, stride=1), 76 | ) 77 | ## landmarks regression: base line 78 | self.s2_conv = nn.Conv2D(channels=32, kernel_size=(3,3), strides=(2,2), padding=(1,1), activation=None, use_bias=True) 79 | self.s2_bn = nn.BatchNorm(scale=True) 80 | self.s2_act = nn.Activation('relu') 81 | self.s3_conv = nn.Conv2D(channels=128, kernel_size=(3,3), strides=(2,2), padding=(1,1), activation=None, use_bias=True) 82 | self.s3_bn = nn.BatchNorm(scale=True) 83 | self.s3_act = nn.Activation('relu') 84 | 85 | 86 | self.lmks_out = nn.HybridSequential() 87 | self.lmks_out.add( 88 | nn.Conv2D(channels=num_of_pts*2, kernel_size=(3,3), strides=(1,1), padding=(0,0)), 89 | ) 90 | 91 | def hybrid_forward(self, F, x): 92 | x = self.feature_shared(x) 93 | 94 | ## regress facial landmark: base-line 95 | s1 = self.lmks_net(x) 96 | 97 | s2 = self.s2_conv(s1) 98 | s2 = self.s2_bn(s2) 99 | s2 = self.s2_act(s2) 100 | 101 | s3 = self.s3_conv(s2) 102 | s3 = self.s3_bn(s3) 103 | s3 = self.s3_act(s3) 104 | 105 | lmk = self.lmks_out(s3) 106 | return lmk 107 | 108 | 109 | if __name__ == '__main__': 110 | x = mx.nd.random.uniform(0.0, 1.0, shape=(1, 3, 96, 96)) 111 | net = CPFLD(num_of_pts=98, alpha=0.25) 112 | net.initialize(init=mx.initializer.Xavier()) 113 | net.summary(x) 114 | -------------------------------------------------------------------------------- /models/M1BASE.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | from mxnet.gluon import nn 3 | from mxnet.gluon.model_zoo import vision 4 | from mxnet import gluon 5 | 6 | 7 | 8 | class M1BASE(mx.gluon.HybridBlock): 9 | def __init__(self, num_of_pts=98, **kwargs): 10 | super(M1BASE, self).__init__(**kwargs) 11 | self.pts_num = num_of_pts 12 | 13 | self.s1_feature = nn.HybridSequential() 14 | self.s2_feature = nn.HybridSequential() 15 | self.s3_feature = nn.HybridSequential() 16 | 17 | self.s1_post = nn.HybridSequential() 18 | self.s2_post = nn.HybridSequential() 19 | self.s3_post = nn.HybridSequential() 20 | 21 | self.lmks_out = mx.gluon.nn.HybridSequential() 22 | 23 | self.s1_feature.add( 24 | 25 | nn.Conv2D(channels=16, kernel_size=(3,3), strides=(2,2), padding=(1,1)), 26 | nn.BatchNorm(), 27 | nn.Activation('relu'), 28 | 29 | ## 1-st down-scale 30 | nn.Conv2D(channels=16, kernel_size=(3,3), strides=(1,1), groups=16, padding=(1,1)), 31 | nn.BatchNorm(), 32 | nn.Conv2D(channels=32, kernel_size=(1,1), strides=(1,1), groups=1), 33 | nn.Activation('relu'), 34 | 35 | nn.Conv2D(channels=32, kernel_size=(3,3), strides=(1,1), groups=32, padding=(1,1)), 36 | nn.BatchNorm(), 37 | nn.Conv2D(channels=32, kernel_size=(1,1), strides=(1,1), groups=1), 38 | nn.Activation('relu'), 39 | 40 | nn.Conv2D(channels=32, kernel_size=(3,3), strides=(1,1), groups=32, padding=(1,1)), 41 | nn.BatchNorm(), 42 | nn.Conv2D(channels=32, kernel_size=(1,1), strides=(1,1), groups=1), 43 | nn.Activation('relu'), 44 | 45 | nn.Conv2D(channels=32, kernel_size=(3,3), strides=(2,2), groups=32, padding=(1,1)), 46 | nn.BatchNorm(), 47 | nn.Conv2D(channels=32, kernel_size=(1,1), strides=(1,1), groups=1), 48 | nn.Activation('relu'), 49 | 50 | ## 2-nd down-scale 51 | nn.Conv2D(channels=32, kernel_size=(3,3), strides=(1,1), groups=32, padding=(1,1)), 52 | nn.BatchNorm(), 53 | nn.Conv2D(channels=64, kernel_size=(1,1), strides=(1,1), groups=1), 54 | nn.Activation('relu'), 55 | 56 | nn.Conv2D(channels=64, kernel_size=(3,3), strides=(1,1), groups=64, padding=(1,1)), 57 | nn.BatchNorm(), 58 | nn.Conv2D(channels=64, kernel_size=(1,1), strides=(1,1), groups=1), 59 | nn.Activation('relu'), 60 | 61 | nn.Conv2D(channels=64, kernel_size=(3,3), strides=(1,1), groups=64, padding=(1,1)), 62 | nn.BatchNorm(), 63 | nn.Conv2D(channels=64, kernel_size=(1,1), strides=(1,1), groups=1), 64 | nn.Activation('relu'), 65 | 66 | nn.Conv2D(channels=64, kernel_size=(3,3), strides=(2,2), groups=64, padding=(1,1)), 67 | nn.BatchNorm(), 68 | nn.Conv2D(channels=64, kernel_size=(1,1), strides=(1,1), groups=1), 69 | nn.Activation('relu'), 70 | 71 | ## 3-rd keep-scale 72 | nn.Conv2D(channels=64, kernel_size=(3,3), strides=(1,1), groups=64, padding=(1,1)), 73 | nn.BatchNorm(), 74 | nn.Conv2D(channels=128, kernel_size=(1,1), strides=(1,1), groups=1), 75 | nn.Activation('relu'), 76 | 77 | nn.Conv2D(channels=128, kernel_size=(3,3), strides=(1,1), groups=128, padding=(1,1)), 78 | nn.BatchNorm(), 79 | nn.Conv2D(channels=128, kernel_size=(1,1), strides=(1,1), groups=1), 80 | nn.Activation('relu'), 81 | 82 | nn.Conv2D(channels=128, kernel_size=(3,3), strides=(1,1), groups=128, padding=(1,1)), 83 | nn.BatchNorm(), 84 | nn.Conv2D(channels=128, kernel_size=(1,1), strides=(1,1), groups=1), 85 | nn.Activation('relu'), 86 | 87 | nn.Conv2D(channels=128, kernel_size=(3,3), strides=(1,1), groups=128, padding=(1,1)), 88 | nn.BatchNorm(), 89 | nn.Conv2D(channels=256, kernel_size=(1,1), strides=(1,1), groups=1), 90 | nn.Activation('relu'), 91 | 92 | nn.Conv2D(channels=256, kernel_size=(3,3), strides=(1,1), groups=256, padding=(1,1)), 93 | nn.BatchNorm(), 94 | nn.Conv2D(channels=16, kernel_size=(1,1), strides=(1,1), groups=1), 95 | ) 96 | 97 | self.s2_feature.add( 98 | nn.Conv2D(channels=32, kernel_size=(3,3), strides=(2,2), padding=(1,1)), 99 | nn.BatchNorm(), 100 | nn.Activation('relu'), 101 | ) 102 | 103 | self.s3_feature.add( 104 | nn.Conv2D(channels=128, kernel_size=(3,3), strides=(2,2), padding=(1,1)), 105 | nn.BatchNorm(), 106 | nn.Activation('relu'), 107 | ) 108 | 109 | self.lmks_out.add( 110 | nn.Conv2D(channels=num_of_pts*2, kernel_size=(3,3), strides=(1,1), padding=(0,0)) 111 | ) 112 | 113 | def hybrid_forward(self, F, x): 114 | 115 | s1_f = self.s1_feature(x) 116 | s2_f = self.s2_feature(s1_f) 117 | s3_f = self.s3_feature(s2_f) 118 | 119 | lmks = self.lmks_out(s3_f) 120 | 121 | return lmks 122 | 123 | 124 | if __name__ == '__main__': 125 | x = mx.nd.random.uniform(0.0, 1.0, shape=(1, 3, 96, 96)) 126 | net = M1BASE(num_of_pts=98) 127 | net.initialize(init=mx.initializer.Xavier()) 128 | net.summary(x) 129 | -------------------------------------------------------------------------------- /models/NPFLD.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | from mxnet.gluon import nn 3 | from mxnet.gluon.model_zoo import vision 4 | from mxnet import gluon 5 | 6 | def _add_conv(out, channels=1, kernel=1, stride=1, pad=0, num_group=1, relu=True): 7 | out.add(nn.Conv2D(channels, kernel, stride, pad, groups=num_group, use_bias=False)) 8 | out.add(nn.BatchNorm(scale=True)) 9 | if relu: 10 | out.add(nn.Activation('relu')) 11 | 12 | 13 | def _add_conv_dw(out, dw_channels, channels, stride, relu=True): 14 | _add_conv(out, channels=dw_channels, kernel=3, stride=stride, pad=1, num_group=dw_channels, relu=relu) 15 | _add_conv(out, channels=channels, relu=relu) 16 | 17 | 18 | class LinearBottleneck(nn.HybridBlock): 19 | def __init__(self, in_channels, channels, t, alpha, stride, **kwargs): 20 | super(LinearBottleneck, self).__init__(**kwargs) 21 | 22 | self.use_shortcut = stride == 1 and in_channels == channels 23 | 24 | expand_channels = int(in_channels * t * alpha) 25 | with self.name_scope(): 26 | self.out = nn.HybridSequential() 27 | _add_conv(self.out, expand_channels, relu=True) 28 | _add_conv(self.out, expand_channels, kernel=3, stride=stride, pad=1, num_group=expand_channels,relu=True) 29 | _add_conv(self.out, channels, relu=True) 30 | 31 | def hybrid_forward(self, F, x): 32 | out = self.out(x) 33 | if self.use_shortcut: 34 | out = F.elemwise_add(out, x) 35 | return out 36 | 37 | 38 | class NPFLD(mx.gluon.HybridBlock): 39 | def __init__(self, num_of_pts=98, alpha=1.0, **kwargs): 40 | super(NPFLD, self).__init__(**kwargs) 41 | self.pts_num = num_of_pts 42 | self.feature_shared = mx.gluon.nn.HybridSequential() 43 | self.lmks_net = mx.gluon.nn.HybridSequential() 44 | self.angs_net = mx.gluon.nn.HybridSequential() 45 | 46 | ##------shared feature-----## 47 | ## shadow feature extraction 48 | _add_conv(self.feature_shared, channels=64, kernel=3, stride=2, pad=1, num_group=1) 49 | _add_conv_dw(self.feature_shared, dw_channels=64, channels=64, stride=1) 50 | ## mobilenet-v2, t=2, c=64, n=5, s=2 51 | self.feature_shared.add( 52 | LinearBottleneck(in_channels=64, channels=64, t=2, alpha=alpha, stride=2), 53 | LinearBottleneck(in_channels=64, channels=64, t=2, alpha=alpha, stride=1), 54 | LinearBottleneck(in_channels=64, channels=64, t=2, alpha=alpha, stride=1), 55 | LinearBottleneck(in_channels=64, channels=64, t=2, alpha=alpha, stride=1), 56 | LinearBottleneck(in_channels=64, channels=64, t=2, alpha=alpha, stride=1) 57 | ) 58 | 59 | ##------landmark regression-----## 60 | ## mobilenet-v2, t=2, c=128, n=1, s=2 61 | self.lmks_net.add( 62 | LinearBottleneck(in_channels=64, channels=128, t=2, alpha=alpha, stride=2) 63 | ) 64 | ## mobilenet-v2, t=4, c=128, n=6, s=1 65 | self.lmks_net.add( 66 | LinearBottleneck(in_channels=128, channels=128, t=4, alpha=alpha, stride=1), 67 | LinearBottleneck(in_channels=128, channels=128, t=4, alpha=alpha, stride=1), 68 | LinearBottleneck(in_channels=128, channels=128, t=4, alpha=alpha, stride=1), 69 | LinearBottleneck(in_channels=128, channels=128, t=4, alpha=alpha, stride=1), 70 | LinearBottleneck(in_channels=128, channels=128, t=4, alpha=alpha, stride=1), 71 | LinearBottleneck(in_channels=128, channels=128, t=4, alpha=alpha, stride=1) 72 | ) 73 | ## mobilenet-v2, t=2, c=16, n=1, s=1 74 | self.lmks_net.add( 75 | LinearBottleneck(in_channels=128, channels=16, t=2, alpha=alpha, stride=1), 76 | ) 77 | ## landmarks regression: base line 78 | self.s2_conv = nn.Conv2D(channels=32, kernel_size=(3,3), strides=(2,2), padding=(1,1), activation=None, use_bias=False) 79 | self.s2_bn = nn.BatchNorm(scale=True) 80 | self.s2_act = nn.Activation('relu') 81 | self.s3_conv = nn.Conv2D(channels=128, kernel_size=(3,3), strides=(2,2), padding=(1,1), activation=None, use_bias=False) 82 | self.s3_bn = nn.BatchNorm(scale=True) 83 | self.s3_act = nn.Activation('relu') 84 | 85 | self.s1_flatten = nn.Flatten() 86 | self.s2_flatten = nn.Flatten() 87 | self.s3_flatten = nn.Flatten() 88 | 89 | self.lmks_out = nn.HybridSequential() 90 | self.lmks_out.add( 91 | nn.Dense(units=self.pts_num*2, activation=None, use_bias=True) 92 | ) 93 | 94 | def hybrid_forward(self, F, x): 95 | x = self.feature_shared(x) 96 | 97 | ## regress facial landmark: base-line 98 | s1 = self.lmks_net(x) 99 | 100 | s2 = self.s2_conv(s1) 101 | s2 = self.s2_bn(s2) 102 | s2 = self.s2_act(s2) 103 | 104 | s3 = self.s3_conv(s2) 105 | s3 = self.s3_bn(s3) 106 | s3 = self.s3_act(s3) 107 | 108 | s1 = self.s1_flatten(s1) 109 | s2 = self.s2_flatten(s2) 110 | s3 = self.s3_flatten(s3) 111 | lmk = F.concat(s1, s2, s3, dim=1) 112 | lmk = self.lmks_out(lmk) 113 | 114 | return lmk 115 | 116 | 117 | if __name__ == '__main__': 118 | x = mx.nd.random.uniform(0.0, 1.0, shape=(1, 3, 96, 96)) 119 | net = NPFLD(num_of_pts=98, alpha=0.25) 120 | net.initialize(init=mx.initializer.Xavier()) 121 | net.summary(x) 122 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | from models.NPFLD import NPFLD 3 | from models.CPFLD import CPFLD 4 | from models.BASE import BASE 5 | from models.MSBASE import MSBASE 6 | from models.M1BASE import M1BASE 7 | import numpy as np 8 | from mxnet import nd 9 | from mxnet import autograd 10 | import os 11 | import sys 12 | import math 13 | import cv2 14 | import argparse 15 | 16 | 17 | def preprocess(data): 18 | data = (data-123.0) / 58.0 19 | return data 20 | 21 | if __name__ == '__main__': 22 | 23 | parser = argparse.ArgumentParser(description="pfld landmarks detector") 24 | parser.add_argument("--output_dir", type = str, default = None) 25 | parser.add_argument("--pretrain_param", type = str, default = None) 26 | parser.add_argument("--train_data_path", type = str, default = None) 27 | parser.add_argument("--valid_data_path", type = str, default = None) 28 | parser.add_argument("--learning_rate", type = float, default = 0.0001) 29 | parser.add_argument("--batch_size", type = int, default = 128) 30 | parser.add_argument("--epoches", type = int, default = 1000) 31 | parser.add_argument("--gpu_ids", type = str, default = "0,1") 32 | parser.add_argument("--image_size", type = int, default = 112) 33 | parser.add_argument("--num_of_pts", type = int, default = 98) 34 | parser.add_argument("--model_type", type = str, default = 'NPFLD') 35 | parser.add_argument("--logfile_name", type = str, default = 'log.txt') 36 | parser.add_argument("--with_angle_loss", type = str, default = 1) 37 | parser.add_argument("--with_category_loss", type = int, default = 0) 38 | parser.add_argument("--alpha", type = float, default = 1.0) 39 | args = parser.parse_args() 40 | 41 | 42 | train_data_file = args.train_data_path 43 | valid_data_file = args.valid_data_path 44 | output_dir = args.output_dir 45 | 46 | if not os.path.exists(output_dir): 47 | os.makedirs(output_dir) 48 | 49 | use_gpu = None 50 | devices = [] 51 | if 'None' in args.gpu_ids: 52 | use_gpu = False 53 | devices.append(mx.cpu()) 54 | else: 55 | use_gpu = True 56 | gpu_infos = args.gpu_ids.split(',') 57 | for gi in gpu_infos: 58 | devices.append(mx.gpu(int(gi))) 59 | 60 | image_size = args.image_size 61 | batch_size = args.batch_size 62 | epoches = args.epoches 63 | base_lr = args.learning_rate 64 | pts_num = args.num_of_pts 65 | alpha = args.alpha 66 | model_type = args.model_type 67 | with_category = args.with_category_loss 68 | with_angle = args.with_angle_loss 69 | logF_name = os.path.join(output_dir, args.logfile_name) 70 | 71 | logFile = open(logF_name, 'w') 72 | logFile.write("=======================================================\n") 73 | 74 | net = None 75 | if 'NPFLD' in model_type: 76 | net = NPFLD(num_of_pts=pts_num, alpha=alpha) 77 | if 'CPFLD' in model_type: 78 | net = CPFLD(num_of_pts=pts_num, alpha=alpha) 79 | if 'BASE' in model_type: 80 | net = BASE(num_of_pts=pts_num) 81 | if 'MSBASE' in model_type: 82 | net = MSBASE(num_of_pts=pts_num) 83 | if 'M1BASE' in model_type: 84 | net = M1BASE(num_of_pts=pts_num) 85 | net.initialize(mx.init.Normal(sigma=0.001), ctx=devices, force_reinit=True) 86 | 87 | net.hybridize() 88 | if args.pretrain_param is not None: 89 | net.load_parameters(args.pretrain_param) 90 | 91 | huber_loss = mx.gluon.loss.HuberLoss(rho=5) 92 | mse_loss = mx.gluon.loss.L2Loss() 93 | lmks_metric = mx.metric.MAE() 94 | angs_metric = mx.metric.MAE() 95 | 96 | 97 | lr_epoch = [] 98 | train_iter = mx.io.ImageRecordIter( 99 | path_imgrec=train_data_file, 100 | data_shape=(3, image_size, image_size), 101 | batch_size=batch_size, 102 | label_width=205, 103 | shuffle = True, 104 | shuffle_chunk_size = 1024, 105 | seed = 1234, 106 | prefetch_buffer = 10, 107 | preprocess_threads = 16 108 | ) 109 | 110 | valid_iter = mx.io.ImageRecordIter( 111 | path_imgrec=valid_data_file, 112 | data_shape=(3, image_size, image_size), 113 | batch_size=50, 114 | label_width=205, 115 | shuffle = False, 116 | preprocess_threads = 16, 117 | ) 118 | 119 | 120 | ## trainning 121 | trainer = mx.gluon.Trainer( 122 | params=net.collect_params(), 123 | #optimizer='sgd', 124 | #optimizer_params={'learning_rate': base_lr, 'momentum': 0.9, 'wd': 5e-5} 125 | optimizer='adam', 126 | optimizer_params={'learning_rate': base_lr} 127 | ) 128 | 129 | for epoch in range(0, epoches): 130 | # reset training learning rate 131 | if (epoch+1) in lr_epoch: 132 | idx = 0 133 | for i in range(0, len(lr_epoch)): 134 | idx = i 135 | if (epoch+1) == lr_epoch[i]: 136 | break 137 | lr = base_lr * math.pow(0.1, idx+1) 138 | trainer.set_learning_rate(lr) 139 | # reset data iterator 140 | train_iter.reset() 141 | valid_iter.reset() 142 | batch_idx = 0 143 | for batch in train_iter: 144 | batch_idx += 1 145 | batch_size = batch.data[0].shape[0] 146 | data = batch.data[0] 147 | data = preprocess(data) 148 | labels = batch.label[0] 149 | lmks = labels[:, 0:98*2] * image_size 150 | cate = labels[:, 2*98+1:2*98+6] 151 | angs = labels[:, -3:] * np.pi / 180.0 152 | 153 | cat_ratios = nd.mean(cate, axis=0) 154 | cat_ratios = (cat_ratios > 0.0) * (1.0 / (cat_ratios+0.00001)) 155 | cate = cate * cat_ratios 156 | cate = nd.sum(cate, axis=1) 157 | cate = (cate <= 0.0001) * 1 + cate 158 | 159 | data_list = mx.gluon.utils.split_and_load(data, ctx_list=devices, even_split=False) 160 | lmks_list = mx.gluon.utils.split_and_load(lmks, ctx_list=devices, even_split=False) 161 | angs_list = mx.gluon.utils.split_and_load(angs, ctx_list=devices, even_split=False) 162 | cate_list = mx.gluon.utils.split_and_load(cate, ctx_list=devices, even_split=False) 163 | loss_list = [] 164 | 165 | with mx.autograd.record(): 166 | for data, lmks, angs, cate in zip(data_list, lmks_list, angs_list, cate_list): 167 | lmks_regs = net(data) 168 | lmks_regs = nd.Flatten(lmks_regs) 169 | 170 | lmks_loss = nd.square(lmks_regs - lmks) 171 | lmks_loss = nd.sum(lmks_loss, axis=1) 172 | 173 | #angs_loss = 1 - mx.nd.cos((angs_regs - angs)) 174 | #angs_loss = mx.nd.sum(angs_loss, axis=1) 175 | 176 | loss = lmks_loss 177 | 178 | #if with_angle: 179 | # loss = loss * angs_loss 180 | 181 | if with_category: 182 | loss = loss * cate 183 | 184 | loss_list.append(loss) 185 | 186 | lmks_metric.update(lmks, lmks_regs) 187 | for loss in loss_list: 188 | loss.backward() 189 | trainer.step(batch_size=batch_size, ignore_stale_grad=True) 190 | 191 | batch_loss = sum([l.sum().asscalar() for l in loss_list]) / batch_size 192 | #print('epoch:{}--{}'.format(epoch, batch_idx), 'loss={}'.format(batch_loss)) 193 | # print infos, and save models after epoch 194 | lmks_name, lmks_mae = lmks_metric.get() 195 | angs_name, angs_mae = angs_metric.get() 196 | 197 | print('After epoch {}: {} = {}, {}={}, learning-rate={}, model_type---{}'.format(epoch + 1, lmks_name, lmks_mae, angs_name, angs_mae, trainer.learning_rate, model_type)) 198 | net.export(os.path.join(output_dir, 'lmks_detector'), epoch=epoch+1) 199 | #net.save_parameters(os.path.join(output_dir, 'lmks_detector_{}.params'.format(epoch+1))) 200 | lmks_metric.reset() 201 | angs_metric.reset() 202 | 203 | # validate model in test data 204 | NME = 0.0 205 | FR = 0.0 206 | NUM = 0 207 | for batch in valid_iter: 208 | data = batch.data[0] 209 | data = preprocess(data) 210 | labels = batch.label[0] 211 | lmks = labels[:, 0:98*2] * image_size 212 | angs = labels[:, -3:] * np.pi / 180.0 213 | data = data.as_in_context(devices[0]) 214 | lmks = lmks.as_in_context(devices[0]) 215 | angs = angs.as_in_context(devices[0]) 216 | regs = net(data) 217 | regs = nd.Flatten(regs) 218 | batch_size = data.shape[0] 219 | NUM += batch_size 220 | regs = regs.asnumpy() 221 | lmks = lmks.asnumpy() 222 | for i in range(0, batch_size): 223 | ne = 0.0 224 | for j in range(0, 98): 225 | e = (regs[i, j*2 + 0] - lmks[i, j*2 + 0]) * (regs[i, j*2 + 0] - lmks[i, j*2 + 0]) + \ 226 | (regs[i, j*2 + 1] - lmks[i, j*2 + 1]) * (regs[i, j*2 + 1] - lmks[i, j*2 + 1]) 227 | e = np.sqrt(e) 228 | ne += e 229 | inter_occular=(lmks[i, 2*60 + 0] - lmks[i, 2*72 + 0]) * (lmks[i, 2*60 + 0] - lmks[i, 2*72 + 0]) +\ 230 | (lmks[i, 2*60 + 1] - lmks[i, 2*72 + 1]) * (lmks[i, 2*60 + 1] - lmks[i, 2*72 + 1]) 231 | inter_occular = np.sqrt(inter_occular) 232 | ne = ne / (inter_occular * 98.0) 233 | NME += ne 234 | if ne > 0.1: 235 | FR += 1.0 236 | NME /= NUM 237 | FR /= NUM 238 | 239 | print('Validaton: {} = {}, {} = {}'.format('NME', NME, 'FR', FR)) 240 | 241 | val_log = 'epoch-{}, Validaton: {} = {}, {} = {}'.format(epoch, 'NME', NME, 'FR', FR) 242 | logFile.write(val_log + "\n") 243 | logFile.flush() 244 | 245 | if logFile is not None: 246 | logFile.close() 247 | --------------------------------------------------------------------------------