├── lib ├── __init__.py ├── core │ ├── __init__.py │ ├── model │ │ ├── __init__.py │ │ ├── head │ │ │ ├── __init__.py │ │ │ └── centernet_head.py │ │ ├── loss │ │ │ ├── __init__.py │ │ │ ├── iouloss.py │ │ │ └── centernet_loss.py │ │ ├── sqeeze_excitation │ │ │ ├── __init__.py │ │ │ └── se.py │ │ ├── fpn │ │ │ ├── __init__.py │ │ │ ├── plain_fpn.py │ │ │ └── seperateconv_fpn.py │ │ ├── net │ │ │ ├── __init__.py │ │ │ ├── resnet │ │ │ │ ├── __init__.py │ │ │ │ └── backbone.py │ │ │ ├── shufflenet │ │ │ │ └── backbone.py │ │ │ ├── arg_scope │ │ │ │ └── resnet_args_cope.py │ │ │ ├── mobilenet │ │ │ │ ├── backbone.py │ │ │ │ └── mobilenet_v2.py │ │ │ └── mobilenetv3 │ │ │ │ └── backbone.py │ │ └── centernet.py │ ├── anchor │ │ ├── __init__.py │ │ ├── nms.py │ │ ├── tf_anchors.py │ │ ├── box_utils.py │ │ └── anchor.py │ ├── .DS_Store │ └── api │ │ ├── face_detector_bk.py │ │ └── face_detector.py ├── dataset │ ├── augmentor │ │ ├── data_aug │ │ │ ├── __init__.py │ │ │ └── bbox_util.py │ │ ├── README.md │ │ ├── test.jpg │ │ ├── test2.jpg │ │ ├── test.py │ │ └── visual_augmentation.py │ ├── .DS_Store │ ├── centernet_data_sampler.py │ └── ttf_net_data_sampler.py ├── helper │ ├── __init__.py │ └── logger.py └── .DS_Store ├── tools ├── __init__.py ├── .DS_Store ├── convert_to_coreml.py └── auto_freeze.py ├── configs ├── __init__.py ├── face │ ├── __init__.py │ ├── face_mbv3_config.py │ └── face_shufflenet_5x5_config.py └── mscoco │ ├── __init__.py │ ├── mbv3_config.py │ ├── shufflenetplus_config.py │ ├── shufflenet_5x5_config.py │ └── resnet_config.py ├── visulization ├── __init__.py ├── coco_id_map.py ├── vis_with_coreml.py ├── vis_with_mnn.py └── vis.py ├── model_eval ├── .DS_Store ├── fddb_plot.py ├── xml_2_coco.py ├── fddb.py ├── custome_eval.py └── wider.py ├── train.py ├── train_config.py ├── prepare_coco_data.py ├── xml_2_txt.py ├── README.md └── prepare_wider_data.py /lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /configs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /configs/face/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /configs/mscoco/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/core/model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /visulization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/core/model/head/__init__.py: -------------------------------------------------------------------------------- 1 | 
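# Note: the empty __init__.py files in this tree exist only to mark their
# directories as Python packages, so the rest of the project can use dotted
# imports, e.g. (both taken from files later in this dump):
#   from lib.helper.logger import logger
#   from lib.core.model.fpn.plain_fpn import create_fpn_net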
-------------------------------------------------------------------------------- /lib/core/model/loss/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/dataset/augmentor/data_aug/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/helper/__init__.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- -------------------------------------------------------------------------------- /lib/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- -------------------------------------------------------------------------------- /lib/core/model/sqeeze_excitation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/core/model/fpn/__init__.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- -------------------------------------------------------------------------------- /lib/core/model/net/__init__.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- -------------------------------------------------------------------------------- /lib/core/model/net/resnet/__init__.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- -------------------------------------------------------------------------------- /lib/dataset/augmentor/README.md: -------------------------------------------------------------------------------- 1 | # augmentor 2 | A simple image augmentor 3 | -------------------------------------------------------------------------------- /lib/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/610265158/mobilenetv3_centernet/HEAD/lib/.DS_Store -------------------------------------------------------------------------------- /tools/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/610265158/mobilenetv3_centernet/HEAD/tools/.DS_Store -------------------------------------------------------------------------------- /lib/core/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/610265158/mobilenetv3_centernet/HEAD/lib/core/.DS_Store -------------------------------------------------------------------------------- /model_eval/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/610265158/mobilenetv3_centernet/HEAD/model_eval/.DS_Store -------------------------------------------------------------------------------- /lib/dataset/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/610265158/mobilenetv3_centernet/HEAD/lib/dataset/.DS_Store -------------------------------------------------------------------------------- /lib/dataset/augmentor/test.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/610265158/mobilenetv3_centernet/HEAD/lib/dataset/augmentor/test.jpg -------------------------------------------------------------------------------- /lib/dataset/augmentor/test2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/610265158/mobilenetv3_centernet/HEAD/lib/dataset/augmentor/test2.jpg -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | from lib.helper.logger import logger 2 | from lib.core.base_trainer.net_work import trainner 3 | import setproctitle 4 | 5 | 6 | 7 | logger.info('train start') 8 | setproctitle.setproctitle("detect") 9 | 10 | trainner=trainner() 11 | 12 | trainner.train() 13 | -------------------------------------------------------------------------------- /train_config.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | import os 4 | 5 | from configs.mscoco.mbv3_config import config as mb3_config 6 | from configs.face.face_mbv3_config import config as face_mbv3_config 7 | from configs.face.face_shufflenet_5x5_config import config as face_shufflenet_5x5_config 8 | from configs.mscoco.shufflenetplus_config import config as shufflenet_plus_config 9 | from configs.mscoco.shufflenet_5x5_config import config as shufflenet_5x5_config 10 | ##### the config for different task 11 | config=mb3_config 12 | 13 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 14 | config.TRAIN.num_gpu = 1 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /lib/helper/logger.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | 4 | 5 | 6 | #-*-coding:utf-8-*- 7 | 8 | import logging 9 | 10 | 11 | def get_logger(LEVEL,log_file=None): 12 | head = '[%(asctime)-15s] [%(levelname)s] %(message)s ' 13 | if LEVEL=='info': 14 | logging.basicConfig(level=logging.INFO, format=head) 15 | elif LEVEL=='debug': 16 | logging.basicConfig(level=logging.DEBUG, format=head) 17 | logger = logging.getLogger() 18 | 19 | if log_file !=None: 20 | 21 | fh = logging.FileHandler(log_file) 22 | logger.addHandler(fh) 23 | return logger 24 | 25 | logger=get_logger('info') 26 | -------------------------------------------------------------------------------- /model_eval/fddb_plot.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import seaborn as sns 4 | sns.set_style('whitegrid') 5 | 6 | roc = pd.read_csv('/home/lz/WiderFace-Evaluation/fddb/evaluation/tempDiscROC.txt', sep=' ', header=None) 7 | roc.columns = ['tpr', 'fp', 'threshold'] 8 | 9 | 10 | def plot_roc(): 11 | _, axis = plt.subplots(nrows=1, ncols=1, figsize=(7, 4), dpi=120) 12 | axis.plot(roc.fp, roc.tpr, c='r', linewidth=2.0); 13 | axis.set_title('Discrete Score ROC') 14 | axis.set_xlim([0, 2000.0]) 15 | axis.set_ylim([0.6, 1.0]) 16 | axis.set_xlabel('False Positives') 17 | axis.set_ylabel('True Positive Rate'); 18 | plt.show() 19 | plot_roc() -------------------------------------------------------------------------------- /lib/core/model/net/shufflenet/backbone.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | 4 | import tensorflow as tf 5 | import tensorflow.contrib.slim as slim 6 | 7 | from train_config 
import config as cfg 8 | 9 | from lib.core.model.net.shufflenet.shufflenetv2 import ShufflenetV2 10 | from lib.core.model.net.shufflenet.shufflenetv2 import shufflenet_arg_scope 11 | 12 | from lib.core.model.fpn.seperateconv_fpn import create_fpn_net 13 | 14 | def shufflenetv2_ssd(image,is_training=True): 15 | 16 | arg_scope = shufflenet_arg_scope(weight_decay=cfg.TRAIN.weight_decay_factor) 17 | 18 | with tf.contrib.slim.arg_scope(arg_scope): 19 | with slim.arg_scope([slim.batch_norm], is_training=is_training): 20 | shufflenet_fms = ShufflenetV2(image,is_training=is_training) 21 | 22 | 23 | return shufflenet_fms 24 | -------------------------------------------------------------------------------- /lib/core/model/sqeeze_excitation/se.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | 4 | def se(fm,input_dim,refraction=4): 5 | se=tf.reduce_mean(fm,axis=[1,2],keep_dims=True) 6 | se = slim.conv2d(se, 7 | input_dim//refraction, 8 | [1, 1], 9 | stride=1, 10 | activation_fn=tf.nn.relu, 11 | biases_initializer=None, 12 | normalizer_fn=slim.batch_norm, 13 | scope='conv1x1_se_a') 14 | se = slim.conv2d(se, 15 | input_dim, 16 | [1, 1], 17 | stride=1, 18 | activation_fn=None, 19 | normalizer_fn=None, 20 | biases_initializer=None, 21 | scope='conv1x1_se_b') 22 | 23 | se=tf.nn.sigmoid(se) 24 | 25 | return fm*se -------------------------------------------------------------------------------- /tools/convert_to_coreml.py: -------------------------------------------------------------------------------- 1 | import coremltools as ct 2 | import coremltools 3 | from coremltools.models.neural_network import quantization_utils 4 | from coremltools.models.neural_network.quantization_utils import AdvancedQuantizedLayerSelector 5 | 6 | frozen_graph_file='./model/detector.pb' 7 | 8 | 9 | 10 | fp_16_file='./centernet.mlmodel' 11 | 12 | 13 | 14 | mlmodel = ct.convert(frozen_graph_file,inputs=[ct.ImageType()]) 15 | 16 | spec = mlmodel.get_spec() 17 | 18 | print(mlmodel) 19 | 20 | selector = AdvancedQuantizedLayerSelector( 21 | skip_layer_types=['batchnorm', 'depthwiseConv'], 22 | minimum_conv_kernel_channels=4, 23 | minimum_conv_weight_count=4096 24 | ) 25 | 26 | model_fp16 = quantization_utils.quantize_weights(mlmodel, nbits=16,quantization_mode='linear',selector=selector) 27 | 28 | model_fp16.save(fp_16_file) 29 | 30 | print(model_fp16) 31 | 32 | print('convert over, model was saved as ',fp_16_file) -------------------------------------------------------------------------------- /lib/core/model/net/resnet/backbone.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | 4 | from train_config import config as cfg 5 | 6 | from lib.core.model.net.resnet.resnet_v2 import resnet_v2_50,resnet_v2_18 7 | from lib.core.model.net.resnet.resnet_utils import resnet_arg_scope 8 | 9 | from lib.core.model.fpn.plain_fpn import create_fpn_net 10 | 11 | def resnet_ssd(image,is_training=True): 12 | 13 | arg_scope = resnet_arg_scope(weight_decay=cfg.TRAIN.weight_decay_factor) 14 | 15 | with tf.contrib.slim.arg_scope(arg_scope): 16 | with slim.arg_scope([slim.batch_norm], is_training=is_training): 17 | _,endpoints = resnet_v2_18(image, is_training=is_training,global_pool=False,num_classes=None) 18 | 19 | for k, v in endpoints.items(): 20 | print('resnet backbone output:', k, v) 21 | 22 | 
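# NOTE: resnet_v2_18 builds the graph above, yet the end-point keys below are
# scoped 'resnet_v2_50/...'. If resnet_v2_18 follows the usual slim scoping,
# its outputs would live under 'resnet_v2_18/...' instead; the debug print of
# `endpoints` above shows the key names that actually exist.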
resnet_fms=[endpoints['resnet_v2_50/block2'], 23 | endpoints['resnet_v2_50/block3'], 24 | endpoints['resnet_v2_50/block4']] 25 | 26 | 27 | 28 | 29 | return resnet_fms 30 | -------------------------------------------------------------------------------- /lib/core/model/fpn/plain_fpn.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | import tensorflow as tf 4 | import tensorflow.contrib.slim as slim 5 | 6 | 7 | def create_fpn_net(blocks,dims_list): 8 | 9 | c3, c4, c5= blocks 10 | 11 | p5 = slim.conv2d(c5, dims_list[2], [1, 1],padding='SAME',scope='C5_reduced') 12 | p5_upsampled = tf.keras.layers.UpSampling2D(data_format='channels_last')(p5) 13 | p5 = slim.conv2d(p5, dims_list[1], [3, 3],padding='SAME',scope='P5') 14 | 15 | p4 = slim.conv2d(c4, dims_list[1], [1, 1],padding='SAME',scope='C4_reduced') 16 | p4 = p4 + p5_upsampled 17 | p4_upsampled = tf.keras.layers.UpSampling2D(data_format='channels_last')(p4) 18 | p4 = slim.conv2d(p4, dims_list[1], [3, 3],padding='SAME',scope='P4') 19 | 20 | p3 = slim.conv2d(c3, dims_list[0], [1, 1], padding='SAME', scope='C3_reduced') 21 | p3 = p3 + p4_upsampled 22 | p3 = slim.conv2d(p3, dims_list[1], [3, 3], padding='SAME', scope='P3') 23 | 24 | p6 = slim.conv2d(c5, dims_list[3], [3, 3], stride=2, scope='p6') 25 | p7 = slim.conv2d(p6, dims_list[4], [3, 3], stride=2, scope='p7') 26 | 27 | fpn_fms = [p3,p4,p5,p6,p7] 28 | for fm in fpn_fms: 29 | print(fm) 30 | return fpn_fms -------------------------------------------------------------------------------- /lib/core/model/fpn/seperateconv_fpn.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | import tensorflow as tf 4 | import tensorflow.contrib.slim as slim 5 | 6 | 7 | def create_fpn_net(blocks,dims_list): 8 | 9 | c3, c4, c5= blocks 10 | 11 | p5 = slim.conv2d(c5, dims_list[2], [1, 1],padding='SAME',scope='C5_reduced') 12 | p5_upsampled = tf.keras.layers.UpSampling2D(data_format='channels_last')(p5) 13 | p5 = slim.separable_conv2d(p5, dims_list[1], [3, 3],padding='SAME',scope='P5') 14 | 15 | p4 = slim.conv2d(c4, dims_list[1], [1, 1],padding='SAME',scope='C4_reduced') 16 | p4 = p4 + p5_upsampled 17 | p4_upsampled = tf.keras.layers.UpSampling2D(data_format='channels_last')(p4) 18 | p4 = slim.separable_conv2d(p4, dims_list[1], [3, 3],padding='SAME',scope='P4') 19 | 20 | p3 = slim.conv2d(c3, dims_list[0], [1, 1], padding='SAME', scope='C3_reduced') 21 | p3 = p3 + p4_upsampled 22 | p3 = slim.separable_conv2d(p3, dims_list[1], [3, 3], padding='SAME', scope='P3') 23 | 24 | p6 = slim.separable_conv2d(c5, dims_list[3], [3, 3], stride=2, scope='p6') 25 | p7 = slim.separable_conv2d(p6, dims_list[4], [3, 3], stride=2, scope='p7') 26 | 27 | fpn_fms = [p3,p4,p5,p6,p7] 28 | for fm in fpn_fms: 29 | print(fm) 30 | return fpn_fms -------------------------------------------------------------------------------- /tools/auto_freeze.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import tensorflow as tf 4 | os.environ["CUDA_VISIBLE_DEVICES"] = "-1" 5 | 6 | 7 | 8 | 9 | import argparse 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument("--pretrained_model", help="the trained file, end with .ckpt", 12 | type=str) 13 | args = parser.parse_args() 14 | pretrained_model=args.pretrained_model 15 | 16 | print(pretrained_model) 17 | 18 | command="python tools/centernet_for_freeze_bn.py --pretrained_model %s "%pretrained_model 19 | 
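# the command above calls tools/centernet_for_freeze_bn.py, which re-saves the
# checkpoint with batch norm defaulting to inference mode before the graph is
# frozen again below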
os.system(command) 20 | print('save ckpt with bn defaut False') 21 | 22 | 23 | 24 | 25 | #### freeze again 26 | model_folder = './model' 27 | checkpoint = tf.train.get_checkpoint_state(model_folder) 28 | 29 | ##input_checkpoint 30 | input_checkpoint = checkpoint.model_checkpoint_path 31 | ##input_graph 32 | input_meta_graph = input_checkpoint + '.meta' 33 | 34 | ##output_node_names 35 | output_node_names='tower_0/images,tower_0/detections' 36 | 37 | #output_graph 38 | output_graph='./model/detector.pb' 39 | 40 | print('excuted') 41 | 42 | command="python tools/freeze.py --input_checkpoint %s --input_meta_graph %s --output_node_names %s --output_graph %s"\ 43 | %(input_checkpoint,input_meta_graph,output_node_names,output_graph) 44 | os.system(command) 45 | 46 | 47 | print('detector.pb is saved with all feeeze') -------------------------------------------------------------------------------- /prepare_coco_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | from lib.dataset.coco_data import BoxInfo 5 | 6 | import argparse 7 | 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument('--mscocodir', type=str,default='../pubdata/mscoco', help='detect with coco or face',required=False) 10 | args = parser.parse_args() 11 | 12 | coco_dir=args.mscocodir 13 | 14 | train_im_path = os.path.join(coco_dir,'train2017') 15 | train_ann_path = os.path.join(coco_dir,'annotations/instances_train2017.json') 16 | val_im_path = os.path.join(coco_dir,'val2017') 17 | val_ann_path = os.path.join(coco_dir,'annotations/instances_val2017.json') 18 | 19 | 20 | 21 | train_data=BoxInfo(train_im_path,train_ann_path) 22 | 23 | 24 | fw = open('train.txt', 'w') 25 | for meta in train_data.metas: 26 | fname, boxes = meta.img_url, meta.bbox 27 | 28 | 29 | 30 | tmp_str = '' 31 | tmp_str =tmp_str+ fname+'|' 32 | 33 | for box in boxes: 34 | data = ' %d,%d,%d,%d,%d'%(box[0], box[1], box[2], box[3],box[4]) 35 | tmp_str=tmp_str+data 36 | if len(boxes) == 0: 37 | print(tmp_str) 38 | continue 39 | ####err box? 40 | if box[2] <= 0 or box[3] <= 0: 41 | pass 42 | else: 43 | fw.write(tmp_str + '\n') 44 | fw.close() 45 | 46 | 47 | 48 | 49 | 50 | 51 | val_data=BoxInfo(val_im_path,val_ann_path) 52 | 53 | fw = open('val.txt', 'w') 54 | for meta in val_data.metas: 55 | fname, boxes = meta.img_url, meta.bbox 56 | 57 | tmp_str = '' 58 | tmp_str = tmp_str + fname + '|' 59 | 60 | for box in boxes: 61 | data = ' %d,%d,%d,%d,%d' % (box[0], box[1], box[2], box[3], box[4]) 62 | tmp_str = tmp_str + data 63 | if len(boxes) == 0: 64 | print(tmp_str) 65 | continue 66 | ####err box? 
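# assumption: BoxInfo yields COCO-style [x, y, w, h, label] boxes, so the check
# below drops the line when the (last parsed) box has a non-positive width or
# height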
67 | if box[2] <= 0 or box[3] <= 0: 68 | pass 69 | else: 70 | fw.write(tmp_str + '\n') 71 | fw.close() 72 | -------------------------------------------------------------------------------- /visulization/coco_id_map.py: -------------------------------------------------------------------------------- 1 | coco_map = {0: (1, 'person'), 1: (2, 'bicycle'), 2: (3, 'car'), 3: (4, 'motorcycle'), 4: (5, 'airplane'), 5: (6, 'bus'), 2 | 6: (7, 'train'), 7: (8, 'truck'), 8: (9, 'boat'), 9: (10, 'traffic shufflenet'), 10: (11, 'fire hydrant'), 3 | 11: (13, 'stop sign'), 12: (14, 'parking meter'), 13: (15, 'bench'), 14: (16, 'bird'), 15: (17, 'cat'), 4 | 16: (18, 'dog'), 17: (19, 'horse'), 18: (20, 'sheep'), 19: (21, 'cow'), 20: (22, 'elephant'), 5 | 21: (23, 'bear'), 22: (24, 'zebra'), 23: (25, 'giraffe'), 24: (27, 'backpack'), 25: (28, 'umbrella'), 6 | 26: (31, 'handbag'), 27: (32, 'tie'), 28: (33, 'suitcase'), 29: (34, 'frisbee'), 30: (35, 'skis'), 7 | 31: (36, 'snowboard'), 32: (37, 'sports ball'), 33: (38, 'kite'), 34: (39, 'baseball bat'), 8 | 35: (40, 'baseball glove'), 9 | 36: (41, 'skateboard'), 37: (42, 'surfboard'), 38: (43, 'tennis racket'), 39: (44, 'bottle'), 10 | 40: (46, 'wine glass'), 11 | 41: (47, 'cup'), 42: (48, 'fork'), 43: (49, 'knife'), 44: (50, 'spoon'), 45: (51, 'bowl'), 12 | 46: (52, 'banana'), 47: (53, 'apple'), 48: (54, 'sandwich'), 49: (55, 'orange'), 50: (56, 'broccoli'), 13 | 51: (57, 'carrot'), 52: (58, 'hot dog'), 53: (59, 'pizza'), 54: (60, 'donut'), 55: (61, 'cake'), 14 | 56: (62, 'chair'), 57: (63, 'couch'), 58: (64, 'potted plant'), 59: (65, 'bed'), 60: (67, 'dining table'), 15 | 61: (70, 'toilet'), 62: (72, 'tv'), 63: (73, 'laptop'), 64: (74, 'mouse'), 65: (75, 'remote'), 16 | 66: (76, 'keyboard'), 67: (77, 'cell phone'), 68: (78, 'microwave'), 69: (79, 'oven'), 70: (80, 'toaster'), 17 | 71: (81, 'sink'), 72: (82, 'refrigerator'), 73: (84, 'book'), 74: (85, 'clock'), 75: (86, 'vase'), 18 | 76: (87, 'scissors'), 77: (88, 'teddy bear'), 78: (89, 'hair drier'), 79: (90, 'toothbrush')} -------------------------------------------------------------------------------- /lib/core/model/net/arg_scope/resnet_args_cope.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow.contrib.slim as slim 3 | from tensorflow.contrib.slim import arg_scope 4 | from tensorflow.python.framework import ops 5 | from tensorflow.python.ops import nn_ops 6 | from tensorflow.contrib.layers.python.layers import regularizers, \ 7 | layers 8 | from train_config import config 9 | 10 | 11 | 12 | 13 | 14 | def resnet_arg_scope(bn_is_training, 15 | bn_trainable=True, 16 | trainable=True, 17 | weight_decay=config.TRAIN.weight_decay_factor, 18 | batch_norm_decay=0.997, 19 | batch_norm_scale=True, 20 | bn_method='BN', 21 | data_format='NHWC'): 22 | batch_norm_params = { 23 | 'is_training': bn_is_training, 24 | 'decay': batch_norm_decay, 25 | 'scale': batch_norm_scale, 26 | 'trainable': bn_trainable, 27 | 'updates_collections': ops.GraphKeys.UPDATE_OPS, 28 | 'fused':True 29 | } 30 | if 'BN' in bn_method: 31 | norm_func=slim.batch_norm 32 | norm_params=batch_norm_params 33 | elif 'None' in bn_method : 34 | norm_func = None 35 | norm_params = None 36 | 37 | with arg_scope( 38 | [slim.conv2d,slim.separable_conv2d,slim.conv2d_transpose], 39 | weights_regularizer=regularizers.l2_regularizer(weight_decay), 40 | weights_initializer=slim.xavier_initializer(), 41 | trainable=trainable, 42 | activation_fn=nn_ops.relu, 43 | normalizer_fn=norm_func, 44 | 
normalizer_params=norm_params, 45 | data_format=data_format,): 46 | with arg_scope( 47 | [layers.batch_norm,layers.max_pool2d], data_format=data_format): 48 | with arg_scope([layers.batch_norm], **batch_norm_params) as arg_sc: 49 | 50 | return arg_sc 51 | -------------------------------------------------------------------------------- /lib/core/anchor/nms.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from train_config import config as cfg 3 | 4 | def batch_non_max_suppression( 5 | boxes, scores,labels, 6 | score_threshold, iou_threshold, 7 | max_boxes): 8 | """ 9 | Arguments: 10 | boxes: a float tensor with shape [batch_size, N, 4]. 11 | scores: a float tensor with shape [batch_size, N]. 12 | score_threshold: a float number. 13 | iou_threshold: a float number, threshold for IoU. 14 | max_boxes: an integer, maximum number of retained boxes. 15 | Returns: 16 | boxes: a float tensor with shape [batch_size, max_boxes, 4]. 17 | scores: a float tensor with shape [batch_size, max_boxes]. 18 | num_detections: an int tensor with shape [batch_size]. 19 | """ 20 | def fn(x): 21 | boxes, scores,labels = x 22 | 23 | # low scoring boxes are removed 24 | ids = tf.where(tf.greater_equal(scores, score_threshold)) 25 | ids = tf.squeeze(ids, axis=1) 26 | boxes = tf.gather(boxes, ids) 27 | scores = tf.gather(scores, ids) 28 | labels = tf.gather(labels, ids) 29 | selected_indices = tf.image.non_max_suppression( 30 | boxes, scores, max_boxes, iou_threshold 31 | ) 32 | boxes = tf.gather(boxes, selected_indices) 33 | scores = tf.gather(scores, selected_indices) 34 | labels = tf.gather(labels, selected_indices) 35 | num_boxes = tf.to_int32(tf.shape(boxes)[0]) 36 | 37 | zero_padding = max_boxes - num_boxes 38 | boxes = tf.pad(boxes, [[0, zero_padding], [0, 0]]) 39 | scores = tf.pad(scores, [[0, zero_padding]]) 40 | labels = tf.pad(labels, [[0, zero_padding]],constant_values=-1) 41 | 42 | boxes.set_shape([max_boxes, 4]) 43 | scores.set_shape([max_boxes]) 44 | labels.set_shape([max_boxes]) 45 | return boxes, scores,labels, num_boxes 46 | 47 | boxes, scores, labels, num_detections = tf.map_fn( 48 | fn, [boxes, scores,labels], 49 | dtype=(tf.float32, tf.float32,tf.int64, tf.int32), 50 | parallel_iterations=cfg.TEST.parallel_iterations, 51 | back_prop=False, swap_memory=False, infer_shape=True 52 | ) 53 | return boxes, scores,labels, num_detections 54 | 55 | -------------------------------------------------------------------------------- /lib/core/model/net/mobilenet/backbone.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | 4 | from train_config import config as cfg 5 | 6 | from lib.core.model.net.mobilenet.mobilenet_v2 import mobilenet_v2_050,mobilenet_v2_035,mobilenet_v2_025 7 | from lib.core.model.net.mobilenet.mobilenet import training_scope 8 | 9 | 10 | from lib.core.model.net.arg_scope.resnet_args_cope import resnet_arg_scope 11 | 12 | 13 | 14 | 15 | def create_fpn_net(blocks,dims_list): 16 | 17 | of1, of2, of3= blocks 18 | 19 | # lateral2 = slim.conv2d(of2, dims_list[1], [1, 1], 20 | # padding='SAME', 21 | # scope='lateral/res{}'.format(2)) 22 | # 23 | # upsample2_of3 = slim.conv2d(of3, dims_list[1], [1, 1], 24 | # padding='SAME', 25 | # scope='merge/res{}'.format(2)) 26 | # upsample2 = tf.keras.layers.UpSampling2D(data_format='channels_last' )(upsample2_of3) 27 | 28 | # fem_2 = lateral2 + upsample2 29 | 30 | lateral1 = 
slim.conv2d(of1, dims_list[0], [1, 1], 31 | padding='SAME', 32 | scope='lateral/res{}'.format(1)) 33 | 34 | upsample1_of2 = slim.conv2d(of2, dims_list[0], [1, 1], 35 | padding='SAME', 36 | scope='merge/res{}'.format(1)) 37 | upsample1 = tf.keras.layers.UpSampling2D(data_format='channels_last')(upsample1_of2) 38 | 39 | fem_1 = lateral1 + upsample1 40 | 41 | #####enhance model 42 | fpn_fms = [fem_1, upsample1_of2, of3] 43 | 44 | return fpn_fms 45 | 46 | def mobilenet_ssd(image,L2_reg,is_training=True): 47 | 48 | arg_scope = training_scope(weight_decay=L2_reg, is_training=is_training) 49 | 50 | with tf.contrib.slim.arg_scope(arg_scope): 51 | _,endpoint = mobilenet_v2_035(image,is_training=is_training,base_only=True,finegrain_classification_mode=False) 52 | 53 | for k,v in endpoint.items(): 54 | print('mobile backbone output:',k,v) 55 | 56 | mobilebet_fms=[ 57 | endpoint['layer_8/expansion_output'], 58 | endpoint['layer_15/expansion_output'], 59 | endpoint['layer_18/output']] 60 | 61 | if cfg.MODEL.fpn: 62 | mobilebet_fms=create_fpn_net(mobilebet_fms,dims_list=cfg.MODEL.fpn_dims) 63 | 64 | return mobilebet_fms 65 | -------------------------------------------------------------------------------- /model_eval/xml_2_coco.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | ####transform from xml to json 3 | 4 | import os 5 | import xml.etree.cElementTree as et 6 | import json 7 | import argparse 8 | import shutil 9 | import traceback 10 | import random 11 | import numpy as np 12 | import cv2 13 | 14 | def GetFileList(dir, fileList): 15 | newDir = dir 16 | if os.path.isfile(dir): 17 | fileList.append(dir) 18 | elif os.path.isdir(dir): 19 | for s in os.listdir(dir): 20 | #如果需要忽略某些文件夹,使用以下代码 21 | # if s == "pts": 22 | # continue 23 | newDir=os.path.join(dir,s) 24 | GetFileList(newDir, fileList) 25 | return fileList 26 | 27 | 28 | # load train/val split used in the training 29 | annotation_path = './val.txt' 30 | 31 | 32 | with open(annotation_path) as f: 33 | lines = f.readlines() 34 | 35 | # initialize the json data for the dataset 36 | data = {} 37 | cls_person = 0 38 | 39 | test_data = {} 40 | test_data['licenses'] = [] 41 | test_data['info'] = [] 42 | test_data['categories'] = [{'id': cls_person, 'name': 'person', 'supercategory': 'person'}] 43 | test_data['images'] = [] 44 | test_data['annotations'] = [] 45 | 46 | # process xml files 47 | counter=1 48 | anno_id = 0 49 | img_id = 0 50 | for line in lines: 51 | counter+=1 52 | if counter%1000==0: 53 | print('%d/%d images processed'%(counter, len(lines))) 54 | try: 55 | 56 | file_str,label = line.rstrip().rsplit('| ') 57 | 58 | labels = label.split(' ') 59 | boxes = [] 60 | 61 | for label in labels: 62 | 63 | bbox = np.array(label.split(','), dtype=np.float) 64 | boxes.append([bbox[0], bbox[1], bbox[2], bbox[3], bbox[4]]) 65 | 66 | 67 | #file_name = root.find('filename').text 68 | file_name = file_str 69 | image_id = img_id 70 | 71 | img=cv2.imread(file_name) 72 | img_height,img_width,_=img.shape 73 | 74 | 75 | img_entry = {'file_name': file_name, 'id': image_id, 'height': img_height, 'width': img_width} 76 | test_data['images'].append(img_entry) 77 | 78 | img_id += 1 79 | 80 | for box in boxes: 81 | 82 | xmin = int(box[0]) 83 | ymin = int(box[1]) 84 | xmax = int(box[2]) 85 | ymax = int(box[3]) 86 | 87 | anno_entry = {'image_id': image_id, 'category_id': cls_person, 'id': anno_id,\ 88 | 'iscrowd': 0, 'area': int(xmax-xmin) * int(ymax-ymin),\ 89 | 'bbox': [int(xmin), int(ymin), 
int(xmax-xmin), int(ymax-ymin)]}
90 | test_data['annotations'].append(anno_entry)
91 | 
92 | anno_id += 1
93 | except Exception as ex:
94 | msg = "err:%s" % ex
95 | print(msg)
96 | traceback.print_exc()
97 | 
98 | 
99 | with open('./model_eval/DatasetTest_cocoStyle.json', 'w') as outfile:
100 | json.dump(test_data, outfile)
--------------------------------------------------------------------------------
/xml_2_txt.py:
--------------------------------------------------------------------------------
1 | import xml.etree.cElementTree as et  # package for reading xml files
2 | import os
3 | 
4 | def GetFileList(dir, fileList):
5 | newDir = dir
6 | if os.path.isfile(dir):
7 | fileList.append(dir)
8 | elif os.path.isdir(dir):
9 | for s in os.listdir(dir):
10 | # if s == "pts":
11 | # continue
12 | newDir=os.path.join(dir,s)
13 | GetFileList(newDir, fileList)
14 | return fileList
15 | 
16 | 
17 | data_dir1='./data1209'
18 | data_dir2='./data1203'
19 | ratio=0.9
20 | 
21 | xml_list1=[]
22 | GetFileList(data_dir1,xml_list1)
23 | xml_list1=[x for x in xml_list1 if 'xml' in x]
24 | 
25 | xml_list2=[]
26 | GetFileList(data_dir2,xml_list2)
27 | xml_list2=[x for x in xml_list2 if 'xml' in x]
28 | 
29 | xml_list=xml_list1+xml_list2
30 | 
31 | 
32 | xml_list=list(set(xml_list))
33 | train_list=xml_list[:int(len(xml_list)*ratio)]
34 | val_list=xml_list[int(len(xml_list)*ratio):]
35 | 
36 | train_file=open('train.txt',mode='w')
37 | val_file=open('val.txt',mode='w')
38 | 
39 | 
40 | for xml_name in train_list:
41 | try:
42 | tree = et.parse(xml_name)
43 | except:
44 | print(xml_name,'err')
45 | continue
46 | root = tree.getroot()  # use getroot() to fetch the root node; it returns an Element object
47 | 
48 | img_name=root.find('filename').text
49 | 
50 | print(img_name)
51 | tmp_str=''
52 | img_path=xml_name.replace('.xml','.jpg')
53 | tmp_str+=img_path+'|'
54 | 
55 | 
56 | obj=root.find('object')
57 | 
58 | 
59 | label=obj.find('name').text
60 | 
61 | if label=='qrcode':
62 | 
63 | xml_box = obj.find('bndbox')
64 | xmin = (int(float(xml_box.find('xmin').text)) )
65 | ymin = (int(float(xml_box.find('ymin').text)) )
66 | xmax = (int(float(xml_box.find('xmax').text)) )
67 | ymax = (int(float(xml_box.find('ymax').text)) )
68 | 
69 | tmp_str+=' %d,%d,%d,%d,%d'%(xmin,ymin,xmax,ymax,1)
70 | 
71 | tmp_str+='\n'
72 | 
73 | train_file.write(tmp_str)
74 | 
75 | train_file.close()
76 | 
77 | for xml_name in val_list:
78 | try:
79 | tree = et.parse(xml_name)
80 | except:
81 | continue
82 | root = tree.getroot()  # use getroot() to fetch the root node; it returns an Element object
83 | 
84 | img_name = root.find('filename').text
85 | 
86 | tmp_str = ''
87 | img_path=xml_name.replace('.xml','.jpg')
88 | tmp_str += img_path + '|'
89 | 
90 | obj = root.find('object')
91 | label = obj.find('name').text
92 | 
93 | if label == 'qrcode':
94 | xml_box = obj.find('bndbox')
95 | xmin = (int(float(xml_box.find('xmin').text)))
96 | ymin = (int(float(xml_box.find('ymin').text)))
97 | xmax = (int(float(xml_box.find('xmax').text)))
98 | ymax = (int(float(xml_box.find('ymax').text)) )
99 | 
100 | 
101 | 
102 | tmp_str += ' %d,%d,%d,%d,%d' % (xmin, ymin, xmax, ymax, 1)
103 | 
104 | tmp_str += '\n'
105 | 
106 | val_file.write(tmp_str)
107 | 
108 | val_file.close()
109 | 
--------------------------------------------------------------------------------
/visulization/vis_with_coreml.py:
--------------------------------------------------------------------------------
1 | # Copyright @ 2019 Alibaba. All rights reserved.
2 | # Created by ruhuan on 2019.09.09 3 | """ python demo usage about MNN API """ 4 | import sys 5 | sys.path.append('.') 6 | from train_config import config as cfg 7 | import tfcoreml 8 | import coremltools 9 | import cv2 10 | import numpy as np 11 | import os 12 | import PIL.Image 13 | from visulization.coco_id_map import coco_map 14 | from train_config import config as cfg 15 | 16 | def preprocess( image, target_height, target_width, label=None): 17 | ###sometimes use in objs detects 18 | h, w, c = image.shape 19 | 20 | bimage = np.zeros(shape=[target_height, target_width, c], dtype=image.dtype) 21 | 22 | scale_y = target_height / h 23 | scale_x = target_width / w 24 | 25 | scale = min(scale_x, scale_y) 26 | 27 | image = cv2.resize(image, None, fx=scale, fy=scale) 28 | 29 | h_, w_, _ = image.shape 30 | 31 | dx = (target_width - w_) // 2 32 | dy = (target_height - h_) // 2 33 | bimage[dy:h_ + dy, dx:w_ + dx, :] = image 34 | 35 | return bimage, scale, scale, dx, dy 36 | 37 | def inference(model_path,img_dir,thres=0.3): 38 | """ inference mobilenet_v1 using a specific picture """ 39 | centernet_model =coremltools.models.MLModel(model_path) 40 | 41 | 42 | img_list=os.listdir(img_dir) 43 | for pic in img_list: 44 | image = cv2.imread(os.path.join(img_dir,pic)) 45 | #cv2 read as bgr format #change to rgb format 46 | image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB) 47 | 48 | image,_,_,_,_ = preprocess(image,target_height=cfg.DATA.hin,target_width=cfg.DATA.win) 49 | 50 | image_show=image.copy() 51 | 52 | image = image.astype(np.uint8) 53 | pil_img = PIL.Image.fromarray(image) 54 | 55 | coreml_inputs = {'tower_0/images': pil_img} 56 | 57 | coreml_outputs = centernet_model.predict(coreml_inputs, useCPUOnly=True) 58 | 59 | boxes=coreml_outputs['tower_0/detections'] 60 | 61 | boxes=boxes[0] 62 | 63 | for i in range(len(boxes)): 64 | bbox = boxes[i] 65 | 66 | if bbox[4]>thres: 67 | 68 | cv2.rectangle(image_show, (int(bbox[0]), int(bbox[1])), 69 | (int(bbox[2]), int(bbox[3])), (255, 0, 0), 4) 70 | 71 | str_draw = '%s:%.2f' % (coco_map[int(bbox[5])%80][1], bbox[4]) 72 | cv2.putText(image_show, str_draw, (int(bbox[0]), int(bbox[1])), cv2.FONT_HERSHEY_SIMPLEX, 2, 73 | (255, 0, 255), 2) 74 | 75 | cv2.imshow('coreml result',image_show) 76 | cv2.waitKey(0) 77 | 78 | if __name__ == "__main__": 79 | 80 | import argparse 81 | 82 | parser = argparse.ArgumentParser() 83 | parser.add_argument('--coreml_model', type=str, default='./centernet.mlmodel', help='the mnn model ', required=False) 84 | parser.add_argument('--imgDir', type=str, default='../pubdata/mscoco/val2017', help='the image dir to detect') 85 | parser.add_argument('--thres', type=float, default=0.3, help='the thres for detect') 86 | args = parser.parse_args() 87 | 88 | data_dir = args.imgDir 89 | model_path=args.coreml_model 90 | thres=args.thres 91 | inference(model_path,data_dir,thres) 92 | -------------------------------------------------------------------------------- /visulization/vis_with_mnn.py: -------------------------------------------------------------------------------- 1 | # Copyright @ 2019 Alibaba. All rights reserved. 
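# preprocess() below is the same letterbox helper used in vis_with_coreml.py: it
# resizes by `scale` and pads by (dx, dy), so a detected box can be mapped back
# to the original image with x_orig = (x_det - dx) / scale and
# y_orig = (y_det - dy) / scale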
2 | # Created by ruhuan on 2019.09.09 3 | """ python demo usage about MNN API """ 4 | import sys 5 | sys.path.append('.') 6 | 7 | import numpy as np 8 | import MNN 9 | import cv2 10 | import os 11 | 12 | from visulization.coco_id_map import coco_map 13 | from train_config import config as cfg 14 | 15 | def preprocess( image, target_height, target_width, label=None): 16 | ###sometimes use in objs detects 17 | h, w, c = image.shape 18 | 19 | bimage = np.zeros(shape=[target_height, target_width, c], dtype=image.dtype) 20 | 21 | scale_y = target_height / h 22 | scale_x = target_width / w 23 | 24 | scale = min(scale_x, scale_y) 25 | 26 | image = cv2.resize(image, None, fx=scale, fy=scale) 27 | 28 | h_, w_, _ = image.shape 29 | 30 | dx = (target_width - w_) // 2 31 | dy = (target_height - h_) // 2 32 | bimage[dy:h_ + dy, dx:w_ + dx, :] = image 33 | 34 | return bimage, scale, scale, dx, dy 35 | 36 | 37 | 38 | def inference(mnn_model_path,img_dir,thres=0.3): 39 | """ inference mobilenet_v1 using a specific picture """ 40 | interpreter = MNN.Interpreter(mnn_model_path) 41 | session = interpreter.createSession() 42 | input_tensor = interpreter.getSessionInput(session) 43 | 44 | img_list=os.listdir(img_dir) 45 | for pic in img_list: 46 | image = cv2.imread(os.path.join(img_dir,pic)) 47 | #cv2 read as bgr format 48 | image = image[..., ::-1] 49 | #change to rgb format 50 | 51 | image,_,_,_,_ = preprocess(image,target_height=cfg.DATA.hin,target_width=cfg.DATA.win) 52 | image_show=image.copy() 53 | 54 | image = image.astype(np.float32) 55 | 56 | tmp_input = MNN.Tensor((1, cfg.DATA.hin, cfg.DATA.win,3 ), MNN.Halide_Type_Float,\ 57 | image, MNN.Tensor_DimensionType_Tensorflow) 58 | #construct tensor from np.ndarray 59 | input_tensor.copyFrom(tmp_input) 60 | 61 | ### caution!!!!!!!!!!!!!!!! 
the model is nhwc 62 | 63 | interpreter.resizeSession(session) 64 | interpreter.runSession(session) 65 | 66 | output_tensor = interpreter.getSessionOutputAll(session) 67 | 68 | boxes=output_tensor['tower_0/concat_1'].getData() 69 | print(boxes) 70 | boxes=np.reshape(boxes,newshape=[100,6]) 71 | print(boxes.shape) 72 | for i in range(len(boxes)): 73 | bbox = boxes[i] 74 | print(bbox) 75 | if bbox[4]>thres: 76 | 77 | 78 | 79 | cv2.rectangle(image_show, (int(bbox[0]), int(bbox[1])), 80 | (int(bbox[2]), int(bbox[3])), (255, 0, 0), 4) 81 | str_draw = '%s:%.2f' % (coco_map[int(bbox[5])][1], bbox[4]) 82 | cv2.putText(image_show, str_draw, (int(bbox[0]), int(bbox[1])), cv2.FONT_HERSHEY_SIMPLEX, 2, 83 | (255, 0, 255), 2) 84 | 85 | cv2.imshow('mnn result',image_show) 86 | cv2.waitKey(0) 87 | 88 | if __name__ == "__main__": 89 | 90 | import argparse 91 | 92 | parser = argparse.ArgumentParser() 93 | parser.add_argument('--mnn_model', type=str, default='./centernet.mnn', help='the mnn model ', required=False) 94 | parser.add_argument('--imgDir', type=str, default='../pubdata/mscoco/val2017', help='the image dir to detect') 95 | parser.add_argument('--thres', type=float, default=0.3, help='the thres for detect') 96 | args = parser.parse_args() 97 | 98 | data_dir = args.imgDir 99 | model_path=args.mnn_model 100 | thres=args.thres 101 | inference(model_path,data_dir,thres) 102 | -------------------------------------------------------------------------------- /configs/face/face_mbv3_config.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | import os 4 | import numpy as np 5 | from easydict import EasyDict as edict 6 | 7 | config = edict() 8 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" ##if u use muti gpu set them visiable there and then set config.TRAIN.num_gpu 9 | config.TRAIN = edict() 10 | 11 | #### below are params for dataiter 12 | config.TRAIN.process_num = 2 ### process_num for data provider 13 | config.TRAIN.prefetch_size = 20 ### prefect Q size for data provider 14 | 15 | config.TRAIN.num_gpu = 1 ##match with os.environ["CUDA_VISIBLE_DEVICES"] 16 | config.TRAIN.batch_size = 24 ###A big batch size may achieve a better result, but the memory is a problem 17 | config.TRAIN.log_interval = 10 18 | config.TRAIN.epoch = 300 ###just keep training , evaluation shoule be take care by yourself, 19 | ### generally 10,0000 iters is enough 20 | 21 | config.TRAIN.train_set_size=13000 ###widerface train size 22 | config.TRAIN.val_set_size=3000 ###widerface val size 23 | 24 | config.TRAIN.iter_num_per_epoch = config.TRAIN.train_set_size // config.TRAIN.num_gpu // config.TRAIN.batch_size 25 | config.TRAIN.val_iter=config.TRAIN.val_set_size// config.TRAIN.num_gpu // config.TRAIN.batch_size 26 | 27 | config.TRAIN.lr_value_every_step = [0.0001,0.001,0.01,0.001,0.00001,0.0000025] ##warm up is used 28 | config.TRAIN.lr_decay_every_step = [500,1000,40000,50000,60000] 29 | config.TRAIN.lr_decay='cos' 30 | config.TRAIN.opt='adam' 31 | config.TRAIN.weight_decay_factor = 1.e-5 ##l2 regular 32 | config.TRAIN.vis=False ##check data flag 33 | config.TRAIN.mix_precision=False 34 | config.TRAIN.gradient_clip=False 35 | 36 | 37 | config.TRAIN.norm='BN' ##'GN' OR 'BN' 38 | config.TRAIN.lock_basenet_bn=False 39 | config.TRAIN.frozen_stages=-1 ##no freeze 40 | 41 | config.DATA = edict() 42 | config.DATA.root_path='' 43 | config.DATA.train_txt_path='train.txt' 44 | config.DATA.val_txt_path='val.txt' 45 | config.DATA.num_category=1 ###face 1 voc 20 coco 80 46 | 
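# centernet predicts one heatmap channel per foreground category, so num_class
# stays equal to num_category and no extra background class is added (hence the
# commented-out '+1 background' below)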
config.DATA.num_class = config.DATA.num_category # +1 background 47 | 48 | config.DATA.PIXEL_MEAN = [127.] ###rgb 49 | config.DATA.PIXEL_STD = [127.] 50 | 51 | config.DATA.hin = 512 # input size 52 | config.DATA.win = 512 53 | config.DATA.channel = 3 54 | config.DATA.max_size=[config.DATA.hin,config.DATA.win] ##h,w 55 | config.DATA.cover_obj=6 ###cover the small objs 56 | config.DATA.max_objs=1333 57 | 58 | 59 | config.DATA.mutiscale=False #if muti scale set False then config.DATA.MAX_SIZE will be the inputsize 60 | config.DATA.scales=(320,640) 61 | config.DATA.use_int8_data=True ### we use uint8 data to decrease memery access to speed up 62 | config.DATA.use_int8_enlarge=255. 63 | config.DATA.cracy_crop=0.3 64 | config.DATA.alpha=0.54*2 65 | config.DATA.beta=0.54 66 | ##mobilenetv3 as basemodel 67 | config.MODEL = edict() 68 | config.MODEL.continue_train=False ### revover from a trained model 69 | config.MODEL.model_path = './model/' # save directory 70 | config.MODEL.net_structure='MobilenetV3' ######'resnet_v1_50,resnet_v1_101,MobilenetV2 71 | config.MODEL.pretrained_model='./v3-small-minimalistic_224_1.0_float/ema/model-498000' 72 | config.MODEL.task='face' 73 | config.MODEL.min_overlap=0.6 74 | config.MODEL.max_box= 1333 75 | 76 | config.MODEL.global_stride=4 77 | 78 | config.MODEL.head_dims=[32,32,32,32] 79 | config.MODEL.prehead_dims=[96,48] 80 | 81 | config.MODEL.deployee= False ### tensorflow, mnn, coreml 82 | if config.MODEL.deployee: 83 | config.TRAIN.batch_size = 1 84 | config.TRAIN.lock_basenet_bn=True 85 | 86 | -------------------------------------------------------------------------------- /configs/mscoco/mbv3_config.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | import os 4 | import numpy as np 5 | from easydict import EasyDict as edict 6 | 7 | config = edict() 8 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" ##if u use muti gpu set them visiable there and then set config.TRAIN.num_gpu 9 | config.TRAIN = edict() 10 | 11 | #### below are params for dataiter 12 | config.TRAIN.process_num = 4 ### process_num for data provider 13 | config.TRAIN.prefetch_size = 50 ### prefect Q size for data provider 14 | 15 | config.TRAIN.num_gpu = 1 ##match with os.environ["CUDA_VISIBLE_DEVICES"] 16 | config.TRAIN.batch_size = 16 ###A big batch size may achieve a better result, but the memory is a problem 17 | config.TRAIN.log_interval = 10 18 | config.TRAIN.epoch = 300 ###just keep training , evaluation shoule be take care by yourself, 19 | ### generally 10,0000 iters is enough 20 | 21 | config.TRAIN.train_set_size=117266 ###widerface train size 22 | config.TRAIN.val_set_size=5000 ###widerface val size 23 | 24 | config.TRAIN.iter_num_per_epoch = config.TRAIN.train_set_size // config.TRAIN.num_gpu // config.TRAIN.batch_size 25 | config.TRAIN.val_iter=config.TRAIN.val_set_size// config.TRAIN.num_gpu // config.TRAIN.batch_size 26 | 27 | config.TRAIN.lr_value_every_step = [0.00001,0.0001,0.001,0.0001,0.00001,0.000001] ##warm up is used 28 | config.TRAIN.lr_decay_every_step = [500,1000,300000,400000,450000] 29 | config.TRAIN.lr_decay_every_step = [int(x//config.TRAIN.num_gpu) for x in config.TRAIN.lr_decay_every_step] 30 | 31 | config.TRAIN.lr_decay='step' 32 | 33 | config.TRAIN.opt='adam' 34 | config.TRAIN.weight_decay_factor = 1.e-5 ##l2 regular 35 | config.TRAIN.vis=False ##check data flag 36 | config.TRAIN.mix_precision=False 37 | 38 | config.TRAIN.norm='BN' ##'GN' OR 'BN' 39 | config.TRAIN.lock_basenet_bn=False 40 | 
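# which config module is active is decided in train_config.py at the repo root:
# it currently sets `config = mb3_config`, i.e. this file; the other configs
# only take effect when train_config.py imports them instead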
config.TRAIN.frozen_stages=-1 ##no freeze 41 | config.TRAIN.gradient_clip=False 42 | 43 | config.DATA = edict() 44 | config.DATA.root_path='' 45 | config.DATA.train_txt_path='train.txt' 46 | config.DATA.val_txt_path='val.txt' 47 | config.DATA.num_category=80 ###face 1 voc 20 coco 80 48 | config.DATA.num_class = config.DATA.num_category 49 | 50 | 51 | config.DATA.hin = 512 # input size 52 | config.DATA.win = 512 53 | config.DATA.channel = 3 54 | config.DATA.max_size=[config.DATA.hin,config.DATA.win] ##h,w 55 | config.DATA.cover_obj=8 ###cover the small objs 56 | 57 | config.DATA.mutiscale=False #if muti scale set False then config.DATA.MAX_SIZE will be the inputsize 58 | config.DATA.scales=(320,640) 59 | config.DATA.use_int8_data=True 60 | config.DATA.use_int8_enlarge=255. ### use uint8 for heatmap generate for less memory acc, to speed up 61 | config.DATA.max_objs=128 62 | config.DATA.cracy_crop=0.3 63 | config.DATA.alpha=0.54 64 | config.DATA.beta=0.54 65 | ##mobilenetv3 as basemodel 66 | config.MODEL = edict() 67 | config.MODEL.continue_train=False ### revover from a trained model 68 | config.MODEL.model_path = './model/' # save directory 69 | config.MODEL.net_structure='MobilenetV3' 70 | config.MODEL.size=0.75 71 | config.MODEL.pretrained_model='./v3-large_224_0.75_float/ema/model-220000' 72 | config.MODEL.task='mscoco' 73 | config.MODEL.min_overlap=0.7 74 | config.MODEL.max_box= 100 75 | 76 | config.MODEL.global_stride=4 77 | config.MODEL.head_dims=[256,192,128] 78 | config.MODEL.prehead_dims=[128,48] ##no pre head 79 | 80 | 81 | config.MODEL.deployee= False ### tensorflow, mnn, coreml 82 | if config.MODEL.deployee: 83 | config.TRAIN.batch_size = 1 84 | config.TRAIN.lock_basenet_bn=True 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /configs/face/face_shufflenet_5x5_config.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | import os 4 | import numpy as np 5 | from easydict import EasyDict as edict 6 | 7 | config = edict() 8 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" ##if u use muti gpu set them visiable there and then set config.TRAIN.num_gpu 9 | config.TRAIN = edict() 10 | 11 | #### below are params for dataiter 12 | config.TRAIN.process_num = 2 ### process_num for data provider 13 | config.TRAIN.prefetch_size = 20 ### prefect Q size for data provider 14 | 15 | config.TRAIN.num_gpu = 1 ##match with os.environ["CUDA_VISIBLE_DEVICES"] 16 | config.TRAIN.batch_size = 32 ###A big batch size may achieve a better result, but the memory is a problem 17 | config.TRAIN.log_interval = 10 18 | config.TRAIN.epoch = 300 ###just keep training , evaluation shoule be take care by yourself, 19 | ### generally 10,0000 iters is enough 20 | 21 | config.TRAIN.train_set_size=13000 ###widerface train size 22 | config.TRAIN.val_set_size=3000 ###widerface val size 23 | 24 | config.TRAIN.iter_num_per_epoch = config.TRAIN.train_set_size // config.TRAIN.num_gpu // config.TRAIN.batch_size 25 | config.TRAIN.val_iter=config.TRAIN.val_set_size// config.TRAIN.num_gpu // config.TRAIN.batch_size 26 | 27 | config.TRAIN.lr_value_every_step = [0.0001,0.001,0.01,0.001,0.00001,0.0000025] ##warm up is used 28 | config.TRAIN.lr_decay_every_step = [500,1000,60000,80000,100000] 29 | config.TRAIN.lr_decay='cos' 30 | config.TRAIN.opt='adam' 31 | config.TRAIN.weight_decay_factor = 1.e-4 ##l2 regular 32 | config.TRAIN.vis=False ##check data flag 33 | config.TRAIN.mix_precision=False 34 | 
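# for reference, the iteration counts above work out to 13000 // 1 // 32 = 406
# training iterations per epoch and 3000 // 1 // 32 = 93 validation iterations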
config.TRAIN.gradient_clip=False 35 | 36 | 37 | config.TRAIN.norm='BN' ##'GN' OR 'BN' 38 | config.TRAIN.lock_basenet_bn=False 39 | config.TRAIN.frozen_stages=-1 ##no freeze 40 | 41 | config.DATA = edict() 42 | config.DATA.root_path='' 43 | config.DATA.train_txt_path='train.txt' 44 | config.DATA.val_txt_path='val.txt' 45 | config.DATA.num_category=1 ###face 1 voc 20 coco 80 46 | config.DATA.num_class = config.DATA.num_category # +1 background 47 | 48 | config.DATA.PIXEL_MEAN = [127.] ###rgb 49 | config.DATA.PIXEL_STD = [127.] 50 | 51 | config.DATA.hin = 384 # input size 52 | config.DATA.win = 384 53 | config.DATA.channel = 3 54 | config.DATA.max_size=[config.DATA.hin,config.DATA.win] ##h,w 55 | config.DATA.cover_obj=6 ###cover the small objs 56 | config.DATA.max_objs=1333 57 | 58 | 59 | config.DATA.mutiscale=False #if muti scale set False then config.DATA.MAX_SIZE will be the inputsize 60 | config.DATA.scales=(320,640) 61 | config.DATA.use_int8_data=True ### we use uint8 data to decrease memery access to speed up 62 | config.DATA.use_int8_enlarge=255. 63 | config.DATA.cracy_crop=0.3 64 | config.DATA.alpha=0.54*2 65 | config.DATA.beta=0.54*2 66 | ##mobilenetv3 as basemodel 67 | config.MODEL = edict() 68 | config.MODEL.continue_train=False ### revover from a trained model 69 | config.MODEL.model_path = './model/' # save directory 70 | config.MODEL.net_structure='ShuffleNetV2_5x5' ######'resnet_v1_50,resnet_v1_101,MobilenetV2 71 | config.MODEL.size='0.5x' 72 | config.MODEL.pretrained_model='./model/cls_for_convert.ckpt' 73 | config.MODEL.task='face' 74 | config.MODEL.min_overlap=0.6 75 | config.MODEL.max_box= 1333 76 | 77 | config.MODEL.global_stride=4 78 | 79 | config.MODEL.head_dims=[96,48,32] 80 | config.MODEL.prehead_dims=[128,48] ##no pre head 81 | 82 | config.MODEL.deployee= False ### tensorflow, mnn, coreml 83 | if config.MODEL.deployee: 84 | config.TRAIN.batch_size = 1 85 | config.TRAIN.lock_basenet_bn=True 86 | 87 | -------------------------------------------------------------------------------- /configs/mscoco/shufflenetplus_config.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | import os 4 | import numpy as np 5 | from easydict import EasyDict as edict 6 | 7 | config = edict() 8 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" ##if u use muti gpu set them visiable there and then set config.TRAIN.num_gpu 9 | config.TRAIN = edict() 10 | 11 | #### below are params for dataiter 12 | config.TRAIN.process_num = 3 ### process_num for data provider 13 | config.TRAIN.prefetch_size = 20 ### prefect Q size for data provider 14 | 15 | config.TRAIN.num_gpu = 1 ##match with os.environ["CUDA_VISIBLE_DEVICES"] 16 | config.TRAIN.batch_size = 16 ###A big batch size may achieve a better result, but the memory is a problem 17 | config.TRAIN.log_interval = 10 18 | config.TRAIN.epoch = 300 ###just keep training , evaluation shoule be take care by yourself, 19 | ### generally 10,0000 iters is enough 20 | 21 | config.TRAIN.train_set_size=117266 ###widerface train size 22 | config.TRAIN.val_set_size=5000 ###widerface val size 23 | 24 | config.TRAIN.iter_num_per_epoch = config.TRAIN.train_set_size // config.TRAIN.num_gpu // config.TRAIN.batch_size 25 | config.TRAIN.val_iter=config.TRAIN.val_set_size// config.TRAIN.num_gpu // config.TRAIN.batch_size 26 | 27 | config.TRAIN.lr_value_every_step = [0.00001,0.0001,0.00025,0.000025,0.0000025,0.00000025] ##warm up is used 28 | config.TRAIN.lr_decay_every_step = [200,400,200000,300000,400000] 29 | 30 | 
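# read together with lr_value_every_step above, this appears to define a
# piecewise schedule: 1e-5 for the first 200 iters (warm up), 1e-4 until 400,
# 2.5e-4 until 200k, 2.5e-5 until 300k, 2.5e-6 until 400k, then 2.5e-7
# (smoothed by the 'cos' decay selected below)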
config.TRAIN.lr_decay='cos' 31 | config.TRAIN.opt='adam' 32 | config.TRAIN.weight_decay_factor = 1.e-4 ##l2 regular 33 | config.TRAIN.vis=False ##check data flag 34 | config.TRAIN.mix_precision=False 35 | 36 | config.TRAIN.norm='BN' ##'GN' OR 'BN' 37 | config.TRAIN.lock_basenet_bn=False 38 | config.TRAIN.frozen_stages=-1 ##no freeze 39 | config.TRAIN.gradient_clip=False 40 | 41 | config.DATA = edict() 42 | config.DATA.root_path='' 43 | config.DATA.train_txt_path='train.txt' 44 | config.DATA.val_txt_path='val.txt' 45 | config.DATA.num_category=80 ###face 1 voc 20 coco 80 46 | config.DATA.num_class = config.DATA.num_category 47 | 48 | config.DATA.PIXEL_MEAN = [127.] ###rgb 49 | config.DATA.PIXEL_STD = [127.] 50 | 51 | config.DATA.hin = 520 # input size 52 | config.DATA.win = 520 53 | config.DATA.channel = 3 54 | config.DATA.max_size=[config.DATA.hin,config.DATA.win] ##h,w 55 | config.DATA.cover_obj=4 ###cover the small objs 56 | 57 | config.DATA.mutiscale=False #if muti scale set False then config.DATA.MAX_SIZE will be the inputsize 58 | config.DATA.scales=(320,640) 59 | config.DATA.use_int8_data=True 60 | config.DATA.use_int8_enlarge=255. ### use uint8 for heatmap generate for less memory acc, to speed up 61 | config.DATA.max_objs=128 62 | config.DATA.cracy_crop=0.5 63 | config.DATA.alpha=0.54 64 | config.DATA.beta=0.54 65 | 66 | 67 | ##mobilenetv3 as basemodel 68 | config.MODEL = edict() 69 | config.MODEL.continue_train=False ### revover from a trained model 70 | config.MODEL.model_path = './model/' # save directory 71 | config.MODEL.net_structure='ShuffleNetV2_Plus' ######'resnet_v1_50,resnet_v1_101,MobilenetV2 72 | config.MODEL.size='Small' 73 | config.MODEL.pretrained_model=None#'ShuffleNetV2+Small/ShuffleNetV2+Small.ckpt' 74 | config.MODEL.task='mscoco' 75 | config.MODEL.min_overlap=0.7 76 | config.MODEL.max_box= 100 77 | config.MODEL.offset= True 78 | config.MODEL.global_stride=4 79 | config.MODEL.head_dims=[64*3,64*3,32*3] 80 | 81 | config.MODEL.deployee= False ### tensorflow, mnn, coreml 82 | if config.MODEL.deployee: 83 | config.TRAIN.batch_size = 1 84 | config.TRAIN.lock_basenet_bn=True 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /configs/mscoco/shufflenet_5x5_config.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | import os 4 | import numpy as np 5 | from easydict import EasyDict as edict 6 | 7 | config = edict() 8 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" ##if u use muti gpu set them visiable there and then set config.TRAIN.num_gpu 9 | config.TRAIN = edict() 10 | 11 | #### below are params for dataiter 12 | config.TRAIN.process_num = 3 ### process_num for data provider 13 | config.TRAIN.prefetch_size = 50 ### prefect Q size for data provider 14 | 15 | config.TRAIN.num_gpu = 1 ##match with os.environ["CUDA_VISIBLE_DEVICES"] 16 | config.TRAIN.batch_size = 16 ###A big batch size may achieve a better result, but the memory is a problem 17 | config.TRAIN.log_interval = 10 18 | config.TRAIN.epoch = 300 ###just keep training , evaluation shoule be take care by yourself, 19 | ### generally 10,0000 iters is enough 20 | 21 | config.TRAIN.train_set_size=117266 ###widerface train size 22 | config.TRAIN.val_set_size=5000 ###widerface val size 23 | 24 | config.TRAIN.iter_num_per_epoch = config.TRAIN.train_set_size // config.TRAIN.num_gpu // config.TRAIN.batch_size 25 | config.TRAIN.val_iter=config.TRAIN.val_set_size// config.TRAIN.num_gpu // config.TRAIN.batch_size 26 
| 27 | config.TRAIN.lr_value_every_step = [0.00001,0.0001,0.001,0.0001,0.00001,0.000001] ##warm up is used 28 | config.TRAIN.lr_decay_every_step = [500,1000,300000,400000,500000] 29 | config.TRAIN.lr_decay_every_step = [int(x//config.TRAIN.num_gpu) for x in config.TRAIN.lr_decay_every_step] 30 | 31 | 32 | config.TRAIN.lr_decay='step' 33 | config.TRAIN.opt='adam' 34 | config.TRAIN.weight_decay_factor = 1.e-5 ##l2 regular 35 | config.TRAIN.vis=True 36 | ##check data flag 37 | config.TRAIN.mix_precision=False 38 | 39 | config.TRAIN.norm='BN' ##'GN' OR 'BN' 40 | config.TRAIN.lock_basenet_bn=False 41 | config.TRAIN.frozen_stages=-1 ##no freeze 42 | config.TRAIN.gradient_clip=False 43 | 44 | config.DATA = edict() 45 | config.DATA.root_path='' 46 | config.DATA.train_txt_path='train.txt' 47 | config.DATA.val_txt_path='val.txt' 48 | config.DATA.num_category=80 ###face 1 voc 20 coco 80 49 | config.DATA.num_class = config.DATA.num_category 50 | 51 | config.DATA.PIXEL_MEAN = [127.] ###rgb 52 | config.DATA.PIXEL_STD = [127.] 53 | 54 | config.DATA.hin = 416 # input size 55 | config.DATA.win = 416 56 | config.DATA.channel = 3 57 | config.DATA.max_size=[config.DATA.hin,config.DATA.win] ##h,w 58 | config.DATA.cover_obj=4 ###cover the small objs 59 | 60 | config.DATA.mutiscale=False #if muti scale set False then config.DATA.MAX_SIZE will be the inputsize 61 | config.DATA.scales=(320,640) 62 | config.DATA.use_int8_data=True 63 | config.DATA.use_int8_enlarge=255. ### use uint8 for heatmap generate for less memory acc, to speed up 64 | config.DATA.max_objs=128 65 | config.DATA.cracy_crop=0.3 66 | config.DATA.alpha=0.54 67 | config.DATA.beta=0.54 68 | 69 | 70 | ##mobilenetv3 as basemodel 71 | config.MODEL = edict() 72 | config.MODEL.continue_train=False ### revover from a trained model 73 | config.MODEL.model_path = './model/' # save directory 74 | config.MODEL.net_structure='ShuffleNetV2_5x5' ######'resnet_v1_50,resnet_v1_101,MobilenetV2 75 | config.MODEL.size='1.0x' 76 | config.MODEL.pretrained_model=None 77 | config.MODEL.task='mscoco' 78 | config.MODEL.min_overlap=0.7 79 | config.MODEL.max_box= 100 80 | config.MODEL.offset= True 81 | config.MODEL.global_stride=4 82 | 83 | config.MODEL.head_dims=[192,160,128] 84 | config.MODEL.prehead_dims=[128,48] ##no pre head 85 | 86 | config.MODEL.deployee= False ### tensorflow, mnn, coreml 87 | if config.MODEL.deployee: 88 | config.TRAIN.batch_size = 1 89 | config.TRAIN.lock_basenet_bn=True 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /lib/core/model/net/mobilenetv3/backbone.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | 4 | from train_config import config as cfg 5 | 6 | from lib.core.model.net.mobilenetv3 import mobilnet_v3 7 | from lib.core.model.net.mobilenet.mobilenet import training_scope 8 | from lib.core.model.net.mobilenetv3.mobilnet_v3 import hard_swish 9 | 10 | def mobilenetv3_large_detection(image,is_training=True): 11 | 12 | arg_scope = training_scope(weight_decay=cfg.TRAIN.weight_decay_factor, is_training=is_training) 13 | 14 | with tf.contrib.slim.arg_scope(arg_scope): 15 | 16 | _, endpoints = mobilnet_v3.large(image, 17 | depth_multiplier=cfg.MODEL.size, 18 | is_training=is_training, 19 | base_only=True, 20 | finegrain_classification_mode=False) 21 | 22 | for k,v in endpoints.items(): 23 | print('mobile backbone output:',k,v) 24 | 25 | extern_conv = slim.conv2d(_, 26 | 480, 27 | [1, 1], 28 
| stride=1, 29 | padding='SAME', 30 | activation_fn=hard_swish, 31 | scope='extern1') 32 | 33 | print(extern_conv) 34 | mobilebet_fms = [endpoints['layer_5/expansion_output'], 35 | endpoints['layer_7/expansion_output'], 36 | endpoints['layer_13/output'], 37 | extern_conv] 38 | 39 | return mobilebet_fms 40 | 41 | 42 | def mobilenetv3_small_minimalistic(image,is_training=True): 43 | 44 | arg_scope = training_scope(weight_decay=cfg.TRAIN.weight_decay_factor, is_training=is_training) 45 | 46 | with tf.contrib.slim.arg_scope(arg_scope): 47 | if cfg.DATA.channel==1: 48 | if cfg.MODEL.global_stride==8: 49 | stride=2 50 | else: 51 | stride=1 52 | image = slim.separable_conv2d(image, 53 | 3, 54 | [3, 3], 55 | stride=stride, 56 | padding='SAME', 57 | scope='preconv') 58 | 59 | final_feature, endpoints = mobilnet_v3.small_minimalistic(image, 60 | depth_multiplier=1.0, 61 | is_training=is_training, 62 | base_only=True, 63 | finegrain_classification_mode=False) 64 | 65 | extern_conv=slim.separable_conv2d(final_feature, 128, 66 | [3, 3], 67 | stride=2, 68 | padding='SAME', 69 | scope='extern1') 70 | extern_conv = slim.separable_conv2d(extern_conv, 96, 71 | [3, 3], 72 | padding='SAME', 73 | scope='extern2') 74 | extern_conv = slim.separable_conv2d(extern_conv, 128, 75 | [3, 3], 76 | padding='SAME', 77 | scope='extern3') 78 | 79 | 80 | for k,v in endpoints.items(): 81 | print('mobile backbone output:',k,v) 82 | 83 | mobilebet_fms=[endpoints['layer_3/expansion_output'], 84 | endpoints['layer_5/expansion_output'], 85 | endpoints['layer_9/expansion_output'], 86 | #final_feature, 87 | extern_conv] 88 | 89 | 90 | return mobilebet_fms 91 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mobilenetv3_centernet 2 | 3 | 4 | 5 | 6 | ### there is a [pytorch version](https://github.com/610265158/mobile_centernet), trained with mobilenetv2, it is more simple. 7 | 8 | ## introduction 9 | 10 | This is a tensorflow implement mobilenetv3-centernet framework, 11 | which can be easily deployeed on Android(MNN) and IOS(CoreML) mobile devices, end to end. 12 | 13 | Purpose: Light detection algorithms that work on mobile devices is widely used, 14 | such as face detection. 15 | So there is an easy project contains model training and model converter. 16 | 17 | ** contact me if u have question 2120140200@mail.nankai.edu.cn ** 18 | 19 | 20 | 21 | ## pretrained model , and preformance 22 | 23 | ### mscoco 24 | 25 | no test time augmentation. 26 | | model |input_size |map | map@0.5|map@0.75| 27 | | :------: |:------: |:------: |:------: |:------: | 28 | |[mbv3-large-0.75-modified_head](https://drive.google.com/drive/folders/13zvokhOmfSexXNt6fDeFvjedllvLMJfZ?usp=sharing) |512x512 | 0.251| 0.423|0.258 | 29 | 30 | 31 | ## requirment 32 | 33 | + tensorflow 1.14 34 | 35 | + tensorpack 0.9.9 (for data provider) 36 | 37 | + opencv 38 | 39 | + python 3.6 40 | 41 | + MNNConverter 42 | 43 | + coremltools 44 | 45 | ## useage 46 | 47 | ### MSCOCO 48 | 49 | #### train 50 | 1. download mscoco data, then run `python prepare_coco_data.py --mscocodir ./mscoco` 51 | 52 | 2. download pretrained model from 53 | [mbv3-large0.75](https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large_224_0.75_float.tgz) 54 | relese it in the current dir. 55 | 56 | 3. 
then, modify in config=mb3_config in train_config.py, then run: 57 | 58 | ```python train.py``` 59 | 60 | and if u want to check the data when training, u could set vis in confifs/mscoco/mbv3_config.py as True 61 | 62 | 4. After training, freeze the model as .pb by 63 | 64 | ` python tools/auto_freeze.py --pretrained_mobile ./model/yourmodel.ckpt` 65 | 66 | it will produce a detector.pb 67 | 68 | 69 | #### evaluation 70 | 71 | ``` 72 | python model_eval/custome_eval.py [--model [TRAINED_MODEL]] [--annFile [cocostyle annFile]] 73 | [--imgDir [the images dir]] [--is_show [show the result]] 74 | 75 | python model_eval/custome_eval.py --model model/detector.pb 76 | --annFile ../mscoco/annotations/instances_val2017.json 77 | --imgDir ../mscoco/val2017 78 | --is_show 1 79 | 80 | ps, no test time augmentation is used. 81 | ``` 82 | 83 | 84 | ### finetune 85 | 1. download the trained model, 86 | modify the config config.MODEL.pretrained_model='yourmodel.ckpt', 87 | and set config.MODEL.continue_train=True 88 | 2. `python train.py` 89 | 90 | 91 | ### visualization 92 | 93 | if u get a trained model and dont need to work on mobile device, run `python tools/auto_freeze.py`, it will read the checkpoint file in ./model, and produce detector.pb, then 94 | 95 | `python visualization/vis.py` 96 | 97 | u can check th code in visualization to make it runable, it's simple. 98 | 99 | 100 | ### model convert for mobile device 101 | I have carefully processed the postprocess, and it can works within the model, so it could be deployed end to end. 102 | 103 | 4.1 MNN 104 | 105 | + 4.1.1 convert model 106 | 107 | just use the MNN converter, for example: 108 | `./MNNConvert -f TF --modelFile detector.pb --MNNModel centernet.mnn --bizCode biz --fp16 1` 109 | 110 | + 4.1.2 visualization with mnn python wrapper 111 | 112 | `python visualization/vis_with_mnn.py --mnn_model centernet.mnn --imgDir 'your image dir'` 113 | 114 | 4.2 coreml 115 | 116 | + 4.2.1 convert 117 | 118 | `python tools/converter_to_coreml.py` 119 | 120 | + 4.2.2 visualization with coreml python wrapper 121 | 122 | `python visualization/vis_with_coreml.py --coreml_model centernet.mlmodel --imgDir 'your image dir'` 123 | 124 | ps, if you want to do quantization, please reffer to the official doc, it is easy. 125 | 126 | ### TODO: 127 | - [ ] Android project. 
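### minimal python inference example

A short sketch of running the frozen `detector.pb` from python, following the way `visulization/vis.py` and `model_eval/custome_eval.py` call the `FaceDetector` wrapper; the image path below is only a placeholder, and the input size comes from the active config. For the mscoco model each returned row is `[xmin, ymin, xmax, ymax, score, class_id]`.

```
import cv2
from train_config import config as cfg
from lib.core.api.face_detector import FaceDetector

# frozen graph produced by tools/auto_freeze.py
detector = FaceDetector(['./model/detector.pb'])

img = cv2.imread('your_image.jpg')               # placeholder path
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)   # the detector expects RGB input

# boxes: numpy array of shape [N, 6], rows are [xmin, ymin, xmax, ymax, score, class_id]
boxes = detector(img_rgb, 0.3, input_shape=(cfg.DATA.hin, cfg.DATA.win))

for x1, y1, x2, y2, score, cls in boxes:
    cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (255, 0, 0), 2)
cv2.imwrite('result.jpg', img)
```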
128 | -------------------------------------------------------------------------------- /configs/mscoco/resnet_config.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | import os 4 | import numpy as np 5 | from easydict import EasyDict as edict 6 | 7 | config = edict() 8 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" ##if u use muti gpu set them visiable there and then set config.TRAIN.num_gpu 9 | config.TRAIN = edict() 10 | 11 | #### below are params for dataiter 12 | config.TRAIN.process_num = 4 ### process_num for data provider 13 | config.TRAIN.prefetch_size = 20 ### prefect Q size for data provider 14 | 15 | config.TRAIN.num_gpu = 1 ##match with os.environ["CUDA_VISIBLE_DEVICES"] 16 | config.TRAIN.batch_size = 16 ###A big batch size may achieve a better result, but the memory is a problem 17 | config.TRAIN.log_interval = 10 18 | config.TRAIN.epoch = 300 ###just keep training , evaluation shoule be take care by yourself, 19 | ### generally 10,0000 iters is enough 20 | 21 | config.TRAIN.train_set_size=117266 ###coco train size 22 | config.TRAIN.val_set_size=5000 ###coco val size 23 | 24 | config.TRAIN.iter_num_per_epoch = config.TRAIN.train_set_size // config.TRAIN.num_gpu // config.TRAIN.batch_size 25 | config.TRAIN.val_iter=config.TRAIN.val_set_size// config.TRAIN.num_gpu // config.TRAIN.batch_size 26 | 27 | config.TRAIN.lr_value_every_step = [0.00001,0.0001,0.00025,0.0001,0.00001,0.000001] ##warm up is used 28 | config.TRAIN.lr_decay_every_step = [500,1000,150000,200000,250000] 29 | 30 | config.TRAIN.opt='adam' 31 | config.TRAIN.weight_decay_factor = 5.e-5 ##l2 regular 32 | config.TRAIN.vis=False ##check data flag 33 | config.TRAIN.mix_precision=True 34 | 35 | config.TRAIN.norm='BN' ##'GN' OR 'BN' 36 | config.TRAIN.lock_basenet_bn=False 37 | config.TRAIN.frozen_stages=-1 ##no freeze 38 | 39 | config.DATA = edict() 40 | config.DATA.root_path='' 41 | config.DATA.train_txt_path='train.txt' 42 | config.DATA.val_txt_path='val.txt' 43 | config.DATA.num_category=80 ###face 1 voc 20 coco 80 44 | config.DATA.num_class = config.DATA.num_category # +1 background 45 | 46 | config.DATA.PIXEL_MEAN = [127.] ###rgb 47 | config.DATA.PIXEL_STD = [127.] 48 | 49 | config.DATA.hin = 416 # input size 50 | config.DATA.win = 416 51 | config.DATA.channel = 3 52 | config.DATA.max_size=[config.DATA.hin,config.DATA.win] ##h,w 53 | config.DATA.cover_small_face=0 ###cover the small faces 54 | 55 | config.DATA.mutiscale=False #if muti scale set False then config.DATA.MAX_SIZE will be the inputsize 56 | config.DATA.scales=(320,640) 57 | config.DATA.use_int8_data=True 58 | config.DATA.use_int8_enlarge=255. 59 | 60 | # anchors ------------------------- 61 | config.ANCHOR = edict() 62 | config.ANCHOR.rect=False 63 | config.ANCHOR.rect_longer=False #### make anchor h/w=1.5 64 | config.ANCHOR.ANCHOR_STRIDE = 16 65 | config.ANCHOR.ANCHOR_SIZES = (32, 64, 128, 256, 320) # sqrtarea of the anchor box 66 | config.ANCHOR.ANCHOR_STRIDES = (8, 16, 32, 64, 128) # strides for each FPN level. Must be the same length as ANCHOR_SIZES 67 | config.ANCHOR.ANCHOR_RATIOS = (0.25, 1., 4.) 
###### squrae 68 | config.ANCHOR.ANCHOR_SCALES = (2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)) ###### 1:2 in size, 69 | config.ANCHOR.POSITIVE_ANCHOR_THRESH = 0.5 70 | config.ANCHOR.NEGATIVE_ANCHOR_THRESH = 0.4 71 | 72 | ##mobilenetv3 as basemodel 73 | config.MODEL = edict() 74 | config.MODEL.continue_train=False ### revover from a trained model 75 | config.MODEL.model_path = './model/' # save directory 76 | config.MODEL.net_structure='resnet_v2_50' ######'resnet_v1_50,resnet_v1_101,MobilenetV2 77 | config.MODEL.pretrained_model='resnet_v2_50.ckpt' 78 | config.MODEL.fpn_dims=[256,256,256,256,256] 79 | config.MODEL.face=False 80 | config.MODEL.min_overlap=0.7 81 | 82 | config.MODEL.focal_loss=True 83 | config.MODEL.fpn=True 84 | config.MODEL.max_negatives_per_positive= 3.0 85 | 86 | 87 | config.MODEL.deployee= False ### tensorflow, mnn, coreml 88 | if config.MODEL.deployee: 89 | config.TRAIN.batch_size = 1 90 | 91 | config.MODEL.iou_thres= 0.05 92 | config.MODEL.score_thres= 0.3 93 | config.MODEL.max_box= 1500 94 | -------------------------------------------------------------------------------- /lib/core/anchor/tf_anchors.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import sys 4 | sys.path.append('.') 5 | import tensorflow as tf 6 | import numpy as np 7 | from train_config import config as cfg 8 | 9 | from lib.core.anchor.anchor import CellAnchor 10 | 11 | 12 | def get_all_anchors(max_size,stride=None, sizes=None): 13 | """ 14 | Get all anchors in the largest possible image, shifted, floatbox 15 | Args: 16 | max_size(int) : h w 17 | stride (int): the stride of anchors. 18 | sizes (tuple[int]): the sizes (sqrt area) of anchors 19 | 20 | Returns: 21 | anchors: SxSxNUM_ANCHORx4, where S == ceil(MAX_SIZE/STRIDE), floatbox 22 | The layout in the NUM_ANCHOR dim is NUM_RATIO x NUM_SIZE. 23 | 24 | """ 25 | if stride is None: 26 | stride = cfg.ANCHOR.ANCHOR_STRIDE 27 | if sizes is None: 28 | sizes = cfg.ANCHOR.ANCHOR_SIZES 29 | # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors 30 | # are centered on stride / 2, have (approximate) sqrt areas of the specified 31 | # sizes, and aspect ratios as given. 32 | cell_anchors = CellAnchor.generate_cell_anchor( 33 | stride, 34 | scales=np.array(sizes, dtype=np.float32) / stride, 35 | ratios=np.array(cfg.ANCHOR.ANCHOR_RATIOS, dtype=np.float32)) 36 | # anchors are intbox here. 
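# (worked example added for clarity, assuming the FPN path below with 3
# ANCHOR_RATIOS and 3 ANCHOR_SCALES per level) for a 640x640 input at stride 8,
# field_size = ceil(640/8) = 80 and A = 3*3 = 9, so this level yields a
# (80, 80, 9, 4) tensor, i.e. 80*80*9 = 57600 anchors before flattening
# in get_all_anchors_fpn.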
37 | # anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride) 38 | 39 | 40 | field_size_y = tf.cast(tf.ceil(max_size[0] / stride), tf.float32) 41 | field_size_x = tf.cast(tf.ceil(max_size[1] / stride), tf.float32) 42 | shifts_x = tf.range(0, field_size_x) * stride 43 | shifts_y = tf.range(0, field_size_y) * stride 44 | shift_x, shift_y = tf.meshgrid(shifts_x, shifts_y) 45 | 46 | shift_x = tf.reshape(shift_x,shape=[1,-1]) 47 | shift_y = tf.reshape(shift_y,shape=[1,-1]) 48 | 49 | shifts = tf.transpose(tf.concat((shift_x, shift_y, shift_x, shift_y),axis=0)) 50 | # Kx4, K = field_size * field_size 51 | K = shifts.shape[0] 52 | A = cell_anchors.shape[0] 53 | 54 | field_of_anchors = ( 55 | tf.reshape(cell_anchors,shape=[1, A, 4]) + 56 | tf.transpose(tf.reshape(shifts,shape=[1, -1, 4]),(1, 0, 2))) 57 | 58 | field_of_anchors = tf.reshape(field_of_anchors,shape=(field_size_y, field_size_x, A, 4)) 59 | 60 | # FSxFSxAx4 61 | # Many rounding happens inside the anchor code anyway 62 | # assert np.all(field_of_anchors == field_of_anchors.astype('int32')) 63 | 64 | ##scale it to 0 - 1 65 | 66 | h=tf.cast(max_size[0],tf.float32) 67 | w=tf.cast(max_size[1],tf.float32) 68 | 69 | _xx0 = (field_of_anchors[:, :, :, 0:1])/w 70 | _xx1 = (field_of_anchors[:, :, :, 1:2])/h 71 | _xx2 = (field_of_anchors[:, :, :, 2:3]+1)/w 72 | _xx3 = (field_of_anchors[:, :, :, 3:4]+1)/h 73 | field_of_anchors=tf.concat([_xx0,_xx1,_xx2,_xx3],axis=3) 74 | 75 | return field_of_anchors 76 | 77 | def get_all_anchors_fpn(strides=None, sizes=None,scales=None,max_size=[640,640]): 78 | """ 79 | Returns: 80 | [anchors]: each anchors is a SxSx NUM_ANCHOR_RATIOS x4 array. 81 | """ 82 | if strides is None: 83 | strides = cfg.ANCHOR.ANCHOR_STRIDES 84 | if sizes is None: 85 | sizes = cfg.ANCHOR.ANCHOR_SIZES 86 | if scales is None: 87 | scales = cfg.ANCHOR.ANCHOR_SCALES 88 | if max_size is None: 89 | max_size= [cfg.DATA.max_size,cfg.DATA.max_size] 90 | 91 | assert len(strides) == len(sizes) 92 | foas = [] 93 | for stride, size in zip(strides, sizes): 94 | sizes_ = size * np.array(scales) 95 | foa = get_all_anchors(stride=stride, sizes=sizes_,max_size=max_size) 96 | 97 | foas.append(foa) 98 | 99 | flatten_anchors_per_level = [tf.reshape(k,shape=(-1, 4)) for k in foas] 100 | anchors = tf.concat(flatten_anchors_per_level, axis=0) 101 | 102 | ###concat them 103 | return anchors 104 | 105 | 106 | if __name__=='__main__': 107 | import cv2 108 | anchors=get_all_anchors_fpn(max_size=[640,640]) 109 | 110 | init = tf.global_variables_initializer() 111 | with tf.Session() as sess: 112 | sess.run(init) 113 | anchors=sess.run(anchors) 114 | 115 | anchors=np.array(anchors) 116 | print(anchors.shape) 117 | image = np.ones(shape=[cfg.DATA.max_size, cfg.DATA.max_size, 3]) * 255 118 | for i in range(0,anchors.shape[0]): 119 | box=anchors[i] 120 | print(int(round((box[2]-box[0])*cfg.DATA.max_size))) 121 | cv2.rectangle(image, (int(round(box[0]*cfg.DATA.max_size)), int(round(box[1]*cfg.DATA.max_size))), 122 | (int(round(box[2]*cfg.DATA.max_size)), int(round(box[3]*cfg.DATA.max_size))), (255, 0, 0), 1) 123 | 124 | cv2.namedWindow('anchors',0) 125 | cv2.imshow('anchors',image) 126 | cv2.waitKey(0) -------------------------------------------------------------------------------- /lib/core/model/head/centernet_head.py: -------------------------------------------------------------------------------- 1 | # -*-coding:utf-8-*- 2 | 3 | 4 | import tensorflow as tf 5 | import tensorflow.contrib.slim as slim 6 | from lib.core.model.net.arg_scope.resnet_args_cope 
import resnet_arg_scope 7 | from train_config import config as cfg 8 | 9 | from lib.core.model.sqeeze_excitation.se import se 10 | 11 | class CenternetHead(): 12 | 13 | def __call__(self, fms, training=True): 14 | arg_scope = resnet_arg_scope( bn_is_training=training, ) 15 | with slim.arg_scope(arg_scope): 16 | with tf.variable_scope('CenternetHead'): 17 | # c2, c3, c4, c5 = fms 18 | # deconv_feature=c5 19 | 20 | deconv_feature = self._unet_magic(fms) 21 | 22 | ##### 23 | 24 | kps = slim.separable_conv2d(deconv_feature, 25 | cfg.DATA.num_class, 26 | [3, 3], 27 | stride=1, 28 | activation_fn=None, 29 | normalizer_fn=None, 30 | weights_initializer=tf.initializers.random_normal(stddev=0.001), 31 | biases_initializer=tf.initializers.constant(-2.19), 32 | scope='centernet_cls_output') 33 | 34 | 35 | wh = slim.separable_conv2d(deconv_feature, 36 | 4, 37 | [3, 3], 38 | stride=1, 39 | activation_fn=None, 40 | normalizer_fn=None, 41 | weights_initializer=tf.initializers.random_normal(stddev=0.001), 42 | biases_initializer=tf.initializers.constant(0), 43 | scope='centernet_wh_output') 44 | 45 | return kps, wh*16 46 | 47 | def _complex_upsample(self,fm,output_dim, factor=2,scope='upsample'): 48 | with tf.variable_scope(scope): 49 | 50 | 51 | x = slim.separable_conv2d(fm, 52 | output_dim, 53 | [3, 3], 54 | activation_fn=None, 55 | padding='SAME', 56 | scope='branch_x_upsample_resize') 57 | y = slim.separable_conv2d(fm, 58 | output_dim, 59 | [5, 5], 60 | activation_fn=None, 61 | padding='SAME', 62 | scope='branch_y_upsample_resize') 63 | final = x+y 64 | final = tf.keras.layers.UpSampling2D(data_format='channels_last', interpolation='bilinear', 65 | size=(factor, factor))(final) 66 | 67 | return final 68 | 69 | def revers_conv(self,fm,output_dim,k_size,refraction=4,scope='boring'): 70 | 71 | input_channel = fm.shape[3].value 72 | 73 | mid_channels=input_channel//refraction 74 | with tf.variable_scope(scope): 75 | fm_bypass = slim.conv2d(fm, 76 | mid_channels, 77 | [1, 1], 78 | padding='SAME', 79 | scope='1x1') 80 | 81 | fm_bypass = slim.separable_conv2d(fm_bypass, 82 | output_dim, 83 | [k_size, k_size], 84 | activation_fn=None, 85 | padding='SAME', 86 | scope='3x3') 87 | 88 | 89 | return fm_bypass 90 | 91 | def _unet_magic(self, fms, dims=cfg.MODEL.head_dims): 92 | 93 | c2, c3, c4, c5 = fms 94 | 95 | ####24, 116, 232, 464, 96 | 97 | c5_upsample = self._complex_upsample(c5, output_dim= dims[0]//2,factor=2, scope='c5_upsample') 98 | c4 = self.revers_conv(c4, dims[0]//2, k_size=5, scope='c4_reverse') 99 | p4=tf.nn.relu(tf.concat([c4,c5_upsample],axis=3)) 100 | 101 | c4_upsample = self._complex_upsample(p4, output_dim= dims[1]//2, factor=2,scope='c4_upsample') 102 | c3 = self.revers_conv(c3, dims[1]//2, k_size=5, scope='c3_reverse') 103 | p3=tf.nn.relu(tf.concat([c3,c4_upsample],axis=3)) 104 | 105 | c3_upsample = self._complex_upsample(p3, output_dim= dims[2]//2,factor=2, scope='c3_upsample') 106 | c2 = self.revers_conv(c2, dims[2]//2,k_size=5,scope='c2_reverse') 107 | p2=tf.nn.relu(tf.concat([c2,c3_upsample],axis=3)) 108 | 109 | final = se(p2, dims[2]) 110 | 111 | return final 112 | 113 | -------------------------------------------------------------------------------- /lib/dataset/augmentor/test.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import os 3 | import cv2 4 | import numpy as np 5 | import random 6 | import augmentor 7 | 8 | 9 | 10 | 11 | ####CAUTION the data is from pytorch tutorial , 12 | ###download from 
url=https://download.pytorch.org/tutorial/faces.zip 13 | ##### and i find some of them are not labeled very well 14 | 15 | csv_file='faces/face_landmarks.csv' 16 | 17 | ###parse the scv 18 | label_file=csv.reader(open(csv_file,'r')) 19 | 20 | 21 | for _,single_sample in enumerate(label_file): 22 | if _==0: 23 | ##drop the header in csvfile 24 | continue 25 | 26 | image_path=os.path.join('faces',single_sample[0]) 27 | label=np.array(single_sample[1:]).reshape([-1,2]).astype(np.int) 28 | img=cv2.imread(image_path) 29 | for _index in range(label.shape[0]): 30 | x_y=label[_index] 31 | cv2.circle(img,center=(x_y[0],x_y[1]),color=(122,122,122),radius=2,thickness=2) 32 | 33 | cv2.imshow('raw',img) 34 | 35 | ##first make it rotate with label 36 | img = cv2.imread(image_path) 37 | label = np.array(single_sample[1:]).reshape([-1, 2]).astype(np.int) 38 | angle=random.uniform(-180,180) 39 | img,aug_label=augmentor.Rotate_aug(img,label=label,angle=angle) 40 | for _index in range(aug_label.shape[0]): 41 | x_y=aug_label[_index] 42 | cv2.circle(img,center=(x_y[0],x_y[1]),color=(122,122,122),radius=2,thickness=2) 43 | cv2.imshow('rotate with label',img) 44 | 45 | ##first make it rotate without label 46 | img = cv2.imread(image_path) 47 | label = np.array(single_sample[1:]).reshape([-1, 2]).astype(np.int) 48 | angle = random.uniform(-180, 180) 49 | img, _ = augmentor.Rotate_aug(img, angle=angle) 50 | cv2.imshow('rotate without label', img) 51 | 52 | ##first make it Affine_aug with label 53 | img = cv2.imread(image_path) 54 | label = np.array(single_sample[1:]).reshape([-1, 2]).astype(np.int) 55 | strength=random.uniform(0,100) 56 | img, aug_label = augmentor.Affine_aug(img,strength=strength,label=label) 57 | for _index in range(aug_label.shape[0]): 58 | x_y = aug_label[_index] 59 | cv2.circle(img, center=(x_y[0], x_y[1]), color=(122, 122, 122), radius=2, thickness=2) 60 | cv2.imshow('Affine transform with label', img) 61 | 62 | 63 | 64 | ###padding with a target shape 65 | img = cv2.imread(image_path) 66 | label = np.array(single_sample[1:]).reshape([-1, 2]).astype(np.int) 67 | img,aug_label = augmentor.Fill_img(img,target_height=480,target_width=640,label=label) 68 | for _index in range(aug_label.shape[0]): 69 | x_y = aug_label[_index] 70 | cv2.circle(img, center=(x_y[0], x_y[1]), color=(122, 122, 122), radius=2, thickness=2) 71 | cv2.imshow('padding transform with label', img) 72 | 73 | ##blur 74 | img = cv2.imread(image_path) 75 | label = np.array(single_sample[1:]).reshape([-1, 2]).astype(np.int) 76 | strength = random.uniform(0, 60) 77 | img = augmentor.Blur_aug(img, ksize=(7,7)) 78 | for _index in range(label.shape[0]): 79 | x_y = label[_index] 80 | cv2.circle(img, center=(x_y[0], x_y[1]), color=(122, 122, 122), radius=2, thickness=2) 81 | cv2.imshow('blur transform with label', img) 82 | 83 | ##img dropout 84 | img = cv2.imread(image_path) 85 | label = np.array(single_sample[1:]).reshape([-1, 2]).astype(np.int) 86 | strength = random.uniform(0, 60) 87 | img = augmentor.Img_dropout(img, max_pattern_ratio=0.4) 88 | for _index in range(label.shape[0]): 89 | x_y = label[_index] 90 | cv2.circle(img, center=(x_y[0], x_y[1]), color=(122, 122, 122), radius=2, thickness=2) 91 | cv2.imshow('img_dropout transform with label', img) 92 | 93 | ##mirror 94 | 95 | img = cv2.imread(image_path) 96 | label = np.array(single_sample[1:]).reshape([-1, 2]).astype(np.int) 97 | strength = random.uniform(0, 60) 98 | ####need symmetry to swap from left and right, the symmetry need change for u data 99 | symmetry=[(0, 16), (1, 
15), (2, 14), (3, 13), (4, 12), (5, 11), (6, 10), (7, 9),(8,8), 100 | (17,26),(18,25),(19,24),(20,23),(21,22), 101 | (31,35),(32,34), 102 | (36,45),(37,44),(38,43),(39,42),(40,47),(41,46), 103 | (48,54),(49,53),(50,52),(55,59),(56,58),(60,64),(61,63),(65,67)] 104 | img,aug_label = augmentor.Mirror(img, label=label,symmetry=symmetry) 105 | for _index in range(aug_label.shape[0]): 106 | x_y = aug_label[_index] 107 | cv2.circle(img, center=(x_y[0], x_y[1]), color=(122, 122, 122), radius=2, thickness=2) 108 | cv2.imshow('flip transform with label', img) 109 | 110 | ###heatmaps 111 | 112 | label = np.array(single_sample[1:]).reshape([-1, 2]).astype(np.int).T 113 | heat_map_size=img.shape[0:2] 114 | heat=augmentor.produce_heat_maps(label,heat_map_size,1,1) 115 | augmentor.visualize_heatmap_target(heat)##visualise 116 | 117 | 118 | 119 | cv2.waitKey(0) 120 | 121 | 122 | 123 | -------------------------------------------------------------------------------- /model_eval/fddb.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | import sys 3 | sys.path.append('.') 4 | import numpy as np 5 | import os 6 | import cv2 7 | from tqdm import tqdm 8 | import argparse 9 | 10 | from lib.core.api.face_detector import FaceDetector 11 | 12 | os.environ["CUDA_VISIBLE_DEVICES"] = "-1" 13 | 14 | ap = argparse.ArgumentParser() 15 | ap.add_argument( "--model", required=False, default='./model/detector.pb', help="model to eval:") 16 | ap.add_argument( "--is_show", required=False, default=False, help="show result or not?") 17 | ap.add_argument( "--data_dir", required=False, default="./FDDB/img", help="dir to img") 18 | ap.add_argument( "--split_dir", required=False,default='./FDDB/FDDB-folds',help="dir to FDDB-folds") 19 | ap.add_argument( "--result", required=False,default='./result',help="dir to write result") 20 | args = ap.parse_args() 21 | 22 | 23 | IMAGES_DIR = args.data_dir 24 | ANNOTATIONS_PATH = args.split_dir 25 | RESULT_DIR = args.result 26 | MODEL_PATH = args.model 27 | 28 | face_detector = FaceDetector([MODEL_PATH]) 29 | 30 | 31 | annotations = [s for s in os.listdir(ANNOTATIONS_PATH) if s.endswith('ellipseList.txt')] 32 | image_lists = [s for s in os.listdir(ANNOTATIONS_PATH) if not s.endswith('ellipseList.txt')] 33 | annotations = sorted(annotations) 34 | image_lists = sorted(image_lists) 35 | 36 | images_to_use = [] 37 | for n in image_lists: 38 | with open(os.path.join(ANNOTATIONS_PATH, n)) as f: 39 | images_to_use.extend(f.readlines()) 40 | 41 | images_to_use = [s.strip() for s in images_to_use] 42 | with open(os.path.join(RESULT_DIR, 'faceList.txt'), 'w') as f: 43 | for p in images_to_use: 44 | f.write(p + '\n') 45 | 46 | 47 | ellipses = [] 48 | for n in annotations: 49 | with open(os.path.join(ANNOTATIONS_PATH, n)) as f: 50 | ellipses.extend(f.readlines()) 51 | 52 | i = 0 53 | with open(os.path.join(RESULT_DIR, 'ellipseList.txt'), 'w') as f: 54 | for p in ellipses: 55 | 56 | # check image order 57 | if 'big/img' in p: 58 | assert images_to_use[i] in p 59 | i += 1 60 | 61 | f.write(p) 62 | 63 | def bbox_vote(det): 64 | order = det[:, 4].ravel().argsort()[::-1] 65 | det = det[order, :] 66 | while det.shape[0] > 0: 67 | # IOU 68 | area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1) 69 | xx1 = np.maximum(det[0, 0], det[:, 0]) 70 | yy1 = np.maximum(det[0, 1], det[:, 1]) 71 | xx2 = np.minimum(det[0, 2], det[:, 2]) 72 | yy2 = np.minimum(det[0, 3], det[:, 3]) 73 | w = np.maximum(0.0, xx2 - xx1 + 1) 74 | h = np.maximum(0.0, yy2 - yy1 + 
1) 75 | inter = w * h 76 | o = inter / (area[0] + area[:] - inter) 77 | 78 | # get needed merge det and delete these det 79 | merge_index = np.where(o >= 0.3)[0] 80 | det_accu = det[merge_index, :] 81 | det = np.delete(det, merge_index, 0) 82 | 83 | if merge_index.shape[0] <= 1: 84 | continue 85 | det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4)) 86 | max_score = np.max(det_accu[:, 4]) 87 | det_accu_sum = np.zeros((1, 5)) 88 | det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:]) 89 | det_accu_sum[:, 4] = max_score 90 | try: 91 | dets = np.row_stack((dets, det_accu_sum)) 92 | except: 93 | dets = det_accu_sum 94 | try: 95 | dets = dets[0:750, :] 96 | except: 97 | dets=[] 98 | return dets 99 | 100 | predictions = [] 101 | for n in tqdm(images_to_use): 102 | image_array = cv2.imread(os.path.join(IMAGES_DIR, n) + '.jpg') 103 | image_array = cv2.cvtColor(image_array, cv2.COLOR_BGR2RGB) 104 | # threshold is important to set low 105 | 106 | 107 | boxes = face_detector(image_array, score_threshold=0.05) 108 | 109 | boxes=boxes[:,0:5] 110 | ##flip det 111 | flip_img=np.flip(image_array,1) 112 | 113 | boxes_flip_ = face_detector(flip_img, score_threshold=0.05) 114 | boxes_flip_ = boxes_flip_[:, 0:5] 115 | 116 | boxes_flip = np.zeros(boxes_flip_.shape) 117 | boxes_flip[:, 0] = flip_img.shape[1] - boxes_flip_[:, 2] 118 | boxes_flip[:, 1] = boxes_flip_[:, 1] 119 | boxes_flip[:, 2] = flip_img.shape[1] - boxes_flip_[:, 0] 120 | boxes_flip[:, 3] = boxes_flip_[:, 3] 121 | boxes_flip[:, 4] = boxes_flip_[:, 4] 122 | 123 | ##### 124 | det = np.row_stack((boxes, boxes_flip)) 125 | 126 | dets = bbox_vote(det) 127 | 128 | if args.is_show: 129 | for bbox in dets: 130 | if bbox[4] > 0.3: 131 | # cv2.circle(img_show,(p[0],p[1]),3,(0,0,213),-1) 132 | cv2.rectangle(image_array, (int(bbox[0]), int(bbox[1])), 133 | (int(bbox[2]), int(bbox[3])), (255, 0, 0), 7) 134 | cv2.imshow('tmp', image_array) 135 | cv2.waitKey(0) 136 | 137 | 138 | ### 139 | 140 | 141 | predictions.append((n, dets[:,0:4], dets[:,4])) 142 | 143 | 144 | with open(os.path.join(RESULT_DIR, 'detections.txt'), 'w') as f: 145 | for n, boxes, scores in predictions: 146 | f.write(n + '\n') 147 | f.write(str(len(boxes)) + '\n') 148 | for b, s in zip(boxes, scores): 149 | xmin, ymin, xmax, ymax = b 150 | h, w = int(ymax - ymin+1), int(xmax - xmin+1) 151 | f.write('{0} {1} {2} {3} {4:.4f}\n'.format(int(xmin), int(ymin), w, h, s)) 152 | 153 | 154 | -------------------------------------------------------------------------------- /lib/dataset/augmentor/visual_augmentation.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import random 4 | 5 | def pixel_jitter(src,p=0.5,max_=5.): 6 | 7 | src=src.astype(np.float32) 8 | if random.uniform(0, 1) < p: 9 | pattern=(np.random.rand(src.shape[0], src.shape[1],src.shape[2])-0.5)*2*max_ 10 | img = src + pattern 11 | 12 | img[img<0]=0 13 | img[img >255] = 255 14 | 15 | img = img.astype(np.uint8) 16 | 17 | return img 18 | else: 19 | src = src.astype(np.uint8) 20 | return src 21 | 22 | def gray(src): 23 | g_img=cv2.cvtColor(src,cv2.COLOR_RGB2GRAY) 24 | src[:,:,0]=g_img 25 | src[:,:,1]=g_img 26 | src[:,:,2]=g_img 27 | return src 28 | 29 | def swap_change(src): 30 | a = [0,1,2] 31 | 32 | k = random.sample(a, 3) 33 | 34 | res=src.copy() 35 | res[:,:,0]=src[:,:,k[0]] 36 | res[:, :, 1] = src[:, :, k[1]] 37 | res[:, :, 2] = src[:, :, k[2]] 38 | return res 39 | 40 | 41 | def 
Img_dropout(src,max_pattern_ratio=0.05): 42 | pattern=np.ones_like(src) 43 | width_ratio = random.uniform(0, max_pattern_ratio) 44 | height_ratio = random.uniform(0, max_pattern_ratio) 45 | width=src.shape[1] 46 | height=src.shape[0] 47 | block_width=width*width_ratio 48 | block_height=height*height_ratio 49 | width_start=int(random.uniform(0,width-block_width)) 50 | width_end=int(width_start+block_width) 51 | height_start=int(random.uniform(0,height-block_height)) 52 | height_end=int(height_start+block_height) 53 | pattern[height_start:height_end,width_start:width_end,:]=0 54 | img=src*pattern 55 | return img 56 | 57 | 58 | 59 | def blur_heatmap(src, ksize=(3, 3)): 60 | for i in range(src.shape[2]): 61 | src[:, :, i] = cv2.GaussianBlur(src[:, :, i], ksize, 0) 62 | amin, amax = src[:, :, i].min(), src[:, :, i].max() # 求最大最小值 63 | if amax>0: 64 | src[:, :, i] = (src[:, :, i] - amin) / (amax - amin) # (矩阵元素-最小值)/(最大值-最小值) 65 | return src 66 | def blur(src,ksize=(3,3)): 67 | for i in range(src.shape[2]): 68 | src[:, :, i]=cv2.GaussianBlur(src[:, :, i],ksize,1.5) 69 | return src 70 | 71 | 72 | 73 | 74 | def adjust_contrast(image, factor): 75 | """ Adjust contrast of an image. 76 | 77 | Args 78 | image: Image to adjust. 79 | factor: A factor for adjusting contrast. 80 | """ 81 | mean = image.mean(axis=0).mean(axis=0) 82 | return _clip((image - mean) * factor + mean) 83 | 84 | 85 | def adjust_brightness(image, delta): 86 | """ Adjust brightness of an image 87 | 88 | Args 89 | image: Image to adjust. 90 | delta: Brightness offset between -1 and 1 added to the pixel values. 91 | """ 92 | return _clip(image + delta * 255) 93 | 94 | 95 | def adjust_hue(image, delta): 96 | """ Adjust hue of an image. 97 | 98 | Args 99 | image: Image to adjust. 100 | delta: An interval between -1 and 1 for the amount added to the hue channel. 101 | The values are rotated if they exceed 180. 102 | """ 103 | image[..., 0] = np.mod(image[..., 0] + delta * 180, 180) 104 | return image 105 | 106 | 107 | def adjust_saturation(image, factor): 108 | """ Adjust saturation of an image. 109 | 110 | Args 111 | image: Image to adjust. 112 | factor: An interval for the factor multiplying the saturation values of each pixel. 113 | """ 114 | image[..., 1] = np.clip(image[..., 1] * factor, 0, 255) 115 | return image 116 | 117 | 118 | def _clip(image): 119 | """ 120 | Clip and convert an image to np.uint8. 121 | 122 | Args 123 | image: Image to clip. 124 | """ 125 | return np.clip(image, 0, 255).astype(np.uint8) 126 | def _uniform(val_range): 127 | """ Uniformly sample from the given range. 128 | 129 | Args 130 | val_range: A pair of lower and upper bound. 
131 | """ 132 | return np.random.uniform(val_range[0], val_range[1]) 133 | 134 | 135 | class ColorDistort(): 136 | 137 | def __init__( 138 | self, 139 | contrast_range=(0.8, 1.2), 140 | brightness_range=(-.2, .2), 141 | hue_range=(-0.1, 0.1), 142 | saturation_range=(0.8, 1.2) 143 | ): 144 | self.contrast_range = contrast_range 145 | self.brightness_range = brightness_range 146 | self.hue_range = hue_range 147 | self.saturation_range = saturation_range 148 | 149 | def __call__(self, image): 150 | 151 | 152 | if self.contrast_range is not None: 153 | contrast_factor = _uniform(self.contrast_range) 154 | image = adjust_contrast(image,contrast_factor) 155 | if self.brightness_range is not None: 156 | brightness_delta = _uniform(self.brightness_range) 157 | image = adjust_brightness(image, brightness_delta) 158 | 159 | if self.hue_range is not None or self.saturation_range is not None: 160 | 161 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) 162 | 163 | if self.hue_range is not None: 164 | hue_delta = _uniform(self.hue_range) 165 | image = adjust_hue(image, hue_delta) 166 | 167 | if self.saturation_range is not None: 168 | saturation_factor = _uniform(self.saturation_range) 169 | image = adjust_saturation(image, saturation_factor) 170 | 171 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) 172 | 173 | return image 174 | 175 | 176 | 177 | 178 | class DsfdVisualAug(): 179 | pass -------------------------------------------------------------------------------- /model_eval/custome_eval.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | sys.path.append('.') 5 | 6 | import cv2 7 | import numpy as np 8 | import json 9 | import os 10 | import argparse 11 | from tqdm import tqdm 12 | from pycocotools.coco import COCO 13 | from pycocotools.cocoeval import COCOeval 14 | 15 | from train_config import config as cfg 16 | from lib.core.api.face_detector import FaceDetector 17 | 18 | 19 | 20 | ap = argparse.ArgumentParser() 21 | ap.add_argument("--model", required=True, default='./model/detector.pb', help="model to eval:") 22 | ap.add_argument("--annFile", required=True, default='./model_eval/DatasetTest_cocoStyle.json', help="coco style json") 23 | ap.add_argument("--imgDir", required=True, default='', help="coco style json") 24 | ap.add_argument("--is_show", required=False, default=0,type=int, help="show result or not?") 25 | args = ap.parse_args() 26 | 27 | MODEL_PATH = args.model 28 | IMAGE_DIR = args.imgDir 29 | os.environ["CUDA_VISIBLE_DEVICES"] = "-1" 30 | detector = FaceDetector(['./model/detector.pb']) 31 | coco_map = {0: (1, 'person'), 1: (2, 'bicycle'), 2: (3, 'car'), 3: (4, 'motorcycle'), 4: (5, 'airplane'), 5: (6, 'bus'), 32 | 6: (7, 'train'), 7: (8, 'truck'), 8: (9, 'boat'), 9: (10, 'traffic shufflenet'), 10: (11, 'fire hydrant'), 33 | 11: (13, 'stop sign'), 12: (14, 'parking meter'), 13: (15, 'bench'), 14: (16, 'bird'), 15: (17, 'cat'), 34 | 16: (18, 'dog'), 17: (19, 'horse'), 18: (20, 'sheep'), 19: (21, 'cow'), 20: (22, 'elephant'), 35 | 21: (23, 'bear'), 22: (24, 'zebra'), 23: (25, 'giraffe'), 24: (27, 'backpack'), 25: (28, 'umbrella'), 36 | 26: (31, 'handbag'), 27: (32, 'tie'), 28: (33, 'suitcase'), 29: (34, 'frisbee'), 30: (35, 'skis'), 37 | 31: (36, 'snowboard'), 32: (37, 'sports ball'), 33: (38, 'kite'), 34: (39, 'baseball bat'), 38 | 35: (40, 'baseball glove'), 39 | 36: (41, 'skateboard'), 37: (42, 'surfboard'), 38: (43, 'tennis racket'), 39: (44, 'bottle'), 40 | 40: (46, 'wine glass'), 41 | 41: (47, 'cup'), 42: (48, 'fork'), 
43: (49, 'knife'), 44: (50, 'spoon'), 45: (51, 'bowl'), 42 | 46: (52, 'banana'), 47: (53, 'apple'), 48: (54, 'sandwich'), 49: (55, 'orange'), 50: (56, 'broccoli'), 43 | 51: (57, 'carrot'), 52: (58, 'hot dog'), 53: (59, 'pizza'), 54: (60, 'donut'), 55: (61, 'cake'), 44 | 56: (62, 'chair'), 57: (63, 'couch'), 58: (64, 'potted plant'), 59: (65, 'bed'), 60: (67, 'dining table'), 45 | 61: (70, 'toilet'), 62: (72, 'tv'), 63: (73, 'laptop'), 64: (74, 'mouse'), 65: (75, 'remote'), 46 | 66: (76, 'keyboard'), 67: (77, 'cell phone'), 68: (78, 'microwave'), 69: (79, 'oven'), 70: (80, 'toaster'), 47 | 71: (81, 'sink'), 72: (82, 'refrigerator'), 73: (84, 'book'), 74: (85, 'clock'), 75: (86, 'vase'), 48 | 76: (87, 'scissors'), 77: (88, 'teddy bear'), 78: (89, 'hair drier'), 79: (90, 'toothbrush')} 49 | 50 | 51 | def predict_box(): 52 | annFile = args.annFile 53 | cocoGt = COCO(annFile) 54 | imgIds = sorted(cocoGt.getImgIds()) 55 | 56 | res_coco = [] 57 | 58 | for img_id in tqdm(imgIds): 59 | 60 | fname=os.path.join(IMAGE_DIR,cocoGt.loadImgs(img_id)[0]['file_name']) 61 | 62 | image = cv2.imread(fname) 63 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 64 | h, w, _ = image.shape 65 | image_show = image.copy() 66 | 67 | if args.is_show: 68 | detect_res = detector(image, 0.3, input_shape=(cfg.DATA.hin, cfg.DATA.win),max_boxes=1500) 69 | else: 70 | detect_res =detector(image,0.05,input_shape=(cfg.DATA.hin,cfg.DATA.win),max_boxes=1500) 71 | 72 | if args.is_show: 73 | for i in range(detect_res.shape[0]): 74 | one_box = detect_res[i] 75 | str_draw = '%s:%.2f' %(coco_map[int(one_box[5])][1],one_box[4]) 76 | 77 | cv2.rectangle(image_show, (int(one_box[0]), int(one_box[1])), (int(one_box[2]), int(one_box[3])), 78 | (0, 255, 0), 2) 79 | cv2.putText(image_show, str_draw, (int(one_box[0]), int(one_box[1])), cv2.FONT_HERSHEY_SIMPLEX, 1, 80 | (255, 0, 255), 2) 81 | cv2.namedWindow('ss',0) 82 | cv2.imshow('ss', image_show) 83 | cv2.waitKey(0) 84 | 85 | for i in range(detect_res.shape[0]): 86 | one_box = detect_res[i] 87 | one_box=[float(x) for x in one_box] 88 | box = [one_box[0], one_box[1], one_box[2] - one_box[0], one_box[3] - one_box[1]] 89 | 90 | res_coco.append({ 91 | 'bbox': box, 92 | 'category_id': coco_map[int(one_box[5])][0], 93 | 'image_id': img_id, 94 | 'score': one_box[4] 95 | }) 96 | 97 | with open('bbox_result.json', 'w') as f_dump: 98 | json.dump(res_coco, f_dump, indent=2) 99 | 100 | 101 | def eval_box(): 102 | 103 | import pylab 104 | pylab.rcParams['figure.figsize'] = (10.0, 8.0) 105 | annType = ['segm', 'bbox', 'keypoints'] 106 | annType = annType[1] # specify type here 107 | print('Running for *%s* results.' 
% (annType)) 108 | # initialize COCO ground truth api 109 | annFile = args.annFile 110 | cocoGt = COCO(annFile) 111 | catIds = cocoGt.getCatIds() 112 | print(catIds) 113 | imgIds = sorted(cocoGt.getImgIds(catIds=catIds)) 114 | # initialize COCO detections api 115 | resFile = './bbox_result.json' 116 | cocoDt = cocoGt.loadRes(resFile) 117 | # running evaluation 118 | cocoEval = COCOeval(cocoGt, cocoDt, annType) 119 | # cocoEval.params.imgIds = imgIds 120 | #catIds=5 121 | cocoEval.params.catIds = catIds 122 | cocoEval.evaluate() 123 | cocoEval.accumulate() 124 | cocoEval.summarize() 125 | 126 | 127 | if __name__ == '__main__': 128 | predict_box() 129 | eval_box() 130 | 131 | 132 | -------------------------------------------------------------------------------- /lib/core/model/centernet.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | import tensorflow as tf 3 | import numpy as np 4 | import tensorflow.contrib.slim as slim 5 | from lib.core.anchor.box_utils import batch_decode,batch_decode_fix 6 | 7 | from lib.core.model.net.shufflenet.shufflenetv2_5x5 import ShuffleNetV2_5x5 8 | from lib.core.model.net.shufflenet.shufflenetv2plus import ShufflenetV2Plus 9 | from lib.core.model.net.mobilenetv3.backbone import mobilenetv3_large_detection 10 | from lib.core.model.net.mobilenet.backbone import mobilenet_ssd 11 | from lib.core.model.net.resnet.backbone import resnet_ssd 12 | from lib.core.model.loss.centernet_loss import loss 13 | 14 | from train_config import config as cfg 15 | 16 | from lib.helper.logger import logger 17 | 18 | from lib.core.model.head.centernet_head import CenternetHead 19 | 20 | class Centernet(): 21 | 22 | def __init__(self,): 23 | if "ShuffleNetV2_Plus" in cfg.MODEL.net_structure: 24 | self.backbone=ShufflenetV2Plus ### it is a func 25 | elif "ShuffleNetV2_5x5" in cfg.MODEL.net_structure: 26 | self.backbone=ShuffleNetV2_5x5 27 | elif "MobilenetV2" in cfg.MODEL.net_structure: 28 | self.backbone = mobilenet_ssd 29 | elif "MobilenetV3" in cfg.MODEL.net_structure: 30 | self.backbone = mobilenetv3_large_detection 31 | elif "resnet_v2_50" in cfg.MODEL.net_structure: 32 | self.backbone = resnet_ssd 33 | self.head=CenternetHead() ### it is a class 34 | 35 | self.top_k_results_output=cfg.MODEL.max_box 36 | 37 | def forward(self,inputs,hm_target, wh_target,weights_,training_flag): 38 | 39 | ## process the label 40 | if cfg.DATA.use_int8_data: 41 | inputs,hm_target,wh_target,weights_=self.process_label(inputs,hm_target,wh_target,weights_) 42 | 43 | ###preprocess 44 | #inputs=self.preprocess(inputs) 45 | 46 | ### extract feature maps 47 | origin_fms=self.backbone(inputs,training_flag) 48 | 49 | kps_predicts,wh_predicts = self.head(origin_fms, training_flag) 50 | kps_predicts= tf.nn.sigmoid(kps_predicts) 51 | ### calculate loss 52 | hm_loss,wh_loss = loss(predicts=[kps_predicts,wh_predicts] ,targets=[hm_target,wh_target,weights_]) 53 | 54 | kps_predicts = tf.identity(kps_predicts, name='keypoints') 55 | wh_predicts = tf.identity(wh_predicts, name='wh') 56 | 57 | self.postprocess(kps_predicts,wh_predicts,self.top_k_results_output) 58 | 59 | return hm_loss,wh_loss 60 | 61 | def preprocess(self,image): 62 | with tf.name_scope('image_preprocess'): 63 | if image.dtype.base_dtype != tf.float32: 64 | image = tf.cast(image, tf.float32) 65 | 66 | image=image/255. 
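# note (added comment): forward() above keeps the call to this normalization
# commented out; process_label() only casts the inputs to float32.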
67 | return image 68 | def process_label(self,inputs,cls_hm,wh_target,weights_): 69 | 70 | inputs= tf.cast(inputs, tf.float32) 71 | 72 | cls_hm = tf.cast(cls_hm, tf.float32)/cfg.DATA.use_int8_enlarge 73 | 74 | return inputs,cls_hm,wh_target,weights_ 75 | 76 | 77 | def postprocess(self, keypoints,wh,max_size): 78 | """Postprocess outputs of the network. 79 | 80 | Returns: 81 | boxes: a float tensor with shape [batch_size, N, 4]. 82 | scores: a float tensor with shape [batch_size, N]. 83 | num_boxes: an int tensor with shape [batch_size], it 84 | represents the number of detections on an image. 85 | 86 | where N = max_boxes. 87 | """ 88 | 89 | def nms(heat, kernel=3): 90 | hmax = tf.layers.max_pooling2d(heat, kernel, 1, padding='same') 91 | keep = tf.cast(tf.equal(heat, hmax), tf.float32) 92 | return heat * keep 93 | 94 | def topk(hm, K=100): 95 | batch, height, width, cat = tf.shape(hm)[0], tf.shape(hm)[1], tf.shape(hm)[2], tf.shape(hm)[3] 96 | # [b,h*w*c] 97 | scores = tf.reshape(hm, (batch, -1)) 98 | # [b,k] 99 | topk_scores, topk_inds = tf.nn.top_k(scores, k=K) 100 | # [b,k] 101 | topk_clses = topk_inds % cat 102 | topk_xs = topk_inds // cat % width 103 | topk_ys = topk_inds // cat // width 104 | topk_inds = topk_ys * width + topk_xs 105 | 106 | return topk_scores, topk_inds, topk_clses, topk_ys, topk_xs 107 | 108 | def decode(heat, wh, K=100): 109 | batch, height, width, cat = tf.shape(heat)[0], tf.shape(heat)[1], tf.shape(heat)[2], tf.shape(heat)[3] 110 | heat = nms(heat) 111 | scores, inds, clses, ys, xs = topk(heat, K=K) 112 | 113 | 114 | xs = tf.cast(tf.expand_dims(xs, axis=-1),tf.float32) 115 | ys = tf.cast(tf.expand_dims(ys, axis=-1),tf.float32) 116 | 117 | # [b,h*w,2] 118 | wh = tf.reshape(wh, (batch, -1, tf.shape(wh)[-1])) 119 | # [b,k,2] 120 | wh = tf.batch_gather(wh, inds) 121 | 122 | clses = tf.cast(tf.expand_dims(clses, axis=-1), tf.float32) 123 | scores = tf.expand_dims(scores, axis=-1) 124 | 125 | xmin = xs*cfg.MODEL.global_stride - wh[:,:, 0:1] 126 | ymin = ys*cfg.MODEL.global_stride - wh[:,:, 1:2] 127 | xmax = xs*cfg.MODEL.global_stride + wh[:,:, 2:3] 128 | ymax = ys*cfg.MODEL.global_stride + wh[:,:, 3:4] 129 | 130 | bboxes = tf.concat([xmin, ymin, xmax, ymax], axis=-1) 131 | 132 | 133 | # [b,k,6] 134 | detections = tf.concat([bboxes, scores, clses], axis=-1) 135 | detections = tf.identity(detections, name='detections') 136 | 137 | # bboxes = tf.identity(bboxes, name='boxes') 138 | # scores = tf.identity(scores, name='scores') 139 | # labels = tf.identity(clses, name='labels') ## no use 140 | return detections 141 | 142 | 143 | decode(keypoints,wh,max_size) 144 | 145 | 146 | 147 | 148 | 149 | 150 | -------------------------------------------------------------------------------- /prepare_wider_data.py: -------------------------------------------------------------------------------- 1 | #-*- coding:utf-8 -*- 2 | 3 | from __future__ import division 4 | from __future__ import absolute_import 5 | from __future__ import print_function 6 | 7 | 8 | import os 9 | 10 | 11 | 12 | WIDER_ROOT = './WIDER' 13 | train_list_file = os.path.join(WIDER_ROOT, 'wider_face_split', 14 | 'wider_face_train_bbx_gt.txt') 15 | val_list_file = os.path.join(WIDER_ROOT, 'wider_face_split', 16 | 'wider_face_val_bbx_gt.txt') 17 | 18 | WIDER_TRAIN = os.path.join(WIDER_ROOT, 'WIDER_train', 'images') 19 | WIDER_VAL = os.path.join(WIDER_ROOT, 'WIDER_val', 'images') 20 | 21 | 22 | def parse_wider_file(root, file): 23 | with open(file, 'r') as fr: 24 | lines = fr.readlines() 25 | face_count = [] 26 | img_paths 
= [] 27 | face_loc = [] 28 | img_faces = [] 29 | count = 0 30 | flag = False 31 | for k, line in enumerate(lines): 32 | line = line.strip().strip('\n') 33 | if count > 0: 34 | line = line.split(' ') 35 | count -= 1 36 | loc = [int(line[0]), int(line[1]), int(line[2]), int(line[3])] 37 | face_loc += [loc] 38 | if flag: 39 | face_count += [int(line)] 40 | flag = False 41 | count = int(line) 42 | if 'jpg' in line: 43 | img_paths += [os.path.join(root, line)] 44 | flag = True 45 | 46 | total_face = 0 47 | for k in face_count: 48 | face_ = [] 49 | for x in range(total_face, total_face + k): 50 | face_.append(face_loc[x]) 51 | img_faces += [face_] 52 | total_face += k 53 | return img_paths, img_faces 54 | 55 | 56 | def wider_data_file(): 57 | img_paths, bbox = parse_wider_file(WIDER_TRAIN, train_list_file) 58 | fw = open('train.txt', 'w') 59 | for index in range(len(img_paths)): 60 | tmp_str = '' 61 | tmp_str =tmp_str+ img_paths[index]+'|' 62 | boxes = bbox[index] 63 | 64 | for box in boxes: 65 | data = ' %d,%d,%d,%d,0'%(box[0], box[1], box[0]+box[2], box[1]+box[3]) 66 | tmp_str=tmp_str+data 67 | if len(boxes) == 0: 68 | print(tmp_str) 69 | continue 70 | ####err box? 71 | if box[2] <= 0 or box[3] <= 0: 72 | pass 73 | else: 74 | fw.write(tmp_str + '\n') 75 | fw.close() 76 | 77 | img_paths, bbox = parse_wider_file(WIDER_VAL, val_list_file) 78 | fw = open('val.txt', 'w') 79 | for index in range(len(img_paths)): 80 | 81 | tmp_str='' 82 | tmp_str =tmp_str+ img_paths[index]+'|' 83 | boxes = bbox[index] 84 | 85 | for box in boxes: 86 | data = ' %d,%d,%d,%d,0'%(box[0], box[1], box[0]+box[2], box[1]+box[3]) 87 | tmp_str=tmp_str+data 88 | 89 | 90 | 91 | if len(boxes) == 0: 92 | print(tmp_str) 93 | continue 94 | ####err box? 95 | if box[2] <= 0 or box[3] <= 0: 96 | pass 97 | else: 98 | fw.write(tmp_str + '\n') 99 | fw.close() 100 | 101 | 102 | 103 | 104 | 105 | 106 | def wider_data_file_refine(): 107 | 108 | def parse_wider_file(root, file): 109 | with open(file, 'r') as fr: 110 | lines = fr.readlines() 111 | face_count = [] 112 | img_paths = [] 113 | face_loc = [] 114 | img_faces = [] 115 | count = 0 116 | 117 | 118 | one_image_faces=[] 119 | for k, line in enumerate(lines): 120 | if "#" in line: 121 | 122 | img_paths += [os.path.join(root, line[2:].rstrip())] 123 | 124 | one_image_faces=[] 125 | if '#' not in line: 126 | line = line.strip().strip('\n') 127 | 128 | 129 | line = line.split(' ') 130 | 131 | loc = [int(line[0]), int(line[1]), int(line[2]), int(line[3])] 132 | 133 | one_image_faces.append(loc) 134 | 135 | 136 | if k 0 13 | weight = tf.cast(weight[pos_mask],tf.float32) 14 | if avg_factor is None: 15 | avg_factor = tf.reduce_sum(pos_mask) + 1e-6 16 | bboxes1 = tf.reshape(pred[pos_mask],(-1, 4)) 17 | bboxes2 = tf.reshape(target[pos_mask],(-1, 4)) 18 | 19 | 20 | lt = tf.maximum(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2] 21 | rb = tf.minimum(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2] 22 | wh = tf.maximum((rb - lt + 1),0) # [rows, 2] 23 | enclose_x1y1 = tf.minimum(bboxes1[:, :2], bboxes2[:, :2]) 24 | enclose_x2y2 = tf.maximum(bboxes1[:, 2:], bboxes2[:, 2:]) 25 | enclose_wh = tf.maximum((enclose_x2y2 - enclose_x1y1 + 1),0) 26 | 27 | overlap = wh[:, 0] * wh[:, 1] 28 | ap = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (bboxes1[:, 3] - bboxes1[:, 1] + 1) 29 | ag = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (bboxes2[:, 3] - bboxes2[:, 1] + 1) 30 | ious = overlap / (ap + ag - overlap) 31 | 32 | enclose_area = enclose_wh[:, 0] * enclose_wh[:, 1] # i.e. 
C in paper 33 | u = ap + ag - overlap 34 | gious = ious - (enclose_area - u) / enclose_area 35 | iou_distances = 1 - gious 36 | return tf.reduce_sum(iou_distances * weight) / avg_factor 37 | 38 | def diou_loss(pred, 39 | target, 40 | weight, 41 | avg_factor=None): 42 | """DIoU loss. 43 | Computing the GIoU loss between a set of predicted bboxes and target bboxes. 44 | """ 45 | pos_mask = weight > 0 46 | weight = tf.cast(weight[pos_mask],tf.float32) 47 | if avg_factor is None: 48 | avg_factor = tf.reduce_sum(pos_mask) + 1e-6 49 | bboxes1 = tf.reshape(pred[pos_mask],(-1, 4)) 50 | bboxes2 = tf.reshape(target[pos_mask],(-1, 4)) 51 | 52 | 53 | lt = tf.maximum(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2] 54 | rb = tf.minimum(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2] 55 | wh = tf.maximum((rb - lt + 1),0) # [rows, 2] 56 | # enclose_x1y1 = tf.minimum(bboxes1[:, :2], bboxes2[:, :2]) 57 | # enclose_x2y2 = tf.maximum(bboxes1[:, 2:], bboxes2[:, 2:]) 58 | # enclose_wh = tf.maximum((enclose_x2y2 - enclose_x1y1 + 1),0) 59 | 60 | overlap = wh[:, 0] * wh[:, 1] 61 | ap = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (bboxes1[:, 3] - bboxes1[:, 1] + 1) 62 | ag = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (bboxes2[:, 3] - bboxes2[:, 1] + 1) 63 | ious = overlap / (ap + ag - overlap) 64 | 65 | 66 | # cal outer boxes 67 | outer_left_up = tf.minimum(bboxes1[:, :2], bboxes2[:, :2]) 68 | outer_right_down = tf.maximum(bboxes1[:, 2:], bboxes2[:, 2:]) 69 | outer = tf.maximum(outer_right_down - outer_left_up, 0.0) 70 | outer_diagonal_line = tf.square(outer[:, 0]) + tf.square(outer[:, 1]) 71 | 72 | boxes1_center = (bboxes1[:, :2] + bboxes1[:, 2:]+ 1) * 0.5 73 | boxes2_center = (bboxes2[:, :2] + bboxes2[:, 2:]+ 1) * 0.5 74 | center_dis = tf.square(boxes1_center[:, 0] - boxes2_center[:, 0]) + \ 75 | tf.square(boxes1_center[:, 1] - boxes2_center[:, 1]) 76 | 77 | dious = ious - (center_dis / outer_diagonal_line) 78 | 79 | iou_distances = 1-dious 80 | 81 | return tf.reduce_sum(iou_distances * weight) / avg_factor 82 | def ciou_loss(pred, 83 | target, 84 | weight, 85 | avg_factor=None): 86 | """GIoU loss. 87 | Computing the GIoU loss between a set of predicted bboxes and target bboxes. 
88 | """ 89 | pos_mask = weight > 0 90 | weight = tf.cast(weight[pos_mask],tf.float32) 91 | if avg_factor is None: 92 | avg_factor = tf.reduce_sum(pos_mask) + 1e-6 93 | bboxes1 = tf.reshape(pred[pos_mask],(-1, 4)) 94 | bboxes2 = tf.reshape(target[pos_mask],(-1, 4)) 95 | 96 | 97 | lt = tf.maximum(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2] 98 | rb = tf.minimum(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2] 99 | wh = tf.maximum((rb - lt + 1),0) # [rows, 2] 100 | # enclose_x1y1 = tf.minimum(bboxes1[:, :2], bboxes2[:, :2]) 101 | # enclose_x2y2 = tf.maximum(bboxes1[:, 2:], bboxes2[:, 2:]) 102 | # enclose_wh = tf.maximum((enclose_x2y2 - enclose_x1y1 + 1),0) 103 | 104 | overlap = wh[:, 0] * wh[:, 1] 105 | ap = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (bboxes1[:, 3] - bboxes1[:, 1] + 1) 106 | ag = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (bboxes2[:, 3] - bboxes2[:, 1] + 1) 107 | ious = overlap / (ap + ag - overlap) 108 | 109 | 110 | 111 | 112 | # cal outer boxes 113 | outer_left_up = tf.minimum(bboxes1[:, :2], bboxes2[:, :2]) 114 | outer_right_down = tf.maximum(bboxes1[:, 2:], bboxes2[:, 2:]) 115 | outer = tf.maximum(outer_right_down - outer_left_up, 0.0) 116 | outer_diagonal_line = tf.square(outer[:, 0]) + tf.square(outer[:, 1]) 117 | 118 | 119 | boxes1_center = (bboxes1[:, :2] + bboxes1[:, 2:]+ 1) * 0.5 120 | boxes2_center = (bboxes2[:, :2] + bboxes2[:, 2:]+ 1) * 0.5 121 | center_dis = tf.square(boxes1_center[:, 0] - boxes2_center[:, 0]) + \ 122 | tf.square(boxes1_center[:, 1] - boxes2_center[:, 1]) 123 | 124 | 125 | 126 | 127 | 128 | boxes1_size = tf.maximum(bboxes1[:,2:]-bboxes1[:,:2],0.0) 129 | boxes2_size = tf.maximum(bboxes2[:, 2:] - bboxes2[:, :2], 0.0) 130 | 131 | v = (4.0 / (np.pi**2)) * \ 132 | tf.square(tf.math.atan(boxes2_size[:, 0] / (boxes2_size[:, 1]+0.00001)) - 133 | tf.math.atan(boxes1_size[:, 0] / (boxes1_size[:, 1]+0.00001))) 134 | 135 | S = tf.cast(tf.greater(ious , 0.5),dtype=tf.float32) 136 | alpha = S * v / (1 - ious + v) 137 | 138 | cious = ious - (center_dis / outer_diagonal_line)-alpha * v 139 | 140 | cious = 1-cious 141 | 142 | return tf.reduce_sum(cious * weight) / avg_factor 143 | 144 | 145 | 146 | if __name__=='__main__': 147 | gt=[[1000,10,100,100]] 148 | pre=[[200,200,1,1]] 149 | weight = [1] 150 | a = tf.constant(gt,dtype=tf.float32) 151 | b = tf.constant(pre,dtype=tf.float32) 152 | 153 | w=tf.constant(weight,dtype=tf.float32) 154 | 155 | session = tf.Session() 156 | 157 | loss,lt=giou_loss(pre,gt,w) 158 | v1 = session.run(loss) # fetches参数为单个张量值,返回值为Numpy数组 159 | print(v1) 160 | lt = session.run(lt[0,:,:,0]) # fetches参数为单个张量值,返回值为Numpy数组 161 | print(lt.shape) 162 | print(lt) 163 | 164 | 165 | -------------------------------------------------------------------------------- /visulization/vis.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('.') 3 | 4 | import cv2 5 | import os 6 | import time 7 | 8 | 9 | from lib.core.api.face_detector import FaceDetector 10 | from train_config import config as cfg 11 | 12 | import argparse 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--style', type=str,default='coco', help='detect with coco or face',required=False) 16 | parser.add_argument('--imgDir', type=str,default='../pubdata/mscoco/val2017', help='the image dir to detect') 17 | parser.add_argument('--thres', type=float,default=0.3, help='the thres for detect') 18 | args = parser.parse_args() 19 | 20 | data_dir=args.imgDir 21 | style=args.style 22 | thres=args.thres 23 | 24 | 
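# example invocation (illustrative, using the argparse defaults above):
#   python visulization/vis.py --style coco --imgDir ../pubdata/mscoco/val2017 --thres 0.3
# it expects a frozen ./model/detector.pb produced by tools/auto_freeze.py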
os.environ["CUDA_VISIBLE_DEVICES"] = "-1" 25 | detector = FaceDetector(['./model/detector.pb']) 26 | coco_map = {0: (1, 'person'), 1: (2, 'bicycle'), 2: (3, 'car'), 3: (4, 'motorcycle'), 4: (5, 'airplane'), 5: (6, 'bus'), 27 | 6: (7, 'train'), 7: (8, 'truck'), 8: (9, 'boat'), 9: (10, 'traffic light'), 10: (11, 'fire hydrant'), 28 | 11: (13, 'stop sign'), 12: (14, 'parking meter'), 13: (15, 'bench'), 14: (16, 'bird'), 15: (17, 'cat'), 29 | 16: (18, 'dog'), 17: (19, 'horse'), 18: (20, 'sheep'), 19: (21, 'cow'), 20: (22, 'elephant'), 30 | 21: (23, 'bear'), 22: (24, 'zebra'), 23: (25, 'giraffe'), 24: (27, 'backpack'), 25: (28, 'umbrella'), 31 | 26: (31, 'handbag'), 27: (32, 'tie'), 28: (33, 'suitcase'), 29: (34, 'frisbee'), 30: (35, 'skis'), 32 | 31: (36, 'snowboard'), 32: (37, 'sports ball'), 33: (38, 'kite'), 34: (39, 'baseball bat'), 33 | 35: (40, 'baseball glove'), 34 | 36: (41, 'skateboard'), 37: (42, 'surfboard'), 38: (43, 'tennis racket'), 39: (44, 'bottle'), 35 | 40: (46, 'wine glass'), 36 | 41: (47, 'cup'), 42: (48, 'fork'), 43: (49, 'knife'), 44: (50, 'spoon'), 45: (51, 'bowl'), 37 | 46: (52, 'banana'), 47: (53, 'apple'), 48: (54, 'sandwich'), 49: (55, 'orange'), 50: (56, 'broccoli'), 38 | 51: (57, 'carrot'), 52: (58, 'hot dog'), 53: (59, 'pizza'), 54: (60, 'donut'), 55: (61, 'cake'), 39 | 56: (62, 'chair'), 57: (63, 'couch'), 58: (64, 'potted plant'), 59: (65, 'bed'), 60: (67, 'dining table'), 40 | 61: (70, 'toilet'), 62: (72, 'tv'), 63: (73, 'laptop'), 64: (74, 'mouse'), 65: (75, 'remote'), 41 | 66: (76, 'keyboard'), 67: (77, 'cell phone'), 68: (78, 'microwave'), 69: (79, 'oven'), 70: (80, 'toaster'), 42 | 71: (81, 'sink'), 72: (82, 'refrigerator'), 73: (84, 'book'), 74: (85, 'clock'), 75: (86, 'vase'), 43 | 76: (87, 'scissors'), 77: (88, 'teddy bear'), 78: (89, 'hair drier'), 79: (90, 'toothbrush')} 44 | 45 | def GetFileList(dir, fileList): 46 | newDir = dir 47 | if os.path.isfile(dir): 48 | fileList.append(dir) 49 | elif os.path.isdir(dir): 50 | for s in os.listdir(dir): 51 | # if s == "pts": 52 | # continue 53 | newDir=os.path.join(dir,s) 54 | GetFileList(newDir, fileList) 55 | return fileList 56 | 57 | 58 | def cocodetect(data_dir): 59 | success_cnt=0 60 | count = 0 61 | 62 | pics = [] 63 | GetFileList(data_dir,pics) 64 | 65 | pics = [x for x in pics if 'jpg' in x or 'png' in x or 'jpeg' in x] 66 | #pics.sort() 67 | 68 | for pic in pics: 69 | print(pic) 70 | try: 71 | img=cv2.imread(pic) 72 | #cv2.imwrite('tmp.png',img) 73 | img_show = img.copy() 74 | except: 75 | continue 76 | 77 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 78 | 79 | star=time.time() 80 | boxes=detector(img,thres,input_shape=(cfg.DATA.hin,cfg.DATA.win)) 81 | 82 | print(boxes.shape[0]) 83 | if boxes.shape[0]==0: 84 | print(pic) 85 | 86 | for box_index in range(boxes.shape[0]): 87 | 88 | bbox = boxes[box_index] 89 | 90 | cv2.rectangle(img_show, (int(bbox[0]), int(bbox[1])), 91 | (int(bbox[2]), int(bbox[3])), (255, 0, 0), 4) 92 | str_draw = '%s:%.2f' %(coco_map[int(bbox[5])][1],bbox[4]) 93 | cv2.putText(img_show, str_draw, (int(bbox[0]), int(bbox[1])), cv2.FONT_HERSHEY_SIMPLEX, 2, 94 | (255, 0, 255), 2) 95 | 96 | 97 | cv2.namedWindow('res',0) 98 | cv2.imshow('res',img_show) 99 | cv2.waitKey(0) 100 | 101 | print(success_cnt,'decoded') 102 | print(count) 103 | 104 | 105 | def camdetect(): 106 | cap = cv2.VideoCapture(0) 107 | 108 | while True: 109 | 110 | ret, img = cap.read() 111 | img_show = img.copy() 112 | 113 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 114 | star=time.time() 115 | 
boxes=detector(img,0.5,input_shape=(640,640)) 116 | 117 | 118 | print(boxes.shape[0]) 119 | 120 | 121 | for box_index in range(boxes.shape[0]): 122 | 123 | bbox = boxes[box_index] 124 | 125 | cv2.rectangle(img_show, (int(bbox[0]), int(bbox[1])), 126 | (int(bbox[2]), int(bbox[3])), (255, 0, 0), 8) 127 | # cv2.putText(img_show, str(bbox[4]), (int(bbox[0]), int(bbox[1]) + 30), 128 | # cv2.FONT_HERSHEY_SIMPLEX, 1, 129 | # (255, 0, 255), 2) 130 | # 131 | # cv2.putText(img_show, str(int(bbox[5])), (int(bbox[0]), int(bbox[1]) + 40), 132 | # cv2.FONT_HERSHEY_SIMPLEX, 1, 133 | # (0, 0, 255), 2) 134 | 135 | 136 | cv2.namedWindow('res',0) 137 | cv2.imshow('res',img_show) 138 | cv2.waitKey(0) 139 | print(count) 140 | 141 | def facedetect(data_dir): 142 | success_cnt=0 143 | count = 0 144 | 145 | pics = [] 146 | GetFileList(data_dir,pics) 147 | 148 | pics = [x for x in pics if 'jpg' in x or 'png' in x or 'jpeg' in x] 149 | #pics.sort() 150 | 151 | for pic in pics: 152 | print(pic) 153 | try: 154 | img=cv2.imread(pic) 155 | #cv2.imwrite('tmp.png',img) 156 | img_show = img.copy() 157 | except: 158 | continue 159 | 160 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 161 | 162 | star=time.time() 163 | boxes=detector(img,thres,input_shape=(cfg.DATA.hin,cfg.DATA.win)) 164 | 165 | print(boxes.shape[0]) 166 | if boxes.shape[0]==0: 167 | print(pic) 168 | 169 | for box_index in range(boxes.shape[0]): 170 | 171 | bbox = boxes[box_index] 172 | 173 | cv2.rectangle(img_show, (int(bbox[0]), int(bbox[1])), 174 | (int(bbox[2]), int(bbox[3])), (255, 0, 0), 4) 175 | str_draw = '%s:%.2f' %(coco_map[int(bbox[5])][1],bbox[4]) 176 | cv2.putText(img_show, str_draw, (int(bbox[0]), int(bbox[1])), cv2.FONT_HERSHEY_SIMPLEX, 2, 177 | (255, 0, 255), 2) 178 | 179 | 180 | cv2.namedWindow('res',0) 181 | cv2.imshow('res',img_show) 182 | cv2.waitKey(0) 183 | 184 | print(success_cnt,'decoded') 185 | print(count) 186 | if __name__=='__main__': 187 | 188 | if style=='coco': 189 | cocodetect(data_dir) 190 | else: 191 | facedetect(data_dir) 192 | -------------------------------------------------------------------------------- /lib/core/api/face_detector_bk.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import cv2 4 | import time 5 | import math 6 | 7 | from train_config import config as cfg 8 | 9 | 10 | 11 | class FaceDetector: 12 | def __init__(self, model_path): 13 | """ 14 | Arguments: 15 | model_path: a string, path to a pb file. 16 | """ 17 | self._graph = tf.Graph() 18 | 19 | with self._graph.as_default(): 20 | self._graph, self._sess = self.init_model(model_path) 21 | 22 | 23 | self.input_image = tf.get_default_graph().get_tensor_by_name('tower_0/images:0') 24 | self.training = tf.get_default_graph().get_tensor_by_name('training_flag:0') 25 | self.output_ops = [ 26 | tf.get_default_graph().get_tensor_by_name('tower_0/boxes:0'), 27 | tf.get_default_graph().get_tensor_by_name('tower_0/scores:0'), 28 | tf.expand_dims(tf.cast(tf.get_default_graph().get_tensor_by_name('tower_0/labels:0'),dtype=tf.float32),-1) 29 | ] 30 | self.output_op=tf.concat(self.output_ops,axis=2) 31 | 32 | 33 | 34 | 35 | def __call__(self, image, score_threshold=0.5,input_shape=(cfg.DATA.hin,cfg.DATA.win),max_boxes=1000): 36 | """Detect faces. 37 | 38 | Arguments: 39 | image: a numpy uint8 array with shape [height, width, 3], 40 | that represents a RGB image. 41 | score_threshold: a float number. 42 | Returns: 43 | boxes: a float numpy array of shape [num_faces, 5]. 
44 | 45 | """ 46 | 47 | 48 | # if input_shape is None: 49 | # h, w, c = image.shape 50 | # input_shape = (math.ceil(h / 32) * 32, math.ceil(w / 32) * 32) 51 | # 52 | # else: 53 | # h, w = input_shape 54 | # input_shape = (math.ceil(h / 32) * 32, math.ceil(w / 32) * 32) 55 | 56 | image, scale_x, scale_y, dx, dy = self.preprocess(image, 57 | target_height=cfg.DATA.hin, 58 | target_width=cfg.DATA.win) 59 | 60 | 61 | if cfg.DATA.channel==1: 62 | image=cv2.cvtColor(image,cv2.COLOR_RGB2GRAY) 63 | image= np.expand_dims(image, -1) 64 | 65 | image_fornet = np.expand_dims(image, 0) 66 | 67 | 68 | bboxes = self._sess.run( 69 | self.output_op, feed_dict={self.input_image: image_fornet,self.training:False} 70 | ) 71 | 72 | bboxes = self.py_nms(np.array(bboxes[0]), iou_thres=0.3, score_thres=score_threshold,max_boxes=max_boxes) 73 | 74 | ###recorver to raw image 75 | boxes_scaler = np.array([(input_shape[1]) / scale_x, 76 | (input_shape[0]) / scale_y, 77 | (input_shape[1]) / scale_x, 78 | (input_shape[0]) / scale_y, 79 | 1.,1.], dtype='float32') 80 | 81 | boxes_bias = np.array([dx / scale_x, 82 | dy / scale_y, 83 | dx / scale_x, 84 | dy / scale_y, 0.,0.], dtype='float32') 85 | bboxes = bboxes * boxes_scaler - boxes_bias 86 | 87 | 88 | 89 | # self.stats_graph(self._sess.graph) 90 | return bboxes 91 | 92 | 93 | def preprocess(self, image, target_height, target_width, label=None): 94 | 95 | ###sometimes use in objs detects 96 | h, w, c = image.shape 97 | 98 | bimage = np.zeros(shape=[target_height, target_width, c], dtype=image.dtype) 99 | 100 | scale_y = target_height / h 101 | scale_x = target_width / w 102 | 103 | scale = min(scale_x, scale_y) 104 | 105 | image = cv2.resize(image, None, fx=scale, fy=scale) 106 | 107 | h_, w_, _ = image.shape 108 | 109 | dx = (target_width - w_) // 2 110 | dy = (target_height - h_) // 2 111 | bimage[dy:h_ + dy, dx:w_ + dx, :] = image 112 | 113 | return bimage, scale, scale, dx, dy 114 | 115 | def py_nms(self, bboxes, iou_thres, score_thres, max_boxes=1000): 116 | 117 | upper_thres = np.where(bboxes[:, 4] > score_thres)[0] 118 | 119 | bboxes = bboxes[upper_thres] 120 | 121 | x1 = bboxes[:, 0] 122 | y1 = bboxes[:, 1] 123 | x2 = bboxes[:, 2] 124 | y2 = bboxes[:, 3] 125 | 126 | order = np.argsort(bboxes[:, 4])[::-1] 127 | 128 | keep=[] 129 | while order.shape[0] > 0: 130 | if len(keep)>max_boxes: 131 | break 132 | cur = order[0] 133 | 134 | keep.append(cur) 135 | 136 | area = (bboxes[cur, 2] - bboxes[cur, 0]) * (bboxes[cur, 3] - bboxes[cur, 1]) 137 | 138 | x1_reain = x1[order[1:]] 139 | y1_reain = y1[order[1:]] 140 | x2_reain = x2[order[1:]] 141 | y2_reain = y2[order[1:]] 142 | 143 | xx1 = np.maximum(bboxes[cur, 0], x1_reain) 144 | yy1 = np.maximum(bboxes[cur, 1], y1_reain) 145 | xx2 = np.minimum(bboxes[cur, 2], x2_reain) 146 | yy2 = np.minimum(bboxes[cur, 3], y2_reain) 147 | 148 | intersection = np.maximum(0, yy2 - yy1) * np.maximum(0, xx2 - xx1) 149 | 150 | iou = intersection / (area + (y2_reain - y1_reain) * (x2_reain - x1_reain) - intersection) 151 | 152 | ##keep the low iou 153 | low_iou_position = np.where(iou < iou_thres)[0] 154 | 155 | order = order[low_iou_position + 1] 156 | 157 | return bboxes[keep] 158 | 159 | def stats_graph(self,graph): 160 | 161 | 162 | 163 | flops = tf.profiler.profile(graph, options=tf.profiler.ProfileOptionBuilder.float_operation()) 164 | params = tf.profiler.profile(graph, options=tf.profiler.ProfileOptionBuilder.trainable_variables_parameter()) 165 | print(params) 166 | print('FLOPs: {}M; Trainable params: 
{}'.format(flops.total_float_ops/1024/1024., params.total_parameters)) 167 | 168 | def init_model(self,args): 169 | 170 | if len(args) == 1: 171 | use_pb = True 172 | pb_path = args[0] 173 | else: 174 | use_pb = False 175 | meta_path = args[0] 176 | restore_model_path = args[1] 177 | 178 | def ini_ckpt(): 179 | graph = tf.Graph() 180 | graph.as_default() 181 | configProto = tf.ConfigProto() 182 | configProto.gpu_options.allow_growth = True 183 | sess = tf.Session(config=configProto) 184 | # load_model(model_path, sess) 185 | saver = tf.train.import_meta_graph(meta_path) 186 | saver.restore(sess, restore_model_path) 187 | 188 | print("Model restred!") 189 | return (graph, sess) 190 | 191 | def init_pb(model_path): 192 | config = tf.ConfigProto() 193 | config.gpu_options.per_process_gpu_memory_fraction = 0.5 194 | compute_graph = tf.Graph() 195 | compute_graph.as_default() 196 | sess = tf.Session(config=config) 197 | with tf.gfile.GFile(model_path, 'rb') as fid: 198 | graph_def = tf.GraphDef() 199 | graph_def.ParseFromString(fid.read()) 200 | tf.import_graph_def(graph_def, name='') 201 | 202 | 203 | # saver = tf.train.Saver(tf.global_variables()) 204 | # saver.save(sess, save_path='./tmp.ckpt') 205 | return (compute_graph, sess) 206 | 207 | if use_pb: 208 | model = init_pb(pb_path) 209 | else: 210 | model = ini_ckpt() 211 | 212 | graph = model[0] 213 | sess = model[1] 214 | 215 | return graph, sess 216 | 217 | 218 | -------------------------------------------------------------------------------- /lib/core/anchor/box_utils.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import sys 4 | sys.path.append('.') 5 | import tensorflow as tf 6 | import numpy as np 7 | # a small value 8 | EPSILON = 1e-8 9 | 10 | 11 | ###becaurefull , the decode use it as one 12 | SCALE_FACTORS = [5.0, 5.0, 5.0, 5.0] 13 | 14 | 15 | 16 | 17 | 18 | def np_iou(boxes1, boxes2): 19 | def area(boxes): 20 | """Computes area of boxes. 21 | 22 | Arguments: 23 | boxes: a float tensor with shape [N, 4]. 24 | Returns: 25 | a float tensor with shape [N] representing box areas. 26 | """ 27 | 28 | xmin, ymin, xmax, ymax = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3] 29 | return (ymax - ymin) * (xmax - xmin) 30 | 31 | """Computes pairwise intersection-over-union between two box collections. 32 | 33 | Arguments: 34 | boxes1: a float tensor with shape [N, 4].GT 35 | boxes2: a float tensor with shape [M, 4].ANCHOR 36 | Returns: 37 | a float tensor with shape [N, M] representing pairwise iou scores. 38 | """ 39 | 40 | intersections = intersection(boxes1, boxes2) 41 | 42 | areas1 = area(boxes1) 43 | areas2 = area(boxes2) 44 | unions = np.expand_dims(areas1, 1) + np.expand_dims(areas2, 0) - intersections 45 | 46 | return np.clip(intersections / unions, 0.0, 1.0) 47 | def intersection(boxes1, boxes2): 48 | """Compute pairwise intersection areas between boxes. 49 | 50 | Arguments: 51 | boxes1: a float tensor with shape [N, 4]. 52 | boxes2: a float tensor with shape [M, 4]. 53 | Returns: 54 | a float tensor with shape [N, M] representing pairwise intersections. 
55 | """ 56 | 57 | xmin1, ymin1, xmax1, ymax1 = np.split(boxes1, indices_or_sections=4, axis=1) 58 | xmin2, ymin2, xmax2, ymax2 = np.split(boxes2, indices_or_sections=4, axis=1) 59 | # they all have shapes like [None, 1] 60 | 61 | all_pairs_min_ymax = np.minimum(ymax1, np.transpose(ymax2)) 62 | all_pairs_max_ymin = np.maximum(ymin1, np.transpose(ymin2)) 63 | 64 | intersect_heights = np.maximum(0.0, all_pairs_min_ymax - all_pairs_max_ymin) 65 | all_pairs_min_xmax = np.minimum(xmax1, np.transpose(xmax2)) 66 | all_pairs_max_xmin = np.maximum(xmin1, np.transpose(xmin2)) 67 | intersect_widths = np.maximum(0.0, all_pairs_min_xmax - all_pairs_max_xmin) 68 | # they all have shape [N, M] 69 | return intersect_heights * intersect_widths 70 | 71 | 72 | 73 | 74 | def encode(boxes, anchors): 75 | 76 | # print(boxes) 77 | # print(anchors) 78 | """Encode boxes with respect to anchors. 79 | Arguments: 80 | boxes: a float tensor with shape [N, 4]. 81 | anchors: a float tensor with shape [N, 4]. 82 | Returns: 83 | a float tensor with shape [N, 4], 84 | anchor-encoded boxes of the format [tx1, ty1, tx2, ty2]. 85 | """ 86 | 87 | anchor_widths = anchors[:, 2] - anchors[:, 0] 88 | anchor_heights = anchors[:, 3] - anchors[:, 1] 89 | 90 | tx1 = (boxes[:, 0] - anchors[:, 0]) / anchor_widths 91 | ty1 = (boxes[:, 1] - anchors[:, 1]) / anchor_heights 92 | tx2 = (boxes[:, 2] - anchors[:, 2]) / anchor_widths 93 | ty2 = (boxes[:, 3] - anchors[:, 3]) / anchor_heights 94 | 95 | tx1 *= SCALE_FACTORS[0] 96 | ty1 *= SCALE_FACTORS[1] 97 | tx2 *= SCALE_FACTORS[2] 98 | ty2 *= SCALE_FACTORS[3] 99 | 100 | return np.stack([tx1, ty1, tx2, ty2], axis=1) 101 | 102 | 103 | 104 | 105 | 106 | def decode(codes, anchors): 107 | """Decode relative codes to boxes. 108 | Arguments: 109 | codes: a float tensor with shape [N, 4], 110 | anchor-encoded boxes of the format [tx1, ty, tx2, ty2]. 111 | anchors: a float tensor with shape [N, 4]. 112 | Returns: 113 | a float tensor with shape [N, 4], 114 | bounding boxes of the format [ymin, xmin, ymax, xmax]. because tf nms needs yxyx 115 | """ 116 | with tf.name_scope('decode_predictions'): 117 | 118 | anchor_widths = anchors[:, 2] - anchors[:, 0] 119 | anchor_heights = anchors[:, 3] - anchors[:, 1] 120 | 121 | tx1, ty1, tx2, ty2 = tf.unstack(codes, axis=1) 122 | 123 | tx1 /= SCALE_FACTORS[0] 124 | ty1 /= SCALE_FACTORS[1] 125 | tx2 /= SCALE_FACTORS[2] 126 | ty2 /= SCALE_FACTORS[3] 127 | 128 | x1=tx1 * anchor_widths + anchors[:,0] 129 | y1=ty1 * anchor_heights + anchors[:,1] 130 | x2=tx2 * anchor_widths + anchors[:,2] 131 | y2=ty2 * anchor_heights + anchors[:,3] 132 | 133 | return tf.stack([x1,y1,x2,y2], axis=1) 134 | 135 | 136 | def decode_fix(codes, anchors,anchors_decode): 137 | """Decode relative codes to boxes. 138 | Arguments: 139 | codes: a float tensor with shape [N, 4], 140 | anchor-encoded boxes of the format [tx1, ty, tx2, ty2]. 141 | anchors: a float tensor with shape [N, 4]. 142 | Returns: 143 | a float tensor with shape [N, 4], 144 | bounding boxes of the format [ymin, xmin, ymax, xmax]. 
because tf nms needs yxyx 145 | """ 146 | with tf.name_scope('decode_predictions'): 147 | 148 | decodes=codes*anchors_decode+anchors 149 | 150 | 151 | return decodes 152 | # tx1, ty1, tx2, ty2 = tf.unstack(codes, axis=1) 153 | # 154 | # tx1 /= SCALE_FACTORS[0] 155 | # ty1 /= SCALE_FACTORS[1] 156 | # tx2 /= SCALE_FACTORS[2] 157 | # ty2 /= SCALE_FACTORS[3] 158 | # 159 | # x1=tx1 * anchor_widths + anchors[:,0] 160 | # y1=ty1 * anchor_heights + anchors[:,1] 161 | # x2=tx2 * anchor_widths + anchors[:,2] 162 | # y2=ty2 * anchor_heights + anchors[:,3] 163 | # 164 | # return tf.stack([x1,y1,x2,y2], axis=1) 165 | 166 | 167 | 168 | 169 | 170 | def batch_decode(box_encodings, anchors): 171 | """Decodes a batch of box encodings with respect to the anchors. 172 | 173 | Arguments: 174 | box_encodings: a float tensor with shape [batch_size, num_anchors, 4]. 175 | anchors: a float tensor with shape [num_anchors, 4]. 176 | Returns: 177 | a float tensor with shape [batch_size, num_anchors, 4]. 178 | It contains the decoded boxes. 179 | """ 180 | batch_size = tf.shape(box_encodings)[0] 181 | num_anchors = tf.shape(box_encodings)[1] 182 | 183 | tiled_anchor_boxes = tf.tile( 184 | tf.expand_dims(anchors, 0), 185 | [batch_size, 1, 1] 186 | ) # shape [batch_size, num_anchors, 4] 187 | decoded_boxes = decode( 188 | tf.reshape(box_encodings, [-1, 4]), 189 | tf.reshape(tiled_anchor_boxes, [-1, 4]) 190 | ) # shape [batch_size * num_anchors, 4] 191 | 192 | decoded_boxes = tf.reshape( 193 | decoded_boxes, 194 | [batch_size, num_anchors, 4] 195 | ) 196 | decoded_boxes = tf.clip_by_value(decoded_boxes, 0.0, 1.0) 197 | return decoded_boxes 198 | 199 | def batch_decode_fix(box_encodings, anchors,anchors_decode): 200 | """Decodes a batch of box encodings with respect to the anchors. 201 | 202 | Arguments: 203 | box_encodings: a float tensor with shape [batch_size, num_anchors, 4]. 204 | anchors: a float tensor with shape [num_anchors, 4]. 205 | Returns: 206 | a float tensor with shape [batch_size, num_anchors, 4]. 207 | It contains the decoded boxes. 
208 | """ 209 | 210 | batch_size = tf.shape(box_encodings)[0] 211 | num_anchors = tf.shape(box_encodings)[1] 212 | 213 | # tiled_anchor_boxes = tf.tile( 214 | # tf.expand_dims(anchors, 0), 215 | # [batch_size, 1, 1] 216 | # ) # shape [batch_size, num_anchors, 4] 217 | #tiled_anchor_boxes=anchors 218 | 219 | decoded_boxes = decode_fix( 220 | tf.reshape(box_encodings, [-1, 4]), 221 | anchors, 222 | anchors_decode 223 | ) # shape [batch_size * num_anchors, 4] 224 | 225 | decoded_boxes = tf.reshape( 226 | decoded_boxes, 227 | [batch_size, num_anchors, 4] 228 | ) 229 | #decoded_boxes = tf.clip_by_value(decoded_boxes, 0.0, 1.0) 230 | return decoded_boxes 231 | -------------------------------------------------------------------------------- /model_eval/wider.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append('.') 4 | import os 5 | import scipy.io as sio 6 | import argparse 7 | import cv2 8 | import numpy as np 9 | import tensorflow as tf 10 | 11 | import time 12 | 13 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 14 | 15 | from lib.core.api.face_detector import FaceDetector 16 | 17 | ap = argparse.ArgumentParser() 18 | ap.add_argument("--model", required=False, default='./model/detector.pb', help="model to eval:") 19 | ap.add_argument("--is_show", required=False, default=False, help="show result or not?") 20 | ap.add_argument("--data_dir", required=False, default="./WIDER/WIDER_val", help="dir to img") 21 | ap.add_argument("--multiscale", required=False, default=0, type=int, help="test in multiscales 0-False 1-True") 22 | 23 | ap.add_argument("--input_shape", required=False, type=int, default=512, help="input shape") 24 | ap.add_argument("--result", required=False, default='./result', help="dir to write result") 25 | 26 | args = ap.parse_args() 27 | 28 | IMAGES_DIR = args.data_dir 29 | RESULT_DIR = args.result 30 | MODEL_PATH = args.model 31 | INPUT_SHAPE = (args.input_shape, args.input_shape) 32 | MULTISCALETEST = True if args.multiscale == 1 else False 33 | 34 | face_detector = FaceDetector([MODEL_PATH]) 35 | 36 | 37 | def get_data(): 38 | subset = 'val' 39 | if subset is 'val': 40 | wider_face = sio.loadmat( 41 | './eval_tools/ground_truth/wider_face_val.mat') 42 | else: 43 | wider_face = sio.loadmat( 44 | './eval_tools/ground_truth/wider_face_test.mat') 45 | event_list = wider_face['event_list'] 46 | file_list = wider_face['file_list'] 47 | del wider_face 48 | 49 | imgs_path = os.path.join(IMAGES_DIR, 'images') 50 | save_path = RESULT_DIR 51 | 52 | return event_list, file_list, imgs_path, save_path 53 | 54 | 55 | def bbox_vote(det): 56 | order = det[:, 4].ravel().argsort()[::-1] 57 | det = det[order, :] 58 | while det.shape[0] > 0: 59 | # IOU 60 | area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1) 61 | xx1 = np.maximum(det[0, 0], det[:, 0]) 62 | yy1 = np.maximum(det[0, 1], det[:, 1]) 63 | xx2 = np.minimum(det[0, 2], det[:, 2]) 64 | yy2 = np.minimum(det[0, 3], det[:, 3]) 65 | w = np.maximum(0.0, xx2 - xx1 + 1) 66 | h = np.maximum(0.0, yy2 - yy1 + 1) 67 | inter = w * h 68 | o = inter / (area[0] + area[:] - inter) 69 | 70 | # get needed merge det and delete these det 71 | merge_index = np.where(o >= 0.3)[0] 72 | det_accu = det[merge_index, :] 73 | det = np.delete(det, merge_index, 0) 74 | 75 | if merge_index.shape[0] <= 1: 76 | continue 77 | det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4)) 78 | max_score = np.max(det_accu[:, 4]) 79 | det_accu_sum = np.zeros((1, 5)) 80 | det_accu_sum[:, 0:4] 
= np.sum(det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:]) 81 | det_accu_sum[:, 4] = max_score 82 | try: 83 | dets = np.row_stack((dets, det_accu_sum)) 84 | except: 85 | dets = det_accu_sum 86 | try: 87 | dets = dets[0:750, :] 88 | except: 89 | dets = det 90 | 91 | return dets 92 | 93 | 94 | def detect_face(img, shrink): 95 | if shrink != 1: 96 | img = cv2.resize(img, None, None, fx=shrink, fy=shrink, 97 | interpolation=cv2.INTER_LINEAR) 98 | 99 | if not MULTISCALETEST: 100 | detections = face_detector(img, score_threshold=0.05, input_shape=(args.input_shape, args.input_shape)) 101 | else: 102 | INPUT_SHAPE = (img.shape[0], img.shape[1]) 103 | detections = face_detector(img, score_threshold=0.05, input_shape=INPUT_SHAPE) 104 | 105 | det_xmin = detections[:, 0] / shrink 106 | det_ymin = detections[:, 1] / shrink 107 | det_xmax = detections[:, 2] / shrink 108 | det_ymax = detections[:, 3] / shrink 109 | det_conf = detections[:, 4] 110 | det = np.column_stack((det_xmin, det_ymin, det_xmax, det_ymax, det_conf)) 111 | 112 | return det 113 | 114 | 115 | def multi_scale_test(image, max_im_shrink): 116 | # shrink detecting and shrink only detect big face 117 | st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink 118 | det_s = detect_face(image, st) 119 | index = np.where(np.maximum( 120 | det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0] 121 | det_s = det_s[index, :] 122 | 123 | # enlarge one times 124 | bt = min(2, max_im_shrink) if max_im_shrink > 1 else ( 125 | st + max_im_shrink) / 2 126 | det_b = detect_face(image, bt) 127 | 128 | # enlarge small image x times for small face 129 | if max_im_shrink > 2: 130 | bt *= 2 131 | while bt < max_im_shrink: 132 | det_b = np.row_stack((det_b, detect_face(image, bt))) 133 | bt *= 2 134 | det_b = np.row_stack((det_b, detect_face(image, max_im_shrink))) 135 | 136 | # enlarge only detect small face 137 | if bt > 1: 138 | index = np.where(np.minimum( 139 | det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0] 140 | det_b = det_b[index, :] 141 | else: 142 | index = np.where(np.maximum( 143 | det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0] 144 | det_b = det_b[index, :] 145 | 146 | return det_s, det_b 147 | 148 | 149 | def flip_test(image, shrink): 150 | image_f = cv2.flip(image, 1) 151 | det_f = detect_face(image_f, shrink) 152 | 153 | det_t = np.zeros(det_f.shape) 154 | det_t[:, 0] = image.shape[1] - det_f[:, 2] 155 | det_t[:, 1] = det_f[:, 1] 156 | det_t[:, 2] = image.shape[1] - det_f[:, 0] 157 | det_t[:, 3] = det_f[:, 3] 158 | det_t[:, 4] = det_f[:, 4] 159 | return det_t 160 | 161 | 162 | event_list, file_list, imgs_path, save_path = get_data() 163 | 164 | for index, event in enumerate(event_list): 165 | print(event) 166 | filelist = file_list[index][0] 167 | path = os.path.join(save_path, event[0][0]) 168 | if not os.path.exists(path): 169 | os.makedirs(path) 170 | 171 | for num, file in enumerate(filelist): 172 | im_name = file[0][0] 173 | in_file = os.path.join(imgs_path, event[0][0], im_name[:] + '.jpg') 174 | 175 | image_array = cv2.imread(in_file) 176 | img = cv2.cvtColor(image_array, cv2.COLOR_BGR2RGB) 177 | 178 | # max_im_shrink = (0x7fffffff / 200.0 / (img.shape[0] * img.shape[1])) ** 0.5 179 | max_im_shrink = np.sqrt( 180 | 2000 * 2000 / (img.shape[0] * img.shape[1])) 181 | max_im_shrink = 3 if max_im_shrink > 3 else max_im_shrink 182 | 183 | shrink = max_im_shrink if max_im_shrink < 1 else 1 184 | 185 | det0 = detect_face(img, shrink) 186 | 187 | ##flip det 188 | det1 = 
flip_test(img, shrink) 189 | 190 | if MULTISCALETEST: 191 | [det2, det3] = multi_scale_test(img, max_im_shrink) 192 | det = np.row_stack((det0, det1, det2, det3)) 193 | else: 194 | det = np.row_stack((det0, det1)) 195 | 196 | dets = bbox_vote(det) 197 | 198 | if args.is_show: 199 | for bbox in dets: 200 | if bbox[4] > 0.3: 201 | # cv2.circle(img_show,(p[0],p[1]),3,(0,0,213),-1) 202 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), 203 | (int(bbox[2]), int(bbox[3])), (255, 0, 0), 7) 204 | cv2.imshow('tmp', img) 205 | cv2.waitKey(0) 206 | 207 | fout = open(os.path.join(save_path, event[0][0], im_name + '.txt'), 'w') 208 | fout.write('{:s}\n'.format(event[0][0] + '/' + im_name + '.jpg')) 209 | fout.write('{:d}\n'.format(dets.shape[0])) 210 | for i in range(dets.shape[0]): 211 | xmin = dets[i][0] 212 | ymin = dets[i][1] 213 | xmax = dets[i][2] 214 | ymax = dets[i][3] 215 | score = dets[i][4] 216 | fout.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'. 217 | format(xmin, ymin, (xmax - xmin + 1), (ymax - ymin + 1), score)) 218 | fout.close() 219 | 220 | -------------------------------------------------------------------------------- /lib/core/api/face_detector.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import cv2 4 | import time 5 | import math 6 | 7 | from train_config import config as cfg 8 | 9 | 10 | 11 | class FaceDetector: 12 | def __init__(self, model_path): 13 | """ 14 | Arguments: 15 | model_path: a string, path to a pb file. 16 | """ 17 | self._graph = tf.Graph() 18 | 19 | with self._graph.as_default(): 20 | self._graph, self._sess = self.init_model(model_path) 21 | 22 | 23 | self.input_image = tf.get_default_graph().get_tensor_by_name('tower_0/images:0') 24 | #self.training = tf.get_default_graph().get_tensor_by_name('training_flag:0') 25 | self.output_op=tf.get_default_graph().get_tensor_by_name('tower_0/detections:0') 26 | 27 | self.output_kps=tf.get_default_graph().get_tensor_by_name('tower_0/keypoints:0') 28 | 29 | self.wh = tf.get_default_graph().get_tensor_by_name('tower_0/wh:0') 30 | 31 | def __call__(self, image, score_threshold=0.5,input_shape=(cfg.DATA.hin,cfg.DATA.win),max_boxes=1000): 32 | """Detect faces. 33 | 34 | Arguments: 35 | image: a numpy uint8 array with shape [height, width, 3], 36 | that represents a RGB image. 37 | score_threshold: a float number. 38 | Returns: 39 | boxes: a float numpy array of shape [num_faces, 5]. 
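(judging from the scale/bias vectors applied below, each returned row appears to carry six values, [xmin, ymin, xmax, ymax, score, label], already rescaled back to the original image coordinates)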
40 | 41 | """ 42 | 43 | 44 | if input_shape is None: 45 | h, w, c = image.shape 46 | input_shape = (math.ceil(h / 32) * 32, math.ceil(w / 32) * 32) 47 | 48 | else: 49 | h, w = input_shape 50 | input_shape = (math.ceil(h / 32) * 32, math.ceil(w / 32) * 32) 51 | 52 | image, scale_x, scale_y, dx, dy = self.preprocess(image, 53 | target_height=input_shape[0], 54 | target_width=input_shape[1]) 55 | 56 | 57 | if cfg.DATA.channel==1: 58 | image=cv2.cvtColor(image,cv2.COLOR_RGB2GRAY) 59 | image= np.expand_dims(image, -1) 60 | 61 | image_fornet = np.expand_dims(image, 0) 62 | 63 | outputs,kps,wh = self._sess.run( 64 | [self.output_op,self.output_kps,self.wh], feed_dict={self.input_image: image_fornet} 65 | ) 66 | 67 | bboxes=outputs[0] 68 | 69 | # print(kps.shape) 70 | # kps=kps[0][:,:,0] 71 | # 72 | # label =kps 73 | # #label = (label / np.max(label) * 255).astype(np.uint8) 74 | # cv2.namedWindow('label', 0) 75 | # cv2.imshow('label', label) 76 | # 77 | # wh = wh[0][:, :, 0] 78 | # 79 | # print(np.min(wh)) 80 | # print(np.max(wh)) 81 | # wh = wh / np.max(wh) 82 | # wh = wh 83 | # # label = (label / np.max(label) * 255).astype(np.uint8) 84 | # cv2.namedWindow('wh', 0) 85 | # cv2.imshow('wh', wh) 86 | 87 | 88 | bboxes = self.py_nms(np.array(bboxes), iou_thres=None, score_thres=score_threshold,max_boxes=max_boxes) 89 | 90 | ###recorver to raw image 91 | boxes_scaler = np.array([1 / scale_x, 92 | 1 / scale_y, 93 | 1 / scale_x, 94 | 1 / scale_y, 95 | 1.,1.], dtype='float32') 96 | 97 | boxes_bias = np.array([dx , 98 | dy , 99 | dx , 100 | dy , 0.,0.], dtype='float32') 101 | bboxes = (bboxes - boxes_bias)*boxes_scaler 102 | 103 | 104 | 105 | # self.stats_graph(self._sess.graph) 106 | return bboxes 107 | 108 | 109 | def preprocess(self, image, target_height, target_width, label=None): 110 | 111 | ###sometimes use in objs detects 112 | h, w, c = image.shape 113 | 114 | bimage = np.zeros(shape=[target_height, target_width, c], dtype=image.dtype) 115 | 116 | scale_y = target_height / h 117 | scale_x = target_width / w 118 | 119 | scale = min(scale_x, scale_y) 120 | 121 | image = cv2.resize(image, None, fx=scale, fy=scale) 122 | 123 | h_, w_, _ = image.shape 124 | 125 | dx = (target_width - w_) // 2 126 | dy = (target_height - h_) // 2 127 | bimage[dy:h_ + dy, dx:w_ + dx, :] = image 128 | 129 | return bimage, scale, scale, dx, dy 130 | 131 | def py_nms(self, bboxes, iou_thres, score_thres, max_boxes=1000): 132 | 133 | upper_thres = np.where(bboxes[:, 4] > score_thres)[0] 134 | 135 | bboxes = bboxes[upper_thres] 136 | if iou_thres is None: 137 | return bboxes 138 | 139 | x1 = bboxes[:, 0] 140 | y1 = bboxes[:, 1] 141 | x2 = bboxes[:, 2] 142 | y2 = bboxes[:, 3] 143 | 144 | order = np.argsort(bboxes[:, 4])[::-1] 145 | 146 | keep=[] 147 | while order.shape[0] > 0: 148 | if len(keep)>max_boxes: 149 | break 150 | cur = order[0] 151 | 152 | keep.append(cur) 153 | 154 | area = (bboxes[cur, 2] - bboxes[cur, 0]) * (bboxes[cur, 3] - bboxes[cur, 1]) 155 | 156 | x1_reain = x1[order[1:]] 157 | y1_reain = y1[order[1:]] 158 | x2_reain = x2[order[1:]] 159 | y2_reain = y2[order[1:]] 160 | 161 | xx1 = np.maximum(bboxes[cur, 0], x1_reain) 162 | yy1 = np.maximum(bboxes[cur, 1], y1_reain) 163 | xx2 = np.minimum(bboxes[cur, 2], x2_reain) 164 | yy2 = np.minimum(bboxes[cur, 3], y2_reain) 165 | 166 | intersection = np.maximum(0, yy2 - yy1) * np.maximum(0, xx2 - xx1) 167 | 168 | iou = intersection / (area + (y2_reain - y1_reain) * (x2_reain - x1_reain) - intersection) 169 | 170 | ##keep the low iou 171 | low_iou_position = np.where(iou 
< iou_thres)[0] 172 | 173 | order = order[low_iou_position + 1] 174 | 175 | return bboxes[keep] 176 | 177 | def stats_graph(self,graph): 178 | 179 | 180 | 181 | flops = tf.profiler.profile(graph, options=tf.profiler.ProfileOptionBuilder.float_operation()) 182 | params = tf.profiler.profile(graph, options=tf.profiler.ProfileOptionBuilder.trainable_variables_parameter()) 183 | print(params) 184 | print('FLOPs: {}M; Trainable params: {}'.format(flops.total_float_ops/1024/1024., params.total_parameters)) 185 | 186 | def init_model(self,args): 187 | 188 | if len(args) == 1: 189 | use_pb = True 190 | pb_path = args[0] 191 | else: 192 | use_pb = False 193 | meta_path = args[0] 194 | restore_model_path = args[1] 195 | 196 | def ini_ckpt(): 197 | graph = tf.Graph() 198 | graph.as_default() 199 | configProto = tf.ConfigProto() 200 | configProto.gpu_options.allow_growth = True 201 | sess = tf.Session(config=configProto) 202 | # load_model(model_path, sess) 203 | saver = tf.train.import_meta_graph(meta_path) 204 | saver.restore(sess, restore_model_path) 205 | 206 | print("Model restred!") 207 | return (graph, sess) 208 | 209 | def init_pb(model_path): 210 | config = tf.ConfigProto() 211 | config.gpu_options.per_process_gpu_memory_fraction = 0.5 212 | compute_graph = tf.Graph() 213 | compute_graph.as_default() 214 | sess = tf.Session(config=config) 215 | with tf.gfile.GFile(model_path, 'rb') as fid: 216 | graph_def = tf.GraphDef() 217 | graph_def.ParseFromString(fid.read()) 218 | tf.import_graph_def(graph_def, name='') 219 | 220 | 221 | # saver = tf.train.Saver(tf.global_variables()) 222 | # saver.save(sess, save_path='./tmp.ckpt') 223 | return (compute_graph, sess) 224 | 225 | if use_pb: 226 | model = init_pb(pb_path) 227 | else: 228 | model = ini_ckpt() 229 | 230 | graph = model[0] 231 | sess = model[1] 232 | 233 | return graph, sess 234 | 235 | 236 | -------------------------------------------------------------------------------- /lib/core/model/net/mobilenet/mobilenet_v2.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Implementation of Mobilenet V2. 16 | Architecture: https://arxiv.org/abs/1801.04381 17 | The base model gives 72.2% accuracy on ImageNet, with 300MMadds, 18 | 3.4 M parameters. 
19 | """ 20 | 21 | from __future__ import absolute_import 22 | from __future__ import division 23 | from __future__ import print_function 24 | 25 | import copy 26 | import functools 27 | 28 | import tensorflow as tf 29 | 30 | from lib.core.model.net.mobilenet import conv_blocks as ops 31 | from lib.core.model.net.mobilenet import mobilenet as lib 32 | 33 | slim = tf.contrib.slim 34 | op = lib.op 35 | 36 | expand_input = ops.expand_input_by_factor 37 | 38 | # pyformat: disable 39 | # Architecture: https://arxiv.org/abs/1801.04381 40 | V2_DEF = dict( 41 | defaults={ 42 | # Note: these parameters of batch norm affect the architecture 43 | # that's why they are here and not in training_scope. 44 | (slim.batch_norm,): {'center': True, 'scale': True}, 45 | (slim.conv2d, slim.fully_connected, slim.separable_conv2d): { 46 | 'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6 47 | }, 48 | (ops.expanded_conv,): { 49 | 'expansion_size': expand_input(6), 50 | 'split_expansion': 1, 51 | 'normalizer_fn': slim.batch_norm, 52 | 'residual': True 53 | }, 54 | (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME'} 55 | }, 56 | spec=[ 57 | op(slim.conv2d, stride=2, num_outputs=32, kernel_size=[3, 3]), 58 | op(ops.expanded_conv, 59 | expansion_size=expand_input(1, divisible_by=1), 60 | num_outputs=16), 61 | op(ops.expanded_conv, stride=2, num_outputs=24), 62 | op(ops.expanded_conv, stride=1, num_outputs=24), 63 | op(ops.expanded_conv, stride=2, num_outputs=32), 64 | op(ops.expanded_conv, stride=1, num_outputs=32), 65 | op(ops.expanded_conv, stride=1, num_outputs=32), 66 | op(ops.expanded_conv, stride=2, num_outputs=64), 67 | op(ops.expanded_conv, stride=1, num_outputs=64), 68 | op(ops.expanded_conv, stride=1, num_outputs=64), 69 | op(ops.expanded_conv, stride=1, num_outputs=64), 70 | op(ops.expanded_conv, stride=1, num_outputs=96), 71 | op(ops.expanded_conv, stride=1, num_outputs=96), 72 | op(ops.expanded_conv, stride=1, num_outputs=96), 73 | op(ops.expanded_conv, stride=2, num_outputs=160), 74 | op(ops.expanded_conv, stride=1, num_outputs=160), 75 | op(ops.expanded_conv, stride=1, num_outputs=160), 76 | op(ops.expanded_conv, stride=1, num_outputs=320), 77 | #op(slim.conv2d, stride=1, kernel_size=[1, 1], num_outputs=1280) 78 | ], 79 | ) 80 | # pyformat: enable 81 | 82 | 83 | @slim.add_arg_scope 84 | def mobilenet(input_tensor, 85 | num_classes=1001, 86 | depth_multiplier=1.0, 87 | scope='MobilenetV2', 88 | conv_defs=None, 89 | finegrain_classification_mode=False, 90 | min_depth=None, 91 | divisible_by=None, 92 | activation_fn=None, 93 | **kwargs): 94 | """Creates mobilenet V2 network. 95 | Inference mode is created by default. To create training use training_scope 96 | below. 97 | with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()): 98 | logits, endpoints = mobilenet_v2.mobilenet(input_tensor) 99 | Args: 100 | input_tensor: The input tensor 101 | num_classes: number of classes 102 | depth_multiplier: The multiplier applied to scale number of 103 | channels in each layer. 104 | scope: Scope of the operator 105 | conv_defs: Allows to override default conv def. 106 | finegrain_classification_mode: When set to True, the model 107 | will keep the last layer large even for small multipliers. Following 108 | https://arxiv.org/abs/1801.04381 109 | suggests that it improves performance for ImageNet-type of problems. 110 | *Note* ignored if final_endpoint makes the builder exit earlier. 
111 | min_depth: If provided, will ensure that all layers will have that 112 | many channels after application of depth multiplier. 113 | divisible_by: If provided will ensure that all layers # channels 114 | will be divisible by this number. 115 | activation_fn: Activation function to use, defaults to tf.nn.relu6 if not 116 | specified. 117 | **kwargs: passed directly to mobilenet.mobilenet: 118 | prediction_fn- what prediction function to use. 119 | reuse-: whether to reuse variables (if reuse set to true, scope 120 | must be given). 121 | Returns: 122 | logits/endpoints pair 123 | Raises: 124 | ValueError: On invalid arguments 125 | """ 126 | if conv_defs is None: 127 | conv_defs = V2_DEF 128 | if 'multiplier' in kwargs: 129 | raise ValueError('mobilenetv2 doesn\'t support generic ' 130 | 'multiplier parameter use "depth_multiplier" instead.') 131 | if finegrain_classification_mode: 132 | conv_defs = copy.deepcopy(conv_defs) 133 | if depth_multiplier < 1: 134 | conv_defs['spec'][-1].params['num_outputs'] /= depth_multiplier 135 | if activation_fn: 136 | conv_defs = copy.deepcopy(conv_defs) 137 | defaults = conv_defs['defaults'] 138 | conv_defaults = ( 139 | defaults[(slim.conv2d, slim.fully_connected, slim.separable_conv2d)]) 140 | conv_defaults['activation_fn'] = activation_fn 141 | 142 | depth_args = {} 143 | # NB: do not set depth_args unless they are provided to avoid overriding 144 | # whatever default depth_multiplier might have thanks to arg_scope. 145 | if min_depth is not None: 146 | depth_args['min_depth'] = min_depth 147 | if divisible_by is not None: 148 | depth_args['divisible_by'] = divisible_by 149 | 150 | with slim.arg_scope((lib.depth_multiplier,), **depth_args): 151 | return lib.mobilenet( 152 | input_tensor, 153 | num_classes=num_classes, 154 | conv_defs=conv_defs, 155 | scope=scope, 156 | multiplier=depth_multiplier, 157 | **kwargs) 158 | 159 | mobilenet.default_image_size = 224 160 | 161 | 162 | def wrapped_partial(func, *args, **kwargs): 163 | partial_func = functools.partial(func, *args, **kwargs) 164 | functools.update_wrapper(partial_func, func) 165 | return partial_func 166 | 167 | # Wrappers for mobilenet v2 with depth-multipliers. Be noticed that 168 | # 'finegrain_classification_mode' is set to True, which means the embedding 169 | # layer will not be shrinked when given a depth-multiplier < 1.0. 170 | mobilenet_v2_140 = wrapped_partial(mobilenet, depth_multiplier=1.4) 171 | mobilenet_v2_050 = wrapped_partial(mobilenet, depth_multiplier=0.50, 172 | finegrain_classification_mode=True) 173 | mobilenet_v2_035 = wrapped_partial(mobilenet, depth_multiplier=0.35, 174 | finegrain_classification_mode=True) 175 | 176 | mobilenet_v2_025 = wrapped_partial(mobilenet, depth_multiplier=0.25, 177 | finegrain_classification_mode=True) 178 | 179 | @slim.add_arg_scope 180 | def mobilenet_base(input_tensor, depth_multiplier=1.0, **kwargs): 181 | """Creates base of the mobilenet (no pooling and no logits) .""" 182 | return mobilenet(input_tensor, 183 | depth_multiplier=depth_multiplier, 184 | base_only=True, **kwargs) 185 | 186 | 187 | def training_scope(**kwargs): 188 | """Defines MobilenetV2 training scope. 189 | Usage: 190 | with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()): 191 | logits, endpoints = mobilenet_v2.mobilenet(input_tensor) 192 | with slim. 193 | Args: 194 | **kwargs: Passed to mobilenet.training_scope. The following parameters 195 | are supported: 196 | weight_decay- The weight decay to use for regularizing the model. 
197 | stddev- Standard deviation for initialization, if negative uses xavier. 198 | dropout_keep_prob- dropout keep probability 199 | bn_decay- decay for the batch norm moving averages. 200 | Returns: 201 | An `arg_scope` to use for the mobilenet v2 model. 202 | """ 203 | return lib.training_scope(**kwargs) 204 | 205 | 206 | __all__ = ['training_scope', 'mobilenet_base', 'mobilenet', 'V2_DEF'] -------------------------------------------------------------------------------- /lib/dataset/augmentor/data_aug/bbox_util.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | 5 | def draw_rect(im, cords, color = None): 6 | """Draw the rectangle on the image 7 | 8 | Parameters 9 | ---------- 10 | 11 | im : numpy.ndarray 12 | numpy image 13 | 14 | cords: numpy.ndarray 15 | Numpy array containing bounding boxes of shape `N X 4` where N is the 16 | number of bounding boxes and the bounding boxes are represented in the 17 | format `x1 y1 x2 y2` 18 | 19 | Returns 20 | ------- 21 | 22 | numpy.ndarray 23 | numpy image with bounding boxes drawn on it 24 | 25 | """ 26 | 27 | im = im.copy() 28 | 29 | cords = cords[:,:4] 30 | cords = cords.reshape(-1,4) 31 | if not color: 32 | color = [255,255,255] 33 | for cord in cords: 34 | 35 | pt1, pt2 = (cord[0], cord[1]) , (cord[2], cord[3]) 36 | 37 | pt1 = int(pt1[0]), int(pt1[1]) 38 | pt2 = int(pt2[0]), int(pt2[1]) 39 | 40 | im = cv2.rectangle(im.copy(), pt1, pt2, color, int(max(im.shape[:2])/200)) 41 | return im 42 | 43 | def bbox_area(bbox): 44 | return (bbox[:,2] - bbox[:,0])*(bbox[:,3] - bbox[:,1]) 45 | 46 | def clip_box(bbox, clip_box, alpha): 47 | """Clip the bounding boxes to the borders of an image 48 | 49 | Parameters 50 | ---------- 51 | 52 | bbox: numpy.ndarray 53 | Numpy array containing bounding boxes of shape `N X 4` where N is the 54 | number of bounding boxes and the bounding boxes are represented in the 55 | format `x1 y1 x2 y2` 56 | 57 | clip_box: numpy.ndarray 58 | An array of shape (4,) specifying the diagonal co-ordinates of the image 59 | The coordinates are represented in the format `x1 y1 x2 y2` 60 | 61 | alpha: float 62 | If the fraction of a bounding box left in the image after being clipped is 63 | less than `alpha` the bounding box is dropped. 64 | 65 | Returns 66 | ------- 67 | 68 | numpy.ndarray 69 | Numpy array containing **clipped** bounding boxes of shape `N X 4` where N is the 70 | number of bounding boxes left are being clipped and the bounding boxes are represented in the 71 | format `x1 y1 x2 y2` 72 | 73 | """ 74 | ar_ = (bbox_area(bbox)) 75 | x_min = np.maximum(bbox[:,0], clip_box[0]).reshape(-1,1) 76 | y_min = np.maximum(bbox[:,1], clip_box[1]).reshape(-1,1) 77 | x_max = np.minimum(bbox[:,2], clip_box[2]).reshape(-1,1) 78 | y_max = np.minimum(bbox[:,3], clip_box[3]).reshape(-1,1) 79 | 80 | bbox = np.hstack((x_min, y_min, x_max, y_max, bbox[:,4:])) 81 | 82 | delta_area = ((ar_ - bbox_area(bbox))/ar_) 83 | 84 | mask = (delta_area < (1 - alpha)).astype(int) 85 | 86 | bbox = bbox[mask == 1,:] 87 | 88 | 89 | return bbox 90 | 91 | 92 | def rotate_im(image, angle): 93 | """Rotate the image. 94 | 95 | Rotate the image such that the rotated image is enclosed inside the tightest 96 | rectangle. The area not occupied by the pixels of the original image is colored 97 | black. 
98 | 99 | Parameters 100 | ---------- 101 | 102 | image : numpy.ndarray 103 | numpy image 104 | 105 | angle : float 106 | angle by which the image is to be rotated 107 | 108 | Returns 109 | ------- 110 | 111 | numpy.ndarray 112 | Rotated Image 113 | 114 | """ 115 | # grab the dimensions of the image and then determine the 116 | # centre 117 | (h, w) = image.shape[:2] 118 | (cX, cY) = (w // 2, h // 2) 119 | 120 | # grab the rotation matrix (applying the negative of the 121 | # angle to rotate clockwise), then grab the sine and cosine 122 | # (i.e., the rotation components of the matrix) 123 | M = cv2.getRotationMatrix2D((cX, cY), angle, 1.0) 124 | cos = np.abs(M[0, 0]) 125 | sin = np.abs(M[0, 1]) 126 | 127 | # compute the new bounding dimensions of the image 128 | nW = int((h * sin) + (w * cos)) 129 | nH = int((h * cos) + (w * sin)) 130 | 131 | # adjust the rotation matrix to take into account translation 132 | M[0, 2] += (nW / 2) - cX 133 | M[1, 2] += (nH / 2) - cY 134 | 135 | # perform the actual rotation and return the image 136 | image = cv2.warpAffine(image, M, (nW, nH)) 137 | 138 | # image = cv2.resize(image, (w,h)) 139 | return image 140 | 141 | def get_corners(bboxes): 142 | 143 | """Get corners of bounding boxes 144 | 145 | Parameters 146 | ---------- 147 | 148 | bboxes: numpy.ndarray 149 | Numpy array containing bounding boxes of shape `N X 4` where N is the 150 | number of bounding boxes and the bounding boxes are represented in the 151 | format `x1 y1 x2 y2` 152 | 153 | returns 154 | ------- 155 | 156 | numpy.ndarray 157 | Numpy array of shape `N x 8` containing N bounding boxes each described by their 158 | corner co-ordinates `x1 y1 x2 y2 x3 y3 x4 y4` 159 | 160 | """ 161 | width = (bboxes[:,2] - bboxes[:,0]).reshape(-1,1) 162 | height = (bboxes[:,3] - bboxes[:,1]).reshape(-1,1) 163 | 164 | x1 = bboxes[:,0].reshape(-1,1) 165 | y1 = bboxes[:,1].reshape(-1,1) 166 | 167 | x2 = x1 + width 168 | y2 = y1 169 | 170 | x3 = x1 171 | y3 = y1 + height 172 | 173 | x4 = bboxes[:,2].reshape(-1,1) 174 | y4 = bboxes[:,3].reshape(-1,1) 175 | 176 | corners = np.hstack((x1,y1,x2,y2,x3,y3,x4,y4)) 177 | 178 | return corners 179 | 180 | def rotate_box(corners,angle, cx, cy, h, w): 181 | 182 | """Rotate the bounding box. 
183 | 184 | 185 | Parameters 186 | ---------- 187 | 188 | corners : numpy.ndarray 189 | Numpy array of shape `N x 8` containing N bounding boxes each described by their 190 | corner co-ordinates `x1 y1 x2 y2 x3 y3 x4 y4` 191 | 192 | angle : float 193 | angle by which the image is to be rotated 194 | 195 | cx : int 196 | x coordinate of the center of image (about which the box will be rotated) 197 | 198 | cy : int 199 | y coordinate of the center of image (about which the box will be rotated) 200 | 201 | h : int 202 | height of the image 203 | 204 | w : int 205 | width of the image 206 | 207 | Returns 208 | ------- 209 | 210 | numpy.ndarray 211 | Numpy array of shape `N x 8` containing N rotated bounding boxes each described by their 212 | corner co-ordinates `x1 y1 x2 y2 x3 y3 x4 y4` 213 | """ 214 | 215 | corners = corners.reshape(-1,2) 216 | corners = np.hstack((corners, np.ones((corners.shape[0],1), dtype = type(corners[0][0])))) 217 | 218 | M = cv2.getRotationMatrix2D((cx, cy), angle, 1.0) 219 | 220 | 221 | cos = np.abs(M[0, 0]) 222 | sin = np.abs(M[0, 1]) 223 | 224 | nW = int((h * sin) + (w * cos)) 225 | nH = int((h * cos) + (w * sin)) 226 | # adjust the rotation matrix to take into account translation 227 | M[0, 2] += (nW / 2) - cx 228 | M[1, 2] += (nH / 2) - cy 229 | # Prepare the vector to be transformed 230 | calculated = np.dot(M,corners.T).T 231 | 232 | calculated = calculated.reshape(-1,8) 233 | 234 | return calculated 235 | 236 | 237 | def get_enclosing_box(corners): 238 | """Get an enclosing box for ratated corners of a bounding box 239 | 240 | Parameters 241 | ---------- 242 | 243 | corners : numpy.ndarray 244 | Numpy array of shape `N x 8` containing N bounding boxes each described by their 245 | corner co-ordinates `x1 y1 x2 y2 x3 y3 x4 y4` 246 | 247 | Returns 248 | ------- 249 | 250 | numpy.ndarray 251 | Numpy array containing enclosing bounding boxes of shape `N X 4` where N is the 252 | number of bounding boxes and the bounding boxes are represented in the 253 | format `x1 y1 x2 y2` 254 | 255 | """ 256 | x_ = corners[:,[0,2,4,6]] 257 | y_ = corners[:,[1,3,5,7]] 258 | 259 | xmin = np.min(x_,1).reshape(-1,1) 260 | ymin = np.min(y_,1).reshape(-1,1) 261 | xmax = np.max(x_,1).reshape(-1,1) 262 | ymax = np.max(y_,1).reshape(-1,1) 263 | 264 | final = np.hstack((xmin, ymin, xmax, ymax,corners[:,8:])) 265 | 266 | return final 267 | 268 | 269 | def letterbox_image(img, inp_dim): 270 | '''resize image with unchanged aspect ratio using padding 271 | 272 | Parameters 273 | ---------- 274 | 275 | img : numpy.ndarray 276 | Image 277 | 278 | inp_dim: tuple(int) 279 | shape of the reszied image 280 | 281 | Returns 282 | ------- 283 | 284 | numpy.ndarray: 285 | Resized image 286 | 287 | ''' 288 | 289 | inp_dim = (inp_dim, inp_dim) 290 | img_w, img_h = img.shape[1], img.shape[0] 291 | w, h = inp_dim 292 | new_w = int(img_w * min(w/img_w, h/img_h)) 293 | new_h = int(img_h * min(w/img_w, h/img_h)) 294 | resized_image = cv2.resize(img, (new_w,new_h)) 295 | 296 | canvas = np.full((inp_dim[1], inp_dim[0], 3), 0) 297 | 298 | canvas[(h-new_h)//2:(h-new_h)//2 + new_h,(w-new_w)//2:(w-new_w)//2 + new_w, :] = resized_image 299 | 300 | return canvas -------------------------------------------------------------------------------- /lib/core/model/loss/centernet_loss.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | import tensorflow.contrib.slim as slim 6 | from tensorflow.python.ops 
import array_ops 7 | 8 | from train_config import config as cfg 9 | 10 | from lib.core.model.loss.iouloss import * 11 | 12 | def loss(predicts,targets): 13 | pred_hm, pred_wh=predicts 14 | hm_target, wh_target,weights_=targets 15 | 16 | with tf.name_scope('losses'): 17 | # whether anchor is matched 18 | # shape [batch_size, num_anchors] 19 | 20 | with tf.name_scope('classification_loss'): 21 | hm_loss = focal_loss( 22 | pred_hm, 23 | hm_target 24 | ) 25 | 26 | 27 | 28 | with tf.name_scope('iou_loss'): 29 | H, W = tf.shape(pred_hm)[1],tf.shape(pred_hm)[2] 30 | 31 | weights_=tf.transpose(weights_,perm=[0,3,1,2]) 32 | mask = tf.reshape(weights_,shape=(-1, H, W)) 33 | avg_factor = tf.reduce_sum(mask) + 1e-4 34 | 35 | base_step = cfg.MODEL.global_stride 36 | shifts_x = tf.range(0, (W - 1) * base_step + 1, base_step, 37 | dtype=tf.int32) 38 | shifts_x=tf.cast(shifts_x,dtype=tf.float32) 39 | shifts_y = tf.range(0, (H - 1) * base_step + 1, base_step, 40 | dtype=tf.int32) 41 | shifts_y = tf.cast(shifts_y, dtype=tf.float32) 42 | 43 | x_range, y_range = tf.meshgrid(shifts_x, shifts_y) 44 | 45 | base_loc = tf.stack((x_range, y_range), axis=2) # (2, h, w) 46 | 47 | base_loc = tf.expand_dims(base_loc, axis=0) 48 | 49 | pred_boxes = tf.concat((base_loc[:,:,:,0:1] - pred_wh[:,:,:, 0:1], 50 | base_loc[:,:,:,1:2] - pred_wh[:,:,:, 1:2], 51 | base_loc[:,:,:,0:1] + pred_wh[:,:,:, 2:3], 52 | base_loc[:,:,:,1:2] + pred_wh[:,:,:, 3:4]), axis=3) 53 | 54 | # (batch, h, w, 4) 55 | boxes = wh_target#.permute(0, 2, 3, 1) 56 | 57 | wh_loss = ciou_loss(pred_boxes, boxes, mask, avg_factor=avg_factor) 58 | 59 | return hm_loss, wh_loss*5 60 | 61 | def _reg_l1_loss(pred, 62 | target, 63 | weight, 64 | avg_factor=None): 65 | pos_mask = weight > 0 66 | weight = tf.cast(weight[pos_mask], tf.float32) 67 | if avg_factor is None: 68 | avg_factor = tf.reduce_sum(pos_mask) + 1e-6 69 | bboxes1 = tf.reshape(pred[pos_mask], (-1, 4)) 70 | bboxes2 = tf.reshape(target[pos_mask], (-1, 4)) 71 | 72 | 73 | loss=tf.reduce_mean(tf.abs(bboxes1-bboxes2),axis=1) 74 | return tf.reduce_sum(loss * weight) / avg_factor 75 | 76 | 77 | def classification_loss(predictions, targets): 78 | """ 79 | Arguments: 80 | predictions: a float tensor with shape [batch_size, num_anchors, num_classes + 1], 81 | representing the predicted logits for each class. 82 | targets: an int tensor with shape [batch_size, num_anchors]. 83 | Returns: 84 | a float tensor with shape [batch_size, num_anchors]. 85 | """ 86 | 87 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 88 | labels=targets, logits=predictions 89 | ) 90 | return cross_entropy 91 | 92 | 93 | def localization_loss(predictions, targets, indices, mask,sigma=9): 94 | """A usual L1 smooth loss. 95 | 96 | Arguments: 97 | predictions: a float tensor with shape [batch_size, num_anchors, 4], 98 | representing the (encoded) predicted locations of objects. 99 | targets: a float tensor with shape [batch_size, num_anchors, 4], 100 | representing the regression targets. 101 | weights: a float tensor with shape [batch_size, num_anchors]. 102 | Returns: 103 | a float tensor with shape [batch_size, num_anchors]. 
104 | """ 105 | 106 | indices = tf.where(tf.greater(targets, 0.)) 107 | predictions = tf.gather_nd(predictions, indices) 108 | targets = tf.gather_nd(targets, indices) 109 | 110 | 111 | abs_diff = tf.abs(predictions - targets) 112 | abs_diff_lt_1 = tf.less(abs_diff, 1.0/sigma) 113 | 114 | # compute the normalizer: the number of positive anchors 115 | normalizer = tf.maximum(1, tf.shape(indices)[0]) 116 | normalizer = tf.cast(normalizer, dtype=tf.float32) 117 | 118 | return tf.reduce_sum(tf.where(abs_diff_lt_1, 0.5 * tf.square(abs_diff), abs_diff - 0.5/sigma))/normalizer 119 | 120 | def reg_l1_loss(y_pred, y_true, indices, mask): 121 | b = tf.shape(y_pred)[0] 122 | k = tf.shape(indices)[1] 123 | c = tf.shape(y_pred)[-1] 124 | y_pred = tf.reshape(y_pred, (b, -1, c)) 125 | indices = tf.cast(indices, tf.int32) 126 | y_pred = tf.gather(y_pred, indices, batch_dims=1) 127 | mask = tf.tile(tf.expand_dims(mask, axis=-1), (1, 1, 2)) 128 | total_loss = tf.reduce_sum(tf.abs(y_true * mask - y_pred * mask)) 129 | reg_loss = total_loss / (tf.reduce_sum(mask) + 1e-4) 130 | return reg_loss 131 | 132 | 133 | 134 | # def focal_loss(prediction_tensor, target_tensor, weights=None, alpha=0.25, gamma=2): 135 | # r"""Compute focal loss for predictions. 136 | # Multi-labels Focal loss formula: 137 | # FL = -alpha * (z-p)^gamma * log(p) -(1-alpha) * p^gamma * log(1-p) 138 | # ,which alpha = 0.25, gamma = 2, p = sigmoid(x), z = target_tensor. 139 | # Args: 140 | # prediction_tensor: A float tensor of shape [batch_size, num_anchors, 141 | # num_classes] representing the predicted logits for each class 142 | # target_tensor: A float tensor of shape [batch_size, num_anchors, 143 | # num_classes] representing one-hot encoded classification targets 144 | # weights: A float tensor of shape [batch_size, num_anchors] 145 | # alpha: A scalar tensor for focal loss alpha hyper-parameter 146 | # gamma: A scalar tensor for focal loss gamma hyper-parameter 147 | # Returns: 148 | # loss: A (scalar) tensor representing the value of the loss function 149 | # """ 150 | # 151 | # 152 | # sigmoid_p = tf.nn.sigmoid(prediction_tensor) 153 | # zeros = array_ops.zeros_like(sigmoid_p, dtype=sigmoid_p.dtype) 154 | # 155 | # # For poitive prediction, only need consider front part loss, back part is 0; 156 | # # target_tensor > zeros <=> z=1, so poitive coefficient = z - p. 157 | # pos_p_sub = array_ops.where(target_tensor > zeros, target_tensor - sigmoid_p, zeros) 158 | # 159 | # # For negative prediction, only need consider back part loss, front part is 0; 160 | # # target_tensor > zeros <=> z=1, so negative coefficient = 0. 161 | # neg_p_sub = array_ops.where(target_tensor > zeros, zeros, sigmoid_p) 162 | # per_entry_cross_ent = - alpha * (pos_p_sub ** gamma) * tf.log(tf.clip_by_value(sigmoid_p, 1e-8, 1.0)) \ 163 | # - (1 - alpha) * (neg_p_sub ** gamma) * tf.log(tf.clip_by_value(1.0 - sigmoid_p, 1e-8, 1.0)) 164 | # 165 | # 166 | # # compute the normalizer: the number of positive anchors 167 | # # normalizer = tf.where(tf.greater(target_tensor, 0)) 168 | # # normalizer = tf.cast(tf.shape(normalizer)[0], tf.float32) 169 | # # normalizer = tf.maximum(1., normalizer) 170 | # 171 | # 172 | # return tf.reduce_sum(per_entry_cross_ent) 173 | 174 | 175 | def focal_loss(pred, gt): 176 | ''' Modified focal loss. Exactly the same as CornerNet. 
177 | Runs faster and costs a little bit more memory 178 | Arguments: 179 | pred (batch,h,w,c) 180 | gt_regr (batch,h,w,c) 181 | ''' 182 | pos_inds = tf.cast(tf.equal(gt, 1.0), dtype=tf.float32) 183 | neg_inds = 1.0 - pos_inds 184 | neg_weights = tf.pow(1.0 - gt, 4.0) 185 | 186 | pred = tf.clip_by_value(pred, 1e-6, 1.0 - 1e-6) 187 | pos_loss = tf.log(pred) * tf.pow(1.0 - pred, 2.0) * pos_inds 188 | neg_loss = tf.log(1.0 - pred) * tf.pow(pred, 2.0) * neg_weights * neg_inds 189 | 190 | num_pos = tf.reduce_sum(pos_inds) 191 | pos_loss = tf.reduce_sum(pos_loss) 192 | neg_loss = tf.reduce_sum(neg_loss) 193 | 194 | normalizer = tf.maximum(1., num_pos) 195 | loss = - (pos_loss + neg_loss) / normalizer 196 | 197 | return loss 198 | 199 | 200 | def ohem_loss(logits, targets, weights): 201 | 202 | 203 | indices = tf.where(tf.not_equal(weights, -1)) 204 | targets = tf.gather_nd(targets, indices) 205 | logits = tf.gather_nd(logits, indices) 206 | 207 | 208 | logits=tf.reshape(logits,shape=[-1,cfg.DATA.num_class]) 209 | targets = tf.reshape(targets, shape=[-1]) 210 | 211 | weights=tf.reshape(weights,shape=[-1]) 212 | 213 | 214 | dtype = logits.dtype 215 | 216 | pmask = weights 217 | fpmask = tf.cast(pmask, dtype) 218 | n_positives = tf.reduce_sum(fpmask) 219 | 220 | 221 | no_classes = tf.cast(pmask, tf.int32) 222 | 223 | predictions = slim.softmax(logits) 224 | 225 | 226 | nmask = tf.logical_not(tf.cast(pmask,tf.bool)) 227 | 228 | fnmask = tf.cast(nmask, dtype) 229 | 230 | nvalues = tf.where(nmask, 231 | predictions[:, 0], 232 | 1. - fnmask) 233 | nvalues_flat = tf.reshape(nvalues, [-1]) 234 | # Number of negative entries to select. 235 | max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32) 236 | n_neg = tf.cast(cfg.MODEL.max_negatives_per_positive * n_positives, tf.int32) + cfg.TRAIN.batch_size 237 | 238 | n_neg = tf.minimum(n_neg, max_neg_entries) 239 | 240 | val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg) 241 | max_hard_pred = -val[-1] 242 | # Final negative mask. 243 | nmask = tf.logical_and(nmask, nvalues < max_hard_pred) 244 | fnmask = tf.cast(nmask, dtype) 245 | 246 | # Add cross-entropy loss. 
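# Note on the mined negatives above: `nvalues` holds the predicted background
# probability for candidate negatives (and 1.0 elsewhere), so top_k over -nvalues
# keeps the n_neg negatives the model is most confident are foreground, i.e. the
# hardest negatives; the final `fnmask` zeroes out every easier negative.
# In the two name scopes below, the term weighted by `fpmask` (true labels, positives)
# is stored in `neg_loss`, while the term weighted by `fnmask` (background label,
# mined negatives) is stored in `pos_loss`; the variable names appear swapped, but the
# returned sum (neg_loss + pos_loss) / normalizer is unaffected.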
247 | with tf.name_scope('cross_entropy_pos'): 248 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, 249 | labels=targets) 250 | 251 | neg_loss = tf.reduce_sum(loss * fpmask) 252 | 253 | with tf.name_scope('cross_entropy_neg'): 254 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, 255 | labels=no_classes) 256 | pos_loss = tf.reduce_sum(loss * fnmask) 257 | 258 | # compute the normalizer: the number of positive anchors 259 | normalizer = tf.where(tf.equal(weights, 1)) 260 | normalizer = tf.cast(tf.shape(normalizer)[0], tf.float32) 261 | normalizer = tf.maximum(1., normalizer) 262 | 263 | return (neg_loss+pos_loss)/normalizer 264 | 265 | 266 | 267 | 268 | 269 | 270 | -------------------------------------------------------------------------------- /lib/core/anchor/anchor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | 5 | import sys 6 | sys.path.append('.') 7 | 8 | 9 | from lib.core.anchor.box_utils import encode,np_iou 10 | 11 | from train_config import config as cfg 12 | 13 | 14 | 15 | class CellAnchor(): 16 | 17 | def __init__(self): 18 | pass 19 | 20 | @classmethod 21 | def generate_cell_anchor(self,base_size=16,ratios=[0.5,1.,2.],scales=2**np.arange(3,6),rect=cfg.ANCHOR.rect): 22 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 23 | anchors_in_ratios = self.make_anchor_in_ratios(base_anchor, ratios, rect) 24 | anchors_in_scales = self.make_anchor_in_sclaes(anchors_in_ratios, scales) 25 | return anchors_in_scales 26 | 27 | @classmethod 28 | def _to_whxy(self,anchors): 29 | w=anchors[2]-anchors[0]+1 30 | h=anchors[3]-anchors[1]+1 31 | 32 | x=anchors[0]+(w-1)/2 33 | y=anchors[1]+(h-1)/2 34 | return w,h,x,y 35 | 36 | @classmethod 37 | def _to_xyxy(self,w,h,x,y): 38 | 39 | x0=x-(w-1)/2 40 | y0=y-(h-1)/2 41 | x1=x+(w-1)/2 42 | y1 = y + (h-1) / 2 43 | 44 | return np.stack((x0,y0,x1,y1),axis=-1) 45 | 46 | @classmethod 47 | def make_anchor_in_ratios(self,base_anchor,ratios,rect=False): 48 | 49 | anchors_in_ratios=[] 50 | w,h,x,y=self._to_whxy(base_anchor) 51 | area=w*h 52 | 53 | for ratio in ratios: 54 | 55 | ### choose the face anchor ratio h/w ==1.5 or 1 56 | if rect: 57 | w=h=np.round(np.sqrt(area/ratio)) 58 | if cfg.ANCHOR.rect_longer: 59 | h=np.round(1.5*w) 60 | else: 61 | w=np.round(np.sqrt(area/ratio)) 62 | h=np.round(ratio*w) 63 | 64 | anchors_in_ratios.append(self._to_xyxy(w,h,x,y)) 65 | 66 | 67 | return np.array(anchors_in_ratios) 68 | 69 | @classmethod 70 | def make_anchor_in_sclaes(self,anchors,scales): 71 | anchors_res=[] 72 | 73 | for anchor in anchors: 74 | w,h,x,y=self._to_whxy(anchor) 75 | w=w*scales 76 | h=h*scales 77 | anchors_sclase=self._to_xyxy(w,h,x,y) 78 | anchors_res.append(anchors_sclase) 79 | return np.array(anchors_res).reshape([-1,4]) 80 | 81 | class Anchor(): 82 | 83 | def __init__(self): 84 | 85 | self.strides=cfg.ANCHOR.ANCHOR_STRIDES 86 | self.sizes = cfg.ANCHOR.ANCHOR_SIZES 87 | 88 | self.ratios=cfg.ANCHOR.ANCHOR_RATIOS 89 | self.scales=cfg.ANCHOR.ANCHOR_SCALES 90 | 91 | self.max_size=cfg.DATA.max_size ##use to calculate the anchor 92 | 93 | self.anchors=self.produce_anchors() 94 | 95 | self.decode_anchors=self.get_decode_anchor() 96 | 97 | def produce_anchors(self): 98 | anchors_per_level = self.get_all_anchors_fpn() 99 | flatten_anchors_per_level = [k.reshape((-1, 4)) for k in anchors_per_level] 100 | all_anchors_flatten = np.concatenate(flatten_anchors_per_level, axis=0) 101 | return all_anchors_flatten 102 | 103 | def 
get_all_anchors(self,stride=None, sizes=None): 104 | """ 105 | Get all anchors in the largest possible image, shifted, floatbox 106 | Args: 107 | stride (int): the stride of anchors. 108 | sizes (tuple[int]): the sizes (sqrt area) of anchors 109 | 110 | Returns: 111 | anchors: SxSxNUM_ANCHORx4, where S == ceil(MAX_SIZE/STRIDE), floatbox 112 | The layout in the NUM_ANCHOR dim is NUM_RATIO x NUM_SIZE. 113 | 114 | """ 115 | 116 | # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors 117 | # are centered on stride / 2, have (approximate) sqrt areas of the specified 118 | # sizes, and aspect ratios as given. 119 | cell_anchors = CellAnchor.generate_cell_anchor( 120 | stride, 121 | scales=np.array(sizes, dtype=np.float) / stride, 122 | ratios=np.array(self.ratios, dtype=np.float)) 123 | # anchors are intbox here. 124 | # anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride) 125 | 126 | field_size_y = int(np.ceil(self.max_size[0] / stride)) 127 | field_size_x = int(np.ceil(self.max_size[1] / stride)) 128 | 129 | shifts_x = np.arange(0, field_size_x) * stride 130 | shifts_y = np.arange(0, field_size_y) * stride 131 | shift_x, shift_y = np.meshgrid(shifts_x, shifts_y) 132 | shift_x = shift_x.flatten() 133 | shift_y = shift_y.flatten() 134 | shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose() 135 | # Kx4, K = field_size * field_size 136 | K = shifts.shape[0] 137 | 138 | A = cell_anchors.shape[0] 139 | field_of_anchors = ( 140 | cell_anchors.reshape((1, A, 4)) + 141 | shifts.reshape((1, K, 4)).transpose((1, 0, 2))) 142 | field_of_anchors = field_of_anchors.reshape((field_size_y, field_size_x, A, 4)) 143 | # FSxFSxAx4 144 | # Many rounding happens inside the anchor code anyway 145 | # assert np.all(field_of_anchors == field_of_anchors.astype('int32')) 146 | field_of_anchors = field_of_anchors.astype('float32') 147 | field_of_anchors[:, :, :, [2, 3]] += 1 148 | return field_of_anchors 149 | 150 | def get_all_anchors_fpn(self): 151 | """ 152 | Returns: 153 | [anchors]: each anchors is a SxSx NUM_ANCHOR_RATIOS x4 array. 
154 | """ 155 | strides =self.strides 156 | sizes = self.sizes 157 | 158 | assert len(strides) == len(sizes) 159 | foas = [] 160 | for stride, size in zip(strides, sizes): 161 | sizes_=size*np.array(self.scales) 162 | foa = self.get_all_anchors(stride=stride, sizes=sizes_) 163 | foas.append(foa) 164 | 165 | 166 | return foas 167 | 168 | def produce_target(self, boxes, labels): 169 | boxes = boxes.copy() 170 | 171 | all_anchors_flatten =self.anchors 172 | 173 | # inside_ind, inside_anchors = filter_boxes_inside_shape(all_anchors_flatten, image.shape[:2]) 174 | inside_anchors = all_anchors_flatten 175 | 176 | # obtain anchor labels and their corresponding gt boxes 177 | anchor_labels, anchor_gt_boxes = self.get_anchor_labels(inside_anchors, boxes, labels) 178 | 179 | # start = 0 180 | # multilevel_inputs = [] 181 | # for level_anchor in anchors_per_level: 182 | # assert level_anchor.shape[2] == len(cfg.ANCHOR.ANCHOR_RATIOS) 183 | # anchor_shape = level_anchor.shape[:3] # fHxfWxNUM_ANCHOR_RATIOS 184 | # num_anchor_this_level = np.prod(anchor_shape) 185 | # end = start + num_anchor_this_level 186 | # multilevel_inputs.append( 187 | # (all_labels[start: end].reshape(anchor_shape), 188 | # all_boxes[start: end, :].reshape(anchor_shape + (4,)) 189 | # )) 190 | # start = end 191 | # assert end == num_all_anchors, "{} != {}".format(end, num_all_anchors) 192 | # return multilevel_inputs 193 | return anchor_gt_boxes, anchor_labels 194 | 195 | def get_anchor_labels(self,anchors, gt_boxes, labels): 196 | # This function will modify labels and return the filtered inds 197 | 198 | NA, NB = len(anchors), len(gt_boxes) 199 | assert NB > 0 # empty images should have been filtered already 200 | # ########## 201 | 202 | anchor_state= np.zeros((NA,), dtype='int32')-1 203 | 204 | anchor_labels = np.zeros((NA,), dtype='int32') 205 | anchor_boxes = np.zeros((NA, 4), dtype='float32') 206 | 207 | box_ious = np_iou(anchors, gt_boxes) # NA x NB 208 | 209 | # for each anchor box choose the groundtruth box with largest iou, set iou<0.4 as backgroud, ignore 0.4-0.5 210 | max_iou = box_ious.max(axis=1) # NA 211 | 212 | positive_anchor_indices = np.where(max_iou > cfg.ANCHOR.POSITIVE_ANCHOR_THRESH)[0] 213 | negative_anchor_indices = np.where(max_iou < cfg.ANCHOR.NEGATIVE_ANCHOR_THRESH)[0] 214 | 215 | positive_iou = box_ious[positive_anchor_indices] 216 | matched_gt_box_indices = positive_iou.argmax(axis=1) 217 | 218 | anchor_labels[positive_anchor_indices] = labels[matched_gt_box_indices] 219 | anchor_state[positive_anchor_indices]=1 220 | anchor_boxes[positive_anchor_indices] = gt_boxes[matched_gt_box_indices] 221 | 222 | anchor_state[negative_anchor_indices] = 0 223 | 224 | fg_boxes = anchor_boxes[anchor_state==1] 225 | 226 | matched_anchors = anchors[anchor_state==1] 227 | 228 | ##select and normlised the box coordinate 229 | fg_boxes[:,0::2] = fg_boxes[:,0::2] / self.max_size[1] 230 | fg_boxes[:, 1::2] = fg_boxes[:, 1::2] / self.max_size[0] 231 | 232 | matched_anchors[:,0::2] = matched_anchors[:,0::2] / self.max_size[1] 233 | matched_anchors[:, 1::2] = matched_anchors[:, 1::2] / self.max_size[0] 234 | 235 | 236 | 237 | encode_fg_boxes = encode(fg_boxes, matched_anchors) 238 | anchor_boxes[anchor_state==1] = encode_fg_boxes 239 | 240 | anchor_labels=np.stack([anchor_labels,anchor_state]) 241 | 242 | # 243 | return anchor_labels, anchor_boxes 244 | 245 | 246 | 247 | def reset_anchors(self,max_size=(512,512)): 248 | ''' 249 | 250 | :param max_size: h,w 251 | :return: 252 | ''' 253 | self.max_size=max_size 254 | 255 | 
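# The anchor grid has ceil(max_size / stride) cells per FPN level, so both the
# flattened anchors and their (w, h, w, h) decode scales below are regenerated
# whenever the input size changes.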
self.anchors = self.produce_anchors() 256 | 257 | self.decode_anchors = self.get_decode_anchor() 258 | 259 | 260 | def get_decode_anchor(self): 261 | ''' 262 | 263 | :return: the anchor decode [w,h,w,h] 264 | 265 | the right way to decode the bbox is res *[w,h,w,h]+ raw_anchor, which means anchors_bias +anchors 266 | ''' 267 | anchor_widths = self.anchors[:, 2] - self.anchors[:, 0] 268 | anchor_heights = self.anchors[:, 3] - self.anchors[:, 1] 269 | tmp_anchor_details = np.stack([anchor_widths, anchor_heights, anchor_widths, anchor_heights], axis=1) 270 | 271 | return tmp_anchor_details 272 | 273 | 274 | 275 | anchor_tools=Anchor() 276 | 277 | if __name__=='__main__': 278 | ##model_eval the anchor codes there 279 | import cv2 280 | 281 | cell_anchor = CellAnchor.generate_cell_anchor() 282 | print(cell_anchor) 283 | 284 | 285 | anchor_maker=Anchor() 286 | 287 | all_anchor= anchor_maker.anchors 288 | print(len(all_anchor)) 289 | image=np.ones(shape=[cfg.DATA.max_size[0],cfg.DATA.max_size[1],3])*255 290 | 291 | # for x in anchors: 292 | # print(x.shape) 293 | 294 | anchors=np.array(all_anchor) 295 | # cv2.namedWindow('anchors', 0) 296 | # for i in range(10000,anchors.shape[0]): 297 | # box=anchors[i] 298 | # print(box[2]-box[0]) 299 | # cv2.rectangle(image, (int(box[0]), int(box[1])), 300 | # (int(box[2]), int(box[3])), (255, 0, 0), 1) 301 | # 302 | # 303 | # cv2.imshow('anchors',image) 304 | # cv2.waitKey(0) 305 | 306 | anchor_labels, anchor_boxes=anchor_maker.produce_target(np.array([[34., 396., 58., 508.],[20,140,50,160]]),np.array([1,1])) 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | -------------------------------------------------------------------------------- /lib/dataset/centernet_data_sampler.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | import numpy as np 3 | import math 4 | import cv2 5 | 6 | from train_config import config as cfg 7 | 8 | def gaussian_radius(det_size, min_overlap=cfg.MODEL.min_overlap): 9 | height, width = det_size 10 | 11 | a1 = 1 12 | b1 = (height + width) 13 | c1 = width * height * (1 - min_overlap) / (1 + min_overlap) 14 | sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1) 15 | r1 = (b1 + sq1) / 2 16 | 17 | a2 = 4 18 | b2 = 2 * (height + width) 19 | c2 = (1 - min_overlap) * width * height 20 | sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2) 21 | r2 = (b2 + sq2) / 2 22 | 23 | a3 = 4 * min_overlap 24 | b3 = -2 * min_overlap * (height + width) 25 | c3 = (min_overlap - 1) * width * height 26 | sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3) 27 | r3 = (b3 + sq3) / 2 28 | return min(r1, r2, r3) 29 | 30 | def draw_umich_gaussian(heatmap, center, radius, k=1): 31 | diameter = 2 * radius + 1 32 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 33 | 34 | x, y = int(center[0]), int(center[1]) 35 | 36 | height, width = heatmap.shape[0:2] 37 | 38 | left, right = min(x, radius), min(width - x, radius + 1) 39 | top, bottom = min(y, radius), min(height - y, radius + 1) 40 | 41 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 42 | masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right] 43 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug 44 | np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) 45 | return heatmap 46 | def draw_msra_gaussian(heatmap, center, sigma): 47 | #heatmap=np.transpose(heatmap,axes=[1,0]) 48 | tmp_size = sigma * 3 49 | mu_x = int(center[0] + 0.5) 50 | mu_y = int(center[1] + 0.5) 51 | w, h = 
heatmap.shape[0], heatmap.shape[1] 52 | ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] 53 | br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] 54 | if ul[0] >= h or ul[1] >= w or br[0] < 0 or br[1] < 0: 55 | return heatmap 56 | size = 2 * tmp_size + 1 57 | x = np.arange(0, size, 1, np.float32) 58 | y = x[:, np.newaxis] 59 | x0 = y0 = size // 2 60 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) 61 | g_x = max(0, -ul[0]), min(br[0], h) - ul[0] 62 | g_y = max(0, -ul[1]), min(br[1], w) - ul[1] 63 | img_x = max(0, ul[0]), min(br[0], h) 64 | img_y = max(0, ul[1]), min(br[1], w) 65 | heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]] = np.maximum( 66 | heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]], 67 | g[g_y[0]:g_y[1], g_x[0]:g_x[1]]) 68 | #heatmap = np.transpose(heatmap, axes=[1, 0]) 69 | return heatmap 70 | 71 | def gaussian2D(shape, sigma=1): 72 | m, n = [(ss - 1.) / 2. for ss in shape] 73 | y, x = np.ogrid[-m:m+1,-n:n+1] 74 | 75 | h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) 76 | h[h < np.finfo(h.dtype).eps * h.max()] = 0 77 | return h 78 | 79 | def draw_dense_reg(regmap, heatmap, center, value, radius, is_offset=False): 80 | diameter = 2 * radius + 1 81 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 82 | value = np.array(value, dtype=np.float32).reshape(-1, 1, 1) 83 | dim = value.shape[0] 84 | reg = np.ones((dim, diameter * 2 + 1, diameter * 2 + 1), dtype=np.float32) * value 85 | if is_offset and dim == 2: 86 | delta = np.arange(diameter * 2 + 1) - radius 87 | reg[0] = reg[0] - delta.reshape(1, -1) 88 | reg[1] = reg[1] - delta.reshape(-1, 1) 89 | 90 | x, y = int(center[0]), int(center[1]) 91 | 92 | height, width = heatmap.shape[0:2] 93 | 94 | left, right = min(x, radius), min(width - x, radius + 1) 95 | top, bottom = min(y, radius), min(height - y, radius + 1) 96 | 97 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 98 | masked_regmap = regmap[:, y - top:y + bottom, x - left:x + right] 99 | masked_gaussian = gaussian[radius - top:radius + bottom, 100 | radius - left:radius + right] 101 | masked_reg = reg[:, radius - top:radius + bottom, 102 | radius - left:radius + right] 103 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug 104 | idx = (masked_gaussian >= masked_heatmap).reshape( 105 | 1, masked_gaussian.shape[0], masked_gaussian.shape[1]) 106 | masked_regmap = (1 - idx) * masked_regmap + idx * masked_reg 107 | regmap[:, y - top:y + bottom, x - left:x + right] = masked_regmap 108 | return regmap 109 | 110 | 111 | def produce_heat_map(center, map_size, stride,objects_size, sigma,magic_divide=100): 112 | grid_y = map_size[0] // stride 113 | grid_x = map_size[1] // stride 114 | start = stride / 2.0 - 0.5 115 | y_range = [i for i in range(grid_y)] 116 | x_range = [i for i in range(grid_x)] 117 | xx, yy = np.meshgrid(x_range, y_range) 118 | xx = xx * stride + start 119 | yy = yy * stride + start 120 | 121 | radis=gaussian_radius(objects_size) 122 | ratio=((objects_size[0]*objects_size[1]+0.000005)/(map_size[1]*map_size[0]))*magic_divide 123 | 124 | #d2 = (yy - center[0]) ** 2 / 2. / sigma_y / sigma_y + (xx - center[1]) ** 2 / 2. 
/ sigma_x / sigma_x 125 | d2 = (yy - center[0]) ** 2 + (xx - center[1]) ** 2 126 | exponent = d2 / 2.0 / sigma / sigma/ratio 127 | heatmap = np.exp(-exponent) 128 | 129 | am = np.amax(heatmap) 130 | if am > 0: 131 | heatmap /= am 132 | 133 | return heatmap 134 | 135 | # def produce_heatmaps_with_bbox_official(image,boxes,klass,num_klass=cfg.DATA.num_class): 136 | # h_out, w_out, _ = image.shape 137 | # ## stride equal to 4 138 | # h_out //= 4 139 | # w_out //= 4 140 | # boxes[:, :4] //= 4 141 | # 142 | # heatmap = np.zeros(shape=[h_out, w_out, num_klass],dtype=np.float32) 143 | # 144 | # regression_map = np.zeros(shape=[h_out, w_out, 2],dtype=np.float32) 145 | # 146 | # each_klass = set(klass) 147 | # for one_klass in each_klass: 148 | # 149 | # for single_box, single_klass in zip(boxes, klass): 150 | # if single_klass == one_klass: 151 | # ####box center (y,x) 152 | # center = [round((single_box[1] + single_box[3]) / 2), 153 | # round((single_box[0] + single_box[2]) / 2)] ###0-1 154 | # center = [int(x) for x in center] 155 | # 156 | # object_width = single_box[2] - single_box[0] 157 | # object_height = single_box[3] - single_box[1] 158 | # 159 | # 160 | # if center[0] >= h_out: 161 | # center[0] -= 1 162 | # if center[1] >= w_out: 163 | # center[1] -= 1 164 | # radius = gaussian_radius((math.ceil(object_height), math.ceil(object_width))) 165 | # radius = max(0, int(radius)) 166 | # draw_msra_gaussian(heatmap[:, :, int(one_klass)],center,radius) 167 | # 168 | # regression_map[center[0], center[1], 0] = object_width 169 | # regression_map[center[0], center[1], 1] = object_height 170 | # 171 | # 172 | # if cfg.DATA.use_int8_data: 173 | # h_am = np.amax(heatmap) 174 | # 175 | # heatmap = (heatmap/h_am*cfg.DATA.use_int8_enlarge).astype(np.uint8) 176 | # 177 | # regression_map=regression_map.astype(np.uint8) 178 | # return heatmap, regression_map 179 | # else: 180 | # 181 | # return heatmap.astype(np.float16), regression_map.astype(np.float16) 182 | 183 | def produce_heatmaps_with_bbox_official(image,boxes,klass,num_klass=cfg.DATA.num_class): 184 | return _official_centernet_datasampler(image,boxes,klass,num_klass) 185 | 186 | def _official_centernet_datasampler(image,boxes,klass,num_classes=cfg.DATA.num_class,max_objs=cfg.DATA.max_objs): 187 | 188 | 189 | num_obj=min(max_objs,len(boxes)) 190 | h_out, w_out, _ = image.shape 191 | ## stride equal to 4 192 | output_h=h_out / cfg.MODEL.global_stride 193 | output_w=w_out / cfg.MODEL.global_stride 194 | 195 | if len(boxes)>0: 196 | boxes[:, :4] /= cfg.MODEL.global_stride 197 | 198 | hm = np.zeros((num_classes, math.ceil(output_h), math.ceil(output_w)), dtype=np.float32) 199 | wh = np.zeros((max_objs, 2), dtype=np.float32) 200 | 201 | reg = np.zeros((max_objs, 2), dtype=np.float32) 202 | ind = np.zeros((max_objs), dtype=np.int64) 203 | reg_mask = np.zeros((max_objs), dtype=np.uint8) 204 | 205 | for k in range(num_obj): 206 | 207 | bbox = boxes[k] 208 | cls_id = klass[k] 209 | 210 | h, w = bbox[3] - bbox[1], bbox[2] - bbox[0] 211 | if h > 0 and w > 0: 212 | radius = gaussian_radius((math.ceil(h), math.ceil(w))) 213 | 214 | radius = max(0, int(radius)) 215 | if radius == 0: 216 | continue 217 | 218 | ct = np.array( 219 | [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32) 220 | ct_int = ct.astype(np.int32) 221 | draw_umich_gaussian(hm[cls_id], ct_int, radius) 222 | wh[k] = 1. * w, 1. 
* h 223 | ind[k] = ct_int[1] * output_w + ct_int[0] 224 | reg[k] = ct - ct_int 225 | reg_mask[k] = 1 226 | 227 | heatmap=np.transpose(hm,axes=[1,2,0]) 228 | 229 | if cfg.DATA.use_int8_data: 230 | 231 | heatmap = (heatmap*cfg.DATA.use_int8_enlarge).astype(np.uint8) 232 | 233 | return heatmap, wh,reg,ind,reg_mask 234 | else: 235 | return heatmap, wh,reg,ind,reg_mask 236 | 237 | 238 | 239 | def get_3rd_point(a, b): 240 | direct = a - b 241 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 242 | 243 | def get_dir(src_point, rot_rad): 244 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 245 | 246 | src_result = [0, 0] 247 | src_result[0] = src_point[0] * cs - src_point[1] * sn 248 | src_result[1] = src_point[0] * sn + src_point[1] * cs 249 | 250 | return src_result 251 | def get_affine_transform(center, 252 | scale, 253 | rot, 254 | output_size, 255 | shift=np.array([0, 0], dtype=np.float32), 256 | inv=0): 257 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list): 258 | scale = np.array([scale, scale], dtype=np.float32) 259 | 260 | scale_tmp = scale 261 | src_w = scale_tmp[0] 262 | dst_w = output_size[0] 263 | dst_h = output_size[1] 264 | 265 | rot_rad = np.pi * rot / 180 266 | src_dir = get_dir([0, src_w * -0.5], rot_rad) 267 | dst_dir = np.array([0, dst_w * -0.5], np.float32) 268 | 269 | src = np.zeros((3, 2), dtype=np.float32) 270 | dst = np.zeros((3, 2), dtype=np.float32) 271 | src[0, :] = center + scale_tmp * shift 272 | src[1, :] = center + src_dir + scale_tmp * shift 273 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5] 274 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir 275 | 276 | src[2:, :] = get_3rd_point(src[0, :], src[1, :]) 277 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) 278 | 279 | if inv: 280 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 281 | else: 282 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 283 | 284 | return trans 285 | 286 | 287 | 288 | def affine_transform(pt, t): 289 | new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32).T 290 | new_pt = np.dot(t, new_pt) 291 | return new_pt[:2] -------------------------------------------------------------------------------- /lib/dataset/ttf_net_data_sampler.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append('.') 4 | 5 | import numpy as np 6 | 7 | import math 8 | import cv2 9 | from train_config import config as cfg 10 | 11 | def safe_box(bboxes,klasses): 12 | safe_box=[] 13 | safe_klass=[] 14 | for i in range(bboxes.shape[0]): 15 | cur_box=bboxes[i] 16 | cur_klass=klasses[i] 17 | x_min, y_min, x_max, y_max = cur_box[0], cur_box[1], cur_box[ 2], cur_box[ 3] 18 | 19 | if x_min 0 and min(masked_heatmap.shape) > 0: # TODO debug 125 | np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) 126 | return heatmap 127 | def ttfnet_centernet_datasampler(self,image, gt_boxes, gt_labels, num_classes=cfg.DATA.num_class, max_objs=cfg.DATA.max_objs): 128 | 129 | """ 130 | 131 | Args: 132 | gt_boxes: tensor, tensor <=> img, (num_gt, 4). 133 | gt_labels: tensor, tensor <=> img, (num_gt,). 134 | feat_shape: tuple. 135 | 136 | Returns: 137 | heatmap: tensor, tensor <=> img, (80, h, w). 138 | box_target: tensor, tensor <=> img, (4, h, w) or (80 * 4, h, w). 
139 | reg_weight: tensor, same as box_target 140 | """ 141 | gt_boxes,gt_labels=safe_box(gt_boxes, gt_labels) 142 | 143 | 144 | img_h,img_w,_c=image.shape 145 | 146 | output_h, output_w = img_h//self.down_ratio,img_w//self.down_ratio 147 | 148 | 149 | heatmap_channel = num_classes 150 | 151 | heatmap = np.zeros((heatmap_channel, output_h, output_w),dtype=np.float32) 152 | fake_heatmap =np.zeros((output_h, output_w),dtype=np.float32) 153 | box_target = np.ones((self.wh_planes, output_h, output_w),dtype=np.float32) * -1 154 | reg_weight = np.zeros((self.wh_planes // 4, output_h, output_w),dtype=np.float32) 155 | 156 | 157 | if gt_boxes.shape[0]>0: 158 | 159 | if self.wh_area_process == 'log': 160 | boxes_areas_log = np.log(bbox_areas(gt_boxes)) 161 | elif self.wh_area_process == 'sqrt': 162 | boxes_areas_log = np.sqrt(bbox_areas(gt_boxes)) 163 | else: 164 | boxes_areas_log = bbox_areas(gt_boxes) 165 | 166 | boxes_area_topk_log, boxes_ind = torch_style_topK(boxes_areas_log, boxes_areas_log.shape[0]) 167 | 168 | if self.wh_area_process == 'norm': 169 | boxes_area_topk_log[:] = 1. 170 | 171 | gt_boxes = gt_boxes[boxes_ind] 172 | gt_labels = gt_labels[boxes_ind] 173 | 174 | feat_gt_boxes = gt_boxes / self.down_ratio 175 | feat_gt_boxes[:, [0, 2]] = np.clip(feat_gt_boxes[:, [0, 2]], a_min=0, 176 | a_max=output_w - 1) 177 | feat_gt_boxes[:, [1, 3]] = np.clip(feat_gt_boxes[:, [1, 3]], a_min=0, 178 | a_max=output_h - 1) 179 | feat_hs, feat_ws = (feat_gt_boxes[:, 3] - feat_gt_boxes[:, 1], 180 | feat_gt_boxes[:, 2] - feat_gt_boxes[:, 0]) 181 | 182 | # we calc the center and ignore area based on the gt-boxes of the origin scale 183 | # no peak will fall between pixels 184 | ct_ints = (np.stack([(gt_boxes[:, 0] + gt_boxes[:, 2]) / 2, 185 | (gt_boxes[:, 1] + gt_boxes[:, 3]) / 2], 186 | axis=1) / self.down_ratio).astype(np.int) 187 | 188 | 189 | h_radiuses_alpha = (feat_hs / 2. * self.alpha).astype(np.int) 190 | w_radiuses_alpha = (feat_ws / 2. * self.alpha).astype(np.int) 191 | 192 | if self.wh_gaussian and self.alpha != self.beta: 193 | h_radiuses_beta = (feat_hs / 2. * self.beta).astype(np.int) 194 | w_radiuses_beta = (feat_ws / 2. * self.beta).astype(np.int) 195 | 196 | if not self.wh_gaussian: 197 | # calculate positive (center) regions 198 | r1 = (1 - self.beta) / 2 199 | ctr_x1s, ctr_y1s, ctr_x2s, ctr_y2s = calc_region(gt_boxes.transpose(0, 1), r1) 200 | ctr_x1s, ctr_y1s, ctr_x2s, ctr_y2s = [np.round(x.float() / self.down_ratio).int() 201 | for x in [ctr_x1s, ctr_y1s, ctr_x2s, ctr_y2s]] 202 | ctr_x1s, ctr_x2s = [np.clamp(x, max=output_w - 1) for x in [ctr_x1s, ctr_x2s]] 203 | ctr_y1s, ctr_y2s = [np.clamp(y, max=output_h - 1) for y in [ctr_y1s, ctr_y2s]] 204 | else: 205 | boxes_ind=np.array([]) 206 | # larger boxes have lower priority than small boxes. 
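# The loop below visits boxes in the order produced by torch_style_topK, i.e. (as the
# comment above says) from largest to smallest, so where targets overlap the smaller,
# later-drawn box overwrites box_target / reg_weight for those pixels.
# In the Gaussian branch, each box's weight boxes_area_topk_log[k] is spread over its
# truncated Gaussian and divided by the Gaussian's sum, so the pixels of one box sum
# to that value no matter how many cells the Gaussian covers.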
207 | for k in range(boxes_ind.shape[0]): 208 | cls_id = gt_labels[k] 209 | 210 | fake_heatmap = fake_heatmap*0 211 | 212 | self.draw_truncate_gaussian(fake_heatmap, ct_ints[k], 213 | h_radiuses_alpha[k], w_radiuses_alpha[k]) 214 | 215 | heatmap[cls_id] = np.maximum(heatmap[cls_id], fake_heatmap) 216 | 217 | 218 | if self.wh_gaussian: 219 | if self.alpha != self.beta: 220 | fake_heatmap = fake_heatmap*0 221 | self.draw_truncate_gaussian(fake_heatmap, 222 | ct_ints[k], 223 | h_radiuses_beta[k], 224 | w_radiuses_beta[k]) 225 | box_target_inds = fake_heatmap > 0 226 | else: 227 | ctr_x1, ctr_y1, ctr_x2, ctr_y2 = ctr_x1s[k], ctr_y1s[k], ctr_x2s[k], ctr_y2s[k] 228 | box_target_inds = np.zeros_like(fake_heatmap, dtype=np.uint8) 229 | box_target_inds[ctr_y1:ctr_y2 + 1, ctr_x1:ctr_x2 + 1] = 1 230 | 231 | if self.wh_agnostic: 232 | 233 | box_target[:, box_target_inds] =np.expand_dims(gt_boxes[k],-1) 234 | 235 | cls_id = 0 236 | else: 237 | box_target[(cls_id * 4):((cls_id + 1) * 4), box_target_inds] = np.expand_dims(gt_boxes[k],-1) 238 | 239 | if self.wh_gaussian: 240 | local_heatmap = fake_heatmap[box_target_inds] 241 | 242 | 243 | 244 | ct_div = local_heatmap.sum() 245 | local_heatmap *= boxes_area_topk_log[k] 246 | reg_weight[cls_id, box_target_inds] = local_heatmap / ct_div 247 | else: 248 | reg_weight[cls_id, box_target_inds] = \ 249 | boxes_area_topk_log[k] / box_target_inds.sum() 250 | 251 | 252 | heatmap = np.transpose(heatmap, axes=[1, 2, 0]) 253 | box_target= np.transpose(box_target, axes=[1, 2, 0]) 254 | reg_weight = np.transpose(reg_weight, axes=[1, 2, 0]) 255 | 256 | 257 | 258 | 259 | 260 | if cfg.DATA.use_int8_data: 261 | 262 | heatmap = (heatmap * cfg.DATA.use_int8_enlarge).astype(np.uint8) 263 | return heatmap, box_target, reg_weight 264 | else: 265 | return heatmap, box_target, reg_weight 266 | 267 | 268 | 269 | 270 | 271 | if __name__=='__main__': 272 | 273 | 274 | from train_config import config as cfg 275 | 276 | 277 | data_sampler=CenternetDatasampler() 278 | 279 | for i in range(1000): 280 | image = cv2.imread('./lib/dataset/augmentor/test.jpg') 281 | boxes = np.array([[165, 60, 233, 138],[5, 60, 133, 138]], dtype=np.float) 282 | 283 | cls=np.array([0,0]) 284 | 285 | heatmap, box_target, reg_weight=data_sampler.ttfnet_centernet_datasampler(image,boxes,cls) 286 | 287 | hm=heatmap[:,:,0] 288 | wh = box_target[:, :, 1]+1 289 | 290 | weight=reg_weight[:, :, 0] 291 | 292 | print(np.max(wh)) 293 | print(np.max(weight)) 294 | cv2.namedWindow('image', 0) 295 | cv2.imshow('image', image) 296 | 297 | cv2.namedWindow('hm',0) 298 | cv2.imshow('hm',hm) 299 | 300 | cv2.namedWindow('weight', 0) 301 | cv2.imshow('weight', weight) 302 | 303 | cv2.namedWindow('wh', 0) 304 | cv2.imshow('wh', wh) 305 | cv2.waitKey(0) --------------------------------------------------------------------------------
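For reference, a minimal sketch of how the targets produced by ttfnet_centernet_datasampler line up with loss() in lib/core/model/loss/centernet_loss.py: the heatmap pairs with pred_hm, box_target holds ground-truth boxes in input-image pixels and pairs with the boxes decoded from pred_wh, and reg_weight is the per-pixel weight mask. The dummy predictions and shapes are illustrative assumptions only (a TF 1.x session and cfg.DATA.use_int8_data disabled are also assumed); this is not part of the repository's training pipeline.

import numpy as np
import tensorflow as tf

from lib.dataset.ttf_net_data_sampler import CenternetDatasampler
from lib.core.model.loss.centernet_loss import loss

sampler = CenternetDatasampler()
image = np.zeros((512, 512, 3), dtype=np.uint8)
boxes = np.array([[165., 60., 233., 138.]], dtype=np.float32)
labels = np.array([0])

# per-image targets: (h/stride, w/stride, num_class), (..., 4), (..., 1)
# (if cfg.DATA.use_int8_data were enabled, hm would come back uint8-scaled and need rescaling)
hm, box_target, reg_weight = sampler.ttfnet_centernet_datasampler(image, boxes, labels)

# add a batch dimension; the loss consumes batched NHWC tensors
hm_t = tf.constant(hm[None, ...], dtype=tf.float32)
wh_t = tf.constant(box_target[None, ...], dtype=tf.float32)
w_t = tf.constant(reg_weight[None, ...], dtype=tf.float32)

# dummy predictions: post-sigmoid heatmap scores, and per-pixel
# (left, top, right, bottom) distances in input-image pixels
pred_hm = tf.ones_like(hm_t) * 0.5
pred_wh = tf.ones_like(wh_t) * 16.

hm_loss, wh_loss = loss((pred_hm, pred_wh), (hm_t, wh_t, w_t))
with tf.Session() as sess:
    print(sess.run([hm_loss, wh_loss]))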