├── lib ├── __init__.py ├── core │ ├── __init__.py │ ├── model │ │ ├── __init__.py │ │ ├── head │ │ │ ├── __init__.py │ │ │ └── centernet_head.py │ │ ├── loss │ │ │ ├── __init__.py │ │ │ ├── iouloss.py │ │ │ └── centernet_loss.py │ │ ├── sqeeze_excitation │ │ │ ├── __init__.py │ │ │ └── se.py │ │ ├── fpn │ │ │ ├── __init__.py │ │ │ ├── plain_fpn.py │ │ │ └── seperateconv_fpn.py │ │ ├── net │ │ │ ├── __init__.py │ │ │ ├── resnet │ │ │ │ ├── __init__.py │ │ │ │ └── backbone.py │ │ │ ├── shufflenet │ │ │ │ └── backbone.py │ │ │ ├── arg_scope │ │ │ │ └── resnet_args_cope.py │ │ │ ├── mobilenet │ │ │ │ ├── backbone.py │ │ │ │ └── mobilenet_v2.py │ │ │ └── mobilenetv3 │ │ │ │ └── backbone.py │ │ └── centernet.py │ ├── anchor │ │ ├── __init__.py │ │ ├── nms.py │ │ ├── tf_anchors.py │ │ ├── box_utils.py │ │ └── anchor.py │ ├── .DS_Store │ └── api │ │ ├── face_detector_bk.py │ │ └── face_detector.py ├── dataset │ ├── augmentor │ │ ├── data_aug │ │ │ ├── __init__.py │ │ │ └── bbox_util.py │ │ ├── README.md │ │ ├── test.jpg │ │ ├── test2.jpg │ │ ├── test.py │ │ └── visual_augmentation.py │ ├── .DS_Store │ ├── centernet_data_sampler.py │ └── ttf_net_data_sampler.py ├── helper │ ├── __init__.py │ └── logger.py └── .DS_Store ├── tools ├── __init__.py ├── .DS_Store ├── convert_to_coreml.py └── auto_freeze.py ├── configs ├── __init__.py ├── face │ ├── __init__.py │ ├── face_mbv3_config.py │ └── face_shufflenet_5x5_config.py └── mscoco │ ├── __init__.py │ ├── mbv3_config.py │ ├── shufflenetplus_config.py │ ├── shufflenet_5x5_config.py │ └── resnet_config.py ├── visulization ├── __init__.py ├── coco_id_map.py ├── vis_with_coreml.py ├── vis_with_mnn.py └── vis.py ├── model_eval ├── .DS_Store ├── fddb_plot.py ├── xml_2_coco.py ├── fddb.py ├── custome_eval.py └── wider.py ├── train.py ├── train_config.py ├── prepare_coco_data.py ├── xml_2_txt.py ├── README.md └── prepare_wider_data.py /lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /configs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /configs/face/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /configs/mscoco/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/core/model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /visulization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/core/model/head/__init__.py: -------------------------------------------------------------------------------- 1 | 
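# Note: the empty __init__.py files in this tree exist only to mark their
# directories as Python packages, so the rest of the project can use dotted
# imports, e.g. (both taken from files later in this dump):
#   from lib.helper.logger import logger
#   from lib.core.model.fpn.plain_fpn import create_fpn_net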
-------------------------------------------------------------------------------- /lib/core/model/loss/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/dataset/augmentor/data_aug/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/helper/__init__.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- -------------------------------------------------------------------------------- /lib/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- -------------------------------------------------------------------------------- /lib/core/model/sqeeze_excitation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/core/model/fpn/__init__.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- -------------------------------------------------------------------------------- /lib/core/model/net/__init__.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- -------------------------------------------------------------------------------- /lib/core/model/net/resnet/__init__.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- -------------------------------------------------------------------------------- /lib/dataset/augmentor/README.md: -------------------------------------------------------------------------------- 1 | # augmentor 2 | A simple image augmentor 3 | -------------------------------------------------------------------------------- /lib/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/610265158/mobilenetv3_centernet/HEAD/lib/.DS_Store -------------------------------------------------------------------------------- /tools/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/610265158/mobilenetv3_centernet/HEAD/tools/.DS_Store -------------------------------------------------------------------------------- /lib/core/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/610265158/mobilenetv3_centernet/HEAD/lib/core/.DS_Store -------------------------------------------------------------------------------- /model_eval/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/610265158/mobilenetv3_centernet/HEAD/model_eval/.DS_Store -------------------------------------------------------------------------------- /lib/dataset/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/610265158/mobilenetv3_centernet/HEAD/lib/dataset/.DS_Store -------------------------------------------------------------------------------- /lib/dataset/augmentor/test.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/610265158/mobilenetv3_centernet/HEAD/lib/dataset/augmentor/test.jpg -------------------------------------------------------------------------------- /lib/dataset/augmentor/test2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/610265158/mobilenetv3_centernet/HEAD/lib/dataset/augmentor/test2.jpg -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | from lib.helper.logger import logger 2 | from lib.core.base_trainer.net_work import trainner 3 | import setproctitle 4 | 5 | 6 | 7 | logger.info('train start') 8 | setproctitle.setproctitle("detect") 9 | 10 | trainner=trainner() 11 | 12 | trainner.train() 13 | -------------------------------------------------------------------------------- /train_config.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | import os 4 | 5 | from configs.mscoco.mbv3_config import config as mb3_config 6 | from configs.face.face_mbv3_config import config as face_mbv3_config 7 | from configs.face.face_shufflenet_5x5_config import config as face_shufflenet_5x5_config 8 | from configs.mscoco.shufflenetplus_config import config as shufflenet_plus_config 9 | from configs.mscoco.shufflenet_5x5_config import config as shufflenet_5x5_config 10 | ##### the config for different task 11 | config=mb3_config 12 | 13 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 14 | config.TRAIN.num_gpu = 1 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /lib/helper/logger.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | 4 | 5 | 6 | #-*-coding:utf-8-*- 7 | 8 | import logging 9 | 10 | 11 | def get_logger(LEVEL,log_file=None): 12 | head = '[%(asctime)-15s] [%(levelname)s] %(message)s ' 13 | if LEVEL=='info': 14 | logging.basicConfig(level=logging.INFO, format=head) 15 | elif LEVEL=='debug': 16 | logging.basicConfig(level=logging.DEBUG, format=head) 17 | logger = logging.getLogger() 18 | 19 | if log_file !=None: 20 | 21 | fh = logging.FileHandler(log_file) 22 | logger.addHandler(fh) 23 | return logger 24 | 25 | logger=get_logger('info') 26 | -------------------------------------------------------------------------------- /model_eval/fddb_plot.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import seaborn as sns 4 | sns.set_style('whitegrid') 5 | 6 | roc = pd.read_csv('/home/lz/WiderFace-Evaluation/fddb/evaluation/tempDiscROC.txt', sep=' ', header=None) 7 | roc.columns = ['tpr', 'fp', 'threshold'] 8 | 9 | 10 | def plot_roc(): 11 | _, axis = plt.subplots(nrows=1, ncols=1, figsize=(7, 4), dpi=120) 12 | axis.plot(roc.fp, roc.tpr, c='r', linewidth=2.0); 13 | axis.set_title('Discrete Score ROC') 14 | axis.set_xlim([0, 2000.0]) 15 | axis.set_ylim([0.6, 1.0]) 16 | axis.set_xlabel('False Positives') 17 | axis.set_ylabel('True Positive Rate'); 18 | plt.show() 19 | plot_roc() -------------------------------------------------------------------------------- /lib/core/model/net/shufflenet/backbone.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | 4 | import tensorflow as tf 5 | import tensorflow.contrib.slim as slim 6 | 7 | from train_config 
import config as cfg 8 | 9 | from lib.core.model.net.shufflenet.shufflenetv2 import ShufflenetV2 10 | from lib.core.model.net.shufflenet.shufflenetv2 import shufflenet_arg_scope 11 | 12 | from lib.core.model.fpn.seperateconv_fpn import create_fpn_net 13 | 14 | def shufflenetv2_ssd(image,is_training=True): 15 | 16 | arg_scope = shufflenet_arg_scope(weight_decay=cfg.TRAIN.weight_decay_factor) 17 | 18 | with tf.contrib.slim.arg_scope(arg_scope): 19 | with slim.arg_scope([slim.batch_norm], is_training=is_training): 20 | shufflenet_fms = ShufflenetV2(image,is_training=is_training) 21 | 22 | 23 | return shufflenet_fms 24 | -------------------------------------------------------------------------------- /lib/core/model/sqeeze_excitation/se.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | 4 | def se(fm,input_dim,refraction=4): 5 | se=tf.reduce_mean(fm,axis=[1,2],keep_dims=True) 6 | se = slim.conv2d(se, 7 | input_dim//refraction, 8 | [1, 1], 9 | stride=1, 10 | activation_fn=tf.nn.relu, 11 | biases_initializer=None, 12 | normalizer_fn=slim.batch_norm, 13 | scope='conv1x1_se_a') 14 | se = slim.conv2d(se, 15 | input_dim, 16 | [1, 1], 17 | stride=1, 18 | activation_fn=None, 19 | normalizer_fn=None, 20 | biases_initializer=None, 21 | scope='conv1x1_se_b') 22 | 23 | se=tf.nn.sigmoid(se) 24 | 25 | return fm*se -------------------------------------------------------------------------------- /tools/convert_to_coreml.py: -------------------------------------------------------------------------------- 1 | import coremltools as ct 2 | import coremltools 3 | from coremltools.models.neural_network import quantization_utils 4 | from coremltools.models.neural_network.quantization_utils import AdvancedQuantizedLayerSelector 5 | 6 | frozen_graph_file='./model/detector.pb' 7 | 8 | 9 | 10 | fp_16_file='./centernet.mlmodel' 11 | 12 | 13 | 14 | mlmodel = ct.convert(frozen_graph_file,inputs=[ct.ImageType()]) 15 | 16 | spec = mlmodel.get_spec() 17 | 18 | print(mlmodel) 19 | 20 | selector = AdvancedQuantizedLayerSelector( 21 | skip_layer_types=['batchnorm', 'depthwiseConv'], 22 | minimum_conv_kernel_channels=4, 23 | minimum_conv_weight_count=4096 24 | ) 25 | 26 | model_fp16 = quantization_utils.quantize_weights(mlmodel, nbits=16,quantization_mode='linear',selector=selector) 27 | 28 | model_fp16.save(fp_16_file) 29 | 30 | print(model_fp16) 31 | 32 | print('convert over, model was saved as ',fp_16_file) -------------------------------------------------------------------------------- /lib/core/model/net/resnet/backbone.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | 4 | from train_config import config as cfg 5 | 6 | from lib.core.model.net.resnet.resnet_v2 import resnet_v2_50,resnet_v2_18 7 | from lib.core.model.net.resnet.resnet_utils import resnet_arg_scope 8 | 9 | from lib.core.model.fpn.plain_fpn import create_fpn_net 10 | 11 | def resnet_ssd(image,is_training=True): 12 | 13 | arg_scope = resnet_arg_scope(weight_decay=cfg.TRAIN.weight_decay_factor) 14 | 15 | with tf.contrib.slim.arg_scope(arg_scope): 16 | with slim.arg_scope([slim.batch_norm], is_training=is_training): 17 | _,endpoints = resnet_v2_18(image, is_training=is_training,global_pool=False,num_classes=None) 18 | 19 | for k, v in endpoints.items(): 20 | print('resnet backbone output:', k, v) 21 | 22 | 
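# NOTE: resnet_v2_18 builds the graph above, yet the end-point keys below are
# scoped 'resnet_v2_50/...'. If resnet_v2_18 follows the usual slim scoping,
# its outputs would live under 'resnet_v2_18/...' instead; the debug print of
# `endpoints` above shows the key names that actually exist.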
resnet_fms=[endpoints['resnet_v2_50/block2'], 23 | endpoints['resnet_v2_50/block3'], 24 | endpoints['resnet_v2_50/block4']] 25 | 26 | 27 | 28 | 29 | return resnet_fms 30 | -------------------------------------------------------------------------------- /lib/core/model/fpn/plain_fpn.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | import tensorflow as tf 4 | import tensorflow.contrib.slim as slim 5 | 6 | 7 | def create_fpn_net(blocks,dims_list): 8 | 9 | c3, c4, c5= blocks 10 | 11 | p5 = slim.conv2d(c5, dims_list[2], [1, 1],padding='SAME',scope='C5_reduced') 12 | p5_upsampled = tf.keras.layers.UpSampling2D(data_format='channels_last')(p5) 13 | p5 = slim.conv2d(p5, dims_list[1], [3, 3],padding='SAME',scope='P5') 14 | 15 | p4 = slim.conv2d(c4, dims_list[1], [1, 1],padding='SAME',scope='C4_reduced') 16 | p4 = p4 + p5_upsampled 17 | p4_upsampled = tf.keras.layers.UpSampling2D(data_format='channels_last')(p4) 18 | p4 = slim.conv2d(p4, dims_list[1], [3, 3],padding='SAME',scope='P4') 19 | 20 | p3 = slim.conv2d(c3, dims_list[0], [1, 1], padding='SAME', scope='C3_reduced') 21 | p3 = p3 + p4_upsampled 22 | p3 = slim.conv2d(p3, dims_list[1], [3, 3], padding='SAME', scope='P3') 23 | 24 | p6 = slim.conv2d(c5, dims_list[3], [3, 3], stride=2, scope='p6') 25 | p7 = slim.conv2d(p6, dims_list[4], [3, 3], stride=2, scope='p7') 26 | 27 | fpn_fms = [p3,p4,p5,p6,p7] 28 | for fm in fpn_fms: 29 | print(fm) 30 | return fpn_fms -------------------------------------------------------------------------------- /lib/core/model/fpn/seperateconv_fpn.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | import tensorflow as tf 4 | import tensorflow.contrib.slim as slim 5 | 6 | 7 | def create_fpn_net(blocks,dims_list): 8 | 9 | c3, c4, c5= blocks 10 | 11 | p5 = slim.conv2d(c5, dims_list[2], [1, 1],padding='SAME',scope='C5_reduced') 12 | p5_upsampled = tf.keras.layers.UpSampling2D(data_format='channels_last')(p5) 13 | p5 = slim.separable_conv2d(p5, dims_list[1], [3, 3],padding='SAME',scope='P5') 14 | 15 | p4 = slim.conv2d(c4, dims_list[1], [1, 1],padding='SAME',scope='C4_reduced') 16 | p4 = p4 + p5_upsampled 17 | p4_upsampled = tf.keras.layers.UpSampling2D(data_format='channels_last')(p4) 18 | p4 = slim.separable_conv2d(p4, dims_list[1], [3, 3],padding='SAME',scope='P4') 19 | 20 | p3 = slim.conv2d(c3, dims_list[0], [1, 1], padding='SAME', scope='C3_reduced') 21 | p3 = p3 + p4_upsampled 22 | p3 = slim.separable_conv2d(p3, dims_list[1], [3, 3], padding='SAME', scope='P3') 23 | 24 | p6 = slim.separable_conv2d(c5, dims_list[3], [3, 3], stride=2, scope='p6') 25 | p7 = slim.separable_conv2d(p6, dims_list[4], [3, 3], stride=2, scope='p7') 26 | 27 | fpn_fms = [p3,p4,p5,p6,p7] 28 | for fm in fpn_fms: 29 | print(fm) 30 | return fpn_fms -------------------------------------------------------------------------------- /tools/auto_freeze.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import tensorflow as tf 4 | os.environ["CUDA_VISIBLE_DEVICES"] = "-1" 5 | 6 | 7 | 8 | 9 | import argparse 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument("--pretrained_model", help="the trained file, end with .ckpt", 12 | type=str) 13 | args = parser.parse_args() 14 | pretrained_model=args.pretrained_model 15 | 16 | print(pretrained_model) 17 | 18 | command="python tools/centernet_for_freeze_bn.py --pretrained_model %s "%pretrained_model 19 | 
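# the command above calls tools/centernet_for_freeze_bn.py, which re-saves the
# checkpoint with batch norm defaulting to inference mode before the graph is
# frozen again below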
os.system(command) 20 | print('save ckpt with bn defaut False') 21 | 22 | 23 | 24 | 25 | #### freeze again 26 | model_folder = './model' 27 | checkpoint = tf.train.get_checkpoint_state(model_folder) 28 | 29 | ##input_checkpoint 30 | input_checkpoint = checkpoint.model_checkpoint_path 31 | ##input_graph 32 | input_meta_graph = input_checkpoint + '.meta' 33 | 34 | ##output_node_names 35 | output_node_names='tower_0/images,tower_0/detections' 36 | 37 | #output_graph 38 | output_graph='./model/detector.pb' 39 | 40 | print('excuted') 41 | 42 | command="python tools/freeze.py --input_checkpoint %s --input_meta_graph %s --output_node_names %s --output_graph %s"\ 43 | %(input_checkpoint,input_meta_graph,output_node_names,output_graph) 44 | os.system(command) 45 | 46 | 47 | print('detector.pb is saved with all feeeze') -------------------------------------------------------------------------------- /prepare_coco_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | from lib.dataset.coco_data import BoxInfo 5 | 6 | import argparse 7 | 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument('--mscocodir', type=str,default='../pubdata/mscoco', help='detect with coco or face',required=False) 10 | args = parser.parse_args() 11 | 12 | coco_dir=args.mscocodir 13 | 14 | train_im_path = os.path.join(coco_dir,'train2017') 15 | train_ann_path = os.path.join(coco_dir,'annotations/instances_train2017.json') 16 | val_im_path = os.path.join(coco_dir,'val2017') 17 | val_ann_path = os.path.join(coco_dir,'annotations/instances_val2017.json') 18 | 19 | 20 | 21 | train_data=BoxInfo(train_im_path,train_ann_path) 22 | 23 | 24 | fw = open('train.txt', 'w') 25 | for meta in train_data.metas: 26 | fname, boxes = meta.img_url, meta.bbox 27 | 28 | 29 | 30 | tmp_str = '' 31 | tmp_str =tmp_str+ fname+'|' 32 | 33 | for box in boxes: 34 | data = ' %d,%d,%d,%d,%d'%(box[0], box[1], box[2], box[3],box[4]) 35 | tmp_str=tmp_str+data 36 | if len(boxes) == 0: 37 | print(tmp_str) 38 | continue 39 | ####err box? 40 | if box[2] <= 0 or box[3] <= 0: 41 | pass 42 | else: 43 | fw.write(tmp_str + '\n') 44 | fw.close() 45 | 46 | 47 | 48 | 49 | 50 | 51 | val_data=BoxInfo(val_im_path,val_ann_path) 52 | 53 | fw = open('val.txt', 'w') 54 | for meta in val_data.metas: 55 | fname, boxes = meta.img_url, meta.bbox 56 | 57 | tmp_str = '' 58 | tmp_str = tmp_str + fname + '|' 59 | 60 | for box in boxes: 61 | data = ' %d,%d,%d,%d,%d' % (box[0], box[1], box[2], box[3], box[4]) 62 | tmp_str = tmp_str + data 63 | if len(boxes) == 0: 64 | print(tmp_str) 65 | continue 66 | ####err box? 
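# assumption: BoxInfo yields COCO-style [x, y, w, h, label] boxes, so the check
# below drops the line when the (last parsed) box has a non-positive width or
# height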
67 | if box[2] <= 0 or box[3] <= 0: 68 | pass 69 | else: 70 | fw.write(tmp_str + '\n') 71 | fw.close() 72 | -------------------------------------------------------------------------------- /visulization/coco_id_map.py: -------------------------------------------------------------------------------- 1 | coco_map = {0: (1, 'person'), 1: (2, 'bicycle'), 2: (3, 'car'), 3: (4, 'motorcycle'), 4: (5, 'airplane'), 5: (6, 'bus'), 2 | 6: (7, 'train'), 7: (8, 'truck'), 8: (9, 'boat'), 9: (10, 'traffic shufflenet'), 10: (11, 'fire hydrant'), 3 | 11: (13, 'stop sign'), 12: (14, 'parking meter'), 13: (15, 'bench'), 14: (16, 'bird'), 15: (17, 'cat'), 4 | 16: (18, 'dog'), 17: (19, 'horse'), 18: (20, 'sheep'), 19: (21, 'cow'), 20: (22, 'elephant'), 5 | 21: (23, 'bear'), 22: (24, 'zebra'), 23: (25, 'giraffe'), 24: (27, 'backpack'), 25: (28, 'umbrella'), 6 | 26: (31, 'handbag'), 27: (32, 'tie'), 28: (33, 'suitcase'), 29: (34, 'frisbee'), 30: (35, 'skis'), 7 | 31: (36, 'snowboard'), 32: (37, 'sports ball'), 33: (38, 'kite'), 34: (39, 'baseball bat'), 8 | 35: (40, 'baseball glove'), 9 | 36: (41, 'skateboard'), 37: (42, 'surfboard'), 38: (43, 'tennis racket'), 39: (44, 'bottle'), 10 | 40: (46, 'wine glass'), 11 | 41: (47, 'cup'), 42: (48, 'fork'), 43: (49, 'knife'), 44: (50, 'spoon'), 45: (51, 'bowl'), 12 | 46: (52, 'banana'), 47: (53, 'apple'), 48: (54, 'sandwich'), 49: (55, 'orange'), 50: (56, 'broccoli'), 13 | 51: (57, 'carrot'), 52: (58, 'hot dog'), 53: (59, 'pizza'), 54: (60, 'donut'), 55: (61, 'cake'), 14 | 56: (62, 'chair'), 57: (63, 'couch'), 58: (64, 'potted plant'), 59: (65, 'bed'), 60: (67, 'dining table'), 15 | 61: (70, 'toilet'), 62: (72, 'tv'), 63: (73, 'laptop'), 64: (74, 'mouse'), 65: (75, 'remote'), 16 | 66: (76, 'keyboard'), 67: (77, 'cell phone'), 68: (78, 'microwave'), 69: (79, 'oven'), 70: (80, 'toaster'), 17 | 71: (81, 'sink'), 72: (82, 'refrigerator'), 73: (84, 'book'), 74: (85, 'clock'), 75: (86, 'vase'), 18 | 76: (87, 'scissors'), 77: (88, 'teddy bear'), 78: (89, 'hair drier'), 79: (90, 'toothbrush')} -------------------------------------------------------------------------------- /lib/core/model/net/arg_scope/resnet_args_cope.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow.contrib.slim as slim 3 | from tensorflow.contrib.slim import arg_scope 4 | from tensorflow.python.framework import ops 5 | from tensorflow.python.ops import nn_ops 6 | from tensorflow.contrib.layers.python.layers import regularizers, \ 7 | layers 8 | from train_config import config 9 | 10 | 11 | 12 | 13 | 14 | def resnet_arg_scope(bn_is_training, 15 | bn_trainable=True, 16 | trainable=True, 17 | weight_decay=config.TRAIN.weight_decay_factor, 18 | batch_norm_decay=0.997, 19 | batch_norm_scale=True, 20 | bn_method='BN', 21 | data_format='NHWC'): 22 | batch_norm_params = { 23 | 'is_training': bn_is_training, 24 | 'decay': batch_norm_decay, 25 | 'scale': batch_norm_scale, 26 | 'trainable': bn_trainable, 27 | 'updates_collections': ops.GraphKeys.UPDATE_OPS, 28 | 'fused':True 29 | } 30 | if 'BN' in bn_method: 31 | norm_func=slim.batch_norm 32 | norm_params=batch_norm_params 33 | elif 'None' in bn_method : 34 | norm_func = None 35 | norm_params = None 36 | 37 | with arg_scope( 38 | [slim.conv2d,slim.separable_conv2d,slim.conv2d_transpose], 39 | weights_regularizer=regularizers.l2_regularizer(weight_decay), 40 | weights_initializer=slim.xavier_initializer(), 41 | trainable=trainable, 42 | activation_fn=nn_ops.relu, 43 | normalizer_fn=norm_func, 44 | 
normalizer_params=norm_params, 45 | data_format=data_format,): 46 | with arg_scope( 47 | [layers.batch_norm,layers.max_pool2d], data_format=data_format): 48 | with arg_scope([layers.batch_norm], **batch_norm_params) as arg_sc: 49 | 50 | return arg_sc 51 | -------------------------------------------------------------------------------- /lib/core/anchor/nms.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from train_config import config as cfg 3 | 4 | def batch_non_max_suppression( 5 | boxes, scores,labels, 6 | score_threshold, iou_threshold, 7 | max_boxes): 8 | """ 9 | Arguments: 10 | boxes: a float tensor with shape [batch_size, N, 4]. 11 | scores: a float tensor with shape [batch_size, N]. 12 | score_threshold: a float number. 13 | iou_threshold: a float number, threshold for IoU. 14 | max_boxes: an integer, maximum number of retained boxes. 15 | Returns: 16 | boxes: a float tensor with shape [batch_size, max_boxes, 4]. 17 | scores: a float tensor with shape [batch_size, max_boxes]. 18 | num_detections: an int tensor with shape [batch_size]. 19 | """ 20 | def fn(x): 21 | boxes, scores,labels = x 22 | 23 | # low scoring boxes are removed 24 | ids = tf.where(tf.greater_equal(scores, score_threshold)) 25 | ids = tf.squeeze(ids, axis=1) 26 | boxes = tf.gather(boxes, ids) 27 | scores = tf.gather(scores, ids) 28 | labels = tf.gather(labels, ids) 29 | selected_indices = tf.image.non_max_suppression( 30 | boxes, scores, max_boxes, iou_threshold 31 | ) 32 | boxes = tf.gather(boxes, selected_indices) 33 | scores = tf.gather(scores, selected_indices) 34 | labels = tf.gather(labels, selected_indices) 35 | num_boxes = tf.to_int32(tf.shape(boxes)[0]) 36 | 37 | zero_padding = max_boxes - num_boxes 38 | boxes = tf.pad(boxes, [[0, zero_padding], [0, 0]]) 39 | scores = tf.pad(scores, [[0, zero_padding]]) 40 | labels = tf.pad(labels, [[0, zero_padding]],constant_values=-1) 41 | 42 | boxes.set_shape([max_boxes, 4]) 43 | scores.set_shape([max_boxes]) 44 | labels.set_shape([max_boxes]) 45 | return boxes, scores,labels, num_boxes 46 | 47 | boxes, scores, labels, num_detections = tf.map_fn( 48 | fn, [boxes, scores,labels], 49 | dtype=(tf.float32, tf.float32,tf.int64, tf.int32), 50 | parallel_iterations=cfg.TEST.parallel_iterations, 51 | back_prop=False, swap_memory=False, infer_shape=True 52 | ) 53 | return boxes, scores,labels, num_detections 54 | 55 | -------------------------------------------------------------------------------- /lib/core/model/net/mobilenet/backbone.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | 4 | from train_config import config as cfg 5 | 6 | from lib.core.model.net.mobilenet.mobilenet_v2 import mobilenet_v2_050,mobilenet_v2_035,mobilenet_v2_025 7 | from lib.core.model.net.mobilenet.mobilenet import training_scope 8 | 9 | 10 | from lib.core.model.net.arg_scope.resnet_args_cope import resnet_arg_scope 11 | 12 | 13 | 14 | 15 | def create_fpn_net(blocks,dims_list): 16 | 17 | of1, of2, of3= blocks 18 | 19 | # lateral2 = slim.conv2d(of2, dims_list[1], [1, 1], 20 | # padding='SAME', 21 | # scope='lateral/res{}'.format(2)) 22 | # 23 | # upsample2_of3 = slim.conv2d(of3, dims_list[1], [1, 1], 24 | # padding='SAME', 25 | # scope='merge/res{}'.format(2)) 26 | # upsample2 = tf.keras.layers.UpSampling2D(data_format='channels_last' )(upsample2_of3) 27 | 28 | # fem_2 = lateral2 + upsample2 29 | 30 | lateral1 = 
slim.conv2d(of1, dims_list[0], [1, 1], 31 | padding='SAME', 32 | scope='lateral/res{}'.format(1)) 33 | 34 | upsample1_of2 = slim.conv2d(of2, dims_list[0], [1, 1], 35 | padding='SAME', 36 | scope='merge/res{}'.format(1)) 37 | upsample1 = tf.keras.layers.UpSampling2D(data_format='channels_last')(upsample1_of2) 38 | 39 | fem_1 = lateral1 + upsample1 40 | 41 | #####enhance model 42 | fpn_fms = [fem_1, upsample1_of2, of3] 43 | 44 | return fpn_fms 45 | 46 | def mobilenet_ssd(image,L2_reg,is_training=True): 47 | 48 | arg_scope = training_scope(weight_decay=L2_reg, is_training=is_training) 49 | 50 | with tf.contrib.slim.arg_scope(arg_scope): 51 | _,endpoint = mobilenet_v2_035(image,is_training=is_training,base_only=True,finegrain_classification_mode=False) 52 | 53 | for k,v in endpoint.items(): 54 | print('mobile backbone output:',k,v) 55 | 56 | mobilebet_fms=[ 57 | endpoint['layer_8/expansion_output'], 58 | endpoint['layer_15/expansion_output'], 59 | endpoint['layer_18/output']] 60 | 61 | if cfg.MODEL.fpn: 62 | mobilebet_fms=create_fpn_net(mobilebet_fms,dims_list=cfg.MODEL.fpn_dims) 63 | 64 | return mobilebet_fms 65 | -------------------------------------------------------------------------------- /model_eval/xml_2_coco.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | ####transform from xml to json 3 | 4 | import os 5 | import xml.etree.cElementTree as et 6 | import json 7 | import argparse 8 | import shutil 9 | import traceback 10 | import random 11 | import numpy as np 12 | import cv2 13 | 14 | def GetFileList(dir, fileList): 15 | newDir = dir 16 | if os.path.isfile(dir): 17 | fileList.append(dir) 18 | elif os.path.isdir(dir): 19 | for s in os.listdir(dir): 20 | #如果需要忽略某些文件夹,使用以下代码 21 | # if s == "pts": 22 | # continue 23 | newDir=os.path.join(dir,s) 24 | GetFileList(newDir, fileList) 25 | return fileList 26 | 27 | 28 | # load train/val split used in the training 29 | annotation_path = './val.txt' 30 | 31 | 32 | with open(annotation_path) as f: 33 | lines = f.readlines() 34 | 35 | # initialize the json data for the dataset 36 | data = {} 37 | cls_person = 0 38 | 39 | test_data = {} 40 | test_data['licenses'] = [] 41 | test_data['info'] = [] 42 | test_data['categories'] = [{'id': cls_person, 'name': 'person', 'supercategory': 'person'}] 43 | test_data['images'] = [] 44 | test_data['annotations'] = [] 45 | 46 | # process xml files 47 | counter=1 48 | anno_id = 0 49 | img_id = 0 50 | for line in lines: 51 | counter+=1 52 | if counter%1000==0: 53 | print('%d/%d images processed'%(counter, len(lines))) 54 | try: 55 | 56 | file_str,label = line.rstrip().rsplit('| ') 57 | 58 | labels = label.split(' ') 59 | boxes = [] 60 | 61 | for label in labels: 62 | 63 | bbox = np.array(label.split(','), dtype=np.float) 64 | boxes.append([bbox[0], bbox[1], bbox[2], bbox[3], bbox[4]]) 65 | 66 | 67 | #file_name = root.find('filename').text 68 | file_name = file_str 69 | image_id = img_id 70 | 71 | img=cv2.imread(file_name) 72 | img_height,img_width,_=img.shape 73 | 74 | 75 | img_entry = {'file_name': file_name, 'id': image_id, 'height': img_height, 'width': img_width} 76 | test_data['images'].append(img_entry) 77 | 78 | img_id += 1 79 | 80 | for box in boxes: 81 | 82 | xmin = int(box[0]) 83 | ymin = int(box[1]) 84 | xmax = int(box[2]) 85 | ymax = int(box[3]) 86 | 87 | anno_entry = {'image_id': image_id, 'category_id': cls_person, 'id': anno_id,\ 88 | 'iscrowd': 0, 'area': int(xmax-xmin) * int(ymax-ymin),\ 89 | 'bbox': [int(xmin), int(ymin), 
int(xmax-xmin), int(ymax-ymin)]}
90 | test_data['annotations'].append(anno_entry)
91 | 
92 | anno_id += 1
93 | except Exception as ex:
94 | msg = "err:%s" % ex
95 | print(msg)
96 | traceback.print_exc()
97 | 
98 | 
99 | with open('./model_eval/DatasetTest_cocoStyle.json', 'w') as outfile:
100 | json.dump(test_data, outfile)
--------------------------------------------------------------------------------
/xml_2_txt.py:
--------------------------------------------------------------------------------
1 | import xml.etree.cElementTree as et  # package for reading xml files
2 | import os
3 | 
4 | def GetFileList(dir, fileList):
5 | newDir = dir
6 | if os.path.isfile(dir):
7 | fileList.append(dir)
8 | elif os.path.isdir(dir):
9 | for s in os.listdir(dir):
10 | # if s == "pts":
11 | # continue
12 | newDir=os.path.join(dir,s)
13 | GetFileList(newDir, fileList)
14 | return fileList
15 | 
16 | 
17 | data_dir1='./data1209'
18 | data_dir2='./data1203'
19 | ratio=0.9
20 | 
21 | xml_list1=[]
22 | GetFileList(data_dir1,xml_list1)
23 | xml_list1=[x for x in xml_list1 if 'xml' in x]
24 | 
25 | xml_list2=[]
26 | GetFileList(data_dir2,xml_list2)
27 | xml_list2=[x for x in xml_list2 if 'xml' in x]
28 | 
29 | xml_list=xml_list1+xml_list2
30 | 
31 | 
32 | xml_list=list(set(xml_list))
33 | train_list=xml_list[:int(len(xml_list)*ratio)]
34 | val_list=xml_list[int(len(xml_list)*ratio):]
35 | 
36 | train_file=open('train.txt',mode='w')
37 | val_file=open('val.txt',mode='w')
38 | 
39 | 
40 | for xml_name in train_list:
41 | try:
42 | tree = et.parse(xml_name)
43 | except:
44 | print(xml_name,'err')
45 | continue
46 | root = tree.getroot()  # use getroot() to fetch the root node; it returns an Element object
47 | 
48 | img_name=root.find('filename').text
49 | 
50 | print(img_name)
51 | tmp_str=''
52 | img_path=xml_name.replace('.xml','.jpg')
53 | tmp_str+=img_path+'|'
54 | 
55 | 
56 | obj=root.find('object')
57 | 
58 | 
59 | label=obj.find('name').text
60 | 
61 | if label=='qrcode':
62 | 
63 | xml_box = obj.find('bndbox')
64 | xmin = (int(float(xml_box.find('xmin').text)) )
65 | ymin = (int(float(xml_box.find('ymin').text)) )
66 | xmax = (int(float(xml_box.find('xmax').text)) )
67 | ymax = (int(float(xml_box.find('ymax').text)) )
68 | 
69 | tmp_str+=' %d,%d,%d,%d,%d'%(xmin,ymin,xmax,ymax,1)
70 | 
71 | tmp_str+='\n'
72 | 
73 | train_file.write(tmp_str)
74 | 
75 | train_file.close()
76 | 
77 | for xml_name in val_list:
78 | try:
79 | tree = et.parse(xml_name)
80 | except:
81 | continue
82 | root = tree.getroot()  # use getroot() to fetch the root node; it returns an Element object
83 | 
84 | img_name = root.find('filename').text
85 | 
86 | tmp_str = ''
87 | img_path=xml_name.replace('.xml','.jpg')
88 | tmp_str += img_path + '|'
89 | 
90 | obj = root.find('object')
91 | label = obj.find('name').text
92 | 
93 | if label == 'qrcode':
94 | xml_box = obj.find('bndbox')
95 | xmin = (int(float(xml_box.find('xmin').text)))
96 | ymin = (int(float(xml_box.find('ymin').text)))
97 | xmax = (int(float(xml_box.find('xmax').text)))
98 | ymax = (int(float(xml_box.find('ymax').text)) )
99 | 
100 | 
101 | 
102 | tmp_str += ' %d,%d,%d,%d,%d' % (xmin, ymin, xmax, ymax, 1)
103 | 
104 | tmp_str += '\n'
105 | 
106 | val_file.write(tmp_str)
107 | 
108 | val_file.close()
109 | 
--------------------------------------------------------------------------------
/visulization/vis_with_coreml.py:
--------------------------------------------------------------------------------
1 | # Copyright @ 2019 Alibaba. All rights reserved.
2 | # Created by ruhuan on 2019.09.09 3 | """ python demo usage about MNN API """ 4 | import sys 5 | sys.path.append('.') 6 | from train_config import config as cfg 7 | import tfcoreml 8 | import coremltools 9 | import cv2 10 | import numpy as np 11 | import os 12 | import PIL.Image 13 | from visulization.coco_id_map import coco_map 14 | from train_config import config as cfg 15 | 16 | def preprocess( image, target_height, target_width, label=None): 17 | ###sometimes use in objs detects 18 | h, w, c = image.shape 19 | 20 | bimage = np.zeros(shape=[target_height, target_width, c], dtype=image.dtype) 21 | 22 | scale_y = target_height / h 23 | scale_x = target_width / w 24 | 25 | scale = min(scale_x, scale_y) 26 | 27 | image = cv2.resize(image, None, fx=scale, fy=scale) 28 | 29 | h_, w_, _ = image.shape 30 | 31 | dx = (target_width - w_) // 2 32 | dy = (target_height - h_) // 2 33 | bimage[dy:h_ + dy, dx:w_ + dx, :] = image 34 | 35 | return bimage, scale, scale, dx, dy 36 | 37 | def inference(model_path,img_dir,thres=0.3): 38 | """ inference mobilenet_v1 using a specific picture """ 39 | centernet_model =coremltools.models.MLModel(model_path) 40 | 41 | 42 | img_list=os.listdir(img_dir) 43 | for pic in img_list: 44 | image = cv2.imread(os.path.join(img_dir,pic)) 45 | #cv2 read as bgr format #change to rgb format 46 | image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB) 47 | 48 | image,_,_,_,_ = preprocess(image,target_height=cfg.DATA.hin,target_width=cfg.DATA.win) 49 | 50 | image_show=image.copy() 51 | 52 | image = image.astype(np.uint8) 53 | pil_img = PIL.Image.fromarray(image) 54 | 55 | coreml_inputs = {'tower_0/images': pil_img} 56 | 57 | coreml_outputs = centernet_model.predict(coreml_inputs, useCPUOnly=True) 58 | 59 | boxes=coreml_outputs['tower_0/detections'] 60 | 61 | boxes=boxes[0] 62 | 63 | for i in range(len(boxes)): 64 | bbox = boxes[i] 65 | 66 | if bbox[4]>thres: 67 | 68 | cv2.rectangle(image_show, (int(bbox[0]), int(bbox[1])), 69 | (int(bbox[2]), int(bbox[3])), (255, 0, 0), 4) 70 | 71 | str_draw = '%s:%.2f' % (coco_map[int(bbox[5])%80][1], bbox[4]) 72 | cv2.putText(image_show, str_draw, (int(bbox[0]), int(bbox[1])), cv2.FONT_HERSHEY_SIMPLEX, 2, 73 | (255, 0, 255), 2) 74 | 75 | cv2.imshow('coreml result',image_show) 76 | cv2.waitKey(0) 77 | 78 | if __name__ == "__main__": 79 | 80 | import argparse 81 | 82 | parser = argparse.ArgumentParser() 83 | parser.add_argument('--coreml_model', type=str, default='./centernet.mlmodel', help='the mnn model ', required=False) 84 | parser.add_argument('--imgDir', type=str, default='../pubdata/mscoco/val2017', help='the image dir to detect') 85 | parser.add_argument('--thres', type=float, default=0.3, help='the thres for detect') 86 | args = parser.parse_args() 87 | 88 | data_dir = args.imgDir 89 | model_path=args.coreml_model 90 | thres=args.thres 91 | inference(model_path,data_dir,thres) 92 | -------------------------------------------------------------------------------- /visulization/vis_with_mnn.py: -------------------------------------------------------------------------------- 1 | # Copyright @ 2019 Alibaba. All rights reserved. 
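# preprocess() below is the same letterbox helper used in vis_with_coreml.py: it
# resizes by `scale` and pads by (dx, dy), so a detected box can be mapped back
# to the original image with x_orig = (x_det - dx) / scale and
# y_orig = (y_det - dy) / scale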
2 | # Created by ruhuan on 2019.09.09 3 | """ python demo usage about MNN API """ 4 | import sys 5 | sys.path.append('.') 6 | 7 | import numpy as np 8 | import MNN 9 | import cv2 10 | import os 11 | 12 | from visulization.coco_id_map import coco_map 13 | from train_config import config as cfg 14 | 15 | def preprocess( image, target_height, target_width, label=None): 16 | ###sometimes use in objs detects 17 | h, w, c = image.shape 18 | 19 | bimage = np.zeros(shape=[target_height, target_width, c], dtype=image.dtype) 20 | 21 | scale_y = target_height / h 22 | scale_x = target_width / w 23 | 24 | scale = min(scale_x, scale_y) 25 | 26 | image = cv2.resize(image, None, fx=scale, fy=scale) 27 | 28 | h_, w_, _ = image.shape 29 | 30 | dx = (target_width - w_) // 2 31 | dy = (target_height - h_) // 2 32 | bimage[dy:h_ + dy, dx:w_ + dx, :] = image 33 | 34 | return bimage, scale, scale, dx, dy 35 | 36 | 37 | 38 | def inference(mnn_model_path,img_dir,thres=0.3): 39 | """ inference mobilenet_v1 using a specific picture """ 40 | interpreter = MNN.Interpreter(mnn_model_path) 41 | session = interpreter.createSession() 42 | input_tensor = interpreter.getSessionInput(session) 43 | 44 | img_list=os.listdir(img_dir) 45 | for pic in img_list: 46 | image = cv2.imread(os.path.join(img_dir,pic)) 47 | #cv2 read as bgr format 48 | image = image[..., ::-1] 49 | #change to rgb format 50 | 51 | image,_,_,_,_ = preprocess(image,target_height=cfg.DATA.hin,target_width=cfg.DATA.win) 52 | image_show=image.copy() 53 | 54 | image = image.astype(np.float32) 55 | 56 | tmp_input = MNN.Tensor((1, cfg.DATA.hin, cfg.DATA.win,3 ), MNN.Halide_Type_Float,\ 57 | image, MNN.Tensor_DimensionType_Tensorflow) 58 | #construct tensor from np.ndarray 59 | input_tensor.copyFrom(tmp_input) 60 | 61 | ### caution!!!!!!!!!!!!!!!! 
the model is nhwc 62 | 63 | interpreter.resizeSession(session) 64 | interpreter.runSession(session) 65 | 66 | output_tensor = interpreter.getSessionOutputAll(session) 67 | 68 | boxes=output_tensor['tower_0/concat_1'].getData() 69 | print(boxes) 70 | boxes=np.reshape(boxes,newshape=[100,6]) 71 | print(boxes.shape) 72 | for i in range(len(boxes)): 73 | bbox = boxes[i] 74 | print(bbox) 75 | if bbox[4]>thres: 76 | 77 | 78 | 79 | cv2.rectangle(image_show, (int(bbox[0]), int(bbox[1])), 80 | (int(bbox[2]), int(bbox[3])), (255, 0, 0), 4) 81 | str_draw = '%s:%.2f' % (coco_map[int(bbox[5])][1], bbox[4]) 82 | cv2.putText(image_show, str_draw, (int(bbox[0]), int(bbox[1])), cv2.FONT_HERSHEY_SIMPLEX, 2, 83 | (255, 0, 255), 2) 84 | 85 | cv2.imshow('mnn result',image_show) 86 | cv2.waitKey(0) 87 | 88 | if __name__ == "__main__": 89 | 90 | import argparse 91 | 92 | parser = argparse.ArgumentParser() 93 | parser.add_argument('--mnn_model', type=str, default='./centernet.mnn', help='the mnn model ', required=False) 94 | parser.add_argument('--imgDir', type=str, default='../pubdata/mscoco/val2017', help='the image dir to detect') 95 | parser.add_argument('--thres', type=float, default=0.3, help='the thres for detect') 96 | args = parser.parse_args() 97 | 98 | data_dir = args.imgDir 99 | model_path=args.mnn_model 100 | thres=args.thres 101 | inference(model_path,data_dir,thres) 102 | -------------------------------------------------------------------------------- /configs/face/face_mbv3_config.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | import os 4 | import numpy as np 5 | from easydict import EasyDict as edict 6 | 7 | config = edict() 8 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" ##if u use muti gpu set them visiable there and then set config.TRAIN.num_gpu 9 | config.TRAIN = edict() 10 | 11 | #### below are params for dataiter 12 | config.TRAIN.process_num = 2 ### process_num for data provider 13 | config.TRAIN.prefetch_size = 20 ### prefect Q size for data provider 14 | 15 | config.TRAIN.num_gpu = 1 ##match with os.environ["CUDA_VISIBLE_DEVICES"] 16 | config.TRAIN.batch_size = 24 ###A big batch size may achieve a better result, but the memory is a problem 17 | config.TRAIN.log_interval = 10 18 | config.TRAIN.epoch = 300 ###just keep training , evaluation shoule be take care by yourself, 19 | ### generally 10,0000 iters is enough 20 | 21 | config.TRAIN.train_set_size=13000 ###widerface train size 22 | config.TRAIN.val_set_size=3000 ###widerface val size 23 | 24 | config.TRAIN.iter_num_per_epoch = config.TRAIN.train_set_size // config.TRAIN.num_gpu // config.TRAIN.batch_size 25 | config.TRAIN.val_iter=config.TRAIN.val_set_size// config.TRAIN.num_gpu // config.TRAIN.batch_size 26 | 27 | config.TRAIN.lr_value_every_step = [0.0001,0.001,0.01,0.001,0.00001,0.0000025] ##warm up is used 28 | config.TRAIN.lr_decay_every_step = [500,1000,40000,50000,60000] 29 | config.TRAIN.lr_decay='cos' 30 | config.TRAIN.opt='adam' 31 | config.TRAIN.weight_decay_factor = 1.e-5 ##l2 regular 32 | config.TRAIN.vis=False ##check data flag 33 | config.TRAIN.mix_precision=False 34 | config.TRAIN.gradient_clip=False 35 | 36 | 37 | config.TRAIN.norm='BN' ##'GN' OR 'BN' 38 | config.TRAIN.lock_basenet_bn=False 39 | config.TRAIN.frozen_stages=-1 ##no freeze 40 | 41 | config.DATA = edict() 42 | config.DATA.root_path='' 43 | config.DATA.train_txt_path='train.txt' 44 | config.DATA.val_txt_path='val.txt' 45 | config.DATA.num_category=1 ###face 1 voc 20 coco 80 46 | 
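# centernet predicts one heatmap channel per foreground category, so num_class
# stays equal to num_category and no extra background class is added (hence the
# commented-out '+1 background' below)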
config.DATA.num_class = config.DATA.num_category # +1 background 47 | 48 | config.DATA.PIXEL_MEAN = [127.] ###rgb 49 | config.DATA.PIXEL_STD = [127.] 50 | 51 | config.DATA.hin = 512 # input size 52 | config.DATA.win = 512 53 | config.DATA.channel = 3 54 | config.DATA.max_size=[config.DATA.hin,config.DATA.win] ##h,w 55 | config.DATA.cover_obj=6 ###cover the small objs 56 | config.DATA.max_objs=1333 57 | 58 | 59 | config.DATA.mutiscale=False #if muti scale set False then config.DATA.MAX_SIZE will be the inputsize 60 | config.DATA.scales=(320,640) 61 | config.DATA.use_int8_data=True ### we use uint8 data to decrease memery access to speed up 62 | config.DATA.use_int8_enlarge=255. 63 | config.DATA.cracy_crop=0.3 64 | config.DATA.alpha=0.54*2 65 | config.DATA.beta=0.54 66 | ##mobilenetv3 as basemodel 67 | config.MODEL = edict() 68 | config.MODEL.continue_train=False ### revover from a trained model 69 | config.MODEL.model_path = './model/' # save directory 70 | config.MODEL.net_structure='MobilenetV3' ######'resnet_v1_50,resnet_v1_101,MobilenetV2 71 | config.MODEL.pretrained_model='./v3-small-minimalistic_224_1.0_float/ema/model-498000' 72 | config.MODEL.task='face' 73 | config.MODEL.min_overlap=0.6 74 | config.MODEL.max_box= 1333 75 | 76 | config.MODEL.global_stride=4 77 | 78 | config.MODEL.head_dims=[32,32,32,32] 79 | config.MODEL.prehead_dims=[96,48] 80 | 81 | config.MODEL.deployee= False ### tensorflow, mnn, coreml 82 | if config.MODEL.deployee: 83 | config.TRAIN.batch_size = 1 84 | config.TRAIN.lock_basenet_bn=True 85 | 86 | -------------------------------------------------------------------------------- /configs/mscoco/mbv3_config.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | import os 4 | import numpy as np 5 | from easydict import EasyDict as edict 6 | 7 | config = edict() 8 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" ##if u use muti gpu set them visiable there and then set config.TRAIN.num_gpu 9 | config.TRAIN = edict() 10 | 11 | #### below are params for dataiter 12 | config.TRAIN.process_num = 4 ### process_num for data provider 13 | config.TRAIN.prefetch_size = 50 ### prefect Q size for data provider 14 | 15 | config.TRAIN.num_gpu = 1 ##match with os.environ["CUDA_VISIBLE_DEVICES"] 16 | config.TRAIN.batch_size = 16 ###A big batch size may achieve a better result, but the memory is a problem 17 | config.TRAIN.log_interval = 10 18 | config.TRAIN.epoch = 300 ###just keep training , evaluation shoule be take care by yourself, 19 | ### generally 10,0000 iters is enough 20 | 21 | config.TRAIN.train_set_size=117266 ###widerface train size 22 | config.TRAIN.val_set_size=5000 ###widerface val size 23 | 24 | config.TRAIN.iter_num_per_epoch = config.TRAIN.train_set_size // config.TRAIN.num_gpu // config.TRAIN.batch_size 25 | config.TRAIN.val_iter=config.TRAIN.val_set_size// config.TRAIN.num_gpu // config.TRAIN.batch_size 26 | 27 | config.TRAIN.lr_value_every_step = [0.00001,0.0001,0.001,0.0001,0.00001,0.000001] ##warm up is used 28 | config.TRAIN.lr_decay_every_step = [500,1000,300000,400000,450000] 29 | config.TRAIN.lr_decay_every_step = [int(x//config.TRAIN.num_gpu) for x in config.TRAIN.lr_decay_every_step] 30 | 31 | config.TRAIN.lr_decay='step' 32 | 33 | config.TRAIN.opt='adam' 34 | config.TRAIN.weight_decay_factor = 1.e-5 ##l2 regular 35 | config.TRAIN.vis=False ##check data flag 36 | config.TRAIN.mix_precision=False 37 | 38 | config.TRAIN.norm='BN' ##'GN' OR 'BN' 39 | config.TRAIN.lock_basenet_bn=False 40 | 
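# which config module is active is decided in train_config.py at the repo root:
# it currently sets `config = mb3_config`, i.e. this file; the other configs
# only take effect when train_config.py imports them instead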
config.TRAIN.frozen_stages=-1 ##no freeze 41 | config.TRAIN.gradient_clip=False 42 | 43 | config.DATA = edict() 44 | config.DATA.root_path='' 45 | config.DATA.train_txt_path='train.txt' 46 | config.DATA.val_txt_path='val.txt' 47 | config.DATA.num_category=80 ###face 1 voc 20 coco 80 48 | config.DATA.num_class = config.DATA.num_category 49 | 50 | 51 | config.DATA.hin = 512 # input size 52 | config.DATA.win = 512 53 | config.DATA.channel = 3 54 | config.DATA.max_size=[config.DATA.hin,config.DATA.win] ##h,w 55 | config.DATA.cover_obj=8 ###cover the small objs 56 | 57 | config.DATA.mutiscale=False #if muti scale set False then config.DATA.MAX_SIZE will be the inputsize 58 | config.DATA.scales=(320,640) 59 | config.DATA.use_int8_data=True 60 | config.DATA.use_int8_enlarge=255. ### use uint8 for heatmap generate for less memory acc, to speed up 61 | config.DATA.max_objs=128 62 | config.DATA.cracy_crop=0.3 63 | config.DATA.alpha=0.54 64 | config.DATA.beta=0.54 65 | ##mobilenetv3 as basemodel 66 | config.MODEL = edict() 67 | config.MODEL.continue_train=False ### revover from a trained model 68 | config.MODEL.model_path = './model/' # save directory 69 | config.MODEL.net_structure='MobilenetV3' 70 | config.MODEL.size=0.75 71 | config.MODEL.pretrained_model='./v3-large_224_0.75_float/ema/model-220000' 72 | config.MODEL.task='mscoco' 73 | config.MODEL.min_overlap=0.7 74 | config.MODEL.max_box= 100 75 | 76 | config.MODEL.global_stride=4 77 | config.MODEL.head_dims=[256,192,128] 78 | config.MODEL.prehead_dims=[128,48] ##no pre head 79 | 80 | 81 | config.MODEL.deployee= False ### tensorflow, mnn, coreml 82 | if config.MODEL.deployee: 83 | config.TRAIN.batch_size = 1 84 | config.TRAIN.lock_basenet_bn=True 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /configs/face/face_shufflenet_5x5_config.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | import os 4 | import numpy as np 5 | from easydict import EasyDict as edict 6 | 7 | config = edict() 8 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" ##if u use muti gpu set them visiable there and then set config.TRAIN.num_gpu 9 | config.TRAIN = edict() 10 | 11 | #### below are params for dataiter 12 | config.TRAIN.process_num = 2 ### process_num for data provider 13 | config.TRAIN.prefetch_size = 20 ### prefect Q size for data provider 14 | 15 | config.TRAIN.num_gpu = 1 ##match with os.environ["CUDA_VISIBLE_DEVICES"] 16 | config.TRAIN.batch_size = 32 ###A big batch size may achieve a better result, but the memory is a problem 17 | config.TRAIN.log_interval = 10 18 | config.TRAIN.epoch = 300 ###just keep training , evaluation shoule be take care by yourself, 19 | ### generally 10,0000 iters is enough 20 | 21 | config.TRAIN.train_set_size=13000 ###widerface train size 22 | config.TRAIN.val_set_size=3000 ###widerface val size 23 | 24 | config.TRAIN.iter_num_per_epoch = config.TRAIN.train_set_size // config.TRAIN.num_gpu // config.TRAIN.batch_size 25 | config.TRAIN.val_iter=config.TRAIN.val_set_size// config.TRAIN.num_gpu // config.TRAIN.batch_size 26 | 27 | config.TRAIN.lr_value_every_step = [0.0001,0.001,0.01,0.001,0.00001,0.0000025] ##warm up is used 28 | config.TRAIN.lr_decay_every_step = [500,1000,60000,80000,100000] 29 | config.TRAIN.lr_decay='cos' 30 | config.TRAIN.opt='adam' 31 | config.TRAIN.weight_decay_factor = 1.e-4 ##l2 regular 32 | config.TRAIN.vis=False ##check data flag 33 | config.TRAIN.mix_precision=False 34 | 
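# for reference, the iteration counts above work out to 13000 // 1 // 32 = 406
# training iterations per epoch and 3000 // 1 // 32 = 93 validation iterations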
config.TRAIN.gradient_clip=False 35 | 36 | 37 | config.TRAIN.norm='BN' ##'GN' OR 'BN' 38 | config.TRAIN.lock_basenet_bn=False 39 | config.TRAIN.frozen_stages=-1 ##no freeze 40 | 41 | config.DATA = edict() 42 | config.DATA.root_path='' 43 | config.DATA.train_txt_path='train.txt' 44 | config.DATA.val_txt_path='val.txt' 45 | config.DATA.num_category=1 ###face 1 voc 20 coco 80 46 | config.DATA.num_class = config.DATA.num_category # +1 background 47 | 48 | config.DATA.PIXEL_MEAN = [127.] ###rgb 49 | config.DATA.PIXEL_STD = [127.] 50 | 51 | config.DATA.hin = 384 # input size 52 | config.DATA.win = 384 53 | config.DATA.channel = 3 54 | config.DATA.max_size=[config.DATA.hin,config.DATA.win] ##h,w 55 | config.DATA.cover_obj=6 ###cover the small objs 56 | config.DATA.max_objs=1333 57 | 58 | 59 | config.DATA.mutiscale=False #if muti scale set False then config.DATA.MAX_SIZE will be the inputsize 60 | config.DATA.scales=(320,640) 61 | config.DATA.use_int8_data=True ### we use uint8 data to decrease memery access to speed up 62 | config.DATA.use_int8_enlarge=255. 63 | config.DATA.cracy_crop=0.3 64 | config.DATA.alpha=0.54*2 65 | config.DATA.beta=0.54*2 66 | ##mobilenetv3 as basemodel 67 | config.MODEL = edict() 68 | config.MODEL.continue_train=False ### revover from a trained model 69 | config.MODEL.model_path = './model/' # save directory 70 | config.MODEL.net_structure='ShuffleNetV2_5x5' ######'resnet_v1_50,resnet_v1_101,MobilenetV2 71 | config.MODEL.size='0.5x' 72 | config.MODEL.pretrained_model='./model/cls_for_convert.ckpt' 73 | config.MODEL.task='face' 74 | config.MODEL.min_overlap=0.6 75 | config.MODEL.max_box= 1333 76 | 77 | config.MODEL.global_stride=4 78 | 79 | config.MODEL.head_dims=[96,48,32] 80 | config.MODEL.prehead_dims=[128,48] ##no pre head 81 | 82 | config.MODEL.deployee= False ### tensorflow, mnn, coreml 83 | if config.MODEL.deployee: 84 | config.TRAIN.batch_size = 1 85 | config.TRAIN.lock_basenet_bn=True 86 | 87 | -------------------------------------------------------------------------------- /configs/mscoco/shufflenetplus_config.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | import os 4 | import numpy as np 5 | from easydict import EasyDict as edict 6 | 7 | config = edict() 8 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" ##if u use muti gpu set them visiable there and then set config.TRAIN.num_gpu 9 | config.TRAIN = edict() 10 | 11 | #### below are params for dataiter 12 | config.TRAIN.process_num = 3 ### process_num for data provider 13 | config.TRAIN.prefetch_size = 20 ### prefect Q size for data provider 14 | 15 | config.TRAIN.num_gpu = 1 ##match with os.environ["CUDA_VISIBLE_DEVICES"] 16 | config.TRAIN.batch_size = 16 ###A big batch size may achieve a better result, but the memory is a problem 17 | config.TRAIN.log_interval = 10 18 | config.TRAIN.epoch = 300 ###just keep training , evaluation shoule be take care by yourself, 19 | ### generally 10,0000 iters is enough 20 | 21 | config.TRAIN.train_set_size=117266 ###widerface train size 22 | config.TRAIN.val_set_size=5000 ###widerface val size 23 | 24 | config.TRAIN.iter_num_per_epoch = config.TRAIN.train_set_size // config.TRAIN.num_gpu // config.TRAIN.batch_size 25 | config.TRAIN.val_iter=config.TRAIN.val_set_size// config.TRAIN.num_gpu // config.TRAIN.batch_size 26 | 27 | config.TRAIN.lr_value_every_step = [0.00001,0.0001,0.00025,0.000025,0.0000025,0.00000025] ##warm up is used 28 | config.TRAIN.lr_decay_every_step = [200,400,200000,300000,400000] 29 | 30 | 
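# read together with lr_value_every_step above, this appears to define a
# piecewise schedule: 1e-5 for the first 200 iters (warm up), 1e-4 until 400,
# 2.5e-4 until 200k, 2.5e-5 until 300k, 2.5e-6 until 400k, then 2.5e-7
# (smoothed by the 'cos' decay selected below)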
config.TRAIN.lr_decay='cos' 31 | config.TRAIN.opt='adam' 32 | config.TRAIN.weight_decay_factor = 1.e-4 ##l2 regular 33 | config.TRAIN.vis=False ##check data flag 34 | config.TRAIN.mix_precision=False 35 | 36 | config.TRAIN.norm='BN' ##'GN' OR 'BN' 37 | config.TRAIN.lock_basenet_bn=False 38 | config.TRAIN.frozen_stages=-1 ##no freeze 39 | config.TRAIN.gradient_clip=False 40 | 41 | config.DATA = edict() 42 | config.DATA.root_path='' 43 | config.DATA.train_txt_path='train.txt' 44 | config.DATA.val_txt_path='val.txt' 45 | config.DATA.num_category=80 ###face 1 voc 20 coco 80 46 | config.DATA.num_class = config.DATA.num_category 47 | 48 | config.DATA.PIXEL_MEAN = [127.] ###rgb 49 | config.DATA.PIXEL_STD = [127.] 50 | 51 | config.DATA.hin = 520 # input size 52 | config.DATA.win = 520 53 | config.DATA.channel = 3 54 | config.DATA.max_size=[config.DATA.hin,config.DATA.win] ##h,w 55 | config.DATA.cover_obj=4 ###cover the small objs 56 | 57 | config.DATA.mutiscale=False #if muti scale set False then config.DATA.MAX_SIZE will be the inputsize 58 | config.DATA.scales=(320,640) 59 | config.DATA.use_int8_data=True 60 | config.DATA.use_int8_enlarge=255. ### use uint8 for heatmap generate for less memory acc, to speed up 61 | config.DATA.max_objs=128 62 | config.DATA.cracy_crop=0.5 63 | config.DATA.alpha=0.54 64 | config.DATA.beta=0.54 65 | 66 | 67 | ##mobilenetv3 as basemodel 68 | config.MODEL = edict() 69 | config.MODEL.continue_train=False ### revover from a trained model 70 | config.MODEL.model_path = './model/' # save directory 71 | config.MODEL.net_structure='ShuffleNetV2_Plus' ######'resnet_v1_50,resnet_v1_101,MobilenetV2 72 | config.MODEL.size='Small' 73 | config.MODEL.pretrained_model=None#'ShuffleNetV2+Small/ShuffleNetV2+Small.ckpt' 74 | config.MODEL.task='mscoco' 75 | config.MODEL.min_overlap=0.7 76 | config.MODEL.max_box= 100 77 | config.MODEL.offset= True 78 | config.MODEL.global_stride=4 79 | config.MODEL.head_dims=[64*3,64*3,32*3] 80 | 81 | config.MODEL.deployee= False ### tensorflow, mnn, coreml 82 | if config.MODEL.deployee: 83 | config.TRAIN.batch_size = 1 84 | config.TRAIN.lock_basenet_bn=True 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /configs/mscoco/shufflenet_5x5_config.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | import os 4 | import numpy as np 5 | from easydict import EasyDict as edict 6 | 7 | config = edict() 8 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" ##if u use muti gpu set them visiable there and then set config.TRAIN.num_gpu 9 | config.TRAIN = edict() 10 | 11 | #### below are params for dataiter 12 | config.TRAIN.process_num = 3 ### process_num for data provider 13 | config.TRAIN.prefetch_size = 50 ### prefect Q size for data provider 14 | 15 | config.TRAIN.num_gpu = 1 ##match with os.environ["CUDA_VISIBLE_DEVICES"] 16 | config.TRAIN.batch_size = 16 ###A big batch size may achieve a better result, but the memory is a problem 17 | config.TRAIN.log_interval = 10 18 | config.TRAIN.epoch = 300 ###just keep training , evaluation shoule be take care by yourself, 19 | ### generally 10,0000 iters is enough 20 | 21 | config.TRAIN.train_set_size=117266 ###widerface train size 22 | config.TRAIN.val_set_size=5000 ###widerface val size 23 | 24 | config.TRAIN.iter_num_per_epoch = config.TRAIN.train_set_size // config.TRAIN.num_gpu // config.TRAIN.batch_size 25 | config.TRAIN.val_iter=config.TRAIN.val_set_size// config.TRAIN.num_gpu // config.TRAIN.batch_size 26 
| 27 | config.TRAIN.lr_value_every_step = [0.00001,0.0001,0.001,0.0001,0.00001,0.000001] ##warm up is used 28 | config.TRAIN.lr_decay_every_step = [500,1000,300000,400000,500000] 29 | config.TRAIN.lr_decay_every_step = [int(x//config.TRAIN.num_gpu) for x in config.TRAIN.lr_decay_every_step] 30 | 31 | 32 | config.TRAIN.lr_decay='step' 33 | config.TRAIN.opt='adam' 34 | config.TRAIN.weight_decay_factor = 1.e-5 ##l2 regular 35 | config.TRAIN.vis=True 36 | ##check data flag 37 | config.TRAIN.mix_precision=False 38 | 39 | config.TRAIN.norm='BN' ##'GN' OR 'BN' 40 | config.TRAIN.lock_basenet_bn=False 41 | config.TRAIN.frozen_stages=-1 ##no freeze 42 | config.TRAIN.gradient_clip=False 43 | 44 | config.DATA = edict() 45 | config.DATA.root_path='' 46 | config.DATA.train_txt_path='train.txt' 47 | config.DATA.val_txt_path='val.txt' 48 | config.DATA.num_category=80 ###face 1 voc 20 coco 80 49 | config.DATA.num_class = config.DATA.num_category 50 | 51 | config.DATA.PIXEL_MEAN = [127.] ###rgb 52 | config.DATA.PIXEL_STD = [127.] 53 | 54 | config.DATA.hin = 416 # input size 55 | config.DATA.win = 416 56 | config.DATA.channel = 3 57 | config.DATA.max_size=[config.DATA.hin,config.DATA.win] ##h,w 58 | config.DATA.cover_obj=4 ###cover the small objs 59 | 60 | config.DATA.mutiscale=False #if muti scale set False then config.DATA.MAX_SIZE will be the inputsize 61 | config.DATA.scales=(320,640) 62 | config.DATA.use_int8_data=True 63 | config.DATA.use_int8_enlarge=255. ### use uint8 for heatmap generate for less memory acc, to speed up 64 | config.DATA.max_objs=128 65 | config.DATA.cracy_crop=0.3 66 | config.DATA.alpha=0.54 67 | config.DATA.beta=0.54 68 | 69 | 70 | ##mobilenetv3 as basemodel 71 | config.MODEL = edict() 72 | config.MODEL.continue_train=False ### revover from a trained model 73 | config.MODEL.model_path = './model/' # save directory 74 | config.MODEL.net_structure='ShuffleNetV2_5x5' ######'resnet_v1_50,resnet_v1_101,MobilenetV2 75 | config.MODEL.size='1.0x' 76 | config.MODEL.pretrained_model=None 77 | config.MODEL.task='mscoco' 78 | config.MODEL.min_overlap=0.7 79 | config.MODEL.max_box= 100 80 | config.MODEL.offset= True 81 | config.MODEL.global_stride=4 82 | 83 | config.MODEL.head_dims=[192,160,128] 84 | config.MODEL.prehead_dims=[128,48] ##no pre head 85 | 86 | config.MODEL.deployee= False ### tensorflow, mnn, coreml 87 | if config.MODEL.deployee: 88 | config.TRAIN.batch_size = 1 89 | config.TRAIN.lock_basenet_bn=True 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /lib/core/model/net/mobilenetv3/backbone.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | 4 | from train_config import config as cfg 5 | 6 | from lib.core.model.net.mobilenetv3 import mobilnet_v3 7 | from lib.core.model.net.mobilenet.mobilenet import training_scope 8 | from lib.core.model.net.mobilenetv3.mobilnet_v3 import hard_swish 9 | 10 | def mobilenetv3_large_detection(image,is_training=True): 11 | 12 | arg_scope = training_scope(weight_decay=cfg.TRAIN.weight_decay_factor, is_training=is_training) 13 | 14 | with tf.contrib.slim.arg_scope(arg_scope): 15 | 16 | _, endpoints = mobilnet_v3.large(image, 17 | depth_multiplier=cfg.MODEL.size, 18 | is_training=is_training, 19 | base_only=True, 20 | finegrain_classification_mode=False) 21 | 22 | for k,v in endpoints.items(): 23 | print('mobile backbone output:',k,v) 24 | 25 | extern_conv = slim.conv2d(_, 26 | 480, 27 | [1, 1], 28 
| stride=1, 29 | padding='SAME', 30 | activation_fn=hard_swish, 31 | scope='extern1') 32 | 33 | print(extern_conv) 34 | mobilebet_fms = [endpoints['layer_5/expansion_output'], 35 | endpoints['layer_7/expansion_output'], 36 | endpoints['layer_13/output'], 37 | extern_conv] 38 | 39 | return mobilebet_fms 40 | 41 | 42 | def mobilenetv3_small_minimalistic(image,is_training=True): 43 | 44 | arg_scope = training_scope(weight_decay=cfg.TRAIN.weight_decay_factor, is_training=is_training) 45 | 46 | with tf.contrib.slim.arg_scope(arg_scope): 47 | if cfg.DATA.channel==1: 48 | if cfg.MODEL.global_stride==8: 49 | stride=2 50 | else: 51 | stride=1 52 | image = slim.separable_conv2d(image, 53 | 3, 54 | [3, 3], 55 | stride=stride, 56 | padding='SAME', 57 | scope='preconv') 58 | 59 | final_feature, endpoints = mobilnet_v3.small_minimalistic(image, 60 | depth_multiplier=1.0, 61 | is_training=is_training, 62 | base_only=True, 63 | finegrain_classification_mode=False) 64 | 65 | extern_conv=slim.separable_conv2d(final_feature, 128, 66 | [3, 3], 67 | stride=2, 68 | padding='SAME', 69 | scope='extern1') 70 | extern_conv = slim.separable_conv2d(extern_conv, 96, 71 | [3, 3], 72 | padding='SAME', 73 | scope='extern2') 74 | extern_conv = slim.separable_conv2d(extern_conv, 128, 75 | [3, 3], 76 | padding='SAME', 77 | scope='extern3') 78 | 79 | 80 | for k,v in endpoints.items(): 81 | print('mobile backbone output:',k,v) 82 | 83 | mobilebet_fms=[endpoints['layer_3/expansion_output'], 84 | endpoints['layer_5/expansion_output'], 85 | endpoints['layer_9/expansion_output'], 86 | #final_feature, 87 | extern_conv] 88 | 89 | 90 | return mobilebet_fms 91 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mobilenetv3_centernet 2 | 3 | 4 | 5 | 6 | ### there is a [pytorch version](https://github.com/610265158/mobile_centernet), trained with mobilenetv2, it is more simple. 7 | 8 | ## introduction 9 | 10 | This is a tensorflow implement mobilenetv3-centernet framework, 11 | which can be easily deployeed on Android(MNN) and IOS(CoreML) mobile devices, end to end. 12 | 13 | Purpose: Light detection algorithms that work on mobile devices is widely used, 14 | such as face detection. 15 | So there is an easy project contains model training and model converter. 16 | 17 | ** contact me if u have question 2120140200@mail.nankai.edu.cn ** 18 | 19 | 20 | 21 | ## pretrained model , and preformance 22 | 23 | ### mscoco 24 | 25 | no test time augmentation. 26 | | model |input_size |map | map@0.5|map@0.75| 27 | | :------: |:------: |:------: |:------: |:------: | 28 | |[mbv3-large-0.75-modified_head](https://drive.google.com/drive/folders/13zvokhOmfSexXNt6fDeFvjedllvLMJfZ?usp=sharing) |512x512 | 0.251| 0.423|0.258 | 29 | 30 | 31 | ## requirment 32 | 33 | + tensorflow 1.14 34 | 35 | + tensorpack 0.9.9 (for data provider) 36 | 37 | + opencv 38 | 39 | + python 3.6 40 | 41 | + MNNConverter 42 | 43 | + coremltools 44 | 45 | ## useage 46 | 47 | ### MSCOCO 48 | 49 | #### train 50 | 1. download mscoco data, then run `python prepare_coco_data.py --mscocodir ./mscoco` 51 | 52 | 2. download pretrained model from 53 | [mbv3-large0.75](https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large_224_0.75_float.tgz) 54 | relese it in the current dir. 55 | 56 | 3. 
then, modify in config=mb3_config in train_config.py, then run: 57 | 58 | ```python train.py``` 59 | 60 | and if u want to check the data when training, u could set vis in confifs/mscoco/mbv3_config.py as True 61 | 62 | 4. After training, freeze the model as .pb by 63 | 64 | ` python tools/auto_freeze.py --pretrained_mobile ./model/yourmodel.ckpt` 65 | 66 | it will produce a detector.pb 67 | 68 | 69 | #### evaluation 70 | 71 | ``` 72 | python model_eval/custome_eval.py [--model [TRAINED_MODEL]] [--annFile [cocostyle annFile]] 73 | [--imgDir [the images dir]] [--is_show [show the result]] 74 | 75 | python model_eval/custome_eval.py --model model/detector.pb 76 | --annFile ../mscoco/annotations/instances_val2017.json 77 | --imgDir ../mscoco/val2017 78 | --is_show 1 79 | 80 | ps, no test time augmentation is used. 81 | ``` 82 | 83 | 84 | ### finetune 85 | 1. download the trained model, 86 | modify the config config.MODEL.pretrained_model='yourmodel.ckpt', 87 | and set config.MODEL.continue_train=True 88 | 2. `python train.py` 89 | 90 | 91 | ### visualization 92 | 93 | if u get a trained model and dont need to work on mobile device, run `python tools/auto_freeze.py`, it will read the checkpoint file in ./model, and produce detector.pb, then 94 | 95 | `python visualization/vis.py` 96 | 97 | u can check th code in visualization to make it runable, it's simple. 98 | 99 | 100 | ### model convert for mobile device 101 | I have carefully processed the postprocess, and it can works within the model, so it could be deployed end to end. 102 | 103 | 4.1 MNN 104 | 105 | + 4.1.1 convert model 106 | 107 | just use the MNN converter, for example: 108 | `./MNNConvert -f TF --modelFile detector.pb --MNNModel centernet.mnn --bizCode biz --fp16 1` 109 | 110 | + 4.1.2 visualization with mnn python wrapper 111 | 112 | `python visualization/vis_with_mnn.py --mnn_model centernet.mnn --imgDir 'your image dir'` 113 | 114 | 4.2 coreml 115 | 116 | + 4.2.1 convert 117 | 118 | `python tools/converter_to_coreml.py` 119 | 120 | + 4.2.2 visualization with coreml python wrapper 121 | 122 | `python visualization/vis_with_coreml.py --coreml_model centernet.mlmodel --imgDir 'your image dir'` 123 | 124 | ps, if you want to do quantization, please reffer to the official doc, it is easy. 125 | 126 | ### TODO: 127 | - [ ] Android project. 
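### minimal python inference example

A short sketch of running the frozen `detector.pb` from python, following the way `visulization/vis.py` and `model_eval/custome_eval.py` call the `FaceDetector` wrapper; the image path below is only a placeholder, and the input size comes from the active config. For the mscoco model each returned row is `[xmin, ymin, xmax, ymax, score, class_id]`.

```
import cv2
from train_config import config as cfg
from lib.core.api.face_detector import FaceDetector

# frozen graph produced by tools/auto_freeze.py
detector = FaceDetector(['./model/detector.pb'])

img = cv2.imread('your_image.jpg')               # placeholder path
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)   # the detector expects RGB input

# boxes: numpy array of shape [N, 6], rows are [xmin, ymin, xmax, ymax, score, class_id]
boxes = detector(img_rgb, 0.3, input_shape=(cfg.DATA.hin, cfg.DATA.win))

for x1, y1, x2, y2, score, cls in boxes:
    cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (255, 0, 0), 2)
cv2.imwrite('result.jpg', img)
```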
128 | -------------------------------------------------------------------------------- /configs/mscoco/resnet_config.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | import os 4 | import numpy as np 5 | from easydict import EasyDict as edict 6 | 7 | config = edict() 8 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" ##if u use muti gpu set them visiable there and then set config.TRAIN.num_gpu 9 | config.TRAIN = edict() 10 | 11 | #### below are params for dataiter 12 | config.TRAIN.process_num = 4 ### process_num for data provider 13 | config.TRAIN.prefetch_size = 20 ### prefect Q size for data provider 14 | 15 | config.TRAIN.num_gpu = 1 ##match with os.environ["CUDA_VISIBLE_DEVICES"] 16 | config.TRAIN.batch_size = 16 ###A big batch size may achieve a better result, but the memory is a problem 17 | config.TRAIN.log_interval = 10 18 | config.TRAIN.epoch = 300 ###just keep training , evaluation shoule be take care by yourself, 19 | ### generally 10,0000 iters is enough 20 | 21 | config.TRAIN.train_set_size=117266 ###coco train size 22 | config.TRAIN.val_set_size=5000 ###coco val size 23 | 24 | config.TRAIN.iter_num_per_epoch = config.TRAIN.train_set_size // config.TRAIN.num_gpu // config.TRAIN.batch_size 25 | config.TRAIN.val_iter=config.TRAIN.val_set_size// config.TRAIN.num_gpu // config.TRAIN.batch_size 26 | 27 | config.TRAIN.lr_value_every_step = [0.00001,0.0001,0.00025,0.0001,0.00001,0.000001] ##warm up is used 28 | config.TRAIN.lr_decay_every_step = [500,1000,150000,200000,250000] 29 | 30 | config.TRAIN.opt='adam' 31 | config.TRAIN.weight_decay_factor = 5.e-5 ##l2 regular 32 | config.TRAIN.vis=False ##check data flag 33 | config.TRAIN.mix_precision=True 34 | 35 | config.TRAIN.norm='BN' ##'GN' OR 'BN' 36 | config.TRAIN.lock_basenet_bn=False 37 | config.TRAIN.frozen_stages=-1 ##no freeze 38 | 39 | config.DATA = edict() 40 | config.DATA.root_path='' 41 | config.DATA.train_txt_path='train.txt' 42 | config.DATA.val_txt_path='val.txt' 43 | config.DATA.num_category=80 ###face 1 voc 20 coco 80 44 | config.DATA.num_class = config.DATA.num_category # +1 background 45 | 46 | config.DATA.PIXEL_MEAN = [127.] ###rgb 47 | config.DATA.PIXEL_STD = [127.] 48 | 49 | config.DATA.hin = 416 # input size 50 | config.DATA.win = 416 51 | config.DATA.channel = 3 52 | config.DATA.max_size=[config.DATA.hin,config.DATA.win] ##h,w 53 | config.DATA.cover_small_face=0 ###cover the small faces 54 | 55 | config.DATA.mutiscale=False #if muti scale set False then config.DATA.MAX_SIZE will be the inputsize 56 | config.DATA.scales=(320,640) 57 | config.DATA.use_int8_data=True 58 | config.DATA.use_int8_enlarge=255. 59 | 60 | # anchors ------------------------- 61 | config.ANCHOR = edict() 62 | config.ANCHOR.rect=False 63 | config.ANCHOR.rect_longer=False #### make anchor h/w=1.5 64 | config.ANCHOR.ANCHOR_STRIDE = 16 65 | config.ANCHOR.ANCHOR_SIZES = (32, 64, 128, 256, 320) # sqrtarea of the anchor box 66 | config.ANCHOR.ANCHOR_STRIDES = (8, 16, 32, 64, 128) # strides for each FPN level. Must be the same length as ANCHOR_SIZES 67 | config.ANCHOR.ANCHOR_RATIOS = (0.25, 1., 4.) 
###### squrae 68 | config.ANCHOR.ANCHOR_SCALES = (2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)) ###### 1:2 in size, 69 | config.ANCHOR.POSITIVE_ANCHOR_THRESH = 0.5 70 | config.ANCHOR.NEGATIVE_ANCHOR_THRESH = 0.4 71 | 72 | ##mobilenetv3 as basemodel 73 | config.MODEL = edict() 74 | config.MODEL.continue_train=False ### revover from a trained model 75 | config.MODEL.model_path = './model/' # save directory 76 | config.MODEL.net_structure='resnet_v2_50' ######'resnet_v1_50,resnet_v1_101,MobilenetV2 77 | config.MODEL.pretrained_model='resnet_v2_50.ckpt' 78 | config.MODEL.fpn_dims=[256,256,256,256,256] 79 | config.MODEL.face=False 80 | config.MODEL.min_overlap=0.7 81 | 82 | config.MODEL.focal_loss=True 83 | config.MODEL.fpn=True 84 | config.MODEL.max_negatives_per_positive= 3.0 85 | 86 | 87 | config.MODEL.deployee= False ### tensorflow, mnn, coreml 88 | if config.MODEL.deployee: 89 | config.TRAIN.batch_size = 1 90 | 91 | config.MODEL.iou_thres= 0.05 92 | config.MODEL.score_thres= 0.3 93 | config.MODEL.max_box= 1500 94 | -------------------------------------------------------------------------------- /lib/core/anchor/tf_anchors.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import sys 4 | sys.path.append('.') 5 | import tensorflow as tf 6 | import numpy as np 7 | from train_config import config as cfg 8 | 9 | from lib.core.anchor.anchor import CellAnchor 10 | 11 | 12 | def get_all_anchors(max_size,stride=None, sizes=None): 13 | """ 14 | Get all anchors in the largest possible image, shifted, floatbox 15 | Args: 16 | max_size(int) : h w 17 | stride (int): the stride of anchors. 18 | sizes (tuple[int]): the sizes (sqrt area) of anchors 19 | 20 | Returns: 21 | anchors: SxSxNUM_ANCHORx4, where S == ceil(MAX_SIZE/STRIDE), floatbox 22 | The layout in the NUM_ANCHOR dim is NUM_RATIO x NUM_SIZE. 23 | 24 | """ 25 | if stride is None: 26 | stride = cfg.ANCHOR.ANCHOR_STRIDE 27 | if sizes is None: 28 | sizes = cfg.ANCHOR.ANCHOR_SIZES 29 | # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors 30 | # are centered on stride / 2, have (approximate) sqrt areas of the specified 31 | # sizes, and aspect ratios as given. 32 | cell_anchors = CellAnchor.generate_cell_anchor( 33 | stride, 34 | scales=np.array(sizes, dtype=np.float32) / stride, 35 | ratios=np.array(cfg.ANCHOR.ANCHOR_RATIOS, dtype=np.float32)) 36 | # anchors are intbox here. 
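# (worked example added for clarity, assuming the FPN path below with 3
# ANCHOR_RATIOS and 3 ANCHOR_SCALES per level) for a 640x640 input at stride 8,
# field_size = ceil(640/8) = 80 and A = 3*3 = 9, so this level yields a
# (80, 80, 9, 4) tensor, i.e. 80*80*9 = 57600 anchors before flattening
# in get_all_anchors_fpn.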
37 | # anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride) 38 | 39 | 40 | field_size_y = tf.cast(tf.ceil(max_size[0] / stride), tf.float32) 41 | field_size_x = tf.cast(tf.ceil(max_size[1] / stride), tf.float32) 42 | shifts_x = tf.range(0, field_size_x) * stride 43 | shifts_y = tf.range(0, field_size_y) * stride 44 | shift_x, shift_y = tf.meshgrid(shifts_x, shifts_y) 45 | 46 | shift_x = tf.reshape(shift_x,shape=[1,-1]) 47 | shift_y = tf.reshape(shift_y,shape=[1,-1]) 48 | 49 | shifts = tf.transpose(tf.concat((shift_x, shift_y, shift_x, shift_y),axis=0)) 50 | # Kx4, K = field_size * field_size 51 | K = shifts.shape[0] 52 | A = cell_anchors.shape[0] 53 | 54 | field_of_anchors = ( 55 | tf.reshape(cell_anchors,shape=[1, A, 4]) + 56 | tf.transpose(tf.reshape(shifts,shape=[1, -1, 4]),(1, 0, 2))) 57 | 58 | field_of_anchors = tf.reshape(field_of_anchors,shape=(field_size_y, field_size_x, A, 4)) 59 | 60 | # FSxFSxAx4 61 | # Many rounding happens inside the anchor code anyway 62 | # assert np.all(field_of_anchors == field_of_anchors.astype('int32')) 63 | 64 | ##scale it to 0 - 1 65 | 66 | h=tf.cast(max_size[0],tf.float32) 67 | w=tf.cast(max_size[1],tf.float32) 68 | 69 | _xx0 = (field_of_anchors[:, :, :, 0:1])/w 70 | _xx1 = (field_of_anchors[:, :, :, 1:2])/h 71 | _xx2 = (field_of_anchors[:, :, :, 2:3]+1)/w 72 | _xx3 = (field_of_anchors[:, :, :, 3:4]+1)/h 73 | field_of_anchors=tf.concat([_xx0,_xx1,_xx2,_xx3],axis=3) 74 | 75 | return field_of_anchors 76 | 77 | def get_all_anchors_fpn(strides=None, sizes=None,scales=None,max_size=[640,640]): 78 | """ 79 | Returns: 80 | [anchors]: each anchors is a SxSx NUM_ANCHOR_RATIOS x4 array. 81 | """ 82 | if strides is None: 83 | strides = cfg.ANCHOR.ANCHOR_STRIDES 84 | if sizes is None: 85 | sizes = cfg.ANCHOR.ANCHOR_SIZES 86 | if scales is None: 87 | scales = cfg.ANCHOR.ANCHOR_SCALES 88 | if max_size is None: 89 | max_size= [cfg.DATA.max_size,cfg.DATA.max_size] 90 | 91 | assert len(strides) == len(sizes) 92 | foas = [] 93 | for stride, size in zip(strides, sizes): 94 | sizes_ = size * np.array(scales) 95 | foa = get_all_anchors(stride=stride, sizes=sizes_,max_size=max_size) 96 | 97 | foas.append(foa) 98 | 99 | flatten_anchors_per_level = [tf.reshape(k,shape=(-1, 4)) for k in foas] 100 | anchors = tf.concat(flatten_anchors_per_level, axis=0) 101 | 102 | ###concat them 103 | return anchors 104 | 105 | 106 | if __name__=='__main__': 107 | import cv2 108 | anchors=get_all_anchors_fpn(max_size=[640,640]) 109 | 110 | init = tf.global_variables_initializer() 111 | with tf.Session() as sess: 112 | sess.run(init) 113 | anchors=sess.run(anchors) 114 | 115 | anchors=np.array(anchors) 116 | print(anchors.shape) 117 | image = np.ones(shape=[cfg.DATA.max_size, cfg.DATA.max_size, 3]) * 255 118 | for i in range(0,anchors.shape[0]): 119 | box=anchors[i] 120 | print(int(round((box[2]-box[0])*cfg.DATA.max_size))) 121 | cv2.rectangle(image, (int(round(box[0]*cfg.DATA.max_size)), int(round(box[1]*cfg.DATA.max_size))), 122 | (int(round(box[2]*cfg.DATA.max_size)), int(round(box[3]*cfg.DATA.max_size))), (255, 0, 0), 1) 123 | 124 | cv2.namedWindow('anchors',0) 125 | cv2.imshow('anchors',image) 126 | cv2.waitKey(0) -------------------------------------------------------------------------------- /lib/core/model/head/centernet_head.py: -------------------------------------------------------------------------------- 1 | # -*-coding:utf-8-*- 2 | 3 | 4 | import tensorflow as tf 5 | import tensorflow.contrib.slim as slim 6 | from lib.core.model.net.arg_scope.resnet_args_cope 
import resnet_arg_scope 7 | from train_config import config as cfg 8 | 9 | from lib.core.model.sqeeze_excitation.se import se 10 | 11 | class CenternetHead(): 12 | 13 | def __call__(self, fms, training=True): 14 | arg_scope = resnet_arg_scope( bn_is_training=training, ) 15 | with slim.arg_scope(arg_scope): 16 | with tf.variable_scope('CenternetHead'): 17 | # c2, c3, c4, c5 = fms 18 | # deconv_feature=c5 19 | 20 | deconv_feature = self._unet_magic(fms) 21 | 22 | ##### 23 | 24 | kps = slim.separable_conv2d(deconv_feature, 25 | cfg.DATA.num_class, 26 | [3, 3], 27 | stride=1, 28 | activation_fn=None, 29 | normalizer_fn=None, 30 | weights_initializer=tf.initializers.random_normal(stddev=0.001), 31 | biases_initializer=tf.initializers.constant(-2.19), 32 | scope='centernet_cls_output') 33 | 34 | 35 | wh = slim.separable_conv2d(deconv_feature, 36 | 4, 37 | [3, 3], 38 | stride=1, 39 | activation_fn=None, 40 | normalizer_fn=None, 41 | weights_initializer=tf.initializers.random_normal(stddev=0.001), 42 | biases_initializer=tf.initializers.constant(0), 43 | scope='centernet_wh_output') 44 | 45 | return kps, wh*16 46 | 47 | def _complex_upsample(self,fm,output_dim, factor=2,scope='upsample'): 48 | with tf.variable_scope(scope): 49 | 50 | 51 | x = slim.separable_conv2d(fm, 52 | output_dim, 53 | [3, 3], 54 | activation_fn=None, 55 | padding='SAME', 56 | scope='branch_x_upsample_resize') 57 | y = slim.separable_conv2d(fm, 58 | output_dim, 59 | [5, 5], 60 | activation_fn=None, 61 | padding='SAME', 62 | scope='branch_y_upsample_resize') 63 | final = x+y 64 | final = tf.keras.layers.UpSampling2D(data_format='channels_last', interpolation='bilinear', 65 | size=(factor, factor))(final) 66 | 67 | return final 68 | 69 | def revers_conv(self,fm,output_dim,k_size,refraction=4,scope='boring'): 70 | 71 | input_channel = fm.shape[3].value 72 | 73 | mid_channels=input_channel//refraction 74 | with tf.variable_scope(scope): 75 | fm_bypass = slim.conv2d(fm, 76 | mid_channels, 77 | [1, 1], 78 | padding='SAME', 79 | scope='1x1') 80 | 81 | fm_bypass = slim.separable_conv2d(fm_bypass, 82 | output_dim, 83 | [k_size, k_size], 84 | activation_fn=None, 85 | padding='SAME', 86 | scope='3x3') 87 | 88 | 89 | return fm_bypass 90 | 91 | def _unet_magic(self, fms, dims=cfg.MODEL.head_dims): 92 | 93 | c2, c3, c4, c5 = fms 94 | 95 | ####24, 116, 232, 464, 96 | 97 | c5_upsample = self._complex_upsample(c5, output_dim= dims[0]//2,factor=2, scope='c5_upsample') 98 | c4 = self.revers_conv(c4, dims[0]//2, k_size=5, scope='c4_reverse') 99 | p4=tf.nn.relu(tf.concat([c4,c5_upsample],axis=3)) 100 | 101 | c4_upsample = self._complex_upsample(p4, output_dim= dims[1]//2, factor=2,scope='c4_upsample') 102 | c3 = self.revers_conv(c3, dims[1]//2, k_size=5, scope='c3_reverse') 103 | p3=tf.nn.relu(tf.concat([c3,c4_upsample],axis=3)) 104 | 105 | c3_upsample = self._complex_upsample(p3, output_dim= dims[2]//2,factor=2, scope='c3_upsample') 106 | c2 = self.revers_conv(c2, dims[2]//2,k_size=5,scope='c2_reverse') 107 | p2=tf.nn.relu(tf.concat([c2,c3_upsample],axis=3)) 108 | 109 | final = se(p2, dims[2]) 110 | 111 | return final 112 | 113 | -------------------------------------------------------------------------------- /lib/dataset/augmentor/test.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import os 3 | import cv2 4 | import numpy as np 5 | import random 6 | import augmentor 7 | 8 | 9 | 10 | 11 | ####CAUTION the data is from pytorch tutorial , 12 | ###download from 
url=https://download.pytorch.org/tutorial/faces.zip 13 | ##### and i find some of them are not labeled very well 14 | 15 | csv_file='faces/face_landmarks.csv' 16 | 17 | ###parse the scv 18 | label_file=csv.reader(open(csv_file,'r')) 19 | 20 | 21 | for _,single_sample in enumerate(label_file): 22 | if _==0: 23 | ##drop the header in csvfile 24 | continue 25 | 26 | image_path=os.path.join('faces',single_sample[0]) 27 | label=np.array(single_sample[1:]).reshape([-1,2]).astype(np.int) 28 | img=cv2.imread(image_path) 29 | for _index in range(label.shape[0]): 30 | x_y=label[_index] 31 | cv2.circle(img,center=(x_y[0],x_y[1]),color=(122,122,122),radius=2,thickness=2) 32 | 33 | cv2.imshow('raw',img) 34 | 35 | ##first make it rotate with label 36 | img = cv2.imread(image_path) 37 | label = np.array(single_sample[1:]).reshape([-1, 2]).astype(np.int) 38 | angle=random.uniform(-180,180) 39 | img,aug_label=augmentor.Rotate_aug(img,label=label,angle=angle) 40 | for _index in range(aug_label.shape[0]): 41 | x_y=aug_label[_index] 42 | cv2.circle(img,center=(x_y[0],x_y[1]),color=(122,122,122),radius=2,thickness=2) 43 | cv2.imshow('rotate with label',img) 44 | 45 | ##first make it rotate without label 46 | img = cv2.imread(image_path) 47 | label = np.array(single_sample[1:]).reshape([-1, 2]).astype(np.int) 48 | angle = random.uniform(-180, 180) 49 | img, _ = augmentor.Rotate_aug(img, angle=angle) 50 | cv2.imshow('rotate without label', img) 51 | 52 | ##first make it Affine_aug with label 53 | img = cv2.imread(image_path) 54 | label = np.array(single_sample[1:]).reshape([-1, 2]).astype(np.int) 55 | strength=random.uniform(0,100) 56 | img, aug_label = augmentor.Affine_aug(img,strength=strength,label=label) 57 | for _index in range(aug_label.shape[0]): 58 | x_y = aug_label[_index] 59 | cv2.circle(img, center=(x_y[0], x_y[1]), color=(122, 122, 122), radius=2, thickness=2) 60 | cv2.imshow('Affine transform with label', img) 61 | 62 | 63 | 64 | ###padding with a target shape 65 | img = cv2.imread(image_path) 66 | label = np.array(single_sample[1:]).reshape([-1, 2]).astype(np.int) 67 | img,aug_label = augmentor.Fill_img(img,target_height=480,target_width=640,label=label) 68 | for _index in range(aug_label.shape[0]): 69 | x_y = aug_label[_index] 70 | cv2.circle(img, center=(x_y[0], x_y[1]), color=(122, 122, 122), radius=2, thickness=2) 71 | cv2.imshow('padding transform with label', img) 72 | 73 | ##blur 74 | img = cv2.imread(image_path) 75 | label = np.array(single_sample[1:]).reshape([-1, 2]).astype(np.int) 76 | strength = random.uniform(0, 60) 77 | img = augmentor.Blur_aug(img, ksize=(7,7)) 78 | for _index in range(label.shape[0]): 79 | x_y = label[_index] 80 | cv2.circle(img, center=(x_y[0], x_y[1]), color=(122, 122, 122), radius=2, thickness=2) 81 | cv2.imshow('blur transform with label', img) 82 | 83 | ##img dropout 84 | img = cv2.imread(image_path) 85 | label = np.array(single_sample[1:]).reshape([-1, 2]).astype(np.int) 86 | strength = random.uniform(0, 60) 87 | img = augmentor.Img_dropout(img, max_pattern_ratio=0.4) 88 | for _index in range(label.shape[0]): 89 | x_y = label[_index] 90 | cv2.circle(img, center=(x_y[0], x_y[1]), color=(122, 122, 122), radius=2, thickness=2) 91 | cv2.imshow('img_dropout transform with label', img) 92 | 93 | ##mirror 94 | 95 | img = cv2.imread(image_path) 96 | label = np.array(single_sample[1:]).reshape([-1, 2]).astype(np.int) 97 | strength = random.uniform(0, 60) 98 | ####need symmetry to swap from left and right, the symmetry need change for u data 99 | symmetry=[(0, 16), (1, 
15), (2, 14), (3, 13), (4, 12), (5, 11), (6, 10), (7, 9),(8,8), 100 | (17,26),(18,25),(19,24),(20,23),(21,22), 101 | (31,35),(32,34), 102 | (36,45),(37,44),(38,43),(39,42),(40,47),(41,46), 103 | (48,54),(49,53),(50,52),(55,59),(56,58),(60,64),(61,63),(65,67)] 104 | img,aug_label = augmentor.Mirror(img, label=label,symmetry=symmetry) 105 | for _index in range(aug_label.shape[0]): 106 | x_y = aug_label[_index] 107 | cv2.circle(img, center=(x_y[0], x_y[1]), color=(122, 122, 122), radius=2, thickness=2) 108 | cv2.imshow('flip transform with label', img) 109 | 110 | ###heatmaps 111 | 112 | label = np.array(single_sample[1:]).reshape([-1, 2]).astype(np.int).T 113 | heat_map_size=img.shape[0:2] 114 | heat=augmentor.produce_heat_maps(label,heat_map_size,1,1) 115 | augmentor.visualize_heatmap_target(heat)##visualise 116 | 117 | 118 | 119 | cv2.waitKey(0) 120 | 121 | 122 | 123 | -------------------------------------------------------------------------------- /model_eval/fddb.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | import sys 3 | sys.path.append('.') 4 | import numpy as np 5 | import os 6 | import cv2 7 | from tqdm import tqdm 8 | import argparse 9 | 10 | from lib.core.api.face_detector import FaceDetector 11 | 12 | os.environ["CUDA_VISIBLE_DEVICES"] = "-1" 13 | 14 | ap = argparse.ArgumentParser() 15 | ap.add_argument( "--model", required=False, default='./model/detector.pb', help="model to eval:") 16 | ap.add_argument( "--is_show", required=False, default=False, help="show result or not?") 17 | ap.add_argument( "--data_dir", required=False, default="./FDDB/img", help="dir to img") 18 | ap.add_argument( "--split_dir", required=False,default='./FDDB/FDDB-folds',help="dir to FDDB-folds") 19 | ap.add_argument( "--result", required=False,default='./result',help="dir to write result") 20 | args = ap.parse_args() 21 | 22 | 23 | IMAGES_DIR = args.data_dir 24 | ANNOTATIONS_PATH = args.split_dir 25 | RESULT_DIR = args.result 26 | MODEL_PATH = args.model 27 | 28 | face_detector = FaceDetector([MODEL_PATH]) 29 | 30 | 31 | annotations = [s for s in os.listdir(ANNOTATIONS_PATH) if s.endswith('ellipseList.txt')] 32 | image_lists = [s for s in os.listdir(ANNOTATIONS_PATH) if not s.endswith('ellipseList.txt')] 33 | annotations = sorted(annotations) 34 | image_lists = sorted(image_lists) 35 | 36 | images_to_use = [] 37 | for n in image_lists: 38 | with open(os.path.join(ANNOTATIONS_PATH, n)) as f: 39 | images_to_use.extend(f.readlines()) 40 | 41 | images_to_use = [s.strip() for s in images_to_use] 42 | with open(os.path.join(RESULT_DIR, 'faceList.txt'), 'w') as f: 43 | for p in images_to_use: 44 | f.write(p + '\n') 45 | 46 | 47 | ellipses = [] 48 | for n in annotations: 49 | with open(os.path.join(ANNOTATIONS_PATH, n)) as f: 50 | ellipses.extend(f.readlines()) 51 | 52 | i = 0 53 | with open(os.path.join(RESULT_DIR, 'ellipseList.txt'), 'w') as f: 54 | for p in ellipses: 55 | 56 | # check image order 57 | if 'big/img' in p: 58 | assert images_to_use[i] in p 59 | i += 1 60 | 61 | f.write(p) 62 | 63 | def bbox_vote(det): 64 | order = det[:, 4].ravel().argsort()[::-1] 65 | det = det[order, :] 66 | while det.shape[0] > 0: 67 | # IOU 68 | area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1) 69 | xx1 = np.maximum(det[0, 0], det[:, 0]) 70 | yy1 = np.maximum(det[0, 1], det[:, 1]) 71 | xx2 = np.minimum(det[0, 2], det[:, 2]) 72 | yy2 = np.minimum(det[0, 3], det[:, 3]) 73 | w = np.maximum(0.0, xx2 - xx1 + 1) 74 | h = np.maximum(0.0, yy2 - yy1 + 
1) 75 | inter = w * h 76 | o = inter / (area[0] + area[:] - inter) 77 | 78 | # get needed merge det and delete these det 79 | merge_index = np.where(o >= 0.3)[0] 80 | det_accu = det[merge_index, :] 81 | det = np.delete(det, merge_index, 0) 82 | 83 | if merge_index.shape[0] <= 1: 84 | continue 85 | det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4)) 86 | max_score = np.max(det_accu[:, 4]) 87 | det_accu_sum = np.zeros((1, 5)) 88 | det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:]) 89 | det_accu_sum[:, 4] = max_score 90 | try: 91 | dets = np.row_stack((dets, det_accu_sum)) 92 | except: 93 | dets = det_accu_sum 94 | try: 95 | dets = dets[0:750, :] 96 | except: 97 | dets=[] 98 | return dets 99 | 100 | predictions = [] 101 | for n in tqdm(images_to_use): 102 | image_array = cv2.imread(os.path.join(IMAGES_DIR, n) + '.jpg') 103 | image_array = cv2.cvtColor(image_array, cv2.COLOR_BGR2RGB) 104 | # threshold is important to set low 105 | 106 | 107 | boxes = face_detector(image_array, score_threshold=0.05) 108 | 109 | boxes=boxes[:,0:5] 110 | ##flip det 111 | flip_img=np.flip(image_array,1) 112 | 113 | boxes_flip_ = face_detector(flip_img, score_threshold=0.05) 114 | boxes_flip_ = boxes_flip_[:, 0:5] 115 | 116 | boxes_flip = np.zeros(boxes_flip_.shape) 117 | boxes_flip[:, 0] = flip_img.shape[1] - boxes_flip_[:, 2] 118 | boxes_flip[:, 1] = boxes_flip_[:, 1] 119 | boxes_flip[:, 2] = flip_img.shape[1] - boxes_flip_[:, 0] 120 | boxes_flip[:, 3] = boxes_flip_[:, 3] 121 | boxes_flip[:, 4] = boxes_flip_[:, 4] 122 | 123 | ##### 124 | det = np.row_stack((boxes, boxes_flip)) 125 | 126 | dets = bbox_vote(det) 127 | 128 | if args.is_show: 129 | for bbox in dets: 130 | if bbox[4] > 0.3: 131 | # cv2.circle(img_show,(p[0],p[1]),3,(0,0,213),-1) 132 | cv2.rectangle(image_array, (int(bbox[0]), int(bbox[1])), 133 | (int(bbox[2]), int(bbox[3])), (255, 0, 0), 7) 134 | cv2.imshow('tmp', image_array) 135 | cv2.waitKey(0) 136 | 137 | 138 | ### 139 | 140 | 141 | predictions.append((n, dets[:,0:4], dets[:,4])) 142 | 143 | 144 | with open(os.path.join(RESULT_DIR, 'detections.txt'), 'w') as f: 145 | for n, boxes, scores in predictions: 146 | f.write(n + '\n') 147 | f.write(str(len(boxes)) + '\n') 148 | for b, s in zip(boxes, scores): 149 | xmin, ymin, xmax, ymax = b 150 | h, w = int(ymax - ymin+1), int(xmax - xmin+1) 151 | f.write('{0} {1} {2} {3} {4:.4f}\n'.format(int(xmin), int(ymin), w, h, s)) 152 | 153 | 154 | -------------------------------------------------------------------------------- /lib/dataset/augmentor/visual_augmentation.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import random 4 | 5 | def pixel_jitter(src,p=0.5,max_=5.): 6 | 7 | src=src.astype(np.float32) 8 | if random.uniform(0, 1) < p: 9 | pattern=(np.random.rand(src.shape[0], src.shape[1],src.shape[2])-0.5)*2*max_ 10 | img = src + pattern 11 | 12 | img[img<0]=0 13 | img[img >255] = 255 14 | 15 | img = img.astype(np.uint8) 16 | 17 | return img 18 | else: 19 | src = src.astype(np.uint8) 20 | return src 21 | 22 | def gray(src): 23 | g_img=cv2.cvtColor(src,cv2.COLOR_RGB2GRAY) 24 | src[:,:,0]=g_img 25 | src[:,:,1]=g_img 26 | src[:,:,2]=g_img 27 | return src 28 | 29 | def swap_change(src): 30 | a = [0,1,2] 31 | 32 | k = random.sample(a, 3) 33 | 34 | res=src.copy() 35 | res[:,:,0]=src[:,:,k[0]] 36 | res[:, :, 1] = src[:, :, k[1]] 37 | res[:, :, 2] = src[:, :, k[2]] 38 | return res 39 | 40 | 41 | def 
Img_dropout(src,max_pattern_ratio=0.05): 42 | pattern=np.ones_like(src) 43 | width_ratio = random.uniform(0, max_pattern_ratio) 44 | height_ratio = random.uniform(0, max_pattern_ratio) 45 | width=src.shape[1] 46 | height=src.shape[0] 47 | block_width=width*width_ratio 48 | block_height=height*height_ratio 49 | width_start=int(random.uniform(0,width-block_width)) 50 | width_end=int(width_start+block_width) 51 | height_start=int(random.uniform(0,height-block_height)) 52 | height_end=int(height_start+block_height) 53 | pattern[height_start:height_end,width_start:width_end,:]=0 54 | img=src*pattern 55 | return img 56 | 57 | 58 | 59 | def blur_heatmap(src, ksize=(3, 3)): 60 | for i in range(src.shape[2]): 61 | src[:, :, i] = cv2.GaussianBlur(src[:, :, i], ksize, 0) 62 | amin, amax = src[:, :, i].min(), src[:, :, i].max() # 求最大最小值 63 | if amax>0: 64 | src[:, :, i] = (src[:, :, i] - amin) / (amax - amin) # (矩阵元素-最小值)/(最大值-最小值) 65 | return src 66 | def blur(src,ksize=(3,3)): 67 | for i in range(src.shape[2]): 68 | src[:, :, i]=cv2.GaussianBlur(src[:, :, i],ksize,1.5) 69 | return src 70 | 71 | 72 | 73 | 74 | def adjust_contrast(image, factor): 75 | """ Adjust contrast of an image. 76 | 77 | Args 78 | image: Image to adjust. 79 | factor: A factor for adjusting contrast. 80 | """ 81 | mean = image.mean(axis=0).mean(axis=0) 82 | return _clip((image - mean) * factor + mean) 83 | 84 | 85 | def adjust_brightness(image, delta): 86 | """ Adjust brightness of an image 87 | 88 | Args 89 | image: Image to adjust. 90 | delta: Brightness offset between -1 and 1 added to the pixel values. 91 | """ 92 | return _clip(image + delta * 255) 93 | 94 | 95 | def adjust_hue(image, delta): 96 | """ Adjust hue of an image. 97 | 98 | Args 99 | image: Image to adjust. 100 | delta: An interval between -1 and 1 for the amount added to the hue channel. 101 | The values are rotated if they exceed 180. 102 | """ 103 | image[..., 0] = np.mod(image[..., 0] + delta * 180, 180) 104 | return image 105 | 106 | 107 | def adjust_saturation(image, factor): 108 | """ Adjust saturation of an image. 109 | 110 | Args 111 | image: Image to adjust. 112 | factor: An interval for the factor multiplying the saturation values of each pixel. 113 | """ 114 | image[..., 1] = np.clip(image[..., 1] * factor, 0, 255) 115 | return image 116 | 117 | 118 | def _clip(image): 119 | """ 120 | Clip and convert an image to np.uint8. 121 | 122 | Args 123 | image: Image to clip. 124 | """ 125 | return np.clip(image, 0, 255).astype(np.uint8) 126 | def _uniform(val_range): 127 | """ Uniformly sample from the given range. 128 | 129 | Args 130 | val_range: A pair of lower and upper bound. 
131 | """ 132 | return np.random.uniform(val_range[0], val_range[1]) 133 | 134 | 135 | class ColorDistort(): 136 | 137 | def __init__( 138 | self, 139 | contrast_range=(0.8, 1.2), 140 | brightness_range=(-.2, .2), 141 | hue_range=(-0.1, 0.1), 142 | saturation_range=(0.8, 1.2) 143 | ): 144 | self.contrast_range = contrast_range 145 | self.brightness_range = brightness_range 146 | self.hue_range = hue_range 147 | self.saturation_range = saturation_range 148 | 149 | def __call__(self, image): 150 | 151 | 152 | if self.contrast_range is not None: 153 | contrast_factor = _uniform(self.contrast_range) 154 | image = adjust_contrast(image,contrast_factor) 155 | if self.brightness_range is not None: 156 | brightness_delta = _uniform(self.brightness_range) 157 | image = adjust_brightness(image, brightness_delta) 158 | 159 | if self.hue_range is not None or self.saturation_range is not None: 160 | 161 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) 162 | 163 | if self.hue_range is not None: 164 | hue_delta = _uniform(self.hue_range) 165 | image = adjust_hue(image, hue_delta) 166 | 167 | if self.saturation_range is not None: 168 | saturation_factor = _uniform(self.saturation_range) 169 | image = adjust_saturation(image, saturation_factor) 170 | 171 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) 172 | 173 | return image 174 | 175 | 176 | 177 | 178 | class DsfdVisualAug(): 179 | pass -------------------------------------------------------------------------------- /model_eval/custome_eval.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | sys.path.append('.') 5 | 6 | import cv2 7 | import numpy as np 8 | import json 9 | import os 10 | import argparse 11 | from tqdm import tqdm 12 | from pycocotools.coco import COCO 13 | from pycocotools.cocoeval import COCOeval 14 | 15 | from train_config import config as cfg 16 | from lib.core.api.face_detector import FaceDetector 17 | 18 | 19 | 20 | ap = argparse.ArgumentParser() 21 | ap.add_argument("--model", required=True, default='./model/detector.pb', help="model to eval:") 22 | ap.add_argument("--annFile", required=True, default='./model_eval/DatasetTest_cocoStyle.json', help="coco style json") 23 | ap.add_argument("--imgDir", required=True, default='', help="coco style json") 24 | ap.add_argument("--is_show", required=False, default=0,type=int, help="show result or not?") 25 | args = ap.parse_args() 26 | 27 | MODEL_PATH = args.model 28 | IMAGE_DIR = args.imgDir 29 | os.environ["CUDA_VISIBLE_DEVICES"] = "-1" 30 | detector = FaceDetector(['./model/detector.pb']) 31 | coco_map = {0: (1, 'person'), 1: (2, 'bicycle'), 2: (3, 'car'), 3: (4, 'motorcycle'), 4: (5, 'airplane'), 5: (6, 'bus'), 32 | 6: (7, 'train'), 7: (8, 'truck'), 8: (9, 'boat'), 9: (10, 'traffic shufflenet'), 10: (11, 'fire hydrant'), 33 | 11: (13, 'stop sign'), 12: (14, 'parking meter'), 13: (15, 'bench'), 14: (16, 'bird'), 15: (17, 'cat'), 34 | 16: (18, 'dog'), 17: (19, 'horse'), 18: (20, 'sheep'), 19: (21, 'cow'), 20: (22, 'elephant'), 35 | 21: (23, 'bear'), 22: (24, 'zebra'), 23: (25, 'giraffe'), 24: (27, 'backpack'), 25: (28, 'umbrella'), 36 | 26: (31, 'handbag'), 27: (32, 'tie'), 28: (33, 'suitcase'), 29: (34, 'frisbee'), 30: (35, 'skis'), 37 | 31: (36, 'snowboard'), 32: (37, 'sports ball'), 33: (38, 'kite'), 34: (39, 'baseball bat'), 38 | 35: (40, 'baseball glove'), 39 | 36: (41, 'skateboard'), 37: (42, 'surfboard'), 38: (43, 'tennis racket'), 39: (44, 'bottle'), 40 | 40: (46, 'wine glass'), 41 | 41: (47, 'cup'), 42: (48, 'fork'), 
43: (49, 'knife'), 44: (50, 'spoon'), 45: (51, 'bowl'), 42 | 46: (52, 'banana'), 47: (53, 'apple'), 48: (54, 'sandwich'), 49: (55, 'orange'), 50: (56, 'broccoli'), 43 | 51: (57, 'carrot'), 52: (58, 'hot dog'), 53: (59, 'pizza'), 54: (60, 'donut'), 55: (61, 'cake'), 44 | 56: (62, 'chair'), 57: (63, 'couch'), 58: (64, 'potted plant'), 59: (65, 'bed'), 60: (67, 'dining table'), 45 | 61: (70, 'toilet'), 62: (72, 'tv'), 63: (73, 'laptop'), 64: (74, 'mouse'), 65: (75, 'remote'), 46 | 66: (76, 'keyboard'), 67: (77, 'cell phone'), 68: (78, 'microwave'), 69: (79, 'oven'), 70: (80, 'toaster'), 47 | 71: (81, 'sink'), 72: (82, 'refrigerator'), 73: (84, 'book'), 74: (85, 'clock'), 75: (86, 'vase'), 48 | 76: (87, 'scissors'), 77: (88, 'teddy bear'), 78: (89, 'hair drier'), 79: (90, 'toothbrush')} 49 | 50 | 51 | def predict_box(): 52 | annFile = args.annFile 53 | cocoGt = COCO(annFile) 54 | imgIds = sorted(cocoGt.getImgIds()) 55 | 56 | res_coco = [] 57 | 58 | for img_id in tqdm(imgIds): 59 | 60 | fname=os.path.join(IMAGE_DIR,cocoGt.loadImgs(img_id)[0]['file_name']) 61 | 62 | image = cv2.imread(fname) 63 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 64 | h, w, _ = image.shape 65 | image_show = image.copy() 66 | 67 | if args.is_show: 68 | detect_res = detector(image, 0.3, input_shape=(cfg.DATA.hin, cfg.DATA.win),max_boxes=1500) 69 | else: 70 | detect_res =detector(image,0.05,input_shape=(cfg.DATA.hin,cfg.DATA.win),max_boxes=1500) 71 | 72 | if args.is_show: 73 | for i in range(detect_res.shape[0]): 74 | one_box = detect_res[i] 75 | str_draw = '%s:%.2f' %(coco_map[int(one_box[5])][1],one_box[4]) 76 | 77 | cv2.rectangle(image_show, (int(one_box[0]), int(one_box[1])), (int(one_box[2]), int(one_box[3])), 78 | (0, 255, 0), 2) 79 | cv2.putText(image_show, str_draw, (int(one_box[0]), int(one_box[1])), cv2.FONT_HERSHEY_SIMPLEX, 1, 80 | (255, 0, 255), 2) 81 | cv2.namedWindow('ss',0) 82 | cv2.imshow('ss', image_show) 83 | cv2.waitKey(0) 84 | 85 | for i in range(detect_res.shape[0]): 86 | one_box = detect_res[i] 87 | one_box=[float(x) for x in one_box] 88 | box = [one_box[0], one_box[1], one_box[2] - one_box[0], one_box[3] - one_box[1]] 89 | 90 | res_coco.append({ 91 | 'bbox': box, 92 | 'category_id': coco_map[int(one_box[5])][0], 93 | 'image_id': img_id, 94 | 'score': one_box[4] 95 | }) 96 | 97 | with open('bbox_result.json', 'w') as f_dump: 98 | json.dump(res_coco, f_dump, indent=2) 99 | 100 | 101 | def eval_box(): 102 | 103 | import pylab 104 | pylab.rcParams['figure.figsize'] = (10.0, 8.0) 105 | annType = ['segm', 'bbox', 'keypoints'] 106 | annType = annType[1] # specify type here 107 | print('Running for *%s* results.' 
% (annType)) 108 | # initialize COCO ground truth api 109 | annFile = args.annFile 110 | cocoGt = COCO(annFile) 111 | catIds = cocoGt.getCatIds() 112 | print(catIds) 113 | imgIds = sorted(cocoGt.getImgIds(catIds=catIds)) 114 | # initialize COCO detections api 115 | resFile = './bbox_result.json' 116 | cocoDt = cocoGt.loadRes(resFile) 117 | # running evaluation 118 | cocoEval = COCOeval(cocoGt, cocoDt, annType) 119 | # cocoEval.params.imgIds = imgIds 120 | #catIds=5 121 | cocoEval.params.catIds = catIds 122 | cocoEval.evaluate() 123 | cocoEval.accumulate() 124 | cocoEval.summarize() 125 | 126 | 127 | if __name__ == '__main__': 128 | predict_box() 129 | eval_box() 130 | 131 | 132 | -------------------------------------------------------------------------------- /lib/core/model/centernet.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | import tensorflow as tf 3 | import numpy as np 4 | import tensorflow.contrib.slim as slim 5 | from lib.core.anchor.box_utils import batch_decode,batch_decode_fix 6 | 7 | from lib.core.model.net.shufflenet.shufflenetv2_5x5 import ShuffleNetV2_5x5 8 | from lib.core.model.net.shufflenet.shufflenetv2plus import ShufflenetV2Plus 9 | from lib.core.model.net.mobilenetv3.backbone import mobilenetv3_large_detection 10 | from lib.core.model.net.mobilenet.backbone import mobilenet_ssd 11 | from lib.core.model.net.resnet.backbone import resnet_ssd 12 | from lib.core.model.loss.centernet_loss import loss 13 | 14 | from train_config import config as cfg 15 | 16 | from lib.helper.logger import logger 17 | 18 | from lib.core.model.head.centernet_head import CenternetHead 19 | 20 | class Centernet(): 21 | 22 | def __init__(self,): 23 | if "ShuffleNetV2_Plus" in cfg.MODEL.net_structure: 24 | self.backbone=ShufflenetV2Plus ### it is a func 25 | elif "ShuffleNetV2_5x5" in cfg.MODEL.net_structure: 26 | self.backbone=ShuffleNetV2_5x5 27 | elif "MobilenetV2" in cfg.MODEL.net_structure: 28 | self.backbone = mobilenet_ssd 29 | elif "MobilenetV3" in cfg.MODEL.net_structure: 30 | self.backbone = mobilenetv3_large_detection 31 | elif "resnet_v2_50" in cfg.MODEL.net_structure: 32 | self.backbone = resnet_ssd 33 | self.head=CenternetHead() ### it is a class 34 | 35 | self.top_k_results_output=cfg.MODEL.max_box 36 | 37 | def forward(self,inputs,hm_target, wh_target,weights_,training_flag): 38 | 39 | ## process the label 40 | if cfg.DATA.use_int8_data: 41 | inputs,hm_target,wh_target,weights_=self.process_label(inputs,hm_target,wh_target,weights_) 42 | 43 | ###preprocess 44 | #inputs=self.preprocess(inputs) 45 | 46 | ### extract feature maps 47 | origin_fms=self.backbone(inputs,training_flag) 48 | 49 | kps_predicts,wh_predicts = self.head(origin_fms, training_flag) 50 | kps_predicts= tf.nn.sigmoid(kps_predicts) 51 | ### calculate loss 52 | hm_loss,wh_loss = loss(predicts=[kps_predicts,wh_predicts] ,targets=[hm_target,wh_target,weights_]) 53 | 54 | kps_predicts = tf.identity(kps_predicts, name='keypoints') 55 | wh_predicts = tf.identity(wh_predicts, name='wh') 56 | 57 | self.postprocess(kps_predicts,wh_predicts,self.top_k_results_output) 58 | 59 | return hm_loss,wh_loss 60 | 61 | def preprocess(self,image): 62 | with tf.name_scope('image_preprocess'): 63 | if image.dtype.base_dtype != tf.float32: 64 | image = tf.cast(image, tf.float32) 65 | 66 | image=image/255. 
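# note (added comment): forward() above keeps the call to this normalization
# commented out; process_label() only casts the inputs to float32.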
67 | return image 68 | def process_label(self,inputs,cls_hm,wh_target,weights_): 69 | 70 | inputs= tf.cast(inputs, tf.float32) 71 | 72 | cls_hm = tf.cast(cls_hm, tf.float32)/cfg.DATA.use_int8_enlarge 73 | 74 | return inputs,cls_hm,wh_target,weights_ 75 | 76 | 77 | def postprocess(self, keypoints,wh,max_size): 78 | """Postprocess outputs of the network. 79 | 80 | Returns: 81 | boxes: a float tensor with shape [batch_size, N, 4]. 82 | scores: a float tensor with shape [batch_size, N]. 83 | num_boxes: an int tensor with shape [batch_size], it 84 | represents the number of detections on an image. 85 | 86 | where N = max_boxes. 87 | """ 88 | 89 | def nms(heat, kernel=3): 90 | hmax = tf.layers.max_pooling2d(heat, kernel, 1, padding='same') 91 | keep = tf.cast(tf.equal(heat, hmax), tf.float32) 92 | return heat * keep 93 | 94 | def topk(hm, K=100): 95 | batch, height, width, cat = tf.shape(hm)[0], tf.shape(hm)[1], tf.shape(hm)[2], tf.shape(hm)[3] 96 | # [b,h*w*c] 97 | scores = tf.reshape(hm, (batch, -1)) 98 | # [b,k] 99 | topk_scores, topk_inds = tf.nn.top_k(scores, k=K) 100 | # [b,k] 101 | topk_clses = topk_inds % cat 102 | topk_xs = topk_inds // cat % width 103 | topk_ys = topk_inds // cat // width 104 | topk_inds = topk_ys * width + topk_xs 105 | 106 | return topk_scores, topk_inds, topk_clses, topk_ys, topk_xs 107 | 108 | def decode(heat, wh, K=100): 109 | batch, height, width, cat = tf.shape(heat)[0], tf.shape(heat)[1], tf.shape(heat)[2], tf.shape(heat)[3] 110 | heat = nms(heat) 111 | scores, inds, clses, ys, xs = topk(heat, K=K) 112 | 113 | 114 | xs = tf.cast(tf.expand_dims(xs, axis=-1),tf.float32) 115 | ys = tf.cast(tf.expand_dims(ys, axis=-1),tf.float32) 116 | 117 | # [b,h*w,2] 118 | wh = tf.reshape(wh, (batch, -1, tf.shape(wh)[-1])) 119 | # [b,k,2] 120 | wh = tf.batch_gather(wh, inds) 121 | 122 | clses = tf.cast(tf.expand_dims(clses, axis=-1), tf.float32) 123 | scores = tf.expand_dims(scores, axis=-1) 124 | 125 | xmin = xs*cfg.MODEL.global_stride - wh[:,:, 0:1] 126 | ymin = ys*cfg.MODEL.global_stride - wh[:,:, 1:2] 127 | xmax = xs*cfg.MODEL.global_stride + wh[:,:, 2:3] 128 | ymax = ys*cfg.MODEL.global_stride + wh[:,:, 3:4] 129 | 130 | bboxes = tf.concat([xmin, ymin, xmax, ymax], axis=-1) 131 | 132 | 133 | # [b,k,6] 134 | detections = tf.concat([bboxes, scores, clses], axis=-1) 135 | detections = tf.identity(detections, name='detections') 136 | 137 | # bboxes = tf.identity(bboxes, name='boxes') 138 | # scores = tf.identity(scores, name='scores') 139 | # labels = tf.identity(clses, name='labels') ## no use 140 | return detections 141 | 142 | 143 | decode(keypoints,wh,max_size) 144 | 145 | 146 | 147 | 148 | 149 | 150 | -------------------------------------------------------------------------------- /prepare_wider_data.py: -------------------------------------------------------------------------------- 1 | #-*- coding:utf-8 -*- 2 | 3 | from __future__ import division 4 | from __future__ import absolute_import 5 | from __future__ import print_function 6 | 7 | 8 | import os 9 | 10 | 11 | 12 | WIDER_ROOT = './WIDER' 13 | train_list_file = os.path.join(WIDER_ROOT, 'wider_face_split', 14 | 'wider_face_train_bbx_gt.txt') 15 | val_list_file = os.path.join(WIDER_ROOT, 'wider_face_split', 16 | 'wider_face_val_bbx_gt.txt') 17 | 18 | WIDER_TRAIN = os.path.join(WIDER_ROOT, 'WIDER_train', 'images') 19 | WIDER_VAL = os.path.join(WIDER_ROOT, 'WIDER_val', 'images') 20 | 21 | 22 | def parse_wider_file(root, file): 23 | with open(file, 'r') as fr: 24 | lines = fr.readlines() 25 | face_count = [] 26 | img_paths 
= [] 27 | face_loc = [] 28 | img_faces = [] 29 | count = 0 30 | flag = False 31 | for k, line in enumerate(lines): 32 | line = line.strip().strip('\n') 33 | if count > 0: 34 | line = line.split(' ') 35 | count -= 1 36 | loc = [int(line[0]), int(line[1]), int(line[2]), int(line[3])] 37 | face_loc += [loc] 38 | if flag: 39 | face_count += [int(line)] 40 | flag = False 41 | count = int(line) 42 | if 'jpg' in line: 43 | img_paths += [os.path.join(root, line)] 44 | flag = True 45 | 46 | total_face = 0 47 | for k in face_count: 48 | face_ = [] 49 | for x in range(total_face, total_face + k): 50 | face_.append(face_loc[x]) 51 | img_faces += [face_] 52 | total_face += k 53 | return img_paths, img_faces 54 | 55 | 56 | def wider_data_file(): 57 | img_paths, bbox = parse_wider_file(WIDER_TRAIN, train_list_file) 58 | fw = open('train.txt', 'w') 59 | for index in range(len(img_paths)): 60 | tmp_str = '' 61 | tmp_str =tmp_str+ img_paths[index]+'|' 62 | boxes = bbox[index] 63 | 64 | for box in boxes: 65 | data = ' %d,%d,%d,%d,0'%(box[0], box[1], box[0]+box[2], box[1]+box[3]) 66 | tmp_str=tmp_str+data 67 | if len(boxes) == 0: 68 | print(tmp_str) 69 | continue 70 | ####err box? 71 | if box[2] <= 0 or box[3] <= 0: 72 | pass 73 | else: 74 | fw.write(tmp_str + '\n') 75 | fw.close() 76 | 77 | img_paths, bbox = parse_wider_file(WIDER_VAL, val_list_file) 78 | fw = open('val.txt', 'w') 79 | for index in range(len(img_paths)): 80 | 81 | tmp_str='' 82 | tmp_str =tmp_str+ img_paths[index]+'|' 83 | boxes = bbox[index] 84 | 85 | for box in boxes: 86 | data = ' %d,%d,%d,%d,0'%(box[0], box[1], box[0]+box[2], box[1]+box[3]) 87 | tmp_str=tmp_str+data 88 | 89 | 90 | 91 | if len(boxes) == 0: 92 | print(tmp_str) 93 | continue 94 | ####err box? 95 | if box[2] <= 0 or box[3] <= 0: 96 | pass 97 | else: 98 | fw.write(tmp_str + '\n') 99 | fw.close() 100 | 101 | 102 | 103 | 104 | 105 | 106 | def wider_data_file_refine(): 107 | 108 | def parse_wider_file(root, file): 109 | with open(file, 'r') as fr: 110 | lines = fr.readlines() 111 | face_count = [] 112 | img_paths = [] 113 | face_loc = [] 114 | img_faces = [] 115 | count = 0 116 | 117 | 118 | one_image_faces=[] 119 | for k, line in enumerate(lines): 120 | if "#" in line: 121 | 122 | img_paths += [os.path.join(root, line[2:].rstrip())] 123 | 124 | one_image_faces=[] 125 | if '#' not in line: 126 | line = line.strip().strip('\n') 127 | 128 | 129 | line = line.split(' ') 130 | 131 | loc = [int(line[0]), int(line[1]), int(line[2]), int(line[3])] 132 | 133 | one_image_faces.append(loc) 134 | 135 | 136 | if k 0 13 | weight = tf.cast(weight[pos_mask],tf.float32) 14 | if avg_factor is None: 15 | avg_factor = tf.reduce_sum(pos_mask) + 1e-6 16 | bboxes1 = tf.reshape(pred[pos_mask],(-1, 4)) 17 | bboxes2 = tf.reshape(target[pos_mask],(-1, 4)) 18 | 19 | 20 | lt = tf.maximum(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2] 21 | rb = tf.minimum(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2] 22 | wh = tf.maximum((rb - lt + 1),0) # [rows, 2] 23 | enclose_x1y1 = tf.minimum(bboxes1[:, :2], bboxes2[:, :2]) 24 | enclose_x2y2 = tf.maximum(bboxes1[:, 2:], bboxes2[:, 2:]) 25 | enclose_wh = tf.maximum((enclose_x2y2 - enclose_x1y1 + 1),0) 26 | 27 | overlap = wh[:, 0] * wh[:, 1] 28 | ap = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (bboxes1[:, 3] - bboxes1[:, 1] + 1) 29 | ag = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (bboxes2[:, 3] - bboxes2[:, 1] + 1) 30 | ious = overlap / (ap + ag - overlap) 31 | 32 | enclose_area = enclose_wh[:, 0] * enclose_wh[:, 1] # i.e. 
C in paper 33 | u = ap + ag - overlap 34 | gious = ious - (enclose_area - u) / enclose_area 35 | iou_distances = 1 - gious 36 | return tf.reduce_sum(iou_distances * weight) / avg_factor 37 | 38 | def diou_loss(pred, 39 | target, 40 | weight, 41 | avg_factor=None): 42 | """DIoU loss. 43 | Computing the GIoU loss between a set of predicted bboxes and target bboxes. 44 | """ 45 | pos_mask = weight > 0 46 | weight = tf.cast(weight[pos_mask],tf.float32) 47 | if avg_factor is None: 48 | avg_factor = tf.reduce_sum(pos_mask) + 1e-6 49 | bboxes1 = tf.reshape(pred[pos_mask],(-1, 4)) 50 | bboxes2 = tf.reshape(target[pos_mask],(-1, 4)) 51 | 52 | 53 | lt = tf.maximum(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2] 54 | rb = tf.minimum(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2] 55 | wh = tf.maximum((rb - lt + 1),0) # [rows, 2] 56 | # enclose_x1y1 = tf.minimum(bboxes1[:, :2], bboxes2[:, :2]) 57 | # enclose_x2y2 = tf.maximum(bboxes1[:, 2:], bboxes2[:, 2:]) 58 | # enclose_wh = tf.maximum((enclose_x2y2 - enclose_x1y1 + 1),0) 59 | 60 | overlap = wh[:, 0] * wh[:, 1] 61 | ap = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (bboxes1[:, 3] - bboxes1[:, 1] + 1) 62 | ag = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (bboxes2[:, 3] - bboxes2[:, 1] + 1) 63 | ious = overlap / (ap + ag - overlap) 64 | 65 | 66 | # cal outer boxes 67 | outer_left_up = tf.minimum(bboxes1[:, :2], bboxes2[:, :2]) 68 | outer_right_down = tf.maximum(bboxes1[:, 2:], bboxes2[:, 2:]) 69 | outer = tf.maximum(outer_right_down - outer_left_up, 0.0) 70 | outer_diagonal_line = tf.square(outer[:, 0]) + tf.square(outer[:, 1]) 71 | 72 | boxes1_center = (bboxes1[:, :2] + bboxes1[:, 2:]+ 1) * 0.5 73 | boxes2_center = (bboxes2[:, :2] + bboxes2[:, 2:]+ 1) * 0.5 74 | center_dis = tf.square(boxes1_center[:, 0] - boxes2_center[:, 0]) + \ 75 | tf.square(boxes1_center[:, 1] - boxes2_center[:, 1]) 76 | 77 | dious = ious - (center_dis / outer_diagonal_line) 78 | 79 | iou_distances = 1-dious 80 | 81 | return tf.reduce_sum(iou_distances * weight) / avg_factor 82 | def ciou_loss(pred, 83 | target, 84 | weight, 85 | avg_factor=None): 86 | """GIoU loss. 87 | Computing the GIoU loss between a set of predicted bboxes and target bboxes. 
88 | """ 89 | pos_mask = weight > 0 90 | weight = tf.cast(weight[pos_mask],tf.float32) 91 | if avg_factor is None: 92 | avg_factor = tf.reduce_sum(pos_mask) + 1e-6 93 | bboxes1 = tf.reshape(pred[pos_mask],(-1, 4)) 94 | bboxes2 = tf.reshape(target[pos_mask],(-1, 4)) 95 | 96 | 97 | lt = tf.maximum(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2] 98 | rb = tf.minimum(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2] 99 | wh = tf.maximum((rb - lt + 1),0) # [rows, 2] 100 | # enclose_x1y1 = tf.minimum(bboxes1[:, :2], bboxes2[:, :2]) 101 | # enclose_x2y2 = tf.maximum(bboxes1[:, 2:], bboxes2[:, 2:]) 102 | # enclose_wh = tf.maximum((enclose_x2y2 - enclose_x1y1 + 1),0) 103 | 104 | overlap = wh[:, 0] * wh[:, 1] 105 | ap = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (bboxes1[:, 3] - bboxes1[:, 1] + 1) 106 | ag = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (bboxes2[:, 3] - bboxes2[:, 1] + 1) 107 | ious = overlap / (ap + ag - overlap) 108 | 109 | 110 | 111 | 112 | # cal outer boxes 113 | outer_left_up = tf.minimum(bboxes1[:, :2], bboxes2[:, :2]) 114 | outer_right_down = tf.maximum(bboxes1[:, 2:], bboxes2[:, 2:]) 115 | outer = tf.maximum(outer_right_down - outer_left_up, 0.0) 116 | outer_diagonal_line = tf.square(outer[:, 0]) + tf.square(outer[:, 1]) 117 | 118 | 119 | boxes1_center = (bboxes1[:, :2] + bboxes1[:, 2:]+ 1) * 0.5 120 | boxes2_center = (bboxes2[:, :2] + bboxes2[:, 2:]+ 1) * 0.5 121 | center_dis = tf.square(boxes1_center[:, 0] - boxes2_center[:, 0]) + \ 122 | tf.square(boxes1_center[:, 1] - boxes2_center[:, 1]) 123 | 124 | 125 | 126 | 127 | 128 | boxes1_size = tf.maximum(bboxes1[:,2:]-bboxes1[:,:2],0.0) 129 | boxes2_size = tf.maximum(bboxes2[:, 2:] - bboxes2[:, :2], 0.0) 130 | 131 | v = (4.0 / (np.pi**2)) * \ 132 | tf.square(tf.math.atan(boxes2_size[:, 0] / (boxes2_size[:, 1]+0.00001)) - 133 | tf.math.atan(boxes1_size[:, 0] / (boxes1_size[:, 1]+0.00001))) 134 | 135 | S = tf.cast(tf.greater(ious , 0.5),dtype=tf.float32) 136 | alpha = S * v / (1 - ious + v) 137 | 138 | cious = ious - (center_dis / outer_diagonal_line)-alpha * v 139 | 140 | cious = 1-cious 141 | 142 | return tf.reduce_sum(cious * weight) / avg_factor 143 | 144 | 145 | 146 | if __name__=='__main__': 147 | gt=[[1000,10,100,100]] 148 | pre=[[200,200,1,1]] 149 | weight = [1] 150 | a = tf.constant(gt,dtype=tf.float32) 151 | b = tf.constant(pre,dtype=tf.float32) 152 | 153 | w=tf.constant(weight,dtype=tf.float32) 154 | 155 | session = tf.Session() 156 | 157 | loss,lt=giou_loss(pre,gt,w) 158 | v1 = session.run(loss) # fetches参数为单个张量值,返回值为Numpy数组 159 | print(v1) 160 | lt = session.run(lt[0,:,:,0]) # fetches参数为单个张量值,返回值为Numpy数组 161 | print(lt.shape) 162 | print(lt) 163 | 164 | 165 | -------------------------------------------------------------------------------- /visulization/vis.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('.') 3 | 4 | import cv2 5 | import os 6 | import time 7 | 8 | 9 | from lib.core.api.face_detector import FaceDetector 10 | from train_config import config as cfg 11 | 12 | import argparse 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--style', type=str,default='coco', help='detect with coco or face',required=False) 16 | parser.add_argument('--imgDir', type=str,default='../pubdata/mscoco/val2017', help='the image dir to detect') 17 | parser.add_argument('--thres', type=float,default=0.3, help='the thres for detect') 18 | args = parser.parse_args() 19 | 20 | data_dir=args.imgDir 21 | style=args.style 22 | thres=args.thres 23 | 24 | 
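# example invocation (illustrative, using the argparse defaults above):
#   python visulization/vis.py --style coco --imgDir ../pubdata/mscoco/val2017 --thres 0.3
# it expects a frozen ./model/detector.pb produced by tools/auto_freeze.py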
os.environ["CUDA_VISIBLE_DEVICES"] = "-1" 25 | detector = FaceDetector(['./model/detector.pb']) 26 | coco_map = {0: (1, 'person'), 1: (2, 'bicycle'), 2: (3, 'car'), 3: (4, 'motorcycle'), 4: (5, 'airplane'), 5: (6, 'bus'), 27 | 6: (7, 'train'), 7: (8, 'truck'), 8: (9, 'boat'), 9: (10, 'traffic light'), 10: (11, 'fire hydrant'), 28 | 11: (13, 'stop sign'), 12: (14, 'parking meter'), 13: (15, 'bench'), 14: (16, 'bird'), 15: (17, 'cat'), 29 | 16: (18, 'dog'), 17: (19, 'horse'), 18: (20, 'sheep'), 19: (21, 'cow'), 20: (22, 'elephant'), 30 | 21: (23, 'bear'), 22: (24, 'zebra'), 23: (25, 'giraffe'), 24: (27, 'backpack'), 25: (28, 'umbrella'), 31 | 26: (31, 'handbag'), 27: (32, 'tie'), 28: (33, 'suitcase'), 29: (34, 'frisbee'), 30: (35, 'skis'), 32 | 31: (36, 'snowboard'), 32: (37, 'sports ball'), 33: (38, 'kite'), 34: (39, 'baseball bat'), 33 | 35: (40, 'baseball glove'), 34 | 36: (41, 'skateboard'), 37: (42, 'surfboard'), 38: (43, 'tennis racket'), 39: (44, 'bottle'), 35 | 40: (46, 'wine glass'), 36 | 41: (47, 'cup'), 42: (48, 'fork'), 43: (49, 'knife'), 44: (50, 'spoon'), 45: (51, 'bowl'), 37 | 46: (52, 'banana'), 47: (53, 'apple'), 48: (54, 'sandwich'), 49: (55, 'orange'), 50: (56, 'broccoli'), 38 | 51: (57, 'carrot'), 52: (58, 'hot dog'), 53: (59, 'pizza'), 54: (60, 'donut'), 55: (61, 'cake'), 39 | 56: (62, 'chair'), 57: (63, 'couch'), 58: (64, 'potted plant'), 59: (65, 'bed'), 60: (67, 'dining table'), 40 | 61: (70, 'toilet'), 62: (72, 'tv'), 63: (73, 'laptop'), 64: (74, 'mouse'), 65: (75, 'remote'), 41 | 66: (76, 'keyboard'), 67: (77, 'cell phone'), 68: (78, 'microwave'), 69: (79, 'oven'), 70: (80, 'toaster'), 42 | 71: (81, 'sink'), 72: (82, 'refrigerator'), 73: (84, 'book'), 74: (85, 'clock'), 75: (86, 'vase'), 43 | 76: (87, 'scissors'), 77: (88, 'teddy bear'), 78: (89, 'hair drier'), 79: (90, 'toothbrush')} 44 | 45 | def GetFileList(dir, fileList): 46 | newDir = dir 47 | if os.path.isfile(dir): 48 | fileList.append(dir) 49 | elif os.path.isdir(dir): 50 | for s in os.listdir(dir): 51 | # if s == "pts": 52 | # continue 53 | newDir=os.path.join(dir,s) 54 | GetFileList(newDir, fileList) 55 | return fileList 56 | 57 | 58 | def cocodetect(data_dir): 59 | success_cnt=0 60 | count = 0 61 | 62 | pics = [] 63 | GetFileList(data_dir,pics) 64 | 65 | pics = [x for x in pics if 'jpg' in x or 'png' in x or 'jpeg' in x] 66 | #pics.sort() 67 | 68 | for pic in pics: 69 | print(pic) 70 | try: 71 | img=cv2.imread(pic) 72 | #cv2.imwrite('tmp.png',img) 73 | img_show = img.copy() 74 | except: 75 | continue 76 | 77 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 78 | 79 | star=time.time() 80 | boxes=detector(img,thres,input_shape=(cfg.DATA.hin,cfg.DATA.win)) 81 | 82 | print(boxes.shape[0]) 83 | if boxes.shape[0]==0: 84 | print(pic) 85 | 86 | for box_index in range(boxes.shape[0]): 87 | 88 | bbox = boxes[box_index] 89 | 90 | cv2.rectangle(img_show, (int(bbox[0]), int(bbox[1])), 91 | (int(bbox[2]), int(bbox[3])), (255, 0, 0), 4) 92 | str_draw = '%s:%.2f' %(coco_map[int(bbox[5])][1],bbox[4]) 93 | cv2.putText(img_show, str_draw, (int(bbox[0]), int(bbox[1])), cv2.FONT_HERSHEY_SIMPLEX, 2, 94 | (255, 0, 255), 2) 95 | 96 | 97 | cv2.namedWindow('res',0) 98 | cv2.imshow('res',img_show) 99 | cv2.waitKey(0) 100 | 101 | print(success_cnt,'decoded') 102 | print(count) 103 | 104 | 105 | def camdetect(): 106 | cap = cv2.VideoCapture(0) 107 | 108 | while True: 109 | 110 | ret, img = cap.read() 111 | img_show = img.copy() 112 | 113 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 114 | star=time.time() 115 | 
boxes=detector(img,0.5,input_shape=(640,640)) 116 | 117 | 118 | print(boxes.shape[0]) 119 | 120 | 121 | for box_index in range(boxes.shape[0]): 122 | 123 | bbox = boxes[box_index] 124 | 125 | cv2.rectangle(img_show, (int(bbox[0]), int(bbox[1])), 126 | (int(bbox[2]), int(bbox[3])), (255, 0, 0), 8) 127 | # cv2.putText(img_show, str(bbox[4]), (int(bbox[0]), int(bbox[1]) + 30), 128 | # cv2.FONT_HERSHEY_SIMPLEX, 1, 129 | # (255, 0, 255), 2) 130 | # 131 | # cv2.putText(img_show, str(int(bbox[5])), (int(bbox[0]), int(bbox[1]) + 40), 132 | # cv2.FONT_HERSHEY_SIMPLEX, 1, 133 | # (0, 0, 255), 2) 134 | 135 | 136 | cv2.namedWindow('res',0) 137 | cv2.imshow('res',img_show) 138 | cv2.waitKey(0) 139 | print(count) 140 | 141 | def facedetect(data_dir): 142 | success_cnt=0 143 | count = 0 144 | 145 | pics = [] 146 | GetFileList(data_dir,pics) 147 | 148 | pics = [x for x in pics if 'jpg' in x or 'png' in x or 'jpeg' in x] 149 | #pics.sort() 150 | 151 | for pic in pics: 152 | print(pic) 153 | try: 154 | img=cv2.imread(pic) 155 | #cv2.imwrite('tmp.png',img) 156 | img_show = img.copy() 157 | except: 158 | continue 159 | 160 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 161 | 162 | star=time.time() 163 | boxes=detector(img,thres,input_shape=(cfg.DATA.hin,cfg.DATA.win)) 164 | 165 | print(boxes.shape[0]) 166 | if boxes.shape[0]==0: 167 | print(pic) 168 | 169 | for box_index in range(boxes.shape[0]): 170 | 171 | bbox = boxes[box_index] 172 | 173 | cv2.rectangle(img_show, (int(bbox[0]), int(bbox[1])), 174 | (int(bbox[2]), int(bbox[3])), (255, 0, 0), 4) 175 | str_draw = '%s:%.2f' %(coco_map[int(bbox[5])][1],bbox[4]) 176 | cv2.putText(img_show, str_draw, (int(bbox[0]), int(bbox[1])), cv2.FONT_HERSHEY_SIMPLEX, 2, 177 | (255, 0, 255), 2) 178 | 179 | 180 | cv2.namedWindow('res',0) 181 | cv2.imshow('res',img_show) 182 | cv2.waitKey(0) 183 | 184 | print(success_cnt,'decoded') 185 | print(count) 186 | if __name__=='__main__': 187 | 188 | if style=='coco': 189 | cocodetect(data_dir) 190 | else: 191 | facedetect(data_dir) 192 | -------------------------------------------------------------------------------- /lib/core/api/face_detector_bk.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import cv2 4 | import time 5 | import math 6 | 7 | from train_config import config as cfg 8 | 9 | 10 | 11 | class FaceDetector: 12 | def __init__(self, model_path): 13 | """ 14 | Arguments: 15 | model_path: a string, path to a pb file. 16 | """ 17 | self._graph = tf.Graph() 18 | 19 | with self._graph.as_default(): 20 | self._graph, self._sess = self.init_model(model_path) 21 | 22 | 23 | self.input_image = tf.get_default_graph().get_tensor_by_name('tower_0/images:0') 24 | self.training = tf.get_default_graph().get_tensor_by_name('training_flag:0') 25 | self.output_ops = [ 26 | tf.get_default_graph().get_tensor_by_name('tower_0/boxes:0'), 27 | tf.get_default_graph().get_tensor_by_name('tower_0/scores:0'), 28 | tf.expand_dims(tf.cast(tf.get_default_graph().get_tensor_by_name('tower_0/labels:0'),dtype=tf.float32),-1) 29 | ] 30 | self.output_op=tf.concat(self.output_ops,axis=2) 31 | 32 | 33 | 34 | 35 | def __call__(self, image, score_threshold=0.5,input_shape=(cfg.DATA.hin,cfg.DATA.win),max_boxes=1000): 36 | """Detect faces. 37 | 38 | Arguments: 39 | image: a numpy uint8 array with shape [height, width, 3], 40 | that represents a RGB image. 41 | score_threshold: a float number. 42 | Returns: 43 | boxes: a float numpy array of shape [num_faces, 5]. 
44 | 45 | """ 46 | 47 | 48 | # if input_shape is None: 49 | # h, w, c = image.shape 50 | # input_shape = (math.ceil(h / 32) * 32, math.ceil(w / 32) * 32) 51 | # 52 | # else: 53 | # h, w = input_shape 54 | # input_shape = (math.ceil(h / 32) * 32, math.ceil(w / 32) * 32) 55 | 56 | image, scale_x, scale_y, dx, dy = self.preprocess(image, 57 | target_height=cfg.DATA.hin, 58 | target_width=cfg.DATA.win) 59 | 60 | 61 | if cfg.DATA.channel==1: 62 | image=cv2.cvtColor(image,cv2.COLOR_RGB2GRAY) 63 | image= np.expand_dims(image, -1) 64 | 65 | image_fornet = np.expand_dims(image, 0) 66 | 67 | 68 | bboxes = self._sess.run( 69 | self.output_op, feed_dict={self.input_image: image_fornet,self.training:False} 70 | ) 71 | 72 | bboxes = self.py_nms(np.array(bboxes[0]), iou_thres=0.3, score_thres=score_threshold,max_boxes=max_boxes) 73 | 74 | ###recorver to raw image 75 | boxes_scaler = np.array([(input_shape[1]) / scale_x, 76 | (input_shape[0]) / scale_y, 77 | (input_shape[1]) / scale_x, 78 | (input_shape[0]) / scale_y, 79 | 1.,1.], dtype='float32') 80 | 81 | boxes_bias = np.array([dx / scale_x, 82 | dy / scale_y, 83 | dx / scale_x, 84 | dy / scale_y, 0.,0.], dtype='float32') 85 | bboxes = bboxes * boxes_scaler - boxes_bias 86 | 87 | 88 | 89 | # self.stats_graph(self._sess.graph) 90 | return bboxes 91 | 92 | 93 | def preprocess(self, image, target_height, target_width, label=None): 94 | 95 | ###sometimes use in objs detects 96 | h, w, c = image.shape 97 | 98 | bimage = np.zeros(shape=[target_height, target_width, c], dtype=image.dtype) 99 | 100 | scale_y = target_height / h 101 | scale_x = target_width / w 102 | 103 | scale = min(scale_x, scale_y) 104 | 105 | image = cv2.resize(image, None, fx=scale, fy=scale) 106 | 107 | h_, w_, _ = image.shape 108 | 109 | dx = (target_width - w_) // 2 110 | dy = (target_height - h_) // 2 111 | bimage[dy:h_ + dy, dx:w_ + dx, :] = image 112 | 113 | return bimage, scale, scale, dx, dy 114 | 115 | def py_nms(self, bboxes, iou_thres, score_thres, max_boxes=1000): 116 | 117 | upper_thres = np.where(bboxes[:, 4] > score_thres)[0] 118 | 119 | bboxes = bboxes[upper_thres] 120 | 121 | x1 = bboxes[:, 0] 122 | y1 = bboxes[:, 1] 123 | x2 = bboxes[:, 2] 124 | y2 = bboxes[:, 3] 125 | 126 | order = np.argsort(bboxes[:, 4])[::-1] 127 | 128 | keep=[] 129 | while order.shape[0] > 0: 130 | if len(keep)>max_boxes: 131 | break 132 | cur = order[0] 133 | 134 | keep.append(cur) 135 | 136 | area = (bboxes[cur, 2] - bboxes[cur, 0]) * (bboxes[cur, 3] - bboxes[cur, 1]) 137 | 138 | x1_reain = x1[order[1:]] 139 | y1_reain = y1[order[1:]] 140 | x2_reain = x2[order[1:]] 141 | y2_reain = y2[order[1:]] 142 | 143 | xx1 = np.maximum(bboxes[cur, 0], x1_reain) 144 | yy1 = np.maximum(bboxes[cur, 1], y1_reain) 145 | xx2 = np.minimum(bboxes[cur, 2], x2_reain) 146 | yy2 = np.minimum(bboxes[cur, 3], y2_reain) 147 | 148 | intersection = np.maximum(0, yy2 - yy1) * np.maximum(0, xx2 - xx1) 149 | 150 | iou = intersection / (area + (y2_reain - y1_reain) * (x2_reain - x1_reain) - intersection) 151 | 152 | ##keep the low iou 153 | low_iou_position = np.where(iou < iou_thres)[0] 154 | 155 | order = order[low_iou_position + 1] 156 | 157 | return bboxes[keep] 158 | 159 | def stats_graph(self,graph): 160 | 161 | 162 | 163 | flops = tf.profiler.profile(graph, options=tf.profiler.ProfileOptionBuilder.float_operation()) 164 | params = tf.profiler.profile(graph, options=tf.profiler.ProfileOptionBuilder.trainable_variables_parameter()) 165 | print(params) 166 | print('FLOPs: {}M; Trainable params: 
{}'.format(flops.total_float_ops/1024/1024., params.total_parameters)) 167 | 168 | def init_model(self,args): 169 | 170 | if len(args) == 1: 171 | use_pb = True 172 | pb_path = args[0] 173 | else: 174 | use_pb = False 175 | meta_path = args[0] 176 | restore_model_path = args[1] 177 | 178 | def ini_ckpt(): 179 | graph = tf.Graph() 180 | graph.as_default() 181 | configProto = tf.ConfigProto() 182 | configProto.gpu_options.allow_growth = True 183 | sess = tf.Session(config=configProto) 184 | # load_model(model_path, sess) 185 | saver = tf.train.import_meta_graph(meta_path) 186 | saver.restore(sess, restore_model_path) 187 | 188 | print("Model restred!") 189 | return (graph, sess) 190 | 191 | def init_pb(model_path): 192 | config = tf.ConfigProto() 193 | config.gpu_options.per_process_gpu_memory_fraction = 0.5 194 | compute_graph = tf.Graph() 195 | compute_graph.as_default() 196 | sess = tf.Session(config=config) 197 | with tf.gfile.GFile(model_path, 'rb') as fid: 198 | graph_def = tf.GraphDef() 199 | graph_def.ParseFromString(fid.read()) 200 | tf.import_graph_def(graph_def, name='') 201 | 202 | 203 | # saver = tf.train.Saver(tf.global_variables()) 204 | # saver.save(sess, save_path='./tmp.ckpt') 205 | return (compute_graph, sess) 206 | 207 | if use_pb: 208 | model = init_pb(pb_path) 209 | else: 210 | model = ini_ckpt() 211 | 212 | graph = model[0] 213 | sess = model[1] 214 | 215 | return graph, sess 216 | 217 | 218 | -------------------------------------------------------------------------------- /lib/core/anchor/box_utils.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import sys 4 | sys.path.append('.') 5 | import tensorflow as tf 6 | import numpy as np 7 | # a small value 8 | EPSILON = 1e-8 9 | 10 | 11 | ###becaurefull , the decode use it as one 12 | SCALE_FACTORS = [5.0, 5.0, 5.0, 5.0] 13 | 14 | 15 | 16 | 17 | 18 | def np_iou(boxes1, boxes2): 19 | def area(boxes): 20 | """Computes area of boxes. 21 | 22 | Arguments: 23 | boxes: a float tensor with shape [N, 4]. 24 | Returns: 25 | a float tensor with shape [N] representing box areas. 26 | """ 27 | 28 | xmin, ymin, xmax, ymax = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3] 29 | return (ymax - ymin) * (xmax - xmin) 30 | 31 | """Computes pairwise intersection-over-union between two box collections. 32 | 33 | Arguments: 34 | boxes1: a float tensor with shape [N, 4].GT 35 | boxes2: a float tensor with shape [M, 4].ANCHOR 36 | Returns: 37 | a float tensor with shape [N, M] representing pairwise iou scores. 38 | """ 39 | 40 | intersections = intersection(boxes1, boxes2) 41 | 42 | areas1 = area(boxes1) 43 | areas2 = area(boxes2) 44 | unions = np.expand_dims(areas1, 1) + np.expand_dims(areas2, 0) - intersections 45 | 46 | return np.clip(intersections / unions, 0.0, 1.0) 47 | def intersection(boxes1, boxes2): 48 | """Compute pairwise intersection areas between boxes. 49 | 50 | Arguments: 51 | boxes1: a float tensor with shape [N, 4]. 52 | boxes2: a float tensor with shape [M, 4]. 53 | Returns: 54 | a float tensor with shape [N, M] representing pairwise intersections. 
55 | """ 56 | 57 | xmin1, ymin1, xmax1, ymax1 = np.split(boxes1, indices_or_sections=4, axis=1) 58 | xmin2, ymin2, xmax2, ymax2 = np.split(boxes2, indices_or_sections=4, axis=1) 59 | # they all have shapes like [None, 1] 60 | 61 | all_pairs_min_ymax = np.minimum(ymax1, np.transpose(ymax2)) 62 | all_pairs_max_ymin = np.maximum(ymin1, np.transpose(ymin2)) 63 | 64 | intersect_heights = np.maximum(0.0, all_pairs_min_ymax - all_pairs_max_ymin) 65 | all_pairs_min_xmax = np.minimum(xmax1, np.transpose(xmax2)) 66 | all_pairs_max_xmin = np.maximum(xmin1, np.transpose(xmin2)) 67 | intersect_widths = np.maximum(0.0, all_pairs_min_xmax - all_pairs_max_xmin) 68 | # they all have shape [N, M] 69 | return intersect_heights * intersect_widths 70 | 71 | 72 | 73 | 74 | def encode(boxes, anchors): 75 | 76 | # print(boxes) 77 | # print(anchors) 78 | """Encode boxes with respect to anchors. 79 | Arguments: 80 | boxes: a float tensor with shape [N, 4]. 81 | anchors: a float tensor with shape [N, 4]. 82 | Returns: 83 | a float tensor with shape [N, 4], 84 | anchor-encoded boxes of the format [tx1, ty1, tx2, ty2]. 85 | """ 86 | 87 | anchor_widths = anchors[:, 2] - anchors[:, 0] 88 | anchor_heights = anchors[:, 3] - anchors[:, 1] 89 | 90 | tx1 = (boxes[:, 0] - anchors[:, 0]) / anchor_widths 91 | ty1 = (boxes[:, 1] - anchors[:, 1]) / anchor_heights 92 | tx2 = (boxes[:, 2] - anchors[:, 2]) / anchor_widths 93 | ty2 = (boxes[:, 3] - anchors[:, 3]) / anchor_heights 94 | 95 | tx1 *= SCALE_FACTORS[0] 96 | ty1 *= SCALE_FACTORS[1] 97 | tx2 *= SCALE_FACTORS[2] 98 | ty2 *= SCALE_FACTORS[3] 99 | 100 | return np.stack([tx1, ty1, tx2, ty2], axis=1) 101 | 102 | 103 | 104 | 105 | 106 | def decode(codes, anchors): 107 | """Decode relative codes to boxes. 108 | Arguments: 109 | codes: a float tensor with shape [N, 4], 110 | anchor-encoded boxes of the format [tx1, ty, tx2, ty2]. 111 | anchors: a float tensor with shape [N, 4]. 112 | Returns: 113 | a float tensor with shape [N, 4], 114 | bounding boxes of the format [ymin, xmin, ymax, xmax]. because tf nms needs yxyx 115 | """ 116 | with tf.name_scope('decode_predictions'): 117 | 118 | anchor_widths = anchors[:, 2] - anchors[:, 0] 119 | anchor_heights = anchors[:, 3] - anchors[:, 1] 120 | 121 | tx1, ty1, tx2, ty2 = tf.unstack(codes, axis=1) 122 | 123 | tx1 /= SCALE_FACTORS[0] 124 | ty1 /= SCALE_FACTORS[1] 125 | tx2 /= SCALE_FACTORS[2] 126 | ty2 /= SCALE_FACTORS[3] 127 | 128 | x1=tx1 * anchor_widths + anchors[:,0] 129 | y1=ty1 * anchor_heights + anchors[:,1] 130 | x2=tx2 * anchor_widths + anchors[:,2] 131 | y2=ty2 * anchor_heights + anchors[:,3] 132 | 133 | return tf.stack([x1,y1,x2,y2], axis=1) 134 | 135 | 136 | def decode_fix(codes, anchors,anchors_decode): 137 | """Decode relative codes to boxes. 138 | Arguments: 139 | codes: a float tensor with shape [N, 4], 140 | anchor-encoded boxes of the format [tx1, ty, tx2, ty2]. 141 | anchors: a float tensor with shape [N, 4]. 142 | Returns: 143 | a float tensor with shape [N, 4], 144 | bounding boxes of the format [ymin, xmin, ymax, xmax]. 
because tf nms needs yxyx 145 | """ 146 | with tf.name_scope('decode_predictions'): 147 | 148 | decodes=codes*anchors_decode+anchors 149 | 150 | 151 | return decodes 152 | # tx1, ty1, tx2, ty2 = tf.unstack(codes, axis=1) 153 | # 154 | # tx1 /= SCALE_FACTORS[0] 155 | # ty1 /= SCALE_FACTORS[1] 156 | # tx2 /= SCALE_FACTORS[2] 157 | # ty2 /= SCALE_FACTORS[3] 158 | # 159 | # x1=tx1 * anchor_widths + anchors[:,0] 160 | # y1=ty1 * anchor_heights + anchors[:,1] 161 | # x2=tx2 * anchor_widths + anchors[:,2] 162 | # y2=ty2 * anchor_heights + anchors[:,3] 163 | # 164 | # return tf.stack([x1,y1,x2,y2], axis=1) 165 | 166 | 167 | 168 | 169 | 170 | def batch_decode(box_encodings, anchors): 171 | """Decodes a batch of box encodings with respect to the anchors. 172 | 173 | Arguments: 174 | box_encodings: a float tensor with shape [batch_size, num_anchors, 4]. 175 | anchors: a float tensor with shape [num_anchors, 4]. 176 | Returns: 177 | a float tensor with shape [batch_size, num_anchors, 4]. 178 | It contains the decoded boxes. 179 | """ 180 | batch_size = tf.shape(box_encodings)[0] 181 | num_anchors = tf.shape(box_encodings)[1] 182 | 183 | tiled_anchor_boxes = tf.tile( 184 | tf.expand_dims(anchors, 0), 185 | [batch_size, 1, 1] 186 | ) # shape [batch_size, num_anchors, 4] 187 | decoded_boxes = decode( 188 | tf.reshape(box_encodings, [-1, 4]), 189 | tf.reshape(tiled_anchor_boxes, [-1, 4]) 190 | ) # shape [batch_size * num_anchors, 4] 191 | 192 | decoded_boxes = tf.reshape( 193 | decoded_boxes, 194 | [batch_size, num_anchors, 4] 195 | ) 196 | decoded_boxes = tf.clip_by_value(decoded_boxes, 0.0, 1.0) 197 | return decoded_boxes 198 | 199 | def batch_decode_fix(box_encodings, anchors,anchors_decode): 200 | """Decodes a batch of box encodings with respect to the anchors. 201 | 202 | Arguments: 203 | box_encodings: a float tensor with shape [batch_size, num_anchors, 4]. 204 | anchors: a float tensor with shape [num_anchors, 4]. 205 | Returns: 206 | a float tensor with shape [batch_size, num_anchors, 4]. 207 | It contains the decoded boxes. 
208 | """ 209 | 210 | batch_size = tf.shape(box_encodings)[0] 211 | num_anchors = tf.shape(box_encodings)[1] 212 | 213 | # tiled_anchor_boxes = tf.tile( 214 | # tf.expand_dims(anchors, 0), 215 | # [batch_size, 1, 1] 216 | # ) # shape [batch_size, num_anchors, 4] 217 | #tiled_anchor_boxes=anchors 218 | 219 | decoded_boxes = decode_fix( 220 | tf.reshape(box_encodings, [-1, 4]), 221 | anchors, 222 | anchors_decode 223 | ) # shape [batch_size * num_anchors, 4] 224 | 225 | decoded_boxes = tf.reshape( 226 | decoded_boxes, 227 | [batch_size, num_anchors, 4] 228 | ) 229 | #decoded_boxes = tf.clip_by_value(decoded_boxes, 0.0, 1.0) 230 | return decoded_boxes 231 | -------------------------------------------------------------------------------- /model_eval/wider.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append('.') 4 | import os 5 | import scipy.io as sio 6 | import argparse 7 | import cv2 8 | import numpy as np 9 | import tensorflow as tf 10 | 11 | import time 12 | 13 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 14 | 15 | from lib.core.api.face_detector import FaceDetector 16 | 17 | ap = argparse.ArgumentParser() 18 | ap.add_argument("--model", required=False, default='./model/detector.pb', help="model to eval:") 19 | ap.add_argument("--is_show", required=False, default=False, help="show result or not?") 20 | ap.add_argument("--data_dir", required=False, default="./WIDER/WIDER_val", help="dir to img") 21 | ap.add_argument("--multiscale", required=False, default=0, type=int, help="test in multiscales 0-False 1-True") 22 | 23 | ap.add_argument("--input_shape", required=False, type=int, default=512, help="input shape") 24 | ap.add_argument("--result", required=False, default='./result', help="dir to write result") 25 | 26 | args = ap.parse_args() 27 | 28 | IMAGES_DIR = args.data_dir 29 | RESULT_DIR = args.result 30 | MODEL_PATH = args.model 31 | INPUT_SHAPE = (args.input_shape, args.input_shape) 32 | MULTISCALETEST = True if args.multiscale == 1 else False 33 | 34 | face_detector = FaceDetector([MODEL_PATH]) 35 | 36 | 37 | def get_data(): 38 | subset = 'val' 39 | if subset is 'val': 40 | wider_face = sio.loadmat( 41 | './eval_tools/ground_truth/wider_face_val.mat') 42 | else: 43 | wider_face = sio.loadmat( 44 | './eval_tools/ground_truth/wider_face_test.mat') 45 | event_list = wider_face['event_list'] 46 | file_list = wider_face['file_list'] 47 | del wider_face 48 | 49 | imgs_path = os.path.join(IMAGES_DIR, 'images') 50 | save_path = RESULT_DIR 51 | 52 | return event_list, file_list, imgs_path, save_path 53 | 54 | 55 | def bbox_vote(det): 56 | order = det[:, 4].ravel().argsort()[::-1] 57 | det = det[order, :] 58 | while det.shape[0] > 0: 59 | # IOU 60 | area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1) 61 | xx1 = np.maximum(det[0, 0], det[:, 0]) 62 | yy1 = np.maximum(det[0, 1], det[:, 1]) 63 | xx2 = np.minimum(det[0, 2], det[:, 2]) 64 | yy2 = np.minimum(det[0, 3], det[:, 3]) 65 | w = np.maximum(0.0, xx2 - xx1 + 1) 66 | h = np.maximum(0.0, yy2 - yy1 + 1) 67 | inter = w * h 68 | o = inter / (area[0] + area[:] - inter) 69 | 70 | # get needed merge det and delete these det 71 | merge_index = np.where(o >= 0.3)[0] 72 | det_accu = det[merge_index, :] 73 | det = np.delete(det, merge_index, 0) 74 | 75 | if merge_index.shape[0] <= 1: 76 | continue 77 | det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4)) 78 | max_score = np.max(det_accu[:, 4]) 79 | det_accu_sum = np.zeros((1, 5)) 80 | det_accu_sum[:, 0:4] 
= np.sum(det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:]) 81 | det_accu_sum[:, 4] = max_score 82 | try: 83 | dets = np.row_stack((dets, det_accu_sum)) 84 | except: 85 | dets = det_accu_sum 86 | try: 87 | dets = dets[0:750, :] 88 | except: 89 | dets = det 90 | 91 | return dets 92 | 93 | 94 | def detect_face(img, shrink): 95 | if shrink != 1: 96 | img = cv2.resize(img, None, None, fx=shrink, fy=shrink, 97 | interpolation=cv2.INTER_LINEAR) 98 | 99 | if not MULTISCALETEST: 100 | detections = face_detector(img, score_threshold=0.05, input_shape=(args.input_shape, args.input_shape)) 101 | else: 102 | INPUT_SHAPE = (img.shape[0], img.shape[1]) 103 | detections = face_detector(img, score_threshold=0.05, input_shape=INPUT_SHAPE) 104 | 105 | det_xmin = detections[:, 0] / shrink 106 | det_ymin = detections[:, 1] / shrink 107 | det_xmax = detections[:, 2] / shrink 108 | det_ymax = detections[:, 3] / shrink 109 | det_conf = detections[:, 4] 110 | det = np.column_stack((det_xmin, det_ymin, det_xmax, det_ymax, det_conf)) 111 | 112 | return det 113 | 114 | 115 | def multi_scale_test(image, max_im_shrink): 116 | # shrink detecting and shrink only detect big face 117 | st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink 118 | det_s = detect_face(image, st) 119 | index = np.where(np.maximum( 120 | det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0] 121 | det_s = det_s[index, :] 122 | 123 | # enlarge one times 124 | bt = min(2, max_im_shrink) if max_im_shrink > 1 else ( 125 | st + max_im_shrink) / 2 126 | det_b = detect_face(image, bt) 127 | 128 | # enlarge small image x times for small face 129 | if max_im_shrink > 2: 130 | bt *= 2 131 | while bt < max_im_shrink: 132 | det_b = np.row_stack((det_b, detect_face(image, bt))) 133 | bt *= 2 134 | det_b = np.row_stack((det_b, detect_face(image, max_im_shrink))) 135 | 136 | # enlarge only detect small face 137 | if bt > 1: 138 | index = np.where(np.minimum( 139 | det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0] 140 | det_b = det_b[index, :] 141 | else: 142 | index = np.where(np.maximum( 143 | det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0] 144 | det_b = det_b[index, :] 145 | 146 | return det_s, det_b 147 | 148 | 149 | def flip_test(image, shrink): 150 | image_f = cv2.flip(image, 1) 151 | det_f = detect_face(image_f, shrink) 152 | 153 | det_t = np.zeros(det_f.shape) 154 | det_t[:, 0] = image.shape[1] - det_f[:, 2] 155 | det_t[:, 1] = det_f[:, 1] 156 | det_t[:, 2] = image.shape[1] - det_f[:, 0] 157 | det_t[:, 3] = det_f[:, 3] 158 | det_t[:, 4] = det_f[:, 4] 159 | return det_t 160 | 161 | 162 | event_list, file_list, imgs_path, save_path = get_data() 163 | 164 | for index, event in enumerate(event_list): 165 | print(event) 166 | filelist = file_list[index][0] 167 | path = os.path.join(save_path, event[0][0]) 168 | if not os.path.exists(path): 169 | os.makedirs(path) 170 | 171 | for num, file in enumerate(filelist): 172 | im_name = file[0][0] 173 | in_file = os.path.join(imgs_path, event[0][0], im_name[:] + '.jpg') 174 | 175 | image_array = cv2.imread(in_file) 176 | img = cv2.cvtColor(image_array, cv2.COLOR_BGR2RGB) 177 | 178 | # max_im_shrink = (0x7fffffff / 200.0 / (img.shape[0] * img.shape[1])) ** 0.5 179 | max_im_shrink = np.sqrt( 180 | 2000 * 2000 / (img.shape[0] * img.shape[1])) 181 | max_im_shrink = 3 if max_im_shrink > 3 else max_im_shrink 182 | 183 | shrink = max_im_shrink if max_im_shrink < 1 else 1 184 | 185 | det0 = detect_face(img, shrink) 186 | 187 | ##flip det 188 | det1 = 
flip_test(img, shrink) 189 | 190 | if MULTISCALETEST: 191 | [det2, det3] = multi_scale_test(img, max_im_shrink) 192 | det = np.row_stack((det0, det1, det2, det3)) 193 | else: 194 | det = np.row_stack((det0, det1)) 195 | 196 | dets = bbox_vote(det) 197 | 198 | if args.is_show: 199 | for bbox in dets: 200 | if bbox[4] > 0.3: 201 | # cv2.circle(img_show,(p[0],p[1]),3,(0,0,213),-1) 202 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), 203 | (int(bbox[2]), int(bbox[3])), (255, 0, 0), 7) 204 | cv2.imshow('tmp', img) 205 | cv2.waitKey(0) 206 | 207 | fout = open(os.path.join(save_path, event[0][0], im_name + '.txt'), 'w') 208 | fout.write('{:s}\n'.format(event[0][0] + '/' + im_name + '.jpg')) 209 | fout.write('{:d}\n'.format(dets.shape[0])) 210 | for i in range(dets.shape[0]): 211 | xmin = dets[i][0] 212 | ymin = dets[i][1] 213 | xmax = dets[i][2] 214 | ymax = dets[i][3] 215 | score = dets[i][4] 216 | fout.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'. 217 | format(xmin, ymin, (xmax - xmin + 1), (ymax - ymin + 1), score)) 218 | fout.close() 219 | 220 | -------------------------------------------------------------------------------- /lib/core/api/face_detector.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import cv2 4 | import time 5 | import math 6 | 7 | from train_config import config as cfg 8 | 9 | 10 | 11 | class FaceDetector: 12 | def __init__(self, model_path): 13 | """ 14 | Arguments: 15 | model_path: a string, path to a pb file. 16 | """ 17 | self._graph = tf.Graph() 18 | 19 | with self._graph.as_default(): 20 | self._graph, self._sess = self.init_model(model_path) 21 | 22 | 23 | self.input_image = tf.get_default_graph().get_tensor_by_name('tower_0/images:0') 24 | #self.training = tf.get_default_graph().get_tensor_by_name('training_flag:0') 25 | self.output_op=tf.get_default_graph().get_tensor_by_name('tower_0/detections:0') 26 | 27 | self.output_kps=tf.get_default_graph().get_tensor_by_name('tower_0/keypoints:0') 28 | 29 | self.wh = tf.get_default_graph().get_tensor_by_name('tower_0/wh:0') 30 | 31 | def __call__(self, image, score_threshold=0.5,input_shape=(cfg.DATA.hin,cfg.DATA.win),max_boxes=1000): 32 | """Detect faces. 33 | 34 | Arguments: 35 | image: a numpy uint8 array with shape [height, width, 3], 36 | that represents a RGB image. 37 | score_threshold: a float number. 38 | Returns: 39 | boxes: a float numpy array of shape [num_faces, 5]. 
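(judging from the scale/bias vectors applied below, each returned row appears to carry six values, [xmin, ymin, xmax, ymax, score, label], already rescaled back to the original image coordinates)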
40 | 41 | """ 42 | 43 | 44 | if input_shape is None: 45 | h, w, c = image.shape 46 | input_shape = (math.ceil(h / 32) * 32, math.ceil(w / 32) * 32) 47 | 48 | else: 49 | h, w = input_shape 50 | input_shape = (math.ceil(h / 32) * 32, math.ceil(w / 32) * 32) 51 | 52 | image, scale_x, scale_y, dx, dy = self.preprocess(image, 53 | target_height=input_shape[0], 54 | target_width=input_shape[1]) 55 | 56 | 57 | if cfg.DATA.channel==1: 58 | image=cv2.cvtColor(image,cv2.COLOR_RGB2GRAY) 59 | image= np.expand_dims(image, -1) 60 | 61 | image_fornet = np.expand_dims(image, 0) 62 | 63 | outputs,kps,wh = self._sess.run( 64 | [self.output_op,self.output_kps,self.wh], feed_dict={self.input_image: image_fornet} 65 | ) 66 | 67 | bboxes=outputs[0] 68 | 69 | # print(kps.shape) 70 | # kps=kps[0][:,:,0] 71 | # 72 | # label =kps 73 | # #label = (label / np.max(label) * 255).astype(np.uint8) 74 | # cv2.namedWindow('label', 0) 75 | # cv2.imshow('label', label) 76 | # 77 | # wh = wh[0][:, :, 0] 78 | # 79 | # print(np.min(wh)) 80 | # print(np.max(wh)) 81 | # wh = wh / np.max(wh) 82 | # wh = wh 83 | # # label = (label / np.max(label) * 255).astype(np.uint8) 84 | # cv2.namedWindow('wh', 0) 85 | # cv2.imshow('wh', wh) 86 | 87 | 88 | bboxes = self.py_nms(np.array(bboxes), iou_thres=None, score_thres=score_threshold,max_boxes=max_boxes) 89 | 90 | ###recorver to raw image 91 | boxes_scaler = np.array([1 / scale_x, 92 | 1 / scale_y, 93 | 1 / scale_x, 94 | 1 / scale_y, 95 | 1.,1.], dtype='float32') 96 | 97 | boxes_bias = np.array([dx , 98 | dy , 99 | dx , 100 | dy , 0.,0.], dtype='float32') 101 | bboxes = (bboxes - boxes_bias)*boxes_scaler 102 | 103 | 104 | 105 | # self.stats_graph(self._sess.graph) 106 | return bboxes 107 | 108 | 109 | def preprocess(self, image, target_height, target_width, label=None): 110 | 111 | ###sometimes use in objs detects 112 | h, w, c = image.shape 113 | 114 | bimage = np.zeros(shape=[target_height, target_width, c], dtype=image.dtype) 115 | 116 | scale_y = target_height / h 117 | scale_x = target_width / w 118 | 119 | scale = min(scale_x, scale_y) 120 | 121 | image = cv2.resize(image, None, fx=scale, fy=scale) 122 | 123 | h_, w_, _ = image.shape 124 | 125 | dx = (target_width - w_) // 2 126 | dy = (target_height - h_) // 2 127 | bimage[dy:h_ + dy, dx:w_ + dx, :] = image 128 | 129 | return bimage, scale, scale, dx, dy 130 | 131 | def py_nms(self, bboxes, iou_thres, score_thres, max_boxes=1000): 132 | 133 | upper_thres = np.where(bboxes[:, 4] > score_thres)[0] 134 | 135 | bboxes = bboxes[upper_thres] 136 | if iou_thres is None: 137 | return bboxes 138 | 139 | x1 = bboxes[:, 0] 140 | y1 = bboxes[:, 1] 141 | x2 = bboxes[:, 2] 142 | y2 = bboxes[:, 3] 143 | 144 | order = np.argsort(bboxes[:, 4])[::-1] 145 | 146 | keep=[] 147 | while order.shape[0] > 0: 148 | if len(keep)>max_boxes: 149 | break 150 | cur = order[0] 151 | 152 | keep.append(cur) 153 | 154 | area = (bboxes[cur, 2] - bboxes[cur, 0]) * (bboxes[cur, 3] - bboxes[cur, 1]) 155 | 156 | x1_reain = x1[order[1:]] 157 | y1_reain = y1[order[1:]] 158 | x2_reain = x2[order[1:]] 159 | y2_reain = y2[order[1:]] 160 | 161 | xx1 = np.maximum(bboxes[cur, 0], x1_reain) 162 | yy1 = np.maximum(bboxes[cur, 1], y1_reain) 163 | xx2 = np.minimum(bboxes[cur, 2], x2_reain) 164 | yy2 = np.minimum(bboxes[cur, 3], y2_reain) 165 | 166 | intersection = np.maximum(0, yy2 - yy1) * np.maximum(0, xx2 - xx1) 167 | 168 | iou = intersection / (area + (y2_reain - y1_reain) * (x2_reain - x1_reain) - intersection) 169 | 170 | ##keep the low iou 171 | low_iou_position = np.where(iou 
< iou_thres)[0] 172 | 173 | order = order[low_iou_position + 1] 174 | 175 | return bboxes[keep] 176 | 177 | def stats_graph(self,graph): 178 | 179 | 180 | 181 | flops = tf.profiler.profile(graph, options=tf.profiler.ProfileOptionBuilder.float_operation()) 182 | params = tf.profiler.profile(graph, options=tf.profiler.ProfileOptionBuilder.trainable_variables_parameter()) 183 | print(params) 184 | print('FLOPs: {}M; Trainable params: {}'.format(flops.total_float_ops/1024/1024., params.total_parameters)) 185 | 186 | def init_model(self,args): 187 | 188 | if len(args) == 1: 189 | use_pb = True 190 | pb_path = args[0] 191 | else: 192 | use_pb = False 193 | meta_path = args[0] 194 | restore_model_path = args[1] 195 | 196 | def ini_ckpt(): 197 | graph = tf.Graph() 198 | graph.as_default() 199 | configProto = tf.ConfigProto() 200 | configProto.gpu_options.allow_growth = True 201 | sess = tf.Session(config=configProto) 202 | # load_model(model_path, sess) 203 | saver = tf.train.import_meta_graph(meta_path) 204 | saver.restore(sess, restore_model_path) 205 | 206 | print("Model restred!") 207 | return (graph, sess) 208 | 209 | def init_pb(model_path): 210 | config = tf.ConfigProto() 211 | config.gpu_options.per_process_gpu_memory_fraction = 0.5 212 | compute_graph = tf.Graph() 213 | compute_graph.as_default() 214 | sess = tf.Session(config=config) 215 | with tf.gfile.GFile(model_path, 'rb') as fid: 216 | graph_def = tf.GraphDef() 217 | graph_def.ParseFromString(fid.read()) 218 | tf.import_graph_def(graph_def, name='') 219 | 220 | 221 | # saver = tf.train.Saver(tf.global_variables()) 222 | # saver.save(sess, save_path='./tmp.ckpt') 223 | return (compute_graph, sess) 224 | 225 | if use_pb: 226 | model = init_pb(pb_path) 227 | else: 228 | model = ini_ckpt() 229 | 230 | graph = model[0] 231 | sess = model[1] 232 | 233 | return graph, sess 234 | 235 | 236 | -------------------------------------------------------------------------------- /lib/core/model/net/mobilenet/mobilenet_v2.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Implementation of Mobilenet V2. 16 | Architecture: https://arxiv.org/abs/1801.04381 17 | The base model gives 72.2% accuracy on ImageNet, with 300MMadds, 18 | 3.4 M parameters. 
19 | """ 20 | 21 | from __future__ import absolute_import 22 | from __future__ import division 23 | from __future__ import print_function 24 | 25 | import copy 26 | import functools 27 | 28 | import tensorflow as tf 29 | 30 | from lib.core.model.net.mobilenet import conv_blocks as ops 31 | from lib.core.model.net.mobilenet import mobilenet as lib 32 | 33 | slim = tf.contrib.slim 34 | op = lib.op 35 | 36 | expand_input = ops.expand_input_by_factor 37 | 38 | # pyformat: disable 39 | # Architecture: https://arxiv.org/abs/1801.04381 40 | V2_DEF = dict( 41 | defaults={ 42 | # Note: these parameters of batch norm affect the architecture 43 | # that's why they are here and not in training_scope. 44 | (slim.batch_norm,): {'center': True, 'scale': True}, 45 | (slim.conv2d, slim.fully_connected, slim.separable_conv2d): { 46 | 'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6 47 | }, 48 | (ops.expanded_conv,): { 49 | 'expansion_size': expand_input(6), 50 | 'split_expansion': 1, 51 | 'normalizer_fn': slim.batch_norm, 52 | 'residual': True 53 | }, 54 | (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME'} 55 | }, 56 | spec=[ 57 | op(slim.conv2d, stride=2, num_outputs=32, kernel_size=[3, 3]), 58 | op(ops.expanded_conv, 59 | expansion_size=expand_input(1, divisible_by=1), 60 | num_outputs=16), 61 | op(ops.expanded_conv, stride=2, num_outputs=24), 62 | op(ops.expanded_conv, stride=1, num_outputs=24), 63 | op(ops.expanded_conv, stride=2, num_outputs=32), 64 | op(ops.expanded_conv, stride=1, num_outputs=32), 65 | op(ops.expanded_conv, stride=1, num_outputs=32), 66 | op(ops.expanded_conv, stride=2, num_outputs=64), 67 | op(ops.expanded_conv, stride=1, num_outputs=64), 68 | op(ops.expanded_conv, stride=1, num_outputs=64), 69 | op(ops.expanded_conv, stride=1, num_outputs=64), 70 | op(ops.expanded_conv, stride=1, num_outputs=96), 71 | op(ops.expanded_conv, stride=1, num_outputs=96), 72 | op(ops.expanded_conv, stride=1, num_outputs=96), 73 | op(ops.expanded_conv, stride=2, num_outputs=160), 74 | op(ops.expanded_conv, stride=1, num_outputs=160), 75 | op(ops.expanded_conv, stride=1, num_outputs=160), 76 | op(ops.expanded_conv, stride=1, num_outputs=320), 77 | #op(slim.conv2d, stride=1, kernel_size=[1, 1], num_outputs=1280) 78 | ], 79 | ) 80 | # pyformat: enable 81 | 82 | 83 | @slim.add_arg_scope 84 | def mobilenet(input_tensor, 85 | num_classes=1001, 86 | depth_multiplier=1.0, 87 | scope='MobilenetV2', 88 | conv_defs=None, 89 | finegrain_classification_mode=False, 90 | min_depth=None, 91 | divisible_by=None, 92 | activation_fn=None, 93 | **kwargs): 94 | """Creates mobilenet V2 network. 95 | Inference mode is created by default. To create training use training_scope 96 | below. 97 | with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()): 98 | logits, endpoints = mobilenet_v2.mobilenet(input_tensor) 99 | Args: 100 | input_tensor: The input tensor 101 | num_classes: number of classes 102 | depth_multiplier: The multiplier applied to scale number of 103 | channels in each layer. 104 | scope: Scope of the operator 105 | conv_defs: Allows to override default conv def. 106 | finegrain_classification_mode: When set to True, the model 107 | will keep the last layer large even for small multipliers. Following 108 | https://arxiv.org/abs/1801.04381 109 | suggests that it improves performance for ImageNet-type of problems. 110 | *Note* ignored if final_endpoint makes the builder exit earlier. 
111 | min_depth: If provided, will ensure that all layers will have that 112 | many channels after application of depth multiplier. 113 | divisible_by: If provided will ensure that all layers # channels 114 | will be divisible by this number. 115 | activation_fn: Activation function to use, defaults to tf.nn.relu6 if not 116 | specified. 117 | **kwargs: passed directly to mobilenet.mobilenet: 118 | prediction_fn- what prediction function to use. 119 | reuse-: whether to reuse variables (if reuse set to true, scope 120 | must be given). 121 | Returns: 122 | logits/endpoints pair 123 | Raises: 124 | ValueError: On invalid arguments 125 | """ 126 | if conv_defs is None: 127 | conv_defs = V2_DEF 128 | if 'multiplier' in kwargs: 129 | raise ValueError('mobilenetv2 doesn\'t support generic ' 130 | 'multiplier parameter use "depth_multiplier" instead.') 131 | if finegrain_classification_mode: 132 | conv_defs = copy.deepcopy(conv_defs) 133 | if depth_multiplier < 1: 134 | conv_defs['spec'][-1].params['num_outputs'] /= depth_multiplier 135 | if activation_fn: 136 | conv_defs = copy.deepcopy(conv_defs) 137 | defaults = conv_defs['defaults'] 138 | conv_defaults = ( 139 | defaults[(slim.conv2d, slim.fully_connected, slim.separable_conv2d)]) 140 | conv_defaults['activation_fn'] = activation_fn 141 | 142 | depth_args = {} 143 | # NB: do not set depth_args unless they are provided to avoid overriding 144 | # whatever default depth_multiplier might have thanks to arg_scope. 145 | if min_depth is not None: 146 | depth_args['min_depth'] = min_depth 147 | if divisible_by is not None: 148 | depth_args['divisible_by'] = divisible_by 149 | 150 | with slim.arg_scope((lib.depth_multiplier,), **depth_args): 151 | return lib.mobilenet( 152 | input_tensor, 153 | num_classes=num_classes, 154 | conv_defs=conv_defs, 155 | scope=scope, 156 | multiplier=depth_multiplier, 157 | **kwargs) 158 | 159 | mobilenet.default_image_size = 224 160 | 161 | 162 | def wrapped_partial(func, *args, **kwargs): 163 | partial_func = functools.partial(func, *args, **kwargs) 164 | functools.update_wrapper(partial_func, func) 165 | return partial_func 166 | 167 | # Wrappers for mobilenet v2 with depth-multipliers. Be noticed that 168 | # 'finegrain_classification_mode' is set to True, which means the embedding 169 | # layer will not be shrinked when given a depth-multiplier < 1.0. 170 | mobilenet_v2_140 = wrapped_partial(mobilenet, depth_multiplier=1.4) 171 | mobilenet_v2_050 = wrapped_partial(mobilenet, depth_multiplier=0.50, 172 | finegrain_classification_mode=True) 173 | mobilenet_v2_035 = wrapped_partial(mobilenet, depth_multiplier=0.35, 174 | finegrain_classification_mode=True) 175 | 176 | mobilenet_v2_025 = wrapped_partial(mobilenet, depth_multiplier=0.25, 177 | finegrain_classification_mode=True) 178 | 179 | @slim.add_arg_scope 180 | def mobilenet_base(input_tensor, depth_multiplier=1.0, **kwargs): 181 | """Creates base of the mobilenet (no pooling and no logits) .""" 182 | return mobilenet(input_tensor, 183 | depth_multiplier=depth_multiplier, 184 | base_only=True, **kwargs) 185 | 186 | 187 | def training_scope(**kwargs): 188 | """Defines MobilenetV2 training scope. 189 | Usage: 190 | with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()): 191 | logits, endpoints = mobilenet_v2.mobilenet(input_tensor) 192 | with slim. 193 | Args: 194 | **kwargs: Passed to mobilenet.training_scope. The following parameters 195 | are supported: 196 | weight_decay- The weight decay to use for regularizing the model. 
197 | stddev- Standard deviation for initialization, if negative uses xavier. 198 | dropout_keep_prob- dropout keep probability 199 | bn_decay- decay for the batch norm moving averages. 200 | Returns: 201 | An `arg_scope` to use for the mobilenet v2 model. 202 | """ 203 | return lib.training_scope(**kwargs) 204 | 205 | 206 | __all__ = ['training_scope', 'mobilenet_base', 'mobilenet', 'V2_DEF'] -------------------------------------------------------------------------------- /lib/dataset/augmentor/data_aug/bbox_util.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | 5 | def draw_rect(im, cords, color = None): 6 | """Draw the rectangle on the image 7 | 8 | Parameters 9 | ---------- 10 | 11 | im : numpy.ndarray 12 | numpy image 13 | 14 | cords: numpy.ndarray 15 | Numpy array containing bounding boxes of shape `N X 4` where N is the 16 | number of bounding boxes and the bounding boxes are represented in the 17 | format `x1 y1 x2 y2` 18 | 19 | Returns 20 | ------- 21 | 22 | numpy.ndarray 23 | numpy image with bounding boxes drawn on it 24 | 25 | """ 26 | 27 | im = im.copy() 28 | 29 | cords = cords[:,:4] 30 | cords = cords.reshape(-1,4) 31 | if not color: 32 | color = [255,255,255] 33 | for cord in cords: 34 | 35 | pt1, pt2 = (cord[0], cord[1]) , (cord[2], cord[3]) 36 | 37 | pt1 = int(pt1[0]), int(pt1[1]) 38 | pt2 = int(pt2[0]), int(pt2[1]) 39 | 40 | im = cv2.rectangle(im.copy(), pt1, pt2, color, int(max(im.shape[:2])/200)) 41 | return im 42 | 43 | def bbox_area(bbox): 44 | return (bbox[:,2] - bbox[:,0])*(bbox[:,3] - bbox[:,1]) 45 | 46 | def clip_box(bbox, clip_box, alpha): 47 | """Clip the bounding boxes to the borders of an image 48 | 49 | Parameters 50 | ---------- 51 | 52 | bbox: numpy.ndarray 53 | Numpy array containing bounding boxes of shape `N X 4` where N is the 54 | number of bounding boxes and the bounding boxes are represented in the 55 | format `x1 y1 x2 y2` 56 | 57 | clip_box: numpy.ndarray 58 | An array of shape (4,) specifying the diagonal co-ordinates of the image 59 | The coordinates are represented in the format `x1 y1 x2 y2` 60 | 61 | alpha: float 62 | If the fraction of a bounding box left in the image after being clipped is 63 | less than `alpha` the bounding box is dropped. 64 | 65 | Returns 66 | ------- 67 | 68 | numpy.ndarray 69 | Numpy array containing **clipped** bounding boxes of shape `N X 4` where N is the 70 | number of bounding boxes left are being clipped and the bounding boxes are represented in the 71 | format `x1 y1 x2 y2` 72 | 73 | """ 74 | ar_ = (bbox_area(bbox)) 75 | x_min = np.maximum(bbox[:,0], clip_box[0]).reshape(-1,1) 76 | y_min = np.maximum(bbox[:,1], clip_box[1]).reshape(-1,1) 77 | x_max = np.minimum(bbox[:,2], clip_box[2]).reshape(-1,1) 78 | y_max = np.minimum(bbox[:,3], clip_box[3]).reshape(-1,1) 79 | 80 | bbox = np.hstack((x_min, y_min, x_max, y_max, bbox[:,4:])) 81 | 82 | delta_area = ((ar_ - bbox_area(bbox))/ar_) 83 | 84 | mask = (delta_area < (1 - alpha)).astype(int) 85 | 86 | bbox = bbox[mask == 1,:] 87 | 88 | 89 | return bbox 90 | 91 | 92 | def rotate_im(image, angle): 93 | """Rotate the image. 94 | 95 | Rotate the image such that the rotated image is enclosed inside the tightest 96 | rectangle. The area not occupied by the pixels of the original image is colored 97 | black. 
98 | 99 | Parameters 100 | ---------- 101 | 102 | image : numpy.ndarray 103 | numpy image 104 | 105 | angle : float 106 | angle by which the image is to be rotated 107 | 108 | Returns 109 | ------- 110 | 111 | numpy.ndarray 112 | Rotated Image 113 | 114 | """ 115 | # grab the dimensions of the image and then determine the 116 | # centre 117 | (h, w) = image.shape[:2] 118 | (cX, cY) = (w // 2, h // 2) 119 | 120 | # grab the rotation matrix (applying the negative of the 121 | # angle to rotate clockwise), then grab the sine and cosine 122 | # (i.e., the rotation components of the matrix) 123 | M = cv2.getRotationMatrix2D((cX, cY), angle, 1.0) 124 | cos = np.abs(M[0, 0]) 125 | sin = np.abs(M[0, 1]) 126 | 127 | # compute the new bounding dimensions of the image 128 | nW = int((h * sin) + (w * cos)) 129 | nH = int((h * cos) + (w * sin)) 130 | 131 | # adjust the rotation matrix to take into account translation 132 | M[0, 2] += (nW / 2) - cX 133 | M[1, 2] += (nH / 2) - cY 134 | 135 | # perform the actual rotation and return the image 136 | image = cv2.warpAffine(image, M, (nW, nH)) 137 | 138 | # image = cv2.resize(image, (w,h)) 139 | return image 140 | 141 | def get_corners(bboxes): 142 | 143 | """Get corners of bounding boxes 144 | 145 | Parameters 146 | ---------- 147 | 148 | bboxes: numpy.ndarray 149 | Numpy array containing bounding boxes of shape `N X 4` where N is the 150 | number of bounding boxes and the bounding boxes are represented in the 151 | format `x1 y1 x2 y2` 152 | 153 | returns 154 | ------- 155 | 156 | numpy.ndarray 157 | Numpy array of shape `N x 8` containing N bounding boxes each described by their 158 | corner co-ordinates `x1 y1 x2 y2 x3 y3 x4 y4` 159 | 160 | """ 161 | width = (bboxes[:,2] - bboxes[:,0]).reshape(-1,1) 162 | height = (bboxes[:,3] - bboxes[:,1]).reshape(-1,1) 163 | 164 | x1 = bboxes[:,0].reshape(-1,1) 165 | y1 = bboxes[:,1].reshape(-1,1) 166 | 167 | x2 = x1 + width 168 | y2 = y1 169 | 170 | x3 = x1 171 | y3 = y1 + height 172 | 173 | x4 = bboxes[:,2].reshape(-1,1) 174 | y4 = bboxes[:,3].reshape(-1,1) 175 | 176 | corners = np.hstack((x1,y1,x2,y2,x3,y3,x4,y4)) 177 | 178 | return corners 179 | 180 | def rotate_box(corners,angle, cx, cy, h, w): 181 | 182 | """Rotate the bounding box. 
183 | 184 | 185 | Parameters 186 | ---------- 187 | 188 | corners : numpy.ndarray 189 | Numpy array of shape `N x 8` containing N bounding boxes each described by their 190 | corner co-ordinates `x1 y1 x2 y2 x3 y3 x4 y4` 191 | 192 | angle : float 193 | angle by which the image is to be rotated 194 | 195 | cx : int 196 | x coordinate of the center of image (about which the box will be rotated) 197 | 198 | cy : int 199 | y coordinate of the center of image (about which the box will be rotated) 200 | 201 | h : int 202 | height of the image 203 | 204 | w : int 205 | width of the image 206 | 207 | Returns 208 | ------- 209 | 210 | numpy.ndarray 211 | Numpy array of shape `N x 8` containing N rotated bounding boxes each described by their 212 | corner co-ordinates `x1 y1 x2 y2 x3 y3 x4 y4` 213 | """ 214 | 215 | corners = corners.reshape(-1,2) 216 | corners = np.hstack((corners, np.ones((corners.shape[0],1), dtype = type(corners[0][0])))) 217 | 218 | M = cv2.getRotationMatrix2D((cx, cy), angle, 1.0) 219 | 220 | 221 | cos = np.abs(M[0, 0]) 222 | sin = np.abs(M[0, 1]) 223 | 224 | nW = int((h * sin) + (w * cos)) 225 | nH = int((h * cos) + (w * sin)) 226 | # adjust the rotation matrix to take into account translation 227 | M[0, 2] += (nW / 2) - cx 228 | M[1, 2] += (nH / 2) - cy 229 | # Prepare the vector to be transformed 230 | calculated = np.dot(M,corners.T).T 231 | 232 | calculated = calculated.reshape(-1,8) 233 | 234 | return calculated 235 | 236 | 237 | def get_enclosing_box(corners): 238 | """Get an enclosing box for ratated corners of a bounding box 239 | 240 | Parameters 241 | ---------- 242 | 243 | corners : numpy.ndarray 244 | Numpy array of shape `N x 8` containing N bounding boxes each described by their 245 | corner co-ordinates `x1 y1 x2 y2 x3 y3 x4 y4` 246 | 247 | Returns 248 | ------- 249 | 250 | numpy.ndarray 251 | Numpy array containing enclosing bounding boxes of shape `N X 4` where N is the 252 | number of bounding boxes and the bounding boxes are represented in the 253 | format `x1 y1 x2 y2` 254 | 255 | """ 256 | x_ = corners[:,[0,2,4,6]] 257 | y_ = corners[:,[1,3,5,7]] 258 | 259 | xmin = np.min(x_,1).reshape(-1,1) 260 | ymin = np.min(y_,1).reshape(-1,1) 261 | xmax = np.max(x_,1).reshape(-1,1) 262 | ymax = np.max(y_,1).reshape(-1,1) 263 | 264 | final = np.hstack((xmin, ymin, xmax, ymax,corners[:,8:])) 265 | 266 | return final 267 | 268 | 269 | def letterbox_image(img, inp_dim): 270 | '''resize image with unchanged aspect ratio using padding 271 | 272 | Parameters 273 | ---------- 274 | 275 | img : numpy.ndarray 276 | Image 277 | 278 | inp_dim: tuple(int) 279 | shape of the reszied image 280 | 281 | Returns 282 | ------- 283 | 284 | numpy.ndarray: 285 | Resized image 286 | 287 | ''' 288 | 289 | inp_dim = (inp_dim, inp_dim) 290 | img_w, img_h = img.shape[1], img.shape[0] 291 | w, h = inp_dim 292 | new_w = int(img_w * min(w/img_w, h/img_h)) 293 | new_h = int(img_h * min(w/img_w, h/img_h)) 294 | resized_image = cv2.resize(img, (new_w,new_h)) 295 | 296 | canvas = np.full((inp_dim[1], inp_dim[0], 3), 0) 297 | 298 | canvas[(h-new_h)//2:(h-new_h)//2 + new_h,(w-new_w)//2:(w-new_w)//2 + new_w, :] = resized_image 299 | 300 | return canvas -------------------------------------------------------------------------------- /lib/core/model/loss/centernet_loss.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | import tensorflow.contrib.slim as slim 6 | from tensorflow.python.ops 
import array_ops 7 | 8 | from train_config import config as cfg 9 | 10 | from lib.core.model.loss.iouloss import * 11 | 12 | def loss(predicts,targets): 13 | pred_hm, pred_wh=predicts 14 | hm_target, wh_target,weights_=targets 15 | 16 | with tf.name_scope('losses'): 17 | # whether anchor is matched 18 | # shape [batch_size, num_anchors] 19 | 20 | with tf.name_scope('classification_loss'): 21 | hm_loss = focal_loss( 22 | pred_hm, 23 | hm_target 24 | ) 25 | 26 | 27 | 28 | with tf.name_scope('iou_loss'): 29 | H, W = tf.shape(pred_hm)[1],tf.shape(pred_hm)[2] 30 | 31 | weights_=tf.transpose(weights_,perm=[0,3,1,2]) 32 | mask = tf.reshape(weights_,shape=(-1, H, W)) 33 | avg_factor = tf.reduce_sum(mask) + 1e-4 34 | 35 | base_step = cfg.MODEL.global_stride 36 | shifts_x = tf.range(0, (W - 1) * base_step + 1, base_step, 37 | dtype=tf.int32) 38 | shifts_x=tf.cast(shifts_x,dtype=tf.float32) 39 | shifts_y = tf.range(0, (H - 1) * base_step + 1, base_step, 40 | dtype=tf.int32) 41 | shifts_y = tf.cast(shifts_y, dtype=tf.float32) 42 | 43 | x_range, y_range = tf.meshgrid(shifts_x, shifts_y) 44 | 45 | base_loc = tf.stack((x_range, y_range), axis=2) # (2, h, w) 46 | 47 | base_loc = tf.expand_dims(base_loc, axis=0) 48 | 49 | pred_boxes = tf.concat((base_loc[:,:,:,0:1] - pred_wh[:,:,:, 0:1], 50 | base_loc[:,:,:,1:2] - pred_wh[:,:,:, 1:2], 51 | base_loc[:,:,:,0:1] + pred_wh[:,:,:, 2:3], 52 | base_loc[:,:,:,1:2] + pred_wh[:,:,:, 3:4]), axis=3) 53 | 54 | # (batch, h, w, 4) 55 | boxes = wh_target#.permute(0, 2, 3, 1) 56 | 57 | wh_loss = ciou_loss(pred_boxes, boxes, mask, avg_factor=avg_factor) 58 | 59 | return hm_loss, wh_loss*5 60 | 61 | def _reg_l1_loss(pred, 62 | target, 63 | weight, 64 | avg_factor=None): 65 | pos_mask = weight > 0 66 | weight = tf.cast(weight[pos_mask], tf.float32) 67 | if avg_factor is None: 68 | avg_factor = tf.reduce_sum(pos_mask) + 1e-6 69 | bboxes1 = tf.reshape(pred[pos_mask], (-1, 4)) 70 | bboxes2 = tf.reshape(target[pos_mask], (-1, 4)) 71 | 72 | 73 | loss=tf.reduce_mean(tf.abs(bboxes1-bboxes2),axis=1) 74 | return tf.reduce_sum(loss * weight) / avg_factor 75 | 76 | 77 | def classification_loss(predictions, targets): 78 | """ 79 | Arguments: 80 | predictions: a float tensor with shape [batch_size, num_anchors, num_classes + 1], 81 | representing the predicted logits for each class. 82 | targets: an int tensor with shape [batch_size, num_anchors]. 83 | Returns: 84 | a float tensor with shape [batch_size, num_anchors]. 85 | """ 86 | 87 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 88 | labels=targets, logits=predictions 89 | ) 90 | return cross_entropy 91 | 92 | 93 | def localization_loss(predictions, targets, indices, mask,sigma=9): 94 | """A usual L1 smooth loss. 95 | 96 | Arguments: 97 | predictions: a float tensor with shape [batch_size, num_anchors, 4], 98 | representing the (encoded) predicted locations of objects. 99 | targets: a float tensor with shape [batch_size, num_anchors, 4], 100 | representing the regression targets. 101 | weights: a float tensor with shape [batch_size, num_anchors]. 102 | Returns: 103 | a float tensor with shape [batch_size, num_anchors]. 
104 | """ 105 | 106 | indices = tf.where(tf.greater(targets, 0.)) 107 | predictions = tf.gather_nd(predictions, indices) 108 | targets = tf.gather_nd(targets, indices) 109 | 110 | 111 | abs_diff = tf.abs(predictions - targets) 112 | abs_diff_lt_1 = tf.less(abs_diff, 1.0/sigma) 113 | 114 | # compute the normalizer: the number of positive anchors 115 | normalizer = tf.maximum(1, tf.shape(indices)[0]) 116 | normalizer = tf.cast(normalizer, dtype=tf.float32) 117 | 118 | return tf.reduce_sum(tf.where(abs_diff_lt_1, 0.5 * tf.square(abs_diff), abs_diff - 0.5/sigma))/normalizer 119 | 120 | def reg_l1_loss(y_pred, y_true, indices, mask): 121 | b = tf.shape(y_pred)[0] 122 | k = tf.shape(indices)[1] 123 | c = tf.shape(y_pred)[-1] 124 | y_pred = tf.reshape(y_pred, (b, -1, c)) 125 | indices = tf.cast(indices, tf.int32) 126 | y_pred = tf.gather(y_pred, indices, batch_dims=1) 127 | mask = tf.tile(tf.expand_dims(mask, axis=-1), (1, 1, 2)) 128 | total_loss = tf.reduce_sum(tf.abs(y_true * mask - y_pred * mask)) 129 | reg_loss = total_loss / (tf.reduce_sum(mask) + 1e-4) 130 | return reg_loss 131 | 132 | 133 | 134 | # def focal_loss(prediction_tensor, target_tensor, weights=None, alpha=0.25, gamma=2): 135 | # r"""Compute focal loss for predictions. 136 | # Multi-labels Focal loss formula: 137 | # FL = -alpha * (z-p)^gamma * log(p) -(1-alpha) * p^gamma * log(1-p) 138 | # ,which alpha = 0.25, gamma = 2, p = sigmoid(x), z = target_tensor. 139 | # Args: 140 | # prediction_tensor: A float tensor of shape [batch_size, num_anchors, 141 | # num_classes] representing the predicted logits for each class 142 | # target_tensor: A float tensor of shape [batch_size, num_anchors, 143 | # num_classes] representing one-hot encoded classification targets 144 | # weights: A float tensor of shape [batch_size, num_anchors] 145 | # alpha: A scalar tensor for focal loss alpha hyper-parameter 146 | # gamma: A scalar tensor for focal loss gamma hyper-parameter 147 | # Returns: 148 | # loss: A (scalar) tensor representing the value of the loss function 149 | # """ 150 | # 151 | # 152 | # sigmoid_p = tf.nn.sigmoid(prediction_tensor) 153 | # zeros = array_ops.zeros_like(sigmoid_p, dtype=sigmoid_p.dtype) 154 | # 155 | # # For poitive prediction, only need consider front part loss, back part is 0; 156 | # # target_tensor > zeros <=> z=1, so poitive coefficient = z - p. 157 | # pos_p_sub = array_ops.where(target_tensor > zeros, target_tensor - sigmoid_p, zeros) 158 | # 159 | # # For negative prediction, only need consider back part loss, front part is 0; 160 | # # target_tensor > zeros <=> z=1, so negative coefficient = 0. 161 | # neg_p_sub = array_ops.where(target_tensor > zeros, zeros, sigmoid_p) 162 | # per_entry_cross_ent = - alpha * (pos_p_sub ** gamma) * tf.log(tf.clip_by_value(sigmoid_p, 1e-8, 1.0)) \ 163 | # - (1 - alpha) * (neg_p_sub ** gamma) * tf.log(tf.clip_by_value(1.0 - sigmoid_p, 1e-8, 1.0)) 164 | # 165 | # 166 | # # compute the normalizer: the number of positive anchors 167 | # # normalizer = tf.where(tf.greater(target_tensor, 0)) 168 | # # normalizer = tf.cast(tf.shape(normalizer)[0], tf.float32) 169 | # # normalizer = tf.maximum(1., normalizer) 170 | # 171 | # 172 | # return tf.reduce_sum(per_entry_cross_ent) 173 | 174 | 175 | def focal_loss(pred, gt): 176 | ''' Modified focal loss. Exactly the same as CornerNet. 
177 | Runs faster and costs a little bit more memory 178 | Arguments: 179 | pred (batch,h,w,c) 180 | gt_regr (batch,h,w,c) 181 | ''' 182 | pos_inds = tf.cast(tf.equal(gt, 1.0), dtype=tf.float32) 183 | neg_inds = 1.0 - pos_inds 184 | neg_weights = tf.pow(1.0 - gt, 4.0) 185 | 186 | pred = tf.clip_by_value(pred, 1e-6, 1.0 - 1e-6) 187 | pos_loss = tf.log(pred) * tf.pow(1.0 - pred, 2.0) * pos_inds 188 | neg_loss = tf.log(1.0 - pred) * tf.pow(pred, 2.0) * neg_weights * neg_inds 189 | 190 | num_pos = tf.reduce_sum(pos_inds) 191 | pos_loss = tf.reduce_sum(pos_loss) 192 | neg_loss = tf.reduce_sum(neg_loss) 193 | 194 | normalizer = tf.maximum(1., num_pos) 195 | loss = - (pos_loss + neg_loss) / normalizer 196 | 197 | return loss 198 | 199 | 200 | def ohem_loss(logits, targets, weights): 201 | 202 | 203 | indices = tf.where(tf.not_equal(weights, -1)) 204 | targets = tf.gather_nd(targets, indices) 205 | logits = tf.gather_nd(logits, indices) 206 | 207 | 208 | logits=tf.reshape(logits,shape=[-1,cfg.DATA.num_class]) 209 | targets = tf.reshape(targets, shape=[-1]) 210 | 211 | weights=tf.reshape(weights,shape=[-1]) 212 | 213 | 214 | dtype = logits.dtype 215 | 216 | pmask = weights 217 | fpmask = tf.cast(pmask, dtype) 218 | n_positives = tf.reduce_sum(fpmask) 219 | 220 | 221 | no_classes = tf.cast(pmask, tf.int32) 222 | 223 | predictions = slim.softmax(logits) 224 | 225 | 226 | nmask = tf.logical_not(tf.cast(pmask,tf.bool)) 227 | 228 | fnmask = tf.cast(nmask, dtype) 229 | 230 | nvalues = tf.where(nmask, 231 | predictions[:, 0], 232 | 1. - fnmask) 233 | nvalues_flat = tf.reshape(nvalues, [-1]) 234 | # Number of negative entries to select. 235 | max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32) 236 | n_neg = tf.cast(cfg.MODEL.max_negatives_per_positive * n_positives, tf.int32) + cfg.TRAIN.batch_size 237 | 238 | n_neg = tf.minimum(n_neg, max_neg_entries) 239 | 240 | val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg) 241 | max_hard_pred = -val[-1] 242 | # Final negative mask. 243 | nmask = tf.logical_and(nmask, nvalues < max_hard_pred) 244 | fnmask = tf.cast(nmask, dtype) 245 | 246 | # Add cross-entropy loss. 
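# Note on the mined negatives above: `nvalues` holds the predicted background
# probability for candidate negatives (and 1.0 elsewhere), so top_k over -nvalues
# keeps the n_neg negatives the model is most confident are foreground, i.e. the
# hardest negatives; the final `fnmask` zeroes out every easier negative.
# In the two name scopes below, the term weighted by `fpmask` (true labels, positives)
# is stored in `neg_loss`, while the term weighted by `fnmask` (background label,
# mined negatives) is stored in `pos_loss`; the variable names appear swapped, but the
# returned sum (neg_loss + pos_loss) / normalizer is unaffected.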
247 | with tf.name_scope('cross_entropy_pos'): 248 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, 249 | labels=targets) 250 | 251 | neg_loss = tf.reduce_sum(loss * fpmask) 252 | 253 | with tf.name_scope('cross_entropy_neg'): 254 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, 255 | labels=no_classes) 256 | pos_loss = tf.reduce_sum(loss * fnmask) 257 | 258 | # compute the normalizer: the number of positive anchors 259 | normalizer = tf.where(tf.equal(weights, 1)) 260 | normalizer = tf.cast(tf.shape(normalizer)[0], tf.float32) 261 | normalizer = tf.maximum(1., normalizer) 262 | 263 | return (neg_loss+pos_loss)/normalizer 264 | 265 | 266 | 267 | 268 | 269 | 270 | -------------------------------------------------------------------------------- /lib/core/anchor/anchor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | 5 | import sys 6 | sys.path.append('.') 7 | 8 | 9 | from lib.core.anchor.box_utils import encode,np_iou 10 | 11 | from train_config import config as cfg 12 | 13 | 14 | 15 | class CellAnchor(): 16 | 17 | def __init__(self): 18 | pass 19 | 20 | @classmethod 21 | def generate_cell_anchor(self,base_size=16,ratios=[0.5,1.,2.],scales=2**np.arange(3,6),rect=cfg.ANCHOR.rect): 22 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 23 | anchors_in_ratios = self.make_anchor_in_ratios(base_anchor, ratios, rect) 24 | anchors_in_scales = self.make_anchor_in_sclaes(anchors_in_ratios, scales) 25 | return anchors_in_scales 26 | 27 | @classmethod 28 | def _to_whxy(self,anchors): 29 | w=anchors[2]-anchors[0]+1 30 | h=anchors[3]-anchors[1]+1 31 | 32 | x=anchors[0]+(w-1)/2 33 | y=anchors[1]+(h-1)/2 34 | return w,h,x,y 35 | 36 | @classmethod 37 | def _to_xyxy(self,w,h,x,y): 38 | 39 | x0=x-(w-1)/2 40 | y0=y-(h-1)/2 41 | x1=x+(w-1)/2 42 | y1 = y + (h-1) / 2 43 | 44 | return np.stack((x0,y0,x1,y1),axis=-1) 45 | 46 | @classmethod 47 | def make_anchor_in_ratios(self,base_anchor,ratios,rect=False): 48 | 49 | anchors_in_ratios=[] 50 | w,h,x,y=self._to_whxy(base_anchor) 51 | area=w*h 52 | 53 | for ratio in ratios: 54 | 55 | ### choose the face anchor ratio h/w ==1.5 or 1 56 | if rect: 57 | w=h=np.round(np.sqrt(area/ratio)) 58 | if cfg.ANCHOR.rect_longer: 59 | h=np.round(1.5*w) 60 | else: 61 | w=np.round(np.sqrt(area/ratio)) 62 | h=np.round(ratio*w) 63 | 64 | anchors_in_ratios.append(self._to_xyxy(w,h,x,y)) 65 | 66 | 67 | return np.array(anchors_in_ratios) 68 | 69 | @classmethod 70 | def make_anchor_in_sclaes(self,anchors,scales): 71 | anchors_res=[] 72 | 73 | for anchor in anchors: 74 | w,h,x,y=self._to_whxy(anchor) 75 | w=w*scales 76 | h=h*scales 77 | anchors_sclase=self._to_xyxy(w,h,x,y) 78 | anchors_res.append(anchors_sclase) 79 | return np.array(anchors_res).reshape([-1,4]) 80 | 81 | class Anchor(): 82 | 83 | def __init__(self): 84 | 85 | self.strides=cfg.ANCHOR.ANCHOR_STRIDES 86 | self.sizes = cfg.ANCHOR.ANCHOR_SIZES 87 | 88 | self.ratios=cfg.ANCHOR.ANCHOR_RATIOS 89 | self.scales=cfg.ANCHOR.ANCHOR_SCALES 90 | 91 | self.max_size=cfg.DATA.max_size ##use to calculate the anchor 92 | 93 | self.anchors=self.produce_anchors() 94 | 95 | self.decode_anchors=self.get_decode_anchor() 96 | 97 | def produce_anchors(self): 98 | anchors_per_level = self.get_all_anchors_fpn() 99 | flatten_anchors_per_level = [k.reshape((-1, 4)) for k in anchors_per_level] 100 | all_anchors_flatten = np.concatenate(flatten_anchors_per_level, axis=0) 101 | return all_anchors_flatten 102 | 103 | def 
get_all_anchors(self,stride=None, sizes=None): 104 | """ 105 | Get all anchors in the largest possible image, shifted, floatbox 106 | Args: 107 | stride (int): the stride of anchors. 108 | sizes (tuple[int]): the sizes (sqrt area) of anchors 109 | 110 | Returns: 111 | anchors: SxSxNUM_ANCHORx4, where S == ceil(MAX_SIZE/STRIDE), floatbox 112 | The layout in the NUM_ANCHOR dim is NUM_RATIO x NUM_SIZE. 113 | 114 | """ 115 | 116 | # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors 117 | # are centered on stride / 2, have (approximate) sqrt areas of the specified 118 | # sizes, and aspect ratios as given. 119 | cell_anchors = CellAnchor.generate_cell_anchor( 120 | stride, 121 | scales=np.array(sizes, dtype=np.float) / stride, 122 | ratios=np.array(self.ratios, dtype=np.float)) 123 | # anchors are intbox here. 124 | # anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride) 125 | 126 | field_size_y = int(np.ceil(self.max_size[0] / stride)) 127 | field_size_x = int(np.ceil(self.max_size[1] / stride)) 128 | 129 | shifts_x = np.arange(0, field_size_x) * stride 130 | shifts_y = np.arange(0, field_size_y) * stride 131 | shift_x, shift_y = np.meshgrid(shifts_x, shifts_y) 132 | shift_x = shift_x.flatten() 133 | shift_y = shift_y.flatten() 134 | shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose() 135 | # Kx4, K = field_size * field_size 136 | K = shifts.shape[0] 137 | 138 | A = cell_anchors.shape[0] 139 | field_of_anchors = ( 140 | cell_anchors.reshape((1, A, 4)) + 141 | shifts.reshape((1, K, 4)).transpose((1, 0, 2))) 142 | field_of_anchors = field_of_anchors.reshape((field_size_y, field_size_x, A, 4)) 143 | # FSxFSxAx4 144 | # Many rounding happens inside the anchor code anyway 145 | # assert np.all(field_of_anchors == field_of_anchors.astype('int32')) 146 | field_of_anchors = field_of_anchors.astype('float32') 147 | field_of_anchors[:, :, :, [2, 3]] += 1 148 | return field_of_anchors 149 | 150 | def get_all_anchors_fpn(self): 151 | """ 152 | Returns: 153 | [anchors]: each anchors is a SxSx NUM_ANCHOR_RATIOS x4 array. 
154 | """ 155 | strides =self.strides 156 | sizes = self.sizes 157 | 158 | assert len(strides) == len(sizes) 159 | foas = [] 160 | for stride, size in zip(strides, sizes): 161 | sizes_=size*np.array(self.scales) 162 | foa = self.get_all_anchors(stride=stride, sizes=sizes_) 163 | foas.append(foa) 164 | 165 | 166 | return foas 167 | 168 | def produce_target(self, boxes, labels): 169 | boxes = boxes.copy() 170 | 171 | all_anchors_flatten =self.anchors 172 | 173 | # inside_ind, inside_anchors = filter_boxes_inside_shape(all_anchors_flatten, image.shape[:2]) 174 | inside_anchors = all_anchors_flatten 175 | 176 | # obtain anchor labels and their corresponding gt boxes 177 | anchor_labels, anchor_gt_boxes = self.get_anchor_labels(inside_anchors, boxes, labels) 178 | 179 | # start = 0 180 | # multilevel_inputs = [] 181 | # for level_anchor in anchors_per_level: 182 | # assert level_anchor.shape[2] == len(cfg.ANCHOR.ANCHOR_RATIOS) 183 | # anchor_shape = level_anchor.shape[:3] # fHxfWxNUM_ANCHOR_RATIOS 184 | # num_anchor_this_level = np.prod(anchor_shape) 185 | # end = start + num_anchor_this_level 186 | # multilevel_inputs.append( 187 | # (all_labels[start: end].reshape(anchor_shape), 188 | # all_boxes[start: end, :].reshape(anchor_shape + (4,)) 189 | # )) 190 | # start = end 191 | # assert end == num_all_anchors, "{} != {}".format(end, num_all_anchors) 192 | # return multilevel_inputs 193 | return anchor_gt_boxes, anchor_labels 194 | 195 | def get_anchor_labels(self,anchors, gt_boxes, labels): 196 | # This function will modify labels and return the filtered inds 197 | 198 | NA, NB = len(anchors), len(gt_boxes) 199 | assert NB > 0 # empty images should have been filtered already 200 | # ########## 201 | 202 | anchor_state= np.zeros((NA,), dtype='int32')-1 203 | 204 | anchor_labels = np.zeros((NA,), dtype='int32') 205 | anchor_boxes = np.zeros((NA, 4), dtype='float32') 206 | 207 | box_ious = np_iou(anchors, gt_boxes) # NA x NB 208 | 209 | # for each anchor box choose the groundtruth box with largest iou, set iou<0.4 as backgroud, ignore 0.4-0.5 210 | max_iou = box_ious.max(axis=1) # NA 211 | 212 | positive_anchor_indices = np.where(max_iou > cfg.ANCHOR.POSITIVE_ANCHOR_THRESH)[0] 213 | negative_anchor_indices = np.where(max_iou < cfg.ANCHOR.NEGATIVE_ANCHOR_THRESH)[0] 214 | 215 | positive_iou = box_ious[positive_anchor_indices] 216 | matched_gt_box_indices = positive_iou.argmax(axis=1) 217 | 218 | anchor_labels[positive_anchor_indices] = labels[matched_gt_box_indices] 219 | anchor_state[positive_anchor_indices]=1 220 | anchor_boxes[positive_anchor_indices] = gt_boxes[matched_gt_box_indices] 221 | 222 | anchor_state[negative_anchor_indices] = 0 223 | 224 | fg_boxes = anchor_boxes[anchor_state==1] 225 | 226 | matched_anchors = anchors[anchor_state==1] 227 | 228 | ##select and normlised the box coordinate 229 | fg_boxes[:,0::2] = fg_boxes[:,0::2] / self.max_size[1] 230 | fg_boxes[:, 1::2] = fg_boxes[:, 1::2] / self.max_size[0] 231 | 232 | matched_anchors[:,0::2] = matched_anchors[:,0::2] / self.max_size[1] 233 | matched_anchors[:, 1::2] = matched_anchors[:, 1::2] / self.max_size[0] 234 | 235 | 236 | 237 | encode_fg_boxes = encode(fg_boxes, matched_anchors) 238 | anchor_boxes[anchor_state==1] = encode_fg_boxes 239 | 240 | anchor_labels=np.stack([anchor_labels,anchor_state]) 241 | 242 | # 243 | return anchor_labels, anchor_boxes 244 | 245 | 246 | 247 | def reset_anchors(self,max_size=(512,512)): 248 | ''' 249 | 250 | :param max_size: h,w 251 | :return: 252 | ''' 253 | self.max_size=max_size 254 | 255 | 
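# The anchor grid has ceil(max_size / stride) cells per FPN level, so both the
# flattened anchors and their (w, h, w, h) decode scales below are regenerated
# whenever the input size changes.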
self.anchors = self.produce_anchors() 256 | 257 | self.decode_anchors = self.get_decode_anchor() 258 | 259 | 260 | def get_decode_anchor(self): 261 | ''' 262 | 263 | :return: the anchor decode [w,h,w,h] 264 | 265 | the right way to decode the bbox is res *[w,h,w,h]+ raw_anchor, which means anchors_bias +anchors 266 | ''' 267 | anchor_widths = self.anchors[:, 2] - self.anchors[:, 0] 268 | anchor_heights = self.anchors[:, 3] - self.anchors[:, 1] 269 | tmp_anchor_details = np.stack([anchor_widths, anchor_heights, anchor_widths, anchor_heights], axis=1) 270 | 271 | return tmp_anchor_details 272 | 273 | 274 | 275 | anchor_tools=Anchor() 276 | 277 | if __name__=='__main__': 278 | ##model_eval the anchor codes there 279 | import cv2 280 | 281 | cell_anchor = CellAnchor.generate_cell_anchor() 282 | print(cell_anchor) 283 | 284 | 285 | anchor_maker=Anchor() 286 | 287 | all_anchor= anchor_maker.anchors 288 | print(len(all_anchor)) 289 | image=np.ones(shape=[cfg.DATA.max_size[0],cfg.DATA.max_size[1],3])*255 290 | 291 | # for x in anchors: 292 | # print(x.shape) 293 | 294 | anchors=np.array(all_anchor) 295 | # cv2.namedWindow('anchors', 0) 296 | # for i in range(10000,anchors.shape[0]): 297 | # box=anchors[i] 298 | # print(box[2]-box[0]) 299 | # cv2.rectangle(image, (int(box[0]), int(box[1])), 300 | # (int(box[2]), int(box[3])), (255, 0, 0), 1) 301 | # 302 | # 303 | # cv2.imshow('anchors',image) 304 | # cv2.waitKey(0) 305 | 306 | anchor_labels, anchor_boxes=anchor_maker.produce_target(np.array([[34., 396., 58., 508.],[20,140,50,160]]),np.array([1,1])) 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | -------------------------------------------------------------------------------- /lib/dataset/centernet_data_sampler.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | import numpy as np 3 | import math 4 | import cv2 5 | 6 | from train_config import config as cfg 7 | 8 | def gaussian_radius(det_size, min_overlap=cfg.MODEL.min_overlap): 9 | height, width = det_size 10 | 11 | a1 = 1 12 | b1 = (height + width) 13 | c1 = width * height * (1 - min_overlap) / (1 + min_overlap) 14 | sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1) 15 | r1 = (b1 + sq1) / 2 16 | 17 | a2 = 4 18 | b2 = 2 * (height + width) 19 | c2 = (1 - min_overlap) * width * height 20 | sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2) 21 | r2 = (b2 + sq2) / 2 22 | 23 | a3 = 4 * min_overlap 24 | b3 = -2 * min_overlap * (height + width) 25 | c3 = (min_overlap - 1) * width * height 26 | sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3) 27 | r3 = (b3 + sq3) / 2 28 | return min(r1, r2, r3) 29 | 30 | def draw_umich_gaussian(heatmap, center, radius, k=1): 31 | diameter = 2 * radius + 1 32 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 33 | 34 | x, y = int(center[0]), int(center[1]) 35 | 36 | height, width = heatmap.shape[0:2] 37 | 38 | left, right = min(x, radius), min(width - x, radius + 1) 39 | top, bottom = min(y, radius), min(height - y, radius + 1) 40 | 41 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 42 | masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right] 43 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug 44 | np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) 45 | return heatmap 46 | def draw_msra_gaussian(heatmap, center, sigma): 47 | #heatmap=np.transpose(heatmap,axes=[1,0]) 48 | tmp_size = sigma * 3 49 | mu_x = int(center[0] + 0.5) 50 | mu_y = int(center[1] + 0.5) 51 | w, h = 
heatmap.shape[0], heatmap.shape[1] 52 | ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] 53 | br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] 54 | if ul[0] >= h or ul[1] >= w or br[0] < 0 or br[1] < 0: 55 | return heatmap 56 | size = 2 * tmp_size + 1 57 | x = np.arange(0, size, 1, np.float32) 58 | y = x[:, np.newaxis] 59 | x0 = y0 = size // 2 60 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) 61 | g_x = max(0, -ul[0]), min(br[0], h) - ul[0] 62 | g_y = max(0, -ul[1]), min(br[1], w) - ul[1] 63 | img_x = max(0, ul[0]), min(br[0], h) 64 | img_y = max(0, ul[1]), min(br[1], w) 65 | heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]] = np.maximum( 66 | heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]], 67 | g[g_y[0]:g_y[1], g_x[0]:g_x[1]]) 68 | #heatmap = np.transpose(heatmap, axes=[1, 0]) 69 | return heatmap 70 | 71 | def gaussian2D(shape, sigma=1): 72 | m, n = [(ss - 1.) / 2. for ss in shape] 73 | y, x = np.ogrid[-m:m+1,-n:n+1] 74 | 75 | h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) 76 | h[h < np.finfo(h.dtype).eps * h.max()] = 0 77 | return h 78 | 79 | def draw_dense_reg(regmap, heatmap, center, value, radius, is_offset=False): 80 | diameter = 2 * radius + 1 81 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 82 | value = np.array(value, dtype=np.float32).reshape(-1, 1, 1) 83 | dim = value.shape[0] 84 | reg = np.ones((dim, diameter * 2 + 1, diameter * 2 + 1), dtype=np.float32) * value 85 | if is_offset and dim == 2: 86 | delta = np.arange(diameter * 2 + 1) - radius 87 | reg[0] = reg[0] - delta.reshape(1, -1) 88 | reg[1] = reg[1] - delta.reshape(-1, 1) 89 | 90 | x, y = int(center[0]), int(center[1]) 91 | 92 | height, width = heatmap.shape[0:2] 93 | 94 | left, right = min(x, radius), min(width - x, radius + 1) 95 | top, bottom = min(y, radius), min(height - y, radius + 1) 96 | 97 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 98 | masked_regmap = regmap[:, y - top:y + bottom, x - left:x + right] 99 | masked_gaussian = gaussian[radius - top:radius + bottom, 100 | radius - left:radius + right] 101 | masked_reg = reg[:, radius - top:radius + bottom, 102 | radius - left:radius + right] 103 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug 104 | idx = (masked_gaussian >= masked_heatmap).reshape( 105 | 1, masked_gaussian.shape[0], masked_gaussian.shape[1]) 106 | masked_regmap = (1 - idx) * masked_regmap + idx * masked_reg 107 | regmap[:, y - top:y + bottom, x - left:x + right] = masked_regmap 108 | return regmap 109 | 110 | 111 | def produce_heat_map(center, map_size, stride,objects_size, sigma,magic_divide=100): 112 | grid_y = map_size[0] // stride 113 | grid_x = map_size[1] // stride 114 | start = stride / 2.0 - 0.5 115 | y_range = [i for i in range(grid_y)] 116 | x_range = [i for i in range(grid_x)] 117 | xx, yy = np.meshgrid(x_range, y_range) 118 | xx = xx * stride + start 119 | yy = yy * stride + start 120 | 121 | radis=gaussian_radius(objects_size) 122 | ratio=((objects_size[0]*objects_size[1]+0.000005)/(map_size[1]*map_size[0]))*magic_divide 123 | 124 | #d2 = (yy - center[0]) ** 2 / 2. / sigma_y / sigma_y + (xx - center[1]) ** 2 / 2. 
/ sigma_x / sigma_x 125 | d2 = (yy - center[0]) ** 2 + (xx - center[1]) ** 2 126 | exponent = d2 / 2.0 / sigma / sigma/ratio 127 | heatmap = np.exp(-exponent) 128 | 129 | am = np.amax(heatmap) 130 | if am > 0: 131 | heatmap /= am 132 | 133 | return heatmap 134 | 135 | # def produce_heatmaps_with_bbox_official(image,boxes,klass,num_klass=cfg.DATA.num_class): 136 | # h_out, w_out, _ = image.shape 137 | # ## stride equal to 4 138 | # h_out //= 4 139 | # w_out //= 4 140 | # boxes[:, :4] //= 4 141 | # 142 | # heatmap = np.zeros(shape=[h_out, w_out, num_klass],dtype=np.float32) 143 | # 144 | # regression_map = np.zeros(shape=[h_out, w_out, 2],dtype=np.float32) 145 | # 146 | # each_klass = set(klass) 147 | # for one_klass in each_klass: 148 | # 149 | # for single_box, single_klass in zip(boxes, klass): 150 | # if single_klass == one_klass: 151 | # ####box center (y,x) 152 | # center = [round((single_box[1] + single_box[3]) / 2), 153 | # round((single_box[0] + single_box[2]) / 2)] ###0-1 154 | # center = [int(x) for x in center] 155 | # 156 | # object_width = single_box[2] - single_box[0] 157 | # object_height = single_box[3] - single_box[1] 158 | # 159 | # 160 | # if center[0] >= h_out: 161 | # center[0] -= 1 162 | # if center[1] >= w_out: 163 | # center[1] -= 1 164 | # radius = gaussian_radius((math.ceil(object_height), math.ceil(object_width))) 165 | # radius = max(0, int(radius)) 166 | # draw_msra_gaussian(heatmap[:, :, int(one_klass)],center,radius) 167 | # 168 | # regression_map[center[0], center[1], 0] = object_width 169 | # regression_map[center[0], center[1], 1] = object_height 170 | # 171 | # 172 | # if cfg.DATA.use_int8_data: 173 | # h_am = np.amax(heatmap) 174 | # 175 | # heatmap = (heatmap/h_am*cfg.DATA.use_int8_enlarge).astype(np.uint8) 176 | # 177 | # regression_map=regression_map.astype(np.uint8) 178 | # return heatmap, regression_map 179 | # else: 180 | # 181 | # return heatmap.astype(np.float16), regression_map.astype(np.float16) 182 | 183 | def produce_heatmaps_with_bbox_official(image,boxes,klass,num_klass=cfg.DATA.num_class): 184 | return _official_centernet_datasampler(image,boxes,klass,num_klass) 185 | 186 | def _official_centernet_datasampler(image,boxes,klass,num_classes=cfg.DATA.num_class,max_objs=cfg.DATA.max_objs): 187 | 188 | 189 | num_obj=min(max_objs,len(boxes)) 190 | h_out, w_out, _ = image.shape 191 | ## stride equal to 4 192 | output_h=h_out / cfg.MODEL.global_stride 193 | output_w=w_out / cfg.MODEL.global_stride 194 | 195 | if len(boxes)>0: 196 | boxes[:, :4] /= cfg.MODEL.global_stride 197 | 198 | hm = np.zeros((num_classes, math.ceil(output_h), math.ceil(output_w)), dtype=np.float32) 199 | wh = np.zeros((max_objs, 2), dtype=np.float32) 200 | 201 | reg = np.zeros((max_objs, 2), dtype=np.float32) 202 | ind = np.zeros((max_objs), dtype=np.int64) 203 | reg_mask = np.zeros((max_objs), dtype=np.uint8) 204 | 205 | for k in range(num_obj): 206 | 207 | bbox = boxes[k] 208 | cls_id = klass[k] 209 | 210 | h, w = bbox[3] - bbox[1], bbox[2] - bbox[0] 211 | if h > 0 and w > 0: 212 | radius = gaussian_radius((math.ceil(h), math.ceil(w))) 213 | 214 | radius = max(0, int(radius)) 215 | if radius == 0: 216 | continue 217 | 218 | ct = np.array( 219 | [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32) 220 | ct_int = ct.astype(np.int32) 221 | draw_umich_gaussian(hm[cls_id], ct_int, radius) 222 | wh[k] = 1. * w, 1. 
* h 223 | ind[k] = ct_int[1] * output_w + ct_int[0] 224 | reg[k] = ct - ct_int 225 | reg_mask[k] = 1 226 | 227 | heatmap=np.transpose(hm,axes=[1,2,0]) 228 | 229 | if cfg.DATA.use_int8_data: 230 | 231 | heatmap = (heatmap*cfg.DATA.use_int8_enlarge).astype(np.uint8) 232 | 233 | return heatmap, wh,reg,ind,reg_mask 234 | else: 235 | return heatmap, wh,reg,ind,reg_mask 236 | 237 | 238 | 239 | def get_3rd_point(a, b): 240 | direct = a - b 241 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 242 | 243 | def get_dir(src_point, rot_rad): 244 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 245 | 246 | src_result = [0, 0] 247 | src_result[0] = src_point[0] * cs - src_point[1] * sn 248 | src_result[1] = src_point[0] * sn + src_point[1] * cs 249 | 250 | return src_result 251 | def get_affine_transform(center, 252 | scale, 253 | rot, 254 | output_size, 255 | shift=np.array([0, 0], dtype=np.float32), 256 | inv=0): 257 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list): 258 | scale = np.array([scale, scale], dtype=np.float32) 259 | 260 | scale_tmp = scale 261 | src_w = scale_tmp[0] 262 | dst_w = output_size[0] 263 | dst_h = output_size[1] 264 | 265 | rot_rad = np.pi * rot / 180 266 | src_dir = get_dir([0, src_w * -0.5], rot_rad) 267 | dst_dir = np.array([0, dst_w * -0.5], np.float32) 268 | 269 | src = np.zeros((3, 2), dtype=np.float32) 270 | dst = np.zeros((3, 2), dtype=np.float32) 271 | src[0, :] = center + scale_tmp * shift 272 | src[1, :] = center + src_dir + scale_tmp * shift 273 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5] 274 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir 275 | 276 | src[2:, :] = get_3rd_point(src[0, :], src[1, :]) 277 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) 278 | 279 | if inv: 280 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 281 | else: 282 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 283 | 284 | return trans 285 | 286 | 287 | 288 | def affine_transform(pt, t): 289 | new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32).T 290 | new_pt = np.dot(t, new_pt) 291 | return new_pt[:2] -------------------------------------------------------------------------------- /lib/dataset/ttf_net_data_sampler.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append('.') 4 | 5 | import numpy as np 6 | 7 | import math 8 | import cv2 9 | from train_config import config as cfg 10 | 11 | def safe_box(bboxes,klasses): 12 | safe_box=[] 13 | safe_klass=[] 14 | for i in range(bboxes.shape[0]): 15 | cur_box=bboxes[i] 16 | cur_klass=klasses[i] 17 | x_min, y_min, x_max, y_max = cur_box[0], cur_box[1], cur_box[ 2], cur_box[ 3] 18 | 19 | if x_min 0 and min(masked_heatmap.shape) > 0: # TODO debug 125 | np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) 126 | return heatmap 127 | def ttfnet_centernet_datasampler(self,image, gt_boxes, gt_labels, num_classes=cfg.DATA.num_class, max_objs=cfg.DATA.max_objs): 128 | 129 | """ 130 | 131 | Args: 132 | gt_boxes: tensor, tensor <=> img, (num_gt, 4). 133 | gt_labels: tensor, tensor <=> img, (num_gt,). 134 | feat_shape: tuple. 135 | 136 | Returns: 137 | heatmap: tensor, tensor <=> img, (80, h, w). 138 | box_target: tensor, tensor <=> img, (4, h, w) or (80 * 4, h, w). 
139 | reg_weight: tensor, same as box_target 140 | """ 141 | gt_boxes,gt_labels=safe_box(gt_boxes, gt_labels) 142 | 143 | 144 | img_h,img_w,_c=image.shape 145 | 146 | output_h, output_w = img_h//self.down_ratio,img_w//self.down_ratio 147 | 148 | 149 | heatmap_channel = num_classes 150 | 151 | heatmap = np.zeros((heatmap_channel, output_h, output_w),dtype=np.float32) 152 | fake_heatmap =np.zeros((output_h, output_w),dtype=np.float32) 153 | box_target = np.ones((self.wh_planes, output_h, output_w),dtype=np.float32) * -1 154 | reg_weight = np.zeros((self.wh_planes // 4, output_h, output_w),dtype=np.float32) 155 | 156 | 157 | if gt_boxes.shape[0]>0: 158 | 159 | if self.wh_area_process == 'log': 160 | boxes_areas_log = np.log(bbox_areas(gt_boxes)) 161 | elif self.wh_area_process == 'sqrt': 162 | boxes_areas_log = np.sqrt(bbox_areas(gt_boxes)) 163 | else: 164 | boxes_areas_log = bbox_areas(gt_boxes) 165 | 166 | boxes_area_topk_log, boxes_ind = torch_style_topK(boxes_areas_log, boxes_areas_log.shape[0]) 167 | 168 | if self.wh_area_process == 'norm': 169 | boxes_area_topk_log[:] = 1. 170 | 171 | gt_boxes = gt_boxes[boxes_ind] 172 | gt_labels = gt_labels[boxes_ind] 173 | 174 | feat_gt_boxes = gt_boxes / self.down_ratio 175 | feat_gt_boxes[:, [0, 2]] = np.clip(feat_gt_boxes[:, [0, 2]], a_min=0, 176 | a_max=output_w - 1) 177 | feat_gt_boxes[:, [1, 3]] = np.clip(feat_gt_boxes[:, [1, 3]], a_min=0, 178 | a_max=output_h - 1) 179 | feat_hs, feat_ws = (feat_gt_boxes[:, 3] - feat_gt_boxes[:, 1], 180 | feat_gt_boxes[:, 2] - feat_gt_boxes[:, 0]) 181 | 182 | # we calc the center and ignore area based on the gt-boxes of the origin scale 183 | # no peak will fall between pixels 184 | ct_ints = (np.stack([(gt_boxes[:, 0] + gt_boxes[:, 2]) / 2, 185 | (gt_boxes[:, 1] + gt_boxes[:, 3]) / 2], 186 | axis=1) / self.down_ratio).astype(np.int) 187 | 188 | 189 | h_radiuses_alpha = (feat_hs / 2. * self.alpha).astype(np.int) 190 | w_radiuses_alpha = (feat_ws / 2. * self.alpha).astype(np.int) 191 | 192 | if self.wh_gaussian and self.alpha != self.beta: 193 | h_radiuses_beta = (feat_hs / 2. * self.beta).astype(np.int) 194 | w_radiuses_beta = (feat_ws / 2. * self.beta).astype(np.int) 195 | 196 | if not self.wh_gaussian: 197 | # calculate positive (center) regions 198 | r1 = (1 - self.beta) / 2 199 | ctr_x1s, ctr_y1s, ctr_x2s, ctr_y2s = calc_region(gt_boxes.transpose(0, 1), r1) 200 | ctr_x1s, ctr_y1s, ctr_x2s, ctr_y2s = [np.round(x.float() / self.down_ratio).int() 201 | for x in [ctr_x1s, ctr_y1s, ctr_x2s, ctr_y2s]] 202 | ctr_x1s, ctr_x2s = [np.clamp(x, max=output_w - 1) for x in [ctr_x1s, ctr_x2s]] 203 | ctr_y1s, ctr_y2s = [np.clamp(y, max=output_h - 1) for y in [ctr_y1s, ctr_y2s]] 204 | else: 205 | boxes_ind=np.array([]) 206 | # larger boxes have lower priority than small boxes. 
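# The loop below visits boxes in the order produced by torch_style_topK, i.e. (as the
# comment above says) from largest to smallest, so where targets overlap the smaller,
# later-drawn box overwrites box_target / reg_weight for those pixels.
# In the Gaussian branch, each box's weight boxes_area_topk_log[k] is spread over its
# truncated Gaussian and divided by the Gaussian's sum, so the pixels of one box sum
# to that value no matter how many cells the Gaussian covers.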
207 | for k in range(boxes_ind.shape[0]): 208 | cls_id = gt_labels[k] 209 | 210 | fake_heatmap = fake_heatmap*0 211 | 212 | self.draw_truncate_gaussian(fake_heatmap, ct_ints[k], 213 | h_radiuses_alpha[k], w_radiuses_alpha[k]) 214 | 215 | heatmap[cls_id] = np.maximum(heatmap[cls_id], fake_heatmap) 216 | 217 | 218 | if self.wh_gaussian: 219 | if self.alpha != self.beta: 220 | fake_heatmap = fake_heatmap*0 221 | self.draw_truncate_gaussian(fake_heatmap, 222 | ct_ints[k], 223 | h_radiuses_beta[k], 224 | w_radiuses_beta[k]) 225 | box_target_inds = fake_heatmap > 0 226 | else: 227 | ctr_x1, ctr_y1, ctr_x2, ctr_y2 = ctr_x1s[k], ctr_y1s[k], ctr_x2s[k], ctr_y2s[k] 228 | box_target_inds = np.zeros_like(fake_heatmap, dtype=np.uint8) 229 | box_target_inds[ctr_y1:ctr_y2 + 1, ctr_x1:ctr_x2 + 1] = 1 230 | 231 | if self.wh_agnostic: 232 | 233 | box_target[:, box_target_inds] =np.expand_dims(gt_boxes[k],-1) 234 | 235 | cls_id = 0 236 | else: 237 | box_target[(cls_id * 4):((cls_id + 1) * 4), box_target_inds] = np.expand_dims(gt_boxes[k],-1) 238 | 239 | if self.wh_gaussian: 240 | local_heatmap = fake_heatmap[box_target_inds] 241 | 242 | 243 | 244 | ct_div = local_heatmap.sum() 245 | local_heatmap *= boxes_area_topk_log[k] 246 | reg_weight[cls_id, box_target_inds] = local_heatmap / ct_div 247 | else: 248 | reg_weight[cls_id, box_target_inds] = \ 249 | boxes_area_topk_log[k] / box_target_inds.sum() 250 | 251 | 252 | heatmap = np.transpose(heatmap, axes=[1, 2, 0]) 253 | box_target= np.transpose(box_target, axes=[1, 2, 0]) 254 | reg_weight = np.transpose(reg_weight, axes=[1, 2, 0]) 255 | 256 | 257 | 258 | 259 | 260 | if cfg.DATA.use_int8_data: 261 | 262 | heatmap = (heatmap * cfg.DATA.use_int8_enlarge).astype(np.uint8) 263 | return heatmap, box_target, reg_weight 264 | else: 265 | return heatmap, box_target, reg_weight 266 | 267 | 268 | 269 | 270 | 271 | if __name__=='__main__': 272 | 273 | 274 | from train_config import config as cfg 275 | 276 | 277 | data_sampler=CenternetDatasampler() 278 | 279 | for i in range(1000): 280 | image = cv2.imread('./lib/dataset/augmentor/test.jpg') 281 | boxes = np.array([[165, 60, 233, 138],[5, 60, 133, 138]], dtype=np.float) 282 | 283 | cls=np.array([0,0]) 284 | 285 | heatmap, box_target, reg_weight=data_sampler.ttfnet_centernet_datasampler(image,boxes,cls) 286 | 287 | hm=heatmap[:,:,0] 288 | wh = box_target[:, :, 1]+1 289 | 290 | weight=reg_weight[:, :, 0] 291 | 292 | print(np.max(wh)) 293 | print(np.max(weight)) 294 | cv2.namedWindow('image', 0) 295 | cv2.imshow('image', image) 296 | 297 | cv2.namedWindow('hm',0) 298 | cv2.imshow('hm',hm) 299 | 300 | cv2.namedWindow('weight', 0) 301 | cv2.imshow('weight', weight) 302 | 303 | cv2.namedWindow('wh', 0) 304 | cv2.imshow('wh', wh) 305 | cv2.waitKey(0) --------------------------------------------------------------------------------
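For reference, a minimal sketch of how the targets produced by ttfnet_centernet_datasampler line up with loss() in lib/core/model/loss/centernet_loss.py: the heatmap pairs with pred_hm, box_target holds ground-truth boxes in input-image pixels and pairs with the boxes decoded from pred_wh, and reg_weight is the per-pixel weight mask. The dummy predictions and shapes are illustrative assumptions only (a TF 1.x session and cfg.DATA.use_int8_data disabled are also assumed); this is not part of the repository's training pipeline.

import numpy as np
import tensorflow as tf

from lib.dataset.ttf_net_data_sampler import CenternetDatasampler
from lib.core.model.loss.centernet_loss import loss

sampler = CenternetDatasampler()
image = np.zeros((512, 512, 3), dtype=np.uint8)
boxes = np.array([[165., 60., 233., 138.]], dtype=np.float32)
labels = np.array([0])

# per-image targets: (h/stride, w/stride, num_class), (..., 4), (..., 1)
# (if cfg.DATA.use_int8_data were enabled, hm would come back uint8-scaled and need rescaling)
hm, box_target, reg_weight = sampler.ttfnet_centernet_datasampler(image, boxes, labels)

# add a batch dimension; the loss consumes batched NHWC tensors
hm_t = tf.constant(hm[None, ...], dtype=tf.float32)
wh_t = tf.constant(box_target[None, ...], dtype=tf.float32)
w_t = tf.constant(reg_weight[None, ...], dtype=tf.float32)

# dummy predictions: post-sigmoid heatmap scores, and per-pixel
# (left, top, right, bottom) distances in input-image pixels
pred_hm = tf.ones_like(hm_t) * 0.5
pred_wh = tf.ones_like(wh_t) * 16.

hm_loss, wh_loss = loss((pred_hm, pred_wh), (hm_t, wh_t, w_t))
with tf.Session() as sess:
    print(sess.run([hm_loss, wh_loss]))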