├── .gitignore
├── README.md
├── object_detection
│   ├── config
│   │   ├── __init__.py
│   │   ├── config_factory.py
│   │   ├── faster_rcnn_config.py
│   │   └── fpn_config.py
│   ├── dataset
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── coco_tf_dataset_generator.py
│   │   ├── dataset_factory.py
│   │   ├── eval_pascal_tf_dataset.py
│   │   ├── pascal_tf_dataset_generator.py
│   │   ├── pascal_tf_dataset_local_file.py
│   │   └── utils
│   │       ├── __init__.py
│   │       ├── label_map_utils.py
│   │       ├── tf_dataset_utils.py
│   │       └── tf_record_utils.py
│   ├── evaluation
│   │   ├── detectron_pascal_evaluation_utils.py
│   │   ├── pascal_eval_files_utils.py
│   │   └── pascal_voc_map_utils.py
│   ├── model
│   │   ├── __init__.py
│   │   ├── anchor_target.py
│   │   ├── faster_rcnn
│   │   │   ├── __init__.py
│   │   │   ├── base_faster_rcnn_model.py
│   │   │   ├── resnet_faster_rcnn.py
│   │   │   └── vgg16_faster_rcnn.py
│   │   ├── fpn
│   │   │   ├── __init__.py
│   │   │   ├── base_fpn_model.py
│   │   │   └── resnet_fpn.py
│   │   ├── losses.py
│   │   ├── model_factory.py
│   │   ├── prediction.py
│   │   ├── proposal_target.py
│   │   ├── region_proposal.py
│   │   └── roi_pooling.py
│   ├── protos
│   │   ├── __init__.py
│   │   └── string_int_label_map.proto
│   └── utils
│       ├── __init__.py
│       ├── anchor_generator.py
│       ├── bbox_np.py
│       ├── bbox_tf.py
│       ├── bbox_transform.py
│       ├── pytorch_to_tf.py
│       └── visual_utils.py
└── scripts
    ├── eval_coco.py
    ├── eval_pascal.py
    ├── generate_pascal_tf_records.py
    ├── label_map_src
    │   └── pascal_label_map.pbtxt
    └── train.py

/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | *pb2.py
3 | .ipynb_checkpoints/
4 | logs*
5 | ./pycocotools
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TF EAGER OBJECT DETECTION
2 | 
3 | ## 0. Targets
4 | + TensorFlow Eager Mode.
5 | + Object detection models.
6 | 
7 | ## 1. Architecture
8 | + `scripts`:
9 |   + `generate_pascal_tf_records.py`: generate tfrecords files from pascal source files.
10 |   + `train.py`: train on the coco or pascal dataset.
11 |   + `eval_pascal.py`: evaluate on the pascal dataset.
12 |   + `label_map_src`: copied from the TensorFlow Object Detection API.
13 | + `object_detection/dataset`:
14 |   + `utils`:
15 |     + `label_map_utils.py`: copied from the TensorFlow Object Detection API.
16 |     + `tf_record_utils.py`: utils to generate tfrecords files.
17 |     + `tf_dataset_utils.py`: utils to generate `tf.data.Dataset` objects.
18 |   + `pascal_tf_dataset_generator.py`: get the training pascal `tf.data.Dataset` object from tfrecords files.
19 |   + `pascal_tf_dataset_local_file.py`: get the training pascal `tf.data.Dataset` object from local files.
20 |   + `coco_tf_dataset_generator.py`: get the training coco `tf.data.Dataset` object.
21 |   + `eval_pascal_tf_dataset.py`: get the eval pascal `tf.data.Dataset` object.
22 | + `object_detection/evaluation`:
23 |   + `detectron_pascal_evaluation_utils.py`: copied from `Detectron`; evaluates pascal with local detection result files.
24 |   + `pascal_eval_files_utils.py`: generate local detection result files.
25 |   + `pascal_voc_map_utils.py`: compute pascal mAP results.
26 | + `object_detection/model`:
27 |   + `faster_rcnn`:
28 |     + `base_faster_rcnn_model.py`: base class for faster rcnn.
29 |     + `vgg16_faster_rcnn.py`: vgg16 faster rcnn model.
30 |     + `resnet_faster_rcnn.py`: resnet faster rcnn model.
31 |   + `fpn`:
32 |     + `base_fpn_model.py`: base class for fpn.
33 |     + `resnet_fpn.py`: resnet fpn model.
34 |   + `model_factory.py`: factory for model creation.
35 |   + `anchor_target.py`: generate anchor targets for rpn training.
36 |   + `losses.py`: smooth l1 loss & cross entropy loss.
37 |   + `prediction.py`: generate predictions after the roi head.
38 |   + `proposal_target.py`: generate proposal targets for roi training.
39 |   + `region_proposal.py`: generate region proposals for both training & testing.
40 |   + `roi_pooling.py`: roi pooling.
41 | + `object_detection/protos`: protobuf source files.
42 |   + `protoc ./object_detection/protos/*.proto --python_out=./object_detection/protos/`
43 | + `object_detection/utils`:
44 |   + `anchor_generator.py`: generate anchors.
45 |   + `bbox_np.py`: IoU computation, bbox range filter and bbox clip filter in numpy.
46 |   + `bbox_tf.py`: IoU computation, bbox range filter and bbox clip filter in tensorflow.
47 |   + `bbox_transform.py`: convert between bbox `(xmin, ymin, xmax, ymax)` and pred `(tx, ty, tw, th)`.
48 |   + `visual_utils.py`: draw bboxes in an image.
49 |   + `pytorch_to_tf.py`: convert a pytorch model to a pickle map.
50 | 
51 | 
52 | ---
53 | 
54 | 
55 | ## 2. TODO
56 | 
57 | ### 2.1. dataset
58 | + [x] pascal training dataset.
59 | + [x] pascal evaluating dataset.
60 | + [x] coco training dataset.
61 | + [x] coco evaluating dataset.
62 | 
63 | ### 2.2. model
64 | + [x] faster rcnn
65 | + [x] fpn
66 | + [ ] mask rcnn
67 | 
68 | ### 2.3. training & evaluating
69 | + [ ] use `defun` in all components.
70 | + [ ] multi gpu support.
71 | 
72 | ### 2.4. others
73 | + [x] BUG: after a few epochs, GPU memory usage suddenly doubles (see tensorflow issue #27288).
74 | + [ ] jupyter samples.
75 | + [ ] include the global step when restoring variables.
76 | 
77 | ---
78 | 
79 | ## 3. training records
80 | 
81 | ### 3.1. VOC Pascal 2007 trainval & test
82 | | Models | mAP |
83 | |:------:|:-----:|
84 | |vgg16 tf-faster-rcnn(source)|0.708|
85 | |vgg16 tf-faster-rcnn(load pre-trained model)|0.7106|
86 | |**vgg16 faster rcnn typical configs**|0.6935/0.6869/0.6751|
87 | |**resnet50 faster rcnn typical configs**|0.7294/0.7304|
88 | |resnet101 faster rcnn tf-faster-rcnn(source)|0.757|
89 | |resnet101 faster rcnn tf-faster-rcnn(load pre-trained model)|0.7578|
90 | |**resnet101 faster rcnn typical configs**|0.7456/0.7303/0.7247/0.7261|
91 | |resnet50 fpn FPN_Tensorflow(source)|0.7426|
92 | |resnet50 fpn FPN_Tensorflow(load pre-trained model)|0.7430|
93 | |**resnet50 fpn typical configs**|0.7465/0.7377/0.7392|
94 | |resnet101 fpn FPN_Tensorflow(source)|0.7614|
95 | |**resnet101 fpn typical configs**|0.7604/0.7618/0.7599|
96 | 
97 | ### 3.2. COCO 2014 minival
98 | | Models | mAP |
99 | |:------:|:-----:|
100 | |vgg16 tf-faster-rcnn(source)|0.302|
101 | |vgg16 tf-faster-rcnn(load pre-trained model)|0.302|
102 | |resnet50 tf-faster-rcnn(source)|0.324|
103 | |resnet50 tf-faster-rcnn(load pre-trained model)|0.324|
104 | 
105 | 
106 | ---
107 | 
108 | ## 4. Tutorial (optional)
109 | + training on the pascal voc 2007 trainval set, evaluating on the pascal voc 2007 test set.
110 |   + Step 0: generate the python protos by `protoc ./object_detection/protos/*.proto --python_out=./object_detection/protos/`.
111 |   + Step 1: set the configs and run `python scripts/generate_pascal_tf_records.py` to generate the trainval tfrecords.
112 |   + Step 2: train with `python scripts/train.py`; logs are written to `/path/to/logs_dir/`.
113 |   + Step 3: evaluate with `python scripts/eval_pascal.py /path/to/logs_dir/ckpt`.
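114 | + (optional) Step 4: sanity-check the resolved config in a python shell. The snippet below is only a minimal sketch (it is not one of the scripts above); the function and the key values are taken from `config_factory.py` and `faster_rcnn_config.py` in this repo:
115 | 
116 | ```python
117 | from object_detection.config.config_factory import config_factory
118 | 
119 | # default faster rcnn config for pascal voc
120 | config = config_factory('pascal', 'faster_rcnn')
121 | 
122 | print(config['num_classes'])     # 21 (20 voc classes + background)
123 | print(config['image_min_size'])  # 600
124 | print(config['image_max_size'])  # 1000
125 | print(config['ratios'])          # [0.5, 1.0, 2.0]
126 | ```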
--------------------------------------------------------------------------------
/object_detection/config/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irvingzhang0512/tf_eager_object_detection/e977217cb9d5fc2975292d55b1e5cae484f9e0dd/object_detection/config/__init__.py
--------------------------------------------------------------------------------
/object_detection/config/config_factory.py:
--------------------------------------------------------------------------------
1 | 
2 | def config_factory(data_type, model_type):
3 |     if model_type == 'faster_rcnn':
4 |         if data_type == 'pascal':
5 |             from object_detection.config.faster_rcnn_config import PASCAL_CONFIG
6 |             return PASCAL_CONFIG
7 |         elif data_type == 'coco':
8 |             from object_detection.config.faster_rcnn_config import COCO_CONFIG
9 |             return COCO_CONFIG
10 |     elif model_type == 'fpn':
11 |         if data_type == 'pascal':
12 |             from object_detection.config.fpn_config import PASCAL_CONFIG
13 |             return PASCAL_CONFIG
14 | 
15 |     raise ValueError('config for dataset type {} and model type {} doesn\'t exist'.format(data_type, model_type))
--------------------------------------------------------------------------------
/object_detection/config/faster_rcnn_config.py:
--------------------------------------------------------------------------------
1 | def get_default_pascal_faster_rcnn_config():
2 |     return {
3 |         # vgg16
4 |         'vgg16_roi_feature_size': (7, 7, 512),
5 |         'roi_head_keep_dropout_rate': 0.5,
6 |         'vgg16_roi_pooling_max_pooling_flag': True,
7 | 
8 |         # resnet
9 |         'resnet_roi_feature_size': (7, 7, 1024),
10 |         'resnet_roi_pooling_max_pooling_flag': False,
11 | 
12 |         # base configs
13 |         'num_classes': 21,
14 |         'weight_decay': 0.0001,
15 | 
16 |         # anchors configs
17 |         'ratios': [0.5, 1.0, 2.0],
18 |         'scales': [8, 16, 32],
19 |         'extractor_stride': 16,
20 | 
21 |         # training configs
22 |         'learning_rate_multi_decay_steps': [80000],  # 50000 for pascal 2007, 80000 for pascal 0712
23 |         'learning_rate_multi_lrs': [1e-3, 1e-4],
24 |         'learning_rate_bias_double': True,
25 |         'optimizer_momentum': 0.9,
26 |         'epochs': 8,  # 14 for pascal 2007, 8 for pascal 0712
27 | 
28 |         # preprocessing configs
29 |         'image_max_size': 1000,
30 |         'image_min_size': 600,
31 |         'bgr_pixel_means': [103.939, 116.779, 123.68],
32 |         # 'bgr_pixel_means': [102.9801, 115.9465, 122.7717],  # for tf-faster-rcnn
33 | 
34 |         # predict & evaluate configs
35 |         'evaluate_iou_threshold': 0.5,  # used when computing mAP: a pred is a TP if its IoU with a gt box exceeds this threshold, otherwise an FP
36 |         'max_objects_per_class_per_image': 50,
37 |         'max_objects_per_image': 50,
38 |         'prediction_nms_iou_threshold': 0.3,
39 |         'prediction_score_threshold': 0.0,
40 |         'show_image_score_threshold': 0.3,  # for image visualization
41 | 
42 |         # anchor target & region proposal
43 |         'rpn_proposal_means': [0, 0, 0, 0],
44 |         'rpn_proposal_stds': [1.0, 1.0, 1.0, 1.0],
45 | 
46 |         # anchor target
47 |         'rpn_sigma': 3.0,
48 |         'rpn_pos_iou_threshold': 0.7,
49 |         'rpn_neg_iou_threshold': 0.3,
50 |         'rpn_total_sample_number': 256,
51 |         'rpn_pos_sample_max_number': 128,
52 | 
53 |         # region proposal
54 |         'rpn_proposal_train_pre_nms_sample_number': 12000,
55 |         'rpn_proposal_train_after_nms_sample_number': 2000,
56 |         'rpn_proposal_test_pre_nms_sample_number': 6000,
57 |         'rpn_proposal_test_after_nms_sample_number': 300,
58 |         'rpn_proposal_nms_iou_threshold': 0.7,
59 | 
60 |         # proposal target & prediction
61 |         'roi_proposal_means': [0, 0, 0, 0],
62 |         'roi_proposal_stds': [0.1, 0.1, 0.2, 0.2],
63 | 
64 |         # roi pooling
65 |         'roi_pooling_size': 7,
66 | 
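        # note: 'rpn_sigma' above and 'roi_sigma' below set the sigma of the smooth l1
        # loss (as in py-faster-rcnn): 0.5 * (sigma * x)^2 if |x| < 1 / sigma^2,
        # otherwise |x| - 0.5 / sigma^2; see `losses.py`.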
67 |         # proposal target
68 |         'roi_sigma': 1.0,
69 |         'roi_pos_iou_threshold': 0.5,
70 |         'roi_neg_iou_threshold': 0.,
71 |         'roi_total_sample_number': 128,
72 |         'roi_pos_sample_max_number': 32,
73 | 
74 |     }
75 | 
76 | 
77 | def get_default_coco_faster_rcnn_config():
78 |     return {
79 |         # vgg16
80 |         'vgg16_roi_feature_size': (7, 7, 512),
81 |         'roi_head_keep_dropout_rate': 0.5,
82 |         'vgg16_roi_pooling_max_pooling_flag': True,
83 | 
84 |         # resnet
85 |         'resnet_roi_feature_size': (7, 7, 1024),
86 |         'resnet_roi_pooling_max_pooling_flag': False,
87 | 
88 |         # base configs
89 |         'num_classes': 81,
90 |         'weight_decay': 0.0001,
91 | 
92 |         # anchors configs
93 |         'ratios': [0.5, 1.0, 2.0],
94 |         'scales': [4, 8, 16, 32],
95 |         'extractor_stride': 16,
96 | 
97 |         # training configs
98 |         'learning_rate_multi_decay_steps': [350000],
99 |         'learning_rate_multi_lrs': [1e-3, 1e-4],
100 |         'learning_rate_bias_double': True,
101 |         'optimizer_momentum': 0.9,
102 |         'epochs': 6,
103 | 
104 |         # preprocessing configs
105 |         'image_max_size': 1000,
106 |         'image_min_size': 600,
107 |         # 'bgr_pixel_means': [103.939, 116.779, 123.68],
108 |         'bgr_pixel_means': [102.9801, 115.9465, 122.7717],  # for tf-faster-rcnn
109 | 
110 |         # predict & evaluate configs
111 |         'evaluate_iou_threshold': 0.5,  # used when computing mAP: a pred is a TP if its IoU with a gt box exceeds this threshold, otherwise an FP
112 |         'max_objects_per_class_per_image': 100,
113 |         'max_objects_per_image': 100,
114 |         'prediction_nms_iou_threshold': 0.3,
115 |         'prediction_score_threshold': 0.0,
116 |         'show_image_score_threshold': 0.3,  # for image visualization
117 | 
118 |         # anchor target & region proposal
119 |         'rpn_proposal_means': [0, 0, 0, 0],
120 |         'rpn_proposal_stds': [1.0, 1.0, 1.0, 1.0],
121 | 
122 |         # anchor target
123 |         'rpn_sigma': 3.0,
124 |         'rpn_pos_iou_threshold': 0.7,
125 |         'rpn_neg_iou_threshold': 0.3,
126 |         'rpn_total_sample_number': 256,
127 |         'rpn_pos_sample_max_number': 128,
128 | 
129 |         # region proposal
130 |         'rpn_proposal_train_pre_nms_sample_number': 12000,
131 |         'rpn_proposal_train_after_nms_sample_number': 2000,
132 |         'rpn_proposal_test_pre_nms_sample_number': 6000,
133 |         'rpn_proposal_test_after_nms_sample_number': 300,
134 |         'rpn_proposal_nms_iou_threshold': 0.7,
135 | 
136 |         # proposal target & prediction
137 |         'roi_proposal_means': [0, 0, 0, 0],
138 |         'roi_proposal_stds': [0.1, 0.1, 0.2, 0.2],
139 | 
140 |         # roi pooling
141 |         'roi_pooling_size': 7,
142 | 
143 |         # proposal target
144 |         'roi_sigma': 1.0,
145 |         'roi_pos_iou_threshold': 0.5,
146 |         'roi_neg_iou_threshold': 0.,
147 |         'roi_total_sample_number': 128,
148 |         'roi_pos_sample_max_number': 32,
149 |     }
150 | 
151 | 
152 | PASCAL_CONFIG = get_default_pascal_faster_rcnn_config()
153 | COCO_CONFIG = get_default_coco_faster_rcnn_config()
154 | 
--------------------------------------------------------------------------------
/object_detection/config/fpn_config.py:
--------------------------------------------------------------------------------
1 | def get_default_pascal_fpn_config():
2 |     return {
3 |         # backbone-specific configs
4 |         'resnet_roi_feature_size': [7, 7, 256],
5 |         'roi_head_keep_dropout_rate': 0.5,
6 | 
7 |         # base configs
8 |         'num_classes': 21,
9 | 
10 |         # fpn-specific configs
11 |         'level_name_list': ['p2', 'p3', 'p4', 'p5', 'p6'],
12 |         'min_level': 2,
13 |         'max_level': 5,
14 |         'top_down_dims': 256,
15 | 
16 |         # preprocessing configs
17 |         'image_max_size': 1000,
18 |         'image_min_size': 600,
19 |         'bgr_pixel_means': [103.939, 116.779, 123.68],
20 | 
21 |         # predict & evaluate configs
22 |         'evaluate_iou_threshold': 0.5,  # used when computing mAP: a pred is a TP if its IoU with a gt box exceeds this threshold, otherwise an FP
23 |         'max_objects_per_class_per_image': 50,
24 |         'max_objects_per_image': 50,
25 |         'prediction_nms_iou_threshold': 0.3,
26 |         'prediction_score_threshold': 0.0,
27 |         'show_image_score_threshold': 0.3,  # for image visualization
28 | 
29 |         # anchors configs
30 |         'ratios': [0.5, 1.0, 2.0],
31 |         'scales': [1.],
32 |         'anchor_stride_list': [4, 8, 16, 32, 64],
33 |         'base_anchor_size_list': [32, 64, 128, 256, 512],
34 | 
35 |         # training configs
36 |         'learning_rate_multi_decay_steps': [60000, 80000],
37 |         'learning_rate_multi_lrs': [1e-3, 1e-4, 1e-5],
38 |         'optimizer_momentum': 0.9,
39 |         'learning_rate_bias_double': False,
40 |         'weight_decay': 0.0001,
41 |         'epochs': 30,
42 | 
43 |         # rpn net configs
44 |         'rpn_proposal_means': [0, 0, 0, 0],
45 |         'rpn_proposal_stds': [1.0, 1.0, 1.0, 1.0],
46 |         'rpn_sigma': 3.0,
47 |         'rpn_pos_iou_threshold': 0.7,
48 |         'rpn_neg_iou_threshold': 0.3,
49 |         'rpn_total_sample_number': 256,
50 |         'rpn_pos_sample_max_number': 128,
51 |         'rpn_proposal_train_pre_nms_sample_number': 12000,
52 |         'rpn_proposal_train_after_nms_sample_number': 2000,
53 |         'rpn_proposal_test_pre_nms_sample_number': 6000,
54 |         'rpn_proposal_test_after_nms_sample_number': 1000,
55 |         'rpn_proposal_nms_iou_threshold': 0.7,
56 | 
57 |         'roi_pooling_size': 7,
58 |         'roi_pooling_max_pooling_flag': True,
59 | 
60 |         # roi net configs
61 |         'roi_proposal_means': [0, 0, 0, 0],
62 |         'roi_proposal_stds': [0.1, 0.1, 0.2, 0.2],
63 |         'roi_sigma': 1.0,
64 |         'roi_pos_iou_threshold': 0.5,
65 |         'roi_neg_iou_threshold': 0.,
66 |         'roi_total_sample_number': 256,
67 |         'roi_pos_sample_max_number': 64,
68 | 
69 |     }
70 | 
71 | 
72 | PASCAL_CONFIG = get_default_pascal_fpn_config()
--------------------------------------------------------------------------------
/object_detection/dataset/README.md:
--------------------------------------------------------------------------------
1 | # Dataset Module
2 | + target: generate `tf.data.Dataset` objects for object detection tasks.
3 | + structure:
4 |   + training dataset
5 |   + eval dataset
6 |   + utils
7 | 
8 | ---
9 | 
10 | ## 1. training dataset
11 | 
12 | ### 1.1. iter features
13 | + every iteration of a training set yields 3 features: preprocessed image, bboxes and labels.
14 | + preprocessed image:
15 |   + dtype: `tf.float32`
16 |   + shape: `[1, None, None, 3]`
17 |   + PS: `bgr` format.
18 | + bboxes:
19 |   + dtype: `tf.float32`
20 |   + shape: `[1, None, 4]`
21 |   + format: `ymin, xmin, ymax, xmax`
22 |   + range: `[0, image_height - 1]` or `[0, image_width - 1]`
23 | + labels:
24 |   + dtype: `tf.int32` or `tf.int64`
25 |   + shape: `[1, None,]`
26 | 
27 | ### 1.2. data flow
28 | + input: rgb uint8 raw image.
29 | + data augmentation:
30 |   + random flip left/right.
31 |   + resize image with min_edge and max_edge.
32 | + preprocessing (one of the following methods):
33 |   + method 1 (caffe): convert `rgb` to `bgr`, then subtract the imagenet means.
34 |   + method 2 (tf): convert `[0, 255]` to `[-1, 1]`
35 | 
36 | ---
37 | 
38 | ## 2. eval dataset
39 | 
40 | ### 2.1. iter features
41 | + every iteration of an eval set yields the following features: preprocessed image, image scale, image raw height, image raw width, and (for coco) image_id.
42 | + preprocessed image:
43 |   + dtype: `tf.float32`
44 |   + shape: `[1, None, None, 3]`
45 |   + PS: `bgr` or `rgb`
46 | + image scale:
47 |   + dtype: `tf.float64`
48 |   + shape: `[1,]`
49 | + image height:
50 |   + dtype: `tf.int32` or `tf.int64`
51 |   + shape: `[1,]`
52 | + image width:
53 |   + dtype: `tf.int32` or `tf.int64`
54 |   + shape: `[1,]`
55 | + image_id:
56 |   + dtype: `tf.int32`
57 |   + shape: `[1,]`
58 |   + PS: for the COCO dataset only; used by the coco eval tools.
59 | 
60 | ### 2.2. data flow
61 | + input: rgb uint8 raw image.
62 | + resize image with min_edge and max_edge.
63 | + preprocessing (one of the following methods):
64 |   + method 1 (caffe): convert `rgb` to `bgr`, then subtract the imagenet means.
65 |   + method 2 (tf): convert `[0, 255]` to `[-1, 1]`
66 | + convert `rgb` to `bgr` if necessary.
--------------------------------------------------------------------------------
/object_detection/dataset/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irvingzhang0512/tf_eager_object_detection/e977217cb9d5fc2975292d55b1e5cae484f9e0dd/object_detection/dataset/__init__.py
--------------------------------------------------------------------------------
/object_detection/dataset/coco_tf_dataset_generator.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import tensorflow as tf
4 | from functools import partial
5 | from pycocotools.coco import COCO
6 | 
7 | from object_detection.dataset.utils.tf_dataset_utils import image_argument_with_imgaug, preprocessing_training_func, \
8 |     preprocessing_eval_func
9 | 
10 | _COCO_TRAIN_DATASET = None
11 | _COCO_VAL_DATASET = None
12 | _COCO_TEST_DATASET = None
13 | 
14 | 
15 | def _get_global_dataset(mode, year, root_dir):
16 |     global _COCO_TRAIN_DATASET, _COCO_VAL_DATASET, _COCO_TEST_DATASET
17 |     if mode not in ['train', 'val', 'test', 'minival']:
18 |         raise ValueError('unknown mode {}'.format(mode))
19 |     if mode == 'train':
20 |         if _COCO_TRAIN_DATASET is None:
21 |             _COCO_TRAIN_DATASET = CocoDataset(root_dir=root_dir, sub_dir=mode, year=year)
22 |         coco_dataset = _COCO_TRAIN_DATASET
23 |     elif mode == 'val':
24 |         if _COCO_VAL_DATASET is None:
25 |             _COCO_VAL_DATASET = CocoDataset(root_dir=root_dir, sub_dir=mode, year=year)
26 |         coco_dataset = _COCO_VAL_DATASET
27 |     else:
28 |         if _COCO_TEST_DATASET is None:
29 |             _COCO_TEST_DATASET = CocoDataset(root_dir=root_dir, sub_dir=mode, year=year)
30 |         coco_dataset = _COCO_TEST_DATASET
31 |     return coco_dataset
32 | 
33 | 
34 | class CocoDataset:
35 |     def __init__(self, root_dir='/ssd/zhangyiyang/COCO2017', sub_dir='train', year="2017",
36 |                  min_edge=32, ):
37 |         if sub_dir not in ['train', 'val', 'minival']:
38 |             raise ValueError('unknown sub dir {}'.format(sub_dir))
39 |         if year not in ['2014', '2017']:
40 |             raise ValueError('unknown year dir {}'.format(year))
41 | 
42 |         annotation_file_path = os.path.join(root_dir, 'annotations', 'instances_{}{}.json'.format(sub_dir, year))
43 |         if sub_dir == 'minival':
44 |             sub_dir = 'val'
45 |         self._image_dir = os.path.join(root_dir, sub_dir + year)
46 | 
47 |         self._coco = COCO(annotation_file=annotation_file_path)
48 |         self._get_cat_id_name_dict()
49 |         self._img_ids, self._img_info_dict = self._filter_images(min_edge=min_edge)
50 | 
51 |     @property
52 |     def img_ids(self):
53 |         return self._img_ids
54 | 
55 |     @property
56 |     def img_info_dict(self):
57 |         return self._img_info_dict
58 | 
59 |     @property
60 |     def cat_id_to_name_dict(self):
61 |         return self._cat_id_to_name_dict
62 | 
63 |     @property
64 |     def name_to_cat_id_dict(self):
65 |         return self._name_to_cat_id_dict
66 | 
67 |     @property
68 |     def cat_id_to_raw_id(self):
69 |         return self._cat_id_to_raw_id
70 | 
71 |     @property
72 |     def raw_id_to_cat_id(self):
73 |         return self._raw_id_to_cat_id
74 | 
75 |     def _get_cat_id_name_dict(self):
76 |         cat_ids = self._coco.getCatIds()
77 |         cat_id_to_name = {0: 'background'}
78 |         name_to_cat_id = {'background': 0}
79 |         cat_id_to_raw_id = {}
80 |         raw_id_to_cat_id = {}
81 |         for idx, cat_id in enumerate(cat_ids):
82 |             cat_name = self._coco.loadCats(cat_id)[0]['name']
83 |             cat_id_to_name[cat_id] = cat_name
84 |             name_to_cat_id[cat_name] = cat_id
85 |             cat_id_to_raw_id[cat_id] = idx + 1
86 |             raw_id_to_cat_id[idx + 1] = cat_id
87 |         self._cat_id_to_name_dict = cat_id_to_name
88 |         self._name_to_cat_id_dict = name_to_cat_id
89 |         self._cat_id_to_raw_id = cat_id_to_raw_id
90 |         self._raw_id_to_cat_id = raw_id_to_cat_id
91 | 
92 |     def _filter_images(self, min_edge):
93 |         all_img_ids = list(set([_['image_id'] for _ in self._coco.anns.values()]))
94 |         img_ids = []
95 |         img_info_dict = {}
96 |         for i in all_img_ids:
97 |             info = self._coco.loadImgs(i)[0]
98 | 
99 |             ann_ids = self._coco.getAnnIds(imgIds=i)
100 |             ann_info = self._coco.loadAnns(ann_ids)
101 |             _, labels, _ = self._parse_ann_info(ann_info)
102 | 
103 |             if min(info['width'], info['height']) >= min_edge and labels.shape[0] != 0:
104 |                 img_ids.append(i)
105 |                 img_info_dict[i] = info
106 |         return img_ids, img_info_dict
107 | 
108 |     def _parse_ann_info(self, ann_infos):
109 |         """Parse the bbox annotations of one image.
110 | 
111 |         Args
112 |         ---
113 |         ann_infos (list[dict]): annotation infos of an image.
114 | 
115 |         Returns
116 |         ---
117 |         tuple: (gt_bboxes, gt_labels, gt_labels_text), where gt_bboxes is a
118 |             float32 array of shape [num_boxes, 4] in (ymin, xmin, ymax, xmax)
119 |             order, gt_labels holds int64 class ids and gt_labels_text the names.
120 |         """
121 |         gt_bboxes = []
122 |         gt_labels = []
123 |         gt_labels_text = []
124 | 
125 |         for i, ann in enumerate(ann_infos):
126 |             if ann.get('ignore', False):
127 |                 continue
128 |             x1, y1, w, h = ann['bbox']
129 |             if ann['area'] <= 0 or w < 1 or h < 1:
130 |                 continue
131 |             bbox = [y1, x1, y1 + h - 1., x1 + w - 1.]
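            # (the coco annotation stores a bbox as [x, y, w, h]; it is converted
            # above to the inclusive [ymin, xmin, ymax, xmax] order used throughout
            # this repo)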
132 |             gt_bboxes.append(bbox)
133 |             gt_labels.append(self._cat_id_to_raw_id[ann['category_id']])
134 |             gt_labels_text.append(self._cat_id_to_name_dict[ann['category_id']])
135 | 
136 |         if gt_bboxes:
137 |             gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
138 |             gt_labels = np.array(gt_labels, dtype=np.int64)
139 |             gt_labels_text = np.array(gt_labels_text, dtype=np.string_)
140 |         else:
141 |             gt_bboxes = np.zeros((0, 4), dtype=np.float32)
142 |             gt_labels = np.array([], dtype=np.int64)
143 |             gt_labels_text = np.array([], dtype=np.string_)
144 | 
145 |         return gt_bboxes, gt_labels, gt_labels_text
146 | 
147 |     def __getitem__(self, img_id):
148 |         # fetch the annotation info dicts of this image
149 |         ann_ids = self._coco.getAnnIds(imgIds=img_id)
150 |         ann_infos = self._coco.loadAnns(ann_ids)
151 |         gt_bboxes, gt_labels, _ = self._parse_ann_info(ann_infos)
152 | 
153 |         # normalize bboxes to the range [0, 1]
154 |         image_height, image_width = self._img_info_dict[img_id]['height'], self._img_info_dict[img_id]['width']
155 |         gt_bboxes[:, ::2] = gt_bboxes[:, ::2] / image_height
156 |         gt_bboxes[:, 1::2] = gt_bboxes[:, 1::2] / image_width
157 | 
158 |         file_path = os.path.join(self._image_dir, self._img_info_dict[img_id]['file_name'])
159 |         return file_path, gt_bboxes, image_height, image_width, gt_labels
160 | 
161 | 
162 | def get_training_dataset(root_dir='D:\\data\\COCO2017',
163 |                          mode='train', year="2017",
164 |                          min_size=600, max_size=1000,
165 |                          preprocessing_type='caffe', caffe_pixel_means=None,
166 |                          batch_size=1,
167 |                          repeat=1,
168 |                          shuffle=False, shuffle_buffer_size=1000,
169 |                          prefetch=False, prefetch_buffer_size=1000,
170 |                          argument=True, iaa_sequence=None):
171 |     coco_dataset = _get_global_dataset(mode, year, root_dir)
172 | 
173 |     def _parse_coco_data_py(img_id):
174 |         file_path, gt_bboxes, image_height, image_width, gt_labels = coco_dataset[img_id]
175 |         return file_path, gt_bboxes, image_height, image_width, gt_labels
176 | 
177 |     tf_dataset = tf.data.Dataset.from_tensor_slices(coco_dataset.img_ids).map(
178 |         lambda img_id: tuple([*tf.py_func(_parse_coco_data_py, [img_id],
179 |                                           [tf.string, tf.float32, tf.int64, tf.int64, tf.int64])])
180 |     )
181 |     tf_dataset = tf_dataset.map(
182 |         lambda file_path, gt_bboxes, image_height, image_width, gt_labels: tuple([
183 |             tf.image.decode_jpeg(tf.io.read_file(file_path), channels=3),
184 |             gt_bboxes, image_height, image_width, gt_labels
185 |         ])
186 |     )
187 | 
188 |     if argument:
189 |         image_argument_partial = partial(image_argument_with_imgaug, iaa_sequence=iaa_sequence)
190 |         tf_dataset = tf_dataset.map(
191 |             lambda image, bboxes, image_height, image_width, labels: tuple([
192 |                 *tf.py_func(image_argument_partial, [image, bboxes], [image.dtype, bboxes.dtype]),
193 |                 image_height, image_width, labels]),
194 |             num_parallel_calls=5
195 |         )
196 | 
197 |     preprocessing_partial_func = partial(preprocessing_training_func,
198 |                                          min_size=min_size, max_size=max_size,
199 |                                          preprocessing_type=preprocessing_type, caffe_pixel_means=caffe_pixel_means)
200 | 
201 |     tf_dataset = tf_dataset.batch(batch_size=batch_size).map(preprocessing_partial_func, num_parallel_calls=5)
202 | 
203 |     if shuffle:
204 |         tf_dataset = tf_dataset.shuffle(buffer_size=shuffle_buffer_size)
205 |     if prefetch:
206 |         tf_dataset = tf_dataset.prefetch(buffer_size=prefetch_buffer_size)
207 | 
208 |     return tf_dataset.repeat(repeat)
209 | 
210 | 
211 | def get_eval_dataset(root_dir='D:\\data\\COCO2017',
212 |                      mode='train', year='2017',
213 |                      min_size=600, max_size=1000,
214 |                      preprocessing_type='caffe', caffe_pixel_means=None,
215 |                      batch_size=1,
216 |                      repeat=1, ):
217 |     coco_dataset = _get_global_dataset(mode, year, root_dir)
218 | 
219 |     preprocessing_partial_func = partial(preprocessing_eval_func,
220 |                                          min_size=min_size, max_size=max_size,
221 |                                          preprocessing_type=preprocessing_type, caffe_pixel_means=caffe_pixel_means)
222 | 
223 |     def _parse_coco_data_py(img_id):
224 |         file_path, _, img_height, img_width, _ = coco_dataset[img_id]
225 |         img = tf.image.decode_jpeg(tf.io.read_file(file_path), channels=3)
226 |         return img, img_height, img_width, img_id
227 | 
228 |     def _preprocessing_after_batch(img, img_height, img_width, img_id):
229 |         img, img_scale, img_height, img_width = preprocessing_partial_func(img, img_height, img_width)
230 |         return img, img_scale, img_height, img_width, img_id[0]
231 | 
232 |     tf_dataset = tf.data.Dataset.from_tensor_slices(coco_dataset.img_ids).map(
233 |         lambda img_id: tuple([*tf.py_func(_parse_coco_data_py, [img_id],
234 |                                           [tf.uint8, tf.int64, tf.int64, tf.int32])])
235 |     ).batch(batch_size).map(_preprocessing_after_batch)
236 | 
237 |     return tf_dataset.repeat(repeat)
--------------------------------------------------------------------------------
/object_detection/dataset/dataset_factory.py:
--------------------------------------------------------------------------------
1 | from object_detection.dataset.coco_tf_dataset_generator import get_training_dataset as get_coco_train_dataset
2 | from object_detection.dataset.coco_tf_dataset_generator import get_eval_dataset as get_coco_eval_dataset
3 | from object_detection.dataset.pascal_tf_dataset_generator import get_dataset as get_pascal_train_dataset
4 | from object_detection.dataset.eval_pascal_tf_dataset import get_dataset_by_local_file as get_pascal_eval_dataset
5 | 
6 | 
7 | def dataset_factory(dataset_type, mode, configs):
8 |     if dataset_type == 'pascal':
9 |         if mode == 'train':
10 |             return get_pascal_train_dataset(**configs)
11 |         elif mode == 'test':
12 |             return get_pascal_eval_dataset('test', **configs)
13 |         raise ValueError('unknown mode {} for dataset type {}'.format(mode, dataset_type))
14 | 
15 |     if dataset_type == 'coco':
16 |         if mode == 'train':
17 |             return get_coco_train_dataset(**configs)
18 |         elif mode == 'val':
19 |             return get_coco_eval_dataset(**configs)
20 |         raise ValueError('unknown mode {} for dataset type {}'.format(mode, dataset_type))
21 | 
22 |     raise ValueError('unknown dataset type {}'.format(dataset_type))
--------------------------------------------------------------------------------
/object_detection/dataset/eval_pascal_tf_dataset.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import cv2
4 | import os
5 | from functools import partial
6 | 
7 | 
8 | __all__ = ['get_dataset_by_tf_records', 'get_dataset_by_local_file']
9 | 
10 | 
11 | def get_dataset_by_local_file(mode, root_path, image_format='bgr',
12 |                               preprocessing_type='caffe', caffe_pixel_means=None,
13 |                               min_edge=600, max_edge=1000):
14 |     """
15 |     Read the image id list from /path/to/VOC2007 or VOC2012/ImageSets/Main/{}.txt, then load the images with cv2.
16 |     :param mode:
17 |     :param root_path:
18 |     :param image_format:
19 |     :param caffe_pixel_means:
20 |     :param preprocessing_type:
21 |     :param min_edge:
22 |     :param max_edge:
23 |     :return:
24 |     """
25 |     if image_format not in ['rgb', 'bgr']:
26 |         raise ValueError('unknown image format {}'.format(image_format))
27 |     with open(os.path.join(root_path, 'ImageSets', 'Main', '%s.txt' % mode), 'r') as f:
28 |         lines = f.readlines()
29 |     examples_list = [line.strip() for line in lines]
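    # each line of ImageSets/Main/<mode>.txt is a bare image id (e.g. '000005');
    # the corresponding image lives at JPEGImages/<id>.jpg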
30 |     img_dir = os.path.join(root_path, 'JPEGImages')
31 | 
32 |     def _map_from_cv2(example):
33 |         example = example.decode()
34 |         img_file_path = os.path.join(img_dir, example + '.jpg')
35 |         img = cv2.imread(img_file_path).astype(np.float32)
36 |         if preprocessing_type == 'caffe':
37 |             img -= np.array([[caffe_pixel_means]])
38 |         elif preprocessing_type == 'tf':
39 |             img = img / 255.0 * 2.0 - 1.0
40 |         else:
41 |             raise ValueError('unknown preprocessing type {}'.format(preprocessing_type))
42 |         h, w, _ = img.shape
43 |         scale1 = min_edge / min(h, w)
44 |         scale2 = max_edge / max(h, w)
45 |         scale = min(scale1, scale2)
46 |         new_h = int(scale * h)
47 |         new_w = int(scale * w)
48 | 
49 |         img = cv2.resize(img, (new_w, new_h))
50 |         if image_format == 'rgb':
51 |             img = img[..., ::-1]
52 |         return img, float(scale), h, w
53 | 
54 |     dataset = tf.data.Dataset.from_tensor_slices(examples_list).map(
55 |         lambda example: tf.py_func(_map_from_cv2,
56 |                                    [example],
57 |                                    [tf.float32, tf.float64, tf.int64, tf.int64]  # linux
58 |                                    # [tf.float32, tf.float64, tf.int32, tf.int32]  # windows
59 |                                    )
60 |     ).batch(1)
61 | 
62 |     return dataset, examples_list
63 | 
64 | 
65 | def _caffe_preprocessing(image, pixel_means):
66 |     """
67 |     Take a uint8 RGB image, convert it to tf.float32 BGR and subtract the imagenet means.
68 |     :param image:
69 |     :return:
70 |     """
71 |     image = tf.to_float(image)
72 |     image = tf.reverse(image, axis=[-1])
73 |     channels = tf.split(axis=-1, num_or_size_splits=3, value=image)
74 |     for i in range(3):
75 |         channels[i] -= pixel_means[i]
76 |     return tf.concat(axis=-1, values=channels)
77 | 
78 | 
79 | def _tf_preprocessing(image):
80 |     """
81 |     Take a uint8 RGB image and convert it to tf.float32 RGB with values in [-1, 1].
82 |     :param image:
83 |     :return:
84 |     """
85 |     return tf.image.convert_image_dtype(image, dtype=tf.float32) * 2.0 - 1.0
86 | 
87 | 
88 | def get_dataset_by_tf_records(mode, root_path,
89 |                               preprocessing_type='caffe', caffe_pixel_means=None,
90 |                               min_edge=600, max_edge=1000):
91 |     with open(os.path.join(root_path, 'ImageSets', 'Main', '%s.txt' % mode), 'r') as f:
92 |         lines = f.readlines()
93 |     examples_list = [line.strip() for line in lines]
94 |     img_dir = os.path.join(root_path, 'JPEGImages')
95 |     example_path_list = [os.path.join(img_dir, example + '.jpg') for example in examples_list]
96 | 
97 |     def _map_from_tf_image(example_path):
98 |         img = tf.image.decode_jpeg(tf.io.read_file(example_path), channels=3)
99 |         if preprocessing_type == 'caffe':
100 |             preprocessing_fn = partial(_caffe_preprocessing, pixel_means=caffe_pixel_means)
101 |         elif preprocessing_type == 'tf':
102 |             preprocessing_fn = _tf_preprocessing
103 |         else:
104 |             raise ValueError('unknown preprocessing type {}'.format(preprocessing_type))
105 |         img = preprocessing_fn(img)
106 | 
107 |         # the static shape of a decoded jpeg is unknown, so use the dynamic shape here
108 |         img_shape = tf.shape(img)
109 |         h = tf.to_float(img_shape[0])
110 |         w = tf.to_float(img_shape[1])
111 |         scale1 = min_edge / tf.minimum(h, w)
112 |         scale2 = max_edge / tf.maximum(h, w)
113 |         scale = tf.minimum(scale1, scale2)
114 |         # resize_bilinear expects a 4-d batch, so add and then strip a batch dim
115 |         img = tf.image.resize_bilinear(img[tf.newaxis, ...],
116 |                                        [tf.to_int32(scale * h), tf.to_int32(scale * w)])[0]
117 |         return img, tf.to_double(scale), tf.to_int64(h), tf.to_int64(w)
118 | 
119 |     dataset = tf.data.Dataset.from_tensor_slices(example_path_list).map(_map_from_tf_image).batch(1)
120 | 
121 |     return dataset, examples_list
--------------------------------------------------------------------------------
/object_detection/dataset/pascal_tf_dataset_generator.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from functools import partial
3 | 
4 | from object_detection.dataset.utils.tf_dataset_utils import image_argument_with_imgaug, preprocessing_training_func
5 | 
6 | __all__ = ['get_dataset']
7 | 
8 | 
9 | def _parse_tf_records(serialized_example):
10 |     features = tf.parse_single_example(serialized_example,
11 |                                        features={'image/height': tf.FixedLenFeature([1], tf.int64),
12 |                                                  'image/width': tf.FixedLenFeature([1], tf.int64),
13 |                                                  'image/filename': tf.FixedLenFeature([1], tf.string),
14 |                                                  'image/encoded': tf.FixedLenFeature([1], tf.string),
15 |                                                  'image/object/bbox/xmin': tf.VarLenFeature(tf.float32),
16 |                                                  'image/object/bbox/xmax': tf.VarLenFeature(tf.float32),
17 |                                                  'image/object/bbox/ymin': tf.VarLenFeature(tf.float32),
18 |                                                  'image/object/bbox/ymax': tf.VarLenFeature(tf.float32),
19 |                                                  'image/object/class/label': tf.VarLenFeature(tf.int64),
20 |                                                  'image/object/class/text': tf.VarLenFeature(tf.string),
21 |                                                  }
22 |                                        )
23 |     features['image/object/bbox/xmin'] = tf.sparse_tensor_to_dense(features['image/object/bbox/xmin'])
24 |     features['image/object/bbox/xmax'] = tf.sparse_tensor_to_dense(features['image/object/bbox/xmax'])
25 |     features['image/object/bbox/ymin'] = tf.sparse_tensor_to_dense(features['image/object/bbox/ymin'])
26 |     features['image/object/bbox/ymax'] = tf.sparse_tensor_to_dense(features['image/object/bbox/ymax'])
27 |     features['image/object/class/label'] = tf.sparse_tensor_to_dense(features['image/object/class/label'])
28 |     image = tf.image.decode_jpeg(features['image/encoded'][0])
29 |     bboxes = tf.transpose(tf.stack((features['image/object/bbox/ymin'],
30 |                                     features['image/object/bbox/xmin'],
31 |                                     features['image/object/bbox/ymax'],
32 |                                     features['image/object/bbox/xmax'])), name='bboxes')
33 |     return image, bboxes, features['image/height'][0], features['image/width'][0], features['image/object/class/label']
34 | 
35 | 
36 | def get_dataset(tf_records_list,
37 |                 min_size=600, max_size=1000,
38 |                 preprocessing_type='caffe', caffe_pixel_means=None,
39 |                 batch_size=1, repeat=1,
40 |                 shuffle=False, shuffle_buffer_size=1000,
41 |                 prefetch=False, prefetch_buffer_size=1000,
42 |                 argument=True, iaa_sequence=None):
43 |     """
44 |     Build the training dataset. The pipeline is:
45 | 
46 |     1) read the raw examples from the tfrecords files;
47 |     2) apply the given iaa_sequence if augmentation is enabled;
48 |     3) normalize the data: convert uint8 to float, either into [0, 1] or by subtracting the pixel means;
49 |     4) shuffle;
50 |     5) prefetch;
51 |     6) batch;
52 |     7) repeat.
53 | 
54 |     The default augmentation sequence is:
55 |     ```
56 |     iaa_sequence = [
57 |         iaa.Fliplr(0.5),
58 |     ]
59 |     ```
60 |     i.e. 1) a random horizontal flip.
61 | 
62 |     When iterated, the dataset yields:
63 |     image, bboxes, labels
64 |     with dtypes tf.float32 (in [0, 1]), tf.float32 (in [0, edge length]), tf.int32 (in [0, num_classes])
65 |     and shapes [1, height, width, 3], [1, num_bboxes, 4], [num_bboxes].
66 | 
67 |     :param tf_records_list:
68 |     :param min_size:
69 |     :param max_size:
70 |     :param preprocessing_type:
71 |     :param caffe_pixel_means:
72 |     :param repeat:
73 |     :param batch_size:
74 |     :param shuffle:
75 |     :param shuffle_buffer_size:
76 |     :param prefetch:
77 |     :param prefetch_buffer_size:
78 |     :param argument:
79 |     :param iaa_sequence:
80 |     :return:
81 |     """
82 | 
83 |     dataset = tf.data.TFRecordDataset(tf_records_list).map(_parse_tf_records)
84 | 
85 |     if argument:
86 |         image_argument_partial = partial(image_argument_with_imgaug, iaa_sequence=iaa_sequence)
87 |         dataset = dataset.map(
88 |             lambda image, bboxes, image_height, image_width, labels: tuple([
89 |                 *tf.py_func(image_argument_partial, [image, bboxes], [image.dtype, bboxes.dtype]),
90 |                 image_height, image_width, labels])
91 |         )
92 | 
93 |     preprocessing_partial_func = partial(preprocessing_training_func,
94 |                                          min_size=min_size, max_size=max_size,
95 |
preprocessing_type=preprocessing_type, 96 | caffe_pixel_means=caffe_pixel_means) 97 | 98 | dataset = dataset.batch(batch_size=batch_size).map(preprocessing_partial_func) 99 | 100 | if shuffle: 101 | dataset = dataset.shuffle(buffer_size=shuffle_buffer_size) 102 | if prefetch: 103 | dataset = dataset.prefetch(buffer_size=prefetch_buffer_size) 104 | 105 | return dataset.repeat(repeat) 106 | -------------------------------------------------------------------------------- /object_detection/dataset/pascal_tf_dataset_local_file.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import cv2 4 | import os 5 | import object_detection.dataset.utils.label_map_utils as label_map_utils 6 | import object_detection.dataset.utils.tf_record_utils as dataset_utils 7 | from lxml import etree 8 | 9 | 10 | def _read_image(file_path): 11 | img = cv2.imread(file_path).astype(np.float32) 12 | img -= np.array([[[102.9801, 115.9465, 122.7717]]]) 13 | h, w, _ = img.shape 14 | min_edge = 600 15 | max_edge = 1000 16 | scale1 = min_edge / min(h, w) 17 | scale2 = max_edge / max(h, w) 18 | scale = min(scale1, scale2) 19 | img = cv2.resize(img, None, None, fx=scale, fy=scale, 20 | interpolation=cv2.INTER_LINEAR) 21 | return img, scale 22 | 23 | 24 | def get_dataset(mode, root_path, label_map_file_path): 25 | label_map_dict = label_map_utils.get_label_map_dict(label_map_file_path) 26 | with open(os.path.join(root_path, 'ImageSets', 'Main', 'aeroplane_%s.txt' % mode), 'r') as f: 27 | lines = f.readlines() 28 | examples_list = [line.strip().split(' ')[0] for line in lines] 29 | annotations_dir = os.path.join(root_path, 'Annotations') 30 | img_dir = os.path.join(root_path, 'JPEGImages') 31 | 32 | def _map_from_xml_and_cv2(example): 33 | example = example.decode() 34 | with open(os.path.join(annotations_dir, str(example) + '.xml'), 'r') as f: 35 | xml_str = f.read() 36 | xml_dict = dataset_utils.recursive_parse_xml_to_dict(etree.fromstring(xml_str))['annotation'] 37 | img_file_path = os.path.join(img_dir, xml_dict['filename']) 38 | img, img_scale = _read_image(img_file_path) 39 | xmin = [] 40 | ymin = [] 41 | xmax = [] 42 | ymax = [] 43 | classes = [] 44 | if 'object' in xml_dict: 45 | for obj in xml_dict['object']: 46 | xmin.append((float(obj['bndbox']['xmin']) - 1) * img_scale) 47 | ymin.append((float(obj['bndbox']['ymin']) - 1) * img_scale) 48 | xmax.append((float(obj['bndbox']['xmax']) - 1) * img_scale) 49 | ymax.append((float(obj['bndbox']['ymax']) - 1) * img_scale) 50 | classes.append(label_map_dict[obj['name']]) 51 | 52 | return img, np.stack([ymin, xmin, ymax, xmax], axis=0).transpose().astype(np.float32), np.array(classes).astype( 53 | np.int32) 54 | 55 | dataset = tf.data.Dataset.from_tensor_slices(examples_list).map( 56 | lambda example: tf.py_func(_map_from_xml_and_cv2, 57 | [example], 58 | [tf.float32, tf.float32, tf.int32]) 59 | ).batch(1) 60 | 61 | return dataset 62 | -------------------------------------------------------------------------------- /object_detection/dataset/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irvingzhang0512/tf_eager_object_detection/e977217cb9d5fc2975292d55b1e5cae484f9e0dd/object_detection/dataset/utils/__init__.py -------------------------------------------------------------------------------- /object_detection/dataset/utils/label_map_utils.py: 
-------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import tensorflow as tf 4 | from google.protobuf import text_format 5 | from object_detection.protos import string_int_label_map_pb2 6 | 7 | 8 | def _validate_label_map(label_map): 9 | """Checks if a label map is valid. 10 | 11 | Args: 12 | label_map: StringIntLabelMap to validate. 13 | 14 | Raises: 15 | ValueError: if label map is invalid. 16 | """ 17 | for item in label_map.item: 18 | if item.id < 0: 19 | raise ValueError('Label map ids should be >= 0.') 20 | if (item.id == 0 and item.name != 'background' and 21 | item.display_name != 'background'): 22 | raise ValueError('Label map id 0 is reserved for the background label') 23 | 24 | 25 | def create_category_index(categories): 26 | """Creates dictionary of COCO compatible categories keyed by category id. 27 | 28 | Args: 29 | categories: a list of dicts, each of which has the following keys: 30 | 'id': (required) an integer id uniquely identifying this category. 31 | 'name': (required) string representing category name 32 | e.g., 'cat', 'dog', 'pizza'. 33 | 34 | Returns: 35 | category_index: a dict containing the same entries as categories, but keyed 36 | by the 'id' field of each category. 37 | """ 38 | category_index = {} 39 | for cat in categories: 40 | category_index[cat['id']] = cat 41 | return category_index 42 | 43 | 44 | def get_max_label_map_index(label_map): 45 | """Get maximum index in label map. 46 | 47 | Args: 48 | label_map: a StringIntLabelMapProto 49 | 50 | Returns: 51 | an integer 52 | """ 53 | return max([item.id for item in label_map.item]) 54 | 55 | 56 | def convert_label_map_to_categories(label_map, 57 | max_num_classes, 58 | use_display_name=True): 59 | """Given label map proto returns categories list compatible with eval. 60 | 61 | This function converts label map proto and returns a list of dicts, each of 62 | which has the following keys: 63 | 'id': (required) an integer id uniquely identifying this category. 64 | 'name': (required) string representing category name 65 | e.g., 'cat', 'dog', 'pizza'. 66 | We only allow class into the list if its id-label_id_offset is 67 | between 0 (inclusive) and max_num_classes (exclusive). 68 | If there are several items mapping to the same id in the label map, 69 | we will only keep the first one in the categories list. 70 | 71 | Args: 72 | label_map: a StringIntLabelMapProto or None. If None, a default categories 73 | list is created with max_num_classes categories. 74 | max_num_classes: maximum number of (consecutive) label indices to include. 75 | use_display_name: (boolean) choose whether to load 'display_name' field as 76 | category name. If False or if the display_name field does not exist, uses 77 | 'name' field as category names instead. 78 | 79 | Returns: 80 | categories: a list of dictionaries representing all possible categories. 
81 | """ 82 | categories = [] 83 | list_of_ids_already_added = [] 84 | if not label_map: 85 | label_id_offset = 1 86 | for class_id in range(max_num_classes): 87 | categories.append({ 88 | 'id': class_id + label_id_offset, 89 | 'name': 'category_{}'.format(class_id + label_id_offset) 90 | }) 91 | return categories 92 | for item in label_map.item: 93 | if not 0 < item.id <= max_num_classes: 94 | logging.info( 95 | 'Ignore item %d since it falls outside of requested ' 96 | 'label range.', item.id) 97 | continue 98 | if use_display_name and item.HasField('display_name'): 99 | name = item.display_name 100 | else: 101 | name = item.name 102 | if item.id not in list_of_ids_already_added: 103 | list_of_ids_already_added.append(item.id) 104 | categories.append({'id': item.id, 'name': name}) 105 | return categories 106 | 107 | 108 | def load_labelmap(path): 109 | """Loads label map proto. 110 | 111 | Args: 112 | path: path to StringIntLabelMap proto text file. 113 | Returns: 114 | a StringIntLabelMapProto 115 | """ 116 | with tf.gfile.GFile(path, 'r') as fid: 117 | label_map_string = fid.read() 118 | label_map = string_int_label_map_pb2.StringIntLabelMap() 119 | try: 120 | text_format.Merge(label_map_string, label_map) 121 | except text_format.ParseError: 122 | label_map.ParseFromString(label_map_string) 123 | _validate_label_map(label_map) 124 | return label_map 125 | 126 | 127 | def get_label_map_dict(label_map_path, 128 | use_display_name=False, 129 | fill_in_gaps_and_background=False): 130 | """Reads a label map and returns a dictionary of label names to id. 131 | 132 | Args: 133 | label_map_path: path to StringIntLabelMap proto text file. 134 | use_display_name: whether to use the label map items' display names as keys. 135 | fill_in_gaps_and_background: whether to fill in gaps and background with 136 | respect to the id field in the proto. The id: 0 is reserved for the 137 | 'background' class and will be added if it is missing. All other missing 138 | ids in range(1, max(id)) will be added with a dummy class name 139 | ("class_") if they are missing. 140 | 141 | Returns: 142 | A dictionary mapping label names to id. 143 | 144 | Raises: 145 | ValueError: if fill_in_gaps_and_background and label_map has non-integer or 146 | negative values. 147 | """ 148 | label_map = load_labelmap(label_map_path) 149 | label_map_dict = {} 150 | for item in label_map.item: 151 | if use_display_name: 152 | label_map_dict[item.display_name] = item.id 153 | else: 154 | label_map_dict[item.name] = item.id 155 | 156 | if fill_in_gaps_and_background: 157 | values = set(label_map_dict.values()) 158 | 159 | if 0 not in values: 160 | label_map_dict['background'] = 0 161 | if not all(isinstance(value, int) for value in values): 162 | raise ValueError('The values in label map must be integers in order to' 163 | 'fill_in_gaps_and_background.') 164 | if not all(value >= 0 for value in values): 165 | raise ValueError('The values in the label map must be positive.') 166 | 167 | if len(values) != max(values) + 1: 168 | # there are gaps in the labels, fill in gaps. 169 | for value in range(1, max(values)): 170 | if value not in values: 171 | label_map_dict['class_' + str(value)] = value 172 | 173 | return label_map_dict 174 | 175 | 176 | def create_categories_from_labelmap(label_map_path, use_display_name=True): 177 | """Reads a label map and returns categories list compatible with eval. 
178 | 
179 |     This function converts label map proto and returns a list of dicts, each of
180 |     which has the following keys:
181 |         'id': an integer id uniquely identifying this category.
182 |         'name': string representing category name e.g., 'cat', 'dog'.
183 | 
184 |     Args:
185 |         label_map_path: Path to `StringIntLabelMap` proto text file.
186 |         use_display_name: (boolean) choose whether to load 'display_name' field
187 |             as category name. If False or if the display_name field does not exist,
188 |             uses 'name' field as category names instead.
189 | 
190 |     Returns:
191 |         categories: a list of dictionaries representing all possible categories.
192 |     """
193 |     label_map = load_labelmap(label_map_path)
194 |     max_num_classes = max(item.id for item in label_map.item)
195 |     return convert_label_map_to_categories(label_map, max_num_classes,
196 |                                            use_display_name)
197 | 
198 | 
199 | def create_category_index_from_labelmap(label_map_path, use_display_name=True):
200 |     """Reads a label map and returns a category index.
201 | 
202 |     Args:
203 |         label_map_path: Path to `StringIntLabelMap` proto text file.
204 |         use_display_name: (boolean) choose whether to load 'display_name' field
205 |             as category name. If False or if the display_name field does not exist,
206 |             uses 'name' field as category names instead.
207 | 
208 |     Returns:
209 |         A category index, which is a dictionary that maps integer ids to dicts
210 |         containing categories, e.g.
211 |         {1: {'id': 1, 'name': 'dog'}, 2: {'id': 2, 'name': 'cat'}, ...}
212 |     """
213 |     categories = create_categories_from_labelmap(label_map_path, use_display_name)
214 |     return create_category_index(categories)
215 | 
216 | 
217 | def create_class_agnostic_category_index():
218 |     """Creates a category index with a single `object` class."""
219 |     return {1: {'id': 1, 'name': 'object'}}
220 | 
--------------------------------------------------------------------------------
/object_detection/dataset/utils/tf_dataset_utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import imgaug as ia
3 | from imgaug import augmenters as iaa
4 | import numpy as np
5 | from functools import partial
6 | 
7 | __all__ = ['image_argument_with_imgaug', 'preprocessing_training_func', 'preprocessing_eval_func']
8 | 
9 | 
10 | def _get_default_iaa_sequence():
11 |     return [
12 |         iaa.Fliplr(0.5),
13 |     ]
14 | 
15 | 
16 | def image_argument_with_imgaug(image, bboxes, iaa_sequence=None):
17 |     """
18 |     Augment a single image.
19 |     The input image is tf.uint8 with values in [0, 255].
20 |     The input bboxes are tf.float32 with values in [0, 1].
21 |     The outputs keep the same formats as the inputs.
22 |     :param image: one image, an ndarray of shape [None, None, 3]
23 |     :param bboxes: bounding boxes, shape [bbox_number, 4], in (ymin, xmin, ymax, xmax) order,
24 |                    float values in [0, 1]
25 |     :param iaa_sequence:
26 |     :return: the augmented image and bboxes, in the same formats as the inputs
27 |     """
28 |     bboxes_list = []
29 |     height, width, channels = image.shape
30 |     for bbox in bboxes:
31 |         ymin, xmin, ymax, xmax = int(bbox[0] * height), int(bbox[1] * width), int(bbox[2] * height), int(
32 |             bbox[3] * width)
33 |         bboxes_list.append(ia.BoundingBox(x1=xmin, y1=ymin, x2=xmax, y2=ymax))
34 |     bboxes_ia = ia.BoundingBoxesOnImage(bboxes_list, shape=image.shape)
35 | 
36 |     if iaa_sequence is None:
37 |         iaa_sequence = _get_default_iaa_sequence()
38 |     seq = iaa.Sequential(iaa_sequence)
39 | 
40 |     seq_det = seq.to_deterministic()
41 | 
42 |     image_aug = seq_det.augment_images([image])[0]
43 |     bbs_aug = seq_det.augment_bounding_boxes([bboxes_ia])[0]
44 | 
45 |     bboxes_aug_list = []
46 |     height, width, channels = image_aug.shape
47 |     for iaa_bbox in bbs_aug.bounding_boxes:
48 |         bboxes_aug_list.append([iaa_bbox.y1 / height, iaa_bbox.x1 / width, iaa_bbox.y2 / height, iaa_bbox.x2 / width])
49 |     bboxes_aug_np = np.array(bboxes_aug_list)
50 |     bboxes_aug_np[bboxes_aug_np < 0] = 0
51 |     bboxes_aug_np[bboxes_aug_np > 1] = 1
52 |     return image_aug, bboxes_aug_np.astype(np.float32)
53 | 
54 | 
55 | def _caffe_preprocessing(image, pixel_means):
56 |     """
57 |     Take a uint8 RGB image, convert it to tf.float32 BGR and subtract the imagenet means.
58 |     :param image:
59 |     :return:
60 |     """
61 | 
62 |     # using the helper below hits a strange problem: building a second dataset raises
63 |     # AttributeError: 'Tensor' object has no attribute '_datatype_enum'
64 |     # return tf.keras.applications.vgg16.preprocess_input(image)
65 | 
66 |     image = tf.to_float(image)
67 |     image = tf.reverse(image, axis=[-1])
68 |     channels = tf.split(axis=-1, num_or_size_splits=3, value=image)
69 |     for i in range(3):
70 |         channels[i] -= pixel_means[i]
71 |     return tf.concat(axis=-1, values=channels)
72 | 
73 | 
74 | def _tf_preprocessing(image):
75 |     """
76 |     Take a uint8 RGB image and convert it to tf.float32 RGB with values in [-1, 1].
77 |     :param image:
78 |     :return:
79 |     """
80 |     return tf.image.convert_image_dtype(image, dtype=tf.float32) * 2.0 - 1.0
81 | 
82 | 
83 | def preprocessing_training_func(image, bboxes, height, width, labels,
84 |                                 min_size, max_size, preprocessing_type, caffe_pixel_means=None):
85 |     """
86 |     Take an rgb image and preprocess it as follows:
87 |     1) resize so the short edge is at least min_size and the long edge at most max_size; when both cannot hold, the long edge wins (e.g. with min_size=600 and max_size=1000, a 500x1200 image gets scale = min(600/500, 1000/1200) ~= 0.83 and is resized to roughly 417x1000)
88 |     2) pick the preprocessing function according to preprocessing_type
89 |     :param image:
90 |     :param bboxes:
91 |     :param width:
92 |     :param height:
93 |     :param labels:
94 |     :param max_size:
95 |     :param min_size:
96 |     :param preprocessing_type:
97 |     :param caffe_pixel_means:
98 |     :return:
99 |     """
100 | 
101 |     if preprocessing_type == 'caffe':
102 |         preprocessing_fn = partial(_caffe_preprocessing, pixel_means=caffe_pixel_means)
103 |     elif preprocessing_type == 'tf':
104 |         preprocessing_fn = _tf_preprocessing
105 |     else:
106 |         raise ValueError('unknown preprocessing type {}'.format(preprocessing_type))
107 |     image = preprocessing_fn(image)
108 | 
109 |     height = tf.to_float(height[0])
110 |     width = tf.to_float(width[0])
111 |     scale1 = min_size / tf.minimum(height, width)
112 |     scale2 = max_size / tf.maximum(height, width)
113 |     scale = tf.minimum(scale1, scale2)
114 |     n_height = tf.to_int32(scale * height)
115 |     n_width = tf.to_int32(scale * width)
116 | 
117 |     image = tf.image.resize_bilinear(image, (n_height, n_width))
118 | 
119 |     channels = tf.split(axis=-1, num_or_size_splits=4, value=bboxes)
120 |     channels[0] = channels[0] * tf.to_float(n_height - 1)
121 |     channels[1] = channels[1] * tf.to_float(n_width - 1)
122 |     channels[2] = channels[2] * tf.to_float(n_height - 1)
123 |     channels[3] = channels[3] * tf.to_float(n_width - 1)
124 |     bboxes = tf.concat(channels, axis=-1)
125 | 
126 |     return image, bboxes, labels
127 | 
128 | 
129 | def preprocessing_eval_func(image, height, width,
130 |                             min_size, max_size, preprocessing_type, caffe_pixel_means=None):
131 |     """
132 |     Take an rgb image and preprocess it as follows:
133 |     1) resize so the short edge is at least min_size and the long edge at most max_size; when both cannot hold, the long edge wins
134 |     2) pick the preprocessing function according to preprocessing_type
135 |     """
136 |     if preprocessing_type == 'caffe':
137 |         preprocessing_fn = partial(_caffe_preprocessing, pixel_means=caffe_pixel_means)
138 |     elif preprocessing_type == 'tf':
139 |         preprocessing_fn = _tf_preprocessing
140 |     else:
141 |         raise ValueError('unknown preprocessing type {}'.format(preprocessing_type))
142 |     image = preprocessing_fn(image)
143 | 
144 |     height = tf.to_float(height[0])
145 |     width =
tf.to_float(width[0]) 146 | scale1 = min_size / tf.minimum(height, width) 147 | scale2 = max_size / tf.maximum(height, width) 148 | scale = tf.minimum(scale1, scale2) 149 | n_height = tf.to_int32(scale * height) 150 | n_width = tf.to_int32(scale * width) 151 | 152 | image = tf.image.resize_bilinear(image, (n_height, n_width)) 153 | 154 | return image, scale, tf.to_int32(height), tf.to_int32(width) 155 | -------------------------------------------------------------------------------- /object_detection/dataset/utils/tf_record_utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os 3 | 4 | 5 | def int64_feature(value): 6 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 7 | 8 | 9 | def int64_list_feature(value): 10 | return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) 11 | 12 | 13 | def bytes_feature(value): 14 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 15 | 16 | 17 | def bytes_list_feature(value): 18 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) 19 | 20 | 21 | def float_list_feature(value): 22 | return tf.train.Feature(float_list=tf.train.FloatList(value=value)) 23 | 24 | 25 | def recursive_parse_xml_to_dict(xml): 26 | if not xml: 27 | return {xml.tag: xml.text} 28 | result = {} 29 | for child in xml: 30 | child_result = recursive_parse_xml_to_dict(child) 31 | if child.tag != 'object': 32 | result[child.tag] = child_result[child.tag] 33 | else: 34 | if child.tag not in result: 35 | result[child.tag] = [] 36 | result[child.tag].append(child_result[child.tag]) 37 | return {xml.tag: result} 38 | 39 | 40 | def get_multi_tf_record_writers(base_path, file_pattern, year, number, mode): 41 | writers_path = [os.path.join(base_path, file_pattern % (year, mode, i)) for i in range(number)] 42 | return [tf.python_io.TFRecordWriter(writer_path) for writer_path in writers_path] 43 | -------------------------------------------------------------------------------- /object_detection/evaluation/detectron_pascal_evaluation_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | ############################################################################## 15 | # 16 | # Based on: 17 | # -------------------------------------------------------- 18 | # Fast/er R-CNN 19 | # Licensed under The MIT License [see LICENSE for details] 20 | # Written by Bharath Hariharan 21 | # -------------------------------------------------------- 22 | 23 | """Python implementation of the PASCAL VOC devkit's AP evaluation code.""" 24 | 25 | import logging 26 | import numpy as np 27 | import os 28 | import xml.etree.ElementTree as ET 29 | import pickle 30 | 31 | logger = logging.getLogger(__name__) 32 | 33 | 34 | def parse_rec(filename): 35 | """Parse a PASCAL VOC xml file.""" 36 | tree = ET.parse(filename) 37 | objects = [] 38 | for obj in tree.findall('object'): 39 | obj_struct = {} 40 | obj_struct['name'] = obj.find('name').text 41 | obj_struct['pose'] = obj.find('pose').text 42 | obj_struct['truncated'] = int(obj.find('truncated').text) 43 | obj_struct['difficult'] = int(obj.find('difficult').text) 44 | bbox = obj.find('bndbox') 45 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 46 | int(bbox.find('ymin').text), 47 | int(bbox.find('xmax').text), 48 | int(bbox.find('ymax').text)] 49 | objects.append(obj_struct) 50 | 51 | return objects 52 | 53 | 54 | def voc_ap(rec, prec, use_07_metric=False): 55 | """Compute VOC AP given precision and recall. If use_07_metric is true, uses 56 | the VOC 07 11-point method (default:False). 57 | """ 58 | if use_07_metric: 59 | # 11 point metric 60 | ap = 0. 61 | for t in np.arange(0., 1.1, 0.1): 62 | if np.sum(rec >= t) == 0: 63 | p = 0 64 | else: 65 | p = np.max(prec[rec >= t]) 66 | ap = ap + p / 11. 67 | else: 68 | # correct AP calculation 69 | # first append sentinel values at the end 70 | mrec = np.concatenate(([0.], rec, [1.])) 71 | mpre = np.concatenate(([0.], prec, [0.])) 72 | 73 | # compute the precision envelope 74 | for i in range(mpre.size - 1, 0, -1): 75 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 76 | 77 | # to calculate area under PR curve, look for points 78 | # where X axis (recall) changes value 79 | i = np.where(mrec[1:] != mrec[:-1])[0] 80 | 81 | # and sum (\Delta recall) * prec 82 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 83 | return ap 84 | 85 | 86 | def voc_eval(detpath, 87 | annopath, 88 | imagesetfile, 89 | classname, 90 | cachedir, 91 | ovthresh=0.5, 92 | use_07_metric=True): 93 | """rec, prec, ap = voc_eval(detpath, 94 | annopath, 95 | imagesetfile, 96 | classname, 97 | [ovthresh], 98 | [use_07_metric]) 99 | 100 | Top level function that does the PASCAL VOC evaluation. 101 | 102 | detpath: Path to detections 103 | detpath.format(classname) should produce the detection results file. 104 | annopath: Path to annotations 105 | annopath.format(imagename) should be the xml annotations file. 106 | imagesetfile: Text file containing the list of images, one image per line. 
107 |     classname: Category name
108 |     cachedir: Directory for caching the annotations
109 |     [ovthresh]: Overlap threshold (default = 0.5)
110 |     [use_07_metric]: Whether to use VOC07's 11 point AP computation
111 |         (default True)
112 |     """
113 |     # assumes detections are in detpath.format(classname)
114 |     # assumes annotations are in annopath.format(imagename)
115 |     # assumes imagesetfile is a text file with each line an image name
116 |     # cachedir caches the annotations in a pickle file
117 |
118 |     # first load gt
119 |     if not os.path.isdir(cachedir):
120 |         os.mkdir(cachedir)
121 |     imageset = os.path.splitext(os.path.basename(imagesetfile))[0]
122 |     cachefile = os.path.join(cachedir, imageset + '_annots.pkl')
123 |     # read list of images
124 |     with open(imagesetfile, 'r') as f:
125 |         lines = f.readlines()
126 |     imagenames = [x.strip() for x in lines]
127 |
128 |     if not os.path.isfile(cachefile):
129 |         # load annots
130 |         recs = {}
131 |         for i, imagename in enumerate(imagenames):
132 |             recs[imagename] = parse_rec(annopath.format(imagename))
133 |             if i % 100 == 0:
134 |                 logger.info(
135 |                     'Reading annotation for {:d}/{:d}'.format(
136 |                         i + 1, len(imagenames)))
137 |         # save
138 |         logger.info('Saving cached annotations to {:s}'.format(cachefile))
139 |         with open(cachefile, 'wb') as f:
140 |             pickle.dump(recs, f)
141 |     else:
142 |         with open(cachefile, 'rb') as f:
143 |             recs = pickle.load(f)
144 |
145 |     # extract gt objects for this class
146 |     class_recs = {}
147 |     npos = 0
148 |     for imagename in imagenames:
149 |         R = [obj for obj in recs[imagename] if obj['name'] == classname]
150 |         bbox = np.array([x['bbox'] for x in R])
151 |         difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
152 |         det = [False] * len(R)
153 |         npos = npos + sum(~difficult)
154 |         class_recs[imagename] = {'bbox': bbox,
155 |                                  'difficult': difficult,
156 |                                  'det': det}
157 |
158 |     # read dets
159 |     detfile = detpath.format(classname)
160 |     with open(detfile, 'r') as f:
161 |         lines = f.readlines()
162 |
163 |     splitlines = [x.strip().split(' ') for x in lines]
164 |     image_ids = [x[0] for x in splitlines]
165 |     confidence = np.array([float(x[1]) for x in splitlines])
166 |     BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
167 |
168 |     # sort by confidence
169 |     sorted_ind = np.argsort(-confidence)
170 |     BB = BB[sorted_ind, :]
171 |     image_ids = [image_ids[x] for x in sorted_ind]
172 |
173 |     # go down dets and mark TPs and FPs
174 |     nd = len(image_ids)
175 |     tp = np.zeros(nd)
176 |     fp = np.zeros(nd)
177 |     for d in range(nd):
178 |         R = class_recs[image_ids[d]]
179 |         bb = BB[d, :].astype(float)
180 |         ovmax = -np.inf
181 |         BBGT = R['bbox'].astype(float)
182 |
183 |         if BBGT.size > 0:
184 |             # compute overlaps
185 |             # intersection
186 |             ixmin = np.maximum(BBGT[:, 0], bb[0])
187 |             iymin = np.maximum(BBGT[:, 1], bb[1])
188 |             ixmax = np.minimum(BBGT[:, 2], bb[2])
189 |             iymax = np.minimum(BBGT[:, 3], bb[3])
190 |             iw = np.maximum(ixmax - ixmin + 1., 0.)
191 |             ih = np.maximum(iymax - iymin + 1., 0.)
192 |             inters = iw * ih
193 |
194 |             # union
195 |             uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
196 |                    (BBGT[:, 2] - BBGT[:, 0] + 1.) *
197 |                    (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
198 |
199 |             overlaps = inters / uni
200 |             ovmax = np.max(overlaps)
201 |             jmax = np.argmax(overlaps)
202 |
203 |         if ovmax > ovthresh:
204 |             if not R['difficult'][jmax]:
205 |                 if not R['det'][jmax]:
206 |                     tp[d] = 1.
207 |                     R['det'][jmax] = 1
208 |                 else:
209 |                     fp[d] = 1.
210 |         else:
211 |             fp[d] = 1.
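    # Added illustrative note: suppose npos = 2 and, after sorting by
    # confidence, tp = [1, 0, 1] and fp = [0, 1, 0]; the cumulative sums below
    # give tp = [1, 1, 2], fp = [0, 1, 1], hence rec = [0.5, 0.5, 1.0] and
    # prec = [1.0, 0.5, 0.667], which voc_ap integrates into a single AP value.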
212 |
213 |     # compute precision recall
214 |     fp = np.cumsum(fp)
215 |     tp = np.cumsum(tp)
216 |     rec = tp / float(npos)
217 |     # avoid divide by zero in case the first detection matches a difficult
218 |     # ground truth
219 |     prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
220 |     ap = voc_ap(rec, prec, use_07_metric)
221 |
222 |     return rec, prec, ap
223 |
--------------------------------------------------------------------------------
/object_detection/evaluation/pascal_eval_files_utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from tqdm import tqdm
4 | from object_detection.dataset.eval_pascal_tf_dataset import get_dataset_by_local_file, get_dataset_by_tf_records
5 | from object_detection.utils.bbox_transform import decode_bbox_with_mean_and_std
6 | from object_detection.utils.bbox_tf import bboxes_clip_filter
7 |
8 | num_classes = 21
9 | class_list = ('__background__',  # always index 0
10 |               'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair',
11 |               'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant',
12 |               'sheep', 'sofa', 'train', 'tvmonitor')
13 | class_name_to_id_dict = dict(list(zip(class_list, list(range(num_classes)))))
14 | class_id_to_name_dict = dict(list(zip(list(range(num_classes)), class_list)))
15 |
16 | __all__ = ['get_prediction_files']
17 |
18 |
19 | def get_prediction_files(cur_model,
20 |                          dataset_type='tf', image_format='bgr',
21 |                          preprocessing_type='caffe', caffe_pixel_means=None,
22 |                          min_edge=600, max_edge=1000,
23 |                          data_root_path=None,
24 |                          mode='test',
25 |                          result_file_format='/path/to/results/{:s}.txt',
26 |                          score_threshold=0.0, iou_threshold=0.5,
27 |                          max_objects_per_class=50, max_objects_per_image=50,
28 |                          target_means=None, target_stds=None,
29 |                          min_size=10):
30 |     """
31 |     Generate per-class detection result files with the given model.
32 |     :param cur_model: model with pre-trained weights already loaded
33 |     :param dataset_type: evaluation dataset type, either 'cv2' or 'tf'
34 |     :param image_format:
35 |     :param caffe_pixel_means:
36 |     :param preprocessing_type:
37 |     :param min_edge:
38 |     :param max_edge:
39 |     :param data_root_path: root directory of the dataset
40 |     :param mode: dataset split to predict on: train, val, trainval or test
41 |     :param result_file_format: `result_file_format.format(class_name)` is the output file path for that class
42 |     :param score_threshold: minimum score of a prediction
43 |     :param iou_threshold: iou threshold used during nms
44 |     :param max_objects_per_class: maximum number of predictions per class in one image
45 |     :param max_objects_per_image: maximum number of predictions in one image
46 |     :param target_means: parameter of decode_bbox_with_mean_and_std
47 |     :param target_stds: parameter of decode_bbox_with_mean_and_std
48 |     :param min_size: minimum edge length (in pixels) of final boxes
49 |     :return:
50 |     """
51 |     if image_format not in ['bgr', 'rgb']:
52 |         raise ValueError('unknown image format {}'.format(image_format))
53 |
54 |     if dataset_type == 'cv2':
55 |         eval_dataset, image_sets = get_dataset_by_local_file(mode, data_root_path,
56 |                                                              image_format=image_format,
57 |                                                              preprocessing_type=preprocessing_type,
58 |                                                              caffe_pixel_means=caffe_pixel_means,
59 |                                                              min_edge=min_edge, max_edge=max_edge)
60 |     elif dataset_type == 'tf':
61 |         eval_dataset, image_sets = get_dataset_by_tf_records(mode, data_root_path,
62 |                                                              preprocessing_type=preprocessing_type,
63 |                                                              caffe_pixel_means=caffe_pixel_means,
64 |                                                              min_edge=min_edge, max_edge=max_edge)
65 |     else:
66 |         raise ValueError('unknown dataset type {}'.format(dataset_type))
67 |
68 |     if target_stds is None:
69 |         target_stds = [0.1, 0.1, 0.2, 0.2]
70 |     if target_means is None:
71 |         target_means = [0, 0, 0, 0]
72 |
73 |     all_boxes = [[[] for _ in range(len(image_sets))]
74 |                  for _ in range(num_classes)]
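    # Added note: all_boxes[class_id][image_index] ends up holding an (N, 5)
    # array of [xmin, ymin, xmax, ymax, score] rows, the same layout the
    # py-faster-rcnn evaluation code uses.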
75 |     i = 0
76 |     for img, img_scale, raw_h, raw_w in tqdm(eval_dataset):
77 |         raw_h = tf.to_float(raw_h)
78 |         raw_w = tf.to_float(raw_w)
79 |         scores, roi_txtytwth, rois = cur_model.im_detect(img, img_scale)
80 |         roi_txtytwth = tf.reshape(roi_txtytwth, [-1, num_classes, 4])
81 |         for j in range(1, num_classes):
82 |             inds = tf.where(scores[:, j] > score_threshold)[:, 0]
83 |             cls_scores = tf.gather(scores[:, j], inds)
84 |             cls_boxes = decode_bbox_with_mean_and_std(tf.gather(rois, inds),
85 |                                                       tf.gather(roi_txtytwth[:, j, :], inds),
86 |                                                       target_means=target_means, target_stds=target_stds)
87 |             cls_boxes, inds = bboxes_clip_filter(cls_boxes, 0, raw_h, raw_w, min_size)
88 |             cls_scores = tf.gather(cls_scores, inds)
89 |             keep = tf.image.non_max_suppression(cls_boxes, cls_scores, max_objects_per_class,
90 |                                                 iou_threshold=iou_threshold)
91 |
92 |             cls_scores = cls_scores.numpy()
93 |             cls_boxes = cls_boxes.numpy()
94 |             cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
95 |                 .astype(np.float32, copy=False)
96 |             cls_dets = cls_dets[keep.numpy(), :]
97 |             all_boxes[j][i] = cls_dets
98 |
99 |         if max_objects_per_image > 0:
100 |             image_scores = np.hstack([all_boxes[j][i][:, -1]
101 |                                       for j in range(1, num_classes)])
102 |             if len(image_scores) > max_objects_per_image:
103 |                 image_thresh = np.sort(image_scores)[-max_objects_per_image]
104 |                 for j in range(1, num_classes):
105 |                     keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
106 |                     all_boxes[j][i] = all_boxes[j][i][keep, :]
107 |         i += 1
108 |
109 |     for cls_ind, cls in enumerate(class_list):
110 |         if cls == '__background__':
111 |             continue
112 |         tf.logging.info('Writing {} VOC results file'.format(cls))
113 |         filename = result_file_format.format(cls)
114 |         with open(filename, 'wt') as f:
115 |             for im_ind, index in enumerate(image_sets):
116 |                 dets = np.array(all_boxes[cls_ind][im_ind])
117 |                 if dets.size == 0:
118 |                     continue
119 |                 # the VOCdevkit expects 1-based indices
120 |                 for k in range(dets.shape[0]):
121 |                     f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
122 |                             format(index, dets[k, -1], dets[k, 0] + 1, dets[k, 1] + 1, dets[k, 2] + 1, dets[k, 3] + 1))
123 |
--------------------------------------------------------------------------------
/object_detection/evaluation/pascal_voc_map_utils.py:
--------------------------------------------------------------------------------
1 | # copy from https://github.com/chenyuntc/simple-faster-rcnn-pytorch/blob/master/utils/eval_tool.py
2 | from __future__ import division
3 |
4 | from collections import defaultdict
5 | import itertools
6 | import numpy as np
7 | import six
8 | from object_detection.utils.bbox_tf import pairwise_iou
9 |
10 |
11 | def eval_detection_voc(
12 |         pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels,
13 |         gt_difficults=None,
14 |         iou_thresh=0.5, use_07_metric=False):
15 |     """Calculate average precisions based on evaluation code of PASCAL VOC.
16 |
17 |     This function evaluates predicted bounding boxes obtained from a dataset
18 |     which has :math:`N` images by using average precision for each class.
19 |     The code is based on the evaluation code used in PASCAL VOC Challenge.
20 |
21 |     Args:
22 |         pred_bboxes (iterable of numpy.ndarray): An iterable of :math:`N`
23 |             sets of bounding boxes.
24 |             Its index corresponds to an index for the base dataset.
25 |             Each element of :obj:`pred_bboxes` is a set of coordinates
26 |             of bounding boxes.
This is an array whose shape is :math:`(R, 4)`, 27 | where :math:`R` corresponds 28 | to the number of bounding boxes, which may vary among boxes. 29 | The second axis corresponds to 30 | :math:`y_{min}, x_{min}, y_{max}, x_{max}` of a bounding box. 31 | pred_labels (iterable of numpy.ndarray): An iterable of labels. 32 | Similar to :obj:`pred_bboxes`, its index corresponds to an 33 | index for the base dataset. Its length is :math:`N`. 34 | pred_scores (iterable of numpy.ndarray): An iterable of confidence 35 | scores for predicted bounding boxes. Similar to :obj:`pred_bboxes`, 36 | its index corresponds to an index for the base dataset. 37 | Its length is :math:`N`. 38 | gt_bboxes (iterable of numpy.ndarray): An iterable of ground truth 39 | bounding boxes 40 | whose length is :math:`N`. An element of :obj:`gt_bboxes` is a 41 | bounding box whose shape is :math:`(R, 4)`. Note that the number of 42 | bounding boxes in each image does not need to be same as the number 43 | of corresponding predicted boxes. 44 | gt_labels (iterable of numpy.ndarray): An iterable of ground truth 45 | labels which are organized similarly to :obj:`gt_bboxes`. 46 | gt_difficults (iterable of numpy.ndarray): An iterable of boolean 47 | arrays which is organized similarly to :obj:`gt_bboxes`. 48 | This tells whether the 49 | corresponding ground truth bounding box is difficult or not. 50 | By default, this is :obj:`None`. In that case, this function 51 | considers all bounding boxes to be not difficult. 52 | iou_thresh (float): A prediction is correct if its Intersection over 53 | Union with the ground truth is above this value. 54 | use_07_metric (bool): Whether to use PASCAL VOC 2007 evaluation metric 55 | for calculating average precision. The default value is 56 | :obj:`False`. 57 | 58 | Returns: 59 | dict: 60 | 61 | The keys, value-types and the description of the values are listed 62 | below. 63 | 64 | * **ap** (*numpy.ndarray*): An array of average precisions. \ 65 | The :math:`l`-th value corresponds to the average precision \ 66 | for class :math:`l`. If class :math:`l` does not exist in \ 67 | either :obj:`pred_labels` or :obj:`gt_labels`, the corresponding \ 68 | value is set to :obj:`numpy.nan`. 69 | * **map** (*float*): The average of Average Precisions over classes. 70 | 71 | """ 72 | 73 | prec, rec = calc_detection_voc_prec_rec( 74 | pred_bboxes, pred_labels, pred_scores, 75 | gt_bboxes, gt_labels, gt_difficults, 76 | iou_thresh=iou_thresh) 77 | 78 | ap = calc_detection_voc_ap(prec, rec, use_07_metric=use_07_metric) 79 | 80 | return {'ap': ap, 'map': np.nanmean(ap)} 81 | 82 | 83 | def calc_detection_voc_prec_rec( 84 | pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels, 85 | gt_difficults=None, 86 | iou_thresh=0.5): 87 | """Calculate precision and recall based on evaluation code of PASCAL VOC. 88 | 89 | This function calculates precision and recall of 90 | predicted bounding boxes obtained from a dataset which has :math:`N` 91 | images. 92 | The code is based on the evaluation code used in PASCAL VOC Challenge. 93 | 94 | Args: 95 | pred_bboxes (iterable of numpy.ndarray): An iterable of :math:`N` 96 | sets of bounding boxes. 97 | Its index corresponds to an index for the base dataset. 98 | Each element of :obj:`pred_bboxes` is a set of coordinates 99 | of bounding boxes. This is an array whose shape is :math:`(R, 4)`, 100 | where :math:`R` corresponds 101 | to the number of bounding boxes, which may vary among boxes. 
102 |             The second axis corresponds to
103 |             :math:`y_{min}, x_{min}, y_{max}, x_{max}` of a bounding box.
104 |         pred_labels (iterable of numpy.ndarray): An iterable of labels.
105 |             Similar to :obj:`pred_bboxes`, its index corresponds to an
106 |             index for the base dataset. Its length is :math:`N`.
107 |         pred_scores (iterable of numpy.ndarray): An iterable of confidence
108 |             scores for predicted bounding boxes. Similar to :obj:`pred_bboxes`,
109 |             its index corresponds to an index for the base dataset.
110 |             Its length is :math:`N`.
111 |         gt_bboxes (iterable of numpy.ndarray): An iterable of ground truth
112 |             bounding boxes
113 |             whose length is :math:`N`. An element of :obj:`gt_bboxes` is a
114 |             bounding box whose shape is :math:`(R, 4)`. Note that the number of
115 |             bounding boxes in each image does not need to be same as the number
116 |             of corresponding predicted boxes.
117 |         gt_labels (iterable of numpy.ndarray): An iterable of ground truth
118 |             labels which are organized similarly to :obj:`gt_bboxes`.
119 |         gt_difficults (iterable of numpy.ndarray): An iterable of boolean
120 |             arrays which is organized similarly to :obj:`gt_bboxes`.
121 |             This tells whether the
122 |             corresponding ground truth bounding box is difficult or not.
123 |             By default, this is :obj:`None`. In that case, this function
124 |             considers all bounding boxes to be not difficult.
125 |         iou_thresh (float): A prediction is correct if its Intersection over
126 |             Union with the ground truth is above this value.
127 |
128 |     Returns:
129 |         tuple of two lists:
130 |         This function returns two lists: :obj:`prec` and :obj:`rec`.
131 |
132 |         * :obj:`prec`: A list of arrays. :obj:`prec[l]` is precision \
133 |             for class :math:`l`. If class :math:`l` does not exist in \
134 |             either :obj:`pred_labels` or :obj:`gt_labels`, :obj:`prec[l]` is \
135 |             set to :obj:`None`.
136 |         * :obj:`rec`: A list of arrays. :obj:`rec[l]` is recall \
137 |             for class :math:`l`. If class :math:`l` that is not marked as \
138 |             difficult does not exist in \
139 |             :obj:`gt_labels`, :obj:`rec[l]` is \
140 |             set to :obj:`None`.
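        Example (editor's addition, illustrative only):
            >>> prec, rec = calc_detection_voc_prec_rec(
            ...     pred_bboxes, pred_labels, pred_scores,
            ...     gt_bboxes, gt_labels, gt_difficults=None, iou_thresh=0.5)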
141 | 142 | """ 143 | 144 | pred_bboxes = iter(pred_bboxes) 145 | pred_labels = iter(pred_labels) 146 | pred_scores = iter(pred_scores) 147 | gt_bboxes = iter(gt_bboxes) 148 | gt_labels = iter(gt_labels) 149 | if gt_difficults is None: 150 | gt_difficults = itertools.repeat(None) 151 | else: 152 | gt_difficults = iter(gt_difficults) 153 | 154 | n_pos = defaultdict(int) 155 | score = defaultdict(list) 156 | match = defaultdict(list) 157 | 158 | for pred_bbox, pred_label, pred_score, gt_bbox, gt_label, gt_difficult in \ 159 | six.moves.zip( 160 | pred_bboxes, pred_labels, pred_scores, 161 | gt_bboxes, gt_labels, gt_difficults): 162 | 163 | if gt_difficult is None: 164 | gt_difficult = np.zeros(gt_bbox.shape[0], dtype=bool) 165 | 166 | for l in np.unique(np.concatenate((pred_label, gt_label)).astype(int)): 167 | pred_mask_l = pred_label == l 168 | pred_bbox_l = pred_bbox[pred_mask_l] 169 | pred_score_l = pred_score[pred_mask_l] 170 | # sort by score 171 | order = pred_score_l.argsort()[::-1] 172 | pred_bbox_l = pred_bbox_l[order] 173 | pred_score_l = pred_score_l[order] 174 | 175 | gt_mask_l = gt_label == l 176 | gt_bbox_l = gt_bbox[gt_mask_l] 177 | gt_difficult_l = gt_difficult[gt_mask_l] 178 | 179 | n_pos[l] += np.logical_not(gt_difficult_l).sum() 180 | score[l].extend(pred_score_l) 181 | 182 | if len(pred_bbox_l) == 0: 183 | continue 184 | if len(gt_bbox_l) == 0: 185 | match[l].extend((0,) * pred_bbox_l.shape[0]) 186 | continue 187 | 188 | # VOC evaluation follows integer typed bounding boxes. 189 | pred_bbox_l = pred_bbox_l.copy() 190 | pred_bbox_l[:, 2:] += 1 191 | gt_bbox_l = gt_bbox_l.copy() 192 | gt_bbox_l[:, 2:] += 1 193 | 194 | iou = pairwise_iou(pred_bbox_l, gt_bbox_l).numpy() 195 | gt_index = iou.argmax(axis=1) 196 | # set -1 if there is no matching ground truth 197 | gt_index[iou.max(axis=1) < iou_thresh] = -1 198 | del iou 199 | 200 | selec = np.zeros(gt_bbox_l.shape[0], dtype=bool) 201 | for gt_idx in gt_index: 202 | if gt_idx >= 0: 203 | if gt_difficult_l[gt_idx]: 204 | match[l].append(-1) 205 | else: 206 | if not selec[gt_idx]: 207 | match[l].append(1) 208 | else: 209 | match[l].append(0) 210 | selec[gt_idx] = True 211 | else: 212 | match[l].append(0) 213 | 214 | for iter_ in ( 215 | pred_bboxes, pred_labels, pred_scores, 216 | gt_bboxes, gt_labels, gt_difficults): 217 | if next(iter_, None) is not None: 218 | raise ValueError('Length of input iterables need to be same.') 219 | 220 | n_fg_class = max(n_pos.keys()) + 1 221 | prec = [None] * n_fg_class 222 | rec = [None] * n_fg_class 223 | 224 | for l in n_pos.keys(): 225 | score_l = np.array(score[l]) 226 | match_l = np.array(match[l], dtype=np.int8) 227 | 228 | order = score_l.argsort()[::-1] 229 | match_l = match_l[order] 230 | 231 | tp = np.cumsum(match_l == 1) 232 | fp = np.cumsum(match_l == 0) 233 | 234 | # If an element of fp + tp is 0, 235 | # the corresponding element of prec[l] is nan. 236 | prec[l] = tp / (fp + tp) 237 | # If n_pos[l] is 0, rec[l] is None. 238 | if n_pos[l] > 0: 239 | rec[l] = tp / n_pos[l] 240 | 241 | return prec, rec 242 | 243 | 244 | def calc_detection_voc_ap(prec, rec, use_07_metric=False): 245 | """Calculate average precisions based on evaluation code of PASCAL VOC. 246 | 247 | This function calculates average precisions 248 | from given precisions and recalls. 249 | The code is based on the evaluation code used in PASCAL VOC Challenge. 250 | 251 | Args: 252 | prec (list of numpy.array): A list of arrays. 253 | :obj:`prec[l]` indicates precision for class :math:`l`. 
254 |             If :obj:`prec[l]` is :obj:`None`, this function returns
255 |             :obj:`numpy.nan` for class :math:`l`.
256 |         rec (list of numpy.array): A list of arrays.
257 |             :obj:`rec[l]` indicates recall for class :math:`l`.
258 |             If :obj:`rec[l]` is :obj:`None`, this function returns
259 |             :obj:`numpy.nan` for class :math:`l`.
260 |         use_07_metric (bool): Whether to use PASCAL VOC 2007 evaluation metric
261 |             for calculating average precision. The default value is
262 |             :obj:`False`.
263 |
264 |     Returns:
265 |         ~numpy.ndarray:
266 |         This function returns an array of average precisions.
267 |         The :math:`l`-th value corresponds to the average precision
268 |         for class :math:`l`. If :obj:`prec[l]` or :obj:`rec[l]` is
269 |         :obj:`None`, the corresponding value is set to :obj:`numpy.nan`.
270 |
271 |     """
272 |
273 |     n_fg_class = len(prec)
274 |     ap = np.empty(n_fg_class)
275 |     for l in six.moves.range(n_fg_class):
276 |         if prec[l] is None or rec[l] is None:
277 |             ap[l] = np.nan
278 |             continue
279 |
280 |         if use_07_metric:
281 |             # 11 point metric
282 |             ap[l] = 0
283 |             for t in np.arange(0., 1.1, 0.1):
284 |                 if np.sum(rec[l] >= t) == 0:
285 |                     p = 0
286 |                 else:
287 |                     p = np.max(np.nan_to_num(prec[l])[rec[l] >= t])
288 |                 ap[l] += p / 11
289 |         else:
290 |             # correct AP calculation
291 |             # first append sentinel values at the end
292 |             mpre = np.concatenate(([0], np.nan_to_num(prec[l]), [0]))
293 |             mrec = np.concatenate(([0], rec[l], [1]))
294 |
295 |             mpre = np.maximum.accumulate(mpre[::-1])[::-1]
296 |
297 |             # to calculate area under PR curve, look for points
298 |             # where X axis (recall) changes value
299 |             i = np.where(mrec[1:] != mrec[:-1])[0]
300 |
301 |             # and sum (\Delta recall) * prec
302 |             ap[l] = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
303 |
304 |     return ap
305 |
--------------------------------------------------------------------------------
/object_detection/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irvingzhang0512/tf_eager_object_detection/e977217cb9d5fc2975292d55b1e5cae484f9e0dd/object_detection/model/__init__.py
--------------------------------------------------------------------------------
/object_detection/model/anchor_target.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from object_detection.utils.bbox_transform import encode_bbox_with_mean_and_std
3 | from object_detection.utils.bbox_tf import pairwise_iou, bboxes_range_filter
4 | from tensorflow.python.platform import tf_logging
5 |
6 |
7 | class AnchorTarget(tf.keras.Model):
8 |     def __init__(self,
9 |                  pos_iou_threshold=0.7,
10 |                  neg_iou_threshold=0.3,
11 |                  total_num_samples=256,
12 |                  max_pos_samples=128,
13 |                  target_means=None,
14 |                  target_stds=None):
15 |         super().__init__()
16 |
17 |         self._pos_iou_threshold = pos_iou_threshold
18 |         self._neg_iou_threshold = neg_iou_threshold
19 |         self._total_num_samples = total_num_samples
20 |         self._max_pos_samples = max_pos_samples
21 |
22 |         if target_stds is None:
23 |             target_stds = [1, 1, 1, 1]
24 |         if target_means is None:
25 |             target_means = [0, 0, 0, 0]
26 |         self._target_means = target_means
27 |         self._target_stds = target_stds
28 |
29 |     def call(self, inputs, training=None, mask=None):
30 |         """
31 |         No trainable parameters.
32 |         Generates the training targets for the RPN.
33 |         Overall procedure:
34 |         1. Filter the anchors, keeping only those within the image bounds; all later steps use the filtered set.
35 |         2. Compute the iou between the anchors and gt_bboxes (the bboxes of the input data).
36 |         3. Mark anchors with max_iou > 0.7 against gt_bboxes as positives, and anchors with max_iou < 0.3 as negatives.
37 |         4. For each gt_bbox, mark the anchor with the highest iou as positive.
38 |         5. Limit the sample counts: at most max_pos_samples positives, and at most total_num_samples positives plus negatives.
39 |         6. Finally output 4 results:
40 |         1) labels of all anchors [all_anchors_num, ]: -1 means excluded from training, 0 negative, 1 positive
41 |         2) txtytwth targets of all anchors [all_anchors_num, 4]; only positives contribute to training
42 |         3) bbox_inside_weights for the smooth l1 loss [all_anchors_num, 4]
43 |         4) bbox_outside_weights for the smooth l1 loss [all_anchors_num, 4]
44 |         :param inputs:
45 |         :param training:
46 |         :param mask:
47 |         :return:
48 |         """
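        # Added illustrative note: with the defaults above (total_num_samples=256,
        # max_pos_samples=128), if only 20 anchors pass the 0.7 iou test then all
        # 20 stay positive and up to 236 negatives are kept for the rpn loss.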
49 |         gt_bboxes, image_shape, all_anchors = inputs
50 |         total_anchors = all_anchors.get_shape().as_list()[0]
51 |
52 |         # 1. filter anchors, keeping only those within the image bounds; all later steps use the filtered set
53 |         tf_logging.debug('anchor target, before filter has %d anchors' % all_anchors.shape[0])
54 |         selected_anchor_idx = bboxes_range_filter(all_anchors, image_shape[0], image_shape[1])
55 |         anchors = tf.gather(all_anchors, selected_anchor_idx)
56 |         tf_logging.debug('anchor target, after filter has %d anchors' % anchors.shape[0])
57 |
58 |         # preparations
59 |         labels = -tf.ones((anchors.shape[0],), tf.int32)
60 |         overlaps = pairwise_iou(anchors, gt_bboxes)  # [anchors_size, gt_bboxes_size]
61 |         argmax_overlaps = tf.argmax(overlaps, axis=1, output_type=tf.int32)
62 |         max_overlaps = tf.reduce_max(overlaps, axis=1)
63 |         gt_max_overlaps = tf.reduce_max(overlaps, axis=0)
64 |         gt_argmax_overlaps = tf.where(tf.equal(overlaps, gt_max_overlaps))[:, 0]
65 |
66 |         # set labels
67 |         labels = tf.where(max_overlaps < self._neg_iou_threshold, tf.zeros_like(labels), labels)
68 |         labels = tf.scatter_update(tf.Variable(labels), gt_argmax_overlaps, 1)
69 |         labels = tf.where(max_overlaps >= self._pos_iou_threshold, tf.ones_like(labels), labels)
70 |
71 |         # sample positives and negatives
72 |         fg_inds = tf.where(tf.equal(labels, 1))[:, 0]
73 |         if tf.size(fg_inds) > self._max_pos_samples:
74 |             fg_inds = tf.random_shuffle(fg_inds)
75 |             disable_inds = fg_inds[self._max_pos_samples:]
76 |             fg_inds = fg_inds[:self._max_pos_samples]
77 |             labels = tf.scatter_update(tf.Variable(labels), disable_inds, -1)
78 |         num_bg = self._total_num_samples - tf.reduce_sum(tf.to_int32(tf.equal(labels, 1)))
79 |         bg_inds = tf.where(tf.equal(labels, 0))[:, 0]
80 |         if tf.size(bg_inds) > num_bg:
81 |             bg_inds = tf.random_shuffle(bg_inds)
82 |             disable_inds = bg_inds[num_bg:]
83 |             bg_inds = bg_inds[:num_bg]
84 |             labels = tf.scatter_update(tf.Variable(labels), disable_inds, -1)
85 |         tf.logging.debug('anchor target generate %d fgs and %d bgs.' % (tf.size(fg_inds), tf.size(bg_inds)))
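        # Added note: every in-bounds anchor is regressed towards its highest-iou
        # gt box below; bbox_inside_weights then zeroes the reg loss for all
        # anchors that were not labelled positive.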
86 |
87 |         # compute bbox targets, used as the ground truth of the rpn reg loss
88 |         bboxes_targets = encode_bbox_with_mean_and_std(anchors, tf.gather(gt_bboxes, argmax_overlaps),
89 |                                                        target_means=self._target_means,
90 |                                                        target_stds=self._target_stds)
91 |
92 |         # only positives contribute to the reg loss
93 |         bbox_inside_weights = tf.zeros((anchors.shape[0], 4), dtype=tf.float32)
94 |         bbox_inside_weights = tf.scatter_update(tf.Variable(bbox_inside_weights),
95 |                                                 tf.where(tf.equal(labels, 1))[:, 0], 1)
96 |
97 |         # effectively divides the reg loss by num_rpn_samples
98 |         bbox_outside_weights = tf.zeros((anchors.shape[0], 4), dtype=tf.float32)
99 |         num_examples = tf.reduce_sum(tf.to_float(labels >= 0))
100 |         bbox_outside_weights = tf.scatter_update(tf.Variable(bbox_outside_weights),
101 |                                                  tf.where(labels >= 0)[:, 0], 1.0 / num_examples)
102 |
103 |         # generate the final outputs
104 |         return tf.stop_gradient(_unmap(labels, total_anchors, selected_anchor_idx, -1)), \
105 |             tf.stop_gradient(_unmap(bboxes_targets, total_anchors, selected_anchor_idx, 0)), \
106 |             tf.stop_gradient(_unmap(bbox_inside_weights, total_anchors, selected_anchor_idx, 0)), \
107 |             tf.stop_gradient(_unmap(bbox_outside_weights, total_anchors, selected_anchor_idx, 0))
108 |
109 |
110 | def _unmap(data, count, inds, fill=0):
111 |     """
112 |     Maps results computed on the filtered anchors back to the full anchor set; essentially an index conversion.
113 |     :param data:
114 |     :param count:
115 |     :param inds:
116 |     :param fill:
117 |     :return:
118 |     """
119 |     if len(data.shape) == 1:
120 |         ret = tf.ones([count], dtype=tf.float32) * fill
121 |         ret = tf.scatter_update(tf.Variable(ret), inds, tf.to_float(data))
122 |     else:
123 |         ret = tf.ones([count, ] + data.get_shape().as_list()[1:], dtype=tf.float32) * fill
124 |         ret = tf.scatter_update(tf.Variable(ret), inds, tf.to_float(data))
125 |     return ret
126 |
--------------------------------------------------------------------------------
/object_detection/model/faster_rcnn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irvingzhang0512/tf_eager_object_detection/e977217cb9d5fc2975292d55b1e5cae484f9e0dd/object_detection/model/faster_rcnn/__init__.py
--------------------------------------------------------------------------------
/object_detection/model/faster_rcnn/vgg16_faster_rcnn.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from object_detection.model.faster_rcnn.base_faster_rcnn_model import BaseFasterRcnn
3 |
4 | __all__ = ['Vgg16FasterRcnn']
5 | layers = tf.keras.layers
6 | VGG_16_WEIGHTS_PATH = ('https://github.com/fchollet/deep-learning-models/'
7 |                        'releases/download/v0.1/'
8 |                        'vgg16_weights_tf_dim_ordering_tf_kernels.h5')
9 |
10 |
11 | class Vgg16FasterRcnn(BaseFasterRcnn):
12 |     def __init__(self,
13 |                  # parameters specific to Vgg16FasterRcnn
14 |                  slim_ckpt_file_path=None,
15 |                  roi_head_keep_dropout_rate=0.5,
16 |                  roi_feature_size=(7, 7, 512),
17 |
18 |                  # common parameters
19 |                  num_classes=21,
20 |                  weight_decay=0.0001,
21 |                  ratios=(0.5, 1.0, 2.0),
22 |                  scales=(8, 16, 32),
23 |                  extractor_stride=16,
24 |
25 |                  # parameters shared by region proposal & anchor target
26 |                  rpn_proposal_means=(0, 0, 0, 0),
27 |                  rpn_proposal_stds=(1.0, 1.0, 1.0, 1.0),
28 |
29 |                  # region proposal parameters
30 |                  rpn_proposal_num_pre_nms_train=12000,
31 |                  rpn_proposal_num_post_nms_train=2000,
32 |                  rpn_proposal_num_pre_nms_test=6000,
33 |                  rpn_proposal_num_post_nms_test=300,
34 |                  rpn_proposal_nms_iou_threshold=0.7,
35 |
36 |                  # anchor target and related loss parameters
37 |                  rpn_sigma=3.0,
38 |                  rpn_training_pos_iou_threshold=0.7,
39 |                  rpn_training_neg_iou_threshold=0.3,
40 |                  rpn_training_total_num_samples=256,
41 |                  rpn_training_max_pos_samples=128,
42 |
43 |                  # roi head & proposal target parameters
44 |                  roi_proposal_means=(0, 0, 0, 0),
45 |                  roi_proposal_stds=(0.1, 0.1, 0.2, 0.2),
46 |
47 |                  # roi pooling parameters
48 |                  roi_pool_size=7,
49 |                  roi_pooling_max_pooling_flag=True,
50 |
51 |                  # proposal target and related loss parameters
52 |                  roi_sigma=1,
53 |                  roi_training_pos_iou_threshold=0.5,
54 |                  roi_training_neg_iou_threshold=0.1,
55 |                  roi_training_total_num_samples=128,
56 |                  roi_training_max_pos_samples=32,
57 |
58 |                  # prediction parameters
59 |                  prediction_max_objects_per_image=50,
60 |                  prediction_max_objects_per_class=50,
61 |                  prediction_nms_iou_threshold=0.3,
62 |                  prediction_score_threshold=0.3, ):
63 |         self._slim_ckpt_file_path = slim_ckpt_file_path
64 |         self._roi_feature_size = roi_feature_size
65 |         self._roi_head_keep_dropout_rate = roi_head_keep_dropout_rate
66 |         super().__init__(num_classes=num_classes,
67 |                          weight_decay=weight_decay,
68 |
69 |                          ratios=ratios,
70 |                          scales=scales,
71 |                          extractor_stride=extractor_stride,
72 |
73 |                          rpn_proposal_means=rpn_proposal_means,
74 |                          rpn_proposal_stds=rpn_proposal_stds,
75 |
76 |                          rpn_proposal_num_pre_nms_train=rpn_proposal_num_pre_nms_train,
77 |                          rpn_proposal_num_post_nms_train=rpn_proposal_num_post_nms_train,
78 |                          rpn_proposal_num_pre_nms_test=rpn_proposal_num_pre_nms_test,
79 |                          rpn_proposal_num_post_nms_test=rpn_proposal_num_post_nms_test,
80 |                          rpn_proposal_nms_iou_threshold=rpn_proposal_nms_iou_threshold,
81 |
82 |                          rpn_sigma=rpn_sigma,
83 |                          rpn_training_pos_iou_threshold=rpn_training_pos_iou_threshold,
84 |                          rpn_training_neg_iou_threshold=rpn_training_neg_iou_threshold,
85 |                          rpn_training_total_num_samples=rpn_training_total_num_samples,
86 |                          rpn_training_max_pos_samples=rpn_training_max_pos_samples,
87 |
88 |                          roi_proposal_means=roi_proposal_means,
89 |                          roi_proposal_stds=roi_proposal_stds,
90 |
91 |                          roi_pool_size=roi_pool_size,
92 |                          roi_pooling_max_pooling_flag=roi_pooling_max_pooling_flag,
93 |
94 |                          roi_sigma=roi_sigma,
95 |                          roi_training_pos_iou_threshold=roi_training_pos_iou_threshold,
96 |                          roi_training_neg_iou_threshold=roi_training_neg_iou_threshold,
97 |                          roi_training_total_num_samples=roi_training_total_num_samples,
98 |                          roi_training_max_pos_samples=roi_training_max_pos_samples,
99 |
100 |                          prediction_max_objects_per_image=prediction_max_objects_per_image,
101 |                          prediction_max_objects_per_class=prediction_max_objects_per_class,
102 |                          prediction_nms_iou_threshold=prediction_nms_iou_threshold,
103 |                          prediction_score_threshold=prediction_score_threshold,
104 |                          )
105 |
106 |     def _get_roi_head(self):
107 |         return Vgg16RoiHead(self.num_classes,
108 |                             roi_feature_size=self._roi_feature_size,
109 |                             keep_rate=self._roi_head_keep_dropout_rate,
110 |                             weight_decay=self.weight_decay,
111 |                             slim_ckpt_file_path=self._slim_ckpt_file_path)
112 |
113 |     def _get_extractor(self):
114 |         return Vgg16Extractor(weight_decay=self.weight_decay,
115 |                               slim_ckpt_file_path=self._slim_ckpt_file_path)
116 |
117 |     def load_tf_faster_rcnn_tf_weights(self, ckpt_file_path):
118 |         reader = tf.train.load_checkpoint(ckpt_file_path)
119 |         extractor = self.get_layer('vgg16')
120 |         extractor_dict = {
121 |             "vgg_16/conv1/conv1_1/": "block1_conv1",
122 |             "vgg_16/conv1/conv1_2/": "block1_conv2",
123 |
124 |             "vgg_16/conv2/conv2_1/": "block2_conv1",
125 |             "vgg_16/conv2/conv2_2/": "block2_conv2",
126 |
127 |             "vgg_16/conv3/conv3_1/": "block3_conv1",
128 |             "vgg_16/conv3/conv3_2/": "block3_conv2",
129 |             "vgg_16/conv3/conv3_3/": "block3_conv3",
130 |
"vgg_16/conv4/conv4_1/": "block4_conv1", 132 | "vgg_16/conv4/conv4_2/": "block4_conv2", 133 | "vgg_16/conv4/conv4_3/": "block4_conv3", 134 | 135 | "vgg_16/conv5/conv5_1/": "block5_conv1", 136 | "vgg_16/conv5/conv5_2/": "block5_conv2", 137 | "vgg_16/conv5/conv5_3/": "block5_conv3", 138 | } 139 | for slim_tensor_name_pre in extractor_dict.keys(): 140 | extractor.get_layer(name=extractor_dict[slim_tensor_name_pre]).set_weights([ 141 | reader.get_tensor(slim_tensor_name_pre + 'weights'), 142 | reader.get_tensor(slim_tensor_name_pre + 'biases'), 143 | ]) 144 | tf.logging.info('successfully loaded weights for {}'.format(extractor_dict[slim_tensor_name_pre])) 145 | 146 | rpn_head = self.get_layer('rpn_head') 147 | rpn_head_dict = { 148 | 'vgg_16/rpn_conv/3x3/': 'rpn_first_conv', 149 | 'vgg_16/rpn_cls_score/': 'rpn_score_conv', 150 | 'vgg_16/rpn_bbox_pred/': 'rpn_bbox_conv', 151 | } 152 | for slim_tensor_name_pre in rpn_head_dict.keys(): 153 | rpn_head.get_layer(rpn_head_dict[slim_tensor_name_pre]).set_weights([ 154 | reader.get_tensor(slim_tensor_name_pre + 'weights'), 155 | reader.get_tensor(slim_tensor_name_pre + 'biases') 156 | ]) 157 | tf.logging.info('successfully loaded weights for {}'.format(rpn_head_dict[slim_tensor_name_pre])) 158 | 159 | roi_head = self.get_layer('vgg16_roi_head') 160 | roi_head_dict = { 161 | 'vgg_16/fc6/': 'fc1', 162 | 'vgg_16/fc7/': 'fc2', 163 | 'vgg_16/bbox_pred/': 'roi_head_bboxes', 164 | 'vgg_16/cls_score/': 'roi_head_score' 165 | } 166 | for slim_tensor_name_pre in roi_head_dict.keys(): 167 | roi_head.get_layer(roi_head_dict[slim_tensor_name_pre]).set_weights([ 168 | reader.get_tensor(slim_tensor_name_pre + 'weights'), 169 | reader.get_tensor(slim_tensor_name_pre + 'biases') 170 | ]) 171 | tf.logging.info('successfully loaded weights for {}'.format(roi_head_dict[slim_tensor_name_pre])) 172 | 173 | def disable_biases(self): 174 | # vgg16 doesn't need to diable biases 175 | pass 176 | 177 | 178 | class Vgg16RoiHead(tf.keras.Model): 179 | def __init__(self, num_classes, 180 | roi_feature_size=(7, 7, 512), 181 | keep_rate=0.5, weight_decay=0.0005, 182 | slim_ckpt_file_path=None, ): 183 | super().__init__() 184 | self._num_classes = num_classes 185 | 186 | self._fc1 = layers.Dense(4096, name='fc1', activation='relu', 187 | kernel_initializer=tf.random_normal_initializer(0, 0.01), 188 | kernel_regularizer=tf.keras.regularizers.l2(weight_decay), 189 | input_shape=[roi_feature_size] 190 | ) 191 | self._dropout1 = layers.Dropout(rate=1 - keep_rate) 192 | 193 | self._fc2 = layers.Dense(4096, name='fc2', activation='relu', 194 | kernel_initializer=tf.random_normal_initializer(0, 0.01), 195 | kernel_regularizer=tf.keras.regularizers.l2(weight_decay), 196 | ) 197 | self._dropout2 = layers.Dropout(rate=1 - keep_rate) 198 | 199 | self._score_layer = layers.Dense(num_classes, name='roi_head_score', activation=None, 200 | kernel_initializer=tf.random_normal_initializer(0, 0.01), 201 | kernel_regularizer=tf.keras.regularizers.l2(weight_decay)) 202 | self._roi_bboxes_layer = layers.Dense(4 * num_classes, name='roi_head_bboxes', activation=None, 203 | kernel_initializer=tf.random_normal_initializer(0, 0.001), 204 | kernel_regularizer=tf.keras.regularizers.l2(weight_decay)) 205 | self._flatten_layer = layers.Flatten() 206 | 207 | self.build((None, *roi_feature_size)) 208 | 209 | if slim_ckpt_file_path is None: 210 | self._load_keras_weights() 211 | else: 212 | self._load_slim_weights(slim_ckpt_file_path) 213 | 214 | def _load_slim_weights(self, ckpt_file_path): 215 | reader = 
216 |         slim_to_keras = {
217 |             "vgg_16/fc6/": "fc1",
218 |             "vgg_16/fc7/": "fc2",
219 |         }
220 |
221 |         for slim_tensor_name_pre in slim_to_keras.keys():
222 |             cur_layer = self.get_layer(name=slim_to_keras[slim_tensor_name_pre])
223 |             cur_layer.set_weights([
224 |                 reader.get_tensor(slim_tensor_name_pre + 'weights').reshape(
225 |                     cur_layer.variables[0].get_shape().as_list()),
226 |                 reader.get_tensor(slim_tensor_name_pre + 'biases').reshape(
227 |                     cur_layer.variables[1].get_shape().as_list()),
228 |             ])
229 |         tf.logging.info('successfully loaded slim vgg weights for roi head.')
230 |
231 |     def _load_keras_weights(self):
232 |         weights_path = tf.keras.utils.get_file(
233 |             'vgg16_weights_tf_dim_ordering_tf_kernels.h5',
234 |             VGG_16_WEIGHTS_PATH,
235 |             cache_subdir='models',
236 |             file_hash='64373286793e3c8b2b4e3219cbf3544b')
237 |         self.load_weights(weights_path, by_name=True)
238 |         tf.logging.info('successfully loaded pretrained weights for roi head.')
239 |
240 |     def call(self, inputs, training=None):
241 |         """
242 |         Takes the roi pooling results as input
243 |         and predicts scores and bboxes for each pooled roi.
244 |         :param inputs: roi_features, [num_rois, pool_size, pool_size, num_channels]
245 |         :param training:
246 |         :param mask:
247 |         :return:
248 |         """
249 |         x = self._flatten_layer(inputs)
250 |         x = self._fc1(x)
251 |         x = self._dropout1(x, training)
252 |         x = self._fc2(x)
253 |         x = self._dropout2(x, training)
254 |         score = self._score_layer(x)
255 |         bboxes = self._roi_bboxes_layer(x)
256 |
257 |         return score, bboxes
258 |
259 |
260 | class Vgg16Extractor(tf.keras.Sequential):
261 |     def __init__(self, weight_decay=0.0001,
262 |                  slim_ckpt_file_path=None):
263 |         super().__init__(name='vgg16')
264 |         # Block 1
265 |         self.add(layers.Conv2D(64, (3, 3),
266 |                                activation='relu',
267 |                                padding='same',
268 |                                name='block1_conv1', trainable=False,
269 |                                kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
270 |                                input_shape=(None, None, 3)))
271 |         self.add(layers.Conv2D(64, (3, 3),
272 |                                activation='relu',
273 |                                padding='same',
274 |                                kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
275 |                                name='block1_conv2', trainable=False))
276 |         self.add(layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool', padding='same'))
277 |
278 |         # Block 2
279 |         self.add(layers.Conv2D(128, (3, 3),
280 |                                activation='relu',
281 |                                padding='same',
282 |                                kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
283 |                                name='block2_conv1', trainable=False))
284 |         self.add(layers.Conv2D(128, (3, 3),
285 |                                activation='relu',
286 |                                padding='same',
287 |                                kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
288 |                                name='block2_conv2', trainable=False))
289 |         self.add(layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool', padding='same'))
290 |
291 |         # Block 3
292 |         self.add(layers.Conv2D(256, (3, 3),
293 |                                activation='relu',
294 |                                padding='same',
295 |                                kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
296 |                                name='block3_conv1'))
297 |         self.add(layers.Conv2D(256, (3, 3),
298 |                                activation='relu',
299 |                                padding='same',
300 |                                kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
301 |                                name='block3_conv2'))
302 |         self.add(layers.Conv2D(256, (3, 3),
303 |                                activation='relu',
304 |                                padding='same',
305 |                                kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
306 |                                name='block3_conv3'))
307 |         self.add(layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool', padding='same'))
308 |
309 |         # Block 4
310 |         self.add(layers.Conv2D(512, (3, 3),
311 |                                activation='relu',
312 |
padding='same', 313 | kernel_regularizer=tf.keras.regularizers.l2(weight_decay), 314 | name='block4_conv1')) 315 | self.add(layers.Conv2D(512, (3, 3), 316 | activation='relu', 317 | padding='same', 318 | kernel_regularizer=tf.keras.regularizers.l2(weight_decay), 319 | name='block4_conv2')) 320 | self.add(layers.Conv2D(512, (3, 3), 321 | activation='relu', 322 | padding='same', 323 | kernel_regularizer=tf.keras.regularizers.l2(weight_decay), 324 | name='block4_conv3')) 325 | self.add(layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool', padding='same')) 326 | 327 | # Block 5 328 | self.add(layers.Conv2D(512, (3, 3), 329 | activation='relu', 330 | padding='same', 331 | kernel_regularizer=tf.keras.regularizers.l2(weight_decay), 332 | name='block5_conv1')) 333 | self.add(layers.Conv2D(512, (3, 3), 334 | activation='relu', 335 | padding='same', 336 | kernel_regularizer=tf.keras.regularizers.l2(weight_decay), 337 | name='block5_conv2')) 338 | self.add(layers.Conv2D(512, (3, 3), 339 | activation='relu', 340 | padding='same', 341 | kernel_regularizer=tf.keras.regularizers.l2(weight_decay), 342 | name='block5_conv3')) 343 | if slim_ckpt_file_path: 344 | self.load_slim_weights(slim_ckpt_file_path) 345 | else: 346 | self._load_keras_weights() 347 | 348 | def _load_keras_weights(self): 349 | weights_path = tf.keras.utils.get_file( 350 | 'vgg16_weights_tf_dim_ordering_tf_kernels.h5', 351 | VGG_16_WEIGHTS_PATH, 352 | cache_subdir='models', 353 | file_hash='64373286793e3c8b2b4e3219cbf3544b') 354 | self.load_weights(weights_path, by_name=True) 355 | tf.logging.info('successfully loaded keras vgg weights for vgg16 extractor.') 356 | 357 | def load_slim_weights(self, slim_ckpt_file_path): 358 | reader = tf.train.NewCheckpointReader(slim_ckpt_file_path) 359 | slim_to_keras = { 360 | "vgg_16/conv1/conv1_1/": "block1_conv1", 361 | "vgg_16/conv1/conv1_2/": "block1_conv2", 362 | 363 | "vgg_16/conv2/conv2_1/": "block2_conv1", 364 | "vgg_16/conv2/conv2_2/": "block2_conv2", 365 | 366 | "vgg_16/conv3/conv3_1/": "block3_conv1", 367 | "vgg_16/conv3/conv3_2/": "block3_conv2", 368 | "vgg_16/conv3/conv3_3/": "block3_conv3", 369 | 370 | "vgg_16/conv4/conv4_1/": "block4_conv1", 371 | "vgg_16/conv4/conv4_2/": "block4_conv2", 372 | "vgg_16/conv4/conv4_3/": "block4_conv3", 373 | 374 | "vgg_16/conv5/conv5_1/": "block5_conv1", 375 | "vgg_16/conv5/conv5_2/": "block5_conv2", 376 | "vgg_16/conv5/conv5_3/": "block5_conv3", 377 | } 378 | for slim_tensor_name_pre in slim_to_keras.keys(): 379 | if slim_tensor_name_pre == 'vgg_16/conv1/conv1_1/': 380 | weights = reader.get_tensor(slim_tensor_name_pre + 'weights')[:, :, ::-1, :] 381 | self.get_layer(name=slim_to_keras[slim_tensor_name_pre]).set_weights([ 382 | weights, 383 | reader.get_tensor(slim_tensor_name_pre + 'biases'), 384 | ]) 385 | else: 386 | self.get_layer(name=slim_to_keras[slim_tensor_name_pre]).set_weights([ 387 | reader.get_tensor(slim_tensor_name_pre + 'weights'), 388 | reader.get_tensor(slim_tensor_name_pre + 'biases'), 389 | ]) 390 | tf.logging.info('successfully loaded slim vgg weights for vgg16 extractor.') 391 | -------------------------------------------------------------------------------- /object_detection/model/fpn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irvingzhang0512/tf_eager_object_detection/e977217cb9d5fc2975292d55b1e5cae484f9e0dd/object_detection/model/fpn/__init__.py -------------------------------------------------------------------------------- 
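Editor's note: `losses.py` below implements the smooth L1 loss shared by the RPN and RoI heads: 0.5 * sigma^2 * x^2 where |x| < 1/sigma^2, and |x| - 0.5/sigma^2 elsewhere. The following NumPy sketch is our own illustration (`smooth_l1_np` is not part of this repo) and just reproduces the piecewise form so the constants can be sanity-checked:

```python
import numpy as np


def smooth_l1_np(x, sigma=1.0):
    # Same sigma convention as smooth_l1_loss in losses.py below.
    sigma_2 = sigma ** 2
    abs_x = np.abs(x)
    quadratic = 0.5 * sigma_2 * x ** 2   # used where |x| < 1 / sigma_2
    linear = abs_x - 0.5 / sigma_2       # used everywhere else
    return np.where(abs_x < 1.0 / sigma_2, quadratic, linear)


# With sigma=3.0 (the rpn_sigma default), the quadratic zone is |x| < 1/9:
print(smooth_l1_np(np.array([0.05, 0.5, 2.0]), sigma=3.0))
# -> [0.01125    0.44444444 1.94444444]
```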
/object_detection/model/losses.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 |
4 | def cls_loss(logits, labels, weight=1):
5 |     """
6 |     Cross entropy classification loss.
7 |     :param weight:
8 |     :param logits: [num_anchors, 2]
9 |     :param labels: [num_anchors, ], values in [0, num_classes) (roi training) or [0, 1] (rpn training)
10 |     :return:
11 |     """
12 |     return tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=tf.to_int32(labels),
13 |                                                   weights=weight)
14 |
15 |
16 | def smooth_l1_loss(bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights, sigma=1.0, dim=[1]):
17 |     sigma_2 = sigma ** 2
18 |     box_diff = bbox_pred - bbox_targets
19 |     in_box_diff = bbox_inside_weights * box_diff
20 |     abs_in_box_diff = tf.abs(in_box_diff)
21 |     sign = tf.stop_gradient(tf.to_float(tf.less(abs_in_box_diff, 1. / sigma_2)))
22 |     in_loss_box = tf.pow(in_box_diff, 2) * (sigma_2 / 2.) * sign + (abs_in_box_diff - (0.5 / sigma_2)) * (1. - sign)
23 |     out_loss_box = bbox_outside_weights * in_loss_box
24 |     loss_box = tf.reduce_mean(tf.reduce_sum(
25 |         out_loss_box,
26 |         axis=dim
27 |     ))
28 |     return loss_box
--------------------------------------------------------------------------------
/object_detection/model/model_factory.py:
--------------------------------------------------------------------------------
1 | from object_detection.model.fpn.resnet_fpn import ResnetV1Fpn
2 | from object_detection.model.faster_rcnn.resnet_faster_rcnn import ResNetFasterRcnn
3 | from object_detection.model.faster_rcnn.vgg16_faster_rcnn import Vgg16FasterRcnn
4 |
5 | __all__ = ['model_factory']
6 |
7 |
8 | def model_factory(model_type, backbone, config):
9 |     if model_type == 'faster_rcnn':
10 |         if backbone == 'vgg16':
11 |             return _get_faster_rcnn_vgg16_model(None, config)
12 |         elif backbone == 'resnet50':
13 |             return _get_faster_rcnn_resnet_model(50, config)
14 |         elif backbone == 'resnet101':
15 |             return _get_faster_rcnn_resnet_model(101, config)
16 |         elif backbone == 'resnet152':
17 |             return _get_faster_rcnn_resnet_model(152, config)
18 |         else:
19 |             raise ValueError('unknown backbone {}'.format(backbone))
20 |     elif model_type == 'fpn':
21 |         if backbone == 'resnet50':
22 |             return _get_fpn_resnet_model(50, config)
23 |         elif backbone == 'resnet101':
24 |             return _get_fpn_resnet_model(101, config)
25 |         elif backbone == 'resnet152':
26 |             return _get_fpn_resnet_model(152, config)
27 |         else:
28 |             raise ValueError('unknown backbone {}'.format(backbone))
29 |     else:
30 |         raise ValueError('unknown model type {}'.format(model_type))
31 |
32 |
33 | def _get_fpn_resnet_model(depth, config):
34 |     return ResnetV1Fpn(
35 |         depth=depth,
36 |         roi_head_keep_dropout_rate=config['roi_head_keep_dropout_rate'],
37 |
38 |         roi_feature_size=config['resnet_roi_feature_size'],
39 |         num_classes=config['num_classes'],
40 |         weight_decay=config['weight_decay'],
41 |
42 |         level_name_list=config['level_name_list'],
43 |         min_level=config['min_level'],
44 |         max_level=config['max_level'],
45 |         top_down_dims=config['top_down_dims'],
46 |
47 |         anchor_stride_list=config['anchor_stride_list'],
48 |         base_anchor_size_list=config['base_anchor_size_list'],
49 |         ratios=config['ratios'],
50 |         scales=config['scales'],
51 |
52 |         rpn_proposal_means=config['rpn_proposal_means'],
53 |         rpn_proposal_stds=config['rpn_proposal_stds'],
54 |
55 |         rpn_proposal_num_pre_nms_train=config['rpn_proposal_train_pre_nms_sample_number'],
56 |         rpn_proposal_num_post_nms_train=config['rpn_proposal_train_after_nms_sample_number'],
57 |
rpn_proposal_num_pre_nms_test=config['rpn_proposal_test_pre_nms_sample_number'], 58 | rpn_proposal_num_post_nms_test=config['rpn_proposal_test_after_nms_sample_number'], 59 | rpn_proposal_nms_iou_threshold=config['rpn_proposal_nms_iou_threshold'], 60 | 61 | rpn_sigma=config['rpn_sigma'], 62 | rpn_training_pos_iou_threshold=config['rpn_pos_iou_threshold'], 63 | rpn_training_neg_iou_threshold=config['rpn_neg_iou_threshold'], 64 | rpn_training_total_num_samples=config['rpn_total_sample_number'], 65 | rpn_training_max_pos_samples=config['rpn_pos_sample_max_number'], 66 | 67 | roi_proposal_means=config['roi_proposal_means'], 68 | roi_proposal_stds=config['roi_proposal_stds'], 69 | 70 | roi_pool_size=config['roi_pooling_size'], 71 | roi_pooling_max_pooling_flag=config['roi_pooling_max_pooling_flag'], 72 | 73 | roi_sigma=config['roi_sigma'], 74 | roi_training_pos_iou_threshold=config['roi_pos_iou_threshold'], 75 | roi_training_neg_iou_threshold=config['roi_neg_iou_threshold'], 76 | roi_training_total_num_samples=config['roi_total_sample_number'], 77 | roi_training_max_pos_samples=config['roi_pos_sample_max_number'], 78 | 79 | prediction_max_objects_per_image=config['max_objects_per_image'], 80 | prediction_max_objects_per_class=config['max_objects_per_class_per_image'], 81 | prediction_nms_iou_threshold=config['prediction_nms_iou_threshold'], 82 | prediction_score_threshold=config['prediction_score_threshold'], 83 | ) 84 | 85 | 86 | def _get_faster_rcnn_resnet_model(depth, config): 87 | return ResNetFasterRcnn( 88 | depth=depth, 89 | roi_feature_size=config['resnet_roi_feature_size'], 90 | 91 | num_classes=config['num_classes'], 92 | weight_decay=config['weight_decay'], 93 | 94 | ratios=config['ratios'], 95 | scales=config['scales'], 96 | extractor_stride=config['extractor_stride'], 97 | 98 | rpn_proposal_means=config['rpn_proposal_means'], 99 | rpn_proposal_stds=config['rpn_proposal_stds'], 100 | 101 | rpn_proposal_num_pre_nms_train=config['rpn_proposal_train_pre_nms_sample_number'], 102 | rpn_proposal_num_post_nms_train=config['rpn_proposal_train_after_nms_sample_number'], 103 | rpn_proposal_num_pre_nms_test=config['rpn_proposal_test_pre_nms_sample_number'], 104 | rpn_proposal_num_post_nms_test=config['rpn_proposal_test_after_nms_sample_number'], 105 | rpn_proposal_nms_iou_threshold=config['rpn_proposal_nms_iou_threshold'], 106 | 107 | rpn_sigma=config['rpn_sigma'], 108 | rpn_training_pos_iou_threshold=config['rpn_pos_iou_threshold'], 109 | rpn_training_neg_iou_threshold=config['rpn_neg_iou_threshold'], 110 | rpn_training_total_num_samples=config['rpn_total_sample_number'], 111 | rpn_training_max_pos_samples=config['rpn_pos_sample_max_number'], 112 | 113 | roi_proposal_means=config['roi_proposal_means'], 114 | roi_proposal_stds=config['roi_proposal_stds'], 115 | 116 | roi_pool_size=config['roi_pooling_size'], 117 | roi_pooling_max_pooling_flag=config['resnet_roi_pooling_max_pooling_flag'], 118 | 119 | roi_sigma=config['roi_sigma'], 120 | roi_training_pos_iou_threshold=config['roi_pos_iou_threshold'], 121 | roi_training_neg_iou_threshold=config['roi_neg_iou_threshold'], 122 | roi_training_total_num_samples=config['roi_total_sample_number'], 123 | roi_training_max_pos_samples=config['roi_pos_sample_max_number'], 124 | 125 | prediction_max_objects_per_image=config['max_objects_per_image'], 126 | prediction_max_objects_per_class=config['max_objects_per_class_per_image'], 127 | prediction_nms_iou_threshold=config['prediction_nms_iou_threshold'], 128 | 
prediction_score_threshold=config['prediction_score_threshold'], 129 | ) 130 | 131 | 132 | def _get_faster_rcnn_vgg16_model(slim_ckpt_file_path, config): 133 | return Vgg16FasterRcnn( 134 | slim_ckpt_file_path=slim_ckpt_file_path, 135 | roi_head_keep_dropout_rate=config['roi_head_keep_dropout_rate'], 136 | roi_feature_size=config['vgg16_roi_feature_size'], 137 | 138 | num_classes=config['num_classes'], 139 | weight_decay=config['weight_decay'], 140 | 141 | ratios=config['ratios'], 142 | scales=config['scales'], 143 | extractor_stride=config['extractor_stride'], 144 | 145 | rpn_proposal_means=config['rpn_proposal_means'], 146 | rpn_proposal_stds=config['rpn_proposal_stds'], 147 | 148 | rpn_proposal_num_pre_nms_train=config['rpn_proposal_train_pre_nms_sample_number'], 149 | rpn_proposal_num_post_nms_train=config['rpn_proposal_train_after_nms_sample_number'], 150 | rpn_proposal_num_pre_nms_test=config['rpn_proposal_test_pre_nms_sample_number'], 151 | rpn_proposal_num_post_nms_test=config['rpn_proposal_test_after_nms_sample_number'], 152 | rpn_proposal_nms_iou_threshold=config['rpn_proposal_nms_iou_threshold'], 153 | 154 | rpn_sigma=config['rpn_sigma'], 155 | rpn_training_pos_iou_threshold=config['rpn_pos_iou_threshold'], 156 | rpn_training_neg_iou_threshold=config['rpn_neg_iou_threshold'], 157 | rpn_training_total_num_samples=config['rpn_total_sample_number'], 158 | rpn_training_max_pos_samples=config['rpn_pos_sample_max_number'], 159 | 160 | roi_proposal_means=config['roi_proposal_means'], 161 | roi_proposal_stds=config['roi_proposal_stds'], 162 | 163 | roi_pool_size=config['roi_pooling_size'], 164 | roi_pooling_max_pooling_flag=config['vgg16_roi_pooling_max_pooling_flag'], 165 | 166 | roi_sigma=config['roi_sigma'], 167 | roi_training_pos_iou_threshold=config['roi_pos_iou_threshold'], 168 | roi_training_neg_iou_threshold=config['roi_neg_iou_threshold'], 169 | roi_training_total_num_samples=config['roi_total_sample_number'], 170 | roi_training_max_pos_samples=config['roi_pos_sample_max_number'], 171 | 172 | prediction_max_objects_per_image=config['max_objects_per_image'], 173 | prediction_max_objects_per_class=config['max_objects_per_class_per_image'], 174 | prediction_nms_iou_threshold=config['prediction_nms_iou_threshold'], 175 | prediction_score_threshold=config['prediction_score_threshold'], 176 | ) 177 | -------------------------------------------------------------------------------- /object_detection/model/prediction.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from object_detection.utils.bbox_transform import decode_bbox_with_mean_and_std 4 | from object_detection.utils.bbox_tf import bboxes_clip_filter as bboxes_clip_filter_tf 5 | 6 | 7 | __all__ = ['post_ops_prediction'] 8 | 9 | 10 | def predict_after_roi(roi_scores_softmax, roi_txtytwth, rois, image_shape, 11 | target_means, target_stds, 12 | max_num_per_class=5, 13 | max_num_per_image=5, 14 | nms_iou_threshold=0.3, 15 | score_threshold=0.3, 16 | extractor_stride=16, 17 | ): 18 | """ 19 | copy from https://github.com/Viredery/tf-eager-fasterrcnn/blob/master/detection/models/bbox_heads/bbox_head.py 20 | :param roi_scores_softmax: 21 | :param roi_txtytwth: 22 | :param rois: 23 | :param image_shape: 24 | :param target_means: 25 | :param target_stds: 26 | :param max_num_per_class: 27 | :param max_num_per_image: 28 | :param nms_iou_threshold: 29 | :param score_threshold: 30 | :param extractor_stride: 31 | :return: 32 | """ 33 | 34 | # Class IDs per ROI 35 | 
class_ids = tf.argmax(roi_scores_softmax, axis=1, output_type=tf.int32) 36 | 37 | # Class probability of the top class of each ROI 38 | indices = tf.stack([tf.range(roi_scores_softmax.shape[0]), class_ids], axis=1) 39 | class_scores = tf.gather_nd(roi_scores_softmax, indices) 40 | # Class-specific bounding box deltas 41 | deltas_specific = tf.gather_nd(roi_txtytwth, indices) 42 | # Apply bounding box deltas 43 | # Shape: [num_rois, (y1, x1, y2, x2)] in normalized coordinates 44 | refined_rois = decode_bbox_with_mean_and_std(rois, deltas_specific, 45 | target_means, target_stds) 46 | refined_rois, refined_rois_idx = bboxes_clip_filter_tf(refined_rois, 0, image_shape[0], image_shape[1], 47 | min_edge=None) 48 | # TODO: remove min edge 49 | 50 | # Filter out background boxes 51 | keep = tf.where(class_ids > 0)[:, 0] 52 | 53 | # Filter out low confidence boxes 54 | score_keep = tf.where(class_scores >= score_threshold)[:, 0] 55 | keep = tf.sets.set_intersection(tf.expand_dims(keep, 0), 56 | tf.expand_dims(score_keep, 0)) 57 | keep = tf.sparse_tensor_to_dense(keep)[0] 58 | 59 | # Apply per-class NMS 60 | # 1. Prepare variables 61 | pre_nms_class_ids = tf.gather(class_ids, keep) 62 | pre_nms_scores = tf.gather(class_scores, keep) 63 | pre_nms_rois = tf.gather(refined_rois, keep) 64 | unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0] 65 | 66 | def nms_keep_map(class_id): 67 | # Indices of ROIs of the given class 68 | ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0] 69 | # Apply NMS 70 | class_keep = tf.image.non_max_suppression( 71 | tf.gather(pre_nms_rois, ixs), 72 | tf.gather(pre_nms_scores, ixs), 73 | max_output_size=max_num_per_class, 74 | iou_threshold=nms_iou_threshold) 75 | # Map indices 76 | class_keep = tf.gather(keep, tf.gather(ixs, class_keep)) 77 | tf.logging.debug('nms keep map is {}'.format(class_keep)) 78 | return class_keep 79 | 80 | # 2. Map over class IDs 81 | nms_keep = [] 82 | for i in range(unique_pre_nms_class_ids.shape[0]): 83 | nms_keep.append(nms_keep_map(unique_pre_nms_class_ids[i])) 84 | 85 | if len(nms_keep) == 0: 86 | return None, None, None 87 | nms_keep = tf.concat(nms_keep, axis=0) 88 | 89 | # 3. 
Compute intersection between keep and nms_keep 90 | keep = tf.sets.set_intersection(tf.expand_dims(keep, 0), 91 | tf.expand_dims(nms_keep, 0)) 92 | keep = tf.sparse_tensor_to_dense(keep)[0] 93 | # Keep top detections 94 | roi_count = max_num_per_image 95 | class_scores_keep = tf.gather(class_scores, keep) 96 | num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count) 97 | top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1] 98 | keep = tf.gather(keep, top_ids) 99 | 100 | return tf.gather(refined_rois, keep), tf.gather(class_ids, keep), tf.gather(class_scores, keep) 101 | 102 | 103 | def post_ops_prediction(roi_scores_softmax, roi_txtytwth, rois, image_shape, 104 | target_means, target_stds, 105 | max_num_per_class=50, 106 | max_num_per_image=150, 107 | nms_iou_threshold=0.3, 108 | score_threshold=0.05, 109 | extractor_stride=16, 110 | num_classes=21, 111 | ): 112 | """ 113 | 114 | :param roi_scores_softmax: [num_rois, num_classes] 115 | :param roi_txtytwth: [num_rois, num_classes, 4] 116 | :param rois: [num_rois, 4] 117 | :param image_shape: [2,] 118 | :param target_means: [4,] 119 | :param target_stds: [4,] 120 | :param max_num_per_class: 121 | :param max_num_per_image: 122 | :param nms_iou_threshold: 123 | :param score_threshold: 124 | :param extractor_stride: 125 | :param num_classes: 126 | :return: 127 | """ 128 | if target_stds is None: 129 | target_stds = [1, 1, 1, 1] 130 | if target_means is None: 131 | target_means = [0, 0, 0, 0] 132 | res_scores = [] 133 | res_bboxes = [] 134 | res_cls = [] 135 | for i in range(1, num_classes): 136 | inds = tf.where(roi_scores_softmax[:, i] > score_threshold)[:, 0] 137 | cls_score = tf.gather(roi_scores_softmax[:, i], inds) 138 | final_bboxes = decode_bbox_with_mean_and_std(tf.gather(rois, inds), 139 | tf.gather(roi_txtytwth[:, i, :], inds), 140 | target_means, target_stds) 141 | final_bboxes, clip_selected_idx = bboxes_clip_filter_tf(final_bboxes, 0, 142 | image_shape[0], image_shape[1], 143 | extractor_stride) 144 | cls_score = tf.gather(cls_score, clip_selected_idx) 145 | 146 | keep = tf.image.non_max_suppression(final_bboxes, cls_score, max_num_per_class, iou_threshold=nms_iou_threshold) 147 | if tf.size(keep).numpy() == 0: 148 | continue 149 | res_scores.append(tf.gather(cls_score, keep)) 150 | res_bboxes.append(tf.gather(final_bboxes, keep)) 151 | res_cls.append(tf.ones_like(keep, dtype=tf.int32) * i) 152 | 153 | if len(res_scores) == 0: 154 | return None, None, None 155 | 156 | scores_after_nms = tf.concat(res_scores, axis=0) 157 | bboxes_after_nms = tf.concat(res_bboxes, axis=0) 158 | cls_after_nms = tf.concat(res_cls, axis=0) 159 | 160 | _, final_idx = tf.nn.top_k(scores_after_nms, k=tf.minimum(max_num_per_image, tf.size(scores_after_nms)), 161 | sorted=False) 162 | return tf.gather(bboxes_after_nms, final_idx), tf.gather(cls_after_nms, final_idx), tf.gather(scores_after_nms, 163 | final_idx) 164 | -------------------------------------------------------------------------------- /object_detection/model/proposal_target.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | from object_detection.utils.bbox_tf import pairwise_iou 5 | from object_detection.utils.bbox_transform import encode_bbox_with_mean_and_std 6 | 7 | 8 | class ProposalTarget(tf.keras.Model): 9 | def __init__(self, 10 | num_classes=21, 11 | pos_iou_threshold=0.5, 12 | neg_iou_threshold=0.5, 13 | total_num_samples=128, 14 | max_pos_samples=32, 15 | 
target_means=None,
16 | target_stds=None):
17 | super().__init__()
18 |
19 | self._num_classes = num_classes
20 | self._pos_iou_threshold = pos_iou_threshold
21 | self._neg_iou_threshold = neg_iou_threshold
22 | self._total_num_samples = total_num_samples
23 | self._max_pos_samples = max_pos_samples
24 |
25 | if target_stds is None:
26 | target_stds = [1, 1, 1, 1]
27 | if target_means is None:
28 | target_means = [0, 0, 0, 0]
29 | self._target_means = target_means
30 | self._target_stds = target_stds
31 |
32 | def call(self, inputs, training=None, mask=None):
33 | """
34 | Not trainable.
35 | Generates the data used to train the roi head.
36 | Overall procedure:
37 | 1. Compute the iou between rois and gt_bboxes (the bboxes from the input data).
38 | 2. Mark a roi as positive when its max_iou with gt_bboxes > pos_iou_threshold, and as negative when max_iou < neg_iou_threshold.
39 | 3. The numbers of positives and negatives are capped:
40 | at most max_pos_samples positives;
41 | the total of positives and negatives does not exceed total_num_samples;
42 | if there are too few negatives, they are padded by sampling with numpy.random.choice (see the standalone sketch after this file).
43 | 4. Five results are returned:
44 | 1) rois [128, 4]
45 | 2) the label of each roi [128,]; 0 marks a negative, >0 a positive
46 | 3) the txtytwth targets of each roi [128, num_classes * 4]
47 | 4) bbox_inside_weights for the smooth l1 loss [128, num_classes * 4]
48 | 5) bbox_outside_weights for the smooth l1 loss [128, num_classes * 4]
49 | :param inputs:
50 | :param training:
51 | :param mask:
52 | :return:
53 | """
54 | rois, gt_bboxes, gt_labels = inputs
55 |
56 | iou = pairwise_iou(rois, gt_bboxes)  # [rois_size, gt_bboxes_size]
57 | max_overlaps = tf.reduce_max(iou, axis=1)  # [rois_size, ]
58 | gt_assignment = tf.argmax(iou, axis=1)  # [rois_size, ]
59 | labels = tf.gather(gt_labels, gt_assignment)  # [rois_size, ]
60 |
61 | # split the rois into foreground / background by the iou conditions
62 | fg_inds = tf.where(max_overlaps >= self._pos_iou_threshold)[:, 0]
63 | bg_inds = tf.where(tf.logical_and(max_overlaps < self._pos_iou_threshold,
64 | max_overlaps >= self._neg_iou_threshold))[:, 0]
65 |
66 | # subsample foreground / background
67 | if tf.size(fg_inds) > self._max_pos_samples:
68 | fg_inds = tf.random_shuffle(fg_inds)[:self._max_pos_samples]
69 | if tf.size(bg_inds) > self._total_num_samples - tf.size(fg_inds):
70 | # more bg samples than needed: pick a random subset
71 | bg_inds = tf.random_shuffle(bg_inds)[:(self._total_num_samples - tf.size(fg_inds))]
72 | elif tf.size(bg_inds).numpy() == (self._total_num_samples - tf.size(fg_inds)).numpy():
73 | pass
74 | else:
75 | # fewer bg samples than needed: pad by sampling with replacement
76 | target_size = (self._total_num_samples - tf.size(fg_inds)).numpy()
77 | bg_inds = np.random.choice(bg_inds.numpy(), size=int(target_size), replace=True)
78 |
79 | tf.logging.debug('proposal target generate %d fgs and %d bgs.' % (tf.size(fg_inds), tf.size(bg_inds)))
80 |
81 | keep_inds = tf.concat([fg_inds, bg_inds], axis=0)
82 | final_rois = tf.gather(rois, keep_inds)  # rois[keep_inds]
83 | final_labels = tf.gather(labels, keep_inds)  # labels[keep_inds]
84 | # labels[fg_inds_size:] = 0
85 | final_labels = tf.scatter_update(tf.Variable(final_labels),
86 | tf.range(tf.size(fg_inds), tf.size(keep_inds), dtype=tf.int32), 0)
87 |
88 | # inside weights are set only for positives; everything else stays 0
89 | bbox_inside_weights = tf.zeros((tf.size(keep_inds), self._num_classes, 4), dtype=tf.float32)
90 | if tf.size(fg_inds) > 0:
91 | # memory leak bug for tf.scatter_nd_update
92 | # https://github.com/tensorflow/tensorflow/issues/27288
93 | # cur_index = tf.stack([tf.range(tf.size(fg_inds)), tf.gather(labels, fg_inds)], axis=1)
94 | # bbox_inside_weights = tf.scatter_nd_update(tf.Variable(bbox_inside_weights),
95 | # cur_index,
96 | # tf.ones([tf.size(fg_inds), 4]))
97 | bbox_inside_weights = bbox_inside_weights.numpy()
98 | for idx, fg_ind in enumerate(fg_inds.numpy()):
99 | bbox_inside_weights[idx, labels[fg_ind]] = 1  # look up the label by roi index fg_ind (not idx), matching the commented scatter_nd version
100 | bbox_inside_weights = tf.reshape(bbox_inside_weights, [-1, self._num_classes * 4])
101 |
102 | # bbox targets are set only for positives; everything else stays 0
103 | final_bbox_targets = tf.zeros((tf.size(keep_inds), self._num_classes, 4), dtype=tf.float32)
104 | if tf.size(fg_inds) > 0:
105 | bbox_targets = encode_bbox_with_mean_and_std(tf.gather(final_rois, tf.range(tf.size(fg_inds))),
106 | tf.gather(gt_bboxes, tf.gather(gt_assignment, fg_inds)),
107 | target_stds=self._target_stds, target_means=self._target_means,
108 | )
109 | # memory leak bug for tf.scatter_nd_update
110 | # https://github.com/tensorflow/tensorflow/issues/27288
111 | # final_bbox_targets = tf.scatter_nd_update(tf.Variable(final_bbox_targets),
112 | # tf.stack([tf.range(tf.size(fg_inds)),
113 | # tf.gather(labels, fg_inds)], axis=1), bbox_targets)
114 | final_bbox_targets = final_bbox_targets.numpy()
115 | bbox_targets = bbox_targets.numpy()
116 | for idx, fg_ind in enumerate(fg_inds.numpy()):
117 | final_bbox_targets[idx, labels[fg_ind]] = bbox_targets[idx]  # same fix: index labels by fg_ind
118 |
119 | final_bbox_targets = tf.reshape(final_bbox_targets, [-1, self._num_classes * 4])
120 |
121 | # this one does not seem to actually be used
122 | bbox_outside_weights = tf.ones_like(bbox_inside_weights, dtype=tf.float32)
123 | return tf.stop_gradient(final_rois), tf.stop_gradient(final_labels), tf.stop_gradient(final_bbox_targets), \
124 | tf.stop_gradient(bbox_inside_weights), tf.stop_gradient(bbox_outside_weights)
125 |
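For reference, a minimal NumPy sketch of the fg/bg sampling quota implemented above (standalone and illustrative; the thresholds here are example values, not the class defaults):

import numpy as np

def sample_fg_bg(max_overlaps, pos_iou=0.5, neg_iou=0.1, total=128, max_pos=32):
    """Subsample fg/bg roi indices from per-roi max IoU, mirroring ProposalTarget."""
    fg_inds = np.where(max_overlaps >= pos_iou)[0]
    bg_inds = np.where((max_overlaps < pos_iou) & (max_overlaps >= neg_iou))[0]
    if fg_inds.size > max_pos:
        fg_inds = np.random.permutation(fg_inds)[:max_pos]
    num_bg = total - fg_inds.size
    if bg_inds.size > num_bg:
        bg_inds = np.random.permutation(bg_inds)[:num_bg]
    elif bg_inds.size < num_bg:
        # too few negatives: pad by sampling with replacement, as above
        bg_inds = np.random.choice(bg_inds, size=num_bg, replace=True)
    return fg_inds, bg_inds

fg, bg = sample_fg_bg(np.random.rand(300))
assert fg.size + bg.size == 128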
-------------------------------------------------------------------------------- /object_detection/model/region_proposal.py: --------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from object_detection.utils.bbox_transform import decode_bbox_with_mean_and_std
3 | from object_detection.utils.bbox_tf import bboxes_clip_filter
4 | from tensorflow.python.platform import tf_logging
5 |
6 | layers = tf.keras.layers
7 |
8 | __all__ = ['RegionProposal']
9 |
10 |
11 | class RegionProposal(tf.keras.Model):
12 | def __init__(self,
13 | num_anchors=9,
14 | num_pre_nms_train=12000,
15 | num_post_nms_train=2000,
16 | num_pre_nms_test=6000,
17 | num_post_nms_test=300,
18 | nms_iou_threshold=0.7,
19 | target_means=None,
20 | target_stds=None):
21 | super().__init__()
22 |
23 | self._num_anchors = num_anchors
24 | self._num_pre_nms_train = num_pre_nms_train
25 | self._num_post_nms_train = num_post_nms_train
26 | self._num_pre_nms_test = num_pre_nms_test
27 | self._num_post_nms_test = num_post_nms_test
28 | self._nms_iou_threshold = nms_iou_threshold
29 |
30 | if target_stds is None:
31 | target_stds = [1, 1, 1, 1]
32 | if target_means is None:
33 | target_means = [0, 0, 0, 0]
34 | self._target_means = target_means
35 | self._target_stds = target_stds
36 |
37 | def call(self, inputs, training=None, mask=None):
38 | """
39 | Generates the rpn output: a set of bboxes used by the subsequent roi pooling.
40 | Overall procedure:
41 | 1. Apply the rpn predictions to the anchors to obtain all decoded bboxes.
42 | 2. Post-process (clip) the decoded bboxes.
43 | 3. Keep the top num_pre_nms bboxes by rpn_score (currently disabled below).
44 | 4. Run nms.
45 | 5. Keep num_post_nms bboxes, ranked by rpn_score, as the proposals (a usage sketch with dummy inputs follows this file).
46 | :param inputs:
47 | :param training:
48 | :param mask:
49 | :return:
50 | """
51 | # bboxes_txtytwth shape: [num_anchors*feature_width*feature_height, 4]
52 | # anchors shape: [num_anchors*feature_width*feature_height, 4]
53 | # scores shape: [feature_width*feature_height*num_anchors,]
54 | # image_shape shape: [2, ]
55 | bboxes_txtytwth, anchors, scores, image_shape = inputs
56 |
57 | # 1. apply the rpn predictions to the anchors to obtain all decoded bboxes
58 | # [num_anchors*feature_width*feature_height, 4]
59 | decoded_bboxes = decode_bbox_with_mean_and_std(anchors, bboxes_txtytwth,
60 | self._target_means, self._target_stds)
61 |
62 | # 2. clip the decoded bboxes to the image
63 | decoded_bboxes, _ = bboxes_clip_filter(decoded_bboxes, 0, image_shape[0], image_shape[1])
64 |
65 | # # 3. keep the top num_pre_nms bboxes by rpn_score
66 | # num_pre_nms = self._num_pre_nms_train if training else self._num_pre_nms_test
67 | # cur_top_k = tf.minimum(num_pre_nms, tf.size(scores))
68 | # scores, selected_idx = tf.nn.top_k(scores, k=cur_top_k, sorted=False)
69 | # decoded_bboxes = tf.gather(decoded_bboxes, selected_idx)
70 |
71 | # 4. run nms
72 | # 5. nms keeps at most num_post_nms bboxes, selected in descending score order, as the proposals
73 | num_post_nms = self._num_post_nms_train if training else self._num_post_nms_test
74 | selected_idx = tf.image.non_max_suppression(tf.to_float(decoded_bboxes), scores,
75 | max_output_size=num_post_nms,
76 | iou_threshold=self._nms_iou_threshold)
77 |
78 | tf_logging.debug('rpn proposal net generate %d proposals' % tf.size(selected_idx))
79 |
80 | # the proposals do not take part in training, hence tf.stop_gradient
81 | return tf.stop_gradient(tf.gather(decoded_bboxes, selected_idx))
82 |
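A minimal usage sketch of RegionProposal with dummy eager-mode inputs (shapes follow the comments in call(); all values are illustrative):

import tensorflow as tf
from object_detection.model.region_proposal import RegionProposal

tf.enable_eager_execution()

num = 9 * 38 * 50  # num_anchors * feature_height * feature_width
bboxes_txtytwth = tf.zeros([num, 4])             # zero deltas: proposals equal the anchors
anchors = tf.random_uniform([num, 4], 0., 600.)  # dummy anchors
scores = tf.random_uniform([num])                # dummy rpn scores
rpn = RegionProposal(num_anchors=9)
proposals = rpn((bboxes_txtytwth, anchors, scores, [600, 800]), training=False)
# proposals: [<= num_post_nms_test, 4] clipped bboxes with gradients stopped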
-------------------------------------------------------------------------------- /object_detection/model/roi_pooling.py: --------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | layers = tf.keras.layers
4 |
5 | __all__ = ['RoiPoolingCropAndResize', 'RoiPoolingRoiAlign', 'RoiPoolingCropAndResize2']
6 |
7 |
8 | class RoiPoolingCropAndResize2(tf.keras.Model):
9 | def __init__(self, pool_size):
10 | super().__init__()
11 | self._pool_size = pool_size
12 | self._concat_layer = layers.Concatenate(axis=0)
13 | self._max_pool = layers.MaxPooling2D(padding='same')
14 |
15 | def call(self, inputs, training=None, mask=None):
16 | """
17 | Takes the backbone feature map and the rpn proposals (the output of RegionProposal),
18 | and returns the roi pooling result: a fixed-size feature map cropped for every rpn proposal.
19 | :param inputs:
20 | :param training:
21 | :param mask:
22 | :return:
23 | """
24 | # [1, height, width, channels] [num_rois, 4]
25 | shared_layers, rois, image_shape = inputs
26 | h, w = tf.to_float(image_shape[0]), tf.to_float(image_shape[1])
27 |
28 | batch_ids = tf.zeros([tf.shape(rois)[0]], dtype=tf.int32)
29 | roi_channels = tf.split(rois, 4, axis=1)
30 | bboxes = tf.concat([
31 | roi_channels[1] / tf.to_float(h),
32 | roi_channels[0] / tf.to_float(w),
33 | roi_channels[3] / tf.to_float(h),
34 | roi_channels[2] / tf.to_float(w),
35 | ], axis=1)
36 | pre_pool_size = self._pool_size * 2
37 | crops = tf.image.crop_and_resize(shared_layers,
38 | tf.stop_gradient(bboxes),
39 | box_ind=tf.to_int32(batch_ids),
40 | crop_size=[pre_pool_size, pre_pool_size],
41 | name="crops")
42 | return self._max_pool(crops)
43 |
44 |
45 | class RoiPoolingCropAndResize(tf.keras.Model):
46 | def __init__(self, pool_size, max_pooling_flag=True):
47 | super().__init__()
48 | self._pool_size = pool_size
49 | self._max_pooling_flag = max_pooling_flag
50 | self._concat_layer = layers.Concatenate(axis=0)
51 | self._max_pool = layers.MaxPooling2D(padding='same')
52 |
53 | def call(self, inputs, training=None, mask=None):
54 | """
55 | Takes the backbone feature map and the rpn proposals (the output of RegionProposal),
56 | and returns the roi pooling result: a fixed-size feature map cropped for every rpn proposal.
57 | :param inputs:
58 | :param training:
59 | :param mask:
60 | :return:
61 | """
62 | # [1, height, width, channels] [num_rois, 4]
63 | shared_layers, rois, extractor_stride = inputs
64 | rois = rois / extractor_stride
65 |
66 | batch_ids = tf.zeros([tf.shape(rois)[0]], dtype=tf.int32)
67 | h, w = shared_layers.get_shape().as_list()[1:3]
68 | roi_channels = tf.split(rois, 4, axis=1)
69 | bboxes = tf.concat([
70 | roi_channels[1] / tf.to_float(h - 1),
71 | roi_channels[0] / tf.to_float(w - 1),
72 | roi_channels[3] / tf.to_float(h - 1),
73 | roi_channels[2] / tf.to_float(w - 1),
74 | ], axis=1)
75 | if self._max_pooling_flag:
76 | pre_pool_size = self._pool_size * 2
77 |
78 | # major bug fix: shared_layers must still take part in back-propagation; only bboxes are excluded
79 | crops = tf.image.crop_and_resize(shared_layers,
80 | tf.stop_gradient(bboxes),
81 | box_ind=tf.to_int32(batch_ids),
82 | crop_size=[pre_pool_size, pre_pool_size],
83 | name="crops")
84 | return self._max_pool(crops)
85 | else:
86 | return tf.image.crop_and_resize(shared_layers,
87 | tf.stop_gradient(bboxes),
88 | box_ind=tf.to_int32(batch_ids),
89 | crop_size=[self._pool_size, self._pool_size],
90 | name="crops")
91 |
92 |
93 | def crop_and_resize(image, boxes, box_ind, crop_size, pad_border=True):
94 | assert isinstance(crop_size, int), crop_size
95 | boxes = tf.stop_gradient(boxes)
96 |
97 | # TF's crop_and_resize produces zeros on border
98 | if pad_border:
99 | # this can be quite slow
100 | image = tf.pad(image, [[0, 0], [1, 1], [1, 1], [0, 0]], mode='SYMMETRIC')
101 | boxes = boxes + 1
102 |
103 | def transform_fpcoor_for_tf(boxes, image_shape, crop_shape):
104 | """
105 | The way tf.image.crop_and_resize works (with normalized box):
106 | Initial point (the value of output[0]): x0_box * (W_img - 1)
107 | Spacing: w_box * (W_img - 1) / (W_crop - 1)
108 | Use the above grid to bilinear sample.
109 | However, what we want is (with fpcoor box):
110 | Spacing: w_box / W_crop
111 | Initial point: x0_box + spacing/2 - 0.5
112 | (-0.5 because bilinear sample (in my definition) assumes floating point coordinate
113 | (0.0, 0.0) is the same as pixel value (0, 0))
114 | This function transforms fpcoor boxes into a format usable by tf.image.crop_and_resize
115 | Returns:
116 | y1x1y2x2
117 | """
118 | x0, y0, x1, y1 = tf.split(boxes, 4, axis=1)
119 |
120 | spacing_w = (x1 - x0) / tf.cast(crop_shape[1], tf.float32)
121 | spacing_h = (y1 - y0) / tf.cast(crop_shape[0], tf.float32)
122 |
123 | imshape = [tf.cast(image_shape[0] - 1, tf.float32), tf.cast(image_shape[1] - 1, tf.float32)]
124 | nx0 = (x0 + spacing_w / 2 - 0.5) / imshape[1]
125 | ny0 = (y0 + spacing_h / 2 - 0.5) / imshape[0]
126 |
127 | nw = spacing_w * tf.cast(crop_shape[1] - 1, tf.float32) / imshape[1]
128 | nh = spacing_h * tf.cast(crop_shape[0] - 1, tf.float32) / imshape[0]
129 |
130 | return tf.concat([ny0, nx0, ny0 + nh, nx0 + nw], axis=1)
131 |
132 | image_shape = tf.shape(image)[1:3]
133 | boxes = transform_fpcoor_for_tf(boxes, image_shape, [crop_size, crop_size])
134 | ret = tf.image.crop_and_resize(
135 | image, boxes, tf.cast(box_ind, tf.int32),
136 | crop_size=[crop_size, crop_size])
137 | return ret
138 |
139 |
140 | def roi_align(featuremap, boxes, resolution):
141 | """
142 | Args:
143 | featuremap: 1xHxWxC
144 | boxes: [0, 1]
145 | resolution: output spatial resolution
146 | Returns:
147 | NxCx res x res
148 | """
149 | # sample 4 locations per roi bin
150 | ret = crop_and_resize(
151 | featuremap, boxes,
152 | tf.zeros([tf.shape(boxes)[0]], dtype=tf.int32),
153 | resolution * 2)
154 | ret = tf.nn.avg_pool(ret, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')
155 | return ret
156 |
157 |
158 | class RoiPoolingRoiAlign(tf.keras.Model):
159 | def __init__(self, pool_size):
160 | super().__init__()
161 | self._pool_size = pool_size
162 | self._concat_layer = layers.Concatenate(axis=0)
163 |
164 | def call(self, inputs, training=None, mask=None):
165 | """
166 | Takes the backbone feature map and the rpn proposals (the output of RegionProposal),
167 | and returns the roi pooling result: a fixed-size feature map cropped for every rpn proposal.
168 | :param inputs:
169 | :param training:
170 | :param mask:
171 | :return:
172 | """
173 | # [1, height, width, channels] [num_rois, 4]
174 | shared_layers, rois, extractor_stride = inputs
175 | rois = rois / extractor_stride
176 | net = roi_align(shared_layers, tf.stop_gradient(rois), self._pool_size)
177 | return net
178 |
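A quick numeric check of the transform_fpcoor_for_tf math above (pure arithmetic): take an image of width 11 (so W_img - 1 = 10), a crop of width 2, and an fpcoor box spanning x in [2, 6]:

# desired grid:     spacing = (6 - 2) / 2 = 2.0, start = 2 + 2.0 / 2 - 0.5 = 2.5
#                   -> samples at x = 2.5 and 4.5
# transformed box:  nx0 = (2 + 2.0 / 2 - 0.5) / 10 = 0.25, nw = 2.0 * (2 - 1) / 10 = 0.2
# crop_and_resize:  start = 0.25 * 10 = 2.5, spacing = 0.2 * 10 / (2 - 1) = 2.0
#                   -> the same grid, as intended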
-------------------------------------------------------------------------------- /object_detection/protos/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irvingzhang0512/tf_eager_object_detection/e977217cb9d5fc2975292d55b1e5cae484f9e0dd/object_detection/protos/__init__.py -------------------------------------------------------------------------------- /object_detection/protos/string_int_label_map.proto: --------------------------------------------------------------------------------
1 | syntax = "proto2";
2 |
3 | package object_detection.protos;
4 |
5 | message StringIntLabelMapItem {
6 | // String name. The most common practice is to set this to a MID or synsets
7 | // id.
8 | optional string name = 1;
9 |
10 | // Integer id that maps to the string name above. Label ids should start from
11 | // 1.
12 | optional int32 id = 2;
13 |
14 | // Human readable string label.
15 | optional string display_name = 3;
16 | };
17 |
18 | message StringIntLabelMap {
19 | repeated StringIntLabelMapItem item = 1;
20 | }; -------------------------------------------------------------------------------- /object_detection/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irvingzhang0512/tf_eager_object_detection/e977217cb9d5fc2975292d55b1e5cae484f9e0dd/object_detection/utils/__init__.py -------------------------------------------------------------------------------- /object_detection/utils/anchor_generator.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from six.moves import range
4 |
5 | __all__ = ['generate_anchor_base', 'generate_by_anchor_base_np', 'generate_by_anchor_base_tf', 'make_anchors']
6 |
7 | """
8 | References several implementations, including:
9 | The numpy version mainly follows:
10 | https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/rpn/generate_anchors.py
11 |
12 | The TF version mainly follows (really the numpy code above, rewritten in TF):
13 | https://github.com/Viredery/tf-eager-fasterrcnn/blob/master/detection/core/anchor/anchor_generator.py
14 |
15 |
16 | Usage:
17 | either use `generate_anchor_base` together with `generate_by_anchor_base_np`/`generate_by_anchor_base_tf`,
18 | or use `make_anchors` directly.
19 | For a concrete example, see the usage sketch after this file.
20 | """
21 |
22 |
23 | def generate_by_anchor_base_np(anchor_base, feat_stride, height, width):
24 | # Enumerate all shifted anchors:
25 | #
26 | # add A anchors (1, A, 4) to
27 | # cell K shifts (K, 1, 4) to get
28 | # shift anchors (K, A, 4)
29 | # reshape to (K*A, 4) shifted anchors
30 | # return (K*A, 4)
31 |
32 | import numpy as xp
33 | shift_y = xp.arange(0, height, feat_stride)
34 | shift_x = xp.arange(0, width, feat_stride)
35 | shift_x, shift_y = xp.meshgrid(shift_x, shift_y)
36 | shift = xp.stack((shift_y.ravel(), shift_x.ravel(),
37 | shift_y.ravel(), shift_x.ravel()), axis=1)
38 |
39 | A = anchor_base.shape[0]
40 | K = shift.shape[0]
41 | anchor = anchor_base.reshape((1, A, 4)) + shift.reshape((1, K, 4)).transpose((1, 0, 2))
42 | anchor = anchor.reshape((K * A, 4)).astype(np.float32)
43 | return anchor
44 |
45 |
46 | def generate_by_anchor_base_tf(anchor_base, feat_stride, height, width):
47 | shift_x = tf.range(width) * feat_stride  # width
48 | shift_y = tf.range(height) * feat_stride  # height
49 | shift_x, shift_y = tf.meshgrid(shift_x, shift_y)
50 | sx = tf.reshape(shift_x, shape=(-1,))
51 | sy = tf.reshape(shift_y, shape=(-1,))
52 | shifts = tf.transpose(tf.stack([sx, sy, sx, sy]))
53 |
54 | K = tf.multiply(width, height)
55 | A = anchor_base.shape[0]
56 | shifts = tf.transpose(tf.reshape(shifts, shape=[1, K, 4]), perm=(1, 0, 2))
57 | anchor_constant = tf.to_float(tf.reshape(anchor_base, (1, A, 4)))
58 | anchors_tf = tf.reshape(tf.add(anchor_constant, tf.to_float(shifts)), shape=(-1, 4))
59 |
60 | return tf.cast(anchors_tf, dtype=tf.float32)
61 |
62 |
63 | def generate_anchor_base(base_size=16, ratios=[0.5, 1, 2],
64 | scales=2 ** np.arange(3, 6)):
65 | """
66 | There are two styles of anchor-base generation; this appears to be the one used in the original paper.
67 | The anchor base fixes the final anchors' widths and heights; the generate_by_anchor_base functions then place the anchor centers.
68 | All three inputs affect the final widths/heights:
69 | ratios fixes the aspect ratios;
70 | base_size and scales jointly set the absolute size, i.e. base_size * scales gives the final anchor sizes.
71 | Generate anchor (reference) windows by enumerating aspect ratios X
72 | scales wrt a reference (0, 0, 15, 15) window.
73 | """ 74 | 75 | ratios = np.array(ratios) 76 | scales = np.array(scales) 77 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 78 | ratio_anchors = _ratio_enum(base_anchor, ratios) 79 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 80 | for i in range(ratio_anchors.shape[0])]) 81 | return anchors 82 | 83 | 84 | def _whctrs(anchor): 85 | """ 86 | Return width, height, x center, and y center for an anchor (window). 87 | """ 88 | 89 | w = anchor[2] - anchor[0] + 1 90 | h = anchor[3] - anchor[1] + 1 91 | x_ctr = anchor[0] + 0.5 * (w - 1) 92 | y_ctr = anchor[1] + 0.5 * (h - 1) 93 | return w, h, x_ctr, y_ctr 94 | 95 | 96 | def _mkanchors(ws, hs, x_ctr, y_ctr): 97 | """ 98 | Given a vector of widths (ws) and heights (hs) around a center 99 | (x_ctr, y_ctr), output a set of anchors (windows). 100 | """ 101 | 102 | ws = ws[:, np.newaxis] 103 | hs = hs[:, np.newaxis] 104 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 105 | y_ctr - 0.5 * (hs - 1), 106 | x_ctr + 0.5 * (ws - 1), 107 | y_ctr + 0.5 * (hs - 1))) 108 | return anchors 109 | 110 | 111 | def _ratio_enum(anchor, ratios): 112 | """ 113 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 114 | """ 115 | 116 | w, h, x_ctr, y_ctr = _whctrs(anchor) 117 | size = w * h 118 | size_ratios = size / ratios 119 | ws = np.round(np.sqrt(size_ratios)) 120 | hs = np.round(ws * ratios) 121 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 122 | return anchors 123 | 124 | 125 | def _scale_enum(anchor, scales): 126 | """ 127 | Enumerate a set of anchors for each scale wrt an anchor. 128 | """ 129 | 130 | w, h, x_ctr, y_ctr = _whctrs(anchor) 131 | ws = w * scales 132 | hs = h * scales 133 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 134 | return anchors 135 | 136 | 137 | def make_anchors(base_anchor_size, anchor_scales, anchor_ratios, 138 | featuremap_height, featuremap_width, 139 | stride, name='make_anchors'): 140 | with tf.variable_scope(name): 141 | base_anchor = tf.constant([0, 0, base_anchor_size, base_anchor_size], tf.float32) # [x_center, y_center, w, h] 142 | 143 | ws, hs = enum_ratios(enum_scales(base_anchor, anchor_scales), 144 | anchor_ratios) # per locations ws and hs 145 | 146 | x_centers = tf.range(featuremap_width, dtype=tf.float32) * stride 147 | y_centers = tf.range(featuremap_height, dtype=tf.float32) * stride 148 | 149 | x_centers, y_centers = tf.meshgrid(x_centers, y_centers) 150 | 151 | ws, x_centers = tf.meshgrid(ws, x_centers) 152 | hs, y_centers = tf.meshgrid(hs, y_centers) 153 | 154 | anchor_centers = tf.stack([x_centers, y_centers], 2) 155 | anchor_centers = tf.reshape(anchor_centers, [-1, 2]) 156 | 157 | box_sizes = tf.stack([ws, hs], axis=2) 158 | box_sizes = tf.reshape(box_sizes, [-1, 2]) 159 | # anchors = tf.concat([anchor_centers, box_sizes], axis=1) 160 | anchors = tf.concat([anchor_centers - 0.5 * box_sizes, 161 | anchor_centers + 0.5 * box_sizes], axis=1) 162 | return anchors 163 | 164 | 165 | def enum_scales(base_anchor, anchor_scales): 166 | anchor_scales = base_anchor * tf.constant(anchor_scales, dtype=tf.float32, shape=(len(anchor_scales), 1)) 167 | return anchor_scales 168 | 169 | 170 | def enum_ratios(anchors, anchor_ratios): 171 | ws = anchors[:, 2] # for base anchor: w == h 172 | hs = anchors[:, 3] 173 | sqrt_ratios = tf.sqrt(tf.constant(anchor_ratios)) 174 | 175 | ws = tf.reshape(ws / sqrt_ratios[:, tf.newaxis], [-1, 1]) 176 | hs = tf.reshape(hs * sqrt_ratios[:, tf.newaxis], [-1, 1]) 177 | 178 | return hs, ws 179 | 
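As the header comment says, the anchor-base helpers are meant to be used together; a minimal usage sketch (feature-map sizes are illustrative, for a 600x800 input with stride 16):

import numpy as np
from object_detection.utils.anchor_generator import generate_anchor_base, generate_by_anchor_base_np

anchor_base = generate_anchor_base(base_size=16, ratios=[0.5, 1, 2], scales=2 ** np.arange(3, 6))
# anchor_base: [9, 4], anchor shapes around the (0, 0, 15, 15) reference window
anchors = generate_by_anchor_base_np(anchor_base, feat_stride=16, height=600, width=800)
# anchors: [9 * 38 * 50, 4] = [17100, 4], one set of 9 anchors per feature-map cell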
-------------------------------------------------------------------------------- /object_detection/utils/bbox_np.py: --------------------------------------------------------------------------------
1 | # copy from https://github.com/tensorpack/tensorpack/blob/master/examples/FasterRCNN/utils/np_box_ops.py
2 | import numpy as np
3 |
4 |
5 | __all__ = ['pairwise_iou', 'ioa', 'bboxes_clip_filter', 'bboxes_range_filter']
6 |
7 |
8 | def area(boxes):
9 | """Computes area of boxes.
10 | Args:
11 | boxes: Numpy array with shape [N, 4] holding N boxes
12 | Returns:
13 | a numpy array with shape [N] representing box areas
14 | """
15 | return (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
16 |
17 |
18 | def intersection(boxes1, boxes2):
19 | """Compute pairwise intersection areas between boxes.
20 | Args:
21 | boxes1: a numpy array with shape [N, 4] holding N boxes
22 | boxes2: a numpy array with shape [M, 4] holding M boxes
23 | Returns:
24 | a numpy array with shape [N, M] representing pairwise intersection areas
25 | """
26 | [y_min1, x_min1, y_max1, x_max1] = np.split(boxes1, 4, axis=1)
27 | [y_min2, x_min2, y_max2, x_max2] = np.split(boxes2, 4, axis=1)
28 |
29 | all_pairs_min_ymax = np.minimum(y_max1, np.transpose(y_max2))
30 | all_pairs_max_ymin = np.maximum(y_min1, np.transpose(y_min2))
31 | intersect_heights = np.maximum(
32 | np.zeros(all_pairs_max_ymin.shape, dtype='f4'),
33 | all_pairs_min_ymax - all_pairs_max_ymin + 1)
34 | all_pairs_min_xmax = np.minimum(x_max1, np.transpose(x_max2))
35 | all_pairs_max_xmin = np.maximum(x_min1, np.transpose(x_min2))
36 | intersect_widths = np.maximum(
37 | np.zeros(all_pairs_max_xmin.shape, dtype='f4'),
38 | all_pairs_min_xmax - all_pairs_max_xmin + 1)
39 | return intersect_heights * intersect_widths
40 |
41 |
42 | def pairwise_iou(boxes1, boxes2):
43 | """Computes pairwise intersection-over-union between box collections.
44 | Args:
45 | boxes1: a numpy array with shape [N, 4] holding N boxes.
46 | boxes2: a numpy array with shape [M, 4] holding M boxes.
47 | Returns:
48 | a numpy array with shape [N, M] representing pairwise iou scores. (A worked example follows this file.)
49 | """
50 | intersect = intersection(boxes1, boxes2)
51 | area1 = area(boxes1)
52 | area2 = area(boxes2)
53 | union = np.expand_dims(area1, axis=1) + np.expand_dims(
54 | area2, axis=0) - intersect
55 | return intersect / union
56 |
57 |
58 | def ioa(boxes1, boxes2):
59 | """Computes pairwise intersection-over-area between box collections.
60 | Intersection-over-area (ioa) between two boxes box1 and box2 is defined as
61 | their intersection area over box2's area. Note that ioa is not symmetric,
62 | that is, IOA(box1, box2) != IOA(box2, box1).
63 | Args:
64 | boxes1: a numpy array with shape [N, 4] holding N boxes.
65 | boxes2: a numpy array with shape [M, 4] holding M boxes.
66 | Returns:
67 | a numpy array with shape [N, M] representing pairwise ioa scores.
68 | """
69 | intersect = intersection(boxes1, boxes2)
70 | inv_areas = np.expand_dims(1.0 / area(boxes2), axis=0)
71 | return intersect * inv_areas
72 |
73 |
74 | def bboxes_clip_filter(rpn_proposals, min_value, max_height, max_width, min_edge=None):
75 | """
76 | numpy implementation
77 | Clip proposals to the image bounds and filter them by minimum edge length.
78 | :param rpn_proposals: bboxes, ordered (ymin, xmin, ymax, xmax)
79 | :param min_value: lower bound for all coordinates
80 | :param max_height: image height
81 | :param max_width: image width
82 | :param min_edge: minimum bbox edge length; None disables the filter
83 | :return:
84 | """
85 | rpn_proposals[rpn_proposals < min_value] = min_value
86 | rpn_proposals[:, ::2][rpn_proposals[:, ::2] > max_height - 1.0] = max_height - 1.0
87 | rpn_proposals[:, 1::2][rpn_proposals[:, 1::2] > max_width - 1.0] = max_width - 1.0
88 |
89 | if min_edge is None:
90 | return rpn_proposals, np.arange(len(rpn_proposals))
91 |
92 | new_rpn_proposals = []
93 | rpn_proposals_idx = []
94 | for idx, (ymin, xmin, ymax, xmax) in enumerate(rpn_proposals):
95 | if (ymax - ymin + 1.0) >= min_edge and (xmax - xmin + 1.0) >= min_edge:
96 | new_rpn_proposals.append([ymin, xmin, ymax, xmax])
97 | rpn_proposals_idx.append(idx)
98 | return np.array(new_rpn_proposals), np.array(rpn_proposals_idx)
99 |
100 |
101 | def bboxes_range_filter(anchors, max_height, max_width):
102 | """
103 | Filter anchors: keep only those fully inside the image.
104 | :param anchors:
105 | :param max_height: image height
106 | :param max_width: image width
107 | :return:
108 | """
109 | index_inside = np.where(
110 | (anchors[:, 0] >= 0) &
111 | (anchors[:, 1] >= 0) &
112 | (anchors[:, 2] <= max_height - 1) &
113 | (anchors[:, 3] <= max_width - 1)
114 | )[0]
115 | return index_inside
116 |
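A worked example of the +1 (inclusive pixel coordinate) convention used throughout this file:

import numpy as np
from object_detection.utils.bbox_np import pairwise_iou

boxes1 = np.array([[0., 0., 9., 9.]])  # area (9 - 0 + 1) * (9 - 0 + 1) = 100
boxes2 = np.array([[0., 0., 4., 9.]])  # area 5 * 10 = 50
print(pairwise_iou(boxes1, boxes2))    # intersection 50, union 100 + 50 - 50 -> [[0.5]]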
-------------------------------------------------------------------------------- /object_detection/utils/bbox_tf.py: --------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 |
4 | __all__ = ['pairwise_iou', 'bboxes_clip_filter', 'bboxes_range_filter']
5 |
6 |
7 | def area(boxes):
8 | """
9 | Args:
10 | boxes: nx4 floatbox
11 | Returns:
12 | n
13 | """
14 | x_min, y_min, x_max, y_max = tf.split(boxes, 4, axis=1)
15 | return tf.squeeze((y_max - y_min + 1.0) * (x_max - x_min + 1.0), [1])
16 |
17 |
18 | def pairwise_intersection(boxlist1, boxlist2):
19 | """Compute pairwise intersection areas between boxes.
20 | Args:
21 | boxlist1: Nx4 floatbox
22 | boxlist2: Mx4
23 | Returns:
24 | a tensor with shape [N, M] representing pairwise intersections
25 | """
26 | x_min1, y_min1, x_max1, y_max1 = tf.split(boxlist1, 4, axis=1)
27 | x_min2, y_min2, x_max2, y_max2 = tf.split(boxlist2, 4, axis=1)
28 | all_pairs_min_ymax = tf.minimum(y_max1, tf.transpose(y_max2))
29 | all_pairs_max_ymin = tf.maximum(y_min1, tf.transpose(y_min2))
30 | intersect_heights = tf.maximum(0.0, all_pairs_min_ymax - all_pairs_max_ymin + 1.0)
31 | all_pairs_min_xmax = tf.minimum(x_max1, tf.transpose(x_max2))
32 | all_pairs_max_xmin = tf.maximum(x_min1, tf.transpose(x_min2))
33 | intersect_widths = tf.maximum(0.0, all_pairs_min_xmax - all_pairs_max_xmin + 1.0)
34 | return intersect_heights * intersect_widths
35 |
36 |
37 | def pairwise_iou(boxlist1, boxlist2):
38 | """Computes pairwise intersection-over-union between box collections.
39 | copy from https://github.com/tensorpack/tensorpack/blob/master/examples/FasterRCNN/utils/box_ops.py
40 | Args:
41 | boxlist1: Nx4 floatbox
42 | boxlist2: Mx4
43 | Returns:
44 | a tensor with shape [N, M] representing pairwise iou scores.
45 | """
46 | boxlist1 = tf.to_float(boxlist1)
47 | boxlist2 = tf.to_float(boxlist2)
48 |
49 | intersections = pairwise_intersection(boxlist1, boxlist2)
50 | areas1 = area(boxlist1)
51 | areas2 = area(boxlist2)
52 | unions = (
53 | tf.expand_dims(areas1, 1) + tf.expand_dims(areas2, 0) - intersections)
54 | return tf.where(
55 | tf.equal(intersections, 0.0),
56 | tf.zeros_like(intersections), tf.truediv(intersections, unions))
57 |
58 |
59 | def bboxes_clip_filter(rpn_proposals, min_value, max_height, max_width, min_edge=None):
60 | """
61 | TensorFlow implementation
62 | Clip proposals to the image bounds and filter them by minimum edge length.
63 | :param rpn_proposals: bboxes, ordered (xmin, ymin, xmax, ymax)
64 | :param min_value: lower bound for all coordinates
65 | :param max_height: image height
66 | :param max_width: image width
67 | :param min_edge: minimum bbox edge length; None disables the filter
68 | :return:
69 | """
70 | channels = tf.split(rpn_proposals, 4, axis=1)
71 | channels[0] = tf.maximum(tf.minimum(channels[0], max_width - 1), min_value)
72 | channels[1] = tf.maximum(tf.minimum(channels[1], max_height - 1), min_value)
73 | channels[2] = tf.maximum(tf.minimum(channels[2], max_width - 1), min_value)
74 | channels[3] = tf.maximum(tf.minimum(channels[3], max_height - 1), min_value)
75 | rpn_proposals = tf.concat(channels, axis=1)
76 |
77 | if min_edge is None:
78 | return rpn_proposals, tf.range(rpn_proposals.shape[0])
79 |
80 | min_edge = tf.to_float(min_edge)
81 | y_len = tf.to_float(channels[2] - channels[0] + 1.0)
82 | x_len = tf.to_float(channels[3] - channels[1] + 1.0)
83 | rpn_proposals_idx = tf.where(tf.logical_and(x_len >= min_edge, y_len >= min_edge))[:, 0]
84 | return tf.gather(rpn_proposals, rpn_proposals_idx), rpn_proposals_idx
85 |
86 |
87 | def bboxes_range_filter(anchors, max_height, max_width):
88 | """
89 | Filter anchors: keep only those fully inside the image.
90 | :param anchors:
91 | :param max_height: image height
92 | :param max_width: image width
93 | :return:
94 | """
95 | index_inside = tf.where(
96 | tf.logical_and(
97 | tf.logical_and((anchors[:, 0] >= 0), (anchors[:, 1] >= 0)),
98 | tf.logical_and((anchors[:, 2] <= max_width - 1), (anchors[:, 3] <= max_height - 1)),
99 | )
100 | )[:, 0]
101 | return index_inside
102 | -------------------------------------------------------------------------------- /object_detection/utils/bbox_transform.py: --------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 |
4 | def encode_bbox_with_mean_and_std(src_bbox, dst_bbox, target_means, target_stds):
5 | target_means = tf.constant(target_means, dtype=tf.float32)
6 | target_stds = tf.constant(target_stds, dtype=tf.float32)
7 |
8 | box = tf.cast(src_bbox, tf.float32)
9 | gt_box = tf.cast(dst_bbox, tf.float32)
10 |
11 | width = box[..., 2] - box[..., 0] + 1.0
12 | height = box[..., 3] - box[..., 1] + 1.0
13 | center_x = box[..., 0] + 0.5 * width
14 | center_y = box[..., 1] + 0.5 * height
15 |
16 | gt_width = gt_box[..., 2] - gt_box[..., 0] + 1.0
17 | gt_height = gt_box[..., 3] - gt_box[..., 1] + 1.0
18 | gt_center_x = gt_box[..., 0] + 0.5 * gt_width
19 | gt_center_y = gt_box[..., 1] + 0.5 * gt_height
20 |
21 | dx = (gt_center_x - center_x) / width
22 | dy = (gt_center_y - center_y) / height
23 | dw = tf.log(gt_width / width)
24 | dh = tf.log(gt_height / height)
25 |
26 | delta = tf.stack([dx, dy, dw, dh], axis=-1)
27 | delta = (delta - target_means) / target_stds
28 |
29 | return delta
30 |
31 |
32 | def decode_bbox_with_mean_and_std(anchors, bboxes_txtytwth, target_means, target_stds):
33 | target_means = tf.constant(
34 | target_means, dtype=tf.float32)
35 | target_stds = tf.constant(
36 | target_stds, dtype=tf.float32)
37 | delta = bboxes_txtytwth * target_stds + target_means
38 |
39 | # TODO fix whether to use +1 in the following two lines (see the round-trip example after this file).
40 | width = anchors[:, 2] - anchors[:, 0] + 1
41 | height = anchors[:, 3] - anchors[:, 1] + 1
42 | center_x = anchors[:, 0] + 0.5 * width
43 | center_y = anchors[:, 1] + 0.5 * height
44 |
45 | center_x += delta[:, 0] * width
46 | center_y += delta[:, 1] * height
47 | width *= tf.exp(delta[:, 2])
48 | height *= tf.exp(delta[:, 3])
49 |
50 | x1 = center_x - 0.5 * width
51 | y1 = center_y - 0.5 * height
52 | x2 = x1 + width
53 | y2 = y1 + height
54 | result = tf.stack([x1, y1, x2, y2], axis=1)
55 | return result
56 |
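The two functions above are (almost) inverses; a quick eager-mode round trip makes the "+1" TODO in decode_bbox_with_mean_and_std concrete (standalone sketch, illustrative values):

import tensorflow as tf
from object_detection.utils.bbox_transform import encode_bbox_with_mean_and_std, decode_bbox_with_mean_and_std

tf.enable_eager_execution()

anchors = tf.constant([[10., 10., 50., 60.]])
gt = tf.constant([[12., 8., 48., 70.]])
means, stds = [0., 0., 0., 0.], [0.1, 0.1, 0.2, 0.2]
deltas = encode_bbox_with_mean_and_std(anchors, gt, means, stds)
recovered = decode_bbox_with_mean_and_std(anchors, deltas, means, stds)
# recovered == [[12., 8., 49., 71.]]: xmin/ymin match the ground truth exactly,
# while xmax/ymax come back larger by 1, because decode adds the full (+1) width
# to x1 -- exactly the inconsistency flagged by the TODO above.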
-------------------------------------------------------------------------------- /object_detection/utils/pytorch_to_tf.py: --------------------------------------------------------------------------------
1 | import pickle
2 | import numpy as np
3 |
4 |
5 | def pytorch_to_tf_np(v):
6 | if v.ndim == 4:
7 | # OUT, IN, H, W --> H, W, IN, OUT
8 | return np.ascontiguousarray(v.transpose(2, 3, 1, 0))
9 | if v.ndim == 2:
10 | return np.ascontiguousarray(v.transpose())
11 | return v
12 |
13 |
14 | def convert_pth_to_dict(pth_dir, dict_path):
15 | import torch
16 | torch_file = torch.load(pth_dir)
17 |
18 | tf_dict = {}
19 | for key in torch_file['model'].keys():
20 | tf_dict[key] = pytorch_to_tf_np(torch_file['model'][key].cpu().numpy())
21 |
22 | with open(dict_path, 'wb') as f:
23 | pickle.dump(tf_dict, f)
24 | -------------------------------------------------------------------------------- /object_detection/utils/visual_utils.py: --------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | import cv2
5 |
6 |
7 | def draw_bboxes_with_labels(image, bboxes, label_texts):
8 | """
9 | Draw bboxes and their labels on an ndarray or tf.Tensor image.
10 | :param image: an image, shape [height, width, channels]
11 | :param bboxes: bounding boxes, shape [bbox_number, 4], ordered ymin, xmin, ymax, xmax,
12 | as floats in [0, height/width]
13 | :param label_texts: the labels to display, shape (bbox_number, )
14 | :return: the image with bboxes drawn, an ndarray with the same shape as the input
15 | """
16 | if isinstance(image, tf.Tensor):
17 | image = image.numpy()
18 | if isinstance(bboxes, tf.Tensor):
19 | bboxes = bboxes.numpy()
20 | if isinstance(label_texts, tf.Tensor):
21 | label_texts = label_texts.numpy()
22 | idx = 0
23 | for bbox in bboxes:
24 | ymin, xmin, ymax, xmax = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])
25 | cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
26 | if label_texts is not None:
27 | cv2.putText(img=image,
28 | text=str(label_texts[idx]),
29 | org=(xmin, ymin + 20),
30 | fontFace=cv2.FONT_HERSHEY_COMPLEX,
31 | fontScale=1e-3 * image.shape[0],
32 | color=(0, 0, 255),
33 | thickness=2,
34 | )
35 | idx += 1
36 | return image
37 |
38 |
39 | def show_one_image(preprocessed_image, bboxes, labels_text=None, preprocessing_type='caffe', caffe_pixel_means=None,
40 | figsize=(15, 10), enable_matplotlib=True):
41 | """
42 | Show one image with its bboxes.
43 | :param preprocessed_image: preprocessed image by `preprocessing_type`, if caffe then bgr, if tf then rgb
44 | :param bboxes:
45 | :param labels_text:
46 | :param preprocessing_type:
47 | :param caffe_pixel_means:
48 | :param figsize:
49 | :param enable_matplotlib:
50 | :return:
51 | """
52 | if isinstance(preprocessed_image, tf.Tensor):
53 | preprocessed_image = tf.squeeze(preprocessed_image, axis=0).numpy()
54 | if isinstance(bboxes, tf.Tensor):
55 | bboxes = bboxes.numpy()
56 | if isinstance(labels_text, tf.Tensor): 57
| labels_text = labels_text.numpy() 58 | if preprocessing_type == 'caffe': 59 | cur_means = caffe_pixel_means 60 | preprocessed_image[..., 0] += cur_means[0] 61 | preprocessed_image[..., 1] += cur_means[1] 62 | preprocessed_image[..., 2] += cur_means[2] 63 | preprocessed_image = preprocessed_image[..., ::-1] 64 | preprocessed_image = preprocessed_image.astype(np.uint8) 65 | elif preprocessing_type == 'tf': 66 | preprocessed_image = ((preprocessed_image + 1.0) / 2.0) * 255.0 67 | preprocessed_image = preprocessed_image.astype(np.uint8) 68 | elif preprocessing_type is None: 69 | pass 70 | else: 71 | raise ValueError('unknown preprocess_type {}'.format(preprocessing_type)) 72 | image_with_bboxes = draw_bboxes_with_labels(preprocessed_image, bboxes, labels_text) 73 | if enable_matplotlib: 74 | plt.figure(figsize=figsize) 75 | plt.imshow(image_with_bboxes) 76 | plt.show() 77 | 78 | return image_with_bboxes 79 | -------------------------------------------------------------------------------- /scripts/eval_coco.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import os 4 | import sys 5 | import argparse 6 | import json 7 | 8 | from object_detection.dataset.dataset_factory import dataset_factory 9 | from object_detection.model.model_factory import model_factory 10 | from object_detection.config.config_factory import config_factory 11 | from tensorflow.contrib.eager.python import saver as eager_saver 12 | from object_detection.utils.bbox_transform import decode_bbox_with_mean_and_std 13 | from object_detection.utils.bbox_tf import bboxes_clip_filter 14 | 15 | from pycocotools.coco import COCO 16 | from pycocotools.cocoeval import COCOeval 17 | 18 | 19 | os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # see issue #152 20 | num_classes = 81 21 | 22 | coco_id_to_name_list = [ 23 | 'back_ground', 'person', 'bicycle', 'car', 'motorcycle', 24 | 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 25 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 26 | 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 27 | 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 28 | 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 29 | 'sports ball', 'kite', 'baseball bat', 'baseball glove', 30 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 31 | 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 32 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 33 | 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 34 | 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 35 | 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 36 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 37 | 'book', 'clock', 'vase', 'scissors', 'teddy bear', 38 | 'hair drier', 'toothbrush'] 39 | 40 | coco_name_to_cat_id_dict = { 41 | 'back_ground': 0, 42 | 'person': 1, 'bicycle': 2, 'car': 3, 'motorcycle': 4, 43 | 'airplane': 5, 'bus': 6, 'train': 7, 'truck': 8, 'boat': 9, 44 | 'traffic light': 10, 'fire hydrant': 11, 'stop sign': 13, 45 | 'parking meter': 14, 'bench': 15, 'bird': 16, 'cat': 17, 46 | 'dog': 18, 'horse': 19, 'sheep': 20, 'cow': 21, 'elephant': 22, 47 | 'bear': 23, 'zebra': 24, 'giraffe': 25, 'backpack': 27, 48 | 'umbrella': 28, 'handbag': 31, 'tie': 32, 'suitcase': 33, 49 | 'frisbee': 34, 'skis': 35, 'snowboard': 36, 'sports ball': 37, 50 | 'kite': 38, 'baseball bat': 39, 'baseball glove': 40, 51 | 'skateboard': 41, 'surfboard': 42, 'tennis racket': 43, 52 | 'bottle': 44, 
'wine glass': 46, 'cup': 47, 'fork': 48,
53 | 'knife': 49, 'spoon': 50, 'bowl': 51, 'banana': 52, 'apple': 53,
54 | 'sandwich': 54, 'orange': 55, 'broccoli': 56, 'carrot': 57,
55 | 'hot dog': 58, 'pizza': 59, 'donut': 60, 'cake': 61,
56 | 'chair': 62, 'couch': 63, 'potted plant': 64, 'bed': 65,
57 | 'dining table': 67, 'toilet': 70, 'tv': 72, 'laptop': 73,
58 | 'mouse': 74, 'remote': 75, 'keyboard': 76, 'cell phone': 77,
59 | 'microwave': 78, 'oven': 79, 'toaster': 80, 'sink': 81,
60 | 'refrigerator': 82, 'book': 84, 'clock': 85, 'vase': 86,
61 | 'scissors': 87, 'teddy bear': 88, 'hair drier': 89,
62 | 'toothbrush': 90}
63 |
64 |
65 | def eval_by_cocotools(res_file_path, mode, root_path):
66 | coco_gt = COCO(os.path.join(root_path, 'annotations', 'instances_{}2017.json'.format(mode)))
67 | coco_dt = coco_gt.loadRes(res_file_path)
68 | coco_eval = COCOeval(coco_gt, coco_dt, iouType='bbox')
69 |
70 | coco_eval.params.imgIds = coco_dt.getImgIds()
71 | coco_eval.evaluate()
72 | coco_eval.accumulate()
73 | coco_eval.summarize()
74 |
75 |
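# The result file loaded by eval_by_cocotools above is a flat JSON list with one
# entry per detection; a synthetic example of the expected format (all ids and
# values made up for illustration, bbox is [x, y, width, height]):
#
#     [{"image_id": 42, "category_id": 18, "bbox": [258.2, 41.3, 91.1, 202.2], "score": 0.94},
#      {"image_id": 73, "category_id": 1, "bbox": [61.0, 22.8, 44.3, 109.6], "score": 0.87}]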
76 | def eval_coco(model,
77 | result_file_path,
78 | dataset_mode,
79 | dataset_year,
80 | image_format,
81 | preprocessing_type,
82 | root_path,
83 | config,
84 | min_size=10,
85 | ):
86 | """
87 | Overall flow of COCO eval:
88 | 1. Build a tf.data.Dataset that yields preprocessed_image, raw_image_height, raw_image_width and image_id for the COCO images to evaluate.
89 | 2. Run the trained model on preprocessed_image to get image_id, bboxes, classes and scores per image.
90 | 2.1. Store the predictions as a list whose elements are dicts holding image_id, category_id, bbox and score (see the synthetic example above).
91 | 2.2. image_id and category_id are int32, bbox is a float32 array of length 4, score is float32.
92 | 2.3. For details see the official example:
93 | https://github.com/cocodataset/cocoapi/blob/master/results/instances_val2014_fakebbox100_results.json
94 | 3. Evaluate with the COCOeval tools.
95 | 3.1. For details see the official example:
96 | https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
97 | 3.2. Roughly: build a pycocotools.coco.COCO object, load the result list via COCO.loadRes, then compute the metrics with COCOeval.
98 | :param model: pre-trained model
99 | :param result_file_path: path to save result json file
100 | :param dataset_mode: train or val
101 | :param dataset_year:
102 | :param image_format:
103 | :param preprocessing_type:
104 | :param root_path: COCO root directory (the one containing the annotations folder)
105 | :param config:
106 | :param min_size:
107 | :return:
108 | """
109 | dataset_configs = {'root_dir': root_path,
110 | 'mode': dataset_mode, 'year': dataset_year,
111 | 'min_size': config['image_min_size'], 'max_size': config['image_max_size'],
112 | 'preprocessing_type': preprocessing_type,
113 | 'caffe_pixel_means': config['bgr_pixel_means']}
114 | dataset = dataset_factory('coco', mode=dataset_mode, **dataset_configs)
115 |
116 | res_list = []
117 | for img, img_scale, raw_h, raw_w, img_id in dataset:
118 | # final_bboxes, final_labels, final_scores = model(img, False)
119 | # final_bboxes = final_bboxes / tf.to_float(img_scale)
120 |
121 | scores, roi_txtytwth, rois = model.im_detect(img, img_scale)
122 | roi_txtytwth = tf.reshape(roi_txtytwth, [-1, num_classes, 4])
123 |
124 | res_score = []
125 | res_bbox = []
126 | res_category = []
127 | for j in range(1, num_classes):
128 | inds = tf.where(scores[:, j] > config['prediction_score_threshold'])[:, 0]
129 | cls_scores = tf.gather(scores[:, j], inds)
130 | cls_boxes = decode_bbox_with_mean_and_std(tf.gather(rois, inds),
131 | tf.gather(roi_txtytwth[:, j, :], inds),
132 | target_means=config['roi_proposal_means'],
133 | target_stds=config['roi_proposal_stds'])
134 |
135 | cls_boxes, inds = bboxes_clip_filter(cls_boxes, 0, raw_h, raw_w, min_size)
136 | cls_scores = tf.gather(cls_scores, inds)
137 | keep = tf.image.non_max_suppression(cls_boxes, cls_scores, config['max_objects_per_class_per_image'],
138 | iou_threshold=config['prediction_nms_iou_threshold'])
139 | if tf.size(keep).numpy() == 0:
140 | continue
141 |
142 | res_score.append(tf.gather(cls_scores, keep))
143 | res_bbox.append(tf.gather(cls_boxes, keep))
144 | res_category.append(tf.ones_like(keep, dtype=tf.int32) * j)
145 |
146 | scores_after_nms = tf.concat(res_score, axis=0)
147 | bboxes_after_nms = tf.concat(res_bbox, axis=0)
148 | category_after_nms = tf.concat(res_category, axis=0)
149 |
150 | final_scores, final_idx = tf.nn.top_k(scores_after_nms, k=tf.minimum(config['max_objects_per_image'],
151 | tf.size(scores_after_nms)),
152 | sorted=False)
153 | final_bboxes = tf.gather(bboxes_after_nms, final_idx).numpy()
154 | final_labels = tf.gather(category_after_nms, final_idx).numpy()
155 | final_scores = final_scores.numpy()
156 |
157 | for cur_bbox, cur_label, cur_score in zip(final_bboxes, final_labels, final_scores):
158 | res_list.append({
159 | 'image_id': int(img_id),
160 | 'category_id': int(coco_name_to_cat_id_dict[coco_id_to_name_list[cur_label]]),
161 | 'bbox': [float(cur_bbox[0]), float(cur_bbox[1]),
162 | float(cur_bbox[2] - cur_bbox[0] + 1), float(cur_bbox[3] - cur_bbox[1] + 1)],
163 | 'score': float(cur_score)
164 | })
165 |
166 | with open(result_file_path, 'w') as f:
167 | json.dump(res_list, f)
168 | eval_by_cocotools(result_file_path, dataset_mode, root_path)
169 |
170 |
171 | def _load_from_ckpt_file(model, ckpt_file_path):
172 | saver = eager_saver.Saver(model.variables)
173 | for var in model.variables:
174 | tf.logging.info('restore var {}'.format(var.name))
175 | if tf.train.latest_checkpoint(ckpt_file_path) is not None:
176 | saver.restore(tf.train.latest_checkpoint(ckpt_file_path))
177 | else:
178 | raise ValueError('unknown ckpt file {}'.format(ckpt_file_path))
179 |
180 |
181 | def parse_args():
182 | parser = argparse.ArgumentParser(description='Evaluate a Fast R-CNN model')
183 | parser.add_argument('ckpt_file_path', type=str, help='target ckpt file path', )
184 | parser.add_argument('--year', type=str, default='2017', help='one of [2014, 2017]', )
185 |
186 | parser.add_argument('--gpu_id', type=str, default='0')
187 |
188 | parser.add_argument('--dataset_mode', type=str, default='val', help='one of [train or val]')
189 |
190 | parser.add_argument('--model_type', type=str, default='faster_rcnn', help='one of [faster_rcnn, fpn]')
191 | parser.add_argument('--backbone', type=str, default='vgg16', help='one of [vgg16, resnet50, resnet101, resnet152]')
192 |
193 | parser.add_argument('--use_fpn_tensorflow_model', default=False, type=bool,
194 | help='load fpn tensorflow model, only support resnet50 backbone')
195 |
196 | parser.add_argument('--root_path', help='path to COCO root directory',
197 | default='/ssd/zhangyiyang/COCO2017', type=str)
198 | parser.add_argument('--result_file_dir', help='path to save detection result json file',
199 | default='/ssd/zhangyiyang/results/', type=str)
200 | parser.add_argument('--logs_name', default=None, type=str)
201 |
202 | if len(sys.argv) == 1:
203 | parser.print_help()
204 | sys.exit(1)
205 |
206 | args = parser.parse_args()
207 | return args
208 |
209 |
210 | def main(args):
211 | # settings required for eager mode
212 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
213 | config = tf.ConfigProto(allow_soft_placement=True)
214 | config.gpu_options.allow_growth = True
215 | tf.enable_eager_execution(config=config)
216 | tf.logging.set_verbosity(tf.logging.INFO)
217 |
218 | # build the model and initialize its variables
219 | model_config = config_factory('coco', args.model_type)
220 | cur_model = model_factory(args.model_type, args.backbone, model_config)
221 | preprocessing_type = 'caffe'
222 | cur_model(tf.to_float(np.random.rand(1, 800, 600, 3)), False)
223 |
224 | # result file path
225 | # {result_file_dir}/{model_type}/{backbone}/{logs_name}/coco_res.json
226 | logs_name = args.logs_name if args.logs_name is not None else 'default'
227 | final_result_file_dir = os.path.join(args.result_file_dir, args.model_type, args.backbone, logs_name)
228 | if not os.path.exists(final_result_file_dir):
229 | os.makedirs(final_result_file_dir)
230 | final_result_file_path = os.path.join(final_result_file_dir, 'coco_res.json')
231 |
232 | # load the pre-trained model
233 | image_format = 'bgr'
234 | if args.use_fpn_tensorflow_model:
235 | image_format = 'rgb'
236 | cur_model.load_fpn_tensorflow_weights(args.ckpt_file_path)
237 | else:
238 | _load_from_ckpt_file(cur_model, args.ckpt_file_path)
239 |
240 | # write the predictions to file and evaluate them
241 | eval_coco(cur_model,
242 | result_file_path=final_result_file_path,
243 | dataset_mode=args.dataset_mode,
244 | dataset_year=args.year,
245 | image_format=image_format,
246 | preprocessing_type=preprocessing_type,
247 | root_path=os.path.join(args.root_path),
248 | config=model_config,)
249 |
250 |
251 | if __name__ == '__main__':
252 | main(parse_args())
253 |
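Typical invocation of this script (paths are placeholders):

python scripts/eval_coco.py /path/to/logs_dir/ckpt \
    --dataset_mode val --model_type faster_rcnn --backbone vgg16 \
    --root_path /path/to/COCO2017 --result_file_dir /path/to/results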
-------------------------------------------------------------------------------- /scripts/eval_pascal.py: --------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import os
4 | import sys
5 | import argparse
6 | from object_detection.model.model_factory import model_factory
7 | from object_detection.config.config_factory import config_factory
8 | from object_detection.evaluation.pascal_eval_files_utils import get_prediction_files
9 | from object_detection.evaluation.detectron_pascal_evaluation_utils import voc_eval
10 | from tensorflow.contrib.eager.python import saver as eager_saver
11 |
12 | os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
13 | num_classes = 21
14 | class_list = ('__background__', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair',
15 | 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
16 | 'tvmonitor')
17 |
18 |
19 | def eval_from_scratch(model,
20 | dataset_type,
21 | dataset_mode,
22 | image_format,
23 | preprocessing_type,
24 | root_path,
25 | result_file_format,
26 | cache_dir,
27 | use_07_metric,
28 | config,
29 | ):
30 | """
31 |
32 | :param model: model with weights already loaded
33 | :param dataset_type: whether the raw training data was produced with cv2 or tf
34 | :param dataset_mode: train, val, test or trainval
35 | :param image_format:
36 | :param preprocessing_type:
37 | :param root_path: VOC directory, down to a specific year (e.g. .../VOCdevkit/VOC2007)
38 | :param result_file_format: detection results are written to files at result_file_format.format(class_name)
39 | :param cache_dir: ground-truth info is pickled during evaluation to cache_dir + 'test_annots.pkl'
40 | :param use_07_metric:
41 | :param config:
42 | :return:
43 | """
44 |
45 | # write the detection result files locally
46 | get_prediction_files(model,
47 | dataset_type=dataset_type,
48 | image_format=image_format,
49 | preprocessing_type=preprocessing_type,
50 | caffe_pixel_means=config['bgr_pixel_means'],
51 | min_edge=config['image_min_size'],
52 | max_edge=config['image_max_size'],
53 | data_root_path=root_path,
54 | mode=dataset_mode,
55 | result_file_format=result_file_format,
56 | score_threshold=config['prediction_score_threshold'],
57 | iou_threshold=config['prediction_nms_iou_threshold'],
58 | max_objects_per_class=config['max_objects_per_class_per_image'],
59 | max_objects_per_image=config['max_objects_per_image'],
60 | target_means=config['roi_proposal_means'],
61 | target_stds=config['roi_proposal_stds'],
62 | min_size=10
63 | )
64 |
65 | # compute map from the local files (detections plus ground-truth xmls)
66 | eval_by_local_files_and_gt_xmls(root_path,
67 | result_file_format,
68 | cache_dir,
69 | dataset_mode,
70 | config['evaluate_iou_threshold'],
71 | use_07_metric=use_07_metric, )
72 |
73 |
74 | def eval_by_local_files_and_gt_xmls(root_path,
75 | result_file_format,
76 | cache_dir,
77 | mode,
78 | prediction_iou_threshold,
79 | use_07_metric=True):
80 | annotation_file_format = os.path.join(root_path, 'Annotations', "{}.xml")
81 | imagesetfile = os.path.join(root_path, 'ImageSets', 'Main', '{}.txt'.format(mode))
82 | all_ap = .0
83 | for cls_name in class_list:
84 | if cls_name == '__background__':
85 | continue
86 | cur_res = voc_eval(result_file_format,
87 | annotation_file_format,
88 | imagesetfile,
89 | cls_name,
90 | cache_dir,
91 | ovthresh=prediction_iou_threshold,
92 | use_07_metric=use_07_metric,
93 | )
94 | tf.logging.info('class {} get ap {}'.format(cls_name, cur_res[2]))
95 | all_ap += cur_res[2]
96 | tf.logging.info('map {}'.format(all_ap / (len(class_list) - 1)))
97 |
98 |
99 | def _load_from_ckpt_file(model, ckpt_file_path):
100 | saver = eager_saver.Saver(model.variables)
101 | for var in model.variables:
102 | tf.logging.info('restore var {}'.format(var.name))
103 | if tf.train.latest_checkpoint(ckpt_file_path) is not None:
104 | saver.restore(tf.train.latest_checkpoint(ckpt_file_path))
105 | else:
106 | raise ValueError('unknown ckpt file {}'.format(ckpt_file_path))
107 |
108 |
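# Typical invocation of this script (paths and flag values are placeholders):
#   python scripts/eval_pascal.py /path/to/logs_dir/ckpt \
#       --dataset_mode test --year 2007 --model_type faster_rcnn --backbone vgg16 \
#       --root_path /path/to/VOCdevkit --result_file_dir /path/to/results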
109 | def parse_args():
110 | parser = argparse.ArgumentParser(description='Evaluate a Fast R-CNN model')
111 | parser.add_argument('ckpt_file_path', type=str, help='target ckpt file path', )
112 |
113 | parser.add_argument('--gpu_id', type=str, default='0')
114 |
115 | parser.add_argument('--dataset_type', help='type of dataset, cv2 or tf', default='cv2', type=str)
116 | parser.add_argument('--dataset_mode', type=str, default='test', help='one of [test, train, trainval, val]')
117 | parser.add_argument('--year', type=str, default='2007', help='one of [2007, 2012]')
118 |
119 | parser.add_argument('--model_type', type=str, default='faster_rcnn', help='one of [faster_rcnn, fpn]')
120 | parser.add_argument('--backbone', type=str, default='vgg16', help='one of [vgg16, resnet50, resnet101, resnet152]')
121 |
122 | parser.add_argument('--use_tf_faster_rcnn_model', type=bool, default=False,
123 | help='load tf-faster-rcnn model, only support resnet101 backbone')
124 | parser.add_argument('--use_fpn_tensorflow_model', default=False, type=bool,
125 | help='load fpn tensorflow model, only support resnet50 backbone')
126 | parser.add_argument('--use_local_result_files', default=False, type=bool)
127 |
128 | parser.add_argument('--use_07_metric', default=True, type=bool)
129 |
130 | # parser.add_argument('--root_path', help='path to pascal VOCdevkit',
131 | # default='D:\\data\\VOCdevkit', type=str)
132 | # parser.add_argument('--result_file_format', help='local detection result file pattern',
133 | # default='D:\\data\\VOCdevkit\\VOC2007\\results\\{:s}.txt', type=str)
134 | # parser.add_argument('--annotation_cache_dir', help='path to save annotation cache pickle file',
135 | # default='D:\\data\\VOCdevkit\\VOC2007\\results', type=str)
136 |
137 | parser.add_argument('--root_path', help='path to pascal VOCdevkit',
138 | default='/ssd/zhangyiyang/tf_eager_object_detection/VOCdevkit', type=str)
139 | parser.add_argument('--annotation_cache_dir', help='path to save annotation cache pickle file',
140 | default='/ssd/zhangyiyang/tf_eager_object_detection/results', type=str)
141 |
142 | # path to save detection result files
143 | parser.add_argument('--result_file_dir', help='local detection result file pattern',
144 | default='/ssd/zhangyiyang/tf_eager_object_detection/results', type=str)
145 | parser.add_argument('--logs_name', default=None, type=str)
146 |
147 | if len(sys.argv) == 1:
148 | parser.print_help()
149 | sys.exit(1)
150 |
151 | args = parser.parse_args()
152 | return args
153 |
154 |
155 | def main(args):
156 | model_config = config_factory('pascal', args.model_type)
157 |
158 | # get result file format
159 | # {args.result_file_dir}/{args.model_type}/{args.backbone}/{logs_name}/{}.txt
160 | logs_name = args.logs_name if args.logs_name is not None else 'default'
161 | result_file_dir = os.path.join(args.result_file_dir, args.model_type, args.backbone, logs_name)
162 | if not os.path.exists(result_file_dir):
163 | os.makedirs(result_file_dir)
164 | result_file_path = os.path.join(result_file_dir, '{}.txt')
165 |
166 | if args.use_local_result_files:
167 | # local result files already exist; evaluate from them directly
168 | eval_by_local_files_and_gt_xmls(root_path=args.root_path,
169 | result_file_format=result_file_path,
170 | cache_dir=args.annotation_cache_dir,
171 | mode=args.dataset_mode,
172 | prediction_iou_threshold=model_config['evaluate_iou_threshold']
173 | )
174 | return
175 |
176 | # validate arguments
177 | if args.year not in ['2007', '2012']:
178 | raise ValueError('unknown pascal year {}'.format(args.year))
179 |
180 | # settings required for eager mode
181 |
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
182 | config = tf.ConfigProto(allow_soft_placement=True)
183 | config.gpu_options.allow_growth = True
184 | tf.enable_eager_execution(config=config)
185 | tf.logging.set_verbosity(tf.logging.INFO)
186 |
187 | # build the model and initialize its variables
188 | cur_model = model_factory(args.model_type, args.backbone, model_config)
189 | preprocessing_type = 'caffe'
190 | cur_model(tf.to_float(np.random.rand(1, 800, 600, 3)), False)
191 |
192 | # load the pre-trained model
193 | image_format = 'bgr'
194 | if args.use_tf_faster_rcnn_model:
195 | cur_model.load_tf_faster_rcnn_tf_weights(args.ckpt_file_path)
196 | elif args.use_fpn_tensorflow_model:
197 | image_format = 'rgb'
198 | cur_model.load_fpn_tensorflow_weights(args.ckpt_file_path)
199 | else:
200 | _load_from_ckpt_file(cur_model, args.ckpt_file_path)
201 |
202 | # write the predictions to file and evaluate them
203 | eval_from_scratch(cur_model,
204 | dataset_type=args.dataset_type,
205 | dataset_mode=args.dataset_mode,
206 | image_format=image_format,
207 | preprocessing_type=preprocessing_type,
208 | root_path=os.path.join(args.root_path, 'VOC' + str(args.year)),
209 | result_file_format=result_file_path,
210 | cache_dir=args.annotation_cache_dir,
211 | use_07_metric=args.use_07_metric,
212 | config=model_config)
213 |
214 |
215 | if __name__ == '__main__':
216 | main(parse_args())
217 | -------------------------------------------------------------------------------- /scripts/generate_pascal_tf_records.py: --------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import tensorflow as tf
4 | import argparse
5 | import object_detection.dataset.utils.tf_record_utils as dataset_utils
6 | import object_detection.dataset.utils.label_map_utils as label_map_utils
7 | from tqdm import tqdm
8 | from lxml import etree
9 |
10 |
11 | def _get_tf_example(xml_dict, label_map_dict, image_path):
12 | with open(image_path, 'rb') as image:
13 | encoded_jpg = image.read()
14 | width = int(xml_dict['size']['width'])
15 | height = int(xml_dict['size']['height'])
16 |
17 | xmin = []
18 | ymin = []
19 | xmax = []
20 | ymax = []
21 | classes = []
22 | classes_text = []
23 | if 'object' in xml_dict:
24 | for obj in xml_dict['object']:
25 | xmin.append(float(int(obj['bndbox']['xmin']) - 1) / (width - 1))
26 | ymin.append(float(int(obj['bndbox']['ymin']) - 1) / (height - 1))
27 | xmax.append(float(int(obj['bndbox']['xmax']) - 1) / (width - 1))
28 | ymax.append(float(int(obj['bndbox']['ymax']) - 1) / (height - 1))
29 | classes_text.append(obj['name'].encode('utf8'))
30 | classes.append(label_map_dict[obj['name']])
31 |
32 | example = tf.train.Example(features=tf.train.Features(feature={
33 | 'image/height': dataset_utils.int64_feature(height),
34 | 'image/width': dataset_utils.int64_feature(width),
35 | 'image/filename': dataset_utils.bytes_feature(xml_dict['filename'].encode('utf8')),
36 | 'image/encoded': dataset_utils.bytes_feature(encoded_jpg),
37 | 'image/object/bbox/xmin': dataset_utils.float_list_feature(xmin),
38 | 'image/object/bbox/xmax': dataset_utils.float_list_feature(xmax),
39 | 'image/object/bbox/ymin': dataset_utils.float_list_feature(ymin),
40 | 'image/object/bbox/ymax': dataset_utils.float_list_feature(ymax),
41 | 'image/object/class/label': dataset_utils.int64_list_feature(classes),
42 | 'image/object/class/text': dataset_utils.bytes_list_feature(classes_text),
43 | }))
44 | return example
45 |
46 |
47 | def main(args):
48 | writers = dataset_utils.get_multi_tf_record_writers(base_path=args.writer_base_path,
49 |
47 | def main(args):
48 |     writers = dataset_utils.get_multi_tf_record_writers(base_path=args.writer_base_path,
49 |                                                          file_pattern=args.writer_file_pattern,
50 |                                                          year=args.year,
51 |                                                          number=args.writers_number,
52 |                                                          mode=args.mode)
53 |     label_map_dict = label_map_utils.get_label_map_dict(args.label_map_path)
54 |     if args.year == "2007":
55 |         years = ["VOC2007"]
56 |     elif args.year == "2012":
57 |         years = ["VOC2012"]
58 |     elif args.year == "0712":
59 |         years = ["VOC2007", "VOC2012"]
60 |     else:
61 |         raise ValueError('unknown year {}'.format(args.year))
62 | 
63 |     annotation_file_paths_list = []
64 |     root_paths = []
65 |     for year in years:
66 |         with open(os.path.join(args.data_root_path, year, 'ImageSets', 'Main', 'aeroplane_%s.txt' % args.mode),
67 |                   'r') as f:
68 |             lines = f.readlines()
69 |         cur_annotation_list = [
70 |             os.path.join(args.data_root_path, year, 'Annotations', line.strip().split(' ')[0] + '.xml')
71 |             for line in lines
72 |         ]
73 |         cur_root_paths = [os.path.join(args.data_root_path, year)] * len(lines)
74 | 
75 |         annotation_file_paths_list += cur_annotation_list
76 |         root_paths += cur_root_paths
77 | 
78 |     for idx, (annotation_file_path, root_path) in enumerate(tqdm(zip(annotation_file_paths_list, root_paths), total=len(root_paths))):  # zip() hides the length from tqdm
79 |         with open(annotation_file_path, 'r') as f:
80 |             xml_str = f.read()
81 |         xml_dict = dataset_utils.recursive_parse_xml_to_dict(etree.fromstring(xml_str))['annotation']
82 |         tf_example = _get_tf_example(xml_dict, label_map_dict,
83 |                                      os.path.join(root_path, 'JPEGImages', xml_dict['filename']))
84 |         writers[idx % args.writers_number].write(tf_example.SerializeToString())
85 |     for writer in writers:
86 |         writer.close()
87 | 
88 | 
89 | def _parse_arguments(argv):
90 |     parser = argparse.ArgumentParser()
91 |     parser.add_argument('--mode', type=str, default="trainval")
92 |     parser.add_argument('--year', type=str, default="2007", help="one of [2007, 2012, 0712]")
93 |     parser.add_argument('--writer_file_pattern', type=str, default='pascal_%s_%s_%02d.tfrecords',
94 |                         help='tf records output file name pattern')
95 |     parser.add_argument('--writers_number', type=int, default=5, help='split tf records into several files.')
96 | 
97 |     parser.add_argument('--writer_base_path', type=str, default="/path/to/tf_eager_records",
98 |                         help='path to save generated tf record files.')
99 |     parser.add_argument('--label_map_path', type=str,
100 |                         help='path to pascal_label_map.pbtxt, already exists in ./scripts/label_map_src/',
101 |                         default='./scripts/label_map_src/pascal_label_map.pbtxt')
102 |     parser.add_argument('--data_root_path', type=str, default='/path/to/VOCdevkit')
103 | 
104 |     # parser.add_argument('--writer_base_path', type=str, default="D:\\data\\VOCdevkit\\tf_eager_records")
105 |     # parser.add_argument('--label_map_path', type=str,
106 |     #                     help='path to pascal_label_map.pbtxt, already exists in ./scripts/label_map_src/',
107 |     #                     default='./scripts/label_map_src/pascal_label_map.pbtxt')
108 |     # parser.add_argument('--data_root_path', type=str, default='D:\\data\\VOCdevkit')
109 | 
110 |     return parser.parse_args(argv)
111 | 
112 | 
113 | if __name__ == '__main__':
114 |     main(_parse_arguments(sys.argv[1:]))
115 | 
--------------------------------------------------------------------------------
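The records written above can be read back using the feature keys from `_get_tf_example`. A minimal sketch, not part of the repo (the file name follows the default `--writer_file_pattern`; only a subset of features is parsed, and the dtypes are assumptions based on the `float_list_feature`/`int64_list_feature` helpers):

```python
import tensorflow as tf

def _parse(serialized):
    # parse a single serialized tf.train.Example written by _get_tf_example
    features = tf.parse_single_example(serialized, {
        'image/encoded': tf.FixedLenFeature([], tf.string),
        'image/object/bbox/xmin': tf.VarLenFeature(tf.float32),
        'image/object/class/label': tf.VarLenFeature(tf.int64),
    })
    image = tf.image.decode_jpeg(features['image/encoded'], channels=3)
    return (image,
            features['image/object/bbox/xmin'].values,
            features['image/object/class/label'].values)

dataset = tf.data.TFRecordDataset(['pascal_2007_trainval_00.tfrecords']).map(_parse)
```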
/scripts/label_map_src/pascal_label_map.pbtxt:
--------------------------------------------------------------------------------
1 | item {
2 |   id: 1
3 |   name: 'aeroplane'
4 | }
5 | 
6 | item {
7 |   id: 2
8 |   name: 'bicycle'
9 | }
10 | 
11 | item {
12 |   id: 3
13 |   name: 'bird'
14 | }
15 | 
16 | item {
17 |   id: 4
18 |   name: 'boat'
19 | }
20 | 
21 | item {
22 |   id: 5
23 |   name: 'bottle'
24 | }
25 | 
26 | item {
27 |   id: 6
28 |   name: 'bus'
29 | }
30 | 
31 | item {
32 |   id: 7
33 |   name: 'car'
34 | }
35 | 
36 | item {
37 |   id: 8
38 |   name: 'cat'
39 | }
40 | 
41 | item {
42 |   id: 9
43 |   name: 'chair'
44 | }
45 | 
46 | item {
47 |   id: 10
48 |   name: 'cow'
49 | }
50 | 
51 | item {
52 |   id: 11
53 |   name: 'diningtable'
54 | }
55 | 
56 | item {
57 |   id: 12
58 |   name: 'dog'
59 | }
60 | 
61 | item {
62 |   id: 13
63 |   name: 'horse'
64 | }
65 | 
66 | item {
67 |   id: 14
68 |   name: 'motorbike'
69 | }
70 | 
71 | item {
72 |   id: 15
73 |   name: 'person'
74 | }
75 | 
76 | item {
77 |   id: 16
78 |   name: 'pottedplant'
79 | }
80 | 
81 | item {
82 |   id: 17
83 |   name: 'sheep'
84 | }
85 | 
86 | item {
87 |   id: 18
88 |   name: 'sofa'
89 | }
90 | 
91 | item {
92 |   id: 19
93 |   name: 'train'
94 | }
95 | 
96 | item {
97 |   id: 20
98 |   name: 'tvmonitor'
99 | }
100 | 
--------------------------------------------------------------------------------
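`generate_pascal_tf_records.py` above looks classes up via `label_map_utils.get_label_map_dict`, indexing the result by class name, so for this pbtxt the mapping is name to integer id. A short sketch (the dict-of-ints return format is inferred from that usage, not checked against the utility's source):

```python
from object_detection.dataset.utils import label_map_utils

label_map_dict = label_map_utils.get_label_map_dict('./scripts/label_map_src/pascal_label_map.pbtxt')
assert label_map_dict['aeroplane'] == 1   # item id 1 above
assert label_map_dict['tvmonitor'] == 20  # item id 20 above
```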
/scripts/train.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import argparse
4 | import numpy as np
5 | import tensorflow as tf
6 | 
7 | from object_detection.model.model_factory import model_factory
8 | from object_detection.config.config_factory import config_factory
9 | from object_detection.utils.visual_utils import show_one_image
10 | from object_detection.dataset.dataset_factory import dataset_factory
11 | from tensorflow.contrib.summary import summary
12 | from tensorflow.contrib.eager.python import saver as eager_saver
13 | from tensorflow.python.platform import tf_logging
14 | from tqdm import tqdm
15 | 
16 | os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
17 | tf_logging.set_verbosity(tf_logging.INFO)
18 | 
19 | CONFIG = None
20 | 
21 | 
22 | def train_step(model, loss, tape, optimizer):
23 |     all_vars = model.variables
24 |     gradients = tape.gradient(loss, all_vars)
25 | 
26 |     if CONFIG['learning_rate_bias_double']:
27 |         all_grads = []
28 |         all_vars = []
29 |         for grad, var in zip(gradients, model.variables):
30 |             if grad is None:
31 |                 continue
32 |             scale = 1.0
33 |             if 'bias' in var.name:
34 |                 scale = 2.0
35 |             all_grads.append(grad * scale)
36 |             all_vars.append(var)
37 |         gradients = all_grads
38 | 
39 |     optimizer.apply_gradients(zip(gradients, all_vars),
40 |                               global_step=tf.train.get_or_create_global_step())
41 | 
42 | 
43 | def _get_default_optimizer(use_adam):
44 |     lr = tf.train.piecewise_constant(tf.train.get_or_create_global_step(),
45 |                                      boundaries=CONFIG['learning_rate_multi_decay_steps'],
46 |                                      values=CONFIG['learning_rate_multi_lrs'])
47 |     if use_adam:
48 |         return tf.train.AdamOptimizer(lr)
49 |     else:
50 |         return tf.train.MomentumOptimizer(lr, momentum=CONFIG['optimizer_momentum'])
51 | 
52 | 
53 | def _get_training_dataset(preprocessing_type='caffe', dataset_type='pascal',
54 |                           coco_year="2017",
55 |                           pascal_year="2007", pascal_mode='trainval', pascal_tf_records_num=5,
56 |                           data_root_path=None):
57 |     if dataset_type == 'pascal':
58 |         base_pattern = 'pascal_{}_{}_%02d.tfrecords'.format(pascal_year, pascal_mode)
59 |         file_names = [os.path.join(data_root_path, base_pattern % i) for i in range(pascal_tf_records_num)]
60 |         dataset_configs = {'tf_records_list': file_names,
61 |                            'min_size': CONFIG['image_min_size'], 'max_size': CONFIG['image_max_size'],
62 |                            'preprocessing_type': preprocessing_type, 'caffe_pixel_means': CONFIG['bgr_pixel_means'],
63 |                            'argument': True, }
64 |         dataset = dataset_factory('pascal', 'train', dataset_configs)
65 |     elif dataset_type == 'coco':
66 |         dataset_configs = {'root_dir': data_root_path,
67 |                            'mode': 'train', 'year': coco_year,
68 |                            'min_size': CONFIG['image_min_size'], 'max_size': CONFIG['image_max_size'],
69 |                            'preprocessing_type': preprocessing_type, 'caffe_pixel_means': CONFIG['bgr_pixel_means'],
70 |                            'argument': True, }
71 |         dataset = dataset_factory('coco', 'train', dataset_configs)
72 |     else:
73 |         raise ValueError('unknown dataset type {}'.format(dataset_type))
74 |     return dataset
75 | 
76 | 
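# NOTE: a minimal sketch of the schedule built in _get_default_optimizer above,
# with made-up numbers (the real boundaries/values come from the config):
#     lr = tf.train.piecewise_constant(tf.train.get_or_create_global_step(),
#                                      boundaries=[80000], values=[1e-3, 1e-4])
# tf.train.piecewise_constant expects len(values) == len(boundaries) + 1; here
# lr is 1e-3 while global_step <= 80000 and 1e-4 afterwards.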
77 | def train_one_epoch(dataset, base_model, optimizer,
78 |                     preprocessing_type,
79 |                     logging_every_n_steps,
80 |                     summary_every_n_steps,
81 |                     saver, save_every_n_steps, save_path):
82 |     idx = 0
83 | 
84 |     for image, gt_bboxes, gt_labels in tqdm(dataset):
85 |         # bgr input
86 |         # for keras application pre-trained models, use bgr
87 | 
88 |         # convert ymin xmin ymax xmax -> xmin ymin xmax ymax (see the note after this function)
89 |         gt_bboxes = tf.squeeze(gt_bboxes, axis=0)
90 |         channels = tf.split(gt_bboxes, 4, axis=1)
91 |         gt_bboxes = tf.concat([
92 |             channels[1], channels[0], channels[3], channels[2]
93 |         ], axis=1)
94 | 
95 |         # set labels to int32
96 |         gt_labels = tf.to_int32(tf.squeeze(gt_labels, axis=0))
97 | 
98 |         # train one step
99 |         with tf.GradientTape() as tape:
100 |             rpn_cls_loss, rpn_reg_loss, roi_cls_loss, roi_reg_loss = base_model((image, gt_bboxes, gt_labels), True)
101 |             l2_loss = tf.add_n(base_model.losses)
102 |             total_loss = rpn_cls_loss + rpn_reg_loss + roi_cls_loss + roi_reg_loss + l2_loss
103 |         train_step(base_model, total_loss, tape, optimizer)  # gradients are taken outside the tape context
104 | 
105 |         # summary
106 |         if idx % summary_every_n_steps == 0:
107 |             summary.scalar("l2_loss", l2_loss)
108 |             summary.scalar("rpn_cls_loss", rpn_cls_loss)
109 |             summary.scalar("rpn_reg_loss", rpn_reg_loss)
110 |             summary.scalar("roi_cls_loss", roi_cls_loss)
111 |             summary.scalar("roi_reg_loss", roi_reg_loss)
112 |             summary.scalar("total_loss", total_loss)
113 | 
114 |             pred_bboxes, pred_labels, pred_scores = base_model(image, False)
115 | 
116 |             if pred_bboxes is not None:
117 |                 selected_idx = tf.where(pred_scores >= CONFIG['show_image_score_threshold'])[:, 0]
118 |                 if tf.size(selected_idx) != 0:
119 |                     # show gt
120 |                     gt_channels = tf.split(gt_bboxes, 4, axis=1)
121 |                     show_gt_bboxes = tf.concat([gt_channels[1], gt_channels[0], gt_channels[3], gt_channels[2]], axis=1)
122 |                     gt_image = show_one_image(tf.squeeze(image, axis=0).numpy(), show_gt_bboxes.numpy(),
123 |                                               gt_labels.numpy(),
124 |                                               preprocessing_type=preprocessing_type,
125 |                                               caffe_pixel_means=CONFIG['bgr_pixel_means'],
126 |                                               enable_matplotlib=False)
127 |                     tf.contrib.summary.image("gt_image", tf.expand_dims(gt_image, axis=0))
128 | 
129 |                     # show pred
130 |                     pred_bboxes = tf.gather(pred_bboxes, selected_idx)
131 |                     pred_labels = tf.gather(pred_labels, selected_idx)
132 |                     channels = tf.split(pred_bboxes, num_or_size_splits=4, axis=1)
133 |                     show_pred_bboxes = tf.concat([
134 |                         channels[1], channels[0], channels[3], channels[2]
135 |                     ], axis=1)
136 |                     pred_image = show_one_image(tf.squeeze(image, axis=0).numpy(),
137 |                                                 show_pred_bboxes.numpy(),
138 |                                                 pred_labels.numpy(),
139 |                                                 preprocessing_type=preprocessing_type,
140 |                                                 caffe_pixel_means=CONFIG['bgr_pixel_means'],
141 |                                                 enable_matplotlib=False)
142 |                     tf.contrib.summary.image("pred_image", tf.expand_dims(pred_image, axis=0))
143 | 
144 |         # logging
145 |         if idx % logging_every_n_steps == 0:
146 |             if isinstance(optimizer, tf.train.AdamOptimizer):
147 |                 show_lr = optimizer._lr()
148 |             else:
149 |                 show_lr = optimizer._learning_rate()
150 |             logging_format = 'steps %d, lr is %.5f, loss: %.4f, %.4f, %.4f, %.4f, %.4f, %.4f'
151 |             tf_logging.info(logging_format % (idx + 1, show_lr,
152 |                                               rpn_cls_loss, rpn_reg_loss, roi_cls_loss, roi_reg_loss,
153 |                                               l2_loss, total_loss))
154 | 
155 |         # saving
156 |         if saver is not None and save_path is not None and idx % save_every_n_steps == 0 and idx != 0:
157 |             saver.save(os.path.join(save_path, 'model.ckpt'), global_step=tf.train.get_or_create_global_step())
158 | 
159 |         idx += 1
160 | 
161 | 
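# NOTE: a minimal sketch of the column swap used in train_one_epoch above,
# for a single (ymin, xmin, ymax, xmax) box:
#     boxes = tf.constant([[10., 20., 30., 40.]])
#     c = tf.split(boxes, 4, axis=1)
#     tf.concat([c[1], c[0], c[3], c[2]], axis=1)  # -> [[20., 10., 40., 30.]]
# Swapping the pairs is its own inverse, so the same trick converts boxes back
# before drawing the summary images.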
162 | def train(training_dataset,
163 |           preprocessing_type,
164 | 
165 |           base_model,
166 | 
167 |           optimizer,
168 | 
169 |           logging_every_n_steps,
170 |           save_every_n_steps,
171 |           summary_every_n_steps,
172 | 
173 |           train_dir,
174 |           ckpt_dir,
175 |           restore_ckpt_file_path,
176 |           ):
177 |     # saver over the model variables and the global step, used to restore pre-trained weights
178 |     variables = base_model.variables + [tf.train.get_or_create_global_step()]
179 |     saver = eager_saver.Saver(variables)
180 | 
181 |     # ckpt file specified on the command line
182 |     if restore_ckpt_file_path is not None:
183 |         saver.restore(restore_ckpt_file_path)
184 | 
185 |     # latest checkpoint in the current logs_dir, used to resume training (overrides the one above if present)
186 |     if tf.train.latest_checkpoint(ckpt_dir) is not None:
187 |         saver.restore(tf.train.latest_checkpoint(ckpt_dir))
188 | 
189 |     train_writer = tf.contrib.summary.create_file_writer(train_dir, flush_millis=100000)
190 |     for i in range(CONFIG['epochs']):
191 |         tf_logging.info('epoch %d starting...' % (i + 1))
192 |         start = time.time()
193 |         with train_writer.as_default(), summary.always_record_summaries():
194 |             train_one_epoch(dataset=training_dataset, base_model=base_model,
195 |                             optimizer=optimizer, preprocessing_type=preprocessing_type,
196 |                             logging_every_n_steps=logging_every_n_steps,
197 |                             summary_every_n_steps=summary_every_n_steps,
198 |                             saver=saver, save_every_n_steps=save_every_n_steps, save_path=ckpt_dir,
199 |                             )
200 |         tf.set_random_seed(1)
201 |         train_end = time.time()
202 |         tf_logging.info('epoch %d training finished, costing %d seconds...' % (i + 1, train_end - start))
203 | 
204 | 
205 | def parse_args():
206 |     """
207 |     Parse input arguments
208 |     """
209 |     parser = argparse.ArgumentParser(description='Train a model')
210 |     parser.add_argument('--gpu_id', default="0", type=str, help='used in sys variable CUDA_VISIBLE_DEVICES')
211 | 
212 |     parser.add_argument('--model_type', type=str, default='faster_rcnn',
213 |                         help='one of [faster_rcnn, fpn]')
214 |     parser.add_argument('--backbone', type=str, default='resnet50',
215 |                         help='one of [vgg16, resnet50, resnet101, resnet152]')
216 | 
217 |     parser.add_argument('--data_type', default="pascal", type=str, help='pascal or coco')
218 | 
219 |     # coco
220 |     parser.add_argument('--coco_year', default="2017", type=str, help='one of [2014, 2017]')
221 | 
222 |     # pascal
223 |     parser.add_argument('--pascal_year', default="2007", type=str, help='one of [2007, 2012, 0712]')
224 |     parser.add_argument('--pascal_mode', default="trainval", type=str, help='one of [trainval, train, val]')
225 |     parser.add_argument('--pascal_tf_records_num', default=5, type=int, help='number of pascal tf records')
226 | 
227 |     parser.add_argument('--logging_every_n_steps', default=100, type=int)
228 |     parser.add_argument('--saving_every_n_steps', default=5000, type=int)
229 |     parser.add_argument('--summary_every_n_steps', default=100, type=int)
230 |     parser.add_argument('--restore_ckpt_path', type=str, default=None)
231 | 
232 |     parser.add_argument('--use_adam', type=lambda s: s.lower() in ('true', '1'), default=False)  # type=bool would parse 'False' as True
233 | 
234 |     parser.add_argument('--logs_name', type=str, default='default',
235 |                         help='logs dir name pattern is `logs-{data_type}-{model_type}-{backbone}-{logs_name}`', )
236 | 
237 |     # parser.add_argument('--data_root_path', default="/ssd/zhangyiyang/COCO2017", type=str)
238 |     parser.add_argument('--data_root_path', type=str,
239 |                         help='path to tfrecord files if pascal, path to coco root if coco',
240 |                         default="/ssd/zhangyiyang/tf_eager_object_detection/VOCdevkit/tf_eager_records")
241 |     parser.add_argument('--logs_dir', type=str, help='path to save ckpt files and tensorboard summaries.',
242 |                         default="/ssd/zhangyiyang/tf_eager_object_detection/logs")
243 | 
244 |     # # parser.add_argument('--data_root_path', default="D:\\data\\COCO2017", type=str)
245 |     # parser.add_argument('--data_root_path', default="D:\\data\\VOCdevkit\\tf_eager_records\\", type=str)
246 |     # parser.add_argument('--logs_dir', default="D:\\data\\logs\\logs-pascal", type=str)
247 | 
248 |     args = parser.parse_args()
249 |     return args
250 | 
251 | 
252 | def main(args):
253 |     global CONFIG
254 |     CONFIG = config_factory(args.data_type, args.model_type)
255 | 
256 |     # basic settings required by tensorflow eager mode
257 |     os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)
258 |     config = tf.ConfigProto(allow_soft_placement=True)
259 |     config.gpu_options.allow_growth = True
260 |     # config.log_device_placement = True
261 |     tf.enable_eager_execution(config=config)
262 | 
263 |     # build the model and initialize its variables
264 |     cur_model = model_factory(args.model_type, args.backbone, CONFIG)
265 |     preprocessing_type = 'caffe'
266 |     cur_model(tf.to_float(np.random.rand(1, 800, 600, 3)), False)
267 | 
268 |     # basic logs info
269 |     # logs-{data_type}-{model_type}-{backbone}-{logs_name}
270 |     logs_name_pattern = 'logs-{}-{}-{}-{}'
271 |     logs_path_name = logs_name_pattern.format(args.data_type, args.model_type, args.backbone, args.logs_name)
272 | 
273 |     # start training
274 |     train(training_dataset=_get_training_dataset(preprocessing_type=preprocessing_type,
275 |                                                  dataset_type=args.data_type,
276 |                                                  coco_year=args.coco_year,
277 |                                                  pascal_year=args.pascal_year,
278 |                                                  pascal_mode=args.pascal_mode,
279 |                                                  pascal_tf_records_num=args.pascal_tf_records_num,
280 |                                                  data_root_path=args.data_root_path),
281 |           preprocessing_type=preprocessing_type,
282 | 
283 |           base_model=cur_model,
284 | 
285 |           optimizer=_get_default_optimizer(args.use_adam),
286 | 
287 |           logging_every_n_steps=args.logging_every_n_steps,
288 |           save_every_n_steps=args.saving_every_n_steps,
289 |           summary_every_n_steps=args.summary_every_n_steps,
290 | 
291 |           train_dir=os.path.join(args.logs_dir, logs_path_name, 'train'),
292 |           ckpt_dir=os.path.join(args.logs_dir, logs_path_name, 'ckpt'),
293 |           restore_ckpt_file_path=args.restore_ckpt_path,
294 |           )
295 | 
296 | 
297 | if __name__ == '__main__':
298 |     main(parse_args())
299 | 
--------------------------------------------------------------------------------
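With the default flags above, `train.py` keeps each run under a single directory named by `logs_name_pattern`. A sketch of the resulting layout (the checkpoint file names follow from the `saver.save(..., 'model.ckpt', global_step=...)` call in `train_one_epoch`):

```
{logs_dir}/logs-pascal-faster_rcnn-resnet50-default/
├── train/   # tensorboard event files written via create_file_writer
└── ckpt/    # model.ckpt-{global_step} files; also restored from here to resume training
```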