├── .idea
│   ├── Faster-RCNN_Tensorflow.iml
│   ├── misc.xml
│   ├── modules.xml
│   ├── vcs.xml
│   └── workspace.xml
├── README.md
├── data
│   ├── __init__.py
│   ├── __init__.pyc
│   ├── io
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-35.pyc
│   │   │   └── read_tfrecord.cpython-35.pyc
│   │   ├── convert_data_to_tfrecord.py
│   │   ├── convert_data_to_tfrecord_raw.py
│   │   ├── image_preprocess.py
│   │   ├── image_preprocess.pyc
│   │   ├── read_tfrecord.py
│   │   └── read_tfrecord.pyc
│   ├── lib_coco
│   │   ├── PythonAPI
│   │   │   ├── Makefile
│   │   │   ├── __init__.py
│   │   │   ├── __init__.pyc
│   │   │   ├── pycocoDemo.ipynb
│   │   │   ├── pycocoEvalDemo.ipynb
│   │   │   ├── pycocotools
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __init__.pyc
│   │   │   │   ├── _mask.c
│   │   │   │   ├── _mask.pyx
│   │   │   │   ├── _mask.so
│   │   │   │   ├── coco.py
│   │   │   │   ├── coco.pyc
│   │   │   │   ├── cocoeval.py
│   │   │   │   ├── mask.py
│   │   │   │   └── mask.pyc
│   │   │   └── setup.py
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── common
│   │   │   ├── gason.cpp
│   │   │   ├── gason.h
│   │   │   ├── maskApi.c
│   │   │   └── maskApi.h
│   │   ├── get_coco_next_batch.py
│   │   └── get_coco_next_batch.pyc
│   ├── pretrained_weights
│   │   ├── README.md
│   │   └── mobilenet
│   │       └── README.md
│   └── tfrecord
│       ├── pascal_test.tfrecord
│       └── pascal_train.tfrecord
├── help_utils
│   ├── __init__.py
│   ├── __init__.pyc
│   ├── __pycache__
│   │   ├── __init__.cpython-35.pyc
│   │   └── tools.cpython-35.pyc
│   ├── tools.py
│   └── tools.pyc
├── images.png
├── libs
│   ├── __init__.py
│   ├── __init__.pyc
│   ├── __pycache__
│   │   └── __init__.cpython-35.pyc
│   ├── box_utils
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── anchor_utils.py
│   │   ├── anchor_utils.pyc
│   │   ├── boxes_utils.py
│   │   ├── boxes_utils.pyc
│   │   ├── cython_utils
│   │   │   ├── Makefile
│   │   │   ├── __init__.py
│   │   │   ├── __init__.pyc
│   │   │   ├── bbox.c
│   │   │   ├── bbox.pyx
│   │   │   ├── cython_bbox.so
│   │   │   ├── cython_nms.so
│   │   │   ├── nms.c
│   │   │   ├── nms.pyx
│   │   │   └── setup.py
│   │   ├── draw_box_in_img.py
│   │   ├── draw_box_in_img.pyc
│   │   ├── encode_and_decode.py
│   │   ├── encode_and_decode.pyc
│   │   ├── show_box_in_tensor.py
│   │   ├── show_box_in_tensor.pyc
│   │   ├── tf_ops.py
│   │   └── tf_ops.pyc
│   ├── configs
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-35.pyc
│   │   │   └── cfgs.cpython-35.pyc
│   │   ├── cfgs.py
│   │   ├── cfgs.pyc
│   │   ├── cfgs_coco.py
│   │   ├── cfgs_mobilenetv2.py
│   │   ├── cfgs_res101.py
│   │   └── cfgs_res50.py
│   ├── detection_oprations
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── anchor_target_layer_without_boxweight.py
│   │   ├── anchor_target_layer_without_boxweight.pyc
│   │   ├── proposal_opr.py
│   │   ├── proposal_opr.pyc
│   │   ├── proposal_target_layer.py
│   │   └── proposal_target_layer.pyc
│   ├── export_pbs
│   │   ├── __init__.py
│   │   ├── exportPb.py
│   │   └── test_exportPb.py
│   ├── label_name_dict
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-35.pyc
│   │   │   └── remote_sensing_dict.cpython-35.pyc
│   │   ├── coco_dict.py
│   │   ├── coco_dict.pyc
│   │   ├── label_dict.py
│   │   ├── remote_sensing_dict.py
│   │   └── remote_sensing_dict.pyc
│   ├── losses
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-35.pyc
│   │   │   └── losses.cpython-35.pyc
│   │   ├── losses.py
│   │   ├── losses.pyc
│   │   ├── tfapi_loss.py
│   │   └── tfapi_loss.pyc
│   ├── networks
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── __pycache__
│   │   │   └── __init__.cpython-35.pyc
│   │   ├── build_whole_network.py
│   │   ├── build_whole_network.pyc
│   │   ├── mobilenet
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── __init__.pyc
│   │   │   ├── conv_blocks.py
│   │   │   ├── conv_blocks.pyc
│   │   │   ├── mobilenet.py
│   │   │   ├── mobilenet.pyc
│   │   │   ├── mobilenet_v2.py
│   │   │   ├── mobilenet_v2.pyc
│   │   │   └── mobilenet_v2_test.py
│   │   ├── mobilenet_v2.py
│   │   ├── mobilenet_v2.pyc
│   │   ├── resnet.py
│   │   ├── resnet.pyc
│   │   └── slim_nets
│   │       ├── __init__.py
│   │       ├── __init__.pyc
│   │       ├── __pycache__
│   │       │   ├── __init__.cpython-35.pyc
│   │       │   ├── inception_resnet_v2.cpython-35.pyc
│   │       │   ├── mobilenet_v1.cpython-35.pyc
│   │       │   ├── resnet_utils.cpython-35.pyc
│   │       │   ├── resnet_v1.cpython-35.pyc
│   │       │   └── vgg.cpython-35.pyc
│   │       ├── alexnet.py
│   │       ├── alexnet_test.py
│   │       ├── cifarnet.py
│   │       ├── inception.py
│   │       ├── inception_resnet_v2.py
│   │       ├── inception_resnet_v2.pyc
│   │       ├── inception_resnet_v2_test.py
│   │       ├── inception_utils.py
│   │       ├── inception_v1.py
│   │       ├── inception_v1_test.py
│   │       ├── inception_v2.py
│   │       ├── inception_v2_test.py
│   │       ├── inception_v3.py
│   │       ├── inception_v3_test.py
│   │       ├── inception_v4.py
│   │       ├── inception_v4_test.py
│   │       ├── lenet.py
│   │       ├── mobilenet_v1.md
│   │       ├── mobilenet_v1.png
│   │       ├── mobilenet_v1.py
│   │       ├── mobilenet_v1.pyc
│   │       ├── mobilenet_v1_test.py
│   │       ├── nets_factory.py
│   │       ├── nets_factory_test.py
│   │       ├── overfeat.py
│   │       ├── overfeat_test.py
│   │       ├── resnet_utils.py
│   │       ├── resnet_utils.pyc
│   │       ├── resnet_v1.py
│   │       ├── resnet_v1.pyc
│   │       ├── resnet_v1_test.py
│   │       ├── resnet_v2.py
│   │       ├── resnet_v2_test.py
│   │       ├── vgg.py
│   │       ├── vgg.pyc
│   │       └── vgg_test.py
│   ├── setup.py
│   └── val_libs
│       ├── __init__.py
│       ├── __init__.pyc
│       ├── voc_eval.py
│       └── voc_eval.pyc
├── output
│   └── trained_weights
│       └── README.md
├── scalars.png
├── tools
│   ├── FasterRCNN_20180516_mobile.jpg
│   ├── __init__.py
│   ├── __init__.pyc
│   ├── demos
│   │   ├── 000058.jpg
│   │   ├── 000108.jpg
│   │   ├── 000237.jpg
│   │   ├── 000449.jpg
│   │   ├── 000611.jpg
│   │   ├── 000706.jpg
│   │   ├── 000719.jpg
│   │   └── 004640.jpg
│   ├── eval.py
│   ├── inference.py
│   ├── inference_for_coco.py
│   ├── inference_results
│   │   ├── 000058.jpg
│   │   ├── 000108.jpg
│   │   ├── 000237.jpg
│   │   ├── 000449.jpg
│   │   ├── 000611.jpg
│   │   ├── 000706.jpg
│   │   ├── 000719.jpg
│   │   └── 004640.jpg
│   ├── test.py
│   ├── train.py
│   └── train_with_placeholder.py
└── voc_2007.gif
/README.md:
--------------------------------------------------------------------------------
1 | # Faster-RCNN_Tensorflow
2 |
3 | ## Abstract
4 | This is a TensorFlow re-implementation of [Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks](https://arxiv.org/abs/1506.01497).
5 |
6 | This project was completed by [YangXue](https://github.com/yangxue0827) and [YangJirui](https://github.com/yangJirui). Some related projects, [R2CNN](https://github.com/DetectionTeamUCAS/R2CNN_Faster-RCNN_Tensorflow) and [RRPN](https://github.com/DetectionTeamUCAS/RRPN_Faster-RCNN_Tensorflow), are based on this code.
7 |
8 | ## Train on VOC 2007 trainval and test on VOC 2007 test (PS: this project also supports COCO training.)
9 | 
10 |
11 | ## Comparison
12 | ### use_voc2012_metric
13 | | Models | mAP | sheep | horse | bicycle | bottle | cow | sofa | bus | dog | cat | person | train | diningtable | aeroplane | car | pottedplant | tvmonitor | chair | bird | boat | motorbike |
14 | |------------|:---:|:--:|:--:|:--:|:---:|:--:|:--:|:--:|:--:|:--:|:--:|:---:|:--:|:--:|:--:|:--:|:---:|:--:|:--:|:--:|:--:|
15 | |resnet50_v1|75.16|74.08|89.27|80.27|55.74|83.38|69.35|85.13|88.80|91.42|81.17|81.71|62.74|78.65|86.86|47.00|76.71|50.29|79.05|60.51|80.96|
16 | |resnet101_v1|77.03|79.68|89.33|83.89|59.41|85.68|76.59|84.23|88.50|88.50|81.54|79.16|72.66|80.26|88.42|47.50|79.81|52.85|80.70|59.94|81.87|
17 | |mobilenet_v2|50.36|46.68|70.45|67.43|25.69|53.60|46.26|58.95|37.62|43.97|67.67|61.35|52.14|56.54|75.02|24.47|49.89|27.76|38.04|38.20|65.46|
18 |
19 | ### use_voc2007_metric
20 | | Models | mAP | sheep | horse | bicycle | bottle | cow | sofa | bus | dog | cat | person | train | diningtable | aeroplane | car | pottedplant | tvmonitor | chair | bird | boat | motorbike |
21 | |------------|:---:|:--:|:--:|:--:|:---:|:--:|:--:|:--:|:--:|:--:|:--:|:---:|:--:|:--:|:--:|:--:|:---:|:--:|:--:|:--:|:--:|
22 | |resnet50_v1|73.09|72.11|85.63|77.74|55.82|81.19|67.34|82.44|85.66|87.34|77.49|79.13|62.65|76.54|84.01|47.90|74.13|50.09|76.81|60.34|77.47|
23 | |resnet101_v1|74.63|76.35|86.18|79.87|58.73|83.4|74.75|80.03|85.4|86.55|78.24|76.07|70.89|78.52|86.26|47.80|76.34|52.14|78.06|58.90|78.04|
24 | |mobilenet_v2|50.34|46.99|68.45|65.89|28.16|53.21|46.96|57.80|38.60|44.12|66.20|60.49|52.40|56.06|72.68|26.91|49.99|30.18|39.38|38.54|64.74|
25 |
26 |
27 | ## Requirements
28 | 1. tensorflow >= 1.2
29 | 2. CUDA 8.0
30 | 3. python2.7 (anaconda2 recommended)
31 | 4. [opencv(cv2)](https://pypi.org/project/opencv-python/)
32 |
33 | ## Download Model
34 | 1. Please download the [resnet50_v1](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz) and [resnet101_v1](http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz) models pre-trained on ImageNet, and put them in $PATH_ROOT/data/pretrained_weights.
35 | 2. Please download the [mobilenet_v2](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz) model pre-trained on ImageNet, and put it in $PATH_ROOT/data/pretrained_weights/mobilenet.
36 | 3. Please download the [trained model](https://github.com/DetectionTeamUCAS/Models/tree/master/Faster-RCNN_Tensorflow) provided by this project, and put it in $PATH_ROOT/output/trained_weights.
37 |
38 | ## Data Format
39 | ```
40 | ├── VOCdevkit
41 | │   ├── VOCdevkit_train
42 | │   │   ├── Annotation
43 | │   │   └── JPEGImages
44 | │   └── VOCdevkit_test
45 | │       ├── Annotation
46 | │       └── JPEGImages
47 | ```
48 |
49 | ## Compile
50 | ```
51 | cd $PATH_ROOT/libs/box_utils/cython_utils
52 | python setup.py build_ext --inplace
53 | ```
54 |
55 | ## Demo (available)
56 |
57 | **Select a configuration file in the folder ($PATH_ROOT/libs/configs/) and copy its contents into cfgs.py, then download the corresponding [weights](https://github.com/DetectionTeamUCAS/Models/tree/master/Faster-RCNN_Tensorflow).**
58 |
59 | ```
60 | cd $PATH_ROOT/tools
61 | python inference.py --data_dir='/PATH/TO/IMAGES/' \
62 |                     --save_dir='/PATH/TO/SAVE/RESULTS/' \
63 |                     --GPU='0'
64 | ```
65 |
66 | ## Eval
67 | ```
68 | cd $PATH_ROOT/tools
69 | python eval.py --eval_imgs='/PATH/TO/IMAGES/' \
70 |                --annotation_dir='/PATH/TO/TEST/ANNOTATION/' \
71 |                --GPU='0'
72 | ```
73 |
74 | ## Train
75 |
76 | 1. If you want to train on your own data, please note:
77 | ```
78 | (1) Modify parameters (such as CLASS_NUM, DATASET_NAME, VERSION, etc.) in $PATH_ROOT/libs/configs/cfgs.py
79 | (2) Add category information in $PATH_ROOT/libs/label_name_dict/label_dict.py (a minimal example is sketched below)
80 | (3) Add data_name to line 76 of $PATH_ROOT/data/io/read_tfrecord.py
81 | ```
82 |
83 | 2. Make tfrecords
84 | ```
85 | cd $PATH_ROOT/data/io/
86 | python convert_data_to_tfrecord.py --VOC_dir='/PATH/TO/VOCdevkit/VOCdevkit_train/' \
87 |                                    --xml_dir='Annotation' \
88 |                                    --image_dir='JPEGImages' \
89 |                                    --save_name='train' \
90 |                                    --img_format='.jpg' \
91 |                                    --dataset='pascal'
92 | ```
93 |
94 | 3. Train
95 | ```
96 | cd $PATH_ROOT/tools
97 | python train.py
98 | ```
99 |
100 | ## Tensorboard
101 | ```
102 | cd $PATH_ROOT/output/summary
103 | tensorboard --logdir=.
104 | ```
105 | 
106 | 
107 |
108 | ## Reference
109 | 1. https://github.com/endernewton/tf-faster-rcnn
110 | 2. https://github.com/zengarden/light_head_rcnn
111 | 3. https://github.com/tensorflow/models/tree/master/research/object_detection
112 |
--------------------------------------------------------------------------------
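Step (2) of the README's Train section above edits the class map that the conversion scripts in data/io import. As a minimal sketch of what an added entry might look like (the exact dict layout lives in libs/label_name_dict/label_dict.py, which is not dumped here, and `your_class` is a placeholder rather than a name from this repo):

```python
# Hypothetical addition to $PATH_ROOT/libs/label_name_dict/label_dict.py.
# The tfrecord conversion scripts below import NAME_LABEL_MAP from this
# module; index 0 is conventionally reserved for the background class.
NAME_LABEL_MAP = {
    'back_ground': 0,  # background must stay at index 0
    'your_class': 1,   # placeholder: one entry per category in your dataset
}
```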
/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/__init__.py
--------------------------------------------------------------------------------
/data/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/__init__.pyc
--------------------------------------------------------------------------------
/data/io/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/io/__init__.py
--------------------------------------------------------------------------------
/data/io/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/io/__init__.pyc
--------------------------------------------------------------------------------
/data/io/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/io/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/data/io/__pycache__/read_tfrecord.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/io/__pycache__/read_tfrecord.cpython-35.pyc
--------------------------------------------------------------------------------
/data/io/convert_data_to_tfrecord.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import division, print_function, absolute_import
3 | import sys
4 | sys.path.append('../../')
5 | import xml.etree.cElementTree as ET
6 | import numpy as np
7 | import tensorflow as tf
8 | import glob
9 | import cv2
10 | from libs.label_name_dict.label_dict import *
11 | from help_utils.tools import *
12 |
13 | tf.app.flags.DEFINE_string('VOC_dir', '/mnt/USBB/gx/DOTA/DOTA_TOTAL/', 'Voc dir')
14 | tf.app.flags.DEFINE_string('xml_dir', 'XML', 'xml dir')
15 | tf.app.flags.DEFINE_string('image_dir', 'IMG', 'image dir')
16 | tf.app.flags.DEFINE_string('save_name', 'train', 'save name')
17 | tf.app.flags.DEFINE_string('save_dir', '../tfrecord/', 'save name')
18 | tf.app.flags.DEFINE_string('img_format', '.png', 'format of image')
19 | tf.app.flags.DEFINE_string('dataset', 'DOTA_TOTAL', 'dataset')
20 | FLAGS = tf.app.flags.FLAGS
21 |
22 |
23 | def _int64_feature(value):
24 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
25 |
26 |
27 | def _bytes_feature(value):
28 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
29 |
30 |
31 | def read_xml_gtbox_and_label(xml_path):
32 | """
33 | :param xml_path: the path of voc xml
34 | :return: img_height, img_width, and an array of gtboxes and labels with shape [num_of_gtboxes, 5],
35 | where each row is [xmin, ymin, xmax, ymax, label]
36 | """
37 |
38 | tree = ET.parse(xml_path)
39 | root = tree.getroot()
40 | img_width = None
41 | img_height = None
42 | box_list = []
43 | for child_of_root in root:
44 | # if child_of_root.tag == 'filename':
45 | # assert child_of_root.text == xml_path.split('/')[-1].split('.')[0] \
46 | # + FLAGS.img_format, 'xml_name and img_name cannot match'
47 |
48 | if child_of_root.tag == 'size':
49 | for child_item in child_of_root:
50 | if child_item.tag == 'width':
51 | img_width = int(child_item.text)
52 | if child_item.tag == 'height':
53 | img_height = int(child_item.text)
54 |
55 | if child_of_root.tag == 'object':
56 | label = None
57 | for child_item in child_of_root:
58 | if child_item.tag == 'name':
59 | label = NAME_LABEL_MAP[child_item.text]
60 | if child_item.tag == 'bndbox':
61 | tmp_box = []
62 | for node in child_item:
63 | tmp_box.append(int(node.text))
64 | assert label is not None, 'label is none, error'
65 | tmp_box.append(label)
66 | box_list.append(tmp_box)
67 |
68 | gtbox_label = np.array(box_list, dtype=np.int32)
69 |
70 | return img_height, img_width, gtbox_label
71 |
72 |
73 | def convert_pascal_to_tfrecord():
74 | xml_path = FLAGS.VOC_dir + FLAGS.xml_dir
75 | image_path = FLAGS.VOC_dir + FLAGS.image_dir
76 | save_path = FLAGS.save_dir + FLAGS.dataset + '_' + FLAGS.save_name + '.tfrecord'
77 | mkdir(FLAGS.save_dir)
78 |
79 | # writer_options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB)
80 | # writer = tf.python_io.TFRecordWriter(path=save_path, options=writer_options)
81 | writer = tf.python_io.TFRecordWriter(path=save_path)
82 | for count, xml in enumerate(glob.glob(xml_path + '/*.xml')):
83 | # to avoid path error in different development platform
84 | xml = xml.replace('\\', '/')
85 |
86 | img_name = xml.split('/')[-1].split('.')[0] + FLAGS.img_format
87 | img_path = image_path + '/' + img_name
88 |
89 | if not os.path.exists(img_path):
90 | print('{} does not exist!'.format(img_path))
91 | continue
92 |
93 | img_height, img_width, gtbox_label = read_xml_gtbox_and_label(xml)
94 |
95 | # img = np.array(Image.open(img_path))
96 | img = cv2.imread(img_path)[:, :, ::-1]
97 |
98 | feature = tf.train.Features(feature={
99 | # do not need encode() in linux
100 | # 'img_name': _bytes_feature(img_name.encode()),
101 | 'img_name': _bytes_feature(img_name),
102 | 'img_height': _int64_feature(img_height),
103 | 'img_width': _int64_feature(img_width),
104 | 'img': _bytes_feature(img.tostring()),
105 | 'gtboxes_and_label': _bytes_feature(gtbox_label.tostring()),
106 | 'num_objects': _int64_feature(gtbox_label.shape[0])
107 | })
108 |
109 | example = tf.train.Example(features=feature)
110 |
111 | writer.write(example.SerializeToString())
112 |
113 | view_bar('Conversion progress', count + 1, len(glob.glob(xml_path + '/*.xml')))
114 |
115 | print('\nConversion is complete!')
116 |
117 |
118 | if __name__ == '__main__':
119 | # xml_path = '../data/dataset/VOCdevkit/VOC2007/Annotations/000005.xml'
120 | # read_xml_gtbox_and_label(xml_path)
121 |
122 | convert_pascal_to_tfrecord()
123 |
--------------------------------------------------------------------------------
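A quick way to sanity-check what convert_data_to_tfrecord.py wrote is to parse one serialized example back using the same feature keys it defines. A minimal sketch, assuming TensorFlow 1.x and the script's default `../tfrecord/` output location:

```python
# Sketch: read one example back from the tfrecord written above and check
# the decoded shapes. Adjust record_path to your own output file.
from __future__ import print_function
import numpy as np
import tensorflow as tf

record_path = '../tfrecord/pascal_train.tfrecord'
for serialized in tf.python_io.tf_record_iterator(record_path):
    example = tf.train.Example()
    example.ParseFromString(serialized)
    f = example.features.feature
    h = f['img_height'].int64_list.value[0]
    w = f['img_width'].int64_list.value[0]
    img = np.frombuffer(f['img'].bytes_list.value[0], dtype=np.uint8).reshape(h, w, 3)
    boxes = np.frombuffer(f['gtboxes_and_label'].bytes_list.value[0],
                          dtype=np.int32).reshape(-1, 5)
    print(f['img_name'].bytes_list.value[0], img.shape, boxes.shape)
    break  # one example is enough for a sanity check
```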
/data/io/convert_data_to_tfrecord_raw.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | '''
4 | this file is to convert pascal to tfrecord
5 | '''
6 |
7 | import numpy as np
8 | import cv2
9 | import os, sys
10 | import tensorflow as tf
11 | import xml.etree.cElementTree as ET
12 | from libs.label_name_dict.label_dict import NAME_LABEL_MAP
13 |
14 |
15 | tf.app.flags.DEFINE_string('VOC_dir', '/home/yjr/DataSet/VOC', 'Voc dir ')
16 | FLAGS = tf.app.flags.FLAGS
17 |
18 | def _int64_feature(value):
19 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
20 |
21 | def _bytes_feature(value):
22 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
23 |
24 | def read_xml_target_box_and_label(xml_path):
25 | '''
26 |
27 | :param xml_path:
28 | :return:img_height, img_width, gtboxes
29 | gtboxes is an array of shape [num_of_gtboxes, 5];
30 | each row in gtboxes is [xmin, ymin, xmax, ymax, label]
31 | '''
32 | tree = ET.parse(xml_path)
33 | root = tree.getroot()
34 | img_width = None
35 | img_height = None
36 | box_list = []
37 | for child_of_root in root:
38 | if child_of_root.tag == 'filename':
39 | assert child_of_root.text == xml_path.split('/')[-1].split('.')[0] + '.jpg', 'xml_name and img_name cannot match'
40 | if child_of_root.tag == 'size':
41 | for child_item in child_of_root:
42 | if child_item.tag == 'width':
43 | img_width = int(child_item.text)
44 | if child_item.tag == 'height':
45 | img_height = int(child_item.text)
46 | if child_of_root.tag == 'object':
47 | label = None
48 | for child_item in child_of_root:
49 | if child_item.tag == 'name':
50 | # print child_item.text
51 | label = NAME_LABEL_MAP[child_item.text]
52 | if child_item.tag == 'bndbox':
53 | tmp_box = []
54 | for node in child_item:
55 | tmp_box.append(int(node.text)) # [xmin, ymin, xmax, ymax]
56 | assert label is not None, 'label is none, error'
57 | tmp_box.append(label) #[xmin, ymin, xmax, ymax, label]
58 | box_list.append(tmp_box)
59 |
60 | gtbox_list = np.array(box_list, dtype=np.int32) # [xmin, ymin, xmax, ymax, label]
61 |
62 | xmin, ymin, xmax, ymax, label = gtbox_list[:, 0], gtbox_list[:, 1], gtbox_list[:, 2], gtbox_list[:, 3],\
63 | gtbox_list[:, 4]
64 |
65 | gtbox_list = np.transpose(np.stack([xmin, ymin, xmax, ymax, label], axis=0)) # [xmin, ymin, xmax, ymax, label]
66 | # print gtbox_list.shape
67 | return img_height, img_width, gtbox_list
68 |
69 | def convert_pascal(dataset_name):
70 |
71 | dataset_rootdir = os.path.join(FLAGS.VOC_dir, 'VOCtrain_val/VOC2007') if dataset_name == 'train' \
72 | else os.path.join(FLAGS.VOC_dir, 'VOC_test/VOC2007')
73 |
74 | imgname_list = []
75 | part_name = 'trainval.txt' if dataset_name == 'train' else 'test.txt'
76 | with open(os.path.join(dataset_rootdir, 'ImageSets/Main/aeroplane_'+part_name)) as f:
77 | all_lines = f.readlines()
78 |
79 | for a_line in all_lines:
80 | imgname_list.append(a_line.split()[0].strip())
81 |
82 | # writer_options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB)
83 | # writer = tf.python_io.TFRecordWriter(path='../data/tfrecords/pascal_'+dataset_name+'.tfrecord', options=writer_options)
84 | writer = tf.python_io.TFRecordWriter(path='../tfrecord/pascal_' + dataset_name + '.tfrecord')
85 | for i, img_name in enumerate(imgname_list):
86 | img_np = cv2.imread(os.path.join(dataset_rootdir, 'JPEGImages/'+img_name+'.jpg'))
87 | # if img_np == None:
88 | # print img_name
89 | assert img_np is not None, 'read img error, img_np is None'
90 | img_np = img_np[:, :, ::-1]
91 | xml_path = os.path.join(dataset_rootdir, 'Annotations/'+img_name+'.xml')
92 | img_height, img_width, gtboxes = read_xml_target_box_and_label(xml_path)
93 |
94 | example = tf.train.Example(features=tf.train.Features(feature={
95 | 'img_name': _bytes_feature(img_name),
96 | 'img_height': _int64_feature(img_height),
97 | 'img_width': _int64_feature(img_width),
98 | 'img': _bytes_feature(img_np.tostring()),
99 | 'gtboxes_and_label': _bytes_feature(gtboxes.tostring()),
100 | 'num_objects': _int64_feature(gtboxes.shape[0])
101 | }))
102 | writer.write(example.SerializeToString())
103 | if i % 100 == 0:
104 | print('{} {} imgs convert over'.format(i, dataset_name))
105 | print(20*"@")
106 | print('all {} imgs convert over, the num is {}'.format(dataset_name, i))
107 |
108 | if __name__ == '__main__':
109 | # w, h, gtboxes = read_xml_target_box_and_label('/home/yjr/DataSet/VOC/VOCtrain_val/VOC2007/Annotations/000005.xml')
110 | # print w, h
111 | # print gtboxes
112 | convert_pascal('train')
113 | convert_pascal('test')
114 |
115 |
--------------------------------------------------------------------------------
/data/io/image_preprocess.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 | from __future__ import print_function
5 | from __future__ import division
6 |
7 | import tensorflow as tf
8 |
9 | import numpy as np
10 |
11 |
12 | def max_length_limitation(length, length_limitation):
13 | return tf.cond(tf.less(length, length_limitation),
14 | true_fn=lambda: length,
15 | false_fn=lambda: length_limitation)
16 |
17 | def short_side_resize(img_tensor, gtboxes_and_label, target_shortside_len, length_limitation=1200):
18 | '''
19 |
20 | :param img_tensor:[h, w, c], gtboxes_and_label:[-1, 5]. gtboxes: [xmin, ymin, xmax, ymax]
21 | :param target_shortside_len:
22 | :param length_limitation: set max length to avoid OUT OF MEMORY
23 | :return:
24 | '''
25 | img_h, img_w = tf.shape(img_tensor)[0], tf.shape(img_tensor)[1]
26 | new_h, new_w = tf.cond(tf.less(img_h, img_w),
27 | true_fn=lambda: (target_shortside_len,
28 | max_length_limitation(target_shortside_len * img_w // img_h, length_limitation)),
29 | false_fn=lambda: (max_length_limitation(target_shortside_len * img_h // img_w, length_limitation),
30 | target_shortside_len))
31 |
32 | img_tensor = tf.expand_dims(img_tensor, axis=0)
33 | img_tensor = tf.image.resize_bilinear(img_tensor, [new_h, new_w])
34 |
35 | xmin, ymin, xmax, ymax, label = tf.unstack(gtboxes_and_label, axis=1)
36 |
37 | new_xmin, new_ymin = xmin * new_w // img_w, ymin * new_h // img_h
38 | new_xmax, new_ymax = xmax * new_w // img_w, ymax * new_h // img_h
39 | img_tensor = tf.squeeze(img_tensor, axis=0) # ensure image tensor rank is 3
40 |
41 | return img_tensor, tf.transpose(tf.stack([new_xmin, new_ymin, new_xmax, new_ymax, label], axis=0))
42 |
43 |
44 | def short_side_resize_for_inference_data(img_tensor, target_shortside_len, length_limitation=1200):
45 | img_h, img_w = tf.shape(img_tensor)[0], tf.shape(img_tensor)[1]
46 |
47 | new_h, new_w = tf.cond(tf.less(img_h, img_w),
48 | true_fn=lambda: (target_shortside_len,
49 | max_length_limitation(target_shortside_len * img_w // img_h, length_limitation)),
50 | false_fn=lambda: (max_length_limitation(target_shortside_len * img_h // img_w, length_limitation),
51 | target_shortside_len))
52 |
53 | img_tensor = tf.expand_dims(img_tensor, axis=0)
54 | img_tensor = tf.image.resize_bilinear(img_tensor, [new_h, new_w])
55 |
56 | img_tensor = tf.squeeze(img_tensor, axis=0) # ensure image tensor rank is 3
57 | return img_tensor
58 |
59 | def flip_left_to_right(img_tensor, gtboxes_and_label):
60 |
61 | h, w = tf.shape(img_tensor)[0], tf.shape(img_tensor)[1]
62 |
63 | img_tensor = tf.image.flip_left_right(img_tensor)
64 |
65 | xmin, ymin, xmax, ymax, label = tf.unstack(gtboxes_and_label, axis=1)
66 | new_xmax = w - xmin
67 | new_xmin = w - xmax
68 |
69 | return img_tensor, tf.transpose(tf.stack([new_xmin, ymin, new_xmax, ymax, label], axis=0))
70 |
71 | def random_flip_left_right(img_tensor, gtboxes_and_label):
72 | img_tensor, gtboxes_and_label= tf.cond(tf.less(tf.random_uniform(shape=[], minval=0, maxval=1), 0.5),
73 | lambda: flip_left_to_right(img_tensor, gtboxes_and_label),
74 | lambda: (img_tensor, gtboxes_and_label))
75 |
76 | return img_tensor, gtboxes_and_label
77 |
78 |
79 |
80 |
--------------------------------------------------------------------------------
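The new_h/new_w arithmetic in short_side_resize is easiest to follow with concrete numbers. Below is a plain-Python mirror of that computation, for intuition only; the real functions operate on tensors inside the graph:

```python
# Sketch: the integer arithmetic behind short_side_resize. The short side is
# scaled to target_shortside_len and the long side is scaled proportionally,
# but capped at length_limitation to avoid out-of-memory errors.
def short_side_resize_dims(img_h, img_w, target_shortside_len, length_limitation=1200):
    if img_h < img_w:
        new_h = target_shortside_len
        new_w = min(target_shortside_len * img_w // img_h, length_limitation)
    else:
        new_h = min(target_shortside_len * img_h // img_w, length_limitation)
        new_w = target_shortside_len
    return new_h, new_w

print(short_side_resize_dims(375, 500, 600))   # (600, 800)
print(short_side_resize_dims(375, 1250, 600))  # (600, 1200): long side capped
```

Ground-truth boxes are rescaled with the same integer ratios (new_xmin = xmin * new_w // img_w, and so on), so they stay aligned with the resized image.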
/data/io/image_preprocess.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/io/image_preprocess.pyc
--------------------------------------------------------------------------------
/data/io/read_tfrecord.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 | from __future__ import print_function
5 | from __future__ import division
6 |
7 | import numpy as np
8 | import tensorflow as tf
9 | import os
10 | from data.io import image_preprocess
11 | from libs.configs import cfgs
12 |
13 | def read_single_example_and_decode(filename_queue):
14 |
15 | # tfrecord_options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB)
16 |
17 | # reader = tf.TFRecordReader(options=tfrecord_options)
18 | reader = tf.TFRecordReader()
19 | _, serialized_example = reader.read(filename_queue)
20 |
21 | features = tf.parse_single_example(
22 | serialized=serialized_example,
23 | features={
24 | 'img_name': tf.FixedLenFeature([], tf.string),
25 | 'img_height': tf.FixedLenFeature([], tf.int64),
26 | 'img_width': tf.FixedLenFeature([], tf.int64),
27 | 'img': tf.FixedLenFeature([], tf.string),
28 | 'gtboxes_and_label': tf.FixedLenFeature([], tf.string),
29 | 'num_objects': tf.FixedLenFeature([], tf.int64)
30 | }
31 | )
32 | img_name = features['img_name']
33 | img_height = tf.cast(features['img_height'], tf.int32)
34 | img_width = tf.cast(features['img_width'], tf.int32)
35 | img = tf.decode_raw(features['img'], tf.uint8)
36 |
37 | img = tf.reshape(img, shape=[img_height, img_width, 3])
38 |
39 | gtboxes_and_label = tf.decode_raw(features['gtboxes_and_label'], tf.int32)
40 | gtboxes_and_label = tf.reshape(gtboxes_and_label, [-1, 5])
41 |
42 | num_objects = tf.cast(features['num_objects'], tf.int32)
43 | return img_name, img, gtboxes_and_label, num_objects
44 |
45 |
46 | def read_and_prepocess_single_img(filename_queue, shortside_len, is_training):
47 |
48 | img_name, img, gtboxes_and_label, num_objects = read_single_example_and_decode(filename_queue)
49 |
50 | img = tf.cast(img, tf.float32)
51 |
52 | if is_training:
53 | img, gtboxes_and_label = image_preprocess.short_side_resize(img_tensor=img, gtboxes_and_label=gtboxes_and_label,
54 | target_shortside_len=shortside_len,
55 | length_limitation=cfgs.IMG_MAX_LENGTH)
56 | img, gtboxes_and_label = image_preprocess.random_flip_left_right(img_tensor=img,
57 | gtboxes_and_label=gtboxes_and_label)
58 |
59 | else:
60 | img, gtboxes_and_label = image_preprocess.short_side_resize(img_tensor=img, gtboxes_and_label=gtboxes_and_label,
61 | target_shortside_len=shortside_len,
62 | length_limitation=cfgs.IMG_MAX_LENGTH)
63 | img = img - tf.constant([[cfgs.PIXEL_MEAN]]) # sub pixel mean at last
64 | return img_name, img, gtboxes_and_label, num_objects
65 |
66 |
67 | def next_batch(dataset_name, batch_size, shortside_len, is_training):
68 | '''
69 | :return:
70 | img_name_batch: shape(1, 1)
71 | img_batch: shape:(1, new_imgH, new_imgW, C)
72 | gtboxes_and_label_batch: shape (1, Num_Of_objects, 5). Each row is [x1, y1, x2, y2, label]
73 | '''
74 | assert batch_size == 1, "we only support batch_size = 1. We may support a larger batch_size in the future"
75 |
76 | if dataset_name not in ['ship', 'spacenet', 'pascal', 'coco']:
77 | raise ValueError('dataset name must be one of: pascal, coco, spacenet, ship')
78 |
79 | if is_training:
80 | pattern = os.path.join('../data/tfrecord', dataset_name + '_train*')
81 | else:
82 | pattern = os.path.join('../data/tfrecord', dataset_name + '_test*')
83 |
84 | print('tfrecord path is -->', os.path.abspath(pattern))
85 |
86 | filename_tensorlist = tf.train.match_filenames_once(pattern)
87 |
88 | filename_queue = tf.train.string_input_producer(filename_tensorlist)
89 |
90 | img_name, img, gtboxes_and_label, num_obs = read_and_prepocess_single_img(filename_queue, shortside_len,
91 | is_training=is_training)
92 | img_name_batch, img_batch, gtboxes_and_label_batch, num_obs_batch = \
93 | tf.train.batch(
94 | [img_name, img, gtboxes_and_label, num_obs],
95 | batch_size=batch_size,
96 | capacity=1,
97 | num_threads=1,
98 | dynamic_pad=True)
99 | return img_name_batch, img_batch, gtboxes_and_label_batch, num_obs_batch
100 |
--------------------------------------------------------------------------------
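Because next_batch is built on tf.train.string_input_producer and tf.train.batch, a consumer must initialize local variables (match_filenames_once stores the matched file list in one) and start queue runners before the first sess.run. A minimal consumption sketch under TensorFlow 1.x, run from a directory like tools/ so the relative '../data/tfrecord' pattern resolves; the cfgs attribute names used here (DATASET_NAME, BATCH_SIZE, IMG_SHORT_SIDE_LEN) are assumptions about cfgs.py:

```python
# Sketch: pull one batch from the queue-based input pipeline defined above.
import tensorflow as tf
from data.io.read_tfrecord import next_batch
from libs.configs import cfgs

img_name_batch, img_batch, gtboxes_and_label_batch, num_obs_batch = \
    next_batch(dataset_name=cfgs.DATASET_NAME,
               batch_size=cfgs.BATCH_SIZE,  # must be 1 (asserted above)
               shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
               is_training=True)

init_op = tf.group(tf.global_variables_initializer(),
                   tf.local_variables_initializer())
with tf.Session() as sess:
    sess.run(init_op)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    name, img, gt = sess.run([img_name_batch, img_batch, gtboxes_and_label_batch])
    print(name, img.shape, gt.shape)  # e.g. (1,), (1, H, W, 3), (1, N, 5)
    coord.request_stop()
    coord.join(threads)
```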
/data/io/read_tfrecord.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/io/read_tfrecord.pyc
--------------------------------------------------------------------------------
/data/lib_coco/PythonAPI/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | # install pycocotools locally
3 | python setup.py build_ext --inplace
4 | rm -rf build
5 |
6 | install:
7 | # install pycocotools to the Python site-packages
8 | python setup.py build_ext install
9 | rm -rf build
--------------------------------------------------------------------------------
/data/lib_coco/PythonAPI/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/lib_coco/PythonAPI/__init__.py
--------------------------------------------------------------------------------
/data/lib_coco/PythonAPI/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/lib_coco/PythonAPI/__init__.pyc
--------------------------------------------------------------------------------
/data/lib_coco/PythonAPI/pycocoEvalDemo.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": false
8 | },
9 | "outputs": [],
10 | "source": [
11 | "%matplotlib inline\n",
12 | "import matplotlib.pyplot as plt\n",
13 | "from pycocotools.coco import COCO\n",
14 | "from pycocotools.cocoeval import COCOeval\n",
15 | "import numpy as np\n",
16 | "import skimage.io as io\n",
17 | "import pylab\n",
18 | "pylab.rcParams['figure.figsize'] = (10.0, 8.0)"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "metadata": {
25 | "collapsed": false
26 | },
27 | "outputs": [
28 | {
29 | "name": "stdout",
30 | "output_type": "stream",
31 | "text": [
32 | "Running demo for *bbox* results.\n"
33 | ]
34 | }
35 | ],
36 | "source": [
37 | "annType = ['segm','bbox','keypoints']\n",
38 | "annType = annType[1] #specify type here\n",
39 | "prefix = 'person_keypoints' if annType=='keypoints' else 'instances'\n",
40 | "print 'Running demo for *%s* results.'%(annType)"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": 3,
46 | "metadata": {
47 | "collapsed": false
48 | },
49 | "outputs": [
50 | {
51 | "name": "stdout",
52 | "output_type": "stream",
53 | "text": [
54 | "loading annotations into memory...\n",
55 | "Done (t=8.01s)\n",
56 | "creating index...\n",
57 | "index created!\n"
58 | ]
59 | }
60 | ],
61 | "source": [
62 | "#initialize COCO ground truth api\n",
63 | "dataDir='../'\n",
64 | "dataType='val2014'\n",
65 | "annFile = '%s/annotations/%s_%s.json'%(dataDir,prefix,dataType)\n",
66 | "cocoGt=COCO(annFile)"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": 4,
72 | "metadata": {
73 | "collapsed": false
74 | },
75 | "outputs": [
76 | {
77 | "name": "stdout",
78 | "output_type": "stream",
79 | "text": [
80 | "Loading and preparing results... \n",
81 | "DONE (t=0.05s)\n",
82 | "creating index...\n",
83 | "index created!\n"
84 | ]
85 | }
86 | ],
87 | "source": [
88 | "#initialize COCO detections api\n",
89 | "resFile='%s/results/%s_%s_fake%s100_results.json'\n",
90 | "resFile = resFile%(dataDir, prefix, dataType, annType)\n",
91 | "cocoDt=cocoGt.loadRes(resFile)"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": 5,
97 | "metadata": {
98 | "collapsed": false
99 | },
100 | "outputs": [],
101 | "source": [
102 | "imgIds=sorted(cocoGt.getImgIds())\n",
103 | "imgIds=imgIds[0:100]\n",
104 | "imgId = imgIds[np.random.randint(100)]"
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": 6,
110 | "metadata": {
111 | "collapsed": false
112 | },
113 | "outputs": [
114 | {
115 | "name": "stdout",
116 | "output_type": "stream",
117 | "text": [
118 | "Running per image evaluation... \n",
119 | "DONE (t=0.46s).\n",
120 | "Accumulating evaluation results... \n",
121 | "DONE (t=0.38s).\n",
122 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.505\n",
123 | " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.697\n",
124 | " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.573\n",
125 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.586\n",
126 | " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.519\n",
127 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.501\n",
128 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.387\n",
129 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.594\n",
130 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.595\n",
131 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.640\n",
132 | " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.566\n",
133 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.564\n"
134 | ]
135 | }
136 | ],
137 | "source": [
138 | "# running evaluation\n",
139 | "cocoEval = COCOeval(cocoGt,cocoDt,annType)\n",
140 | "cocoEval.params.imgIds = imgIds\n",
141 | "cocoEval.evaluate()\n",
142 | "cocoEval.accumulate()\n",
143 | "cocoEval.summarize()"
144 | ]
145 | }
146 | ],
147 | "metadata": {
148 | "kernelspec": {
149 | "display_name": "Python 2",
150 | "language": "python",
151 | "name": "python2"
152 | },
153 | "language_info": {
154 | "codemirror_mode": {
155 | "name": "ipython",
156 | "version": 2
157 | },
158 | "file_extension": ".py",
159 | "mimetype": "text/x-python",
160 | "name": "python",
161 | "nbconvert_exporter": "python",
162 | "pygments_lexer": "ipython2",
163 | "version": "2.7.10"
164 | }
165 | },
166 | "nbformat": 4,
167 | "nbformat_minor": 0
168 | }
169 |
--------------------------------------------------------------------------------
/data/lib_coco/PythonAPI/pycocotools/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 |
--------------------------------------------------------------------------------
/data/lib_coco/PythonAPI/pycocotools/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/lib_coco/PythonAPI/pycocotools/__init__.pyc
--------------------------------------------------------------------------------
/data/lib_coco/PythonAPI/pycocotools/_mask.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/lib_coco/PythonAPI/pycocotools/_mask.so
--------------------------------------------------------------------------------
/data/lib_coco/PythonAPI/pycocotools/coco.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/lib_coco/PythonAPI/pycocotools/coco.pyc
--------------------------------------------------------------------------------
/data/lib_coco/PythonAPI/pycocotools/mask.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tsungyi'
2 |
3 | import pycocotools._mask as _mask
4 |
5 | # Interface for manipulating masks stored in RLE format.
6 | #
7 | # RLE is a simple yet efficient format for storing binary masks. RLE
8 | # first divides a vector (or vectorized image) into a series of piecewise
9 | # constant regions and then for each piece simply stores the length of
10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
12 | # (note that the odd counts are always the numbers of zeros). Instead of
13 | # storing the counts directly, additional compression is achieved with a
14 | # variable bitrate representation based on a common scheme called LEB128.
15 | #
16 | # Compression is greatest given large piecewise constant regions.
17 | # Specifically, the size of the RLE is proportional to the number of
18 | # *boundaries* in M (or for an image the number of boundaries in the y
19 | # direction). Assuming fairly simple shapes, the RLE representation is
20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage
21 | # is substantially lower, especially for large simple objects (large n).
22 | #
23 | # Many common operations on masks can be computed directly using the RLE
24 | # (without need for decoding). This includes computations such as area,
25 | # union, intersection, etc. All of these operations are linear in the
26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area
27 | # of the object. Computing these operations on the original mask is O(n).
28 | # Thus, using the RLE can result in substantial computational savings.
29 | #
30 | # The following API functions are defined:
31 | # encode - Encode binary masks using RLE.
32 | # decode - Decode binary masks encoded via RLE.
33 | # merge - Compute union or intersection of encoded masks.
34 | # iou - Compute intersection over union between masks.
35 | # area - Compute area of encoded masks.
36 | # toBbox - Get bounding boxes surrounding encoded masks.
37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask.
38 | #
39 | # Usage:
40 | # Rs = encode( masks )
41 | # masks = decode( Rs )
42 | # R = merge( Rs, intersect=false )
43 | # o = iou( dt, gt, iscrowd )
44 | # a = area( Rs )
45 | # bbs = toBbox( Rs )
46 | # Rs = frPyObjects( [pyObjects], h, w )
47 | #
48 | # In the API the following formats are used:
49 | # Rs - [dict] Run-length encoding of binary masks
50 | # R - dict Run-length encoding of binary mask
51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
53 | # bbs - [nx4] Bounding box(es) stored as [x y w h]
54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
55 | # dt,gt - May be either bounding boxes or encoded masks
56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
57 | #
58 | # Finally, a note about the intersection over union (iou) computation.
59 | # The standard iou of a ground truth (gt) and detected (dt) object is
60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
61 | # For "crowd" regions, we use a modified criteria. If a gt object is
62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt.
63 | # Choosing gt' in the crowd gt that best matches the dt can be done using
64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
66 | # For crowd gt regions we use this modified criteria above for the iou.
67 | #
68 | # To compile run "python setup.py build_ext --inplace"
69 | # Please do not contact us for help with compiling.
70 | #
71 | # Microsoft COCO Toolbox. version 2.0
72 | # Data, paper, and tutorials available at: http://mscoco.org/
73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
74 | # Licensed under the Simplified BSD License [see coco/license.txt]
75 |
76 | iou = _mask.iou
77 | merge = _mask.merge
78 | frPyObjects = _mask.frPyObjects
79 |
80 | def encode(bimask):
81 | if len(bimask.shape) == 3:
82 | return _mask.encode(bimask)
83 | elif len(bimask.shape) == 2:
84 | h, w = bimask.shape
85 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0]
86 |
87 | def decode(rleObjs):
88 | if type(rleObjs) == list:
89 | return _mask.decode(rleObjs)
90 | else:
91 | return _mask.decode([rleObjs])[:,:,0]
92 |
93 | def area(rleObjs):
94 | if type(rleObjs) == list:
95 | return _mask.area(rleObjs)
96 | else:
97 | return _mask.area([rleObjs])[0]
98 |
99 | def toBbox(rleObjs):
100 | if type(rleObjs) == list:
101 | return _mask.toBbox(rleObjs)
102 | else:
103 | return _mask.toBbox([rleObjs])[0]
--------------------------------------------------------------------------------
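A short round-trip makes the RLE helpers above concrete. This sketch encodes a toy binary mask and reads back its area and bounding box; it assumes pycocotools (the package in this PythonAPI directory) has been compiled, e.g. via the Makefile above. Note that encode expects a Fortran-order uint8 array, per the format notes in the header comment:

```python
import numpy as np
from pycocotools import mask as maskUtils

m = np.zeros((6, 6), dtype=np.uint8)
m[2:5, 1:4] = 1                            # a 3x3 square of foreground
rle = maskUtils.encode(np.asfortranarray(m))
print(maskUtils.area(rle))                 # 9: number of foreground pixels
print(maskUtils.toBbox(rle))               # [1. 2. 3. 3.] as [x, y, w, h]
assert (maskUtils.decode(rle) == m).all()  # decode inverts encode
```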
/data/lib_coco/PythonAPI/pycocotools/mask.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/lib_coco/PythonAPI/pycocotools/mask.pyc
--------------------------------------------------------------------------------
/data/lib_coco/PythonAPI/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, Extension
2 | import numpy as np
3 |
4 | # To compile and install locally run "python setup.py build_ext --inplace"
5 | # To install library to Python site-packages run "python setup.py build_ext install"
6 |
7 | ext_modules = [
8 | Extension(
9 | 'pycocotools._mask',
10 | sources=['../common/maskApi.c', 'pycocotools/_mask.pyx'],
11 | include_dirs = [np.get_include(), '../common'],
12 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'],
13 | )
14 | ]
15 |
16 | setup(
17 | name='pycocotools',
18 | packages=['pycocotools'],
19 | package_dir = {'pycocotools': 'pycocotools'},
20 | install_requires=[
21 | 'setuptools>=18.0',
22 | 'cython>=0.27.3',
23 | 'matplotlib>=2.1.0'
24 | ],
25 | version='2.0',
26 | ext_modules= ext_modules
27 | )
28 |
--------------------------------------------------------------------------------
/data/lib_coco/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/lib_coco/__init__.py
--------------------------------------------------------------------------------
/data/lib_coco/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/lib_coco/__init__.pyc
--------------------------------------------------------------------------------
/data/lib_coco/common/gason.h:
--------------------------------------------------------------------------------
1 | // https://github.com/vivkin/gason - pulled January 10, 2016
2 | #pragma once
3 |
4 | #include <stdint.h>
5 | #include <stddef.h>
6 | #include <assert.h>
7 |
8 | enum JsonTag {
9 | JSON_NUMBER = 0,
10 | JSON_STRING,
11 | JSON_ARRAY,
12 | JSON_OBJECT,
13 | JSON_TRUE,
14 | JSON_FALSE,
15 | JSON_NULL = 0xF
16 | };
17 |
18 | struct JsonNode;
19 |
20 | #define JSON_VALUE_PAYLOAD_MASK 0x00007FFFFFFFFFFFULL
21 | #define JSON_VALUE_NAN_MASK 0x7FF8000000000000ULL
22 | #define JSON_VALUE_TAG_MASK 0xF
23 | #define JSON_VALUE_TAG_SHIFT 47
24 |
25 | union JsonValue {
26 | uint64_t ival;
27 | double fval;
28 |
29 | JsonValue(double x)
30 | : fval(x) {
31 | }
32 | JsonValue(JsonTag tag = JSON_NULL, void *payload = nullptr) {
33 | assert((uintptr_t)payload <= JSON_VALUE_PAYLOAD_MASK);
34 | ival = JSON_VALUE_NAN_MASK | ((uint64_t)tag << JSON_VALUE_TAG_SHIFT) | (uintptr_t)payload;
35 | }
36 | bool isDouble() const {
37 | return (int64_t)ival <= (int64_t)JSON_VALUE_NAN_MASK;
38 | }
39 | JsonTag getTag() const {
40 | return isDouble() ? JSON_NUMBER : JsonTag((ival >> JSON_VALUE_TAG_SHIFT) & JSON_VALUE_TAG_MASK);
41 | }
42 | uint64_t getPayload() const {
43 | assert(!isDouble());
44 | return ival & JSON_VALUE_PAYLOAD_MASK;
45 | }
46 | double toNumber() const {
47 | assert(getTag() == JSON_NUMBER);
48 | return fval;
49 | }
50 | char *toString() const {
51 | assert(getTag() == JSON_STRING);
52 | return (char *)getPayload();
53 | }
54 | JsonNode *toNode() const {
55 | assert(getTag() == JSON_ARRAY || getTag() == JSON_OBJECT);
56 | return (JsonNode *)getPayload();
57 | }
58 | };
59 |
60 | struct JsonNode {
61 | JsonValue value;
62 | JsonNode *next;
63 | char *key;
64 | };
65 |
66 | struct JsonIterator {
67 | JsonNode *p;
68 |
69 | void operator++() {
70 | p = p->next;
71 | }
72 | bool operator!=(const JsonIterator &x) const {
73 | return p != x.p;
74 | }
75 | JsonNode *operator*() const {
76 | return p;
77 | }
78 | JsonNode *operator->() const {
79 | return p;
80 | }
81 | };
82 |
83 | inline JsonIterator begin(JsonValue o) {
84 | return JsonIterator{o.toNode()};
85 | }
86 | inline JsonIterator end(JsonValue) {
87 | return JsonIterator{nullptr};
88 | }
89 |
90 | #define JSON_ERRNO_MAP(XX) \
91 | XX(OK, "ok") \
92 | XX(BAD_NUMBER, "bad number") \
93 | XX(BAD_STRING, "bad string") \
94 | XX(BAD_IDENTIFIER, "bad identifier") \
95 | XX(STACK_OVERFLOW, "stack overflow") \
96 | XX(STACK_UNDERFLOW, "stack underflow") \
97 | XX(MISMATCH_BRACKET, "mismatch bracket") \
98 | XX(UNEXPECTED_CHARACTER, "unexpected character") \
99 | XX(UNQUOTED_KEY, "unquoted key") \
100 | XX(BREAKING_BAD, "breaking bad") \
101 | XX(ALLOCATION_FAILURE, "allocation failure")
102 |
103 | enum JsonErrno {
104 | #define XX(no, str) JSON_##no,
105 | JSON_ERRNO_MAP(XX)
106 | #undef XX
107 | };
108 |
109 | const char *jsonStrError(int err);
110 |
111 | class JsonAllocator {
112 | struct Zone {
113 | Zone *next;
114 | size_t used;
115 | } *head = nullptr;
116 |
117 | public:
118 | JsonAllocator() = default;
119 | JsonAllocator(const JsonAllocator &) = delete;
120 | JsonAllocator &operator=(const JsonAllocator &) = delete;
121 | JsonAllocator(JsonAllocator &&x) : head(x.head) {
122 | x.head = nullptr;
123 | }
124 | JsonAllocator &operator=(JsonAllocator &&x) {
125 | head = x.head;
126 | x.head = nullptr;
127 | return *this;
128 | }
129 | ~JsonAllocator() {
130 | deallocate();
131 | }
132 | void *allocate(size_t size);
133 | void deallocate();
134 | };
135 |
136 | int jsonParse(char *str, char **endptr, JsonValue *value, JsonAllocator &allocator);
137 |
--------------------------------------------------------------------------------
/data/lib_coco/common/maskApi.h:
--------------------------------------------------------------------------------
1 | /**************************************************************************
2 | * Microsoft COCO Toolbox. version 2.0
3 | * Data, paper, and tutorials available at: http://mscoco.org/
4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
5 | * Licensed under the Simplified BSD License [see coco/license.txt]
6 | **************************************************************************/
7 | #pragma once
8 |
9 | typedef unsigned int uint;
10 | typedef unsigned long siz;
11 | typedef unsigned char byte;
12 | typedef double* BB;
13 | typedef struct { siz h, w, m; uint *cnts; } RLE;
14 |
15 | /* Initialize/destroy RLE. */
16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts );
17 | void rleFree( RLE *R );
18 |
19 | /* Initialize/destroy RLE array. */
20 | void rlesInit( RLE **R, siz n );
21 | void rlesFree( RLE **R, siz n );
22 |
23 | /* Encode binary masks using RLE. */
24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n );
25 |
26 | /* Decode binary masks encoded via RLE. */
27 | void rleDecode( const RLE *R, byte *mask, siz n );
28 |
29 | /* Compute union or intersection of encoded masks. */
30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect );
31 |
32 | /* Compute area of encoded masks. */
33 | void rleArea( const RLE *R, siz n, uint *a );
34 |
35 | /* Compute intersection over union between masks. */
36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o );
37 |
38 | /* Compute non-maximum suppression between bounding masks */
39 | void rleNms( RLE *dt, siz n, uint *keep, double thr );
40 |
41 | /* Compute intersection over union between bounding boxes. */
42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o );
43 |
44 | /* Compute non-maximum suppression between bounding boxes */
45 | void bbNms( BB dt, siz n, uint *keep, double thr );
46 |
47 | /* Get bounding boxes surrounding encoded masks. */
48 | void rleToBbox( const RLE *R, BB bb, siz n );
49 |
50 | /* Convert bounding boxes to encoded masks. */
51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n );
52 |
53 | /* Convert polygon to encoded mask. */
54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w );
55 |
56 | /* Get compressed string representation of encoded mask. */
57 | char* rleToString( const RLE *R );
58 |
59 | /* Convert from compressed string representation of encoded mask. */
60 | void rleFrString( RLE *R, char *s, siz h, siz w );
61 |
--------------------------------------------------------------------------------
/data/lib_coco/get_coco_next_batch.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import, print_function, division
4 |
5 | import sys, os
6 | # sys.path.insert(0, os.path.abspath('.'))
7 | sys.path.insert(0, './PythonAPI/')
8 | # sys.path.insert(0, os.path.abspath('data'))
9 | for _ in sys.path:
10 | print (_)
11 | from PythonAPI.pycocotools.coco import COCO
12 | import cv2
13 | import numpy as np
14 | import os
15 | from libs.label_name_dict import coco_dict
16 |
17 |
18 | annotation_path = '/home/yjr/DataSet/COCO/2017/annotations/instances_train2017.json'
19 | print ("load coco .... it will cost about 17s..")
20 | coco = COCO(annotation_path)
21 |
22 | imgId_list = coco.getImgIds()
23 | imgId_list = np.array(imgId_list)
24 |
25 | total_imgs = len(imgId_list)
26 |
27 | # print (NAME_LABEL_DICT)
28 |
29 |
30 | def next_img(step):
31 |
32 | if step % total_imgs == 0:
33 | np.random.shuffle(imgId_list)
34 | imgid = imgId_list[step % total_imgs]
35 |
36 | imgname = coco.loadImgs(ids=[imgid])[0]['file_name']
37 | # print (type(imgname), imgname)
38 | img = cv2.imread(os.path.join("/home/yjr/DataSet/COCO/2017/train2017", imgname))
39 |
40 | annotation = coco.imgToAnns[imgid]
41 | gtbox_and_label_list = []
42 | for ann in annotation:
43 | box = ann['bbox']
44 |
45 | box = [box[0], box[1], box[0]+box[2], box[1]+box[3]] # [xmin, ymin, xmax, ymax]
46 | cat_id = ann['category_id']
47 | cat_name = coco_dict.originID_classes[cat_id] #ID_NAME_DICT[cat_id]
48 | label = coco_dict.NAME_LABEL_MAP[cat_name]
49 | gtbox_and_label_list.append(box + [label])
50 | gtbox_and_label_list = np.array(gtbox_and_label_list, dtype=np.int32)
51 | # print (img.shape, gtbox_and_label_list.shape)
52 | if gtbox_and_label_list.shape[0] == 0:
53 | return next_img(step+1)
54 | else:
55 | return imgid, img[:, :, ::-1], gtbox_and_label_list
56 |
57 |
58 | if __name__ == '__main__':
59 |
60 | imgid, img, gtbox = next_img(3234)
61 |
62 | print("::")
63 | from libs.box_utils.draw_box_in_img import draw_boxes_with_label_and_scores
64 |
65 | img = draw_boxes_with_label_and_scores(img_array=img, boxes=gtbox[:, :-1], labels=gtbox[:, -1],
66 | scores=np.ones(shape=(len(gtbox), )))
67 | print ("_----")
68 |
69 |
70 | cv2.imshow("test", img)
71 | cv2.waitKey(0)
72 |
73 |
74 |
--------------------------------------------------------------------------------
/data/lib_coco/get_coco_next_batch.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/lib_coco/get_coco_next_batch.pyc
--------------------------------------------------------------------------------
/data/pretrained_weights/README.md:
--------------------------------------------------------------------------------
1 | Please download [resnet50_v1](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz)、[resnet101_v1](http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz) pre-trained models on Imagenet, put it to data/pretrained_weights.
--------------------------------------------------------------------------------
/data/pretrained_weights/mobilenet/README.md:
--------------------------------------------------------------------------------
1 | Please download [mobilenet_v2](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz) pre-trained model on Imagenet, put it to data/pretrained_weights/mobilenet.
--------------------------------------------------------------------------------
/data/tfrecord/pascal_test.tfrecord:
--------------------------------------------------------------------------------
1 | /home/yjr/PycharmProjects/Faster-RCNN_TensorflowOLD/data/tfrecord/pascal_test.tfrecord
--------------------------------------------------------------------------------
/data/tfrecord/pascal_train.tfrecord:
--------------------------------------------------------------------------------
1 | /home/yjr/PycharmProjects/Faster-RCNN_TensorflowOLD/data/tfrecord/pascal_train.tfrecord
--------------------------------------------------------------------------------
/help_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/help_utils/__init__.py
--------------------------------------------------------------------------------
/help_utils/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/help_utils/__init__.pyc
--------------------------------------------------------------------------------
/help_utils/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/help_utils/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/help_utils/__pycache__/tools.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/help_utils/__pycache__/tools.cpython-35.pyc
--------------------------------------------------------------------------------
/help_utils/tools.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import division, print_function, absolute_import
3 | import math
4 | import sys
5 | import os
6 |
7 |
8 | def view_bar(message, num, total):
9 | rate = num / total  # fraction complete (true division via the __future__ import)
10 | rate_num = int(rate * 40)  # filled cells of the 40-char bar
11 | rate_nums = math.ceil(rate * 100)  # percentage shown to the user
12 | r = '\r%s:[%s%s]%d%%\t%d/%d' % (message, ">" * rate_num, " " * (40 - rate_num), rate_nums, num, total,)
13 | sys.stdout.write(r)  # '\r' rewinds to the line start so the bar redraws in place
14 | sys.stdout.flush()
15 |
16 |
17 | def mkdir(path):
18 | if not os.path.exists(path):
19 | os.makedirs(path)
--------------------------------------------------------------------------------
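
A minimal usage sketch for view_bar, assuming the repo root is on sys.path (the loop bounds and the sleep are hypothetical, only to make the redraw visible):

import time
from help_utils.tools import view_bar

total = 100
for i in range(1, total + 1):
    view_bar('processing', i, total)  # redraws '\rprocessing:[>>>   ]NN%\tN/100' in place
    time.sleep(0.01)
print('')  # move off the carriage-returned line when done
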
/help_utils/tools.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/help_utils/tools.pyc
--------------------------------------------------------------------------------
/images.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/images.png
--------------------------------------------------------------------------------
/libs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/__init__.py
--------------------------------------------------------------------------------
/libs/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/__init__.pyc
--------------------------------------------------------------------------------
/libs/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/libs/box_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/box_utils/__init__.py
--------------------------------------------------------------------------------
/libs/box_utils/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/box_utils/__init__.pyc
--------------------------------------------------------------------------------
/libs/box_utils/anchor_utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import, print_function, division
3 |
4 | import tensorflow as tf
5 |
6 | def make_anchors(base_anchor_size, anchor_scales, anchor_ratios,
7 | featuremap_height, featuremap_width,
8 | stride, name='make_anchors'):
9 | '''
10 | :param base_anchor_size: base anchor side length, e.g. 256
11 | :param anchor_scales: scale multipliers applied to the base size
12 | :param anchor_ratios: h/w aspect ratios to enumerate
13 | :param featuremap_height: height of the feature map
14 | :param featuremap_width: width of the feature map
15 | :param stride: feature-map stride w.r.t. the input image
16 | :return: anchors in [xmin, ymin, xmax, ymax], shape [featuremap_h * featuremap_w * A, 4]
17 | '''
18 | with tf.variable_scope(name):
19 | base_anchor = tf.constant([0, 0, base_anchor_size, base_anchor_size], tf.float32) # [x_center, y_center, w, h]
20 |
21 | ws, hs = enum_ratios(enum_scales(base_anchor, anchor_scales),
22 | anchor_ratios)  # ws and hs of the A anchor shapes at each location
23 |
24 | x_centers = tf.range(featuremap_width, dtype=tf.float32) * stride
25 | y_centers = tf.range(featuremap_height, dtype=tf.float32) * stride
26 |
27 | x_centers, y_centers = tf.meshgrid(x_centers, y_centers)
28 |
29 | ws, x_centers = tf.meshgrid(ws, x_centers)
30 | hs, y_centers = tf.meshgrid(hs, y_centers)
31 |
32 | anchor_centers = tf.stack([x_centers, y_centers], 2)
33 | anchor_centers = tf.reshape(anchor_centers, [-1, 2])
34 |
35 | box_sizes = tf.stack([ws, hs], axis=2)
36 | box_sizes = tf.reshape(box_sizes, [-1, 2])
37 | # anchors = tf.concat([anchor_centers, box_sizes], axis=1)
38 | anchors = tf.concat([anchor_centers - 0.5*box_sizes,
39 | anchor_centers + 0.5*box_sizes], axis=1)
40 | return anchors
41 |
42 |
43 | def enum_scales(base_anchor, anchor_scales):
44 |
45 | anchor_scales = base_anchor * tf.constant(anchor_scales, dtype=tf.float32, shape=(len(anchor_scales), 1))
46 |
47 | return anchor_scales
48 |
49 |
50 | def enum_ratios(anchors, anchor_ratios):
51 | '''
52 | ratio = h / w
53 | :param anchors: scaled base anchors from enum_scales (w == h)
54 | :param anchor_ratios: aspect ratios to enumerate
55 | :return: ws, hs with shape [num_scales * num_ratios, 1]
56 | '''
57 | ws = anchors[:, 2] # for base anchor: w == h
58 | hs = anchors[:, 3]
59 | sqrt_ratios = tf.sqrt(tf.constant(anchor_ratios))
60 |
61 | ws = tf.reshape(ws / sqrt_ratios[:, tf.newaxis], [-1, 1])
62 | hs = tf.reshape(hs * sqrt_ratios[:, tf.newaxis], [-1, 1])
63 |
64 | return ws, hs
65 |
66 |
67 |
--------------------------------------------------------------------------------
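
For sanity-checking shapes without a TF session, a NumPy sketch that mirrors the scale/ratio enumeration and the meshgrid trick above (make_anchors_np and all values are hypothetical, not part of the repo):

import numpy as np

def make_anchors_np(base_size, scales, ratios, fh, fw, stride):
    # one (w, h) pair per (ratio, scale) combination, ratio-major like enum_ratios
    ws = (base_size * np.array(scales))[None, :] / np.sqrt(ratios)[:, None]
    hs = (base_size * np.array(scales))[None, :] * np.sqrt(ratios)[:, None]
    ws, hs = ws.reshape(-1), hs.reshape(-1)
    xc, yc = np.meshgrid(np.arange(fw) * stride, np.arange(fh) * stride)
    ws, xc = np.meshgrid(ws, xc.ravel())  # both become [fh*fw, A]
    hs, yc = np.meshgrid(hs, yc.ravel())
    ctr = np.stack([xc, yc], axis=2).reshape(-1, 2)
    size = np.stack([ws, hs], axis=2).reshape(-1, 2)
    return np.hstack([ctr - 0.5 * size, ctr + 0.5 * size])  # [fh*fw*A, 4]

a = make_anchors_np(256, [0.25, 0.5, 1., 2.0], [0.5, 1., 2.0], fh=2, fw=3, stride=16)
print(a.shape)  # (72, 4): 4 scales x 3 ratios = 12 anchors at each of the 6 locations
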
/libs/box_utils/anchor_utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/box_utils/anchor_utils.pyc
--------------------------------------------------------------------------------
/libs/box_utils/boxes_utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 |
7 | import tensorflow as tf
8 |
9 | def ious_calu(boxes_1, boxes_2):
10 | '''
11 | pairwise IoU between two box sets
12 | :param boxes_1: [N, 4] [xmin, ymin, xmax, ymax]
13 | :param boxes_2: [M, 4] [xmin, ymin, xmax, ymax]
14 | :return: ious with shape [N, M]
15 | '''
16 | boxes_1 = tf.cast(boxes_1, tf.float32)
17 | boxes_2 = tf.cast(boxes_2, tf.float32)
18 | xmin_1, ymin_1, xmax_1, ymax_1 = tf.split(boxes_1, 4, axis=1) # xmin_1 shape is [N, 1]..
19 | xmin_2, ymin_2, xmax_2, ymax_2 = tf.unstack(boxes_2, axis=1) # xmin_2 shape is [M, ]..
20 |
21 | max_xmin = tf.maximum(xmin_1, xmin_2)
22 | min_xmax = tf.minimum(xmax_1, xmax_2)
23 |
24 | max_ymin = tf.maximum(ymin_1, ymin_2)
25 | min_ymax = tf.minimum(ymax_1, ymax_2)
26 |
27 | overlap_h = tf.maximum(0., min_ymax - max_ymin) # avoid h < 0
28 | overlap_w = tf.maximum(0., min_xmax - max_xmin)
29 |
30 | overlaps = overlap_h * overlap_w
31 |
32 | area_1 = (xmax_1 - xmin_1) * (ymax_1 - ymin_1) # [N, 1]
33 | area_2 = (xmax_2 - xmin_2) * (ymax_2 - ymin_2) # [M, ]
34 |
35 | ious = overlaps / (area_1 + area_2 - overlaps)
36 |
37 | return ious
38 |
39 |
40 | def clip_boxes_to_img_boundaries(decode_boxes, img_shape):
41 | '''
42 | :param decode_boxes: [N, 4] [xmin, ymin, xmax, ymax]
43 | :param img_shape: shape of the image batch, [batch, h, w, c]
44 | :return: decoded boxes clipped to the image boundaries
45 | '''
46 |
47 | with tf.name_scope('clip_boxes_to_img_boundaries'):
48 |
49 | # xmin, ymin, xmax, ymax = tf.unstack(decode_boxes, axis=1)
50 | xmin = decode_boxes[:, 0]
51 | ymin = decode_boxes[:, 1]
52 | xmax = decode_boxes[:, 2]
53 | ymax = decode_boxes[:, 3]
54 | img_h, img_w = img_shape[1], img_shape[2]
55 |
56 | img_h, img_w = tf.cast(img_h, tf.float32), tf.cast(img_w, tf.float32)
57 |
58 | xmin = tf.maximum(tf.minimum(xmin, img_w-1.), 0.)
59 | ymin = tf.maximum(tf.minimum(ymin, img_h-1.), 0.)
60 |
61 | xmax = tf.maximum(tf.minimum(xmax, img_w-1.), 0.)
62 | ymax = tf.maximum(tf.minimum(ymax, img_h-1.), 0.)
63 |
64 | return tf.transpose(tf.stack([xmin, ymin, xmax, ymax]))
65 |
66 |
67 | def filter_outside_boxes(boxes, img_h, img_w):
68 | '''
69 | :param boxes: boxes with format [xmin, ymin, xmax, ymax]
70 | :param img_h: height of image
71 | :param img_w: width of image
72 | :return: indices of the boxes that lie inside the image boundary
73 | '''
74 |
75 | with tf.name_scope('filter_outside_boxes'):
76 | xmin, ymin, xmax, ymax = tf.unstack(boxes, axis=1)
77 |
78 | xmin_index = tf.greater_equal(xmin, 0)
79 | ymin_index = tf.greater_equal(ymin, 0)
80 | xmax_index = tf.less_equal(xmax, tf.cast(img_w, tf.float32))
81 | ymax_index = tf.less_equal(ymax, tf.cast(img_h, tf.float32))
82 |
83 | indices = tf.transpose(tf.stack([xmin_index, ymin_index, xmax_index, ymax_index]))
84 | indices = tf.cast(indices, dtype=tf.int32)
85 | indices = tf.reduce_sum(indices, axis=1)
86 | indices = tf.where(tf.equal(indices, 4))
87 | # indices = tf.equal(indices, 4)
88 | return tf.reshape(indices, [-1])
89 |
90 |
91 | def padd_boxes_with_zeros(boxes, scores, max_num_of_boxes):
92 |
93 | '''
94 | if the number of boxes is less than max_num_of_boxes, pad the tail with zero boxes [0, 0, 0, 0]
95 | :param boxes: [-1, 4]
96 | :param scores: [-1]
97 | :param max_num_of_boxes:
98 | :return: padded boxes and scores
99 | '''
100 |
101 | pad_num = tf.cast(max_num_of_boxes, tf.int32) - tf.shape(boxes)[0]
102 |
103 | zero_boxes = tf.zeros(shape=[pad_num, 4], dtype=boxes.dtype)
104 | zero_scores = tf.zeros(shape=[pad_num], dtype=scores.dtype)
105 |
106 | final_boxes = tf.concat([boxes, zero_boxes], axis=0)
107 |
108 | final_scores = tf.concat([scores, zero_scores], axis=0)
109 |
110 | return final_boxes, final_scores
--------------------------------------------------------------------------------
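
A small TF1-session check of the [N, 1] vs [M] broadcasting that makes ious_calu pairwise (hypothetical boxes; assumes the repo root is on sys.path):

import tensorflow as tf
from libs.box_utils.boxes_utils import ious_calu

boxes_a = tf.constant([[0., 0., 10., 10.], [5., 5., 15., 15.]])  # [N=2, 4]
boxes_b = tf.constant([[0., 0., 10., 10.], [100., 100., 110., 110.], [0., 0., 5., 5.]])  # [M=3, 4]
ious = ious_calu(boxes_a, boxes_b)  # [N, M]

with tf.Session() as sess:
    print(sess.run(ious))
    # row 0: 1.0 against itself, 0.0 against the far box, 0.25 against the quarter-size box
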
/libs/box_utils/boxes_utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/box_utils/boxes_utils.pyc
--------------------------------------------------------------------------------
/libs/box_utils/cython_utils/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | python setup.py build_ext --inplace
3 | rm -rf build
4 | clean:
5 | rm -rf */*.pyc
6 | rm -rf */*.so
7 |
--------------------------------------------------------------------------------
/libs/box_utils/cython_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/box_utils/cython_utils/__init__.py
--------------------------------------------------------------------------------
/libs/box_utils/cython_utils/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/box_utils/cython_utils/__init__.pyc
--------------------------------------------------------------------------------
/libs/box_utils/cython_utils/cython_bbox.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/box_utils/cython_utils/cython_bbox.so
--------------------------------------------------------------------------------
/libs/box_utils/cython_utils/cython_nms.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/box_utils/cython_utils/cython_nms.so
--------------------------------------------------------------------------------
/libs/box_utils/cython_utils/nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
12 | return a if a >= b else b
13 |
14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
15 | return a if a <= b else b
16 |
17 | def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
23 |
24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
26 |
27 | cdef int ndets = dets.shape[0]
28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \
29 | np.zeros((ndets), dtype=np.int)
30 |
31 | # nominal indices
32 | cdef int _i, _j
33 | # sorted indices
34 | cdef int i, j
35 | # temp variables for box i's (the box currently under consideration)
36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea
37 | # variables for computing overlap with box j (lower scoring box)
38 | cdef np.float32_t xx1, yy1, xx2, yy2
39 | cdef np.float32_t w, h
40 | cdef np.float32_t inter, ovr
41 |
42 | keep = []
43 | for _i in range(ndets):
44 | i = order[_i]
45 | if suppressed[i] == 1:
46 | continue
47 | keep.append(i)
48 | ix1 = x1[i]
49 | iy1 = y1[i]
50 | ix2 = x2[i]
51 | iy2 = y2[i]
52 | iarea = areas[i]
53 | for _j in range(_i + 1, ndets):
54 | j = order[_j]
55 | if suppressed[j] == 1:
56 | continue
57 | xx1 = max(ix1, x1[j])
58 | yy1 = max(iy1, y1[j])
59 | xx2 = min(ix2, x2[j])
60 | yy2 = min(iy2, y2[j])
61 | w = max(0.0, xx2 - xx1 + 1)
62 | h = max(0.0, yy2 - yy1 + 1)
63 | inter = w * h
64 | ovr = inter / (iarea + areas[j] - inter)
65 | if ovr >= thresh:
66 | suppressed[j] = 1
67 |
68 | return keep
69 |
70 | def nms_new(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
71 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
72 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
73 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
74 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
75 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
76 |
77 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
78 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
79 |
80 | cdef int ndets = dets.shape[0]
81 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \
82 | np.zeros((ndets), dtype=np.int)
83 |
84 | # nominal indices
85 | cdef int _i, _j
86 | # sorted indices
87 | cdef int i, j
88 | # temp variables for box i's (the box currently under consideration)
89 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea
90 | # variables for computing overlap with box j (lower scoring box)
91 | cdef np.float32_t xx1, yy1, xx2, yy2
92 | cdef np.float32_t w, h
93 | cdef np.float32_t inter, ovr
94 |
95 | keep = []
96 | for _i in range(ndets):
97 | i = order[_i]
98 | if suppressed[i] == 1:
99 | continue
100 | keep.append(i)
101 | ix1 = x1[i]
102 | iy1 = y1[i]
103 | ix2 = x2[i]
104 | iy2 = y2[i]
105 | iarea = areas[i]
106 | for _j in range(_i + 1, ndets):
107 | j = order[_j]
108 | if suppressed[j] == 1:
109 | continue
110 | xx1 = max(ix1, x1[j])
111 | yy1 = max(iy1, y1[j])
112 | xx2 = min(ix2, x2[j])
113 | yy2 = min(iy2, y2[j])
114 | w = max(0.0, xx2 - xx1 + 1)
115 | h = max(0.0, yy2 - yy1 + 1)
116 | inter = w * h
117 | ovr = inter / (iarea + areas[j] - inter)
118 | ovr1 = inter / iarea
119 | ovr2 = inter / areas[j]
120 | if ovr >= thresh or ovr1 > 0.95 or ovr2 > 0.95:
121 | suppressed[j] = 1
122 |
123 | return keep
124 |
--------------------------------------------------------------------------------
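
A pure-NumPy reference for the greedy suppression above (same +1 pixel convention and the same >= thresh rule), useful for verifying the compiled cython_nms module after a build; nms_numpy is a hypothetical helper, not part of the repo:

import numpy as np

def nms_numpy(dets, thresh):
    # dets: [N, 5] rows of (x1, y1, x2, y2, score); returns kept indices, best first
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][ovr < thresh]  # drop everything at or above the IoU threshold
    return keep
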
/libs/box_utils/cython_utils/setup.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import os
9 | from os.path import join as pjoin
10 | import numpy as np
11 | from distutils.core import setup
12 | from distutils.extension import Extension
13 | from Cython.Distutils import build_ext
14 |
15 | def find_in_path(name, path):
16 | "Find a file in a search path"
17 | # adapted from http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
18 | for dir in path.split(os.pathsep):
19 | binpath = pjoin(dir, name)
20 | if os.path.exists(binpath):
21 | return os.path.abspath(binpath)
22 | return None
23 |
24 | def locate_cuda():
25 | """Locate the CUDA environment on the system
26 |
27 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
28 | and values giving the absolute path to each directory.
29 |
30 | Starts by looking for the CUDAHOME env variable. If not found, everything
31 | is based on finding 'nvcc' in the PATH.
32 | """
33 |
34 | # first check if the CUDAHOME env variable is in use
35 | if 'CUDAHOME' in os.environ:
36 | home = os.environ['CUDAHOME']
37 | nvcc = pjoin(home, 'bin', 'nvcc')
38 | else:
39 | # otherwise, search the PATH for NVCC
40 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
41 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path)
42 | if nvcc is None:
43 | raise EnvironmentError('The nvcc binary could not be '
44 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME')
45 | home = os.path.dirname(os.path.dirname(nvcc))
46 |
47 | cudaconfig = {'home':home, 'nvcc':nvcc,
48 | 'include': pjoin(home, 'include'),
49 | 'lib64': pjoin(home, 'lib64')}
50 | for k, v in cudaconfig.items():
51 | if not os.path.exists(v):
52 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))
53 |
54 | return cudaconfig
55 | CUDA = locate_cuda()
56 |
57 | # Obtain the numpy include directory. This logic works across numpy versions.
58 | try:
59 | numpy_include = np.get_include()
60 | except AttributeError:
61 | numpy_include = np.get_numpy_include()
62 |
63 | def customize_compiler_for_nvcc(self):
64 | """inject deep into distutils to customize how the dispatch
65 | to gcc/nvcc works.
66 |
67 | If you subclass UnixCCompiler, it's not trivial to get your subclass
68 | injected in, and still have the right customizations (i.e.
69 | distutils.sysconfig.customize_compiler) run on it. So instead of going
70 | the OO route, I have this. Note, it's kind of like a weird functional
71 | subclassing going on."""
72 |
73 | # tell the compiler it can process .cu
74 | self.src_extensions.append('.cu')
75 |
76 | # save references to the default compiler_so and _compile methods
77 | default_compiler_so = self.compiler_so
78 | super = self._compile
79 |
80 | # now redefine the _compile method. This gets executed for each
81 | # object but distutils doesn't have the ability to change compilers
82 | # based on source extension: we add it.
83 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
84 | print(extra_postargs)
85 | if os.path.splitext(src)[1] == '.cu':
86 | # use the cuda for .cu files
87 | self.set_executable('compiler_so', CUDA['nvcc'])
88 | # use only a subset of the extra_postargs, which are 1-1 translated
89 | # from the extra_compile_args in the Extension class
90 | postargs = extra_postargs['nvcc']
91 | else:
92 | postargs = extra_postargs['gcc']
93 |
94 | super(obj, src, ext, cc_args, postargs, pp_opts)
95 | # reset the default compiler_so, which we might have changed for cuda
96 | self.compiler_so = default_compiler_so
97 |
98 | # inject our redefined _compile method into the class
99 | self._compile = _compile
100 |
101 | # run the customize_compiler
102 | class custom_build_ext(build_ext):
103 | def build_extensions(self):
104 | customize_compiler_for_nvcc(self.compiler)
105 | build_ext.build_extensions(self)
106 |
107 | ext_modules = [
108 | Extension(
109 | "cython_bbox",
110 | ["bbox.pyx"],
111 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
112 | include_dirs = [numpy_include]
113 | ),
114 | Extension(
115 | "cython_nms",
116 | ["nms.pyx"],
117 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
118 | include_dirs = [numpy_include]
119 | )
120 | # Extension(
121 | # "cpu_nms",
122 | # ["cpu_nms.pyx"],
123 | # extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
124 | # include_dirs = [numpy_include]
125 | # )
126 | ]
127 |
128 | setup(
129 | name='tf_faster_rcnn',
130 | ext_modules=ext_modules,
131 | # inject our custom trigger
132 | cmdclass={'build_ext': custom_build_ext},
133 | )
134 |
--------------------------------------------------------------------------------
/libs/box_utils/draw_box_in_img.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/box_utils/draw_box_in_img.pyc
--------------------------------------------------------------------------------
/libs/box_utils/encode_and_decode.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 | from __future__ import print_function
5 | from __future__ import division
6 |
7 | import tensorflow as tf
8 | import numpy as np
9 |
10 |
11 | def decode_boxes(encoded_boxes, reference_boxes, scale_factors=None):
12 | '''
13 |
14 | :param encoded_boxes: [N, 4]
15 | :param reference_boxes: [N, 4]
16 | :param scale_factors: factors used to scale the regression targets
17 |
18 | in the first stage, reference_boxes are the anchors;
19 | in the second stage, reference_boxes are the (decoded) proposals produced by the first stage
20 | :return: decoded boxes, [N, 4]
21 | '''
22 |
23 | t_xcenter, t_ycenter, t_w, t_h = tf.unstack(encoded_boxes, axis=1)
24 | if scale_factors:
25 | t_xcenter /= scale_factors[0]
26 | t_ycenter /= scale_factors[1]
27 | t_w /= scale_factors[2]
28 | t_h /= scale_factors[3]
29 |
30 | reference_xmin, reference_ymin, reference_xmax, reference_ymax = tf.unstack(reference_boxes, axis=1)
31 | # reference boxes are anchors in the first stage
32 |
33 | # reference_xcenter = (reference_xmin + reference_xmax) / 2.
34 | # reference_ycenter = (reference_ymin + reference_ymax) / 2.
35 | reference_w = reference_xmax - reference_xmin
36 | reference_h = reference_ymax - reference_ymin
37 | reference_xcenter = reference_xmin + reference_w/2.0
38 | reference_ycenter = reference_ymin + reference_h/2.0
39 |
40 | predict_xcenter = t_xcenter * reference_w + reference_xcenter
41 | predict_ycenter = t_ycenter * reference_h + reference_ycenter
42 | predict_w = tf.exp(t_w) * reference_w
43 | predict_h = tf.exp(t_h) * reference_h
44 |
45 | predict_xmin = predict_xcenter - predict_w / 2.
46 | predict_xmax = predict_xcenter + predict_w / 2.
47 | predict_ymin = predict_ycenter - predict_h / 2.
48 | predict_ymax = predict_ycenter + predict_h / 2.
49 |
50 | return tf.transpose(tf.stack([predict_xmin, predict_ymin,
51 | predict_xmax, predict_ymax]))
52 |
53 |
54 | def encode_boxes(unencode_boxes, reference_boxes, scale_factors=None):
55 | '''
56 |
57 | :param unencode_boxes: [-1, 4]
58 | :param reference_boxes: [-1, 4]
59 | :return: encode_boxes [-1, 4]
60 | '''
61 |
62 | xmin, ymin, xmax, ymax = unencode_boxes[:, 0], unencode_boxes[:, 1], unencode_boxes[:, 2], unencode_boxes[:, 3]
63 |
64 | reference_xmin, reference_ymin, reference_xmax, reference_ymax = reference_boxes[:, 0], reference_boxes[:, 1], \
65 | reference_boxes[:, 2], reference_boxes[:, 3]
66 |
67 | # x_center = (xmin + xmax) / 2.
68 | # y_center = (ymin + ymax) / 2.
69 | w = xmax - xmin + 1e-8
70 | h = ymax - ymin + 1e-8
71 | x_center = xmin + w/2.0
72 | y_center = ymin + h/2.0
73 |
74 | # reference_xcenter = (reference_xmin + reference_xmax) / 2.
75 | # reference_ycenter = (reference_ymin + reference_ymax) / 2.
76 | reference_w = reference_xmax - reference_xmin + 1e-8
77 | reference_h = reference_ymax - reference_ymin + 1e-8
78 | reference_xcenter = reference_xmin + reference_w/2.0
79 | reference_ycenter = reference_ymin + reference_h/2.0
80 | # w + 1e-8 to avoid NaN in division and log below
81 |
82 | t_xcenter = (x_center - reference_xcenter) / reference_w
83 | t_ycenter = (y_center - reference_ycenter) / reference_h
84 | t_w = np.log(w/reference_w)
85 | t_h = np.log(h/reference_h)
86 |
87 | if scale_factors:
88 | t_xcenter *= scale_factors[0]
89 | t_ycenter *= scale_factors[1]
90 | t_w *= scale_factors[2]
91 | t_h *= scale_factors[3]
92 |
93 | return np.transpose(np.stack([t_xcenter, t_ycenter, t_w, t_h], axis=0))
94 |
--------------------------------------------------------------------------------
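
A quick round-trip check of the encode/decode pair: encode a ground-truth box against a reference, then invert the targets by hand (mirroring the decode_boxes math without the TF plumbing). The original box comes back up to the 1e-8 stabilizers. Boxes are hypothetical:

import numpy as np
from libs.box_utils.encode_and_decode import encode_boxes

gt = np.array([[10., 20., 50., 80.]])   # ground-truth box
ref = np.array([[0., 0., 60., 60.]])    # reference box (anchor or proposal)

t = encode_boxes(gt, ref, scale_factors=None)  # rows of (tx, ty, tw, th)

rw, rh = ref[:, 2] - ref[:, 0], ref[:, 3] - ref[:, 1]
rcx, rcy = ref[:, 0] + rw / 2., ref[:, 1] + rh / 2.
pcx, pcy = t[:, 0] * rw + rcx, t[:, 1] * rh + rcy
pw, ph = np.exp(t[:, 2]) * rw, np.exp(t[:, 3]) * rh
rec = np.stack([pcx - pw / 2., pcy - ph / 2., pcx + pw / 2., pcy + ph / 2.], axis=1)
print(rec)  # ~[[10. 20. 50. 80.]]
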
/libs/box_utils/encode_and_decode.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/box_utils/encode_and_decode.pyc
--------------------------------------------------------------------------------
/libs/box_utils/show_box_in_tensor.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 |
7 | import tensorflow as tf
8 | import numpy as np
9 | import cv2
10 | from libs.label_name_dict.label_dict import LABEL_NAME_MAP
11 |
12 | from libs.configs import cfgs
13 |
14 | from libs.box_utils import draw_box_in_img
15 |
16 | def only_draw_boxes(img_batch, boxes):
17 |
18 | boxes = tf.stop_gradient(boxes)
19 | img_tensor = tf.squeeze(img_batch, 0)
20 | img_tensor = tf.cast(img_tensor, tf.float32)
21 | labels = tf.ones(shape=(tf.shape(boxes)[0], ), dtype=tf.int32) * draw_box_in_img.ONLY_DRAW_BOXES
22 | scores = tf.zeros_like(labels, dtype=tf.float32)
23 | img_tensor_with_boxes = tf.py_func(draw_box_in_img.draw_boxes_with_label_and_scores,
24 | inp=[img_tensor, boxes, labels, scores],
25 | Tout=tf.uint8)
26 | img_tensor_with_boxes = tf.reshape(img_tensor_with_boxes, tf.shape(img_batch)) # [batch_size, h, w, c]
27 |
28 | return img_tensor_with_boxes
29 |
30 | def draw_boxes_with_scores(img_batch, boxes, scores):
31 |
32 | boxes = tf.stop_gradient(boxes)
33 | scores = tf.stop_gradient(scores)
34 |
35 | img_tensor = tf.squeeze(img_batch, 0)
36 | img_tensor = tf.cast(img_tensor, tf.float32)
37 | labels = tf.ones(shape=(tf.shape(boxes)[0],), dtype=tf.int32) * draw_box_in_img.ONLY_DRAW_BOXES_WITH_SCORES
38 | img_tensor_with_boxes = tf.py_func(draw_box_in_img.draw_boxes_with_label_and_scores,
39 | inp=[img_tensor, boxes, labels, scores],
40 | Tout=[tf.uint8])
41 | img_tensor_with_boxes = tf.reshape(img_tensor_with_boxes, tf.shape(img_batch))
42 | return img_tensor_with_boxes
43 |
44 | def draw_boxes_with_categories(img_batch, boxes, labels):
45 | boxes = tf.stop_gradient(boxes)
46 |
47 | img_tensor = tf.squeeze(img_batch, 0)
48 | img_tensor = tf.cast(img_tensor, tf.float32)
49 | scores = tf.ones(shape=(tf.shape(boxes)[0],), dtype=tf.float32)
50 | img_tensor_with_boxes = tf.py_func(draw_box_in_img.draw_boxes_with_label_and_scores,
51 | inp=[img_tensor, boxes, labels, scores],
52 | Tout=[tf.uint8])
53 | img_tensor_with_boxes = tf.reshape(img_tensor_with_boxes, tf.shape(img_batch))
54 | return img_tensor_with_boxes
55 |
56 | def draw_boxes_with_categories_and_scores(img_batch, boxes, labels, scores):
57 | boxes = tf.stop_gradient(boxes)
58 | scores = tf.stop_gradient(scores)
59 |
60 | img_tensor = tf.squeeze(img_batch, 0)
61 | img_tensor = tf.cast(img_tensor, tf.float32)
62 | img_tensor_with_boxes = tf.py_func(draw_box_in_img.draw_boxes_with_label_and_scores,
63 | inp=[img_tensor, boxes, labels, scores],
64 | Tout=[tf.uint8])
65 | img_tensor_with_boxes = tf.reshape(img_tensor_with_boxes, tf.shape(img_batch))
66 | return img_tensor_with_boxes
67 |
68 | if __name__ == "__main__":
69 | print (1)
70 |
71 |
--------------------------------------------------------------------------------
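
All four helpers above rely on the same pattern: tf.py_func wraps the plain-Python drawing routine as a graph op, and the trailing tf.reshape restores the static shape information that py_func loses. A minimal standalone sketch of that pattern with a trivial callback (_brighten is hypothetical; TF1):

import numpy as np
import tensorflow as tf

def _brighten(img):
    # plain-Python/NumPy callback executed by tf.py_func at run time
    return np.clip(img + 30., 0., 255.).astype(np.uint8)

img_plac = tf.placeholder(tf.float32, shape=[None, None, 3])
out = tf.py_func(_brighten, inp=[img_plac], Tout=tf.uint8)
out.set_shape([None, None, 3])  # py_func drops static shape; the module above uses tf.reshape instead

with tf.Session() as sess:
    res = sess.run(out, feed_dict={img_plac: np.zeros((2, 2, 3), np.float32)})
    print(res.max())  # 30
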
/libs/box_utils/show_box_in_tensor.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/box_utils/show_box_in_tensor.pyc
--------------------------------------------------------------------------------
/libs/box_utils/tf_ops.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 |
3 | from __future__ import absolute_import, print_function, division
4 |
5 | import tensorflow as tf
6 |
7 | '''
8 | all of these ops are derived from the tensorflow Object Detection API
9 | '''
10 | def indices_to_dense_vector(indices,
11 | size,
12 | indices_value=1.,
13 | default_value=0,
14 | dtype=tf.float32):
15 | """Creates dense vector with indices set to specific (the para "indices_value" ) and rest to zeros.
16 |
17 | This function exists because it is unclear if it is safe to use
18 | tf.sparse_to_dense(indices, [size], 1, validate_indices=False)
19 | with indices which are not ordered.
20 | This function accepts a dynamic size (e.g. tf.shape(tensor)[0])
21 |
22 | Args:
23 | indices: 1d Tensor with integer indices which are to be set to
24 | indices_values.
25 | size: scalar with size (integer) of output Tensor.
26 | indices_value: values of elements specified by indices in the output vector
27 | default_value: values of other elements in the output vector.
28 | dtype: data type.
29 |
30 | Returns:
31 | dense 1D Tensor of shape [size] with indices set to indices_values and the
32 | rest set to default_value.
33 | """
34 | size = tf.to_int32(size)
35 | zeros = tf.ones([size], dtype=dtype) * default_value  # baseline vector filled with default_value
36 | values = tf.ones_like(indices, dtype=dtype) * indices_value
37 |
38 | return tf.dynamic_stitch([tf.range(size), tf.to_int32(indices)],
39 | [zeros, values])
--------------------------------------------------------------------------------
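
A small TF1 session showing what indices_to_dense_vector produces for unordered indices (values are hypothetical; assumes the repo root is on sys.path):

import tensorflow as tf
from libs.box_utils.tf_ops import indices_to_dense_vector

indices = tf.constant([3, 0], dtype=tf.int32)  # deliberately unordered
vec = indices_to_dense_vector(indices, size=5)

with tf.Session() as sess:
    print(sess.run(vec))  # [1. 0. 0. 1. 0.]
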
/libs/box_utils/tf_ops.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/box_utils/tf_ops.pyc
--------------------------------------------------------------------------------
/libs/configs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/configs/__init__.py
--------------------------------------------------------------------------------
/libs/configs/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/configs/__init__.pyc
--------------------------------------------------------------------------------
/libs/configs/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/configs/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/libs/configs/__pycache__/cfgs.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/configs/__pycache__/cfgs.cpython-35.pyc
--------------------------------------------------------------------------------
/libs/configs/cfgs.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/configs/cfgs.pyc
--------------------------------------------------------------------------------
/libs/configs/cfgs_coco.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import division, print_function, absolute_import
3 | import os
4 | import tensorflow as tf
5 |
6 |
7 | # ------------------------------------------------
8 | VERSION = 'FasterRCNN_Res50_20180603COCO'
9 | NET_NAME = 'resnet_v1_50' #'MobilenetV2'
10 | ADD_BOX_IN_TENSORBOARD = True
11 |
12 | # ---------------------------------------- System_config
13 | ROOT_PATH = os.path.abspath('../')
14 | print (20*"++--")
15 | print (ROOT_PATH)
16 | GPU_GROUP = "0"
17 | SHOW_TRAIN_INFO_INTE = 10
18 | SMRY_ITER = 100
19 | SAVE_WEIGHTS_INTE = 10000
20 |
21 | SUMMARY_PATH = ROOT_PATH + '/output/summary'
22 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result'
23 | INFERENCE_IMAGE_PATH = ROOT_PATH + '/tools/inference_image'
24 | INFERENCE_SAVE_PATH = ROOT_PATH + '/tools/inference_results'
25 |
26 | if NET_NAME.startswith("resnet"):
27 | weights_name = NET_NAME
28 | elif NET_NAME.startswith("MobilenetV2"):
29 | weights_name = "mobilenet/mobilenet_v2_1.0_224"
30 | else:
31 | raise Exception('net name must be in [resnet_v1_101, resnet_v1_50, MobilenetV2]')
32 |
33 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt'
34 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights')
35 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/'
36 |
37 | # ------------------------------------------ Train config
38 | RESTORE_FROM_RPN = False
39 | IS_FILTER_OUTSIDE_BOXES = True
40 | FIXED_BLOCKS = 1 # allow 0~3
41 |
42 | RPN_LOCATION_LOSS_WEIGHT = 1.
43 | RPN_CLASSIFICATION_LOSS_WEIGHT = 1.0
44 |
45 | FAST_RCNN_LOCATION_LOSS_WEIGHT = 1.0
46 | FAST_RCNN_CLASSIFICATION_LOSS_WEIGHT = 1.0
47 | RPN_SIGMA = 3.0 # 3.0
48 | FASTRCNN_SIGMA = 1.0
49 |
50 |
51 | MUTILPY_BIAS_GRADIENT = None  # 2.0; if None, bias gradients are not multiplied
52 | GRADIENT_CLIPPING_BY_NORM = None  # 10.0; if None, gradients are not clipped
53 |
54 | EPSILON = 1e-5
55 | MOMENTUM = 0.9
56 | LR = 0.001 # 0.001 # 0.0003
57 | DECAY_STEP = [350000, 490000] # 50000, 70000
58 | MAX_ITERATION = 500000
59 |
60 | # -------------------------------------------- Data_preprocess_config
61 | DATASET_NAME = 'coco' #'pascal' # 'ship', 'spacenet', 'pascal', 'coco'
62 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR
63 | IMG_SHORT_SIDE_LEN = 600
64 | IMG_MAX_LENGTH = 1000
65 | CLASS_NUM = 80 #20
66 |
67 | # --------------------------------------------- Network_config
68 | BATCH_SIZE = 1
69 | # INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01)
70 | INITIALIZER = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
71 | # BBOX_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.001)
72 | BBOX_INITIALIZER = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
73 | # WEIGHT_DECAY = 0.00004 if NET_NAME.startswith('Mobilenet') else 0.0001
74 | WEIGHT_DECAY = 0.0
75 |
76 | # ---------------------------------------------Anchor config
77 | BASE_ANCHOR_SIZE_LIST = [256] # can be modified
78 | ANCHOR_STRIDE = [16] # can not be modified in most situations
79 | ANCHOR_SCALES = [0.25, 0.5, 1., 2.0] # [4, 8, 16, 32]
80 | ANCHOR_RATIOS = [0.5, 1., 2.0]
81 | ROI_SCALE_FACTORS = [10., 10., 5.0, 5.0]
82 | ANCHOR_SCALE_FACTORS = None # [10.0, 10.0, 5.0, 5.0]
83 |
84 |
85 | # --------------------------------------------RPN config
86 | KERNEL_SIZE = 3
87 | RPN_IOU_POSITIVE_THRESHOLD = 0.7
88 | RPN_IOU_NEGATIVE_THRESHOLD = 0.3
89 | TRAIN_RPN_CLOOBER_POSITIVES = False
90 |
91 | RPN_MINIBATCH_SIZE = 256
92 | RPN_POSITIVE_RATE = 0.5
93 | RPN_NMS_IOU_THRESHOLD = 0.7
94 | RPN_TOP_K_NMS_TRAIN = 12000
95 | RPN_MAXIMUM_PROPOSAL_TARIN = 2000  # proposals kept after RPN NMS during training (OHEM sample num)
96 |
97 | RPN_TOP_K_NMS_TEST = 6000 # 5000
98 | RPN_MAXIMUM_PROPOSAL_TEST = 300 # 300
99 |
100 |
101 | # -------------------------------------------Fast-RCNN config
102 | ROI_SIZE = 14
103 | ROI_POOL_KERNEL_SIZE = 2
104 | USE_DROPOUT = False
105 | KEEP_PROB = 1.0
106 | SHOW_SCORE_THRSHOLD = 0.5 # only show in tensorboard
107 |
108 | FAST_RCNN_NMS_IOU_THRESHOLD = 0.3 # 0.6
109 | FAST_RCNN_NMS_MAX_BOXES_PER_CLASS = 100
110 | FAST_RCNN_IOU_POSITIVE_THRESHOLD = 0.5
111 | FAST_RCNN_IOU_NEGATIVE_THRESHOLD = 0.0  # IOU in [this, 0.5) is sampled as negative
112 | FAST_RCNN_MINIBATCH_SIZE = 256  # 256; if -1, train with OHEM
113 | FAST_RCNN_POSITIVE_RATE = 0.25
114 |
115 | ADD_GTBOXES_TO_TRAIN = False
116 |
117 |
118 |
119 |
--------------------------------------------------------------------------------
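
For reference, the anchor geometry these settings imply can be checked in a few lines (a sketch, not part of the config):

base = 256                     # BASE_ANCHOR_SIZE_LIST[0]
scales = [0.25, 0.5, 1., 2.0]  # ANCHOR_SCALES
ratios = [0.5, 1., 2.0]        # ANCHOR_RATIOS
print([base * s for s in scales])  # [64.0, 128.0, 256.0, 512.0] side lengths before the ratio adjustment
print(len(scales) * len(ratios))   # 12 anchors per feature-map location
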
/libs/detection_oprations/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/detection_oprations/__init__.py
--------------------------------------------------------------------------------
/libs/detection_oprations/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/detection_oprations/__init__.pyc
--------------------------------------------------------------------------------
/libs/detection_oprations/anchor_target_layer_without_boxweight.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick and Xinlei Chen
6 | # --------------------------------------------------------
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 |
11 | import os
12 | from libs.configs import cfgs
13 | import numpy as np
14 | import numpy.random as npr
15 | from libs.box_utils.cython_utils.cython_bbox import bbox_overlaps
16 | from libs.box_utils import encode_and_decode
17 |
18 |
19 | def anchor_target_layer(
20 | gt_boxes, img_shape, all_anchors, is_restrict_bg=False):
21 | """Same as the anchor target layer in original Fast/er RCNN """
22 |
23 | total_anchors = all_anchors.shape[0]
24 | img_h, img_w = img_shape[1], img_shape[2]
25 | gt_boxes = gt_boxes[:, :-1] # remove class label
26 |
27 | # allow boxes to sit over the edge by a small amount
28 | _allowed_border = 0
29 |
30 | # only keep anchors inside the image
31 | inds_inside = np.where(
32 | (all_anchors[:, 0] >= -_allowed_border) &
33 | (all_anchors[:, 1] >= -_allowed_border) &
34 | (all_anchors[:, 2] < img_w + _allowed_border) & # width
35 | (all_anchors[:, 3] < img_h + _allowed_border) # height
36 | )[0]
37 |
38 | anchors = all_anchors[inds_inside, :]
39 |
40 | # label: 1 is positive, 0 is negative, -1 is don't care
41 | labels = np.empty((len(inds_inside),), dtype=np.float32)
42 | labels.fill(-1)
43 |
44 | # overlaps between the anchors and the gt boxes
45 | overlaps = bbox_overlaps(
46 | np.ascontiguousarray(anchors, dtype=np.float),
47 | np.ascontiguousarray(gt_boxes, dtype=np.float))
48 |
49 | argmax_overlaps = overlaps.argmax(axis=1)
50 | max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
51 | gt_argmax_overlaps = overlaps.argmax(axis=0)
52 | gt_max_overlaps = overlaps[
53 | gt_argmax_overlaps, np.arange(overlaps.shape[1])]
54 | gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
55 |
56 | if not cfgs.TRAIN_RPN_CLOOBER_POSITIVES:
57 | labels[max_overlaps < cfgs.RPN_IOU_NEGATIVE_THRESHOLD] = 0
58 |
59 | labels[gt_argmax_overlaps] = 1
60 | labels[max_overlaps >= cfgs.RPN_IOU_POSITIVE_THRESHOLD] = 1
61 |
62 | if cfgs.TRAIN_RPN_CLOOBER_POSITIVES:
63 | labels[max_overlaps < cfgs.RPN_IOU_NEGATIVE_THRESHOLD] = 0
64 |
65 | num_fg = int(cfgs.RPN_MINIBATCH_SIZE * cfgs.RPN_POSITIVE_RATE)
66 | fg_inds = np.where(labels == 1)[0]
67 | if len(fg_inds) > num_fg:
68 | disable_inds = npr.choice(
69 | fg_inds, size=(len(fg_inds) - num_fg), replace=False)
70 | labels[disable_inds] = -1
71 |
72 | num_bg = cfgs.RPN_MINIBATCH_SIZE - np.sum(labels == 1)
73 | if is_restrict_bg:
74 | num_bg = max(num_bg, num_fg * 1.5)
75 | bg_inds = np.where(labels == 0)[0]
76 | if len(bg_inds) > num_bg:
77 | disable_inds = npr.choice(
78 | bg_inds, size=(len(bg_inds) - num_bg), replace=False)
79 | labels[disable_inds] = -1
80 |
81 | bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])
82 |
83 | # map up to original set of anchors
84 | labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
85 | bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
86 |
87 | # labels = labels.reshape((1, height, width, A))
88 | rpn_labels = labels.reshape((-1, 1))
89 |
90 | # bbox_targets
91 | bbox_targets = bbox_targets.reshape((-1, 4))
92 | rpn_bbox_targets = bbox_targets
93 |
94 | return rpn_labels, rpn_bbox_targets
95 |
96 |
97 | def _unmap(data, count, inds, fill=0):
98 | """ Unmap a subset of item (data) back to the original set of items (of
99 | size count) """
100 | if len(data.shape) == 1:
101 | ret = np.empty((count,), dtype=np.float32)
102 | ret.fill(fill)
103 | ret[inds] = data
104 | else:
105 | ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
106 | ret.fill(fill)
107 | ret[inds, :] = data
108 | return ret
109 |
110 |
111 | def _compute_targets(ex_rois, gt_rois):
112 | """Compute bounding-box regression targets for an image."""
113 | # targets = bbox_transform(ex_rois, gt_rois[:, :4]).astype(
114 | # np.float32, copy=False)
115 | targets = encode_and_decode.encode_boxes(unencode_boxes=gt_rois,
116 | reference_boxes=ex_rois,
117 | scale_factors=cfgs.ANCHOR_SCALE_FACTORS)
118 | # targets = encode_and_decode.encode_boxes(ex_rois=ex_rois,
119 | # gt_rois=gt_rois,
120 | # scale_factor=None)
121 | return targets
122 |
--------------------------------------------------------------------------------
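
The compiled cython_bbox.bbox_overlaps imported above is not shown in this dump; a NumPy sketch with the same semantics as the standard py-faster-rcnn kernel (pairwise IoU with the +1 pixel convention, output shape [N, K]) would look like this (bbox_overlaps_np is hypothetical):

import numpy as np

def bbox_overlaps_np(boxes, query_boxes):
    # boxes: [N, 4], query_boxes: [K, 4], both as (x1, y1, x2, y2)
    b, q = boxes[:, None, :], query_boxes[None, :, :]
    iw = np.maximum(np.minimum(b[..., 2], q[..., 2]) - np.maximum(b[..., 0], q[..., 0]) + 1, 0)
    ih = np.maximum(np.minimum(b[..., 3], q[..., 3]) - np.maximum(b[..., 1], q[..., 1]) + 1, 0)
    inter = iw * ih
    area_b = (b[..., 2] - b[..., 0] + 1) * (b[..., 3] - b[..., 1] + 1)
    area_q = (q[..., 2] - q[..., 0] + 1) * (q[..., 3] - q[..., 1] + 1)
    return inter / (area_b + area_q - inter)
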
/libs/detection_oprations/anchor_target_layer_without_boxweight.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/detection_oprations/anchor_target_layer_without_boxweight.pyc
--------------------------------------------------------------------------------
/libs/detection_oprations/proposal_opr.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | @author: zeming li
4 | @contact: zengarden2009@gmail.com
5 | """
6 |
7 | from libs.configs import cfgs
8 | from libs.box_utils import encode_and_decode
9 | from libs.box_utils import boxes_utils
10 | import tensorflow as tf
11 | import numpy as np
12 |
13 |
14 | def postprocess_rpn_proposals(rpn_bbox_pred, rpn_cls_prob, img_shape, anchors, is_training):
15 | '''
16 |
17 | :param rpn_bbox_pred: [-1, 4] predicted box deltas
18 | :param rpn_cls_prob: [-1, 2] (background, foreground) probabilities
19 | :param img_shape: shape of the input image batch
20 | :param anchors: [-1, 4]
21 | :param is_training: selects the train/test pre- and post-NMS proposal counts
22 | :return: final_boxes [-1, 4], final_probs [-1]
23 | '''
24 |
25 | if is_training:
26 | pre_nms_topN = cfgs.RPN_TOP_K_NMS_TRAIN
27 | post_nms_topN = cfgs.RPN_MAXIMUM_PROPOSAL_TARIN
28 | nms_thresh = cfgs.RPN_NMS_IOU_THRESHOLD
29 | else:
30 | pre_nms_topN = cfgs.RPN_TOP_K_NMS_TEST
31 | post_nms_topN = cfgs.RPN_MAXIMUM_PROPOSAL_TEST
32 | nms_thresh = cfgs.RPN_NMS_IOU_THRESHOLD
33 |
34 | cls_prob = rpn_cls_prob[:, 1]
35 |
36 | # 1. decode boxes
37 | decode_boxes = encode_and_decode.decode_boxes(encoded_boxes=rpn_bbox_pred,
38 | reference_boxes=anchors,
39 | scale_factors=cfgs.ANCHOR_SCALE_FACTORS)
40 |
41 | # decode_boxes = encode_and_decode.decode_boxes(boxes=anchors,
42 | # deltas=rpn_bbox_pred,
43 | # scale_factor=None)
44 |
45 | # 2. clip to img boundaries
46 | decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(decode_boxes=decode_boxes,
47 | img_shape=img_shape)
48 |
49 | # 3. get top N to NMS
50 | if pre_nms_topN > 0:
51 | pre_nms_topN = tf.minimum(pre_nms_topN, tf.shape(decode_boxes)[0], name='avoid_unenough_boxes')
52 | cls_prob, top_k_indices = tf.nn.top_k(cls_prob, k=pre_nms_topN)
53 | decode_boxes = tf.gather(decode_boxes, top_k_indices)
54 |
55 | # 4. NMS
56 | keep = tf.image.non_max_suppression(
57 | boxes=decode_boxes,
58 | scores=cls_prob,
59 | max_output_size=post_nms_topN,
60 | iou_threshold=nms_thresh)
61 |
62 | final_boxes = tf.gather(decode_boxes, keep)
63 | final_probs = tf.gather(cls_prob, keep)
64 |
65 | return final_boxes, final_probs
66 |
67 |
--------------------------------------------------------------------------------
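
tf.image.non_max_suppression returns indices into the scored boxes, which is why the code gathers final_boxes and final_probs afterwards; a minimal standalone check (hypothetical boxes, TF1):

import tensorflow as tf

boxes = tf.constant([[0., 0., 10., 10.], [1., 1., 10., 10.], [20., 20., 30., 30.]])
scores = tf.constant([0.9, 0.8, 0.7])
keep = tf.image.non_max_suppression(boxes, scores, max_output_size=10, iou_threshold=0.7)

with tf.Session() as sess:
    print(sess.run(keep))  # [0 2]: box 1 overlaps box 0 with IoU 0.81 > 0.7 and is suppressed
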
/libs/detection_oprations/proposal_opr.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/detection_oprations/proposal_opr.pyc
--------------------------------------------------------------------------------
/libs/detection_oprations/proposal_target_layer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 | from libs.configs import cfgs
11 | import numpy as np
12 | import numpy.random as npr
13 |
14 | from libs.box_utils import encode_and_decode
15 | from libs.box_utils.cython_utils.cython_bbox import bbox_overlaps
16 |
17 |
18 | def proposal_target_layer(rpn_rois, gt_boxes):
19 | """
20 | Assign object detection proposals to ground-truth targets. Produces proposal
21 | classification labels and bounding-box regression targets.
22 | """
23 | # Proposal ROIs (x1, y1, x2, y2) coming from RPN
24 | # gt_boxes (x1, y1, x2, y2, label)
25 | if cfgs.ADD_GTBOXES_TO_TRAIN:
26 | all_rois = np.vstack((rpn_rois, gt_boxes[:, :-1]))
27 | else:
28 | all_rois = rpn_rois
29 | # np.inf
30 | rois_per_image = np.inf if cfgs.FAST_RCNN_MINIBATCH_SIZE == -1 else cfgs.FAST_RCNN_MINIBATCH_SIZE
31 |
32 | fg_rois_per_image = np.round(cfgs.FAST_RCNN_POSITIVE_RATE * rois_per_image)
33 |
34 | # Sample rois with classification labels and bounding box regression
35 | labels, rois, bbox_targets = _sample_rois(all_rois, gt_boxes, fg_rois_per_image,
36 | rois_per_image, cfgs.CLASS_NUM+1)
37 | # print(labels.shape, rois.shape, bbox_targets.shape)
38 | rois = rois.reshape(-1, 4)
39 | labels = labels.reshape(-1)
40 | bbox_targets = bbox_targets.reshape(-1, (cfgs.CLASS_NUM+1) * 4)
41 |
42 | return rois, labels, bbox_targets
43 |
44 |
45 | def _get_bbox_regression_labels(bbox_target_data, num_classes):
46 | """Bounding-box regression targets (bbox_target_data) are stored in a
47 | compact form N x (class, tx, ty, tw, th)
48 |
49 | This function expands those targets into the 4-of-4*K representation used
50 | by the network (i.e. only one class has non-zero targets).
51 |
52 | Returns:
53 | bbox_target (ndarray): N x 4K blob of regression targets
54 | """
55 |
56 | clss = bbox_target_data[:, 0]
57 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
58 | inds = np.where(clss > 0)[0]
59 | for ind in inds:
60 | cls = clss[ind]
61 | start = int(4 * cls)
62 | end = start + 4
63 | bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
64 |
65 | return bbox_targets
66 |
67 |
68 | def _compute_targets(ex_rois, gt_rois, labels):
69 | """Compute bounding-box regression targets for an image.
70 | that is : [label, tx, ty, tw, th]
71 | """
72 |
73 | assert ex_rois.shape[0] == gt_rois.shape[0]
74 | assert ex_rois.shape[1] == 4
75 | assert gt_rois.shape[1] == 4
76 |
77 | targets = encode_and_decode.encode_boxes(unencode_boxes=gt_rois,
78 | reference_boxes=ex_rois,
79 | scale_factors=cfgs.ROI_SCALE_FACTORS)
80 | # targets = encode_and_decode.encode_boxes(ex_rois=ex_rois,
81 | # gt_rois=gt_rois,
82 | # scale_factor=cfgs.ROI_SCALE_FACTORS)
83 |
84 | return np.hstack(
85 | (labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
86 |
87 |
88 | def _sample_rois(all_rois, gt_boxes, fg_rois_per_image,
89 | rois_per_image, num_classes):
90 | """Generate a random sample of RoIs comprising foreground and background
91 | examples.
92 |
93 | all_rois shape is [-1, 4]
94 | gt_boxes shape is [-1, 5]. that is [x1, y1, x2, y2, label]
95 | """
96 | # overlaps: (rois x gt_boxes)
97 | overlaps = bbox_overlaps(
98 | np.ascontiguousarray(all_rois, dtype=np.float),
99 | np.ascontiguousarray(gt_boxes[:, :-1], dtype=np.float))
100 | gt_assignment = overlaps.argmax(axis=1)
101 | max_overlaps = overlaps.max(axis=1)
102 | labels = gt_boxes[gt_assignment, -1]
103 |
104 | # Select foreground RoIs as those with >= FG_THRESH overlap
105 | fg_inds = np.where(max_overlaps >= cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD)[0]
106 |
107 | # Guard against the case when an image has fewer than fg_rois_per_image
108 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
109 | bg_inds = np.where((max_overlaps < cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD) &
110 | (max_overlaps >= cfgs.FAST_RCNN_IOU_NEGATIVE_THRESHOLD))[0]
111 | # print("first fileter, fg_size: {} || bg_size: {}".format(fg_inds.shape, bg_inds.shape))
112 | # Guard against the case when an image has fewer than fg_rois_per_image
113 | # foreground RoIs
114 | fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size)
115 |
116 | # Sample foreground regions without replacement
117 | if fg_inds.size > 0:
118 | fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_this_image), replace=False)
119 | # Compute number of background RoIs to take from this image (guarding
120 | # against there being fewer than desired)
121 | bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
122 | bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
123 | # Sample background regions without replacement
124 | if bg_inds.size > 0:
125 | bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_this_image), replace=False)
126 |
127 | # print("second fileter, fg_size: {} || bg_size: {}".format(fg_inds.shape, bg_inds.shape))
128 | # The indices that we're selecting (both fg and bg)
129 | keep_inds = np.append(fg_inds, bg_inds)
130 |
131 |
132 | # Select sampled values from various arrays:
133 | labels = labels[keep_inds]
134 |
135 | # Clamp labels for the background RoIs to 0
136 | labels[int(fg_rois_per_this_image):] = 0
137 | rois = all_rois[keep_inds]
138 |
139 | bbox_target_data = _compute_targets(
140 | rois, gt_boxes[gt_assignment[keep_inds], :-1], labels)
141 | bbox_targets = \
142 | _get_bbox_regression_labels(bbox_target_data, num_classes)
143 |
144 | return labels, rois, bbox_targets
145 |
--------------------------------------------------------------------------------
/libs/detection_oprations/proposal_target_layer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/detection_oprations/proposal_target_layer.pyc
--------------------------------------------------------------------------------
/libs/export_pbs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/export_pbs/__init__.py
--------------------------------------------------------------------------------
/libs/export_pbs/exportPb.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import, print_function, division
4 |
5 | import os, sys
6 | import tensorflow as tf
7 | import tensorflow.contrib.slim as slim
8 | from tensorflow.python.tools import freeze_graph
9 |
10 | sys.path.append('../../')
11 | from data.io.image_preprocess import short_side_resize_for_inference_data
12 | from libs.configs import cfgs
13 | from libs.networks import build_whole_network
14 |
15 | CKPT_PATH = '/home/yjr/PycharmProjects/Faster-RCNN_Tensorflow/output/trained_weights/FasterRCNN_20180517/voc_200000model.ckpt'
16 | OUT_DIR = '../../output/Pbs'
17 | PB_NAME = 'FasterRCNN_Res101_Pascal.pb'
18 |
19 |
20 | def build_detection_graph():
21 | # 1. preprocess img
22 | img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3],
23 | name='input_img')  # expects RGB, not BGR
24 | raw_shape = tf.shape(img_plac)
25 | raw_h, raw_w = tf.to_float(raw_shape[0]), tf.to_float(raw_shape[1])
26 |
27 | img_batch = tf.cast(img_plac, tf.float32)
28 | img_batch = short_side_resize_for_inference_data(img_tensor=img_batch,
29 | target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
30 | length_limitation=cfgs.IMG_MAX_LENGTH)
31 | img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
32 | img_batch = tf.expand_dims(img_batch, axis=0) # [1, None, None, 3]
33 |
34 | det_net = build_whole_network.DetectionNetwork(base_network_name=cfgs.NET_NAME,
35 | is_training=False)
36 |
37 | detected_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
38 | input_img_batch=img_batch,
39 | gtboxes_batch=None)
40 |
41 | xmin, ymin, xmax, ymax = detected_boxes[:, 0], detected_boxes[:, 1], \
42 | detected_boxes[:, 2], detected_boxes[:, 3]
43 |
44 | resized_shape = tf.shape(img_batch)
45 | resized_h, resized_w = tf.to_float(resized_shape[1]), tf.to_float(resized_shape[2])
46 |
47 | xmin = xmin * raw_w / resized_w
48 | xmax = xmax * raw_w / resized_w
49 |
50 | ymin = ymin * raw_h / resized_h
51 | ymax = ymax * raw_h / resized_h
52 |
53 | boxes = tf.transpose(tf.stack([xmin, ymin, xmax, ymax]))
54 | dets = tf.concat([tf.reshape(detection_category, [-1, 1]),
55 | tf.reshape(detection_scores, [-1, 1]),
56 | boxes], axis=1, name='DetResults')
57 |
58 | return dets
59 |
60 |
61 | def export_frozenPB():
62 |
63 | tf.reset_default_graph()
64 |
65 | dets = build_detection_graph()
66 |
67 | saver = tf.train.Saver()
68 |
69 | with tf.Session() as sess:
70 | print("we have restred the weights from =====>>\n", CKPT_PATH)
71 | saver.restore(sess, CKPT_PATH)
72 |
73 | tf.train.write_graph(sess.graph_def, OUT_DIR, PB_NAME)
74 | freeze_graph.freeze_graph(input_graph=os.path.join(OUT_DIR, PB_NAME),
75 | input_saver='',
76 | input_binary=False,
77 | input_checkpoint=CKPT_PATH,
78 | output_node_names="DetResults",
79 | restore_op_name="save/restore_all",
80 | filename_tensor_name='save/Const:0',
81 | output_graph=os.path.join(OUT_DIR, PB_NAME.replace('.pb', '_Frozen.pb')),
82 | clear_devices=False,
83 | initializer_nodes='')
84 |
85 | if __name__ == '__main__':
86 | os.environ["CUDA_VISIBLE_DEVICES"] = ''
87 | export_frozenPB()
88 |
--------------------------------------------------------------------------------
/libs/export_pbs/test_exportPb.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 | from __future__ import print_function
5 | from __future__ import division
6 |
7 | import os, sys
8 | import tensorflow as tf
9 | import time
10 | import cv2
11 | import argparse
12 | import numpy as np
13 | sys.path.append("../")
14 |
15 | from data.io.image_preprocess import short_side_resize_for_inference_data
16 | from libs.configs import cfgs
17 | from libs.networks import build_whole_network
18 | from libs.box_utils import draw_box_in_img
19 | from help_utils import tools
20 |
21 |
22 |
23 |
24 |
25 | def load_graph(frozen_graph_file):
26 |
27 | # we parse the graph_def file
28 | with tf.gfile.GFile(frozen_graph_file, 'rb') as f:
29 | graph_def = tf.GraphDef()
30 | graph_def.ParseFromString(f.read())
31 |
32 | # we load the graph_def in the default graph
33 |
34 | with tf.Graph().as_default() as graph:
35 | tf.import_graph_def(graph_def,
36 | input_map=None,
37 | return_elements=None,
38 | name="",
39 | op_dict=None,
40 | producer_op_list=None)
41 | return graph
42 |
43 |
44 | def test(frozen_graph_path, test_dir):
45 |
46 | graph = load_graph(frozen_graph_path)
47 | print("we are testing ====>>>>", frozen_graph_path)
48 |
49 | img = graph.get_tensor_by_name("input_img:0")
50 | dets = graph.get_tensor_by_name("DetResults:0")
51 |
52 | with tf.Session(graph=graph) as sess:
53 | for img_path in os.listdir(test_dir):
54 | a_img = cv2.imread(os.path.join(test_dir, img_path))[:, :, ::-1]  # BGR -> RGB
55 | st = time.time()
56 | dets_val = sess.run(dets, feed_dict={img: a_img})
57 |
58 | show_indices = dets_val[:, 1] >= 0.5
59 | dets_val = dets_val[show_indices]
60 | final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(a_img,
61 | boxes=dets_val[:, 2:],
62 | labels=dets_val[:, 0],
63 | scores=dets_val[:, 1])
64 | cv2.imwrite(img_path,  # note: writes into the current working directory
65 | final_detections[:, :, ::-1])  # RGB -> BGR for OpenCV
66 | print("%s cost time: %f" % (img_path, time.time() - st))
67 |
68 | if __name__ == '__main__':
69 | test('/home/yjr/PycharmProjects/Faster-RCNN_Tensorflow/output/Pbs/FasterRCNN_Res101_Pascal_Frozen.pb',
70 | '/home/yjr/PycharmProjects/Faster-RCNN_Tensorflow/tools/demos')
71 |
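For reference, a self-contained sketch of the DetResults row layout and the score filter used in test() above; the sample values are invented:

import numpy as np

# each row of DetResults is [category, score, xmin, ymin, xmax, ymax]
dets_val = np.array([[15., 0.91,  48.,  30., 320., 410.],
                     [ 7., 0.32,  10.,  12.,  60.,  80.]])
keep = dets_val[:, 1] >= 0.5        # same threshold as show_indices in test()
print(dets_val[keep][:, 2:])        # boxes of the surviving detection(s)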
--------------------------------------------------------------------------------
/libs/label_name_dict/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/label_name_dict/__init__.py
--------------------------------------------------------------------------------
/libs/label_name_dict/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/label_name_dict/__init__.pyc
--------------------------------------------------------------------------------
/libs/label_name_dict/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/label_name_dict/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/libs/label_name_dict/__pycache__/remote_sensing_dict.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/label_name_dict/__pycache__/remote_sensing_dict.cpython-35.pyc
--------------------------------------------------------------------------------
/libs/label_name_dict/coco_dict.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import, print_function, division
4 |
5 | class_names = [
6 | 'back_ground', 'person', 'bicycle', 'car', 'motorcycle',
7 | 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
8 | 'fire hydrant', 'stop sign', 'parking meter', 'bench',
9 | 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
10 | 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
11 | 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
12 | 'sports ball', 'kite', 'baseball bat', 'baseball glove',
13 | 'skateboard', 'surfboard', 'tennis racket', 'bottle',
14 | 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
15 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
16 | 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
17 | 'couch', 'potted plant', 'bed', 'dining table', 'toilet',
18 | 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
19 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
20 | 'book', 'clock', 'vase', 'scissors', 'teddy bear',
21 | 'hair drier', 'toothbrush']
22 |
23 |
24 | classes_originID = {
25 | 'person': 1, 'bicycle': 2, 'car': 3, 'motorcycle': 4,
26 | 'airplane': 5, 'bus': 6, 'train': 7, 'truck': 8, 'boat': 9,
27 | 'traffic light': 10, 'fire hydrant': 11, 'stop sign': 13,
28 | 'parking meter': 14, 'bench': 15, 'bird': 16, 'cat': 17,
29 | 'dog': 18, 'horse': 19, 'sheep': 20, 'cow': 21, 'elephant': 22,
30 | 'bear': 23, 'zebra': 24, 'giraffe': 25, 'backpack': 27,
31 | 'umbrella': 28, 'handbag': 31, 'tie': 32, 'suitcase': 33,
32 | 'frisbee': 34, 'skis': 35, 'snowboard': 36, 'sports ball': 37,
33 | 'kite': 38, 'baseball bat': 39, 'baseball glove': 40,
34 | 'skateboard': 41, 'surfboard': 42, 'tennis racket': 43,
35 | 'bottle': 44, 'wine glass': 46, 'cup': 47, 'fork': 48,
36 | 'knife': 49, 'spoon': 50, 'bowl': 51, 'banana': 52, 'apple': 53,
37 | 'sandwich': 54, 'orange': 55, 'broccoli': 56, 'carrot': 57,
38 | 'hot dog': 58, 'pizza': 59, 'donut': 60, 'cake': 61,
39 | 'chair': 62, 'couch': 63, 'potted plant': 64, 'bed': 65,
40 | 'dining table': 67, 'toilet': 70, 'tv': 72, 'laptop': 73,
41 | 'mouse': 74, 'remote': 75, 'keyboard': 76, 'cell phone': 77,
42 | 'microwave': 78, 'oven': 79, 'toaster': 80, 'sink': 81,
43 | 'refrigerator': 82, 'book': 84, 'clock': 85, 'vase': 86,
44 | 'scissors': 87, 'teddy bear': 88, 'hair drier': 89,
45 | 'toothbrush': 90}
46 |
47 | originID_classes = {item: key for key, item in classes_originID.items()}
48 | NAME_LABEL_MAP = dict(zip(class_names, range(len(class_names))))
49 | LABEL_NAME_MAP = dict(zip(range(len(class_names)), class_names))
50 |
51 | # print (originID_classes)
52 |
53 |
54 |
55 |
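A short usage sketch of the mappings defined above (plain Python; the import path assumes the repo root is on sys.path):

from libs.label_name_dict.coco_dict import (NAME_LABEL_MAP, LABEL_NAME_MAP,
                                            classes_originID, originID_classes)

print(NAME_LABEL_MAP['person'])      # 1  -- contiguous training label
print(LABEL_NAME_MAP[1])             # 'person'
print(classes_originID['person'])    # 1  -- original, non-contiguous COCO id
print(originID_classes[90])          # 'toothbrush'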
--------------------------------------------------------------------------------
/libs/label_name_dict/coco_dict.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/label_name_dict/coco_dict.pyc
--------------------------------------------------------------------------------
/libs/label_name_dict/label_dict.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import division, print_function, absolute_import
3 |
4 | from libs.configs import cfgs
5 |
6 | if cfgs.DATASET_NAME == 'ship':
7 | NAME_LABEL_MAP = {
8 | 'back_ground': 0,
9 | 'ship': 1
10 | }
11 | elif cfgs.DATASET_NAME == 'FDDB':
12 | NAME_LABEL_MAP = {
13 | 'back_ground': 0,
14 | 'face': 1
15 | }
16 | elif cfgs.DATASET_NAME == 'icdar':
17 | NAME_LABEL_MAP = {
18 | 'back_ground': 0,
19 | 'text': 1
20 | }
21 | elif cfgs.DATASET_NAME.startswith('DOTA'):
22 | NAME_LABEL_MAP = {
23 | 'back_ground': 0,
24 | 'roundabout': 1,
25 | 'tennis-court': 2,
26 | 'swimming-pool': 3,
27 | 'storage-tank': 4,
28 | 'soccer-ball-field': 5,
29 | 'small-vehicle': 6,
30 | 'ship': 7,
31 | 'plane': 8,
32 | 'large-vehicle': 9,
33 | 'helicopter': 10,
34 | 'harbor': 11,
35 | 'ground-track-field': 12,
36 | 'bridge': 13,
37 | 'basketball-court': 14,
38 | 'baseball-diamond': 15
39 | }
40 | elif cfgs.DATASET_NAME == 'pascal':
41 | NAME_LABEL_MAP = {
42 | 'back_ground': 0,
43 | 'aeroplane': 1,
44 | 'bicycle': 2,
45 | 'bird': 3,
46 | 'boat': 4,
47 | 'bottle': 5,
48 | 'bus': 6,
49 | 'car': 7,
50 | 'cat': 8,
51 | 'chair': 9,
52 | 'cow': 10,
53 | 'diningtable': 11,
54 | 'dog': 12,
55 | 'horse': 13,
56 | 'motorbike': 14,
57 | 'person': 15,
58 | 'pottedplant': 16,
59 | 'sheep': 17,
60 | 'sofa': 18,
61 | 'train': 19,
62 | 'tvmonitor': 20
63 | }
64 | else:
65 | raise ValueError('please set a label dict for DATASET_NAME: %s' % cfgs.DATASET_NAME)
66 |
67 |
68 | def get_label_name_map():
69 | reverse_dict = {}
70 | for name, label in NAME_LABEL_MAP.items():
71 | reverse_dict[label] = name
72 | return reverse_dict
73 |
74 | LABEL_NAME_MAP = get_label_name_map()
--------------------------------------------------------------------------------
/libs/label_name_dict/remote_sensing_dict.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | NAME_LABEL_MAP = {
4 | 'back_ground': 0,
5 | 'building': 1
6 | }
7 |
8 |
9 | def get_label_name_map():
10 | reverse_dict = {}
11 | for name, label in NAME_LABEL_MAP.items():
12 | reverse_dict[label] = name
13 | return reverse_dict
14 |
15 | LABEL_NAME_MAP = get_label_name_map()
--------------------------------------------------------------------------------
/libs/label_name_dict/remote_sensing_dict.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/label_name_dict/remote_sensing_dict.pyc
--------------------------------------------------------------------------------
/libs/losses/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/losses/__init__.py
--------------------------------------------------------------------------------
/libs/losses/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/losses/__init__.pyc
--------------------------------------------------------------------------------
/libs/losses/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/losses/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/libs/losses/__pycache__/losses.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/losses/__pycache__/losses.cpython-35.pyc
--------------------------------------------------------------------------------
/libs/losses/losses.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | @author: jemmy li
4 | @contact: zengarden2009@gmail.com
5 | """
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 |
10 | import tensorflow as tf
11 |
12 |
13 | def _smooth_l1_loss_base(bbox_pred, bbox_targets, sigma=1.0):
14 | '''
15 |
16 | :param bbox_pred: [-1, 4] in RPN. [-1, cls_num+1, 4] in Fast-rcnn
17 | :param bbox_targets: shape is same as bbox_pred
18 | :param sigma: controls the transition point; the loss switches branches at |x| = 1 / sigma^2
19 | :return:
20 | '''
21 | sigma_2 = sigma**2
22 |
23 | box_diff = bbox_pred - bbox_targets
24 |
25 | abs_box_diff = tf.abs(box_diff)
26 |
27 | smoothL1_sign = tf.stop_gradient(
28 | tf.to_float(tf.less(abs_box_diff, 1. / sigma_2)))
29 | loss_box = tf.pow(box_diff, 2) * (sigma_2 / 2.0) * smoothL1_sign \
30 | + (abs_box_diff - (0.5 / sigma_2)) * (1.0 - smoothL1_sign)
31 | return loss_box
32 |
33 | def smooth_l1_loss_rpn(bbox_pred, bbox_targets, label, sigma=1.0):
34 | '''
35 |
36 | :param bbox_pred: [-1, 4]
37 | :param bbox_targets: [-1, 4]
38 | :param label: [-1]
39 | :param sigma:
40 | :return:
41 | '''
42 | value = _smooth_l1_loss_base(bbox_pred, bbox_targets, sigma=sigma)
43 | value = tf.reduce_sum(value, axis=1) # to sum in axis 1
44 | rpn_select = tf.where(tf.greater(label, 0))
45 |
46 | # rpn_select = tf.stop_gradient(rpn_select) # to avoid
47 | selected_value = tf.gather(value, rpn_select)
48 | non_ignored_mask = tf.stop_gradient(
49 | 1.0 - tf.to_float(tf.equal(label, -1)))  # ignored anchors (-1) are 0.0, all others are 1.0
50 |
51 | bbox_loss = tf.reduce_sum(selected_value) / tf.maximum(1.0, tf.reduce_sum(non_ignored_mask))
52 |
53 | return bbox_loss
54 |
55 |
56 |
57 | def smooth_l1_loss_rcnn(bbox_pred, bbox_targets, label, num_classes, sigma=1.0):
58 | '''
59 |
60 | :param bbox_pred: [-1, (cfgs.CLS_NUM +1) * 4]
61 | :param bbox_targets:[-1, (cfgs.CLS_NUM +1) * 4]
62 | :param label:[-1]
63 | :param num_classes:
64 | :param sigma:
65 | :return:
66 | '''
67 |
68 | outside_mask = tf.stop_gradient(tf.to_float(tf.greater(label, 0)))
69 |
70 | bbox_pred = tf.reshape(bbox_pred, [-1, num_classes, 4])
71 | bbox_targets = tf.reshape(bbox_targets, [-1, num_classes, 4])
72 |
73 | value = _smooth_l1_loss_base(bbox_pred,
74 | bbox_targets,
75 | sigma=sigma)
76 | value = tf.reduce_sum(value, 2)
77 | value = tf.reshape(value, [-1, num_classes])
78 |
79 | inside_mask = tf.one_hot(tf.reshape(label, [-1, 1]),
80 | depth=num_classes, axis=1)
81 |
82 | inside_mask = tf.stop_gradient(
83 | tf.to_float(tf.reshape(inside_mask, [-1, num_classes])))
84 |
85 | normalizer = tf.to_float(tf.shape(bbox_pred)[0])
86 | bbox_loss = tf.reduce_sum(
87 | tf.reduce_sum(value * inside_mask, 1)*outside_mask) / normalizer
88 |
89 | return bbox_loss
90 |
91 |
92 | def sum_ohem_loss(cls_score, label, bbox_pred, bbox_targets,
93 | num_classes, num_ohem_samples=256, sigma=1.0):
94 | '''
95 |
96 | :param cls_score: [-1, cls_num+1]
97 | :param label: [-1]
98 | :param bbox_pred: [-1, 4*(cls_num+1)]
99 | :param bbox_targets: [-1, 4*(cls_num+1)]
100 | :param num_ohem_samples: 256 by default
101 | :param num_classes: cls_num+1
102 | :param sigma:
103 | :return:
104 | '''
105 |
106 | cls_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=cls_score, labels=label) # [-1, ]
107 | # cls_loss = tf.Print(cls_loss, [tf.shape(cls_loss)], summarize=10, message='CLS losss shape ****')
108 |
109 | outside_mask = tf.stop_gradient(tf.to_float(tf.greater(label, 0)))
110 | bbox_pred = tf.reshape(bbox_pred, [-1, num_classes, 4])
111 | bbox_targets = tf.reshape(bbox_targets, [-1, num_classes, 4])
112 |
113 | value = _smooth_l1_loss_base(bbox_pred,
114 | bbox_targets,
115 | sigma=sigma)
116 | value = tf.reduce_sum(value, 2)
117 | value = tf.reshape(value, [-1, num_classes])
118 |
119 | inside_mask = tf.one_hot(tf.reshape(label, [-1, 1]),
120 | depth=num_classes, axis=1)
121 |
122 | inside_mask = tf.stop_gradient(
123 | tf.to_float(tf.reshape(inside_mask, [-1, num_classes])))
124 | loc_loss = tf.reduce_sum(value * inside_mask, 1)*outside_mask
125 | # loc_loss = tf.Print(loc_loss, [tf.shape(loc_loss)], summarize=10, message='loc_loss shape***')
126 |
127 | sum_loss = cls_loss + loc_loss
128 |
129 | num_ohem_samples = tf.stop_gradient(tf.minimum(num_ohem_samples, tf.shape(sum_loss)[0]))
130 | _, top_k_indices = tf.nn.top_k(sum_loss, k=num_ohem_samples)
131 |
132 | cls_loss_ohem = tf.gather(cls_loss, top_k_indices)
133 | cls_loss_ohem = tf.reduce_mean(cls_loss_ohem)
134 |
135 | loc_loss_ohem = tf.gather(loc_loss, top_k_indices)
136 | normalizer = tf.to_float(num_ohem_samples)
137 | loc_loss_ohem = tf.reduce_sum(loc_loss_ohem) / normalizer
138 |
139 | return cls_loss_ohem, loc_loss_ohem
140 |
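In math form, _smooth_l1_loss_base above computes, per coordinate difference x = bbox_pred - bbox_targets:

$$
\mathrm{smooth}_{L_1}(x) =
\begin{cases}
\dfrac{\sigma^2}{2}\, x^2 & \text{if } |x| < 1/\sigma^2, \\[4pt]
|x| - \dfrac{0.5}{\sigma^2} & \text{otherwise,}
\end{cases}
$$

which reduces to the classic 0.5 x^2 / (|x| - 0.5) Huber form when sigma = 1.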
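And a toy re-enactment of the top-k hard-example selection in sum_ohem_loss, with invented per-RoI losses:

import numpy as np

cls_loss = np.array([0.10, 2.30, 0.05, 1.70])      # per-RoI classification loss
loc_loss = np.array([0.00, 0.90, 0.00, 0.40])      # per-RoI localization loss
sum_loss = cls_loss + loc_loss

k = min(2, sum_loss.shape[0])                      # num_ohem_samples, capped by batch size
top_k_indices = np.argsort(-sum_loss)[:k]          # hardest examples: indices 1 and 3
cls_loss_ohem = cls_loss[top_k_indices].mean()     # 2.0
loc_loss_ohem = loc_loss[top_k_indices].sum() / k  # 0.65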
--------------------------------------------------------------------------------
/libs/losses/losses.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/losses/losses.pyc
--------------------------------------------------------------------------------
/libs/losses/tfapi_loss.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | These losses are derived from the TensorFlow detection API,
4 | modified by yjr to fit this project.
5 | """
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 |
10 | import tensorflow as tf
11 |
12 |
13 | def _smooth_l1_loss_base(bbox_pred, bbox_targets, weights):
14 | '''
15 | Smooth L1 localization loss function, aka Huber loss.
16 |
17 | The smooth L1 loss is defined elementwise as 0.5 x^2 if |x| <= delta and
18 | 0.5 delta^2 + delta * (|x| - delta) otherwise, where x is the difference
19 | between prediction and target.
20 |
21 | See also Equation (3) in the Fast R-CNN paper by Ross Girshick (ICCV 2015)
22 |
23 | :param bbox_pred: [-1, 4] in RPN. [-1, (cls_num+1) * 4] in Fast-rcnn
24 | :param bbox_targets: shape is same as bbox_pred
25 | :param weights: [-1], per-row weight broadcast over the 4 box coordinates
26 | :return:
27 | '''
28 | loss_box = tf.losses.huber_loss(labels=bbox_targets,
29 | predictions=bbox_pred,
30 | weights=tf.expand_dims(weights, axis=1), # (n, ) -->(n, 1)
31 | delta=1.0,
32 | loss_collection=None,
33 | reduction=tf.losses.Reduction.NONE)
34 | return loss_box
35 |
36 | def smooth_l1_loss_rpn(bbox_pred, bbox_targets, label, sigma=1.0):
37 | '''
38 |
39 | :param bbox_pred: [-1, 4]
40 | :param bbox_targets: [-1, 4]
41 | :param label: [-1]
42 | :param sigma:
43 | :return:
44 | '''
45 | rpn_selected = tf.to_float(tf.greater(label, 0))  # keep only the positive anchors
46 |
47 | value = _smooth_l1_loss_base(bbox_pred, bbox_targets, weights=rpn_selected)
48 | value = tf.reduce_sum(value, axis=1) # to sum in axis 1
49 |
50 | non_ignored_mask = tf.stop_gradient(
51 | tf.to_float(tf.not_equal(label, -1)))  # positive and negative are 1.0; ignored (-1) is 0.0
52 |
53 | bbox_loss = tf.reduce_sum(value) / tf.maximum(1.0, tf.reduce_sum(non_ignored_mask))
54 |
55 | return bbox_loss
56 |
57 |
58 |
59 | def smooth_l1_loss_rcnn(bbox_pred, bbox_targets, label, num_classes, sigma=1.0):
60 | '''
61 |
62 | :param bbox_pred: [-1, (cfgs.CLS_NUM +1) * 4]
63 | :param bbox_targets:[-1, (cfgs.CLS_NUM +1) * 4]
64 | :param label:[-1]
65 | :param num_classes:
66 | :param sigma: unused here; huber_loss is called with a fixed delta of 1.0
67 | :return:
68 | '''
69 |
70 | outside_mask = tf.stop_gradient(tf.to_float(tf.greater(label, 0)))
71 |
72 | # bbox_pred = tf.reshape(bbox_pred, [-1, num_classes, 4])
73 | # bbox_targets = tf.reshape(bbox_targets, [-1, num_classes, 4])
74 |
75 | value = _smooth_l1_loss_base(bbox_pred,
76 | bbox_targets,
77 | weights=outside_mask) # [-1, (num_classes)*4]
78 | value = tf.reshape(value, [-1, num_classes, 4])
79 |
80 | value = tf.reduce_sum(value, 2)
81 | value = tf.reshape(value, [-1, num_classes])
82 |
83 | inside_mask = tf.one_hot(tf.reshape(label, [-1, 1]),
84 | depth=num_classes, axis=1)
85 |
86 | inside_mask = tf.stop_gradient(
87 | tf.to_float(tf.reshape(inside_mask, [-1, num_classes])))
88 |
89 | normalizer = tf.to_float(tf.shape(bbox_pred)[0])
90 |
91 | bbox_loss = tf.reduce_sum(
92 | tf.reduce_sum(value * inside_mask, 1)*outside_mask) / normalizer
93 |
94 | return bbox_loss
95 |
96 |
97 | def sum_ohem_loss(cls_score, label, bbox_pred, bbox_targets,
98 | num_classes, num_ohem_samples=256, sigma=1.0):
99 | '''
100 |
101 | :param cls_score: [-1, cls_num+1]
102 | :param label: [-1]
103 | :param bbox_pred: [-1, 4*(cls_num+1)]
104 | :param bbox_targets: [-1, 4*(cls_num+1)]
105 | :param num_ohem_samples: 256 by default
106 | :param num_classes: cls_num+1
107 | :param sigma:
108 | :return:
109 | '''
110 |
111 | # cls_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=cls_score, labels=label) # [-1, ]
112 | # # cls_loss = tf.Print(cls_loss, [tf.shape(cls_loss)], summarize=10, message='CLS losss shape ****')
113 | #
114 | # outside_mask = tf.stop_gradient(tf.to_float(tf.greater(label, 0)))
115 | # bbox_pred = tf.reshape(bbox_pred, [-1, num_classes, 4])
116 | # bbox_targets = tf.reshape(bbox_targets, [-1, num_classes, 4])
117 | #
118 | # value = _smooth_l1_loss_base(bbox_pred,
119 | # bbox_targets,
120 | # sigma=sigma)
121 | # value = tf.reduce_sum(value, 2)
122 | # value = tf.reshape(value, [-1, num_classes])
123 | #
124 | # inside_mask = tf.one_hot(tf.reshape(label, [-1, 1]),
125 | # depth=num_classes, axis=1)
126 | #
127 | # inside_mask = tf.stop_gradient(
128 | # tf.to_float(tf.reshape(inside_mask, [-1, num_classes])))
129 | # loc_loss = tf.reduce_sum(value * inside_mask, 1)*outside_mask
130 | # # loc_loss = tf.Print(loc_loss, [tf.shape(loc_loss)], summarize=10, message='loc_loss shape***')
131 | #
132 | # sum_loss = cls_loss + loc_loss
133 | #
134 | # num_ohem_samples = tf.stop_gradient(tf.minimum(num_ohem_samples, tf.shape(sum_loss)[0]))
135 | # _, top_k_indices = tf.nn.top_k(sum_loss, k=num_ohem_samples)
136 | #
137 | # cls_loss_ohem = tf.gather(cls_loss, top_k_indices)
138 | # cls_loss_ohem = tf.reduce_mean(cls_loss_ohem)
139 | #
140 | # loc_loss_ohem = tf.gather(loc_loss, top_k_indices)
141 | # normalizer = tf.to_float(num_ohem_samples)
142 | # loc_loss_ohem = tf.reduce_sum(loc_loss_ohem) / normalizer
143 | #
144 | # return cls_loss_ohem, loc_loss_ohem
145 |
146 | raise NotImplementedError('ohem not implemented')
147 |
148 |
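Since this file swaps the hand-rolled branches for tf.losses.huber_loss, here is a small numpy check (no TF needed) that the two closed forms agree at sigma = 1, delta = 1; the sample points are arbitrary:

import numpy as np

x = np.array([-2.0, -0.3, 0.0, 0.7, 1.5])    # prediction - target
smooth_l1 = np.where(np.abs(x) < 1.0, 0.5 * x**2, np.abs(x) - 0.5)   # losses.py form, sigma=1
delta = 1.0
huber = np.where(np.abs(x) <= delta, 0.5 * x**2,
                 0.5 * delta**2 + delta * (np.abs(x) - delta))       # huber form, delta=1
print(np.allclose(smooth_l1, huber))         # True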
--------------------------------------------------------------------------------
/libs/losses/tfapi_loss.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/losses/tfapi_loss.pyc
--------------------------------------------------------------------------------
/libs/networks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/__init__.py
--------------------------------------------------------------------------------
/libs/networks/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/__init__.pyc
--------------------------------------------------------------------------------
/libs/networks/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/libs/networks/build_whole_network.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/build_whole_network.pyc
--------------------------------------------------------------------------------
/libs/networks/mobilenet/README.md:
--------------------------------------------------------------------------------
1 | # Mobilenet V2
2 | This folder contains building code for MobileNet V2, based on
3 | [Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification,
4 | Detection and Segmentation](https://arxiv.org/abs/1801.04381)
5 |
6 | # Pretrained model
7 | TODO
8 |
9 | # Example
10 | TODO
11 |
12 |
13 |
--------------------------------------------------------------------------------
/libs/networks/mobilenet/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/mobilenet/__init__.py
--------------------------------------------------------------------------------
/libs/networks/mobilenet/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/mobilenet/__init__.pyc
--------------------------------------------------------------------------------
/libs/networks/mobilenet/conv_blocks.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/mobilenet/conv_blocks.pyc
--------------------------------------------------------------------------------
/libs/networks/mobilenet/mobilenet.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/mobilenet/mobilenet.pyc
--------------------------------------------------------------------------------
/libs/networks/mobilenet/mobilenet_v2.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/mobilenet/mobilenet_v2.pyc
--------------------------------------------------------------------------------
/libs/networks/mobilenet_v2.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import, print_function, division
4 | import tensorflow.contrib.slim as slim
5 | import tensorflow as tf
6 |
7 | from libs.networks.mobilenet import mobilenet_v2
8 | from libs.networks.mobilenet.mobilenet import training_scope
9 | from libs.networks.mobilenet.mobilenet_v2 import op
10 | from libs.networks.mobilenet.mobilenet_v2 import ops
11 | expand_input = ops.expand_input_by_factor
12 |
13 | V2_BASE_DEF = dict(
14 | defaults={
15 | # Note: these parameters of batch norm affect the architecture
16 | # that's why they are here and not in training_scope.
17 | (slim.batch_norm,): {'center': True, 'scale': True},
18 | (slim.conv2d, slim.fully_connected, slim.separable_conv2d): {
19 | 'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6
20 | },
21 | (ops.expanded_conv,): {
22 | 'expansion_size': expand_input(6),
23 | 'split_expansion': 1,
24 | 'normalizer_fn': slim.batch_norm,
25 | 'residual': True
26 | },
27 | (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME'}
28 | },
29 | spec=[
30 | op(slim.conv2d, stride=2, num_outputs=32, kernel_size=[3, 3]),
31 | op(ops.expanded_conv,
32 | expansion_size=expand_input(1, divisible_by=1),
33 | num_outputs=16, scope='expanded_conv'),
34 | op(ops.expanded_conv, stride=2, num_outputs=24, scope='expanded_conv_1'),
35 | op(ops.expanded_conv, stride=1, num_outputs=24, scope='expanded_conv_2'),
36 | op(ops.expanded_conv, stride=2, num_outputs=32, scope='expanded_conv_3'),
37 | op(ops.expanded_conv, stride=1, num_outputs=32, scope='expanded_conv_4'),
38 | op(ops.expanded_conv, stride=1, num_outputs=32, scope='expanded_conv_5'),
39 | op(ops.expanded_conv, stride=2, num_outputs=64, scope='expanded_conv_6'),
40 | op(ops.expanded_conv, stride=1, num_outputs=64, scope='expanded_conv_7'),
41 | op(ops.expanded_conv, stride=1, num_outputs=64, scope='expanded_conv_8'),
42 | op(ops.expanded_conv, stride=1, num_outputs=64, scope='expanded_conv_9'),
43 | op(ops.expanded_conv, stride=1, num_outputs=96, scope='expanded_conv_10'),
44 | op(ops.expanded_conv, stride=1, num_outputs=96, scope='expanded_conv_11'),
45 | op(ops.expanded_conv, stride=1, num_outputs=96, scope='expanded_conv_12')
46 | ],
47 | )
48 |
49 |
50 | V2_HEAD_DEF = dict(
51 | defaults={
52 | # Note: these parameters of batch norm affect the architecture
53 | # that's why they are here and not in training_scope.
54 | (slim.batch_norm,): {'center': True, 'scale': True},
55 | (slim.conv2d, slim.fully_connected, slim.separable_conv2d): {
56 | 'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6
57 | },
58 | (ops.expanded_conv,): {
59 | 'expansion_size': expand_input(6),
60 | 'split_expansion': 1,
61 | 'normalizer_fn': slim.batch_norm,
62 | 'residual': True
63 | },
64 | (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME'}
65 | },
66 | spec=[
67 | op(ops.expanded_conv, stride=2, num_outputs=160, scope='expanded_conv_13'),
68 | op(ops.expanded_conv, stride=1, num_outputs=160, scope='expanded_conv_14'),
69 | op(ops.expanded_conv, stride=1, num_outputs=160, scope='expanded_conv_15'),
70 | op(ops.expanded_conv, stride=1, num_outputs=320, scope='expanded_conv_16'),
71 | op(slim.conv2d, stride=1, kernel_size=[1, 1], num_outputs=1280, scope='Conv_1')
72 | ],
73 | )
74 |
75 | def mobilenetv2_scope(is_training=True,
76 | trainable=True,
77 | weight_decay=0.00004,
78 | stddev=0.09,
79 | dropout_keep_prob=0.8,
80 | bn_decay=0.997):
81 | """Defines Mobilenet training scope.
82 | In default. We do not use BN
83 |
84 | ReWrite the scope.
85 | """
86 | batch_norm_params = {
87 | 'is_training': False,
88 | 'trainable': False,
89 | 'decay': bn_decay,
90 | }
91 | with slim.arg_scope(training_scope(is_training=is_training, weight_decay=weight_decay)):
92 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.separable_conv2d],
93 | trainable=trainable):
94 | with slim.arg_scope([slim.batch_norm], **batch_norm_params) as sc:
95 | return sc
96 |
97 |
98 |
99 | def mobilenetv2_base(img_batch, is_training=True):
100 |
101 | with slim.arg_scope(mobilenetv2_scope(is_training=is_training, trainable=True)):
102 |
103 | feature_to_crop, endpoints = mobilenet_v2.mobilenet_base(input_tensor=img_batch,
104 | num_classes=None,
105 | is_training=False,
106 | depth_multiplier=1.0,
107 | scope='MobilenetV2',
108 | conv_defs=V2_BASE_DEF,
109 | finegrain_classification_mode=False)
110 |
111 | # feature_to_crop = tf.Print(feature_to_crop, [tf.shape(feature_to_crop)], summarize=10, message='rpn_shape')
112 | return feature_to_crop
113 |
114 |
115 | def mobilenetv2_head(inputs, is_training=True):
116 | with slim.arg_scope(mobilenetv2_scope(is_training=is_training, trainable=True)):
117 | net, _ = mobilenet_v2.mobilenet(input_tensor=inputs,
118 | num_classes=None,
119 | is_training=False,
120 | depth_multiplier=1.0,
121 | scope='MobilenetV2',
122 | conv_defs=V2_HEAD_DEF,
123 | finegrain_classification_mode=False)
124 |
125 | net = tf.squeeze(net, [1, 2])
126 |
127 | return net
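The split above mirrors how Faster R-CNN consumes the backbone: V2_BASE_DEF stops at expanded_conv_12 (overall stride 16, 96 output channels) and feeds the RPN and RoI cropping, while V2_HEAD_DEF holds the remaining stride-2 stage plus Conv_1, run per RoI. A minimal shape sketch, assuming TF 1.x and that this module is importable:

import tensorflow as tf
from libs.networks.mobilenet_v2 import mobilenetv2_base

# hypothetical input size; any HxW works since the base net is fully convolutional
img = tf.placeholder(tf.float32, shape=[1, 600, 1000, 3])
feat = mobilenetv2_base(img, is_training=False)
# feat is the stride-16 feature map, roughly [1, 600/16, 1000/16, 96]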
--------------------------------------------------------------------------------
/libs/networks/mobilenet_v2.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/mobilenet_v2.pyc
--------------------------------------------------------------------------------
/libs/networks/resnet.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import, print_function, division
4 |
5 |
6 | import tensorflow as tf
7 | import tensorflow.contrib.slim as slim
8 | from libs.configs import cfgs
9 | from tensorflow.contrib.slim.nets import resnet_v1
10 | from tensorflow.contrib.slim.nets import resnet_utils
11 | from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
12 | # import tfplot as tfp
13 |
14 | def resnet_arg_scope(
15 | is_training=True, weight_decay=cfgs.WEIGHT_DECAY, batch_norm_decay=0.997,
16 | batch_norm_epsilon=1e-5, batch_norm_scale=True):
17 | '''
18 |
19 | By default, we do not use BN to train ResNet, since the batch size is too small.
20 | So is_training is False and trainable is False in the batch_norm params.
21 |
22 | '''
23 | batch_norm_params = {
24 | 'is_training': False, 'decay': batch_norm_decay,
25 | 'epsilon': batch_norm_epsilon, 'scale': batch_norm_scale,
26 | 'trainable': False,
27 | 'updates_collections': tf.GraphKeys.UPDATE_OPS
28 | }
29 |
30 | with slim.arg_scope(
31 | [slim.conv2d],
32 | weights_regularizer=slim.l2_regularizer(weight_decay),
33 | weights_initializer=slim.variance_scaling_initializer(),
34 | trainable=is_training,
35 | activation_fn=tf.nn.relu,
36 | normalizer_fn=slim.batch_norm,
37 | normalizer_params=batch_norm_params):
38 | with slim.arg_scope([slim.batch_norm], **batch_norm_params) as arg_sc:
39 | return arg_sc
40 |
41 |
42 | # def add_heatmap(feature_maps, name):
43 | # '''
44 | #
45 | # :param feature_maps:[B, H, W, C]
46 | # :return:
47 | # '''
48 | #
49 | # def figure_attention(activation):
50 | # fig, ax = tfp.subplots()
51 | # im = ax.imshow(activation, cmap='jet')
52 | # fig.colorbar(im)
53 | # return fig
54 | #
55 | # heatmap = tf.reduce_sum(feature_maps, axis=-1)
56 | # heatmap = tf.squeeze(heatmap, axis=0)
57 | # tfp.summary.plot(name, figure_attention, [heatmap])
58 |
59 |
60 | def resnet_base(img_batch, scope_name, is_training=True):
61 | '''
62 | this code is derived from light-head rcnn.
63 | https://github.com/zengarden/light_head_rcnn
64 |
65 | It is convenient for freezing blocks, so we adopt this mode.
66 | '''
67 | if scope_name == 'resnet_v1_50':
68 | middle_num_units = 6
69 | elif scope_name == 'resnet_v1_101':
70 | middle_num_units = 23
71 | else:
72 | raise NotImplementedError('We only support resnet_v1_50 or resnet_v1_101. Check your network name....yjr')
73 |
74 | blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
75 | resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
76 | # use stride 1 for the last conv4 layer.
77 |
78 | resnet_v1_block('block3', base_depth=256, num_units=middle_num_units, stride=1)]
79 | # when using FPN, the stride list is [1, 2, 2]
80 |
81 | with slim.arg_scope(resnet_arg_scope(is_training=False)):
82 | with tf.variable_scope(scope_name, scope_name):
83 | # Do the first few layers manually, because 'SAME' padding can behave inconsistently
84 | # for images of different sizes: sometimes 0, sometimes 1
85 | net = resnet_utils.conv2d_same(
86 | img_batch, 64, 7, stride=2, scope='conv1')
87 | net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
88 | net = slim.max_pool2d(
89 | net, [3, 3], stride=2, padding='VALID', scope='pool1')
90 |
91 | not_freezed = [False] * cfgs.FIXED_BLOCKS + (4-cfgs.FIXED_BLOCKS)*[True]
92 | # Fixed_Blocks can be 1~3
93 |
94 | with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
95 | C2, _ = resnet_v1.resnet_v1(net,
96 | blocks[0:1],
97 | global_pool=False,
98 | include_root_block=False,
99 | scope=scope_name)
100 |
101 | # C2 = tf.Print(C2, [tf.shape(C2)], summarize=10, message='C2_shape')
102 | # add_heatmap(C2, 'Layer/C2')
103 |
104 | with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
105 | C3, _ = resnet_v1.resnet_v1(C2,
106 | blocks[1:2],
107 | global_pool=False,
108 | include_root_block=False,
109 | scope=scope_name)
110 | # add_heatmap(C3, name='Layer/C3')
111 | # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape')
112 |
113 | with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
114 | C4, _ = resnet_v1.resnet_v1(C3,
115 | blocks[2:3],
116 | global_pool=False,
117 | include_root_block=False,
118 | scope=scope_name)
119 | # add_heatmap(C4, name='Layer/C4')
120 | # C4 = tf.Print(C4, [tf.shape(C4)], summarize=10, message='C4_shape')
121 | return C4
122 |
123 |
124 | def restnet_head(input, is_training, scope_name):
125 | block4 = [resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)]
126 |
127 | with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
128 | C5, _ = resnet_v1.resnet_v1(input,
129 | block4,
130 | global_pool=False,
131 | include_root_block=False,
132 | scope=scope_name)
133 | # C5 = tf.Print(C5, [tf.shape(C5)], summarize=10, message='C5_shape')
134 | C5_flatten = tf.reduce_mean(C5, axis=[1, 2], keep_dims=False, name='global_average_pooling')
135 | # C5_flatten = tf.Print(C5_flatten, [tf.shape(C5_flatten)], summarize=10, message='C5_flatten_shape')
136 |
137 | # global average pooling C5 to obtain fc layers
138 | return C5_flatten
139 |
140 |
141 |
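The not_freezed mask in resnet_base above deserves a concrete look; a tiny illustration with a hypothetical cfgs.FIXED_BLOCKS value:

FIXED_BLOCKS = 1   # hypothetical config value; the comment above allows 1~3
not_freezed = [False] * FIXED_BLOCKS + (4 - FIXED_BLOCKS) * [True]
print(not_freezed)   # [False, True, True, True]
# block1 stays frozen (is_training=False); block2, block3 and the head may train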
--------------------------------------------------------------------------------
/libs/networks/resnet.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/resnet.pyc
--------------------------------------------------------------------------------
/libs/networks/slim_nets/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/libs/networks/slim_nets/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/__init__.pyc
--------------------------------------------------------------------------------
/libs/networks/slim_nets/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/libs/networks/slim_nets/__pycache__/inception_resnet_v2.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/__pycache__/inception_resnet_v2.cpython-35.pyc
--------------------------------------------------------------------------------
/libs/networks/slim_nets/__pycache__/mobilenet_v1.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/__pycache__/mobilenet_v1.cpython-35.pyc
--------------------------------------------------------------------------------
/libs/networks/slim_nets/__pycache__/resnet_utils.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/__pycache__/resnet_utils.cpython-35.pyc
--------------------------------------------------------------------------------
/libs/networks/slim_nets/__pycache__/resnet_v1.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/__pycache__/resnet_v1.cpython-35.pyc
--------------------------------------------------------------------------------
/libs/networks/slim_nets/__pycache__/vgg.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/__pycache__/vgg.cpython-35.pyc
--------------------------------------------------------------------------------
/libs/networks/slim_nets/alexnet.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains a model definition for AlexNet.
16 |
17 | This work was first described in:
18 | ImageNet Classification with Deep Convolutional Neural Networks
19 | Alex Krizhevsky, Ilya Sutskever and Geoffrey E. Hinton
20 |
21 | and later refined in:
22 | One weird trick for parallelizing convolutional neural networks
23 | Alex Krizhevsky, 2014
24 |
25 | Here we provide the implementation proposed in "One weird trick" and not
26 | "ImageNet Classification", as per the paper, the LRN layers have been removed.
27 |
28 | Usage:
29 | with slim.arg_scope(alexnet.alexnet_v2_arg_scope()):
30 | outputs, end_points = alexnet.alexnet_v2(inputs)
31 |
32 | @@alexnet_v2
33 | """
34 |
35 | from __future__ import absolute_import
36 | from __future__ import division
37 | from __future__ import print_function
38 |
39 | import tensorflow as tf
40 |
41 | slim = tf.contrib.slim
42 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)
43 |
44 |
45 | def alexnet_v2_arg_scope(weight_decay=0.0005):
46 | with slim.arg_scope([slim.conv2d, slim.fully_connected],
47 | activation_fn=tf.nn.relu,
48 | biases_initializer=tf.constant_initializer(0.1),
49 | weights_regularizer=slim.l2_regularizer(weight_decay)):
50 | with slim.arg_scope([slim.conv2d], padding='SAME'):
51 | with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc:
52 | return arg_sc
53 |
54 |
55 | def alexnet_v2(inputs,
56 | num_classes=1000,
57 | is_training=True,
58 | dropout_keep_prob=0.5,
59 | spatial_squeeze=True,
60 | scope='alexnet_v2'):
61 | """AlexNet version 2.
62 |
63 | Described in: http://arxiv.org/pdf/1404.5997v2.pdf
64 | Parameters from:
65 | github.com/akrizhevsky/cuda-convnet2/blob/master/layers/
66 | layers-imagenet-1gpu.cfg
67 |
68 | Note: All the fully_connected layers have been transformed to conv2d layers.
69 | To use in classification mode, resize input to 224x224. To use in fully
70 | convolutional mode, set spatial_squeeze to false.
71 | The LRN layers have been removed and the initializers changed from
72 | random_normal_initializer to xavier_initializer.
73 |
74 | Args:
75 | inputs: a tensor of size [batch_size, height, width, channels].
76 | num_classes: number of predicted classes.
77 | is_training: whether or not the model is being trained.
78 | dropout_keep_prob: the probability that activations are kept in the dropout
79 | layers during training.
80 | spatial_squeeze: whether or not to squeeze the spatial dimensions of the
81 | outputs. Useful to remove unnecessary dimensions for classification.
82 | scope: Optional scope for the variables.
83 |
84 | Returns:
85 | the last op containing the log predictions and end_points dict.
86 | """
87 | with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:
88 | end_points_collection = sc.name + '_end_points'
89 | # Collect outputs for conv2d, fully_connected and max_pool2d.
90 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
91 | outputs_collections=[end_points_collection]):
92 | net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID',
93 | scope='conv1')
94 | net = slim.max_pool2d(net, [3, 3], 2, scope='pool1')
95 | net = slim.conv2d(net, 192, [5, 5], scope='conv2')
96 | net = slim.max_pool2d(net, [3, 3], 2, scope='pool2')
97 | net = slim.conv2d(net, 384, [3, 3], scope='conv3')
98 | net = slim.conv2d(net, 384, [3, 3], scope='conv4')
99 | net = slim.conv2d(net, 256, [3, 3], scope='conv5')
100 | net = slim.max_pool2d(net, [3, 3], 2, scope='pool5')
101 |
102 | # Use conv2d instead of fully_connected layers.
103 | with slim.arg_scope([slim.conv2d],
104 | weights_initializer=trunc_normal(0.005),
105 | biases_initializer=tf.constant_initializer(0.1)):
106 | net = slim.conv2d(net, 4096, [5, 5], padding='VALID',
107 | scope='fc6')
108 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
109 | scope='dropout6')
110 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
111 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
112 | scope='dropout7')
113 | net = slim.conv2d(net, num_classes, [1, 1],
114 | activation_fn=None,
115 | normalizer_fn=None,
116 | biases_initializer=tf.zeros_initializer(),
117 | scope='fc8')
118 |
119 | # Convert end_points_collection into an end_point dict.
120 | end_points = slim.utils.convert_collection_to_dict(end_points_collection)
121 | if spatial_squeeze:
122 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
123 | end_points[sc.name + '/fc8'] = net
124 | return net, end_points
125 | alexnet_v2.default_image_size = 224
126 |
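As the docstring notes, setting spatial_squeeze=False turns the network fully convolutional; a brief sketch, with shapes taken from the accompanying test file:

import tensorflow as tf
from libs.networks.slim_nets.alexnet import alexnet_v2

inputs = tf.random_uniform((1, 300, 400, 3))
logits, end_points = alexnet_v2(inputs, num_classes=1000, spatial_squeeze=False)
# logits: [1, 4, 7, 1000] -- a spatial grid of class scores instead of one vector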
--------------------------------------------------------------------------------
/libs/networks/slim_nets/alexnet_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Tests for slim.slim_nets.alexnet."""
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | import tensorflow as tf
21 |
22 | from libs.networks.slim_nets import alexnet
23 |
24 | slim = tf.contrib.slim
25 |
26 |
27 | class AlexnetV2Test(tf.test.TestCase):
28 |
29 | def testBuild(self):
30 | batch_size = 5
31 | height, width = 224, 224
32 | num_classes = 1000
33 | with self.test_session():
34 | inputs = tf.random_uniform((batch_size, height, width, 3))
35 | logits, _ = alexnet.alexnet_v2(inputs, num_classes)
36 | self.assertEquals(logits.op.name, 'alexnet_v2/fc8/squeezed')
37 | self.assertListEqual(logits.get_shape().as_list(),
38 | [batch_size, num_classes])
39 |
40 | def testFullyConvolutional(self):
41 | batch_size = 1
42 | height, width = 300, 400
43 | num_classes = 1000
44 | with self.test_session():
45 | inputs = tf.random_uniform((batch_size, height, width, 3))
46 | logits, _ = alexnet.alexnet_v2(inputs, num_classes, spatial_squeeze=False)
47 | self.assertEquals(logits.op.name, 'alexnet_v2/fc8/BiasAdd')
48 | self.assertListEqual(logits.get_shape().as_list(),
49 | [batch_size, 4, 7, num_classes])
50 |
51 | def testEndPoints(self):
52 | batch_size = 5
53 | height, width = 224, 224
54 | num_classes = 1000
55 | with self.test_session():
56 | inputs = tf.random_uniform((batch_size, height, width, 3))
57 | _, end_points = alexnet.alexnet_v2(inputs, num_classes)
58 | expected_names = ['alexnet_v2/conv1',
59 | 'alexnet_v2/pool1',
60 | 'alexnet_v2/conv2',
61 | 'alexnet_v2/pool2',
62 | 'alexnet_v2/conv3',
63 | 'alexnet_v2/conv4',
64 | 'alexnet_v2/conv5',
65 | 'alexnet_v2/pool5',
66 | 'alexnet_v2/fc6',
67 | 'alexnet_v2/fc7',
68 | 'alexnet_v2/fc8'
69 | ]
70 | self.assertSetEqual(set(end_points.keys()), set(expected_names))
71 |
72 | def testModelVariables(self):
73 | batch_size = 5
74 | height, width = 224, 224
75 | num_classes = 1000
76 | with self.test_session():
77 | inputs = tf.random_uniform((batch_size, height, width, 3))
78 | alexnet.alexnet_v2(inputs, num_classes)
79 | expected_names = ['alexnet_v2/conv1/weights',
80 | 'alexnet_v2/conv1/biases',
81 | 'alexnet_v2/conv2/weights',
82 | 'alexnet_v2/conv2/biases',
83 | 'alexnet_v2/conv3/weights',
84 | 'alexnet_v2/conv3/biases',
85 | 'alexnet_v2/conv4/weights',
86 | 'alexnet_v2/conv4/biases',
87 | 'alexnet_v2/conv5/weights',
88 | 'alexnet_v2/conv5/biases',
89 | 'alexnet_v2/fc6/weights',
90 | 'alexnet_v2/fc6/biases',
91 | 'alexnet_v2/fc7/weights',
92 | 'alexnet_v2/fc7/biases',
93 | 'alexnet_v2/fc8/weights',
94 | 'alexnet_v2/fc8/biases',
95 | ]
96 | model_variables = [v.op.name for v in slim.get_model_variables()]
97 | self.assertSetEqual(set(model_variables), set(expected_names))
98 |
99 | def testEvaluation(self):
100 | batch_size = 2
101 | height, width = 224, 224
102 | num_classes = 1000
103 | with self.test_session():
104 | eval_inputs = tf.random_uniform((batch_size, height, width, 3))
105 | logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False)
106 | self.assertListEqual(logits.get_shape().as_list(),
107 | [batch_size, num_classes])
108 | predictions = tf.argmax(logits, 1)
109 | self.assertListEqual(predictions.get_shape().as_list(), [batch_size])
110 |
111 | def testTrainEvalWithReuse(self):
112 | train_batch_size = 2
113 | eval_batch_size = 1
114 | train_height, train_width = 224, 224
115 | eval_height, eval_width = 300, 400
116 | num_classes = 1000
117 | with self.test_session():
118 | train_inputs = tf.random_uniform(
119 | (train_batch_size, train_height, train_width, 3))
120 | logits, _ = alexnet.alexnet_v2(train_inputs)
121 | self.assertListEqual(logits.get_shape().as_list(),
122 | [train_batch_size, num_classes])
123 | tf.get_variable_scope().reuse_variables()
124 | eval_inputs = tf.random_uniform(
125 | (eval_batch_size, eval_height, eval_width, 3))
126 | logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False,
127 | spatial_squeeze=False)
128 | self.assertListEqual(logits.get_shape().as_list(),
129 | [eval_batch_size, 4, 7, num_classes])
130 | logits = tf.reduce_mean(logits, [1, 2])
131 | predictions = tf.argmax(logits, 1)
132 | self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])
133 |
134 | def testForward(self):
135 | batch_size = 1
136 | height, width = 224, 224
137 | with self.test_session() as sess:
138 | inputs = tf.random_uniform((batch_size, height, width, 3))
139 | logits, _ = alexnet.alexnet_v2(inputs)
140 | sess.run(tf.global_variables_initializer())
141 | output = sess.run(logits)
142 | self.assertTrue(output.any())
143 |
144 | if __name__ == '__main__':
145 | tf.test.main()
146 |
--------------------------------------------------------------------------------
/libs/networks/slim_nets/cifarnet.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains a variant of the CIFAR-10 model definition."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import tensorflow as tf
22 |
23 | slim = tf.contrib.slim
24 |
25 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(stddev=stddev)
26 |
27 |
28 | def cifarnet(images, num_classes=10, is_training=False,
29 | dropout_keep_prob=0.5,
30 | prediction_fn=slim.softmax,
31 | scope='CifarNet'):
32 | """Creates a variant of the CifarNet model.
33 |
34 | Note that since the output is a set of 'logits', the values fall in the
35 | interval of (-infinity, infinity). Consequently, to convert the outputs to a
36 | probability distribution over the classes, one will need to convert them
37 | using the softmax function:
38 |
39 | logits = cifarnet.cifarnet(images, is_training=False)
40 | probabilities = tf.nn.softmax(logits)
41 | predictions = tf.argmax(logits, 1)
42 |
43 | Args:
44 | images: A batch of `Tensors` of size [batch_size, height, width, channels].
45 | num_classes: the number of classes in the dataset.
46 | is_training: specifies whether or not we're currently training the model.
47 | This variable will determine the behaviour of the dropout layer.
48 | dropout_keep_prob: the percentage of activation values that are retained.
49 | prediction_fn: a function to get predictions out of logits.
50 | scope: Optional variable_scope.
51 |
52 | Returns:
53 | logits: the pre-softmax activations, a tensor of size
54 | [batch_size, `num_classes`]
55 | end_points: a dictionary from components of the network to the corresponding
56 | activation.
57 | """
58 | end_points = {}
59 |
60 | with tf.variable_scope(scope, 'CifarNet', [images, num_classes]):
61 | net = slim.conv2d(images, 64, [5, 5], scope='conv1')
62 | end_points['conv1'] = net
63 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
64 | end_points['pool1'] = net
65 | net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm1')
66 | net = slim.conv2d(net, 64, [5, 5], scope='conv2')
67 | end_points['conv2'] = net
68 | net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm2')
69 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool2')
70 | end_points['pool2'] = net
71 | net = slim.flatten(net)
72 | end_points['Flatten'] = net
73 | net = slim.fully_connected(net, 384, scope='fc3')
74 | end_points['fc3'] = net
75 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
76 | scope='dropout3')
77 | net = slim.fully_connected(net, 192, scope='fc4')
78 | end_points['fc4'] = net
79 | logits = slim.fully_connected(net, num_classes,
80 | biases_initializer=tf.zeros_initializer(),
81 | weights_initializer=trunc_normal(1/192.0),
82 | weights_regularizer=None,
83 | activation_fn=None,
84 | scope='logits')
85 |
86 | end_points['Logits'] = logits
87 | end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
88 |
89 | return logits, end_points
90 | cifarnet.default_image_size = 32
91 |
92 |
93 | def cifarnet_arg_scope(weight_decay=0.004):
94 | """Defines the default cifarnet argument scope.
95 |
96 | Args:
97 | weight_decay: The weight decay to use for regularizing the model.
98 |
99 | Returns:
100 | An `arg_scope` to use for the cifarnet model.
101 | """
102 | with slim.arg_scope(
103 | [slim.conv2d],
104 | weights_initializer=tf.truncated_normal_initializer(stddev=5e-2),
105 | activation_fn=tf.nn.relu):
106 | with slim.arg_scope(
107 | [slim.fully_connected],
108 | biases_initializer=tf.constant_initializer(0.1),
109 | weights_initializer=trunc_normal(0.04),
110 | weights_regularizer=slim.l2_regularizer(weight_decay),
111 | activation_fn=tf.nn.relu) as sc:
112 | return sc
113 |
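Putting the two definitions together, a minimal training-graph sketch (the batch size is arbitrary; the input size follows cifarnet.default_image_size):

import tensorflow as tf
from libs.networks.slim_nets.cifarnet import cifarnet, cifarnet_arg_scope
slim = tf.contrib.slim

images = tf.random_uniform((8, 32, 32, 3))    # hypothetical CIFAR-10 batch
with slim.arg_scope(cifarnet_arg_scope(weight_decay=0.004)):
    logits, end_points = cifarnet(images, num_classes=10, is_training=True)
probabilities = end_points['Predictions']     # softmax over the 10 classes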
--------------------------------------------------------------------------------
/libs/networks/slim_nets/inception.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Brings all inception models under one namespace."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | # pylint: disable=unused-import
22 | from libs.networks.slim_nets.inception_resnet_v2 import inception_resnet_v2
23 | from libs.networks.slim_nets.inception_resnet_v2 import inception_resnet_v2_arg_scope
24 | from libs.networks.slim_nets.inception_resnet_v2 import inception_resnet_v2_base
25 | from libs.networks.slim_nets.inception_v1 import inception_v1
26 | from libs.networks.slim_nets.inception_v1 import inception_v1_arg_scope
27 | from libs.networks.slim_nets.inception_v1 import inception_v1_base
28 | from libs.networks.slim_nets.inception_v2 import inception_v2
29 | from libs.networks.slim_nets.inception_v2 import inception_v2_arg_scope
30 | from libs.networks.slim_nets.inception_v2 import inception_v2_base
31 | from libs.networks.slim_nets.inception_v3 import inception_v3
32 | from libs.networks.slim_nets.inception_v3 import inception_v3_arg_scope
33 | from libs.networks.slim_nets.inception_v3 import inception_v3_base
34 | from libs.networks.slim_nets.inception_v4 import inception_v4
35 | from libs.networks.slim_nets.inception_v4 import inception_v4_arg_scope
36 | from libs.networks.slim_nets.inception_v4 import inception_v4_base
37 | # pylint: enable=unused-import
38 |
--------------------------------------------------------------------------------
/libs/networks/slim_nets/inception_resnet_v2.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/inception_resnet_v2.pyc
--------------------------------------------------------------------------------
/libs/networks/slim_nets/inception_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains common code shared by all inception models.
16 |
17 | Usage of arg scope:
18 | with slim.arg_scope(inception_arg_scope()):
19 | logits, end_points = inception.inception_v3(images, num_classes,
20 | is_training=is_training)
21 |
22 | """
23 | from __future__ import absolute_import
24 | from __future__ import division
25 | from __future__ import print_function
26 |
27 | import tensorflow as tf
28 |
29 | slim = tf.contrib.slim
30 |
31 |
32 | def inception_arg_scope(weight_decay=0.00004,
33 | use_batch_norm=True,
34 | batch_norm_decay=0.9997,
35 | batch_norm_epsilon=0.001):
36 | """Defines the default arg scope for inception models.
37 |
38 | Args:
39 | weight_decay: The weight decay to use for regularizing the model.
40 |     use_batch_norm: If `True`, batch_norm is applied after each convolution.
41 | batch_norm_decay: Decay for batch norm moving average.
42 | batch_norm_epsilon: Small float added to variance to avoid dividing by zero
43 | in batch norm.
44 |
45 | Returns:
46 | An `arg_scope` to use for the inception models.
47 | """
48 | batch_norm_params = {
49 | # Decay for the moving averages.
50 | 'decay': batch_norm_decay,
51 | # epsilon to prevent 0s in variance.
52 | 'epsilon': batch_norm_epsilon,
53 | # collection containing update_ops.
54 | 'updates_collections': tf.GraphKeys.UPDATE_OPS,
55 | }
56 | if use_batch_norm:
57 | normalizer_fn = slim.batch_norm
58 | normalizer_params = batch_norm_params
59 | else:
60 | normalizer_fn = None
61 | normalizer_params = {}
62 | # Set weight_decay for weights in Conv and FC layers.
63 | with slim.arg_scope([slim.conv2d, slim.fully_connected],
64 | weights_regularizer=slim.l2_regularizer(weight_decay)):
65 | with slim.arg_scope(
66 | [slim.conv2d],
67 | weights_initializer=slim.variance_scaling_initializer(),
68 | activation_fn=tf.nn.relu,
69 | normalizer_fn=normalizer_fn,
70 | normalizer_params=normalizer_params) as sc:
71 | return sc
72 |
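A minimal training sketch built on `inception_arg_scope`: because the scope routes batch-norm moving-average updates into `tf.GraphKeys.UPDATE_OPS`, those ops should run with every optimizer step. The `nets` imports mirror `inception.py` above; the placeholder shapes and optimizer are illustrative assumptions:

```python
import tensorflow as tf
from nets import inception                       # namespace module above
from nets.inception_utils import inception_arg_scope

slim = tf.contrib.slim

images = tf.placeholder(tf.float32, [None, 299, 299, 3])
labels = tf.placeholder(tf.int64, [None])

with slim.arg_scope(inception_arg_scope(weight_decay=4e-5)):
    logits, _ = inception.inception_v3(images, num_classes=1001,
                                       is_training=True)

loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

# The batch-norm updates registered in UPDATE_OPS must run alongside the
# train step, otherwise the moving averages never move.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
```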
--------------------------------------------------------------------------------
/libs/networks/slim_nets/lenet.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains a variant of the LeNet model definition."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import tensorflow as tf
22 |
23 | slim = tf.contrib.slim
24 |
25 |
26 | def lenet(images, num_classes=10, is_training=False,
27 | dropout_keep_prob=0.5,
28 | prediction_fn=slim.softmax,
29 | scope='LeNet'):
30 | """Creates a variant of the LeNet model.
31 |
32 | Note that since the output is a set of 'logits', the values fall in the
33 | interval of (-infinity, infinity). Consequently, to convert the outputs to a
34 | probability distribution over the characters, one will need to convert them
35 | using the softmax function:
36 |
37 | logits = lenet.lenet(images, is_training=False)
38 | probabilities = tf.nn.softmax(logits)
39 | predictions = tf.argmax(logits, 1)
40 |
41 | Args:
42 | images: A batch of `Tensors` of size [batch_size, height, width, channels].
43 | num_classes: the number of classes in the dataset.
44 | is_training: specifies whether or not we're currently training the model.
45 | This variable will determine the behaviour of the dropout layer.
46 |     dropout_keep_prob: the fraction of activation values that are retained.
47 | prediction_fn: a function to get predictions out of logits.
48 | scope: Optional variable_scope.
49 |
50 | Returns:
51 | logits: the pre-softmax activations, a tensor of size
52 | [batch_size, `num_classes`]
53 | end_points: a dictionary from components of the network to the corresponding
54 | activation.
55 | """
56 | end_points = {}
57 |
58 | with tf.variable_scope(scope, 'LeNet', [images, num_classes]):
59 | net = slim.conv2d(images, 32, [5, 5], scope='conv1')
60 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
61 | net = slim.conv2d(net, 64, [5, 5], scope='conv2')
62 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool2')
63 | net = slim.flatten(net)
64 | end_points['Flatten'] = net
65 |
66 | net = slim.fully_connected(net, 1024, scope='fc3')
67 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
68 | scope='dropout3')
69 | logits = slim.fully_connected(net, num_classes, activation_fn=None,
70 | scope='fc4')
71 |
72 | end_points['Logits'] = logits
73 | end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
74 |
75 | return logits, end_points
76 | lenet.default_image_size = 28
77 |
78 |
79 | def lenet_arg_scope(weight_decay=0.0):
80 | """Defines the default lenet argument scope.
81 |
82 | Args:
83 | weight_decay: The weight decay to use for regularizing the model.
84 |
85 | Returns:
86 |     An `arg_scope` to use for the lenet model.
87 | """
88 | with slim.arg_scope(
89 | [slim.conv2d, slim.fully_connected],
90 | weights_regularizer=slim.l2_regularizer(weight_decay),
91 | weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
92 | activation_fn=tf.nn.relu) as sc:
93 | return sc
94 |
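A minimal sketch pairing `lenet` with `lenet_arg_scope`, assuming the `nets` import path used elsewhere in this tree; a single grayscale channel works because `slim.conv2d` infers the input depth:

```python
import tensorflow as tf
from nets.lenet import lenet, lenet_arg_scope  # path as used by nets_factory.py

slim = tf.contrib.slim

# MNIST-shaped input at lenet's default 28x28 resolution, one channel.
images = tf.placeholder(tf.float32, [None, 28, 28, 1])

with slim.arg_scope(lenet_arg_scope(weight_decay=0.0)):
    logits, end_points = lenet(images, num_classes=10, is_training=True)

# Softmax output computed by prediction_fn inside lenet().
probabilities = end_points['Predictions']
```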
--------------------------------------------------------------------------------
/libs/networks/slim_nets/mobilenet_v1.md:
--------------------------------------------------------------------------------
1 | # MobileNet_v1
2 |
3 | [MobileNets](https://arxiv.org/abs/1704.04861) are small, low-latency, low-power models parameterized to meet the resource constraints of a variety of use cases. They can be built upon for classification, detection, embeddings, and segmentation, much as other popular large-scale models, such as Inception, are used. MobileNets can be run efficiently on mobile devices with [TensorFlow Mobile](https://www.tensorflow.org/mobile/).
4 |
5 | MobileNets trade off between latency, size and accuracy while comparing favorably with popular models from the literature.
6 |
7 | 
8 |
9 | # Pre-trained Models
10 |
11 | Choose the right MobileNet model to fit your latency and size budget. The size of the network in memory and on disk is proportional to the number of parameters. The latency and power usage of the network scale with the number of Multiply-Accumulates (MACs), which measures the number of fused multiply-and-add operations. These MobileNet models have been trained on the
12 | [ILSVRC-2012-CLS](http://www.image-net.org/challenges/LSVRC/2012/)
13 | image classification dataset. Accuracies were computed by evaluating using a single image crop.
14 |
15 | Model Checkpoint | Million MACs | Million Parameters | Top-1 Accuracy| Top-5 Accuracy |
16 | :----:|:------------:|:----------:|:-------:|:-------:|
17 | [MobileNet_v1_1.0_224](http://download.tensorflow.org/models/mobilenet_v1_1.0_224_2017_06_14.tar.gz)|569|4.24|70.7|89.5|
18 | [MobileNet_v1_1.0_192](http://download.tensorflow.org/models/mobilenet_v1_1.0_192_2017_06_14.tar.gz)|418|4.24|69.3|88.9|
19 | [MobileNet_v1_1.0_160](http://download.tensorflow.org/models/mobilenet_v1_1.0_160_2017_06_14.tar.gz)|291|4.24|67.2|87.5|
20 | [MobileNet_v1_1.0_128](http://download.tensorflow.org/models/mobilenet_v1_1.0_128_2017_06_14.tar.gz)|186|4.24|64.1|85.3|
21 | [MobileNet_v1_0.75_224](http://download.tensorflow.org/models/mobilenet_v1_0.75_224_2017_06_14.tar.gz)|317|2.59|68.4|88.2|
22 | [MobileNet_v1_0.75_192](http://download.tensorflow.org/models/mobilenet_v1_0.75_192_2017_06_14.tar.gz)|233|2.59|67.4|87.3|
23 | [MobileNet_v1_0.75_160](http://download.tensorflow.org/models/mobilenet_v1_0.75_160_2017_06_14.tar.gz)|162|2.59|65.2|86.1|
24 | [MobileNet_v1_0.75_128](http://download.tensorflow.org/models/mobilenet_v1_0.75_128_2017_06_14.tar.gz)|104|2.59|61.8|83.6|
25 | [MobileNet_v1_0.50_224](http://download.tensorflow.org/models/mobilenet_v1_0.50_224_2017_06_14.tar.gz)|150|1.34|64.0|85.4|
26 | [MobileNet_v1_0.50_192](http://download.tensorflow.org/models/mobilenet_v1_0.50_192_2017_06_14.tar.gz)|110|1.34|62.1|84.0|
27 | [MobileNet_v1_0.50_160](http://download.tensorflow.org/models/mobilenet_v1_0.50_160_2017_06_14.tar.gz)|77|1.34|59.9|82.5|
28 | [MobileNet_v1_0.50_128](http://download.tensorflow.org/models/mobilenet_v1_0.50_128_2017_06_14.tar.gz)|49|1.34|56.2|79.6|
29 | [MobileNet_v1_0.25_224](http://download.tensorflow.org/models/mobilenet_v1_0.25_224_2017_06_14.tar.gz)|41|0.47|50.6|75.0|
30 | [MobileNet_v1_0.25_192](http://download.tensorflow.org/models/mobilenet_v1_0.25_192_2017_06_14.tar.gz)|34|0.47|49.0|73.6|
31 | [MobileNet_v1_0.25_160](http://download.tensorflow.org/models/mobilenet_v1_0.25_160_2017_06_14.tar.gz)|21|0.47|46.0|70.7|
32 | [MobileNet_v1_0.25_128](http://download.tensorflow.org/models/mobilenet_v1_0.25_128_2017_06_14.tar.gz)|14|0.47|41.3|66.2|
33 |
34 |
35 | Here is an example of how to download the MobileNet_v1_1.0_224 checkpoint:
36 |
37 | ```shell
38 | $ CHECKPOINT_DIR=/tmp/checkpoints
39 | $ mkdir ${CHECKPOINT_DIR}
40 | $ wget http://download.tensorflow.org/models/mobilenet_v1_1.0_224_2017_06_14.tar.gz
41 | $ tar -xvf mobilenet_v1_1.0_224_2017_06_14.tar.gz
42 | $ mv mobilenet_v1_1.0_224.ckpt.* ${CHECKPOINT_DIR}
43 | $ rm mobilenet_v1_1.0_224_2017_06_14.tar.gz
44 | ```
45 | More information on integrating MobileNets into your project can be found at the [TF-Slim Image Classification Library](https://github.com/tensorflow/models/blob/master/slim/README.md).
46 |
47 | To get started running models on-device go to [TensorFlow Mobile](https://www.tensorflow.org/mobile/).
48 |
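Below is an illustrative TF-Slim sketch for restoring the checkpoint downloaded above; it assumes the `nets.mobilenet_v1` module from this directory, the `MobilenetV1` variable scope, and the standard 1001-class ILSVRC head used by these checkpoints:

```python
import numpy as np
import tensorflow as tf
from nets import mobilenet_v1

slim = tf.contrib.slim

images = tf.placeholder(tf.float32, [None, 224, 224, 3])

with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope(is_training=False)):
    logits, end_points = mobilenet_v1.mobilenet_v1(images, num_classes=1001,
                                                   is_training=False)

saver = tf.train.Saver(slim.get_model_variables('MobilenetV1'))
with tf.Session() as sess:
    # Weights unpacked into CHECKPOINT_DIR by the shell commands above.
    saver.restore(sess, '/tmp/checkpoints/mobilenet_v1_1.0_224.ckpt')
    probs = sess.run(end_points['Predictions'],
                     feed_dict={images: np.zeros((1, 224, 224, 3), np.float32)})
```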
--------------------------------------------------------------------------------
/libs/networks/slim_nets/mobilenet_v1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/mobilenet_v1.png
--------------------------------------------------------------------------------
/libs/networks/slim_nets/mobilenet_v1.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/mobilenet_v1.pyc
--------------------------------------------------------------------------------
/libs/networks/slim_nets/nets_factory.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains a factory for building various models."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 | import functools
21 |
22 | import tensorflow as tf
23 |
24 | from nets import alexnet
25 | from nets import cifarnet
26 | from nets import inception
27 | from nets import lenet
28 | from nets import mobilenet_v1
29 | from nets import overfeat
30 | from nets import resnet_v1
31 | from nets import resnet_v2
32 | from nets import vgg
33 |
34 | slim = tf.contrib.slim
35 |
36 | networks_map = {'alexnet_v2': alexnet.alexnet_v2,
37 | 'cifarnet': cifarnet.cifarnet,
38 | 'overfeat': overfeat.overfeat,
39 | 'vgg_a': vgg.vgg_a,
40 | 'vgg_16': vgg.vgg_16,
41 | 'vgg_19': vgg.vgg_19,
42 | 'inception_v1': inception.inception_v1,
43 | 'inception_v2': inception.inception_v2,
44 | 'inception_v3': inception.inception_v3,
45 | 'inception_v4': inception.inception_v4,
46 | 'inception_resnet_v2': inception.inception_resnet_v2,
47 | 'lenet': lenet.lenet,
48 | 'resnet_v1_50': resnet_v1.resnet_v1_50,
49 | 'resnet_v1_101': resnet_v1.resnet_v1_101,
50 | 'resnet_v1_152': resnet_v1.resnet_v1_152,
51 | 'resnet_v1_200': resnet_v1.resnet_v1_200,
52 | 'resnet_v2_50': resnet_v2.resnet_v2_50,
53 | 'resnet_v2_101': resnet_v2.resnet_v2_101,
54 | 'resnet_v2_152': resnet_v2.resnet_v2_152,
55 | 'resnet_v2_200': resnet_v2.resnet_v2_200,
56 | 'mobilenet_v1': mobilenet_v1.mobilenet_v1,
57 | }
58 |
59 | arg_scopes_map = {'alexnet_v2': alexnet.alexnet_v2_arg_scope,
60 | 'cifarnet': cifarnet.cifarnet_arg_scope,
61 | 'overfeat': overfeat.overfeat_arg_scope,
62 | 'vgg_a': vgg.vgg_arg_scope,
63 | 'vgg_16': vgg.vgg_arg_scope,
64 | 'vgg_19': vgg.vgg_arg_scope,
65 | 'inception_v1': inception.inception_v3_arg_scope,
66 | 'inception_v2': inception.inception_v3_arg_scope,
67 | 'inception_v3': inception.inception_v3_arg_scope,
68 | 'inception_v4': inception.inception_v4_arg_scope,
69 | 'inception_resnet_v2':
70 | inception.inception_resnet_v2_arg_scope,
71 | 'lenet': lenet.lenet_arg_scope,
72 | 'resnet_v1_50': resnet_v1.resnet_arg_scope,
73 | 'resnet_v1_101': resnet_v1.resnet_arg_scope,
74 | 'resnet_v1_152': resnet_v1.resnet_arg_scope,
75 | 'resnet_v1_200': resnet_v1.resnet_arg_scope,
76 | 'resnet_v2_50': resnet_v2.resnet_arg_scope,
77 | 'resnet_v2_101': resnet_v2.resnet_arg_scope,
78 | 'resnet_v2_152': resnet_v2.resnet_arg_scope,
79 | 'resnet_v2_200': resnet_v2.resnet_arg_scope,
80 | 'mobilenet_v1': mobilenet_v1.mobilenet_v1_arg_scope,
81 | }
82 |
83 |
84 | def get_network_fn(name, num_classes, weight_decay=0.0, is_training=False):
85 | """Returns a network_fn such as `logits, end_points = network_fn(images)`.
86 |
87 | Args:
88 | name: The name of the network.
89 | num_classes: The number of classes to use for classification.
90 | weight_decay: The l2 coefficient for the model weights.
91 | is_training: `True` if the model is being used for training and `False`
92 | otherwise.
93 |
94 | Returns:
95 | network_fn: A function that applies the model to a batch of images. It has
96 | the following signature:
97 | logits, end_points = network_fn(images)
98 | Raises:
99 | ValueError: If network `name` is not recognized.
100 | """
101 | if name not in networks_map:
102 |     raise ValueError('Name of network unknown: %s' % name)
103 | arg_scope = arg_scopes_map[name](weight_decay=weight_decay)
104 | func = networks_map[name]
105 | @functools.wraps(func)
106 | def network_fn(images):
107 | with slim.arg_scope(arg_scope):
108 | return func(images, num_classes, is_training=is_training)
109 | if hasattr(func, 'default_image_size'):
110 | network_fn.default_image_size = func.default_image_size
111 |
112 | return network_fn
113 |
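A minimal usage sketch for `get_network_fn`; any key in `networks_map` works, and `default_image_size` is forwarded whenever the underlying net defines one:

```python
import tensorflow as tf
from nets import nets_factory

# Build a ResNet-50 classifier; the matching arg scope (weight decay,
# initializers, batch norm) is applied inside network_fn.
network_fn = nets_factory.get_network_fn('resnet_v1_50', num_classes=1000,
                                         weight_decay=1e-4, is_training=False)

size = network_fn.default_image_size  # 224 for resnet_v1_50
images = tf.placeholder(tf.float32, [None, size, size, 3])
logits, end_points = network_fn(images)
```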
--------------------------------------------------------------------------------
/libs/networks/slim_nets/nets_factory_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 Google Inc. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Tests for slim.inception."""
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import tensorflow as tf
23 |
24 | from nets import nets_factory
25 |
26 | slim = tf.contrib.slim
27 |
28 |
29 | class NetworksTest(tf.test.TestCase):
30 |
31 | def testGetNetworkFn(self):
32 | batch_size = 5
33 | num_classes = 1000
34 | for net in nets_factory.networks_map:
35 | with self.test_session():
36 | net_fn = nets_factory.get_network_fn(net, num_classes)
37 | # Most networks use 224 as their default_image_size
38 | image_size = getattr(net_fn, 'default_image_size', 224)
39 | inputs = tf.random_uniform((batch_size, image_size, image_size, 3))
40 | logits, end_points = net_fn(inputs)
41 | self.assertTrue(isinstance(logits, tf.Tensor))
42 | self.assertTrue(isinstance(end_points, dict))
43 | self.assertEqual(logits.get_shape().as_list()[0], batch_size)
44 | self.assertEqual(logits.get_shape().as_list()[-1], num_classes)
45 |
46 | def testGetNetworkFnArgScope(self):
47 | batch_size = 5
48 | num_classes = 10
49 | net = 'cifarnet'
50 | with self.test_session(use_gpu=True):
51 | net_fn = nets_factory.get_network_fn(net, num_classes)
52 | image_size = getattr(net_fn, 'default_image_size', 224)
53 | with slim.arg_scope([slim.model_variable, slim.variable],
54 | device='/CPU:0'):
55 | inputs = tf.random_uniform((batch_size, image_size, image_size, 3))
56 | net_fn(inputs)
57 | weights = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, 'CifarNet/conv1')[0]
58 | self.assertDeviceEqual('/CPU:0', weights.device)
59 |
60 | if __name__ == '__main__':
61 | tf.test.main()
62 |
--------------------------------------------------------------------------------
/libs/networks/slim_nets/overfeat.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains the model definition for the OverFeat network.
16 |
17 | The definition for the network was obtained from:
18 | OverFeat: Integrated Recognition, Localization and Detection using
19 | Convolutional Networks
20 | Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and
21 | Yann LeCun, 2014
22 | http://arxiv.org/abs/1312.6229
23 |
24 | Usage:
25 | with slim.arg_scope(overfeat.overfeat_arg_scope()):
26 | outputs, end_points = overfeat.overfeat(inputs)
27 |
28 | @@overfeat
29 | """
30 | from __future__ import absolute_import
31 | from __future__ import division
32 | from __future__ import print_function
33 |
34 | import tensorflow as tf
35 |
36 | slim = tf.contrib.slim
37 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)
38 |
39 |
40 | def overfeat_arg_scope(weight_decay=0.0005):
41 | with slim.arg_scope([slim.conv2d, slim.fully_connected],
42 | activation_fn=tf.nn.relu,
43 | weights_regularizer=slim.l2_regularizer(weight_decay),
44 | biases_initializer=tf.zeros_initializer()):
45 | with slim.arg_scope([slim.conv2d], padding='SAME'):
46 | with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc:
47 | return arg_sc
48 |
49 |
50 | def overfeat(inputs,
51 | num_classes=1000,
52 | is_training=True,
53 | dropout_keep_prob=0.5,
54 | spatial_squeeze=True,
55 | scope='overfeat'):
56 | """Contains the model definition for the OverFeat network.
57 |
58 | The definition for the network was obtained from:
59 | OverFeat: Integrated Recognition, Localization and Detection using
60 | Convolutional Networks
61 | Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and
62 | Yann LeCun, 2014
63 | http://arxiv.org/abs/1312.6229
64 |
65 | Note: All the fully_connected layers have been transformed to conv2d layers.
66 | To use in classification mode, resize input to 231x231. To use in fully
67 | convolutional mode, set spatial_squeeze to false.
68 |
69 | Args:
70 | inputs: a tensor of size [batch_size, height, width, channels].
71 | num_classes: number of predicted classes.
72 | is_training: whether or not the model is being trained.
73 | dropout_keep_prob: the probability that activations are kept in the dropout
74 | layers during training.
75 |     spatial_squeeze: whether or not to squeeze the spatial dimensions of the
76 | outputs. Useful to remove unnecessary dimensions for classification.
77 | scope: Optional scope for the variables.
78 |
79 | Returns:
80 | the last op containing the log predictions and end_points dict.
81 |
82 | """
83 | with tf.variable_scope(scope, 'overfeat', [inputs]) as sc:
84 | end_points_collection = sc.name + '_end_points'
85 | # Collect outputs for conv2d, fully_connected and max_pool2d
86 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
87 | outputs_collections=end_points_collection):
88 | net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID',
89 | scope='conv1')
90 | net = slim.max_pool2d(net, [2, 2], scope='pool1')
91 | net = slim.conv2d(net, 256, [5, 5], padding='VALID', scope='conv2')
92 | net = slim.max_pool2d(net, [2, 2], scope='pool2')
93 | net = slim.conv2d(net, 512, [3, 3], scope='conv3')
94 | net = slim.conv2d(net, 1024, [3, 3], scope='conv4')
95 | net = slim.conv2d(net, 1024, [3, 3], scope='conv5')
96 | net = slim.max_pool2d(net, [2, 2], scope='pool5')
97 | with slim.arg_scope([slim.conv2d],
98 | weights_initializer=trunc_normal(0.005),
99 | biases_initializer=tf.constant_initializer(0.1)):
100 | # Use conv2d instead of fully_connected layers.
101 | net = slim.conv2d(net, 3072, [6, 6], padding='VALID', scope='fc6')
102 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
103 | scope='dropout6')
104 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
105 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
106 | scope='dropout7')
107 | net = slim.conv2d(net, num_classes, [1, 1],
108 | activation_fn=None,
109 | normalizer_fn=None,
110 | biases_initializer=tf.zeros_initializer(),
111 | scope='fc8')
112 | # Convert end_points_collection into a end_point dict.
113 | end_points = slim.utils.convert_collection_to_dict(end_points_collection)
114 | if spatial_squeeze:
115 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
116 | end_points[sc.name + '/fc8'] = net
117 | return net, end_points
118 | overfeat.default_image_size = 231
119 |
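A minimal fully-convolutional sketch following the pattern exercised in `overfeat_test.py`: with `spatial_squeeze=False`, a 281x281 input yields a 2x2 grid of logits that can be averaged into one prediction per image:

```python
import tensorflow as tf
from nets import overfeat

slim = tf.contrib.slim

inputs = tf.placeholder(tf.float32, [1, 281, 281, 3])

with slim.arg_scope(overfeat.overfeat_arg_scope()):
    # logits has shape [1, 2, 2, 1000] because the input is larger
    # than the 231x231 classification resolution.
    logits, _ = overfeat.overfeat(inputs, num_classes=1000,
                                  is_training=False, spatial_squeeze=False)

avg_logits = tf.reduce_mean(logits, [1, 2])  # average the spatial grid
predictions = tf.argmax(avg_logits, 1)
```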
--------------------------------------------------------------------------------
/libs/networks/slim_nets/overfeat_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Tests for slim.slim_nets.overfeat."""
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | import tensorflow as tf
21 |
22 | from nets import overfeat
23 |
24 | slim = tf.contrib.slim
25 |
26 |
27 | class OverFeatTest(tf.test.TestCase):
28 |
29 | def testBuild(self):
30 | batch_size = 5
31 | height, width = 231, 231
32 | num_classes = 1000
33 | with self.test_session():
34 | inputs = tf.random_uniform((batch_size, height, width, 3))
35 | logits, _ = overfeat.overfeat(inputs, num_classes)
36 |       self.assertEqual(logits.op.name, 'overfeat/fc8/squeezed')
37 | self.assertListEqual(logits.get_shape().as_list(),
38 | [batch_size, num_classes])
39 |
40 | def testFullyConvolutional(self):
41 | batch_size = 1
42 | height, width = 281, 281
43 | num_classes = 1000
44 | with self.test_session():
45 | inputs = tf.random_uniform((batch_size, height, width, 3))
46 | logits, _ = overfeat.overfeat(inputs, num_classes, spatial_squeeze=False)
47 |       self.assertEqual(logits.op.name, 'overfeat/fc8/BiasAdd')
48 | self.assertListEqual(logits.get_shape().as_list(),
49 | [batch_size, 2, 2, num_classes])
50 |
51 | def testEndPoints(self):
52 | batch_size = 5
53 | height, width = 231, 231
54 | num_classes = 1000
55 | with self.test_session():
56 | inputs = tf.random_uniform((batch_size, height, width, 3))
57 | _, end_points = overfeat.overfeat(inputs, num_classes)
58 | expected_names = ['overfeat/conv1',
59 | 'overfeat/pool1',
60 | 'overfeat/conv2',
61 | 'overfeat/pool2',
62 | 'overfeat/conv3',
63 | 'overfeat/conv4',
64 | 'overfeat/conv5',
65 | 'overfeat/pool5',
66 | 'overfeat/fc6',
67 | 'overfeat/fc7',
68 | 'overfeat/fc8'
69 | ]
70 | self.assertSetEqual(set(end_points.keys()), set(expected_names))
71 |
72 | def testModelVariables(self):
73 | batch_size = 5
74 | height, width = 231, 231
75 | num_classes = 1000
76 | with self.test_session():
77 | inputs = tf.random_uniform((batch_size, height, width, 3))
78 | overfeat.overfeat(inputs, num_classes)
79 | expected_names = ['overfeat/conv1/weights',
80 | 'overfeat/conv1/biases',
81 | 'overfeat/conv2/weights',
82 | 'overfeat/conv2/biases',
83 | 'overfeat/conv3/weights',
84 | 'overfeat/conv3/biases',
85 | 'overfeat/conv4/weights',
86 | 'overfeat/conv4/biases',
87 | 'overfeat/conv5/weights',
88 | 'overfeat/conv5/biases',
89 | 'overfeat/fc6/weights',
90 | 'overfeat/fc6/biases',
91 | 'overfeat/fc7/weights',
92 | 'overfeat/fc7/biases',
93 | 'overfeat/fc8/weights',
94 | 'overfeat/fc8/biases',
95 | ]
96 | model_variables = [v.op.name for v in slim.get_model_variables()]
97 | self.assertSetEqual(set(model_variables), set(expected_names))
98 |
99 | def testEvaluation(self):
100 | batch_size = 2
101 | height, width = 231, 231
102 | num_classes = 1000
103 | with self.test_session():
104 | eval_inputs = tf.random_uniform((batch_size, height, width, 3))
105 | logits, _ = overfeat.overfeat(eval_inputs, is_training=False)
106 | self.assertListEqual(logits.get_shape().as_list(),
107 | [batch_size, num_classes])
108 | predictions = tf.argmax(logits, 1)
109 | self.assertListEqual(predictions.get_shape().as_list(), [batch_size])
110 |
111 | def testTrainEvalWithReuse(self):
112 | train_batch_size = 2
113 | eval_batch_size = 1
114 | train_height, train_width = 231, 231
115 | eval_height, eval_width = 281, 281
116 | num_classes = 1000
117 | with self.test_session():
118 | train_inputs = tf.random_uniform(
119 | (train_batch_size, train_height, train_width, 3))
120 | logits, _ = overfeat.overfeat(train_inputs)
121 | self.assertListEqual(logits.get_shape().as_list(),
122 | [train_batch_size, num_classes])
123 | tf.get_variable_scope().reuse_variables()
124 | eval_inputs = tf.random_uniform(
125 | (eval_batch_size, eval_height, eval_width, 3))
126 | logits, _ = overfeat.overfeat(eval_inputs, is_training=False,
127 | spatial_squeeze=False)
128 | self.assertListEqual(logits.get_shape().as_list(),
129 | [eval_batch_size, 2, 2, num_classes])
130 | logits = tf.reduce_mean(logits, [1, 2])
131 | predictions = tf.argmax(logits, 1)
132 |       self.assertListEqual(predictions.get_shape().as_list(), [eval_batch_size])
133 |
134 | def testForward(self):
135 | batch_size = 1
136 | height, width = 231, 231
137 | with self.test_session() as sess:
138 | inputs = tf.random_uniform((batch_size, height, width, 3))
139 | logits, _ = overfeat.overfeat(inputs)
140 | sess.run(tf.global_variables_initializer())
141 | output = sess.run(logits)
142 | self.assertTrue(output.any())
143 |
144 | if __name__ == '__main__':
145 | tf.test.main()
146 |
--------------------------------------------------------------------------------
/libs/networks/slim_nets/resnet_utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/resnet_utils.pyc
--------------------------------------------------------------------------------
/libs/networks/slim_nets/resnet_v1.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/resnet_v1.pyc
--------------------------------------------------------------------------------
/libs/networks/slim_nets/vgg.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/vgg.pyc
--------------------------------------------------------------------------------
/libs/val_libs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/val_libs/__init__.py
--------------------------------------------------------------------------------
/libs/val_libs/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/val_libs/__init__.pyc
--------------------------------------------------------------------------------
/libs/val_libs/voc_eval.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/val_libs/voc_eval.pyc
--------------------------------------------------------------------------------
/output/trained_weights/README.md:
--------------------------------------------------------------------------------
1 | Please download the [trained model](https://github.com/DetectionTeamUCAS/Models/tree/master/Faster-RCNN_Tensorflow) produced by this project, then put it here.
--------------------------------------------------------------------------------
/scalars.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/scalars.png
--------------------------------------------------------------------------------
/tools/FasterRCNN_20180516_mobile.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/FasterRCNN_20180516_mobile.jpg
--------------------------------------------------------------------------------
/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/__init__.py
--------------------------------------------------------------------------------
/tools/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/__init__.pyc
--------------------------------------------------------------------------------
/tools/demos/000058.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/demos/000058.jpg
--------------------------------------------------------------------------------
/tools/demos/000108.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/demos/000108.jpg
--------------------------------------------------------------------------------
/tools/demos/000237.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/demos/000237.jpg
--------------------------------------------------------------------------------
/tools/demos/000449.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/demos/000449.jpg
--------------------------------------------------------------------------------
/tools/demos/000611.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/demos/000611.jpg
--------------------------------------------------------------------------------
/tools/demos/000706.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/demos/000706.jpg
--------------------------------------------------------------------------------
/tools/demos/000719.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/demos/000719.jpg
--------------------------------------------------------------------------------
/tools/demos/004640.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/demos/004640.jpg
--------------------------------------------------------------------------------
/tools/inference.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 | from __future__ import print_function
5 | from __future__ import division
6 |
7 | import os, sys
8 | import tensorflow as tf
9 | import time
10 | import cv2
11 | import argparse
12 | import numpy as np
13 | sys.path.append("../")
14 |
15 | from data.io.image_preprocess import short_side_resize_for_inference_data
16 | from libs.configs import cfgs
17 | from libs.networks import build_whole_network
18 | from libs.box_utils import draw_box_in_img
19 | from help_utils import tools
20 |
21 |
22 | def detect(det_net, inference_save_path, real_test_imgname_list):
23 |
24 | # 1. preprocess img
25 |     img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB, not BGR
26 | img_batch = tf.cast(img_plac, tf.float32)
27 | img_batch = short_side_resize_for_inference_data(img_tensor=img_batch,
28 | target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
29 | length_limitation=cfgs.IMG_MAX_LENGTH)
30 | img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
31 | img_batch = tf.expand_dims(img_batch, axis=0) # [1, None, None, 3]
32 |
33 | detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
34 | input_img_batch=img_batch,
35 | gtboxes_batch=None)
36 |
37 | init_op = tf.group(
38 | tf.global_variables_initializer(),
39 | tf.local_variables_initializer()
40 | )
41 |
42 | restorer, restore_ckpt = det_net.get_restorer()
43 |
44 | config = tf.ConfigProto()
45 | config.gpu_options.allow_growth = True
46 |
47 | with tf.Session(config=config) as sess:
48 | sess.run(init_op)
49 |         if restorer is not None:
50 |             restorer.restore(sess, restore_ckpt)
51 |             print('model restored')
52 |
53 | for i, a_img_name in enumerate(real_test_imgname_list):
54 |
55 | raw_img = cv2.imread(a_img_name)
56 | start = time.time()
57 | resized_img, detected_boxes, detected_scores, detected_categories = \
58 | sess.run(
59 | [img_batch, detection_boxes, detection_scores, detection_category],
60 | feed_dict={img_plac: raw_img[:, :, ::-1]} # cv is BGR. But need RGB
61 | )
62 | end = time.time()
63 | # print("{} cost time : {} ".format(img_name, (end - start)))
64 |
65 | show_indices = detected_scores >= cfgs.SHOW_SCORE_THRSHOLD
66 | show_scores = detected_scores[show_indices]
67 | show_boxes = detected_boxes[show_indices]
68 | show_categories = detected_categories[show_indices]
69 | final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(np.squeeze(resized_img, 0),
70 | boxes=show_boxes,
71 | labels=show_categories,
72 | scores=show_scores)
73 |             base_name = a_img_name.split('/')[-1]
74 |             # print (inference_save_path + '/' + base_name)
75 |             cv2.imwrite(inference_save_path + '/' + base_name,
76 | final_detections[:, :, ::-1])
77 |
78 | tools.view_bar('{} image cost {}s'.format(a_img_name, (end - start)), i + 1, len(real_test_imgname_list))
79 |
80 |
81 | def inference(test_dir, inference_save_path):
82 |
83 | test_imgname_list = [os.path.join(test_dir, img_name) for img_name in os.listdir(test_dir)
84 | if img_name.endswith(('.jpg', '.png', '.jpeg', '.tif', '.tiff'))]
85 |     assert len(test_imgname_list) != 0, 'test_dir has no imgs there.' \
86 |                                         ' Note that we only support img formats (.jpg, .jpeg, .png, .tif, .tiff)'
87 |
88 | faster_rcnn = build_whole_network.DetectionNetwork(base_network_name=cfgs.NET_NAME,
89 | is_training=False)
90 | detect(det_net=faster_rcnn, inference_save_path=inference_save_path, real_test_imgname_list=test_imgname_list)
91 |
92 |
93 | def parse_args():
94 | """
95 | Parse input arguments
96 | """
97 |     parser = argparse.ArgumentParser(description='Test imgs: you need to provide the test dir')
98 | parser.add_argument('--data_dir', dest='data_dir',
99 | help='data path',
100 | default='demos', type=str)
101 | parser.add_argument('--save_dir', dest='save_dir',
102 |                         help='dir to save inference results',
103 | default='inference_results', type=str)
104 | parser.add_argument('--GPU', dest='GPU',
105 | help='gpu id ',
106 | default='0', type=str)
107 |
108 | if len(sys.argv) == 1:
109 | parser.print_help()
110 | sys.exit(1)
111 |
112 | args = parser.parse_args()
113 |
114 | return args
115 | if __name__ == '__main__':
116 |
117 | args = parse_args()
118 | print('Called with args:')
119 | print(args)
120 | os.environ["CUDA_VISIBLE_DEVICES"] = args.GPU
121 | inference(args.data_dir,
122 | inference_save_path=args.save_dir)
123 |
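A minimal driver sketch, equivalent to running `python inference.py --data_dir demos --save_dir inference_results --GPU 0`; the bare `inference` import and the relative paths assume the `tools/` directory as the working directory:

```python
import os

# Must be set before TensorFlow is imported (importing inference pulls it in).
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

from inference import inference  # run from the tools/ directory

# Scans demos/ for .jpg/.jpeg/.png/.tif/.tiff files and writes images with
# drawn detections into inference_results/.
inference('demos', inference_save_path='inference_results')
```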
--------------------------------------------------------------------------------
/tools/inference_results/000058.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/inference_results/000058.jpg
--------------------------------------------------------------------------------
/tools/inference_results/000108.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/inference_results/000108.jpg
--------------------------------------------------------------------------------
/tools/inference_results/000237.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/inference_results/000237.jpg
--------------------------------------------------------------------------------
/tools/inference_results/000449.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/inference_results/000449.jpg
--------------------------------------------------------------------------------
/tools/inference_results/000611.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/inference_results/000611.jpg
--------------------------------------------------------------------------------
/tools/inference_results/000706.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/inference_results/000706.jpg
--------------------------------------------------------------------------------
/tools/inference_results/000719.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/inference_results/000719.jpg
--------------------------------------------------------------------------------
/tools/inference_results/004640.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/inference_results/004640.jpg
--------------------------------------------------------------------------------
/tools/test.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 | from __future__ import print_function
5 | from __future__ import division
6 |
7 | import os, sys
8 | import tensorflow as tf
9 | import time
10 | import cv2
11 | import argparse
12 | import numpy as np
13 | sys.path.append("../")
14 |
15 | from data.io.image_preprocess import short_side_resize_for_inference_data
16 | from libs.configs import cfgs
17 | from libs.networks import build_whole_network
18 | from libs.box_utils import draw_box_in_img
19 | from help_utils import tools
20 |
21 |
22 | def detect(det_net, inference_save_path, real_test_imgname_list):
23 |
24 | # 1. preprocess img
25 |     img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB, not BGR
26 | img_batch = tf.cast(img_plac, tf.float32)
27 | img_batch = short_side_resize_for_inference_data(img_tensor=img_batch,
28 | target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
29 | length_limitation=cfgs.IMG_MAX_LENGTH)
30 | img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
31 | img_batch = tf.expand_dims(img_batch, axis=0) # [1, None, None, 3]
32 |
33 | detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
34 | input_img_batch=img_batch,
35 | gtboxes_batch=None)
36 |
37 | init_op = tf.group(
38 | tf.global_variables_initializer(),
39 | tf.local_variables_initializer()
40 | )
41 |
42 | restorer, restore_ckpt = det_net.get_restorer()
43 |
44 | config = tf.ConfigProto()
45 | config.gpu_options.allow_growth = True
46 |
47 | with tf.Session(config=config) as sess:
48 | sess.run(init_op)
49 |         if restorer is not None:
50 |             restorer.restore(sess, restore_ckpt)
51 |             print('model restored')
52 |
53 | for i, a_img_name in enumerate(real_test_imgname_list):
54 |
55 | raw_img = cv2.imread(a_img_name)
56 | start = time.time()
57 | resized_img, detected_boxes, detected_scores, detected_categories = \
58 | sess.run(
59 | [img_batch, detection_boxes, detection_scores, detection_category],
60 | feed_dict={img_plac: raw_img[:, :, ::-1]} # cv is BGR. But need RGB
61 | )
62 | end = time.time()
63 | # print("{} cost time : {} ".format(img_name, (end - start)))
64 |
65 | raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]
66 |
67 | xmin, ymin, xmax, ymax = detected_boxes[:, 0], detected_boxes[:, 1], \
68 | detected_boxes[:, 2], detected_boxes[:, 3]
69 |
70 | resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]
71 |
72 | xmin = xmin * raw_w / resized_w
73 | xmax = xmax * raw_w / resized_w
74 |
75 | ymin = ymin * raw_h / resized_h
76 | ymax = ymax * raw_h / resized_h
77 |
78 | detected_boxes = np.transpose(np.stack([xmin, ymin, xmax, ymax]))
79 |
80 | show_indices = detected_scores >= cfgs.SHOW_SCORE_THRSHOLD
81 | show_scores = detected_scores[show_indices]
82 | show_boxes = detected_boxes[show_indices]
83 | show_categories = detected_categories[show_indices]
84 | final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(raw_img - np.array(cfgs.PIXEL_MEAN),
85 | boxes=show_boxes,
86 | labels=show_categories,
87 | scores=show_scores)
88 |             base_name = a_img_name.split('/')[-1]
89 |             # print (inference_save_path + '/' + base_name)
90 |             cv2.imwrite(inference_save_path + '/' + base_name,
91 | final_detections[:, :, ::-1])
92 |
93 | tools.view_bar('{} image cost {}s'.format(a_img_name, (end - start)), i + 1, len(real_test_imgname_list))
94 |
95 |
96 | def test(test_dir, inference_save_path):
97 |
98 | test_imgname_list = [os.path.join(test_dir, img_name) for img_name in os.listdir(test_dir)
99 | if img_name.endswith(('.jpg', '.png', '.jpeg', '.tif', '.tiff'))]
100 |     assert len(test_imgname_list) != 0, 'test_dir has no imgs there.' \
101 |                                         ' Note that we only support img formats (.jpg, .jpeg, .png, .tif, .tiff)'
102 |
103 | faster_rcnn = build_whole_network.DetectionNetwork(base_network_name=cfgs.NET_NAME,
104 | is_training=False)
105 | detect(det_net=faster_rcnn, inference_save_path=inference_save_path, real_test_imgname_list=test_imgname_list)
106 |
107 |
108 | def parse_args():
109 | """
110 | Parse input arguments
111 | """
112 |     parser = argparse.ArgumentParser(description='Test imgs: you need to provide the test dir')
113 | parser.add_argument('--data_dir', dest='data_dir',
114 | help='data path',
115 | default='demos', type=str)
116 | parser.add_argument('--save_dir', dest='save_dir',
117 |                         help='dir to save inference results',
118 | default='inference_results', type=str)
119 | parser.add_argument('--GPU', dest='GPU',
120 | help='gpu id ',
121 | default='0', type=str)
122 |
123 | if len(sys.argv) == 1:
124 | parser.print_help()
125 | sys.exit(1)
126 |
127 | args = parser.parse_args()
128 |
129 | return args
130 | if __name__ == '__main__':
131 |
132 | args = parse_args()
133 | print('Called with args:')
134 | print(args)
135 | os.environ["CUDA_VISIBLE_DEVICES"] = args.GPU
136 | test(args.data_dir,
137 | inference_save_path=args.save_dir)
138 |
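The step that distinguishes `test.py` from `inference.py` is mapping detected boxes from resized-image coordinates back to the raw image (lines 65-78 above). A small NumPy sketch of that mapping on made-up sizes:

```python
import numpy as np

# Made-up sizes: a 500x375 raw image whose short side was resized to 600,
# so both axes scale by 600/375 = 1.6 and the long side becomes 800.
raw_h, raw_w = 375, 500
resized_h, resized_w = 600, 800

# One box in [xmin, ymin, xmax, ymax] order, in resized-image pixels.
boxes = np.array([[80., 60., 400., 300.]])

scale_x = raw_w / float(resized_w)  # 0.625
scale_y = raw_h / float(resized_h)  # 0.625
boxes_raw = boxes * np.array([scale_x, scale_y, scale_x, scale_y])
print(boxes_raw)  # [[ 50.   37.5 250.  187.5]]
```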
--------------------------------------------------------------------------------
/voc_2007.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/voc_2007.gif
--------------------------------------------------------------------------------