├── .idea ├── Faster-RCNN_Tensorflow.iml ├── misc.xml ├── modules.xml ├── vcs.xml └── workspace.xml ├── README.md ├── data ├── __init__.py ├── __init__.pyc ├── io │ ├── __init__.py │ ├── __init__.pyc │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ └── read_tfrecord.cpython-35.pyc │ ├── convert_data_to_tfrecord.py │ ├── convert_data_to_tfrecord_raw.py │ ├── image_preprocess.py │ ├── image_preprocess.pyc │ ├── read_tfrecord.py │ └── read_tfrecord.pyc ├── lib_coco │ ├── PythonAPI │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── pycocoDemo.ipynb │ │ ├── pycocoEvalDemo.ipynb │ │ ├── pycocotools │ │ │ ├── __init__.py │ │ │ ├── __init__.pyc │ │ │ ├── _mask.c │ │ │ ├── _mask.pyx │ │ │ ├── _mask.so │ │ │ ├── coco.py │ │ │ ├── coco.pyc │ │ │ ├── cocoeval.py │ │ │ ├── mask.py │ │ │ └── mask.pyc │ │ └── setup.py │ ├── __init__.py │ ├── __init__.pyc │ ├── common │ │ ├── gason.cpp │ │ ├── gason.h │ │ ├── maskApi.c │ │ └── maskApi.h │ ├── get_coco_next_batch.py │ └── get_coco_next_batch.pyc ├── pretrained_weights │ ├── README.md │ └── mobilenet │ │ └── README.md └── tfrecord │ ├── pascal_test.tfrecord │ └── pascal_train.tfrecord ├── help_utils ├── __init__.py ├── __init__.pyc ├── __pycache__ │ ├── __init__.cpython-35.pyc │ └── tools.cpython-35.pyc ├── tools.py └── tools.pyc ├── images.png ├── libs ├── __init__.py ├── __init__.pyc ├── __pycache__ │ └── __init__.cpython-35.pyc ├── box_utils │ ├── __init__.py │ ├── __init__.pyc │ ├── anchor_utils.py │ ├── anchor_utils.pyc │ ├── boxes_utils.py │ ├── boxes_utils.pyc │ ├── cython_utils │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── bbox.c │ │ ├── bbox.pyx │ │ ├── cython_bbox.so │ │ ├── cython_nms.so │ │ ├── nms.c │ │ ├── nms.pyx │ │ └── setup.py │ ├── draw_box_in_img.py │ ├── draw_box_in_img.pyc │ ├── encode_and_decode.py │ ├── encode_and_decode.pyc │ ├── show_box_in_tensor.py │ ├── show_box_in_tensor.pyc │ ├── tf_ops.py │ └── tf_ops.pyc ├── configs │ ├── __init__.py │ ├── __init__.pyc │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ └── cfgs.cpython-35.pyc │ ├── cfgs.py │ ├── cfgs.pyc │ ├── cfgs_coco.py │ ├── cfgs_mobilenetv2.py │ ├── cfgs_res101.py │ └── cfgs_res50.py ├── detection_oprations │ ├── __init__.py │ ├── __init__.pyc │ ├── anchor_target_layer_without_boxweight.py │ ├── anchor_target_layer_without_boxweight.pyc │ ├── proposal_opr.py │ ├── proposal_opr.pyc │ ├── proposal_target_layer.py │ └── proposal_target_layer.pyc ├── export_pbs │ ├── __init__.py │ ├── exportPb.py │ └── test_exportPb.py ├── label_name_dict │ ├── __init__.py │ ├── __init__.pyc │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ └── remote_sensing_dict.cpython-35.pyc │ ├── coco_dict.py │ ├── coco_dict.pyc │ ├── label_dict.py │ ├── remote_sensing_dict.py │ └── remote_sensing_dict.pyc ├── losses │ ├── __init__.py │ ├── __init__.pyc │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ └── losses.cpython-35.pyc │ ├── losses.py │ ├── losses.pyc │ ├── tfapi_loss.py │ └── tfapi_loss.pyc ├── networks │ ├── __init__.py │ ├── __init__.pyc │ ├── __pycache__ │ │ └── __init__.cpython-35.pyc │ ├── build_whole_network.py │ ├── build_whole_network.pyc │ ├── mobilenet │ │ ├── README.md │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── conv_blocks.py │ │ ├── conv_blocks.pyc │ │ ├── mobilenet.py │ │ ├── mobilenet.pyc │ │ ├── mobilenet_v2.py │ │ ├── mobilenet_v2.pyc │ │ └── mobilenet_v2_test.py │ ├── mobilenet_v2.py │ ├── mobilenet_v2.pyc │ ├── resnet.py │ ├── resnet.pyc │ └── slim_nets │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── __pycache__ │ │ 
├── __init__.cpython-35.pyc │ │ ├── inception_resnet_v2.cpython-35.pyc │ │ ├── mobilenet_v1.cpython-35.pyc │ │ ├── resnet_utils.cpython-35.pyc │ │ ├── resnet_v1.cpython-35.pyc │ │ └── vgg.cpython-35.pyc │ │ ├── alexnet.py │ │ ├── alexnet_test.py │ │ ├── cifarnet.py │ │ ├── inception.py │ │ ├── inception_resnet_v2.py │ │ ├── inception_resnet_v2.pyc │ │ ├── inception_resnet_v2_test.py │ │ ├── inception_utils.py │ │ ├── inception_v1.py │ │ ├── inception_v1_test.py │ │ ├── inception_v2.py │ │ ├── inception_v2_test.py │ │ ├── inception_v3.py │ │ ├── inception_v3_test.py │ │ ├── inception_v4.py │ │ ├── inception_v4_test.py │ │ ├── lenet.py │ │ ├── mobilenet_v1.md │ │ ├── mobilenet_v1.png │ │ ├── mobilenet_v1.py │ │ ├── mobilenet_v1.pyc │ │ ├── mobilenet_v1_test.py │ │ ├── nets_factory.py │ │ ├── nets_factory_test.py │ │ ├── overfeat.py │ │ ├── overfeat_test.py │ │ ├── resnet_utils.py │ │ ├── resnet_utils.pyc │ │ ├── resnet_v1.py │ │ ├── resnet_v1.pyc │ │ ├── resnet_v1_test.py │ │ ├── resnet_v2.py │ │ ├── resnet_v2_test.py │ │ ├── vgg.py │ │ ├── vgg.pyc │ │ └── vgg_test.py ├── setup.py └── val_libs │ ├── __init__.py │ ├── __init__.pyc │ ├── voc_eval.py │ └── voc_eval.pyc ├── output └── trained_weights │ └── README.md ├── scalars.png ├── tools ├── FasterRCNN_20180516_mobile.jpg ├── __init__.py ├── __init__.pyc ├── demos │ ├── 000058.jpg │ ├── 000108.jpg │ ├── 000237.jpg │ ├── 000449.jpg │ ├── 000611.jpg │ ├── 000706.jpg │ ├── 000719.jpg │ └── 004640.jpg ├── eval.py ├── inference.py ├── inference_for_coco.py ├── inference_results │ ├── 000058.jpg │ ├── 000108.jpg │ ├── 000237.jpg │ ├── 000449.jpg │ ├── 000611.jpg │ ├── 000706.jpg │ ├── 000719.jpg │ └── 004640.jpg ├── test.py ├── train.py └── train_with_placeholder.py └── voc_2007.gif /README.md: -------------------------------------------------------------------------------- 1 | # Faster-RCNN_Tensorflow 2 | 3 | ## Abstract 4 | This is a tensorflow re-implementation of [Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks](https://arxiv.org/abs/1506.01497). 5 | 6 | This project was completed by [YangXue](https://github.com/yangxue0827) and [YangJirui](https://github.com/yangJirui). Some related projects ([R2CNN](https://github.com/DetectionTeamUCAS/R2CNN_Faster-RCNN_Tensorflow) and [RRPN](https://github.com/DetectionTeamUCAS/RRPN_Faster-RCNN_Tensorflow)) are based on this code. 7 | 8 | ## Train on VOC 2007 trainval and test on VOC 2007 test (PS. This project also supports COCO training.)
9 | ![1](voc_2007.gif) 10 | 11 | ## Comparison 12 | ### use_voc2012_metric 13 | | Models | mAP | sheep | horse | bicycle | bottle | cow | sofa | bus | dog | cat | person | train | diningtable | aeroplane | car | pottedplant | tvmonitor | chair | bird | boat | motorbike | 14 | |------------|:---:|:--:|:--:|:--:|:---:|:--:|:--:|:--:|:--:|:--:|:--:|:---:|:--:|:--:|:--:|:--:|:---:|:--:|:--:|:--:|:--:| 15 | |resnet50_v1|75.16|74.08|89.27|80.27|55.74|83.38|69.35|85.13|88.80|91.42|81.17|81.71|62.74|78.65|86.86|47.00|76.71|50.29|79.05|60.51|80.96| 16 | |resnet101_v1|77.03|79.68|89.33|83.89|59.41|85.68|76.59|84.23|88.50|88.50|81.54|79.16|72.66|80.26|88.42|47.50|79.81|52.85|80.70|59.94|81.87| 17 | |mobilenet_v2|50.36|46.68|70.45|67.43|25.69|53.60|46.26|58.95|37.62|43.97|67.67|61.35|52.14|56.54|75.02|24.47|49.89|27.76|38.04|38.20|65.46| 18 | 19 | ### use_voc2007_metric 20 | | Models | mAP | sheep | horse | bicycle | bottle | cow | sofa | bus | dog | cat | person | train | diningtable | aeroplane | car | pottedplant | tvmonitor | chair | bird | boat | motorbike | 21 | |------------|:---:|:--:|:--:|:--:|:---:|:--:|:--:|:--:|:--:|:--:|:--:|:---:|:--:|:--:|:--:|:--:|:---:|:--:|:--:|:--:|:--:| 22 | |resnet50_v1|73.09|72.11|85.63|77.74|55.82|81.19|67.34|82.44|85.66|87.34|77.49|79.13|62.65|76.54|84.01|47.90|74.13|50.09|76.81|60.34|77.47| 23 | |resnet101_v1|74.63|76.35|86.18|79.87|58.73|83.4|74.75|80.03|85.4|86.55|78.24|76.07|70.89|78.52|86.26|47.80|76.34|52.14|78.06|58.90|78.04| 24 | |mobilenet_v2|50.34|46.99|68.45|65.89|28.16|53.21|46.96|57.80|38.60|44.12|66.20|60.49|52.40|56.06|72.68|26.91|49.99|30.18|39.38|38.54|64.74| 25 | 26 | 27 | ## Requirements 28 | 1、tensorflow >= 1.2 29 | 2、CUDA 8.0 30 | 3、python2.7 (anaconda2 recommended) 31 | 4、[opencv(cv2)](https://pypi.org/project/opencv-python/) 32 | 33 | ## Download Model 34 | 1、Please download the [resnet50_v1](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz) and [resnet101_v1](http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz) models pre-trained on ImageNet, and put them in $PATH_ROOT/data/pretrained_weights. 35 | 2、Please download the [mobilenet_v2](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz) model pre-trained on ImageNet, and put it in $PATH_ROOT/data/pretrained_weights/mobilenet. 36 | 3、Please download the [trained model](https://github.com/DetectionTeamUCAS/Models/tree/master/Faster-RCNN_Tensorflow) provided by this project, and put it in $PATH_ROOT/output/trained_weights.
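A quick way to sanity-check a downloaded checkpoint is to list a few of its variables. This is a minimal TF 1.x sketch, not part of the original instructions; the extracted filename (`resnet_v1_101.ckpt`) and path are assumptions based on step 1 above:

```
import tensorflow as tf

# Assumed path -- adjust to wherever you extracted the tarball from step 1.
reader = tf.train.NewCheckpointReader('data/pretrained_weights/resnet_v1_101.ckpt')

# Print the first few variable names and shapes stored in the checkpoint.
for name, shape in sorted(reader.get_variable_to_shape_map().items())[:5]:
    print(name, shape)
```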
37 | 38 | ## Data Format 39 | ``` 40 | ├── VOCdevkit 41 | │   ├── VOCdevkit_train 42 | │   ├── Annotation 43 | │   ├── JPEGImages 44 | │ ├── VOCdevkit_test 45 | │   ├── Annotation 46 | │   ├── JPEGImages 47 | ``` 48 | 49 | ## Compile 50 | ``` 51 | cd $PATH_ROOT/libs/box_utils/cython_utils 52 | python setup.py build_ext --inplace 53 | ``` 54 | 55 | ## Demo (available) 56 | 57 | **Select a configuration file in the folder ($PATH_ROOT/libs/configs/) and copy its contents into cfgs.py, then download the corresponding [weights](https://github.com/DetectionTeamUCAS/Models/tree/master/Faster-RCNN_Tensorflow).** 58 | 59 | ``` 60 | cd $PATH_ROOT/tools 61 | python inference.py --data_dir='/PATH/TO/IMAGES/' 62 | --save_dir='/PATH/TO/SAVE/RESULTS/' 63 | --GPU='0' 64 | ``` 65 | 66 | ## Eval 67 | ``` 68 | cd $PATH_ROOT/tools 69 | python eval.py --eval_imgs='/PATH/TO/IMAGES/' 70 | --annotation_dir='/PATH/TO/TEST/ANNOTATION/' 71 | --GPU='0' 72 | ``` 73 | 74 | ## Train 75 | 76 | 1、If you want to train on your own data, please note: 77 | ``` 78 | (1) Modify parameters (such as CLASS_NUM, DATASET_NAME, VERSION, etc.) in $PATH_ROOT/libs/configs/cfgs.py 79 | (2) Add category information in $PATH_ROOT/libs/label_name_dict/label_dict.py 80 | (3) Add data_name to line 76 of $PATH_ROOT/data/io/read_tfrecord.py 81 | ``` 82 | 83 | 2、Make the tfrecord 84 | ``` 85 | cd $PATH_ROOT/data/io/ 86 | python convert_data_to_tfrecord.py --VOC_dir='/PATH/TO/VOCdevkit/VOCdevkit_train/' 87 | --xml_dir='Annotation' 88 | --image_dir='JPEGImages' 89 | --save_name='train' 90 | --img_format='.jpg' 91 | --dataset='pascal' 92 | ``` 93 | 94 | 3、Train 95 | ``` 96 | cd $PATH_ROOT/tools 97 | python train.py 98 | ``` 99 | 100 | ## Tensorboard 101 | ``` 102 | cd $PATH_ROOT/output/summary 103 | tensorboard --logdir=.
104 | ``` 105 | ![2](scalars.png) 106 | ![1](images.png) 107 | 108 | ## Reference 109 | 1、https://github.com/endernewton/tf-faster-rcnn 110 | 2、https://github.com/zengarden/light_head_rcnn 111 | 3、https://github.com/tensorflow/models/tree/master/research/object_detection 112 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/__init__.py -------------------------------------------------------------------------------- /data/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/__init__.pyc -------------------------------------------------------------------------------- /data/io/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/io/__init__.py -------------------------------------------------------------------------------- /data/io/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/io/__init__.pyc -------------------------------------------------------------------------------- /data/io/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/io/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /data/io/__pycache__/read_tfrecord.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/io/__pycache__/read_tfrecord.cpython-35.pyc -------------------------------------------------------------------------------- /data/io/convert_data_to_tfrecord.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import sys 4 | sys.path.append('../../') 5 | import xml.etree.cElementTree as ET 6 | import numpy as np 7 | import tensorflow as tf 8 | import glob 9 | import cv2 10 | from libs.label_name_dict.label_dict import * 11 | from help_utils.tools import * 12 | 13 | tf.app.flags.DEFINE_string('VOC_dir', '/mnt/USBB/gx/DOTA/DOTA_TOTAL/', 'Voc dir') 14 | tf.app.flags.DEFINE_string('xml_dir', 'XML', 'xml dir') 15 | tf.app.flags.DEFINE_string('image_dir', 'IMG', 'image dir') 16 | tf.app.flags.DEFINE_string('save_name', 'train', 'save name') 17 | tf.app.flags.DEFINE_string('save_dir', '../tfrecord/', 'save name') 18 | tf.app.flags.DEFINE_string('img_format', '.png', 'format of image') 19 | tf.app.flags.DEFINE_string('dataset', 'DOTA_TOTAL', 'dataset') 20 | FLAGS = tf.app.flags.FLAGS 21 | 22 | 23 | def _int64_feature(value): 24 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 25 | 26 | 27 | def _bytes_feature(value): 28 | return 
tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 29 | 30 | 31 | def read_xml_gtbox_and_label(xml_path): 32 | """ 33 | :param xml_path: the path of the voc xml 34 | :return: a list that contains gtboxes and labels, shape is [num_of_gtboxes, 5], 35 | with [xmin, ymin, xmax, ymax, label] in each row 36 | """ 37 | 38 | tree = ET.parse(xml_path) 39 | root = tree.getroot() 40 | img_width = None 41 | img_height = None 42 | box_list = [] 43 | for child_of_root in root: 44 | # if child_of_root.tag == 'filename': 45 | # assert child_of_root.text == xml_path.split('/')[-1].split('.')[0] \ 46 | # + FLAGS.img_format, 'xml_name and img_name cannot match' 47 | 48 | if child_of_root.tag == 'size': 49 | for child_item in child_of_root: 50 | if child_item.tag == 'width': 51 | img_width = int(child_item.text) 52 | if child_item.tag == 'height': 53 | img_height = int(child_item.text) 54 | 55 | if child_of_root.tag == 'object': 56 | label = None 57 | for child_item in child_of_root: 58 | if child_item.tag == 'name': 59 | label = NAME_LABEL_MAP[child_item.text] 60 | if child_item.tag == 'bndbox': 61 | tmp_box = [] 62 | for node in child_item: 63 | tmp_box.append(int(node.text)) 64 | assert label is not None, 'label is none, error' 65 | tmp_box.append(label) 66 | box_list.append(tmp_box) 67 | 68 | gtbox_label = np.array(box_list, dtype=np.int32) 69 | 70 | return img_height, img_width, gtbox_label 71 | 72 | 73 | def convert_pascal_to_tfrecord(): 74 | xml_path = FLAGS.VOC_dir + FLAGS.xml_dir 75 | image_path = FLAGS.VOC_dir + FLAGS.image_dir 76 | save_path = FLAGS.save_dir + FLAGS.dataset + '_' + FLAGS.save_name + '.tfrecord' 77 | mkdir(FLAGS.save_dir) 78 | 79 | # writer_options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB) 80 | # writer = tf.python_io.TFRecordWriter(path=save_path, options=writer_options) 81 | writer = tf.python_io.TFRecordWriter(path=save_path) 82 | for count, xml in enumerate(glob.glob(xml_path + '/*.xml')): 83 | # to avoid path errors on different development platforms 84 | xml = xml.replace('\\', '/') 85 | 86 | img_name = xml.split('/')[-1].split('.')[0] + FLAGS.img_format 87 | img_path = image_path + '/' + img_name 88 | 89 | if not os.path.exists(img_path): 90 | print('{} does not exist!'.format(img_path)) 91 | continue 92 | 93 | img_height, img_width, gtbox_label = read_xml_gtbox_and_label(xml) 94 | 95 | # img = np.array(Image.open(img_path)) 96 | img = cv2.imread(img_path)[:, :, ::-1] 97 | 98 | feature = tf.train.Features(feature={ 99 | # do not need encode() in linux 100 | # 'img_name': _bytes_feature(img_name.encode()), 101 | 'img_name': _bytes_feature(img_name), 102 | 'img_height': _int64_feature(img_height), 103 | 'img_width': _int64_feature(img_width), 104 | 'img': _bytes_feature(img.tostring()), 105 | 'gtboxes_and_label': _bytes_feature(gtbox_label.tostring()), 106 | 'num_objects': _int64_feature(gtbox_label.shape[0]) 107 | }) 108 | 109 | example = tf.train.Example(features=feature) 110 | 111 | writer.write(example.SerializeToString()) 112 | 113 | view_bar('Conversion progress', count + 1, len(glob.glob(xml_path + '/*.xml'))) 114 | 115 | print('\nConversion is complete!') 116 | 117 | 118 | if __name__ == '__main__': 119 | # xml_path = '../data/dataset/VOCdevkit/VOC2007/Annotations/000005.xml' 120 | # read_xml_gtbox_and_label(xml_path) 121 | 122 | convert_pascal_to_tfrecord() 123 | -------------------------------------------------------------------------------- /data/io/convert_data_to_tfrecord_raw.py:
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | ''' 4 | this file is to convert pascal to tfrecord 5 | ''' 6 | 7 | import numpy as np 8 | import cv2 9 | import os, sys 10 | import tensorflow as tf 11 | import xml.etree.cElementTree as ET 12 | from libs.label_name_dict.label_dict import NAME_LABEL_MAP 13 | 14 | 15 | tf.app.flags.DEFINE_string('VOC_dir', '/home/yjr/DataSet/VOC', 'Voc dir ') 16 | FLAGS = tf.app.flags.FLAGS 17 | 18 | def _int64_feature(value): 19 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 20 | 21 | def _bytes_feature(value): 22 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 23 | 24 | def read_xml_target_box_and_label(xml_path): 25 | ''' 26 | 27 | :param xml_path: 28 | :return:img_height, img_width, gtboxes 29 | gtboxes is a array of shape [num_of_gtboxes, 5] 30 | a row in gtboxes is [xmin. ymin. xmax, ymax, label] 31 | ''' 32 | tree = ET.parse(xml_path) 33 | root = tree.getroot() 34 | img_width = None 35 | img_height = None 36 | box_list = [] 37 | for child_of_root in root: 38 | if child_of_root.tag == 'filename': 39 | assert child_of_root.text == xml_path.split('/')[-1].split('.')[0] + '.jpg', 'xml_name and img_name cannot match' 40 | if child_of_root.tag == 'size': 41 | for child_item in child_of_root: 42 | if child_item.tag == 'width': 43 | img_width = int(child_item.text) 44 | if child_item.tag == 'height': 45 | img_height = int(child_item.text) 46 | if child_of_root.tag == 'object': 47 | label = None 48 | for child_item in child_of_root: 49 | if child_item.tag == 'name': 50 | # print child_item.text 51 | label = NAME_LABEL_MAP[child_item.text] 52 | if child_item.tag == 'bndbox': 53 | tmp_box = [] 54 | for node in child_item: 55 | tmp_box.append(int(node.text)) # [xmin, ymin. 
xmax, ymax] 56 | assert label is not None, 'label is none, error' 57 | tmp_box.append(label) #[xmin, ymin, xmax, ymax, label] 58 | box_list.append(tmp_box) 59 | 60 | gtbox_list = np.array(box_list, dtype=np.int32) # [xmin, ymin, xmax, ymax, label] 61 | 62 | xmin, ymin, xmax, ymax, label = gtbox_list[:, 0], gtbox_list[:, 1], gtbox_list[:, 2], gtbox_list[:, 3],\ 63 | gtbox_list[:, 4] 64 | 65 | gtbox_list = np.transpose(np.stack([xmin, ymin, xmax, ymax, label], axis=0)) # [xmin, ymin, xmax, ymax, label] 66 | # print gtbox_list.shape 67 | return img_height, img_width, gtbox_list 68 | 69 | def convert_pascal(dataset_name): 70 | 71 | dataset_rootdir = os.path.join(FLAGS.VOC_dir, 'VOCtrain_val/VOC2007') if dataset_name == 'train' \ 72 | else os.path.join(FLAGS.VOC_dir, 'VOC_test/VOC2007') 73 | 74 | imgname_list = [] 75 | part_name = 'trainval.txt' if dataset_name == 'train' else 'test.txt' 76 | with open(os.path.join(dataset_rootdir, 'ImageSets/Main/aeroplane_'+part_name)) as f: 77 | all_lines = f.readlines() 78 | 79 | for a_line in all_lines: 80 | imgname_list.append(a_line.split()[0].strip()) 81 | 82 | # writer_options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB) 83 | # writer = tf.python_io.TFRecordWriter(path='../data/tfrecords/pascal_'+dataset_name+'.tfrecord', options=writer_options) 84 | writer = tf.python_io.TFRecordWriter(path='../tfrecord/pascal_' + dataset_name + '.tfrecord') 85 | for i, img_name in enumerate(imgname_list): 86 | img_np = cv2.imread(os.path.join(dataset_rootdir, 'JPEGImages/'+img_name+'.jpg')) 87 | # if img_np == None: 88 | # print img_name 89 | img_np = img_np[:, :, ::-1] 90 | assert img_np is not None, 'read img erro, imgnp is None' 91 | xml_path = os.path.join(dataset_rootdir, 'Annotations/'+img_name+'.xml') 92 | img_height, img_width, gtboxes = read_xml_target_box_and_label(xml_path) 93 | 94 | example = tf.train.Example(features=tf.train.Features(feature={ 95 | 'img_name': _bytes_feature(img_name), 96 | 'img_height': _int64_feature(img_height), 97 | 'img_width': _int64_feature(img_width), 98 | 'img': _bytes_feature(img_np.tostring()), 99 | 'gtboxes_and_label': _bytes_feature(gtboxes.tostring()), 100 | 'num_objects': _int64_feature(gtboxes.shape[0]) 101 | })) 102 | writer.write(example.SerializeToString()) 103 | if i % 100 == 0: 104 | print('{} {} imgs convert over'.format(i, dataset_name)) 105 | print(20*"@") 106 | print('all {} imgs convert over, the num is {}'.format(dataset_name, i)) 107 | 108 | if __name__ == '__main__': 109 | # w, h, gtboxes = read_xml_target_box_and_label('/home/yjr/DataSet/VOC/VOCtrain_val/VOC2007/Annotations/000005.xml') 110 | # print w, h 111 | # print gtboxes 112 | convert_pascal('train') 113 | convert_pascal('test') 114 | 115 | -------------------------------------------------------------------------------- /data/io/image_preprocess.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | from __future__ import division 6 | 7 | import tensorflow as tf 8 | 9 | import numpy as np 10 | 11 | 12 | def max_length_limitation(length, length_limitation): 13 | return tf.cond(tf.less(length, length_limitation), 14 | true_fn=lambda: length, 15 | false_fn=lambda: length_limitation) 16 | 17 | def short_side_resize(img_tensor, gtboxes_and_label, target_shortside_len, length_limitation=1200): 18 | ''' 19 | 20 | :param img_tensor:[h, w, c], gtboxes_and_label:[-1, 5]. 
gtboxes: [xmin, ymin, xmax, ymax] 21 | :param target_shortside_len: 22 | :param length_limitation: set max length to avoid OUT OF MEMORY 23 | :return: 24 | ''' 25 | img_h, img_w = tf.shape(img_tensor)[0], tf.shape(img_tensor)[1] 26 | new_h, new_w = tf.cond(tf.less(img_h, img_w), 27 | true_fn=lambda: (target_shortside_len, 28 | max_length_limitation(target_shortside_len * img_w // img_h, length_limitation)), 29 | false_fn=lambda: (max_length_limitation(target_shortside_len * img_h // img_w, length_limitation), 30 | target_shortside_len)) 31 | 32 | img_tensor = tf.expand_dims(img_tensor, axis=0) 33 | img_tensor = tf.image.resize_bilinear(img_tensor, [new_h, new_w]) 34 | 35 | xmin, ymin, xmax, ymax, label = tf.unstack(gtboxes_and_label, axis=1) 36 | 37 | new_xmin, new_ymin = xmin * new_w // img_w, ymin * new_h // img_h 38 | new_xmax, new_ymax = xmax * new_w // img_w, ymax * new_h // img_h 39 | img_tensor = tf.squeeze(img_tensor, axis=0) # ensure image tensor rank is 3 40 | 41 | return img_tensor, tf.transpose(tf.stack([new_xmin, new_ymin, new_xmax, new_ymax, label], axis=0)) 42 | 43 | 44 | def short_side_resize_for_inference_data(img_tensor, target_shortside_len, length_limitation=1200): 45 | img_h, img_w = tf.shape(img_tensor)[0], tf.shape(img_tensor)[1] 46 | 47 | new_h, new_w = tf.cond(tf.less(img_h, img_w), 48 | true_fn=lambda: (target_shortside_len, 49 | max_length_limitation(target_shortside_len * img_w // img_h, length_limitation)), 50 | false_fn=lambda: (max_length_limitation(target_shortside_len * img_h // img_w, length_limitation), 51 | target_shortside_len)) 52 | 53 | img_tensor = tf.expand_dims(img_tensor, axis=0) 54 | img_tensor = tf.image.resize_bilinear(img_tensor, [new_h, new_w]) 55 | 56 | img_tensor = tf.squeeze(img_tensor, axis=0) # ensure image tensor rank is 3 57 | return img_tensor 58 | 59 | def flip_left_to_right(img_tensor, gtboxes_and_label): 60 | 61 | h, w = tf.shape(img_tensor)[0], tf.shape(img_tensor)[1] 62 | 63 | img_tensor = tf.image.flip_left_right(img_tensor) 64 | 65 | xmin, ymin, xmax, ymax, label = tf.unstack(gtboxes_and_label, axis=1) 66 | new_xmax = w - xmin 67 | new_xmin = w - xmax 68 | 69 | return img_tensor, tf.transpose(tf.stack([new_xmin, ymin, new_xmax, ymax, label], axis=0)) 70 | 71 | def random_flip_left_right(img_tensor, gtboxes_and_label): 72 | img_tensor, gtboxes_and_label= tf.cond(tf.less(tf.random_uniform(shape=[], minval=0, maxval=1), 0.5), 73 | lambda: flip_left_to_right(img_tensor, gtboxes_and_label), 74 | lambda: (img_tensor, gtboxes_and_label)) 75 | 76 | return img_tensor, gtboxes_and_label 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /data/io/image_preprocess.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/io/image_preprocess.pyc -------------------------------------------------------------------------------- /data/io/read_tfrecord.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | from __future__ import division 6 | 7 | import numpy as np 8 | import tensorflow as tf 9 | import os 10 | from data.io import image_preprocess 11 | from libs.configs import cfgs 12 | 13 | def read_single_example_and_decode(filename_queue): 14 | 15 | # tfrecord_options = 
tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB) 16 | 17 | # reader = tf.TFRecordReader(options=tfrecord_options) 18 | reader = tf.TFRecordReader() 19 | _, serialized_example = reader.read(filename_queue) 20 | 21 | features = tf.parse_single_example( 22 | serialized=serialized_example, 23 | features={ 24 | 'img_name': tf.FixedLenFeature([], tf.string), 25 | 'img_height': tf.FixedLenFeature([], tf.int64), 26 | 'img_width': tf.FixedLenFeature([], tf.int64), 27 | 'img': tf.FixedLenFeature([], tf.string), 28 | 'gtboxes_and_label': tf.FixedLenFeature([], tf.string), 29 | 'num_objects': tf.FixedLenFeature([], tf.int64) 30 | } 31 | ) 32 | img_name = features['img_name'] 33 | img_height = tf.cast(features['img_height'], tf.int32) 34 | img_width = tf.cast(features['img_width'], tf.int32) 35 | img = tf.decode_raw(features['img'], tf.uint8) 36 | 37 | img = tf.reshape(img, shape=[img_height, img_width, 3]) 38 | 39 | gtboxes_and_label = tf.decode_raw(features['gtboxes_and_label'], tf.int32) 40 | gtboxes_and_label = tf.reshape(gtboxes_and_label, [-1, 5]) 41 | 42 | num_objects = tf.cast(features['num_objects'], tf.int32) 43 | return img_name, img, gtboxes_and_label, num_objects 44 | 45 | 46 | def read_and_prepocess_single_img(filename_queue, shortside_len, is_training): 47 | 48 | img_name, img, gtboxes_and_label, num_objects = read_single_example_and_decode(filename_queue) 49 | 50 | img = tf.cast(img, tf.float32) 51 | 52 | if is_training: 53 | img, gtboxes_and_label = image_preprocess.short_side_resize(img_tensor=img, gtboxes_and_label=gtboxes_and_label, 54 | target_shortside_len=shortside_len, 55 | length_limitation=cfgs.IMG_MAX_LENGTH) 56 | img, gtboxes_and_label = image_preprocess.random_flip_left_right(img_tensor=img, 57 | gtboxes_and_label=gtboxes_and_label) 58 | 59 | else: 60 | img, gtboxes_and_label = image_preprocess.short_side_resize(img_tensor=img, gtboxes_and_label=gtboxes_and_label, 61 | target_shortside_len=shortside_len, 62 | length_limitation=cfgs.IMG_MAX_LENGTH) 63 | img = img - tf.constant([[cfgs.PIXEL_MEAN]]) # sub pixel mean at last 64 | return img_name, img, gtboxes_and_label, num_objects 65 | 66 | 67 | def next_batch(dataset_name, batch_size, shortside_len, is_training): 68 | ''' 69 | :return: 70 | img_name_batch: shape(1, 1) 71 | img_batch: shape:(1, new_imgH, new_imgW, C) 72 | gtboxes_and_label_batch: shape(1, Num_Of_objects, 5] .each row is [x1, y1, x2, y2, label] 73 | ''' 74 | assert batch_size == 1, "we only support batch_size is 1.We may support large batch_size in the future" 75 | 76 | if dataset_name not in ['ship', 'spacenet', 'pascal', 'coco']: 77 | raise ValueError('dataSet name must be in pascal, coco spacenet and ship') 78 | 79 | if is_training: 80 | pattern = os.path.join('../data/tfrecord', dataset_name + '_train*') 81 | else: 82 | pattern = os.path.join('../data/tfrecord', dataset_name + '_test*') 83 | 84 | print('tfrecord path is -->', os.path.abspath(pattern)) 85 | 86 | filename_tensorlist = tf.train.match_filenames_once(pattern) 87 | 88 | filename_queue = tf.train.string_input_producer(filename_tensorlist) 89 | 90 | img_name, img, gtboxes_and_label, num_obs = read_and_prepocess_single_img(filename_queue, shortside_len, 91 | is_training=is_training) 92 | img_name_batch, img_batch, gtboxes_and_label_batch, num_obs_batch = \ 93 | tf.train.batch( 94 | [img_name, img, gtboxes_and_label, num_obs], 95 | batch_size=batch_size, 96 | capacity=1, 97 | num_threads=1, 98 | dynamic_pad=True) 99 | return img_name_batch, img_batch, gtboxes_and_label_batch, 
num_obs_batch 100 | -------------------------------------------------------------------------------- /data/io/read_tfrecord.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/io/read_tfrecord.pyc -------------------------------------------------------------------------------- /data/lib_coco/PythonAPI/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | # install pycocotools locally 3 | python setup.py build_ext --inplace 4 | rm -rf build 5 | 6 | install: 7 | # install pycocotools to the Python site-packages 8 | python setup.py build_ext install 9 | rm -rf build -------------------------------------------------------------------------------- /data/lib_coco/PythonAPI/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/lib_coco/PythonAPI/__init__.py -------------------------------------------------------------------------------- /data/lib_coco/PythonAPI/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/lib_coco/PythonAPI/__init__.pyc -------------------------------------------------------------------------------- /data/lib_coco/PythonAPI/pycocoEvalDemo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%matplotlib inline\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "from pycocotools.coco import COCO\n", 14 | "from pycocotools.cocoeval import COCOeval\n", 15 | "import numpy as np\n", 16 | "import skimage.io as io\n", 17 | "import pylab\n", 18 | "pylab.rcParams['figure.figsize'] = (10.0, 8.0)" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [ 28 | { 29 | "name": "stdout", 30 | "output_type": "stream", 31 | "text": [ 32 | "Running demo for *bbox* results.\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "annType = ['segm','bbox','keypoints']\n", 38 | "annType = annType[1] #specify type here\n", 39 | "prefix = 'person_keypoints' if annType=='keypoints' else 'instances'\n", 40 | "print 'Running demo for *%s* results.'%(annType)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [ 50 | { 51 | "name": "stdout", 52 | "output_type": "stream", 53 | "text": [ 54 | "loading annotations into memory...\n", 55 | "Done (t=8.01s)\n", 56 | "creating index...\n", 57 | "index created!\n" 58 | ] 59 | } 60 | ], 61 | "source": [ 62 | "#initialize COCO ground truth api\n", 63 | "dataDir='../'\n", 64 | "dataType='val2014'\n", 65 | "annFile = '%s/annotations/%s_%s.json'%(dataDir,prefix,dataType)\n", 66 | "cocoGt=COCO(annFile)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 4, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [ 76 | { 77 | "name": "stdout", 78 | "output_type": "stream", 79 | "text": [ 80 | "Loading and preparing results... 
\n", 81 | "DONE (t=0.05s)\n", 82 | "creating index...\n", 83 | "index created!\n" 84 | ] 85 | } 86 | ], 87 | "source": [ 88 | "#initialize COCO detections api\n", 89 | "resFile='%s/results/%s_%s_fake%s100_results.json'\n", 90 | "resFile = resFile%(dataDir, prefix, dataType, annType)\n", 91 | "cocoDt=cocoGt.loadRes(resFile)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 5, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "imgIds=sorted(cocoGt.getImgIds())\n", 103 | "imgIds=imgIds[0:100]\n", 104 | "imgId = imgIds[np.random.randint(100)]" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 6, 110 | "metadata": { 111 | "collapsed": false 112 | }, 113 | "outputs": [ 114 | { 115 | "name": "stdout", 116 | "output_type": "stream", 117 | "text": [ 118 | "Running per image evaluation... \n", 119 | "DONE (t=0.46s).\n", 120 | "Accumulating evaluation results... \n", 121 | "DONE (t=0.38s).\n", 122 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.505\n", 123 | " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.697\n", 124 | " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.573\n", 125 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.586\n", 126 | " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.519\n", 127 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.501\n", 128 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.387\n", 129 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.594\n", 130 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.595\n", 131 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.640\n", 132 | " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.566\n", 133 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.564\n" 134 | ] 135 | } 136 | ], 137 | "source": [ 138 | "# running evaluation\n", 139 | "cocoEval = COCOeval(cocoGt,cocoDt,annType)\n", 140 | "cocoEval.params.imgIds = imgIds\n", 141 | "cocoEval.evaluate()\n", 142 | "cocoEval.accumulate()\n", 143 | "cocoEval.summarize()" 144 | ] 145 | } 146 | ], 147 | "metadata": { 148 | "kernelspec": { 149 | "display_name": "Python 2", 150 | "language": "python", 151 | "name": "python2" 152 | }, 153 | "language_info": { 154 | "codemirror_mode": { 155 | "name": "ipython", 156 | "version": 2 157 | }, 158 | "file_extension": ".py", 159 | "mimetype": "text/x-python", 160 | "name": "python", 161 | "nbconvert_exporter": "python", 162 | "pygments_lexer": "ipython2", 163 | "version": "2.7.10" 164 | } 165 | }, 166 | "nbformat": 4, 167 | "nbformat_minor": 0 168 | } 169 | -------------------------------------------------------------------------------- /data/lib_coco/PythonAPI/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /data/lib_coco/PythonAPI/pycocotools/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/lib_coco/PythonAPI/pycocotools/__init__.pyc -------------------------------------------------------------------------------- 
/data/lib_coco/PythonAPI/pycocotools/_mask.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/lib_coco/PythonAPI/pycocotools/_mask.so -------------------------------------------------------------------------------- /data/lib_coco/PythonAPI/pycocotools/coco.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/lib_coco/PythonAPI/pycocotools/coco.pyc -------------------------------------------------------------------------------- /data/lib_coco/PythonAPI/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import pycocotools._mask as _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 
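 38 | # Illustrative example (added commentary, not part of the original header):
 # a tiny end-to-end use of the calls listed above, assuming numpy is available:
 #   import numpy as np
 #   m = np.zeros((4, 4, 1), dtype=np.uint8, order='F')  # Fortran-order uint8 mask
 #   m[1:3, 1:3, 0] = 1                                   # 2x2 square of ones
 #   R = encode(m)                                        # list with one RLE dict
 #   print(area(R), toBbox(R))                            # -> [4] and [[1. 1. 2. 2.]]
 #   assert (decode(R) == m).all()                        # RLE round-trips exactly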
38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | iou = _mask.iou 77 | merge = _mask.merge 78 | frPyObjects = _mask.frPyObjects 79 | 80 | def encode(bimask): 81 | if len(bimask.shape) == 3: 82 | return _mask.encode(bimask) 83 | elif len(bimask.shape) == 2: 84 | h, w = bimask.shape 85 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] 86 | 87 | def decode(rleObjs): 88 | if type(rleObjs) == list: 89 | return _mask.decode(rleObjs) 90 | else: 91 | return _mask.decode([rleObjs])[:,:,0] 92 | 93 | def area(rleObjs): 94 | if type(rleObjs) == list: 95 | return _mask.area(rleObjs) 96 | else: 97 | return _mask.area([rleObjs])[0] 98 | 99 | def toBbox(rleObjs): 100 | if type(rleObjs) == list: 101 | return _mask.toBbox(rleObjs) 102 | else: 103 | return _mask.toBbox([rleObjs])[0] -------------------------------------------------------------------------------- /data/lib_coco/PythonAPI/pycocotools/mask.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/lib_coco/PythonAPI/pycocotools/mask.pyc -------------------------------------------------------------------------------- /data/lib_coco/PythonAPI/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | import numpy as np 3 | 4 | # To compile and install locally run "python setup.py build_ext --inplace" 5 | # To install library to Python site-packages run "python setup.py build_ext install" 6 | 7 | ext_modules = [ 8 | Extension( 9 | 'pycocotools._mask', 10 | sources=['../common/maskApi.c', 'pycocotools/_mask.pyx'], 11 | include_dirs = [np.get_include(), '../common'], 12 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'], 13 | ) 14 | ] 15 | 16 | setup( 17 | name='pycocotools', 18 | packages=['pycocotools'], 19 | package_dir = {'pycocotools': 'pycocotools'}, 20 | install_requires=[ 21 | 'setuptools>=18.0', 22 | 'cython>=0.27.3', 23 | 'matplotlib>=2.1.0' 24 | ], 25 | version='2.0', 26 | ext_modules= ext_modules 27 | ) 28 | -------------------------------------------------------------------------------- /data/lib_coco/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/lib_coco/__init__.py -------------------------------------------------------------------------------- /data/lib_coco/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/lib_coco/__init__.pyc -------------------------------------------------------------------------------- /data/lib_coco/common/gason.h: -------------------------------------------------------------------------------- 1 | // https://github.com/vivkin/gason - pulled January 10, 2016 2 | #pragma once 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | enum JsonTag { 9 | JSON_NUMBER = 0, 10 | JSON_STRING, 11 | JSON_ARRAY, 12 | JSON_OBJECT, 13 | JSON_TRUE, 14 | JSON_FALSE, 15 | JSON_NULL = 0xF 16 | }; 17 | 18 | struct JsonNode; 19 | 20 | #define JSON_VALUE_PAYLOAD_MASK 0x00007FFFFFFFFFFFULL 21 | #define JSON_VALUE_NAN_MASK 0x7FF8000000000000ULL 22 | #define JSON_VALUE_TAG_MASK 0xF 23 | #define JSON_VALUE_TAG_SHIFT 47 24 | 
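 // Note (added commentary, not in the original header): JsonValue below uses
 // NaN-boxing. A genuine number is stored directly in the double; any other
 // value is encoded as a quiet-NaN bit pattern (JSON_VALUE_NAN_MASK) carrying
 // a 4-bit tag at bit 47 plus a 47-bit pointer/payload in the low bits, so
 // every JSON value fits in a single 64-bit word.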
25 | union JsonValue { 26 | uint64_t ival; 27 | double fval; 28 | 29 | JsonValue(double x) 30 | : fval(x) { 31 | } 32 | JsonValue(JsonTag tag = JSON_NULL, void *payload = nullptr) { 33 | assert((uintptr_t)payload <= JSON_VALUE_PAYLOAD_MASK); 34 | ival = JSON_VALUE_NAN_MASK | ((uint64_t)tag << JSON_VALUE_TAG_SHIFT) | (uintptr_t)payload; 35 | } 36 | bool isDouble() const { 37 | return (int64_t)ival <= (int64_t)JSON_VALUE_NAN_MASK; 38 | } 39 | JsonTag getTag() const { 40 | return isDouble() ? JSON_NUMBER : JsonTag((ival >> JSON_VALUE_TAG_SHIFT) & JSON_VALUE_TAG_MASK); 41 | } 42 | uint64_t getPayload() const { 43 | assert(!isDouble()); 44 | return ival & JSON_VALUE_PAYLOAD_MASK; 45 | } 46 | double toNumber() const { 47 | assert(getTag() == JSON_NUMBER); 48 | return fval; 49 | } 50 | char *toString() const { 51 | assert(getTag() == JSON_STRING); 52 | return (char *)getPayload(); 53 | } 54 | JsonNode *toNode() const { 55 | assert(getTag() == JSON_ARRAY || getTag() == JSON_OBJECT); 56 | return (JsonNode *)getPayload(); 57 | } 58 | }; 59 | 60 | struct JsonNode { 61 | JsonValue value; 62 | JsonNode *next; 63 | char *key; 64 | }; 65 | 66 | struct JsonIterator { 67 | JsonNode *p; 68 | 69 | void operator++() { 70 | p = p->next; 71 | } 72 | bool operator!=(const JsonIterator &x) const { 73 | return p != x.p; 74 | } 75 | JsonNode *operator*() const { 76 | return p; 77 | } 78 | JsonNode *operator->() const { 79 | return p; 80 | } 81 | }; 82 | 83 | inline JsonIterator begin(JsonValue o) { 84 | return JsonIterator{o.toNode()}; 85 | } 86 | inline JsonIterator end(JsonValue) { 87 | return JsonIterator{nullptr}; 88 | } 89 | 90 | #define JSON_ERRNO_MAP(XX) \ 91 | XX(OK, "ok") \ 92 | XX(BAD_NUMBER, "bad number") \ 93 | XX(BAD_STRING, "bad string") \ 94 | XX(BAD_IDENTIFIER, "bad identifier") \ 95 | XX(STACK_OVERFLOW, "stack overflow") \ 96 | XX(STACK_UNDERFLOW, "stack underflow") \ 97 | XX(MISMATCH_BRACKET, "mismatch bracket") \ 98 | XX(UNEXPECTED_CHARACTER, "unexpected character") \ 99 | XX(UNQUOTED_KEY, "unquoted key") \ 100 | XX(BREAKING_BAD, "breaking bad") \ 101 | XX(ALLOCATION_FAILURE, "allocation failure") 102 | 103 | enum JsonErrno { 104 | #define XX(no, str) JSON_##no, 105 | JSON_ERRNO_MAP(XX) 106 | #undef XX 107 | }; 108 | 109 | const char *jsonStrError(int err); 110 | 111 | class JsonAllocator { 112 | struct Zone { 113 | Zone *next; 114 | size_t used; 115 | } *head = nullptr; 116 | 117 | public: 118 | JsonAllocator() = default; 119 | JsonAllocator(const JsonAllocator &) = delete; 120 | JsonAllocator &operator=(const JsonAllocator &) = delete; 121 | JsonAllocator(JsonAllocator &&x) : head(x.head) { 122 | x.head = nullptr; 123 | } 124 | JsonAllocator &operator=(JsonAllocator &&x) { 125 | head = x.head; 126 | x.head = nullptr; 127 | return *this; 128 | } 129 | ~JsonAllocator() { 130 | deallocate(); 131 | } 132 | void *allocate(size_t size); 133 | void deallocate(); 134 | }; 135 | 136 | int jsonParse(char *str, char **endptr, JsonValue *value, JsonAllocator &allocator); 137 | -------------------------------------------------------------------------------- /data/lib_coco/common/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | 9 | typedef unsigned int uint; 10 | typedef unsigned long siz; 11 | typedef unsigned char byte; 12 | typedef double* BB; 13 | typedef struct { siz h, w, m; uint *cnts; } RLE; 14 | 15 | /* Initialize/destroy RLE. */ 16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 17 | void rleFree( RLE *R ); 18 | 19 | /* Initialize/destroy RLE array. */ 20 | void rlesInit( RLE **R, siz n ); 21 | void rlesFree( RLE **R, siz n ); 22 | 23 | /* Encode binary masks using RLE. */ 24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 25 | 26 | /* Decode binary masks encoded via RLE. */ 27 | void rleDecode( const RLE *R, byte *mask, siz n ); 28 | 29 | /* Compute union or intersection of encoded masks. */ 30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); 31 | 32 | /* Compute area of encoded masks. */ 33 | void rleArea( const RLE *R, siz n, uint *a ); 34 | 35 | /* Compute intersection over union between masks. */ 36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 37 | 38 | /* Compute non-maximum suppression between bounding masks */ 39 | void rleNms( RLE *dt, siz n, uint *keep, double thr ); 40 | 41 | /* Compute intersection over union between bounding boxes. */ 42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 43 | 44 | /* Compute non-maximum suppression between bounding boxes */ 45 | void bbNms( BB dt, siz n, uint *keep, double thr ); 46 | 47 | /* Get bounding boxes surrounding encoded masks. */ 48 | void rleToBbox( const RLE *R, BB bb, siz n ); 49 | 50 | /* Convert bounding boxes to encoded masks. */ 51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 52 | 53 | /* Convert polygon to encoded mask. */ 54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 55 | 56 | /* Get compressed string representation of encoded mask. */ 57 | char* rleToString( const RLE *R ); 58 | 59 | /* Convert from compressed string representation of encoded mask. */ 60 | void rleFrString( RLE *R, char *s, siz h, siz w ); 61 | -------------------------------------------------------------------------------- /data/lib_coco/get_coco_next_batch.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | import sys, os 6 | # sys.path.insert(0, os.path.abspath('.')) 7 | sys.path.insert(0, './PythonAPI/') 8 | # sys.path.insert(0, os.path.abspath('data')) 9 | for _ in sys.path: 10 | print (_) 11 | from PythonAPI.pycocotools.coco import COCO 12 | import cv2 13 | import numpy as np 14 | import os 15 | from libs.label_name_dict import coco_dict 16 | 17 | 18 | annotation_path = '/home/yjr/DataSet/COCO/2017/annotations/instances_train2017.json' 19 | print ("load coco .... 
it will cost about 17s..") 20 | coco = COCO(annotation_path) 21 | 22 | imgId_list = coco.getImgIds() 23 | imgId_list = np.array(imgId_list) 24 | 25 | total_imgs = len(imgId_list) 26 | 27 | # print (NAME_LABEL_DICT) 28 | 29 | 30 | def next_img(step): 31 | 32 | if step % total_imgs == 0: 33 | np.random.shuffle(imgId_list) 34 | imgid = imgId_list[step % total_imgs] 35 | 36 | imgname = coco.loadImgs(ids=[imgid])[0]['file_name'] 37 | # print (type(imgname), imgname) 38 | img = cv2.imread(os.path.join("/home/yjr/DataSet/COCO/2017/train2017", imgname)) 39 | 40 | annotation = coco.imgToAnns[imgid] 41 | gtbox_and_label_list = [] 42 | for ann in annotation: 43 | box = ann['bbox'] 44 | 45 | box = [box[0], box[1], box[0]+box[2], box[1]+box[3]] # [xmin, ymin, xmax, ymax] 46 | cat_id = ann['category_id'] 47 | cat_name = coco_dict.originID_classes[cat_id] #ID_NAME_DICT[cat_id] 48 | label = coco_dict.NAME_LABEL_MAP[cat_name] 49 | gtbox_and_label_list.append(box + [label]) 50 | gtbox_and_label_list = np.array(gtbox_and_label_list, dtype=np.int32) 51 | # print (img.shape, gtbox_and_label_list.shape) 52 | if gtbox_and_label_list.shape[0] == 0: 53 | return next_img(step+1) 54 | else: 55 | return imgid, img[:, :, ::-1], gtbox_and_label_list 56 | 57 | 58 | if __name__ == '__main__': 59 | 60 | imgid, img, gtbox = next_img(3234) 61 | 62 | print("::") 63 | from libs.box_utils.draw_box_in_img import draw_boxes_with_label_and_scores 64 | 65 | img = draw_boxes_with_label_and_scores(img_array=img, boxes=gtbox[:, :-1], labels=gtbox[:, -1], 66 | scores=np.ones(shape=(len(gtbox), ))) 67 | print ("_----") 68 | 69 | 70 | cv2.imshow("test", img) 71 | cv2.waitKey(0) 72 | 73 | 74 | -------------------------------------------------------------------------------- /data/lib_coco/get_coco_next_batch.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/data/lib_coco/get_coco_next_batch.pyc -------------------------------------------------------------------------------- /data/pretrained_weights/README.md: -------------------------------------------------------------------------------- 1 | Please download the [resnet50_v1](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz) and [resnet101_v1](http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz) models pre-trained on ImageNet, and put them in data/pretrained_weights. -------------------------------------------------------------------------------- /data/pretrained_weights/mobilenet/README.md: -------------------------------------------------------------------------------- 1 | Please download the [mobilenet_v2](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz) model pre-trained on ImageNet, and put it in data/pretrained_weights/mobilenet.
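The two tfrecord entries below are local-path placeholders; the real files are generated by data/io/convert_data_to_tfrecord.py. As a rough sanity check, you can count the records in a generated file (a minimal TF 1.x sketch; the path assumes the converter's default save_dir, dataset, and save_name flags):

```
import tensorflow as tf

# Assumed output path of convert_data_to_tfrecord.py -- adjust if you changed the flags.
count = sum(1 for _ in tf.python_io.tf_record_iterator('data/tfrecord/pascal_train.tfrecord'))
print('number of examples:', count)
```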
-------------------------------------------------------------------------------- /data/tfrecord/pascal_test.tfrecord: -------------------------------------------------------------------------------- 1 | /home/yjr/PycharmProjects/Faster-RCNN_TensorflowOLD/data/tfrecord/pascal_test.tfrecord -------------------------------------------------------------------------------- /data/tfrecord/pascal_train.tfrecord: -------------------------------------------------------------------------------- 1 | /home/yjr/PycharmProjects/Faster-RCNN_TensorflowOLD/data/tfrecord/pascal_train.tfrecord -------------------------------------------------------------------------------- /help_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/help_utils/__init__.py -------------------------------------------------------------------------------- /help_utils/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/help_utils/__init__.pyc -------------------------------------------------------------------------------- /help_utils/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/help_utils/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /help_utils/__pycache__/tools.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/help_utils/__pycache__/tools.cpython-35.pyc -------------------------------------------------------------------------------- /help_utils/tools.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import math 4 | import sys 5 | import os 6 | 7 | 8 | def view_bar(message, num, total): 9 | rate = num / total 10 | rate_num = int(rate * 40) 11 | rate_nums = math.ceil(rate * 100) 12 | r = '\r%s:[%s%s]%d%%\t%d/%d' % (message, ">" * rate_num, " " * (40 - rate_num), rate_nums, num, total,) 13 | sys.stdout.write(r) 14 | sys.stdout.flush() 15 | 16 | 17 | def mkdir(path): 18 | if not os.path.exists(path): 19 | os.makedirs(path) -------------------------------------------------------------------------------- /help_utils/tools.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/help_utils/tools.pyc -------------------------------------------------------------------------------- /images.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/images.png -------------------------------------------------------------------------------- /libs/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/__init__.py -------------------------------------------------------------------------------- /libs/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/__init__.pyc -------------------------------------------------------------------------------- /libs/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /libs/box_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/box_utils/__init__.py -------------------------------------------------------------------------------- /libs/box_utils/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/box_utils/__init__.pyc -------------------------------------------------------------------------------- /libs/box_utils/anchor_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, print_function, division 3 | 4 | import tensorflow as tf 5 | 6 | def make_anchors(base_anchor_size, anchor_scales, anchor_ratios, 7 | featuremap_height, featuremap_width, 8 | stride, name='make_anchors'): 9 | ''' 10 | :param base_anchor_size:256 11 | :param anchor_scales: 12 | :param anchor_ratios: 13 | :param featuremap_height: 14 | :param featuremap_width: 15 | :param stride: 16 | :return: 17 | ''' 18 | with tf.variable_scope(name): 19 | base_anchor = tf.constant([0, 0, base_anchor_size, base_anchor_size], tf.float32) # [x_center, y_center, w, h] 20 | 21 | ws, hs = enum_ratios(enum_scales(base_anchor, anchor_scales), 22 | anchor_ratios) # per locations ws and hs 23 | 24 | x_centers = tf.range(featuremap_width, dtype=tf.float32) * stride 25 | y_centers = tf.range(featuremap_height, dtype=tf.float32) * stride 26 | 27 | x_centers, y_centers = tf.meshgrid(x_centers, y_centers) 28 | 29 | ws, x_centers = tf.meshgrid(ws, x_centers) 30 | hs, y_centers = tf.meshgrid(hs, y_centers) 31 | 32 | anchor_centers = tf.stack([x_centers, y_centers], 2) 33 | anchor_centers = tf.reshape(anchor_centers, [-1, 2]) 34 | 35 | box_sizes = tf.stack([ws, hs], axis=2) 36 | box_sizes = tf.reshape(box_sizes, [-1, 2]) 37 | # anchors = tf.concat([anchor_centers, box_sizes], axis=1) 38 | anchors = tf.concat([anchor_centers - 0.5*box_sizes, 39 | anchor_centers + 0.5*box_sizes], axis=1) 40 | return anchors 41 | 42 | 43 | def enum_scales(base_anchor, anchor_scales): 44 | 45 | anchor_scales = base_anchor * tf.constant(anchor_scales, dtype=tf.float32, shape=(len(anchor_scales), 1)) 46 | 47 | return anchor_scales 48 | 49 | 50 | def enum_ratios(anchors, anchor_ratios): 51 | ''' 52 | ratio = h /w 53 | :param anchors: 54 | :param anchor_ratios: 55 | :return: 56 | ''' 57 | ws = anchors[:, 2] # for base anchor: w == h 
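    # For a target aspect ratio r = h / w, the base width is divided by sqrt(r)
    # and the base height multiplied by sqrt(r) below, so the anchor area stays
    # constant: (w / sqrt(r)) * (h * sqrt(r)) = w * h.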
58 | hs = anchors[:, 3] 59 | sqrt_ratios = tf.sqrt(tf.constant(anchor_ratios)) 60 | 61 | ws = tf.reshape(ws / sqrt_ratios[:, tf.newaxis], [-1, 1]) 62 | hs = tf.reshape(hs * sqrt_ratios[:, tf.newaxis], [-1, 1]) 63 | 64 | return ws, hs 65 | 66 | 67 | -------------------------------------------------------------------------------- /libs/box_utils/anchor_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/box_utils/anchor_utils.pyc -------------------------------------------------------------------------------- /libs/box_utils/boxes_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | def ious_calu(boxes_1, boxes_2): 10 | ''' 11 | 12 | :param boxes_1: [N, 4] [xmin, ymin, xmax, ymax] 13 | :param boxes_2: [M, 4] [xmin, ymin. xmax, ymax] 14 | :return: 15 | ''' 16 | boxes_1 = tf.cast(boxes_1, tf.float32) 17 | boxes_2 = tf.cast(boxes_2, tf.float32) 18 | xmin_1, ymin_1, xmax_1, ymax_1 = tf.split(boxes_1, 4, axis=1) # xmin_1 shape is [N, 1].. 19 | xmin_2, ymin_2, xmax_2, ymax_2 = tf.unstack(boxes_2, axis=1) # xmin_2 shape is [M, ].. 20 | 21 | max_xmin = tf.maximum(xmin_1, xmin_2) 22 | min_xmax = tf.minimum(xmax_1, xmax_2) 23 | 24 | max_ymin = tf.maximum(ymin_1, ymin_2) 25 | min_ymax = tf.minimum(ymax_1, ymax_2) 26 | 27 | overlap_h = tf.maximum(0., min_ymax - max_ymin) # avoid h < 0 28 | overlap_w = tf.maximum(0., min_xmax - max_xmin) 29 | 30 | overlaps = overlap_h * overlap_w 31 | 32 | area_1 = (xmax_1 - xmin_1) * (ymax_1 - ymin_1) # [N, 1] 33 | area_2 = (xmax_2 - xmin_2) * (ymax_2 - ymin_2) # [M, ] 34 | 35 | ious = overlaps / (area_1 + area_2 - overlaps) 36 | 37 | return ious 38 | 39 | 40 | def clip_boxes_to_img_boundaries(decode_boxes, img_shape): 41 | ''' 42 | 43 | :param decode_boxes: 44 | :return: decode boxes, and already clip to boundaries 45 | ''' 46 | 47 | with tf.name_scope('clip_boxes_to_img_boundaries'): 48 | 49 | # xmin, ymin, xmax, ymax = tf.unstack(decode_boxes, axis=1) 50 | xmin = decode_boxes[:, 0] 51 | ymin = decode_boxes[:, 1] 52 | xmax = decode_boxes[:, 2] 53 | ymax = decode_boxes[:, 3] 54 | img_h, img_w = img_shape[1], img_shape[2] 55 | 56 | img_h, img_w = tf.cast(img_h, tf.float32), tf.cast(img_w, tf.float32) 57 | 58 | xmin = tf.maximum(tf.minimum(xmin, img_w-1.), 0.) 59 | ymin = tf.maximum(tf.minimum(ymin, img_h-1.), 0.) 60 | 61 | xmax = tf.maximum(tf.minimum(xmax, img_w-1.), 0.) 62 | ymax = tf.maximum(tf.minimum(ymax, img_h-1.), 0.) 
63 | 
64 |     return tf.transpose(tf.stack([xmin, ymin, xmax, ymax]))
65 | 
66 | 
67 | def filter_outside_boxes(boxes, img_h, img_w):
68 |     '''
69 |     :param boxes: boxes with format [xmin, ymin, xmax, ymax]
70 |     :param img_h: height of image
71 |     :param img_w: width of image
72 |     :return: indices of boxes that are inside the image boundary
73 |     '''
74 | 
75 |     with tf.name_scope('filter_outside_boxes'):
76 |         xmin, ymin, xmax, ymax = tf.unstack(boxes, axis=1)
77 | 
78 |         xmin_index = tf.greater_equal(xmin, 0)
79 |         ymin_index = tf.greater_equal(ymin, 0)
80 |         xmax_index = tf.less_equal(xmax, tf.cast(img_w, tf.float32))
81 |         ymax_index = tf.less_equal(ymax, tf.cast(img_h, tf.float32))
82 | 
83 |         indices = tf.transpose(tf.stack([xmin_index, ymin_index, xmax_index, ymax_index]))
84 |         indices = tf.cast(indices, dtype=tf.int32)
85 |         indices = tf.reduce_sum(indices, axis=1)
86 |         indices = tf.where(tf.equal(indices, 4))
87 |         # indices = tf.equal(indices, 4)
88 |         return tf.reshape(indices, [-1])
89 | 
90 | 
91 | def padd_boxes_with_zeros(boxes, scores, max_num_of_boxes):
92 | 
93 |     '''
94 |     if the number of boxes is less than max_num_of_boxes, pad boxes and scores with zero boxes [0, 0, 0, 0]
95 |     :param boxes:
96 |     :param scores: [-1]
97 |     :param max_num_of_boxes:
98 |     :return:
99 |     '''
100 | 
101 |     pad_num = tf.cast(max_num_of_boxes, tf.int32) - tf.shape(boxes)[0]
102 | 
103 |     zero_boxes = tf.zeros(shape=[pad_num, 4], dtype=boxes.dtype)
104 |     zero_scores = tf.zeros(shape=[pad_num], dtype=scores.dtype)
105 | 
106 |     final_boxes = tf.concat([boxes, zero_boxes], axis=0)
107 | 
108 |     final_scores = tf.concat([scores, zero_scores], axis=0)
109 | 
110 |     return final_boxes, final_scores
--------------------------------------------------------------------------------
/libs/box_utils/boxes_utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/box_utils/boxes_utils.pyc
--------------------------------------------------------------------------------
/libs/box_utils/cython_utils/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | 	python setup.py build_ext --inplace
3 | 	rm -rf build
4 | clean:
5 | 	rm -rf */*.pyc
6 | 	rm -rf */*.so
7 | 
--------------------------------------------------------------------------------
/libs/box_utils/cython_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/box_utils/cython_utils/__init__.py
--------------------------------------------------------------------------------
/libs/box_utils/cython_utils/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/box_utils/cython_utils/__init__.pyc
--------------------------------------------------------------------------------
/libs/box_utils/cython_utils/cython_bbox.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/box_utils/cython_utils/cython_bbox.so
--------------------------------------------------------------------------------
/libs/box_utils/cython_utils/cython_nms.so:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/box_utils/cython_utils/cython_nms.so -------------------------------------------------------------------------------- /libs/box_utils/cython_utils/nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | 70 | def nms_new(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 71 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 72 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 73 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 74 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 75 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 76 | 77 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 78 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 79 | 80 | cdef int ndets = dets.shape[0] 81 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 82 | np.zeros((ndets), dtype=np.int) 83 | 84 | # nominal indices 85 | cdef int _i, _j 86 | # sorted indices 87 | cdef int i, j 88 | # temp variables for box i's (the box currently under consideration) 89 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 90 | # variables for computing 
overlap with box j (lower scoring box) 91 | cdef np.float32_t xx1, yy1, xx2, yy2 92 | cdef np.float32_t w, h 93 | cdef np.float32_t inter, ovr 94 | 95 | keep = [] 96 | for _i in range(ndets): 97 | i = order[_i] 98 | if suppressed[i] == 1: 99 | continue 100 | keep.append(i) 101 | ix1 = x1[i] 102 | iy1 = y1[i] 103 | ix2 = x2[i] 104 | iy2 = y2[i] 105 | iarea = areas[i] 106 | for _j in range(_i + 1, ndets): 107 | j = order[_j] 108 | if suppressed[j] == 1: 109 | continue 110 | xx1 = max(ix1, x1[j]) 111 | yy1 = max(iy1, y1[j]) 112 | xx2 = min(ix2, x2[j]) 113 | yy2 = min(iy2, y2[j]) 114 | w = max(0.0, xx2 - xx1 + 1) 115 | h = max(0.0, yy2 - yy1 + 1) 116 | inter = w * h 117 | ovr = inter / (iarea + areas[j] - inter) 118 | ovr1 = inter / iarea 119 | ovr2 = inter / areas[j] 120 | if ovr >= thresh or ovr1 > 0.95 or ovr2 > 0.95: 121 | suppressed[j] = 1 122 | 123 | return keep 124 | -------------------------------------------------------------------------------- /libs/box_utils/cython_utils/setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | import numpy as np 11 | from distutils.core import setup 12 | from distutils.extension import Extension 13 | from Cython.Distutils import build_ext 14 | 15 | def find_in_path(name, path): 16 | "Find a file in a search path" 17 | #adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 18 | for dir in path.split(os.pathsep): 19 | binpath = pjoin(dir, name) 20 | if os.path.exists(binpath): 21 | return os.path.abspath(binpath) 22 | return None 23 | 24 | def locate_cuda(): 25 | """Locate the CUDA environment on the system 26 | 27 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 28 | and values giving the absolute path to each directory. 29 | 30 | Starts by looking for the CUDAHOME env variable. If not found, everything 31 | is based on finding 'nvcc' in the PATH. 32 | """ 33 | 34 | # first check if the CUDAHOME env variable is in use 35 | if 'CUDAHOME' in os.environ: 36 | home = os.environ['CUDAHOME'] 37 | nvcc = pjoin(home, 'bin', 'nvcc') 38 | else: 39 | # otherwise, search the PATH for NVCC 40 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 41 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 42 | if nvcc is None: 43 | raise EnvironmentError('The nvcc binary could not be ' 44 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 45 | home = os.path.dirname(os.path.dirname(nvcc)) 46 | 47 | cudaconfig = {'home':home, 'nvcc':nvcc, 48 | 'include': pjoin(home, 'include'), 49 | 'lib64': pjoin(home, 'lib64')} 50 | for k, v in cudaconfig.items(): 51 | if not os.path.exists(v): 52 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 53 | 54 | return cudaconfig 55 | CUDA = locate_cuda() 56 | 57 | # Obtain the numpy include directory. This logic works across numpy versions. 58 | try: 59 | numpy_include = np.get_include() 60 | except AttributeError: 61 | numpy_include = np.get_numpy_include() 62 | 63 | def customize_compiler_for_nvcc(self): 64 | """inject deep into distutils to customize how the dispatch 65 | to gcc/nvcc works. 
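    For each '.cu' source, the redefined _compile below temporarily swaps
    compiler_so to nvcc (using the 'nvcc' entry of extra_postargs) and restores
    the default gcc settings afterwards.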
66 | 67 | If you subclass UnixCCompiler, it's not trivial to get your subclass 68 | injected in, and still have the right customizations (i.e. 69 | distutils.sysconfig.customize_compiler) run on it. So instead of going 70 | the OO route, I have this. Note, it's kindof like a wierd functional 71 | subclassing going on.""" 72 | 73 | # tell the compiler it can processes .cu 74 | self.src_extensions.append('.cu') 75 | 76 | # save references to the default compiler_so and _comple methods 77 | default_compiler_so = self.compiler_so 78 | super = self._compile 79 | 80 | # now redefine the _compile method. This gets executed for each 81 | # object but distutils doesn't have the ability to change compilers 82 | # based on source extension: we add it. 83 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 84 | print(extra_postargs) 85 | if os.path.splitext(src)[1] == '.cu': 86 | # use the cuda for .cu files 87 | self.set_executable('compiler_so', CUDA['nvcc']) 88 | # use only a subset of the extra_postargs, which are 1-1 translated 89 | # from the extra_compile_args in the Extension class 90 | postargs = extra_postargs['nvcc'] 91 | else: 92 | postargs = extra_postargs['gcc'] 93 | 94 | super(obj, src, ext, cc_args, postargs, pp_opts) 95 | # reset the default compiler_so, which we might have changed for cuda 96 | self.compiler_so = default_compiler_so 97 | 98 | # inject our redefined _compile method into the class 99 | self._compile = _compile 100 | 101 | # run the customize_compiler 102 | class custom_build_ext(build_ext): 103 | def build_extensions(self): 104 | customize_compiler_for_nvcc(self.compiler) 105 | build_ext.build_extensions(self) 106 | 107 | ext_modules = [ 108 | Extension( 109 | "cython_bbox", 110 | ["bbox.pyx"], 111 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 112 | include_dirs = [numpy_include] 113 | ), 114 | Extension( 115 | "cython_nms", 116 | ["nms.pyx"], 117 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 118 | include_dirs = [numpy_include] 119 | ) 120 | # Extension( 121 | # "cpu_nms", 122 | # ["cpu_nms.pyx"], 123 | # extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 124 | # include_dirs = [numpy_include] 125 | # ) 126 | ] 127 | 128 | setup( 129 | name='tf_faster_rcnn', 130 | ext_modules=ext_modules, 131 | # inject our custom trigger 132 | cmdclass={'build_ext': custom_build_ext}, 133 | ) 134 | -------------------------------------------------------------------------------- /libs/box_utils/draw_box_in_img.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/box_utils/draw_box_in_img.pyc -------------------------------------------------------------------------------- /libs/box_utils/encode_and_decode.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | from __future__ import division 6 | 7 | import tensorflow as tf 8 | import numpy as np 9 | 10 | 11 | def decode_boxes(encoded_boxes, reference_boxes, scale_factors=None): 12 | ''' 13 | 14 | :param encoded_boxes:[N, 4] 15 | :param reference_boxes: [N, 4] . 16 | :param scale_factors: use for scale. 
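    Decoding inverts the usual R-CNN box parameterisation:
        x_ctr = t_x * w_a + x_a,   y_ctr = t_y * h_a + y_a,
        w = w_a * exp(t_w),        h = h_a * exp(t_h)
    where (x_a, y_a, w_a, h_a) are the centers and sizes of the reference boxes.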
17 | 18 | in the first stage, reference_boxes are anchors 19 | in the second stage, reference boxes are proposals(decode) produced by first stage 20 | :return:decode boxes [N, 4] 21 | ''' 22 | 23 | t_xcenter, t_ycenter, t_w, t_h = tf.unstack(encoded_boxes, axis=1) 24 | if scale_factors: 25 | t_xcenter /= scale_factors[0] 26 | t_ycenter /= scale_factors[1] 27 | t_w /= scale_factors[2] 28 | t_h /= scale_factors[3] 29 | 30 | reference_xmin, reference_ymin, reference_xmax, reference_ymax = tf.unstack(reference_boxes, axis=1) 31 | # reference boxes are anchors in the first stage 32 | 33 | # reference_xcenter = (reference_xmin + reference_xmax) / 2. 34 | # reference_ycenter = (reference_ymin + reference_ymax) / 2. 35 | reference_w = reference_xmax - reference_xmin 36 | reference_h = reference_ymax - reference_ymin 37 | reference_xcenter = reference_xmin + reference_w/2.0 38 | reference_ycenter = reference_ymin + reference_h/2.0 39 | 40 | predict_xcenter = t_xcenter * reference_w + reference_xcenter 41 | predict_ycenter = t_ycenter * reference_h + reference_ycenter 42 | predict_w = tf.exp(t_w) * reference_w 43 | predict_h = tf.exp(t_h) * reference_h 44 | 45 | predict_xmin = predict_xcenter - predict_w / 2. 46 | predict_xmax = predict_xcenter + predict_w / 2. 47 | predict_ymin = predict_ycenter - predict_h / 2. 48 | predict_ymax = predict_ycenter + predict_h / 2. 49 | 50 | return tf.transpose(tf.stack([predict_xmin, predict_ymin, 51 | predict_xmax, predict_ymax])) 52 | 53 | 54 | def encode_boxes(unencode_boxes, reference_boxes, scale_factors=None): 55 | ''' 56 | 57 | :param unencode_boxes: [-1, 4] 58 | :param reference_boxes: [-1, 4] 59 | :return: encode_boxes [-1, 4] 60 | ''' 61 | 62 | xmin, ymin, xmax, ymax = unencode_boxes[:, 0], unencode_boxes[:, 1], unencode_boxes[:, 2], unencode_boxes[:, 3] 63 | 64 | reference_xmin, reference_ymin, reference_xmax, reference_ymax = reference_boxes[:, 0], reference_boxes[:, 1], \ 65 | reference_boxes[:, 2], reference_boxes[:, 3] 66 | 67 | # x_center = (xmin + xmax) / 2. 68 | # y_center = (ymin + ymax) / 2. 69 | w = xmax - xmin + 1e-8 70 | h = ymax - ymin + 1e-8 71 | x_center = xmin + w/2.0 72 | y_center = ymin + h/2.0 73 | 74 | # reference_xcenter = (reference_xmin + reference_xmax) / 2. 75 | # reference_ycenter = (reference_ymin + reference_ymax) / 2. 
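    # The targets computed below are the standard R-CNN encoding:
    #   t_x = (x - x_a) / w_a,   t_y = (y - y_a) / h_a,
    #   t_w = log(w / w_a),      t_h = log(h / h_a)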
76 | reference_w = reference_xmax - reference_xmin + 1e-8 77 | reference_h = reference_ymax - reference_ymin + 1e-8 78 | reference_xcenter = reference_xmin + reference_w/2.0 79 | reference_ycenter = reference_ymin + reference_h/2.0 80 | # w + 1e-8 to avoid NaN in division and log below 81 | 82 | t_xcenter = (x_center - reference_xcenter) / reference_w 83 | t_ycenter = (y_center - reference_ycenter) / reference_h 84 | t_w = np.log(w/reference_w) 85 | t_h = np.log(h/reference_h) 86 | 87 | if scale_factors: 88 | t_xcenter *= scale_factors[0] 89 | t_ycenter *= scale_factors[1] 90 | t_w *= scale_factors[2] 91 | t_h *= scale_factors[3] 92 | 93 | return np.transpose(np.stack([t_xcenter, t_ycenter, t_w, t_h], axis=0)) 94 | -------------------------------------------------------------------------------- /libs/box_utils/encode_and_decode.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/box_utils/encode_and_decode.pyc -------------------------------------------------------------------------------- /libs/box_utils/show_box_in_tensor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | import numpy as np 9 | import cv2 10 | from libs.label_name_dict.label_dict import LABEL_NAME_MAP 11 | 12 | from libs.configs import cfgs 13 | 14 | from libs.box_utils import draw_box_in_img 15 | 16 | def only_draw_boxes(img_batch, boxes): 17 | 18 | boxes = tf.stop_gradient(boxes) 19 | img_tensor = tf.squeeze(img_batch, 0) 20 | img_tensor = tf.cast(img_tensor, tf.float32) 21 | labels = tf.ones(shape=(tf.shape(boxes)[0], ), dtype=tf.int32) * draw_box_in_img.ONLY_DRAW_BOXES 22 | scores = tf.zeros_like(labels, dtype=tf.float32) 23 | img_tensor_with_boxes = tf.py_func(draw_box_in_img.draw_boxes_with_label_and_scores, 24 | inp=[img_tensor, boxes, labels, scores], 25 | Tout=tf.uint8) 26 | img_tensor_with_boxes = tf.reshape(img_tensor_with_boxes, tf.shape(img_batch)) # [batch_size, h, w, c] 27 | 28 | return img_tensor_with_boxes 29 | 30 | def draw_boxes_with_scores(img_batch, boxes, scores): 31 | 32 | boxes = tf.stop_gradient(boxes) 33 | scores = tf.stop_gradient(scores) 34 | 35 | img_tensor = tf.squeeze(img_batch, 0) 36 | img_tensor = tf.cast(img_tensor, tf.float32) 37 | labels = tf.ones(shape=(tf.shape(boxes)[0],), dtype=tf.int32) * draw_box_in_img.ONLY_DRAW_BOXES_WITH_SCORES 38 | img_tensor_with_boxes = tf.py_func(draw_box_in_img.draw_boxes_with_label_and_scores, 39 | inp=[img_tensor, boxes, labels, scores], 40 | Tout=[tf.uint8]) 41 | img_tensor_with_boxes = tf.reshape(img_tensor_with_boxes, tf.shape(img_batch)) 42 | return img_tensor_with_boxes 43 | 44 | def draw_boxes_with_categories(img_batch, boxes, labels): 45 | boxes = tf.stop_gradient(boxes) 46 | 47 | img_tensor = tf.squeeze(img_batch, 0) 48 | img_tensor = tf.cast(img_tensor, tf.float32) 49 | scores = tf.ones(shape=(tf.shape(boxes)[0],), dtype=tf.float32) 50 | img_tensor_with_boxes = tf.py_func(draw_box_in_img.draw_boxes_with_label_and_scores, 51 | inp=[img_tensor, boxes, labels, scores], 52 | Tout=[tf.uint8]) 53 | img_tensor_with_boxes = tf.reshape(img_tensor_with_boxes, tf.shape(img_batch)) 54 | return img_tensor_with_boxes 55 | 56 | def draw_boxes_with_categories_and_scores(img_batch, boxes, 
labels, scores):
57 |     boxes = tf.stop_gradient(boxes)
58 |     scores = tf.stop_gradient(scores)
59 | 
60 |     img_tensor = tf.squeeze(img_batch, 0)
61 |     img_tensor = tf.cast(img_tensor, tf.float32)
62 |     img_tensor_with_boxes = tf.py_func(draw_box_in_img.draw_boxes_with_label_and_scores,
63 |                                        inp=[img_tensor, boxes, labels, scores],
64 |                                        Tout=[tf.uint8])
65 |     img_tensor_with_boxes = tf.reshape(img_tensor_with_boxes, tf.shape(img_batch))
66 |     return img_tensor_with_boxes
67 | 
68 | if __name__ == "__main__":
69 |     print (1)
70 | 
71 | 
--------------------------------------------------------------------------------
/libs/box_utils/show_box_in_tensor.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/box_utils/show_box_in_tensor.pyc
--------------------------------------------------------------------------------
/libs/box_utils/tf_ops.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | 
3 | from __future__ import absolute_import, print_function, division
4 | 
5 | import tensorflow as tf
6 | 
7 | '''
8 | all of these ops are derived from the tensorflow Object Detection API
9 | '''
10 | def indices_to_dense_vector(indices,
11 |                             size,
12 |                             indices_value=1.,
13 |                             default_value=0,
14 |                             dtype=tf.float32):
15 |     """Creates a dense vector with the entries at `indices` set to a specific value (the parameter `indices_value`) and all other entries set to `default_value`.
16 | 
17 |     This function exists because it is unclear if it is safe to use
18 |     tf.sparse_to_dense(indices, [size], 1, validate_indices=False)
19 |     with indices which are not ordered.
20 |     This function accepts a dynamic size (e.g. tf.shape(tensor)[0])
21 | 
22 |     Args:
23 |       indices: 1d Tensor with integer indices which are to be set to
24 |         indices_value.
25 |       size: scalar with size (integer) of output Tensor.
26 |       indices_value: values of elements specified by indices in the output vector
27 |       default_value: values of other elements in the output vector.
28 |       dtype: data type.
29 | 
30 |     Returns:
31 |       dense 1D Tensor of shape [size] with indices set to indices_value and the
32 |       rest set to default_value.
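      Example: with the default indices_value (1.) and default_value (0),
      indices=[1, 3] and size=5 produce [0., 1., 0., 1., 0.].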
33 | """ 34 | size = tf.to_int32(size) 35 | zeros = tf.ones([size], dtype=dtype) * default_value 36 | values = tf.ones_like(indices, dtype=dtype) * indices_value 37 | 38 | return tf.dynamic_stitch([tf.range(size), tf.to_int32(indices)], 39 | [zeros, values]) -------------------------------------------------------------------------------- /libs/box_utils/tf_ops.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/box_utils/tf_ops.pyc -------------------------------------------------------------------------------- /libs/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/configs/__init__.py -------------------------------------------------------------------------------- /libs/configs/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/configs/__init__.pyc -------------------------------------------------------------------------------- /libs/configs/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/configs/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /libs/configs/__pycache__/cfgs.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/configs/__pycache__/cfgs.cpython-35.pyc -------------------------------------------------------------------------------- /libs/configs/cfgs.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/configs/cfgs.pyc -------------------------------------------------------------------------------- /libs/configs/cfgs_coco.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | 6 | 7 | # ------------------------------------------------ 8 | VERSION = 'FasterRCNN_Res50_20180603COCO' 9 | NET_NAME = 'resnet_v1_50' #'MobilenetV2' 10 | ADD_BOX_IN_TENSORBOARD = True 11 | 12 | # ---------------------------------------- System_config 13 | ROOT_PATH = os.path.abspath('../') 14 | print (20*"++--") 15 | print (ROOT_PATH) 16 | GPU_GROUP = "0" 17 | SHOW_TRAIN_INFO_INTE = 10 18 | SMRY_ITER = 100 19 | SAVE_WEIGHTS_INTE = 10000 20 | 21 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 22 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 23 | INFERENCE_IMAGE_PATH = ROOT_PATH + '/tools/inference_image' 24 | INFERENCE_SAVE_PATH = ROOT_PATH + '/tools/inference_results' 25 | 26 | if NET_NAME.startswith("resnet"): 27 | weights_name = NET_NAME 28 | elif NET_NAME.startswith("MobilenetV2"): 29 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 30 | else: 31 | raise Exception('net name 
must be in [resnet_v1_101, resnet_v1_50, MobilenetV2]')
32 | 
33 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt'
34 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights')
35 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/'
36 | 
37 | # ------------------------------------------ Train config
38 | RESTORE_FROM_RPN = False
39 | IS_FILTER_OUTSIDE_BOXES = True
40 | FIXED_BLOCKS = 1  # allow 0~3
41 | 
42 | RPN_LOCATION_LOSS_WEIGHT = 1.
43 | RPN_CLASSIFICATION_LOSS_WEIGHT = 1.0
44 | 
45 | FAST_RCNN_LOCATION_LOSS_WEIGHT = 1.0
46 | FAST_RCNN_CLASSIFICATION_LOSS_WEIGHT = 1.0
47 | RPN_SIGMA = 3.0
48 | FASTRCNN_SIGMA = 1.0
49 | 
50 | 
51 | MUTILPY_BIAS_GRADIENT = None  # 2.0; if None, bias gradients are not multiplied
52 | GRADIENT_CLIPPING_BY_NORM = None  # 10.0; if None, gradients are not clipped
53 | 
54 | EPSILON = 1e-5
55 | MOMENTUM = 0.9
56 | LR = 0.001  # 0.0003
57 | DECAY_STEP = [350000, 490000]  # 50000, 70000
58 | MAX_ITERATION = 500000
59 | 
60 | # -------------------------------------------- Data_preprocess_config
61 | DATASET_NAME = 'coco'  # 'ship', 'spacenet', 'pascal', 'coco'
62 | PIXEL_MEAN = [123.68, 116.779, 103.939]  # R, G, B. In tf, the channel order is RGB. In openCV, it is BGR
63 | IMG_SHORT_SIDE_LEN = 600
64 | IMG_MAX_LENGTH = 1000
65 | CLASS_NUM = 80  # 20 for pascal
66 | 
67 | # --------------------------------------------- Network_config
68 | BATCH_SIZE = 1
69 | # INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01)
70 | INITIALIZER = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
71 | # BBOX_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.001)
72 | BBOX_INITIALIZER = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
73 | # WEIGHT_DECAY = 0.00004 if NET_NAME.startswith('Mobilenet') else 0.0001
74 | WEIGHT_DECAY = 0.0
75 | 
76 | # --------------------------------------------- Anchor config
77 | BASE_ANCHOR_SIZE_LIST = [256]  # can be modified
78 | ANCHOR_STRIDE = [16]  # cannot be modified in most situations
79 | ANCHOR_SCALES = [0.25, 0.5, 1., 2.0]  # i.e. [4, 8, 16, 32] relative to the stride
80 | ANCHOR_RATIOS = [0.5, 1., 2.0]
81 | ROI_SCALE_FACTORS = [10., 10., 5.0, 5.0]
82 | ANCHOR_SCALE_FACTORS = None  # [10.0, 10.0, 5.0, 5.0]
83 | 
84 | 
85 | # -------------------------------------------- RPN config
86 | KERNEL_SIZE = 3
87 | RPN_IOU_POSITIVE_THRESHOLD = 0.7
88 | RPN_IOU_NEGATIVE_THRESHOLD = 0.3
89 | TRAIN_RPN_CLOOBER_POSITIVES = False
90 | 
91 | RPN_MINIBATCH_SIZE = 256
92 | RPN_POSITIVE_RATE = 0.5
93 | RPN_NMS_IOU_THRESHOLD = 0.7
94 | RPN_TOP_K_NMS_TRAIN = 12000
95 | RPN_MAXIMUM_PROPOSAL_TARIN = 2000  # max num of proposals kept after NMS during training
96 | 
97 | RPN_TOP_K_NMS_TEST = 6000  # 5000
98 | RPN_MAXIMUM_PROPOSAL_TEST = 300
99 | 
100 | 
101 | # ------------------------------------------- Fast-RCNN config
102 | ROI_SIZE = 14
103 | ROI_POOL_KERNEL_SIZE = 2
104 | USE_DROPOUT = False
105 | KEEP_PROB = 1.0
106 | SHOW_SCORE_THRSHOLD = 0.5  # only used for display in tensorboard
107 | 
108 | FAST_RCNN_NMS_IOU_THRESHOLD = 0.3  # 0.6
109 | FAST_RCNN_NMS_MAX_BOXES_PER_CLASS = 100
110 | FAST_RCNN_IOU_POSITIVE_THRESHOLD = 0.5
111 | FAST_RCNN_IOU_NEGATIVE_THRESHOLD = 0.0  # IOU in [0.0, 0.5) is treated as negative
112 | FAST_RCNN_MINIBATCH_SIZE = 256  # if -1, train with OHEM
113 | FAST_RCNN_POSITIVE_RATE = 0.25
114 | 
115 | ADD_GTBOXES_TO_TRAIN = False
116 | 
117 | 
118 | 
119 | 
--------------------------------------------------------------------------------
/libs/detection_oprations/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/detection_oprations/__init__.py -------------------------------------------------------------------------------- /libs/detection_oprations/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/detection_oprations/__init__.pyc -------------------------------------------------------------------------------- /libs/detection_oprations/anchor_target_layer_without_boxweight.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | from libs.configs import cfgs 13 | import numpy as np 14 | import numpy.random as npr 15 | from libs.box_utils.cython_utils.cython_bbox import bbox_overlaps 16 | from libs.box_utils import encode_and_decode 17 | 18 | 19 | def anchor_target_layer( 20 | gt_boxes, img_shape, all_anchors, is_restrict_bg=False): 21 | """Same as the anchor target layer in original Fast/er RCNN """ 22 | 23 | total_anchors = all_anchors.shape[0] 24 | img_h, img_w = img_shape[1], img_shape[2] 25 | gt_boxes = gt_boxes[:, :-1] # remove class label 26 | 27 | # allow boxes to sit over the edge by a small amount 28 | _allowed_border = 0 29 | 30 | # only keep anchors inside the image 31 | inds_inside = np.where( 32 | (all_anchors[:, 0] >= -_allowed_border) & 33 | (all_anchors[:, 1] >= -_allowed_border) & 34 | (all_anchors[:, 2] < img_w + _allowed_border) & # width 35 | (all_anchors[:, 3] < img_h + _allowed_border) # height 36 | )[0] 37 | 38 | anchors = all_anchors[inds_inside, :] 39 | 40 | # label: 1 is positive, 0 is negative, -1 is dont care 41 | labels = np.empty((len(inds_inside),), dtype=np.float32) 42 | labels.fill(-1) 43 | 44 | # overlaps between the anchors and the gt boxes 45 | overlaps = bbox_overlaps( 46 | np.ascontiguousarray(anchors, dtype=np.float), 47 | np.ascontiguousarray(gt_boxes, dtype=np.float)) 48 | 49 | argmax_overlaps = overlaps.argmax(axis=1) 50 | max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] 51 | gt_argmax_overlaps = overlaps.argmax(axis=0) 52 | gt_max_overlaps = overlaps[ 53 | gt_argmax_overlaps, np.arange(overlaps.shape[1])] 54 | gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] 55 | 56 | if not cfgs.TRAIN_RPN_CLOOBER_POSITIVES: 57 | labels[max_overlaps < cfgs.RPN_IOU_NEGATIVE_THRESHOLD] = 0 58 | 59 | labels[gt_argmax_overlaps] = 1 60 | labels[max_overlaps >= cfgs.RPN_IOU_POSITIVE_THRESHOLD] = 1 61 | 62 | if cfgs.TRAIN_RPN_CLOOBER_POSITIVES: 63 | labels[max_overlaps < cfgs.RPN_IOU_NEGATIVE_THRESHOLD] = 0 64 | 65 | num_fg = int(cfgs.RPN_MINIBATCH_SIZE * cfgs.RPN_POSITIVE_RATE) 66 | fg_inds = np.where(labels == 1)[0] 67 | if len(fg_inds) > num_fg: 68 | disable_inds = npr.choice( 69 | fg_inds, size=(len(fg_inds) - num_fg), replace=False) 70 | labels[disable_inds] = -1 71 | 72 | num_bg = cfgs.RPN_MINIBATCH_SIZE - np.sum(labels == 1) 73 | if is_restrict_bg: 74 | num_bg = max(num_bg, num_fg * 1.5) 75 | bg_inds = 
np.where(labels == 0)[0] 76 | if len(bg_inds) > num_bg: 77 | disable_inds = npr.choice( 78 | bg_inds, size=(len(bg_inds) - num_bg), replace=False) 79 | labels[disable_inds] = -1 80 | 81 | bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) 82 | 83 | # map up to original set of anchors 84 | labels = _unmap(labels, total_anchors, inds_inside, fill=-1) 85 | bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) 86 | 87 | # labels = labels.reshape((1, height, width, A)) 88 | rpn_labels = labels.reshape((-1, 1)) 89 | 90 | # bbox_targets 91 | bbox_targets = bbox_targets.reshape((-1, 4)) 92 | rpn_bbox_targets = bbox_targets 93 | 94 | return rpn_labels, rpn_bbox_targets 95 | 96 | 97 | def _unmap(data, count, inds, fill=0): 98 | """ Unmap a subset of item (data) back to the original set of items (of 99 | size count) """ 100 | if len(data.shape) == 1: 101 | ret = np.empty((count,), dtype=np.float32) 102 | ret.fill(fill) 103 | ret[inds] = data 104 | else: 105 | ret = np.empty((count,) + data.shape[1:], dtype=np.float32) 106 | ret.fill(fill) 107 | ret[inds, :] = data 108 | return ret 109 | 110 | 111 | def _compute_targets(ex_rois, gt_rois): 112 | """Compute bounding-box regression targets for an image.""" 113 | # targets = bbox_transform(ex_rois, gt_rois[:, :4]).astype( 114 | # np.float32, copy=False) 115 | targets = encode_and_decode.encode_boxes(unencode_boxes=gt_rois, 116 | reference_boxes=ex_rois, 117 | scale_factors=cfgs.ANCHOR_SCALE_FACTORS) 118 | # targets = encode_and_decode.encode_boxes(ex_rois=ex_rois, 119 | # gt_rois=gt_rois, 120 | # scale_factor=None) 121 | return targets 122 | -------------------------------------------------------------------------------- /libs/detection_oprations/anchor_target_layer_without_boxweight.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/detection_oprations/anchor_target_layer_without_boxweight.pyc -------------------------------------------------------------------------------- /libs/detection_oprations/proposal_opr.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | """ 3 | @author: zeming li 4 | @contact: zengarden2009@gmail.com 5 | """ 6 | 7 | from libs.configs import cfgs 8 | from libs.box_utils import encode_and_decode 9 | from libs.box_utils import boxes_utils 10 | import tensorflow as tf 11 | import numpy as np 12 | 13 | 14 | def postprocess_rpn_proposals(rpn_bbox_pred, rpn_cls_prob, img_shape, anchors, is_training): 15 | ''' 16 | 17 | :param rpn_bbox_pred: [-1, 4] 18 | :param rpn_cls_prob: [-1, 2] 19 | :param img_shape: 20 | :param anchors:[-1, 4] 21 | :param is_training: 22 | :return: 23 | ''' 24 | 25 | if is_training: 26 | pre_nms_topN = cfgs.RPN_TOP_K_NMS_TRAIN 27 | post_nms_topN = cfgs.RPN_MAXIMUM_PROPOSAL_TARIN 28 | nms_thresh = cfgs.RPN_NMS_IOU_THRESHOLD 29 | else: 30 | pre_nms_topN = cfgs.RPN_TOP_K_NMS_TEST 31 | post_nms_topN = cfgs.RPN_MAXIMUM_PROPOSAL_TEST 32 | nms_thresh = cfgs.RPN_NMS_IOU_THRESHOLD 33 | 34 | cls_prob = rpn_cls_prob[:, 1] 35 | 36 | # 1. decode boxes 37 | decode_boxes = encode_and_decode.decode_boxes(encoded_boxes=rpn_bbox_pred, 38 | reference_boxes=anchors, 39 | scale_factors=cfgs.ANCHOR_SCALE_FACTORS) 40 | 41 | # decode_boxes = encode_and_decode.decode_boxes(boxes=anchors, 42 | # deltas=rpn_bbox_pred, 43 | # scale_factor=None) 44 | 45 | # 2. 
clip to img boundaries 46 | decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(decode_boxes=decode_boxes, 47 | img_shape=img_shape) 48 | 49 | # 3. get top N to NMS 50 | if pre_nms_topN > 0: 51 | pre_nms_topN = tf.minimum(pre_nms_topN, tf.shape(decode_boxes)[0], name='avoid_unenough_boxes') 52 | cls_prob, top_k_indices = tf.nn.top_k(cls_prob, k=pre_nms_topN) 53 | decode_boxes = tf.gather(decode_boxes, top_k_indices) 54 | 55 | # 4. NMS 56 | keep = tf.image.non_max_suppression( 57 | boxes=decode_boxes, 58 | scores=cls_prob, 59 | max_output_size=post_nms_topN, 60 | iou_threshold=nms_thresh) 61 | 62 | final_boxes = tf.gather(decode_boxes, keep) 63 | final_probs = tf.gather(cls_prob, keep) 64 | 65 | return final_boxes, final_probs 66 | 67 | -------------------------------------------------------------------------------- /libs/detection_oprations/proposal_opr.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/detection_oprations/proposal_opr.pyc -------------------------------------------------------------------------------- /libs/detection_oprations/proposal_target_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | from libs.configs import cfgs 11 | import numpy as np 12 | import numpy.random as npr 13 | 14 | from libs.box_utils import encode_and_decode 15 | from libs.box_utils.cython_utils.cython_bbox import bbox_overlaps 16 | 17 | 18 | def proposal_target_layer(rpn_rois, gt_boxes): 19 | """ 20 | Assign object detection proposals to ground-truth targets. Produces proposal 21 | classification labels and bounding-box regression targets. 22 | """ 23 | # Proposal ROIs (x1, y1, x2, y2) coming from RPN 24 | # gt_boxes (x1, y1, x2, y2, label) 25 | if cfgs.ADD_GTBOXES_TO_TRAIN: 26 | all_rois = np.vstack((rpn_rois, gt_boxes[:, :-1])) 27 | else: 28 | all_rois = rpn_rois 29 | # np.inf 30 | rois_per_image = np.inf if cfgs.FAST_RCNN_MINIBATCH_SIZE == -1 else cfgs.FAST_RCNN_MINIBATCH_SIZE 31 | 32 | fg_rois_per_image = np.round(cfgs.FAST_RCNN_POSITIVE_RATE * rois_per_image) 33 | 34 | # Sample rois with classification labels and bounding box regression 35 | labels, rois, bbox_targets = _sample_rois(all_rois, gt_boxes, fg_rois_per_image, 36 | rois_per_image, cfgs.CLASS_NUM+1) 37 | # print(labels.shape, rois.shape, bbox_targets.shape) 38 | rois = rois.reshape(-1, 4) 39 | labels = labels.reshape(-1) 40 | bbox_targets = bbox_targets.reshape(-1, (cfgs.CLASS_NUM+1) * 4) 41 | 42 | return rois, labels, bbox_targets 43 | 44 | 45 | def _get_bbox_regression_labels(bbox_target_data, num_classes): 46 | """Bounding-box regression targets (bbox_target_data) are stored in a 47 | compact form N x (class, tx, ty, tw, th) 48 | 49 | This function expands those targets into the 4-of-4*K representation used 50 | by the network (i.e. only one class has non-zero targets). 
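    For example, with num_classes = 21 (20 VOC classes plus background), a RoI
    labeled as class 3 has its four targets written to columns 12:16
    (start = 4 * cls).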
51 | 52 | Returns: 53 | bbox_target (ndarray): N x 4K blob of regression targets 54 | """ 55 | 56 | clss = bbox_target_data[:, 0] 57 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) 58 | inds = np.where(clss > 0)[0] 59 | for ind in inds: 60 | cls = clss[ind] 61 | start = int(4 * cls) 62 | end = start + 4 63 | bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] 64 | 65 | return bbox_targets 66 | 67 | 68 | def _compute_targets(ex_rois, gt_rois, labels): 69 | """Compute bounding-box regression targets for an image. 70 | that is : [label, tx, ty, tw, th] 71 | """ 72 | 73 | assert ex_rois.shape[0] == gt_rois.shape[0] 74 | assert ex_rois.shape[1] == 4 75 | assert gt_rois.shape[1] == 4 76 | 77 | targets = encode_and_decode.encode_boxes(unencode_boxes=gt_rois, 78 | reference_boxes=ex_rois, 79 | scale_factors=cfgs.ROI_SCALE_FACTORS) 80 | # targets = encode_and_decode.encode_boxes(ex_rois=ex_rois, 81 | # gt_rois=gt_rois, 82 | # scale_factor=cfgs.ROI_SCALE_FACTORS) 83 | 84 | return np.hstack( 85 | (labels[:, np.newaxis], targets)).astype(np.float32, copy=False) 86 | 87 | 88 | def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, 89 | rois_per_image, num_classes): 90 | """Generate a random sample of RoIs comprising foreground and background 91 | examples. 92 | 93 | all_rois shape is [-1, 4] 94 | gt_boxes shape is [-1, 5]. that is [x1, y1, x2, y2, label] 95 | """ 96 | # overlaps: (rois x gt_boxes) 97 | overlaps = bbox_overlaps( 98 | np.ascontiguousarray(all_rois, dtype=np.float), 99 | np.ascontiguousarray(gt_boxes[:, :-1], dtype=np.float)) 100 | gt_assignment = overlaps.argmax(axis=1) 101 | max_overlaps = overlaps.max(axis=1) 102 | labels = gt_boxes[gt_assignment, -1] 103 | 104 | # Select foreground RoIs as those with >= FG_THRESH overlap 105 | fg_inds = np.where(max_overlaps >= cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD)[0] 106 | 107 | # Guard against the case when an image has fewer than fg_rois_per_image 108 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 109 | bg_inds = np.where((max_overlaps < cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD) & 110 | (max_overlaps >= cfgs.FAST_RCNN_IOU_NEGATIVE_THRESHOLD))[0] 111 | # print("first fileter, fg_size: {} || bg_size: {}".format(fg_inds.shape, bg_inds.shape)) 112 | # Guard against the case when an image has fewer than fg_rois_per_image 113 | # foreground RoIs 114 | fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size) 115 | 116 | # Sample foreground regions without replacement 117 | if fg_inds.size > 0: 118 | fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_this_image), replace=False) 119 | # Compute number of background RoIs to take from this image (guarding 120 | # against there being fewer than desired) 121 | bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image 122 | bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size) 123 | # Sample background regions without replacement 124 | if bg_inds.size > 0: 125 | bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_this_image), replace=False) 126 | 127 | # print("second fileter, fg_size: {} || bg_size: {}".format(fg_inds.shape, bg_inds.shape)) 128 | # The indices that we're selecting (both fg and bg) 129 | keep_inds = np.append(fg_inds, bg_inds) 130 | 131 | 132 | # Select sampled values from various arrays: 133 | labels = labels[keep_inds] 134 | 135 | # Clamp labels for the background RoIs to 0 136 | labels[int(fg_rois_per_this_image):] = 0 137 | rois = all_rois[keep_inds] 138 | 139 | bbox_target_data = _compute_targets( 140 | rois, 
gt_boxes[gt_assignment[keep_inds], :-1], labels) 141 | bbox_targets = \ 142 | _get_bbox_regression_labels(bbox_target_data, num_classes) 143 | 144 | return labels, rois, bbox_targets 145 | -------------------------------------------------------------------------------- /libs/detection_oprations/proposal_target_layer.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/detection_oprations/proposal_target_layer.pyc -------------------------------------------------------------------------------- /libs/export_pbs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/export_pbs/__init__.py -------------------------------------------------------------------------------- /libs/export_pbs/exportPb.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | import os, sys 6 | import tensorflow as tf 7 | import tensorflow.contrib.slim as slim 8 | from tensorflow.python.tools import freeze_graph 9 | 10 | sys.path.append('../../') 11 | from data.io.image_preprocess import short_side_resize_for_inference_data 12 | from libs.configs import cfgs 13 | from libs.networks import build_whole_network 14 | 15 | CKPT_PATH = '/home/yjr/PycharmProjects/Faster-RCNN_Tensorflow/output/trained_weights/FasterRCNN_20180517/voc_200000model.ckpt' 16 | OUT_DIR = '../../output/Pbs' 17 | PB_NAME = 'FasterRCNN_Res101_Pascal.pb' 18 | 19 | 20 | def build_detection_graph(): 21 | # 1. preprocess img 22 | img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3], 23 | name='input_img') # is RGB. 
not GBR 24 | raw_shape = tf.shape(img_plac) 25 | raw_h, raw_w = tf.to_float(raw_shape[0]), tf.to_float(raw_shape[1]) 26 | 27 | img_batch = tf.cast(img_plac, tf.float32) 28 | img_batch = short_side_resize_for_inference_data(img_tensor=img_batch, 29 | target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN, 30 | length_limitation=cfgs.IMG_MAX_LENGTH) 31 | img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN) 32 | img_batch = tf.expand_dims(img_batch, axis=0) # [1, None, None, 3] 33 | 34 | det_net = build_whole_network.DetectionNetwork(base_network_name=cfgs.NET_NAME, 35 | is_training=False) 36 | 37 | detected_boxes, detection_scores, detection_category = det_net.build_whole_detection_network( 38 | input_img_batch=img_batch, 39 | gtboxes_batch=None) 40 | 41 | xmin, ymin, xmax, ymax = detected_boxes[:, 0], detected_boxes[:, 1], \ 42 | detected_boxes[:, 2], detected_boxes[:, 3] 43 | 44 | resized_shape = tf.shape(img_batch) 45 | resized_h, resized_w = tf.to_float(resized_shape[1]), tf.to_float(resized_shape[2]) 46 | 47 | xmin = xmin * raw_w / resized_w 48 | xmax = xmax * raw_w / resized_w 49 | 50 | ymin = ymin * raw_h / resized_h 51 | ymax = ymax * raw_h / resized_h 52 | 53 | boxes = tf.transpose(tf.stack([xmin, ymin, xmax, ymax])) 54 | dets = tf.concat([tf.reshape(detection_category, [-1, 1]), 55 | tf.reshape(detection_scores, [-1, 1]), 56 | boxes], axis=1, name='DetResults') 57 | 58 | return dets 59 | 60 | 61 | def export_frozenPB(): 62 | 63 | tf.reset_default_graph() 64 | 65 | dets = build_detection_graph() 66 | 67 | saver = tf.train.Saver() 68 | 69 | with tf.Session() as sess: 70 | print("we have restred the weights from =====>>\n", CKPT_PATH) 71 | saver.restore(sess, CKPT_PATH) 72 | 73 | tf.train.write_graph(sess.graph_def, OUT_DIR, PB_NAME) 74 | freeze_graph.freeze_graph(input_graph=os.path.join(OUT_DIR, PB_NAME), 75 | input_saver='', 76 | input_binary=False, 77 | input_checkpoint=CKPT_PATH, 78 | output_node_names="DetResults", 79 | restore_op_name="save/restore_all", 80 | filename_tensor_name='save/Const:0', 81 | output_graph=os.path.join(OUT_DIR, PB_NAME.replace('.pb', '_Frozen.pb')), 82 | clear_devices=False, 83 | initializer_nodes='') 84 | 85 | if __name__ == '__main__': 86 | os.environ["CUDA_VISIBLE_DEVICES"] = '' 87 | export_frozenPB() 88 | -------------------------------------------------------------------------------- /libs/export_pbs/test_exportPb.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | from __future__ import division 6 | 7 | import os, sys 8 | import tensorflow as tf 9 | import time 10 | import cv2 11 | import argparse 12 | import numpy as np 13 | sys.path.append("../") 14 | 15 | from data.io.image_preprocess import short_side_resize_for_inference_data 16 | from libs.configs import cfgs 17 | from libs.networks import build_whole_network 18 | from libs.box_utils import draw_box_in_img 19 | from help_utils import tools 20 | 21 | 22 | 23 | 24 | 25 | def load_graph(frozen_graph_file): 26 | 27 | # we parse the graph_def file 28 | with tf.gfile.GFile(frozen_graph_file, 'rb') as f: 29 | graph_def = tf.GraphDef() 30 | graph_def.ParseFromString(f.read()) 31 | 32 | # we load the graph_def in the default graph 33 | 34 | with tf.Graph().as_default() as graph: 35 | tf.import_graph_def(graph_def, 36 | input_map=None, 37 | return_elements=None, 38 | name="", 39 | op_dict=None, 40 | producer_op_list=None) 41 | return graph 42 | 43 | 44 | def 
test(frozen_graph_path, test_dir): 45 | 46 | graph = load_graph(frozen_graph_path) 47 | print("we are testing ====>>>>", frozen_graph_path) 48 | 49 | img = graph.get_tensor_by_name("input_img:0") 50 | dets = graph.get_tensor_by_name("DetResults:0") 51 | 52 | with tf.Session(graph=graph) as sess: 53 | for img_path in os.listdir(test_dir): 54 | a_img = cv2.imread(os.path.join(test_dir, img_path))[:, :, ::-1] 55 | st = time.time() 56 | dets_val = sess.run(dets, feed_dict={img: a_img}) 57 | 58 | show_indices = dets_val[:, 1] >= 0.5 59 | dets_val = dets_val[show_indices] 60 | final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(a_img, 61 | boxes=dets_val[:, 2:], 62 | labels=dets_val[:, 0], 63 | scores=dets_val[:, 1]) 64 | cv2.imwrite(img_path, 65 | final_detections[:, :, ::-1]) 66 | print ("%s cost time: %f" % (img_path, time.time() - st)) 67 | 68 | if __name__ == '__main__': 69 | test('/home/yjr/PycharmProjects/Faster-RCNN_Tensorflow/output/Pbs/FasterRCNN_Res101_Pascal_Frozen.pb', 70 | '/home/yjr/PycharmProjects/Faster-RCNN_Tensorflow/tools/demos') 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /libs/label_name_dict/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/label_name_dict/__init__.py -------------------------------------------------------------------------------- /libs/label_name_dict/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/label_name_dict/__init__.pyc -------------------------------------------------------------------------------- /libs/label_name_dict/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/label_name_dict/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /libs/label_name_dict/__pycache__/remote_sensing_dict.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/label_name_dict/__pycache__/remote_sensing_dict.cpython-35.pyc -------------------------------------------------------------------------------- /libs/label_name_dict/coco_dict.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | class_names = [ 6 | 'back_ground', 'person', 'bicycle', 'car', 'motorcycle', 7 | 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 8 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 9 | 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 10 | 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 11 | 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 12 | 'sports ball', 'kite', 'baseball bat', 'baseball glove', 13 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 14 | 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 15 | 'banana', 'apple', 'sandwich', 
'orange', 'broccoli', 16 | 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 17 | 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 18 | 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 19 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 20 | 'book', 'clock', 'vase', 'scissors', 'teddy bear', 21 | 'hair drier', 'toothbrush'] 22 | 23 | 24 | classes_originID = { 25 | 'person': 1, 'bicycle': 2, 'car': 3, 'motorcycle': 4, 26 | 'airplane': 5, 'bus': 6, 'train': 7, 'truck': 8, 'boat': 9, 27 | 'traffic light': 10, 'fire hydrant': 11, 'stop sign': 13, 28 | 'parking meter': 14, 'bench': 15, 'bird': 16, 'cat': 17, 29 | 'dog': 18, 'horse': 19, 'sheep': 20, 'cow': 21, 'elephant': 22, 30 | 'bear': 23, 'zebra': 24, 'giraffe': 25, 'backpack': 27, 31 | 'umbrella': 28, 'handbag': 31, 'tie': 32, 'suitcase': 33, 32 | 'frisbee': 34, 'skis': 35, 'snowboard': 36, 'sports ball': 37, 33 | 'kite': 38, 'baseball bat': 39, 'baseball glove': 40, 34 | 'skateboard': 41, 'surfboard': 42, 'tennis racket': 43, 35 | 'bottle': 44, 'wine glass': 46, 'cup': 47, 'fork': 48, 36 | 'knife': 49, 'spoon': 50, 'bowl': 51, 'banana': 52, 'apple': 53, 37 | 'sandwich': 54, 'orange': 55, 'broccoli': 56, 'carrot': 57, 38 | 'hot dog': 58, 'pizza': 59, 'donut': 60, 'cake': 61, 39 | 'chair': 62, 'couch': 63, 'potted plant': 64, 'bed': 65, 40 | 'dining table': 67, 'toilet': 70, 'tv': 72, 'laptop': 73, 41 | 'mouse': 74, 'remote': 75, 'keyboard': 76, 'cell phone': 77, 42 | 'microwave': 78, 'oven': 79, 'toaster': 80, 'sink': 81, 43 | 'refrigerator': 82, 'book': 84, 'clock': 85, 'vase': 86, 44 | 'scissors': 87, 'teddy bear': 88, 'hair drier': 89, 45 | 'toothbrush': 90} 46 | 47 | originID_classes = {item: key for key, item in classes_originID.items()} 48 | NAME_LABEL_MAP = dict(zip(class_names, range(len(class_names)))) 49 | LABEL_NAME_MAP = dict(zip(range(len(class_names)), class_names)) 50 | 51 | # print (originID_classes) 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /libs/label_name_dict/coco_dict.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/label_name_dict/coco_dict.pyc -------------------------------------------------------------------------------- /libs/label_name_dict/label_dict.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | 4 | from libs.configs import cfgs 5 | 6 | if cfgs.DATASET_NAME == 'ship': 7 | NAME_LABEL_MAP = { 8 | 'back_ground': 0, 9 | 'ship': 1 10 | } 11 | elif cfgs.DATASET_NAME == 'FDDB': 12 | NAME_LABEL_MAP = { 13 | 'back_ground': 0, 14 | 'face': 1 15 | } 16 | elif cfgs.DATASET_NAME == 'icdar': 17 | NAME_LABEL_MAP = { 18 | 'back_ground': 0, 19 | 'text': 1 20 | } 21 | elif cfgs.DATASET_NAME.startswith('DOTA'): 22 | NAME_LABEL_MAP = { 23 | 'back_ground': 0, 24 | 'roundabout': 1, 25 | 'tennis-court': 2, 26 | 'swimming-pool': 3, 27 | 'storage-tank': 4, 28 | 'soccer-ball-field': 5, 29 | 'small-vehicle': 6, 30 | 'ship': 7, 31 | 'plane': 8, 32 | 'large-vehicle': 9, 33 | 'helicopter': 10, 34 | 'harbor': 11, 35 | 'ground-track-field': 12, 36 | 'bridge': 13, 37 | 'basketball-court': 14, 38 | 'baseball-diamond': 15 39 | } 40 | elif cfgs.DATASET_NAME == 'pascal': 41 | NAME_LABEL_MAP = { 42 | 'back_ground': 0, 43 | 'aeroplane': 1, 44 | 'bicycle': 2, 45 | 
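# --- Added example (not part of the original repo): coco_dict above keeps two
# mappings on purpose. NAME_LABEL_MAP gives the contiguous labels 0..80 that
# the detector trains on, while classes_originID holds the original,
# non-contiguous COCO category ids (note the gaps: e.g. 12 and 26 are unused).
# For COCO-style evaluation a predicted label has to be translated back:
from libs.label_name_dict.coco_dict import LABEL_NAME_MAP as COCO_LABEL_NAME_MAP
from libs.label_name_dict.coco_dict import classes_originID

def to_coco_category_id(train_label):
    name = COCO_LABEL_NAME_MAP[int(train_label)]  # e.g. 12 -> 'stop sign'
    return classes_originID[name]                 # 'stop sign' -> 13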
'bird': 3,
        'boat': 4,
        'bottle': 5,
        'bus': 6,
        'car': 7,
        'cat': 8,
        'chair': 9,
        'cow': 10,
        'diningtable': 11,
        'dog': 12,
        'horse': 13,
        'motorbike': 14,
        'person': 15,
        'pottedplant': 16,
        'sheep': 17,
        'sofa': 18,
        'train': 19,
        'tvmonitor': 20
    }
else:
    raise NotImplementedError('please set label dict!')


def get_label_name_map():
    reverse_dict = {}
    for name, label in NAME_LABEL_MAP.items():
        reverse_dict[label] = name
    return reverse_dict

LABEL_NAME_MAP = get_label_name_map()
-------------------------------------------------------------------------------- /libs/label_name_dict/remote_sensing_dict.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

NAME_LABEL_MAP = {
    'back_ground': 0,
    'building': 1
}


def get_label_name_map():
    reverse_dict = {}
    for name, label in NAME_LABEL_MAP.items():
        reverse_dict[label] = name
    return reverse_dict

LABEL_NAME_MAP = get_label_name_map()
-------------------------------------------------------------------------------- /libs/label_name_dict/remote_sensing_dict.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/label_name_dict/remote_sensing_dict.pyc -------------------------------------------------------------------------------- /libs/losses/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/losses/__init__.py -------------------------------------------------------------------------------- /libs/losses/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/losses/__init__.pyc -------------------------------------------------------------------------------- /libs/losses/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/losses/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /libs/losses/__pycache__/losses.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/losses/__pycache__/losses.cpython-35.pyc -------------------------------------------------------------------------------- /libs/losses/losses.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
@author: jemmy li
@contact: zengarden2009@gmail.com
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf


def _smooth_l1_loss_base(bbox_pred, bbox_targets, sigma=1.0):
    '''

    :param bbox_pred: [-1, 4] in RPN. 
[-1, cls_num+1, 4] in Fast-rcnn
    :param bbox_targets: shape is same as bbox_pred
    :param sigma:
    :return:
    '''
    sigma_2 = sigma**2

    box_diff = bbox_pred - bbox_targets

    abs_box_diff = tf.abs(box_diff)

    smoothL1_sign = tf.stop_gradient(
        tf.to_float(tf.less(abs_box_diff, 1. / sigma_2)))
    loss_box = tf.pow(box_diff, 2) * (sigma_2 / 2.0) * smoothL1_sign \
               + (abs_box_diff - (0.5 / sigma_2)) * (1.0 - smoothL1_sign)
    return loss_box

def smooth_l1_loss_rpn(bbox_pred, bbox_targets, label, sigma=1.0):
    '''

    :param bbox_pred: [-1, 4]
    :param bbox_targets: [-1, 4]
    :param label: [-1]
    :param sigma:
    :return:
    '''
    value = _smooth_l1_loss_base(bbox_pred, bbox_targets, sigma=sigma)
    value = tf.reduce_sum(value, axis=1)  # to sum in axis 1
    rpn_select = tf.where(tf.greater(label, 0))

    # rpn_select = tf.stop_gradient(rpn_select)  # to avoid
    selected_value = tf.gather(value, rpn_select)
    non_ignored_mask = tf.stop_gradient(
        1.0 - tf.to_float(tf.equal(label, -1)))  # 1.0 for non-ignored anchors (label != -1), 0.0 for ignored

    bbox_loss = tf.reduce_sum(selected_value) / tf.maximum(1.0, tf.reduce_sum(non_ignored_mask))

    return bbox_loss



def smooth_l1_loss_rcnn(bbox_pred, bbox_targets, label, num_classes, sigma=1.0):
    '''

    :param bbox_pred: [-1, (cfgs.CLS_NUM +1) * 4]
    :param bbox_targets:[-1, (cfgs.CLS_NUM +1) * 4]
    :param label:[-1]
    :param num_classes:
    :param sigma:
    :return:
    '''

    outside_mask = tf.stop_gradient(tf.to_float(tf.greater(label, 0)))

    bbox_pred = tf.reshape(bbox_pred, [-1, num_classes, 4])
    bbox_targets = tf.reshape(bbox_targets, [-1, num_classes, 4])

    value = _smooth_l1_loss_base(bbox_pred,
                                 bbox_targets,
                                 sigma=sigma)
    value = tf.reduce_sum(value, 2)
    value = tf.reshape(value, [-1, num_classes])

    inside_mask = tf.one_hot(tf.reshape(label, [-1, 1]),
                             depth=num_classes, axis=1)

    inside_mask = tf.stop_gradient(
        tf.to_float(tf.reshape(inside_mask, [-1, num_classes])))

    normalizer = tf.to_float(tf.shape(bbox_pred)[0])
    bbox_loss = tf.reduce_sum(
        tf.reduce_sum(value * inside_mask, 1)*outside_mask) / normalizer

    return bbox_loss


def sum_ohem_loss(cls_score, label, bbox_pred, bbox_targets,
                  num_classes, num_ohem_samples=256, sigma=1.0):
    '''

    :param cls_score: [-1, cls_num+1]
    :param label: [-1]
    :param bbox_pred: [-1, 4*(cls_num+1)]
    :param bbox_targets: [-1, 4*(cls_num+1)]
    :param num_ohem_samples: 256 by default
    :param num_classes: cls_num+1
    :param sigma:
    :return:
    '''

    cls_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=cls_score, labels=label)  # [-1, ]
    # cls_loss = tf.Print(cls_loss, [tf.shape(cls_loss)], summarize=10, message='CLS loss shape ****')

    outside_mask = tf.stop_gradient(tf.to_float(tf.greater(label, 0)))
    bbox_pred = tf.reshape(bbox_pred, [-1, num_classes, 4])
    bbox_targets = tf.reshape(bbox_targets, [-1, num_classes, 4])

    value = _smooth_l1_loss_base(bbox_pred,
                                 bbox_targets,
                                 sigma=sigma)
    value = tf.reduce_sum(value, 2)
    value = tf.reshape(value, [-1, num_classes])

    inside_mask = tf.one_hot(tf.reshape(label, [-1, 1]),
                             depth=num_classes, axis=1)

    inside_mask = tf.stop_gradient(
        tf.to_float(tf.reshape(inside_mask, 
[-1, num_classes])))
    loc_loss = tf.reduce_sum(value * inside_mask, 1)*outside_mask
    # loc_loss = tf.Print(loc_loss, [tf.shape(loc_loss)], summarize=10, message='loc_loss shape***')

    sum_loss = cls_loss + loc_loss

    num_ohem_samples = tf.stop_gradient(tf.minimum(num_ohem_samples, tf.shape(sum_loss)[0]))
    _, top_k_indices = tf.nn.top_k(sum_loss, k=num_ohem_samples)

    cls_loss_ohem = tf.gather(cls_loss, top_k_indices)
    cls_loss_ohem = tf.reduce_mean(cls_loss_ohem)

    loc_loss_ohem = tf.gather(loc_loss, top_k_indices)
    normalizer = tf.to_float(num_ohem_samples)
    loc_loss_ohem = tf.reduce_sum(loc_loss_ohem) / normalizer

    return cls_loss_ohem, loc_loss_ohem

-------------------------------------------------------------------------------- /libs/losses/losses.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/losses/losses.pyc -------------------------------------------------------------------------------- /libs/losses/tfapi_loss.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
These losses are derived from the TensorFlow Object Detection API;
yjr modified them to suit this project.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf


def _smooth_l1_loss_base(bbox_pred, bbox_targets, weights):
    '''
    Smooth L1 localization loss function, aka Huber loss.

    The smooth L1 loss is defined elementwise as 0.5 x^2 if |x| <= delta and
    0.5 delta^2 + delta * (|x| - delta) otherwise, where x is the difference
    between predictions and target.

    See also Equation (3) in the Fast R-CNN paper by Ross Girshick (ICCV 2015)

    :param bbox_pred: [-1, 4] in RPN. [-1, (cls_num+1) * 4] in Fast-rcnn
    :param bbox_targets: shape is same as bbox_pred
    :param weights: per-row weights, shape (n, )
    :return:
    '''
    loss_box = tf.losses.huber_loss(labels=bbox_targets,
                                    predictions=bbox_pred,
                                    weights=tf.expand_dims(weights, axis=1),  # (n, ) --> (n, 1)
                                    delta=1.0,
                                    loss_collection=None,
                                    reduction=tf.losses.Reduction.NONE)
    return loss_box

def smooth_l1_loss_rpn(bbox_pred, bbox_targets, label, sigma=1.0):
    '''

    :param bbox_pred: [-1, 4]
    :param bbox_targets: [-1, 4]
    :param label: [-1]
    :param sigma:
    :return:
    '''
    rpn_selected = tf.to_float(tf.greater(label, 0))  # mask of the positive anchors

    value = _smooth_l1_loss_base(bbox_pred, bbox_targets, weights=rpn_selected)
    value = tf.reduce_sum(value, axis=1)  # to sum in axis 1

    non_ignored_mask = tf.stop_gradient(
        tf.to_float(tf.not_equal(label, -1)))  # positive is 1.0 negative is 0.0. 
Ignored is -1 52 | 53 | bbox_loss = tf.reduce_sum(value) / tf.maximum(1.0, tf.reduce_sum(non_ignored_mask)) 54 | 55 | return bbox_loss 56 | 57 | 58 | 59 | def smooth_l1_loss_rcnn(bbox_pred, bbox_targets, label, num_classes, sigma=1.0): 60 | ''' 61 | 62 | :param bbox_pred: [-1, (cfgs.CLS_NUM +1) * 4] 63 | :param bbox_targets:[-1, (cfgs.CLS_NUM +1) * 4] 64 | :param label:[-1] 65 | :param num_classes: 66 | :param sigma: 67 | :return: 68 | ''' 69 | 70 | outside_mask = tf.stop_gradient(tf.to_float(tf.greater(label, 0))) 71 | 72 | # bbox_pred = tf.reshape(bbox_pred, [-1, num_classes, 4]) 73 | # bbox_targets = tf.reshape(bbox_targets, [-1, num_classes, 4]) 74 | 75 | value = _smooth_l1_loss_base(bbox_pred, 76 | bbox_targets, 77 | weights=outside_mask) # [-1, (num_classes)*4] 78 | value = tf.reshape(value, [-1, num_classes, 4]) 79 | 80 | value = tf.reduce_sum(value, 2) 81 | value = tf.reshape(value, [-1, num_classes]) 82 | 83 | inside_mask = tf.one_hot(tf.reshape(label, [-1, 1]), 84 | depth=num_classes, axis=1) 85 | 86 | inside_mask = tf.stop_gradient( 87 | tf.to_float(tf.reshape(inside_mask, [-1, num_classes]))) 88 | 89 | normalizer = tf.to_float(tf.shape(bbox_pred)[0]) 90 | 91 | bbox_loss = tf.reduce_sum( 92 | tf.reduce_sum(value * inside_mask, 1)*outside_mask) / normalizer 93 | 94 | return bbox_loss 95 | 96 | 97 | def sum_ohem_loss(cls_score, label, bbox_pred, bbox_targets, 98 | num_classes, num_ohem_samples=256, sigma=1.0): 99 | ''' 100 | 101 | :param cls_score: [-1, cls_num+1] 102 | :param label: [-1] 103 | :param bbox_pred: [-1, 4*(cls_num+1)] 104 | :param bbox_targets: [-1, 4*(cls_num+1)] 105 | :param num_ohem_samples: 256 by default 106 | :param num_classes: cls_num+1 107 | :param sigma: 108 | :return: 109 | ''' 110 | 111 | # cls_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=cls_score, labels=label) # [-1, ] 112 | # # cls_loss = tf.Print(cls_loss, [tf.shape(cls_loss)], summarize=10, message='CLS losss shape ****') 113 | # 114 | # outside_mask = tf.stop_gradient(tf.to_float(tf.greater(label, 0))) 115 | # bbox_pred = tf.reshape(bbox_pred, [-1, num_classes, 4]) 116 | # bbox_targets = tf.reshape(bbox_targets, [-1, num_classes, 4]) 117 | # 118 | # value = _smooth_l1_loss_base(bbox_pred, 119 | # bbox_targets, 120 | # sigma=sigma) 121 | # value = tf.reduce_sum(value, 2) 122 | # value = tf.reshape(value, [-1, num_classes]) 123 | # 124 | # inside_mask = tf.one_hot(tf.reshape(label, [-1, 1]), 125 | # depth=num_classes, axis=1) 126 | # 127 | # inside_mask = tf.stop_gradient( 128 | # tf.to_float(tf.reshape(inside_mask, [-1, num_classes]))) 129 | # loc_loss = tf.reduce_sum(value * inside_mask, 1)*outside_mask 130 | # # loc_loss = tf.Print(loc_loss, [tf.shape(loc_loss)], summarize=10, message='loc_loss shape***') 131 | # 132 | # sum_loss = cls_loss + loc_loss 133 | # 134 | # num_ohem_samples = tf.stop_gradient(tf.minimum(num_ohem_samples, tf.shape(sum_loss)[0])) 135 | # _, top_k_indices = tf.nn.top_k(sum_loss, k=num_ohem_samples) 136 | # 137 | # cls_loss_ohem = tf.gather(cls_loss, top_k_indices) 138 | # cls_loss_ohem = tf.reduce_mean(cls_loss_ohem) 139 | # 140 | # loc_loss_ohem = tf.gather(loc_loss, top_k_indices) 141 | # normalizer = tf.to_float(num_ohem_samples) 142 | # loc_loss_ohem = tf.reduce_sum(loc_loss_ohem) / normalizer 143 | # 144 | # return cls_loss_ohem, loc_loss_ohem 145 | 146 | raise NotImplementedError('ohem not implemented') 147 | 148 | -------------------------------------------------------------------------------- /libs/losses/tfapi_loss.pyc: 
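# --- Added sanity check (not part of the original repo): losses.py and
# tfapi_loss.py implement the same localization loss. With sigma = 1 the
# hand-written smooth L1 above, 0.5*x^2 for |x| < 1 and |x| - 0.5 otherwise,
# is exactly tf.losses.huber_loss with delta = 1. A NumPy sketch (all names
# are local to this example), plus the top-k selection sum_ohem_loss performs:
import numpy as np

def smooth_l1(x, sigma=1.0):
    s2 = sigma ** 2
    small = np.abs(x) < 1.0 / s2
    return np.where(small, 0.5 * s2 * x ** 2, np.abs(x) - 0.5 / s2)

x = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
huber = np.where(np.abs(x) <= 1.0, 0.5 * x ** 2, np.abs(x) - 0.5)  # delta = 1
assert np.allclose(smooth_l1(x), huber)

# OHEM keeps only the num_ohem_samples proposals with the largest cls + loc loss:
sum_loss = np.array([0.1, 2.3, 0.7, 1.5])
top_k_indices = np.argsort(-sum_loss)[:2]  # -> array([1, 3]), the two hardest examples
print(sum_loss[top_k_indices])             # [2.3 1.5]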
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/losses/tfapi_loss.pyc -------------------------------------------------------------------------------- /libs/networks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/__init__.py -------------------------------------------------------------------------------- /libs/networks/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/__init__.pyc -------------------------------------------------------------------------------- /libs/networks/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /libs/networks/build_whole_network.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/build_whole_network.pyc -------------------------------------------------------------------------------- /libs/networks/mobilenet/README.md: -------------------------------------------------------------------------------- 1 | # Mobilenet V2 2 | This folder contains building code for Mobilenet V2, based on 3 | [Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation] 4 | (https://arxiv.org/abs/1801.04381) 5 | 6 | # Pretrained model 7 | TODO 8 | 9 | # Example 10 | TODO 11 | 12 | 13 | -------------------------------------------------------------------------------- /libs/networks/mobilenet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/mobilenet/__init__.py -------------------------------------------------------------------------------- /libs/networks/mobilenet/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/mobilenet/__init__.pyc -------------------------------------------------------------------------------- /libs/networks/mobilenet/conv_blocks.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/mobilenet/conv_blocks.pyc -------------------------------------------------------------------------------- /libs/networks/mobilenet/mobilenet.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/mobilenet/mobilenet.pyc -------------------------------------------------------------------------------- /libs/networks/mobilenet/mobilenet_v2.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/mobilenet/mobilenet_v2.pyc -------------------------------------------------------------------------------- /libs/networks/mobilenet_v2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | import tensorflow.contrib.slim as slim 5 | import tensorflow as tf 6 | 7 | from libs.networks.mobilenet import mobilenet_v2 8 | from libs.networks.mobilenet.mobilenet import training_scope 9 | from libs.networks.mobilenet.mobilenet_v2 import op 10 | from libs.networks.mobilenet.mobilenet_v2 import ops 11 | expand_input = ops.expand_input_by_factor 12 | 13 | V2_BASE_DEF = dict( 14 | defaults={ 15 | # Note: these parameters of batch norm affect the architecture 16 | # that's why they are here and not in training_scope. 17 | (slim.batch_norm,): {'center': True, 'scale': True}, 18 | (slim.conv2d, slim.fully_connected, slim.separable_conv2d): { 19 | 'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6 20 | }, 21 | (ops.expanded_conv,): { 22 | 'expansion_size': expand_input(6), 23 | 'split_expansion': 1, 24 | 'normalizer_fn': slim.batch_norm, 25 | 'residual': True 26 | }, 27 | (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME'} 28 | }, 29 | spec=[ 30 | op(slim.conv2d, stride=2, num_outputs=32, kernel_size=[3, 3]), 31 | op(ops.expanded_conv, 32 | expansion_size=expand_input(1, divisible_by=1), 33 | num_outputs=16, scope='expanded_conv'), 34 | op(ops.expanded_conv, stride=2, num_outputs=24, scope='expanded_conv_1'), 35 | op(ops.expanded_conv, stride=1, num_outputs=24, scope='expanded_conv_2'), 36 | op(ops.expanded_conv, stride=2, num_outputs=32, scope='expanded_conv_3'), 37 | op(ops.expanded_conv, stride=1, num_outputs=32, scope='expanded_conv_4'), 38 | op(ops.expanded_conv, stride=1, num_outputs=32, scope='expanded_conv_5'), 39 | op(ops.expanded_conv, stride=2, num_outputs=64, scope='expanded_conv_6'), 40 | op(ops.expanded_conv, stride=1, num_outputs=64, scope='expanded_conv_7'), 41 | op(ops.expanded_conv, stride=1, num_outputs=64, scope='expanded_conv_8'), 42 | op(ops.expanded_conv, stride=1, num_outputs=64, scope='expanded_conv_9'), 43 | op(ops.expanded_conv, stride=1, num_outputs=96, scope='expanded_conv_10'), 44 | op(ops.expanded_conv, stride=1, num_outputs=96, scope='expanded_conv_11'), 45 | op(ops.expanded_conv, stride=1, num_outputs=96, scope='expanded_conv_12') 46 | ], 47 | ) 48 | 49 | 50 | V2_HEAD_DEF = dict( 51 | defaults={ 52 | # Note: these parameters of batch norm affect the architecture 53 | # that's why they are here and not in training_scope. 
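# --- Added note (not part of the original code): V2_BASE_DEF above stops at
# expanded_conv_12, i.e. the stride-16 feature map; mobilenetv2_base below
# uses it as the shared feature extractor for the RPN and RoI cropping.
# The blocks listed in this V2_HEAD_DEF spec (expanded_conv_13..16 and
# Conv_1) are then run per-RoI by mobilenetv2_head as the second-stage
# head, mirroring the resnet_base / restnet_head split in resnet.py.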
54 | (slim.batch_norm,): {'center': True, 'scale': True}, 55 | (slim.conv2d, slim.fully_connected, slim.separable_conv2d): { 56 | 'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6 57 | }, 58 | (ops.expanded_conv,): { 59 | 'expansion_size': expand_input(6), 60 | 'split_expansion': 1, 61 | 'normalizer_fn': slim.batch_norm, 62 | 'residual': True 63 | }, 64 | (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME'} 65 | }, 66 | spec=[ 67 | op(ops.expanded_conv, stride=2, num_outputs=160, scope='expanded_conv_13'), 68 | op(ops.expanded_conv, stride=1, num_outputs=160, scope='expanded_conv_14'), 69 | op(ops.expanded_conv, stride=1, num_outputs=160, scope='expanded_conv_15'), 70 | op(ops.expanded_conv, stride=1, num_outputs=320, scope='expanded_conv_16'), 71 | op(slim.conv2d, stride=1, kernel_size=[1, 1], num_outputs=1280, scope='Conv_1') 72 | ], 73 | ) 74 | 75 | def mobilenetv2_scope(is_training=True, 76 | trainable=True, 77 | weight_decay=0.00004, 78 | stddev=0.09, 79 | dropout_keep_prob=0.8, 80 | bn_decay=0.997): 81 | """Defines Mobilenet training scope. 82 | In default. We do not use BN 83 | 84 | ReWrite the scope. 85 | """ 86 | batch_norm_params = { 87 | 'is_training': False, 88 | 'trainable': False, 89 | 'decay': bn_decay, 90 | } 91 | with slim.arg_scope(training_scope(is_training=is_training, weight_decay=weight_decay)): 92 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.separable_conv2d], 93 | trainable=trainable): 94 | with slim.arg_scope([slim.batch_norm], **batch_norm_params) as sc: 95 | return sc 96 | 97 | 98 | 99 | def mobilenetv2_base(img_batch, is_training=True): 100 | 101 | with slim.arg_scope(mobilenetv2_scope(is_training=is_training, trainable=True)): 102 | 103 | feature_to_crop, endpoints = mobilenet_v2.mobilenet_base(input_tensor=img_batch, 104 | num_classes=None, 105 | is_training=False, 106 | depth_multiplier=1.0, 107 | scope='MobilenetV2', 108 | conv_defs=V2_BASE_DEF, 109 | finegrain_classification_mode=False) 110 | 111 | # feature_to_crop = tf.Print(feature_to_crop, [tf.shape(feature_to_crop)], summarize=10, message='rpn_shape') 112 | return feature_to_crop 113 | 114 | 115 | def mobilenetv2_head(inputs, is_training=True): 116 | with slim.arg_scope(mobilenetv2_scope(is_training=is_training, trainable=True)): 117 | net, _ = mobilenet_v2.mobilenet(input_tensor=inputs, 118 | num_classes=None, 119 | is_training=False, 120 | depth_multiplier=1.0, 121 | scope='MobilenetV2', 122 | conv_defs=V2_HEAD_DEF, 123 | finegrain_classification_mode=False) 124 | 125 | net = tf.squeeze(net, [1, 2]) 126 | 127 | return net -------------------------------------------------------------------------------- /libs/networks/mobilenet_v2.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/mobilenet_v2.pyc -------------------------------------------------------------------------------- /libs/networks/resnet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | 6 | import tensorflow as tf 7 | import tensorflow.contrib.slim as slim 8 | from libs.configs import cfgs 9 | from tensorflow.contrib.slim.nets import resnet_v1 10 | from tensorflow.contrib.slim.nets import resnet_utils 11 | from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block 12 | # import 
tfplot as tfp 13 | 14 | def resnet_arg_scope( 15 | is_training=True, weight_decay=cfgs.WEIGHT_DECAY, batch_norm_decay=0.997, 16 | batch_norm_epsilon=1e-5, batch_norm_scale=True): 17 | ''' 18 | 19 | In Default, we do not use BN to train resnet, since batch_size is too small. 20 | So is_training is False and trainable is False in the batch_norm params. 21 | 22 | ''' 23 | batch_norm_params = { 24 | 'is_training': False, 'decay': batch_norm_decay, 25 | 'epsilon': batch_norm_epsilon, 'scale': batch_norm_scale, 26 | 'trainable': False, 27 | 'updates_collections': tf.GraphKeys.UPDATE_OPS 28 | } 29 | 30 | with slim.arg_scope( 31 | [slim.conv2d], 32 | weights_regularizer=slim.l2_regularizer(weight_decay), 33 | weights_initializer=slim.variance_scaling_initializer(), 34 | trainable=is_training, 35 | activation_fn=tf.nn.relu, 36 | normalizer_fn=slim.batch_norm, 37 | normalizer_params=batch_norm_params): 38 | with slim.arg_scope([slim.batch_norm], **batch_norm_params) as arg_sc: 39 | return arg_sc 40 | 41 | 42 | # def add_heatmap(feature_maps, name): 43 | # ''' 44 | # 45 | # :param feature_maps:[B, H, W, C] 46 | # :return: 47 | # ''' 48 | # 49 | # def figure_attention(activation): 50 | # fig, ax = tfp.subplots() 51 | # im = ax.imshow(activation, cmap='jet') 52 | # fig.colorbar(im) 53 | # return fig 54 | # 55 | # heatmap = tf.reduce_sum(feature_maps, axis=-1) 56 | # heatmap = tf.squeeze(heatmap, axis=0) 57 | # tfp.summary.plot(name, figure_attention, [heatmap]) 58 | 59 | 60 | def resnet_base(img_batch, scope_name, is_training=True): 61 | ''' 62 | this code is derived from light-head rcnn. 63 | https://github.com/zengarden/light_head_rcnn 64 | 65 | It is convenient to freeze blocks. So we adapt this mode. 66 | ''' 67 | if scope_name == 'resnet_v1_50': 68 | middle_num_units = 6 69 | elif scope_name == 'resnet_v1_101': 70 | middle_num_units = 23 71 | else: 72 | raise NotImplementedError('We only support resnet_v1_50 or resnet_v1_101. Check your network name....yjr') 73 | 74 | blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2), 75 | resnet_v1_block('block2', base_depth=128, num_units=4, stride=2), 76 | # use stride 1 for the last conv4 layer. 77 | 78 | resnet_v1_block('block3', base_depth=256, num_units=middle_num_units, stride=1)] 79 | # when use fpn . 
stride list is [1, 2, 2] 80 | 81 | with slim.arg_scope(resnet_arg_scope(is_training=False)): 82 | with tf.variable_scope(scope_name, scope_name): 83 | # Do the first few layers manually, because 'SAME' padding can behave inconsistently 84 | # for images of different sizes: sometimes 0, sometimes 1 85 | net = resnet_utils.conv2d_same( 86 | img_batch, 64, 7, stride=2, scope='conv1') 87 | net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]]) 88 | net = slim.max_pool2d( 89 | net, [3, 3], stride=2, padding='VALID', scope='pool1') 90 | 91 | not_freezed = [False] * cfgs.FIXED_BLOCKS + (4-cfgs.FIXED_BLOCKS)*[True] 92 | # Fixed_Blocks can be 1~3 93 | 94 | with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[0]))): 95 | C2, _ = resnet_v1.resnet_v1(net, 96 | blocks[0:1], 97 | global_pool=False, 98 | include_root_block=False, 99 | scope=scope_name) 100 | 101 | # C2 = tf.Print(C2, [tf.shape(C2)], summarize=10, message='C2_shape') 102 | # add_heatmap(C2, 'Layer/C2') 103 | 104 | with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[1]))): 105 | C3, _ = resnet_v1.resnet_v1(C2, 106 | blocks[1:2], 107 | global_pool=False, 108 | include_root_block=False, 109 | scope=scope_name) 110 | # add_heatmap(C3, name='Layer/C3') 111 | # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape') 112 | 113 | with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[2]))): 114 | C4, _ = resnet_v1.resnet_v1(C3, 115 | blocks[2:3], 116 | global_pool=False, 117 | include_root_block=False, 118 | scope=scope_name) 119 | # add_heatmap(C4, name='Layer/C4') 120 | # C4 = tf.Print(C4, [tf.shape(C4)], summarize=10, message='C4_shape') 121 | return C4 122 | 123 | 124 | def restnet_head(input, is_training, scope_name): 125 | block4 = [resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)] 126 | 127 | with slim.arg_scope(resnet_arg_scope(is_training=is_training)): 128 | C5, _ = resnet_v1.resnet_v1(input, 129 | block4, 130 | global_pool=False, 131 | include_root_block=False, 132 | scope=scope_name) 133 | # C5 = tf.Print(C5, [tf.shape(C5)], summarize=10, message='C5_shape') 134 | C5_flatten = tf.reduce_mean(C5, axis=[1, 2], keep_dims=False, name='global_average_pooling') 135 | # C5_flatten = tf.Print(C5_flatten, [tf.shape(C5_flatten)], summarize=10, message='C5_flatten_shape') 136 | 137 | # global average pooling C5 to obtain fc layers 138 | return C5_flatten 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | -------------------------------------------------------------------------------- /libs/networks/resnet.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/resnet.pyc -------------------------------------------------------------------------------- /libs/networks/slim_nets/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/__init__.pyc 
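# --- Added example (not part of the original repo): how cfgs.FIXED_BLOCKS
# freezes the early ResNet stages in resnet_base above. A plain-Python
# sketch of the resulting trainability flags (names local to this example):
def trainable_stages(fixed_blocks, is_training=True):
    not_freezed = [False] * fixed_blocks + (4 - fixed_blocks) * [True]
    return [is_training and flag for flag in not_freezed]

print(trainable_stages(1))  # [False, True, True, True]   -> block1 frozen
print(trainable_stages(3))  # [False, False, False, True] -> block1-3 frozen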
-------------------------------------------------------------------------------- /libs/networks/slim_nets/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /libs/networks/slim_nets/__pycache__/inception_resnet_v2.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/__pycache__/inception_resnet_v2.cpython-35.pyc -------------------------------------------------------------------------------- /libs/networks/slim_nets/__pycache__/mobilenet_v1.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/__pycache__/mobilenet_v1.cpython-35.pyc -------------------------------------------------------------------------------- /libs/networks/slim_nets/__pycache__/resnet_utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/__pycache__/resnet_utils.cpython-35.pyc -------------------------------------------------------------------------------- /libs/networks/slim_nets/__pycache__/resnet_v1.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/__pycache__/resnet_v1.cpython-35.pyc -------------------------------------------------------------------------------- /libs/networks/slim_nets/__pycache__/vgg.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/__pycache__/vgg.cpython-35.pyc -------------------------------------------------------------------------------- /libs/networks/slim_nets/alexnet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a model definition for AlexNet. 
16 | 17 | This work was first described in: 18 | ImageNet Classification with Deep Convolutional Neural Networks 19 | Alex Krizhevsky, Ilya Sutskever and Geoffrey E. Hinton 20 | 21 | and later refined in: 22 | One weird trick for parallelizing convolutional neural networks 23 | Alex Krizhevsky, 2014 24 | 25 | Here we provide the implementation proposed in "One weird trick" and not 26 | "ImageNet Classification", as per the paper, the LRN layers have been removed. 27 | 28 | Usage: 29 | with slim.arg_scope(alexnet.alexnet_v2_arg_scope()): 30 | outputs, end_points = alexnet.alexnet_v2(inputs) 31 | 32 | @@alexnet_v2 33 | """ 34 | 35 | from __future__ import absolute_import 36 | from __future__ import division 37 | from __future__ import print_function 38 | 39 | import tensorflow as tf 40 | 41 | slim = tf.contrib.slim 42 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) 43 | 44 | 45 | def alexnet_v2_arg_scope(weight_decay=0.0005): 46 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 47 | activation_fn=tf.nn.relu, 48 | biases_initializer=tf.constant_initializer(0.1), 49 | weights_regularizer=slim.l2_regularizer(weight_decay)): 50 | with slim.arg_scope([slim.conv2d], padding='SAME'): 51 | with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc: 52 | return arg_sc 53 | 54 | 55 | def alexnet_v2(inputs, 56 | num_classes=1000, 57 | is_training=True, 58 | dropout_keep_prob=0.5, 59 | spatial_squeeze=True, 60 | scope='alexnet_v2'): 61 | """AlexNet version 2. 62 | 63 | Described in: http://arxiv.org/pdf/1404.5997v2.pdf 64 | Parameters from: 65 | github.com/akrizhevsky/cuda-convnet2/blob/master/layers/ 66 | layers-imagenet-1gpu.cfg 67 | 68 | Note: All the fully_connected layers have been transformed to conv2d layers. 69 | To use in classification mode, resize input to 224x224. To use in fully 70 | convolutional mode, set spatial_squeeze to false. 71 | The LRN layers have been removed and change the initializers from 72 | random_normal_initializer to xavier_initializer. 73 | 74 | Args: 75 | inputs: a tensor of size [batch_size, height, width, channels]. 76 | num_classes: number of predicted classes. 77 | is_training: whether or not the model is being trained. 78 | dropout_keep_prob: the probability that activations are kept in the dropout 79 | layers during training. 80 | spatial_squeeze: whether or not should squeeze the spatial dimensions of the 81 | outputs. Useful to remove unnecessary dimensions for classification. 82 | scope: Optional scope for the variables. 83 | 84 | Returns: 85 | the last op containing the log predictions and end_points dict. 86 | """ 87 | with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc: 88 | end_points_collection = sc.name + '_end_points' 89 | # Collect outputs for conv2d, fully_connected and max_pool2d. 90 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], 91 | outputs_collections=[end_points_collection]): 92 | net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID', 93 | scope='conv1') 94 | net = slim.max_pool2d(net, [3, 3], 2, scope='pool1') 95 | net = slim.conv2d(net, 192, [5, 5], scope='conv2') 96 | net = slim.max_pool2d(net, [3, 3], 2, scope='pool2') 97 | net = slim.conv2d(net, 384, [3, 3], scope='conv3') 98 | net = slim.conv2d(net, 384, [3, 3], scope='conv4') 99 | net = slim.conv2d(net, 256, [3, 3], scope='conv5') 100 | net = slim.max_pool2d(net, [3, 3], 2, scope='pool5') 101 | 102 | # Use conv2d instead of fully_connected layers. 
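# --- Added note (not part of the original code): with the canonical 224x224
# input, the tensor reaching fc6 is 5x5x256 (224 -(conv1, s4)-> 54 -(pool1)->
# 26 -(pool2)-> 12 -(pool5)-> 5), so the 5x5 VALID conv below is exactly a
# fully connected layer. On bigger inputs the same kernel slides, which is
# what makes the fully convolutional mode work: a 300x400 input ends up as
# 4x7 spatial logits (see testFullyConvolutional in alexnet_test.py).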
103 | with slim.arg_scope([slim.conv2d], 104 | weights_initializer=trunc_normal(0.005), 105 | biases_initializer=tf.constant_initializer(0.1)): 106 | net = slim.conv2d(net, 4096, [5, 5], padding='VALID', 107 | scope='fc6') 108 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 109 | scope='dropout6') 110 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7') 111 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 112 | scope='dropout7') 113 | net = slim.conv2d(net, num_classes, [1, 1], 114 | activation_fn=None, 115 | normalizer_fn=None, 116 | biases_initializer=tf.zeros_initializer(), 117 | scope='fc8') 118 | 119 | # Convert end_points_collection into a end_point dict. 120 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 121 | if spatial_squeeze: 122 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed') 123 | end_points[sc.name + '/fc8'] = net 124 | return net, end_points 125 | alexnet_v2.default_image_size = 224 126 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/alexnet_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Tests for slim.slim_nets.alexnet.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from nets import alexnet 23 | 24 | slim = tf.contrib.slim 25 | 26 | 27 | class AlexnetV2Test(tf.test.TestCase): 28 | 29 | def testBuild(self): 30 | batch_size = 5 31 | height, width = 224, 224 32 | num_classes = 1000 33 | with self.test_session(): 34 | inputs = tf.random_uniform((batch_size, height, width, 3)) 35 | logits, _ = alexnet.alexnet_v2(inputs, num_classes) 36 | self.assertEquals(logits.op.name, 'alexnet_v2/fc8/squeezed') 37 | self.assertListEqual(logits.get_shape().as_list(), 38 | [batch_size, num_classes]) 39 | 40 | def testFullyConvolutional(self): 41 | batch_size = 1 42 | height, width = 300, 400 43 | num_classes = 1000 44 | with self.test_session(): 45 | inputs = tf.random_uniform((batch_size, height, width, 3)) 46 | logits, _ = alexnet.alexnet_v2(inputs, num_classes, spatial_squeeze=False) 47 | self.assertEquals(logits.op.name, 'alexnet_v2/fc8/BiasAdd') 48 | self.assertListEqual(logits.get_shape().as_list(), 49 | [batch_size, 4, 7, num_classes]) 50 | 51 | def testEndPoints(self): 52 | batch_size = 5 53 | height, width = 224, 224 54 | num_classes = 1000 55 | with self.test_session(): 56 | inputs = tf.random_uniform((batch_size, height, width, 3)) 57 | _, end_points = alexnet.alexnet_v2(inputs, num_classes) 58 | expected_names = ['alexnet_v2/conv1', 59 | 'alexnet_v2/pool1', 60 | 'alexnet_v2/conv2', 61 | 'alexnet_v2/pool2', 62 | 'alexnet_v2/conv3', 63 | 'alexnet_v2/conv4', 64 | 'alexnet_v2/conv5', 65 | 'alexnet_v2/pool5', 66 | 'alexnet_v2/fc6', 67 | 'alexnet_v2/fc7', 68 | 'alexnet_v2/fc8' 69 | ] 70 | self.assertSetEqual(set(end_points.keys()), set(expected_names)) 71 | 72 | def testModelVariables(self): 73 | batch_size = 5 74 | height, width = 224, 224 75 | num_classes = 1000 76 | with self.test_session(): 77 | inputs = tf.random_uniform((batch_size, height, width, 3)) 78 | alexnet.alexnet_v2(inputs, num_classes) 79 | expected_names = ['alexnet_v2/conv1/weights', 80 | 'alexnet_v2/conv1/biases', 81 | 'alexnet_v2/conv2/weights', 82 | 'alexnet_v2/conv2/biases', 83 | 'alexnet_v2/conv3/weights', 84 | 'alexnet_v2/conv3/biases', 85 | 'alexnet_v2/conv4/weights', 86 | 'alexnet_v2/conv4/biases', 87 | 'alexnet_v2/conv5/weights', 88 | 'alexnet_v2/conv5/biases', 89 | 'alexnet_v2/fc6/weights', 90 | 'alexnet_v2/fc6/biases', 91 | 'alexnet_v2/fc7/weights', 92 | 'alexnet_v2/fc7/biases', 93 | 'alexnet_v2/fc8/weights', 94 | 'alexnet_v2/fc8/biases', 95 | ] 96 | model_variables = [v.op.name for v in slim.get_model_variables()] 97 | self.assertSetEqual(set(model_variables), set(expected_names)) 98 | 99 | def testEvaluation(self): 100 | batch_size = 2 101 | height, width = 224, 224 102 | num_classes = 1000 103 | with self.test_session(): 104 | eval_inputs = tf.random_uniform((batch_size, height, width, 3)) 105 | logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False) 106 | self.assertListEqual(logits.get_shape().as_list(), 107 | [batch_size, num_classes]) 108 | predictions = tf.argmax(logits, 1) 109 | self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) 110 | 111 | def testTrainEvalWithReuse(self): 112 | train_batch_size = 2 113 | eval_batch_size = 1 114 | train_height, train_width = 224, 224 115 | eval_height, eval_width = 300, 400 116 | num_classes = 1000 117 | with 
self.test_session(): 118 | train_inputs = tf.random_uniform( 119 | (train_batch_size, train_height, train_width, 3)) 120 | logits, _ = alexnet.alexnet_v2(train_inputs) 121 | self.assertListEqual(logits.get_shape().as_list(), 122 | [train_batch_size, num_classes]) 123 | tf.get_variable_scope().reuse_variables() 124 | eval_inputs = tf.random_uniform( 125 | (eval_batch_size, eval_height, eval_width, 3)) 126 | logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False, 127 | spatial_squeeze=False) 128 | self.assertListEqual(logits.get_shape().as_list(), 129 | [eval_batch_size, 4, 7, num_classes]) 130 | logits = tf.reduce_mean(logits, [1, 2]) 131 | predictions = tf.argmax(logits, 1) 132 | self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size]) 133 | 134 | def testForward(self): 135 | batch_size = 1 136 | height, width = 224, 224 137 | with self.test_session() as sess: 138 | inputs = tf.random_uniform((batch_size, height, width, 3)) 139 | logits, _ = alexnet.alexnet_v2(inputs) 140 | sess.run(tf.global_variables_initializer()) 141 | output = sess.run(logits) 142 | self.assertTrue(output.any()) 143 | 144 | if __name__ == '__main__': 145 | tf.test.main() 146 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/cifarnet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a variant of the CIFAR-10 model definition.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | slim = tf.contrib.slim 24 | 25 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(stddev=stddev) 26 | 27 | 28 | def cifarnet(images, num_classes=10, is_training=False, 29 | dropout_keep_prob=0.5, 30 | prediction_fn=slim.softmax, 31 | scope='CifarNet'): 32 | """Creates a variant of the CifarNet model. 33 | 34 | Note that since the output is a set of 'logits', the values fall in the 35 | interval of (-infinity, infinity). Consequently, to convert the outputs to a 36 | probability distribution over the characters, one will need to convert them 37 | using the softmax function: 38 | 39 | logits = cifarnet.cifarnet(images, is_training=False) 40 | probabilities = tf.nn.softmax(logits) 41 | predictions = tf.argmax(logits, 1) 42 | 43 | Args: 44 | images: A batch of `Tensors` of size [batch_size, height, width, channels]. 45 | num_classes: the number of classes in the dataset. 46 | is_training: specifies whether or not we're currently training the model. 47 | This variable will determine the behaviour of the dropout layer. 48 | dropout_keep_prob: the percentage of activation values that are retained. 
49 | prediction_fn: a function to get predictions out of logits. 50 | scope: Optional variable_scope. 51 | 52 | Returns: 53 | logits: the pre-softmax activations, a tensor of size 54 | [batch_size, `num_classes`] 55 | end_points: a dictionary from components of the network to the corresponding 56 | activation. 57 | """ 58 | end_points = {} 59 | 60 | with tf.variable_scope(scope, 'CifarNet', [images, num_classes]): 61 | net = slim.conv2d(images, 64, [5, 5], scope='conv1') 62 | end_points['conv1'] = net 63 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool1') 64 | end_points['pool1'] = net 65 | net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm1') 66 | net = slim.conv2d(net, 64, [5, 5], scope='conv2') 67 | end_points['conv2'] = net 68 | net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm2') 69 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool2') 70 | end_points['pool2'] = net 71 | net = slim.flatten(net) 72 | end_points['Flatten'] = net 73 | net = slim.fully_connected(net, 384, scope='fc3') 74 | end_points['fc3'] = net 75 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 76 | scope='dropout3') 77 | net = slim.fully_connected(net, 192, scope='fc4') 78 | end_points['fc4'] = net 79 | logits = slim.fully_connected(net, num_classes, 80 | biases_initializer=tf.zeros_initializer(), 81 | weights_initializer=trunc_normal(1/192.0), 82 | weights_regularizer=None, 83 | activation_fn=None, 84 | scope='logits') 85 | 86 | end_points['Logits'] = logits 87 | end_points['Predictions'] = prediction_fn(logits, scope='Predictions') 88 | 89 | return logits, end_points 90 | cifarnet.default_image_size = 32 91 | 92 | 93 | def cifarnet_arg_scope(weight_decay=0.004): 94 | """Defines the default cifarnet argument scope. 95 | 96 | Args: 97 | weight_decay: The weight decay to use for regularizing the model. 98 | 99 | Returns: 100 | An `arg_scope` to use for the inception v3 model. 101 | """ 102 | with slim.arg_scope( 103 | [slim.conv2d], 104 | weights_initializer=tf.truncated_normal_initializer(stddev=5e-2), 105 | activation_fn=tf.nn.relu): 106 | with slim.arg_scope( 107 | [slim.fully_connected], 108 | biases_initializer=tf.constant_initializer(0.1), 109 | weights_initializer=trunc_normal(0.04), 110 | weights_regularizer=slim.l2_regularizer(weight_decay), 111 | activation_fn=tf.nn.relu) as sc: 112 | return sc 113 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/inception.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Brings all inception models under one namespace.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | # pylint: disable=unused-import 22 | from nets.inception_resnet_v2 import inception_resnet_v2 23 | from nets.inception_resnet_v2 import inception_resnet_v2_arg_scope 24 | from nets.inception_resnet_v2 import inception_resnet_v2_base 25 | from nets.inception_v1 import inception_v1 26 | from nets.inception_v1 import inception_v1_arg_scope 27 | from nets.inception_v1 import inception_v1_base 28 | from nets.inception_v2 import inception_v2 29 | from nets.inception_v2 import inception_v2_arg_scope 30 | from nets.inception_v2 import inception_v2_base 31 | from nets.inception_v3 import inception_v3 32 | from nets.inception_v3 import inception_v3_arg_scope 33 | from nets.inception_v3 import inception_v3_base 34 | from nets.inception_v4 import inception_v4 35 | from nets.inception_v4 import inception_v4_arg_scope 36 | from nets.inception_v4 import inception_v4_base 37 | # pylint: enable=unused-import 38 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/inception_resnet_v2.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/inception_resnet_v2.pyc -------------------------------------------------------------------------------- /libs/networks/slim_nets/inception_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains common code shared by all inception models. 16 | 17 | Usage of arg scope: 18 | with slim.arg_scope(inception_arg_scope()): 19 | logits, end_points = inception.inception_v3(images, num_classes, 20 | is_training=is_training) 21 | 22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | import tensorflow as tf 28 | 29 | slim = tf.contrib.slim 30 | 31 | 32 | def inception_arg_scope(weight_decay=0.00004, 33 | use_batch_norm=True, 34 | batch_norm_decay=0.9997, 35 | batch_norm_epsilon=0.001): 36 | """Defines the default arg scope for inception models. 37 | 38 | Args: 39 | weight_decay: The weight decay to use for regularizing the model. 40 | use_batch_norm: "If `True`, batch_norm is applied after each convolution. 41 | batch_norm_decay: Decay for batch norm moving average. 42 | batch_norm_epsilon: Small float added to variance to avoid dividing by zero 43 | in batch norm. 
44 | 45 | Returns: 46 | An `arg_scope` to use for the inception models. 47 | """ 48 | batch_norm_params = { 49 | # Decay for the moving averages. 50 | 'decay': batch_norm_decay, 51 | # epsilon to prevent 0s in variance. 52 | 'epsilon': batch_norm_epsilon, 53 | # collection containing update_ops. 54 | 'updates_collections': tf.GraphKeys.UPDATE_OPS, 55 | } 56 | if use_batch_norm: 57 | normalizer_fn = slim.batch_norm 58 | normalizer_params = batch_norm_params 59 | else: 60 | normalizer_fn = None 61 | normalizer_params = {} 62 | # Set weight_decay for weights in Conv and FC layers. 63 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 64 | weights_regularizer=slim.l2_regularizer(weight_decay)): 65 | with slim.arg_scope( 66 | [slim.conv2d], 67 | weights_initializer=slim.variance_scaling_initializer(), 68 | activation_fn=tf.nn.relu, 69 | normalizer_fn=normalizer_fn, 70 | normalizer_params=normalizer_params) as sc: 71 | return sc 72 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/lenet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a variant of the LeNet model definition.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | slim = tf.contrib.slim 24 | 25 | 26 | def lenet(images, num_classes=10, is_training=False, 27 | dropout_keep_prob=0.5, 28 | prediction_fn=slim.softmax, 29 | scope='LeNet'): 30 | """Creates a variant of the LeNet model. 31 | 32 | Note that since the output is a set of 'logits', the values fall in the 33 | interval of (-infinity, infinity). Consequently, to convert the outputs to a 34 | probability distribution over the characters, one will need to convert them 35 | using the softmax function: 36 | 37 | logits = lenet.lenet(images, is_training=False) 38 | probabilities = tf.nn.softmax(logits) 39 | predictions = tf.argmax(logits, 1) 40 | 41 | Args: 42 | images: A batch of `Tensors` of size [batch_size, height, width, channels]. 43 | num_classes: the number of classes in the dataset. 44 | is_training: specifies whether or not we're currently training the model. 45 | This variable will determine the behaviour of the dropout layer. 46 | dropout_keep_prob: the percentage of activation values that are retained. 47 | prediction_fn: a function to get predictions out of logits. 48 | scope: Optional variable_scope. 49 | 50 | Returns: 51 | logits: the pre-softmax activations, a tensor of size 52 | [batch_size, `num_classes`] 53 | end_points: a dictionary from components of the network to the corresponding 54 | activation. 
55 | """ 56 | end_points = {} 57 | 58 | with tf.variable_scope(scope, 'LeNet', [images, num_classes]): 59 | net = slim.conv2d(images, 32, [5, 5], scope='conv1') 60 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool1') 61 | net = slim.conv2d(net, 64, [5, 5], scope='conv2') 62 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool2') 63 | net = slim.flatten(net) 64 | end_points['Flatten'] = net 65 | 66 | net = slim.fully_connected(net, 1024, scope='fc3') 67 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 68 | scope='dropout3') 69 | logits = slim.fully_connected(net, num_classes, activation_fn=None, 70 | scope='fc4') 71 | 72 | end_points['Logits'] = logits 73 | end_points['Predictions'] = prediction_fn(logits, scope='Predictions') 74 | 75 | return logits, end_points 76 | lenet.default_image_size = 28 77 | 78 | 79 | def lenet_arg_scope(weight_decay=0.0): 80 | """Defines the default lenet argument scope. 81 | 82 | Args: 83 | weight_decay: The weight decay to use for regularizing the model. 84 | 85 | Returns: 86 | An `arg_scope` to use for the lenet model. 87 | """ 88 | with slim.arg_scope( 89 | [slim.conv2d, slim.fully_connected], 90 | weights_regularizer=slim.l2_regularizer(weight_decay), 91 | weights_initializer=tf.truncated_normal_initializer(stddev=0.1), 92 | activation_fn=tf.nn.relu) as sc: 93 | return sc 94 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/mobilenet_v1.md: -------------------------------------------------------------------------------- 1 | # MobileNet_v1 2 | 3 | [MobileNets](https://arxiv.org/abs/1704.04861) are small, low-latency, low-power models parameterized to meet the resource constraints of a variety of use cases. They can be built upon for classification, detection, embeddings and segmentation similar to how other popular large scale models, such as Inception, are used. MobileNets can be run efficiently on mobile devices with [TensorFlow Mobile](https://www.tensorflow.org/mobile/). 4 | 5 | MobileNets trade off between latency, size and accuracy while comparing favorably with popular models from the literature. 6 | 7 | ![alt text](mobilenet_v1.png "MobileNet Graph") 8 | 9 | # Pre-trained Models 10 | 11 | Choose the right MobileNet model to fit your latency and size budget. The size of the network in memory and on disk is proportional to the number of parameters. The latency and power usage of the network scales with the number of Multiply-Accumulates (MACs) which measures the number of fused Multiplication and Addition operations. These MobileNet models have been trained on the 12 | [ILSVRC-2012-CLS](http://www.image-net.org/challenges/LSVRC/2012/) 13 | image classification dataset. Accuracies were computed by evaluating using a single image crop. 
14 | 15 | Model Checkpoint | Million MACs | Million Parameters | Top-1 Accuracy| Top-5 Accuracy | 16 | :----:|:------------:|:----------:|:-------:|:-------:| 17 | [MobileNet_v1_1.0_224](http://download.tensorflow.org/models/mobilenet_v1_1.0_224_2017_06_14.tar.gz)|569|4.24|70.7|89.5| 18 | [MobileNet_v1_1.0_192](http://download.tensorflow.org/models/mobilenet_v1_1.0_192_2017_06_14.tar.gz)|418|4.24|69.3|88.9| 19 | [MobileNet_v1_1.0_160](http://download.tensorflow.org/models/mobilenet_v1_1.0_160_2017_06_14.tar.gz)|291|4.24|67.2|87.5| 20 | [MobileNet_v1_1.0_128](http://download.tensorflow.org/models/mobilenet_v1_1.0_128_2017_06_14.tar.gz)|186|4.24|64.1|85.3| 21 | [MobileNet_v1_0.75_224](http://download.tensorflow.org/models/mobilenet_v1_0.75_224_2017_06_14.tar.gz)|317|2.59|68.4|88.2| 22 | [MobileNet_v1_0.75_192](http://download.tensorflow.org/models/mobilenet_v1_0.75_192_2017_06_14.tar.gz)|233|2.59|67.4|87.3| 23 | [MobileNet_v1_0.75_160](http://download.tensorflow.org/models/mobilenet_v1_0.75_160_2017_06_14.tar.gz)|162|2.59|65.2|86.1| 24 | [MobileNet_v1_0.75_128](http://download.tensorflow.org/models/mobilenet_v1_0.75_128_2017_06_14.tar.gz)|104|2.59|61.8|83.6| 25 | [MobileNet_v1_0.50_224](http://download.tensorflow.org/models/mobilenet_v1_0.50_224_2017_06_14.tar.gz)|150|1.34|64.0|85.4| 26 | [MobileNet_v1_0.50_192](http://download.tensorflow.org/models/mobilenet_v1_0.50_192_2017_06_14.tar.gz)|110|1.34|62.1|84.0| 27 | [MobileNet_v1_0.50_160](http://download.tensorflow.org/models/mobilenet_v1_0.50_160_2017_06_14.tar.gz)|77|1.34|59.9|82.5| 28 | [MobileNet_v1_0.50_128](http://download.tensorflow.org/models/mobilenet_v1_0.50_128_2017_06_14.tar.gz)|49|1.34|56.2|79.6| 29 | [MobileNet_v1_0.25_224](http://download.tensorflow.org/models/mobilenet_v1_0.25_224_2017_06_14.tar.gz)|41|0.47|50.6|75.0| 30 | [MobileNet_v1_0.25_192](http://download.tensorflow.org/models/mobilenet_v1_0.25_192_2017_06_14.tar.gz)|34|0.47|49.0|73.6| 31 | [MobileNet_v1_0.25_160](http://download.tensorflow.org/models/mobilenet_v1_0.25_160_2017_06_14.tar.gz)|21|0.47|46.0|70.7| 32 | [MobileNet_v1_0.25_128](http://download.tensorflow.org/models/mobilenet_v1_0.25_128_2017_06_14.tar.gz)|14|0.47|41.3|66.2| 33 | 34 | 35 | Here is an example of how to download the MobileNet_v1_1.0_224 checkpoint: 36 | 37 | ```shell 38 | $ CHECKPOINT_DIR=/tmp/checkpoints 39 | $ mkdir ${CHECKPOINT_DIR} 40 | $ wget http://download.tensorflow.org/models/mobilenet_v1_1.0_224_2017_06_14.tar.gz 41 | $ tar -xvf mobilenet_v1_1.0_224_2017_06_14.tar.gz 42 | $ mv mobilenet_v1_1.0_224.ckpt.* ${CHECKPOINT_DIR} 43 | $ rm mobilenet_v1_1.0_224_2017_06_14.tar.gz 44 | ``` 45 | More information on integrating MobileNets into your project can be found at the [TF-Slim Image Classification Library](https://github.com/tensorflow/models/blob/master/slim/README.md). 46 | 47 | To get started running models on-device go to [TensorFlow Mobile](https://www.tensorflow.org/mobile/). 
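Once a checkpoint is unpacked, it can be restored into a slim graph built from the `mobilenet_v1` module in this directory. The snippet below is a minimal sketch, not part of the original file: it assumes the checkpoint path from the shell example above, the 1001-class ImageNet label set (1000 classes plus background) used by these checkpoints, and the `mobilenet_v1_arg_scope`/`mobilenet_v1` signatures of this slim snapshot.

```python
import tensorflow as tf
from nets import mobilenet_v1

slim = tf.contrib.slim

# Build the classifier graph in inference mode.
images = tf.placeholder(tf.float32, [None, 224, 224, 3])
with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope(is_training=False)):
    logits, end_points = mobilenet_v1.mobilenet_v1(images, num_classes=1001,
                                                   is_training=False)

# Restore only the model variables that live under the MobilenetV1 scope.
restorer = tf.train.Saver(slim.get_model_variables('MobilenetV1'))
with tf.Session() as sess:
    restorer.restore(sess, '/tmp/checkpoints/mobilenet_v1_1.0_224.ckpt')
```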
48 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/mobilenet_v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/mobilenet_v1.png -------------------------------------------------------------------------------- /libs/networks/slim_nets/mobilenet_v1.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/mobilenet_v1.pyc -------------------------------------------------------------------------------- /libs/networks/slim_nets/nets_factory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a factory for building various models.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | import functools 21 | 22 | import tensorflow as tf 23 | 24 | from nets import alexnet 25 | from nets import cifarnet 26 | from nets import inception 27 | from nets import lenet 28 | from nets import mobilenet_v1 29 | from nets import overfeat 30 | from nets import resnet_v1 31 | from nets import resnet_v2 32 | from nets import vgg 33 | 34 | slim = tf.contrib.slim 35 | 36 | networks_map = {'alexnet_v2': alexnet.alexnet_v2, 37 | 'cifarnet': cifarnet.cifarnet, 38 | 'overfeat': overfeat.overfeat, 39 | 'vgg_a': vgg.vgg_a, 40 | 'vgg_16': vgg.vgg_16, 41 | 'vgg_19': vgg.vgg_19, 42 | 'inception_v1': inception.inception_v1, 43 | 'inception_v2': inception.inception_v2, 44 | 'inception_v3': inception.inception_v3, 45 | 'inception_v4': inception.inception_v4, 46 | 'inception_resnet_v2': inception.inception_resnet_v2, 47 | 'lenet': lenet.lenet, 48 | 'resnet_v1_50': resnet_v1.resnet_v1_50, 49 | 'resnet_v1_101': resnet_v1.resnet_v1_101, 50 | 'resnet_v1_152': resnet_v1.resnet_v1_152, 51 | 'resnet_v1_200': resnet_v1.resnet_v1_200, 52 | 'resnet_v2_50': resnet_v2.resnet_v2_50, 53 | 'resnet_v2_101': resnet_v2.resnet_v2_101, 54 | 'resnet_v2_152': resnet_v2.resnet_v2_152, 55 | 'resnet_v2_200': resnet_v2.resnet_v2_200, 56 | 'mobilenet_v1': mobilenet_v1.mobilenet_v1, 57 | } 58 | 59 | arg_scopes_map = {'alexnet_v2': alexnet.alexnet_v2_arg_scope, 60 | 'cifarnet': cifarnet.cifarnet_arg_scope, 61 | 'overfeat': overfeat.overfeat_arg_scope, 62 | 'vgg_a': vgg.vgg_arg_scope, 63 | 'vgg_16': vgg.vgg_arg_scope, 64 | 'vgg_19': vgg.vgg_arg_scope, 65 | 'inception_v1': inception.inception_v3_arg_scope, 66 | 'inception_v2': inception.inception_v3_arg_scope, 67 | 'inception_v3': 
inception.inception_v3_arg_scope, 68 | 'inception_v4': inception.inception_v4_arg_scope, 69 | 'inception_resnet_v2': 70 | inception.inception_resnet_v2_arg_scope, 71 | 'lenet': lenet.lenet_arg_scope, 72 | 'resnet_v1_50': resnet_v1.resnet_arg_scope, 73 | 'resnet_v1_101': resnet_v1.resnet_arg_scope, 74 | 'resnet_v1_152': resnet_v1.resnet_arg_scope, 75 | 'resnet_v1_200': resnet_v1.resnet_arg_scope, 76 | 'resnet_v2_50': resnet_v2.resnet_arg_scope, 77 | 'resnet_v2_101': resnet_v2.resnet_arg_scope, 78 | 'resnet_v2_152': resnet_v2.resnet_arg_scope, 79 | 'resnet_v2_200': resnet_v2.resnet_arg_scope, 80 | 'mobilenet_v1': mobilenet_v1.mobilenet_v1_arg_scope, 81 | } 82 | 83 | 84 | def get_network_fn(name, num_classes, weight_decay=0.0, is_training=False): 85 | """Returns a network_fn such as `logits, end_points = network_fn(images)`. 86 | 87 | Args: 88 | name: The name of the network. 89 | num_classes: The number of classes to use for classification. 90 | weight_decay: The l2 coefficient for the model weights. 91 | is_training: `True` if the model is being used for training and `False` 92 | otherwise. 93 | 94 | Returns: 95 | network_fn: A function that applies the model to a batch of images. It has 96 | the following signature: 97 | logits, end_points = network_fn(images) 98 | Raises: 99 | ValueError: If network `name` is not recognized. 100 | """ 101 | if name not in networks_map: 102 | raise ValueError('Name of network unknown %s' % name) 103 | arg_scope = arg_scopes_map[name](weight_decay=weight_decay) 104 | func = networks_map[name] 105 | @functools.wraps(func) 106 | def network_fn(images): 107 | with slim.arg_scope(arg_scope): 108 | return func(images, num_classes, is_training=is_training) 109 | if hasattr(func, 'default_image_size'): 110 | network_fn.default_image_size = func.default_image_size 111 | 112 | return network_fn 113 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/nets_factory_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """Tests for slim.nets_factory.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import tensorflow as tf 23 | 24 | from nets import nets_factory 25 | 26 | slim = tf.contrib.slim 27 | 28 | 29 | class NetworksTest(tf.test.TestCase): 30 | 31 | def testGetNetworkFn(self): 32 | batch_size = 5 33 | num_classes = 1000 34 | for net in nets_factory.networks_map: 35 | with self.test_session(): 36 | net_fn = nets_factory.get_network_fn(net, num_classes) 37 | # Most networks use 224 as their default_image_size 38 | image_size = getattr(net_fn, 'default_image_size', 224) 39 | inputs = tf.random_uniform((batch_size, image_size, image_size, 3)) 40 | logits, end_points = net_fn(inputs) 41 | self.assertTrue(isinstance(logits, tf.Tensor)) 42 | self.assertTrue(isinstance(end_points, dict)) 43 | self.assertEqual(logits.get_shape().as_list()[0], batch_size) 44 | self.assertEqual(logits.get_shape().as_list()[-1], num_classes) 45 | 46 | def testGetNetworkFnArgScope(self): 47 | batch_size = 5 48 | num_classes = 10 49 | net = 'cifarnet' 50 | with self.test_session(use_gpu=True): 51 | net_fn = nets_factory.get_network_fn(net, num_classes) 52 | image_size = getattr(net_fn, 'default_image_size', 224) 53 | with slim.arg_scope([slim.model_variable, slim.variable], 54 | device='/CPU:0'): 55 | inputs = tf.random_uniform((batch_size, image_size, image_size, 3)) 56 | net_fn(inputs) 57 | weights = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, 'CifarNet/conv1')[0] 58 | self.assertDeviceEqual('/CPU:0', weights.device) 59 | 60 | if __name__ == '__main__': 61 | tf.test.main() 62 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/overfeat.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains the model definition for the OverFeat network. 
16 | 17 | The definition for the network was obtained from: 18 | OverFeat: Integrated Recognition, Localization and Detection using 19 | Convolutional Networks 20 | Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and 21 | Yann LeCun, 2014 22 | http://arxiv.org/abs/1312.6229 23 | 24 | Usage: 25 | with slim.arg_scope(overfeat.overfeat_arg_scope()): 26 | outputs, end_points = overfeat.overfeat(inputs) 27 | 28 | @@overfeat 29 | """ 30 | from __future__ import absolute_import 31 | from __future__ import division 32 | from __future__ import print_function 33 | 34 | import tensorflow as tf 35 | 36 | slim = tf.contrib.slim 37 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) 38 | 39 | 40 | def overfeat_arg_scope(weight_decay=0.0005): 41 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 42 | activation_fn=tf.nn.relu, 43 | weights_regularizer=slim.l2_regularizer(weight_decay), 44 | biases_initializer=tf.zeros_initializer()): 45 | with slim.arg_scope([slim.conv2d], padding='SAME'): 46 | with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc: 47 | return arg_sc 48 | 49 | 50 | def overfeat(inputs, 51 | num_classes=1000, 52 | is_training=True, 53 | dropout_keep_prob=0.5, 54 | spatial_squeeze=True, 55 | scope='overfeat'): 56 | """Contains the model definition for the OverFeat network. 57 | 58 | The definition for the network was obtained from: 59 | OverFeat: Integrated Recognition, Localization and Detection using 60 | Convolutional Networks 61 | Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and 62 | Yann LeCun, 2014 63 | http://arxiv.org/abs/1312.6229 64 | 65 | Note: All the fully_connected layers have been transformed to conv2d layers. 66 | To use in classification mode, resize input to 231x231. To use in fully 67 | convolutional mode, set spatial_squeeze to false. 68 | 69 | Args: 70 | inputs: a tensor of size [batch_size, height, width, channels]. 71 | num_classes: number of predicted classes. 72 | is_training: whether or not the model is being trained. 73 | dropout_keep_prob: the probability that activations are kept in the dropout 74 | layers during training. 75 | spatial_squeeze: whether or not should squeeze the spatial dimensions of the 76 | outputs. Useful to remove unnecessary dimensions for classification. 77 | scope: Optional scope for the variables. 78 | 79 | Returns: 80 | the last op containing the log predictions and end_points dict. 81 | 82 | """ 83 | with tf.variable_scope(scope, 'overfeat', [inputs]) as sc: 84 | end_points_collection = sc.name + '_end_points' 85 | # Collect outputs for conv2d, fully_connected and max_pool2d 86 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], 87 | outputs_collections=end_points_collection): 88 | net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID', 89 | scope='conv1') 90 | net = slim.max_pool2d(net, [2, 2], scope='pool1') 91 | net = slim.conv2d(net, 256, [5, 5], padding='VALID', scope='conv2') 92 | net = slim.max_pool2d(net, [2, 2], scope='pool2') 93 | net = slim.conv2d(net, 512, [3, 3], scope='conv3') 94 | net = slim.conv2d(net, 1024, [3, 3], scope='conv4') 95 | net = slim.conv2d(net, 1024, [3, 3], scope='conv5') 96 | net = slim.max_pool2d(net, [2, 2], scope='pool5') 97 | with slim.arg_scope([slim.conv2d], 98 | weights_initializer=trunc_normal(0.005), 99 | biases_initializer=tf.constant_initializer(0.1)): 100 | # Use conv2d instead of fully_connected layers. 
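# Note: for the canonical 231x231 input, the stack above (the 11x11/4 VALID
# conv, one 5x5 VALID conv, and three 2x2 max-pools) leaves pool5 at 6x6
# spatially, so the 6x6 VALID convolution below covers the whole feature map
# at once and is mathematically equivalent to a fully connected layer.
# Larger inputs (e.g. 281x281) instead yield a spatial grid of logits, which
# is what the spatial_squeeze=False path returns un-squeezed.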
101 | net = slim.conv2d(net, 3072, [6, 6], padding='VALID', scope='fc6') 102 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 103 | scope='dropout6') 104 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7') 105 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 106 | scope='dropout7') 107 | net = slim.conv2d(net, num_classes, [1, 1], 108 | activation_fn=None, 109 | normalizer_fn=None, 110 | biases_initializer=tf.zeros_initializer(), 111 | scope='fc8') 112 | # Convert end_points_collection into a end_point dict. 113 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 114 | if spatial_squeeze: 115 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed') 116 | end_points[sc.name + '/fc8'] = net 117 | return net, end_points 118 | overfeat.default_image_size = 231 119 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/overfeat_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for slim.slim_nets.overfeat.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from nets import overfeat 23 | 24 | slim = tf.contrib.slim 25 | 26 | 27 | class OverFeatTest(tf.test.TestCase): 28 | 29 | def testBuild(self): 30 | batch_size = 5 31 | height, width = 231, 231 32 | num_classes = 1000 33 | with self.test_session(): 34 | inputs = tf.random_uniform((batch_size, height, width, 3)) 35 | logits, _ = overfeat.overfeat(inputs, num_classes) 36 | self.assertEquals(logits.op.name, 'overfeat/fc8/squeezed') 37 | self.assertListEqual(logits.get_shape().as_list(), 38 | [batch_size, num_classes]) 39 | 40 | def testFullyConvolutional(self): 41 | batch_size = 1 42 | height, width = 281, 281 43 | num_classes = 1000 44 | with self.test_session(): 45 | inputs = tf.random_uniform((batch_size, height, width, 3)) 46 | logits, _ = overfeat.overfeat(inputs, num_classes, spatial_squeeze=False) 47 | self.assertEquals(logits.op.name, 'overfeat/fc8/BiasAdd') 48 | self.assertListEqual(logits.get_shape().as_list(), 49 | [batch_size, 2, 2, num_classes]) 50 | 51 | def testEndPoints(self): 52 | batch_size = 5 53 | height, width = 231, 231 54 | num_classes = 1000 55 | with self.test_session(): 56 | inputs = tf.random_uniform((batch_size, height, width, 3)) 57 | _, end_points = overfeat.overfeat(inputs, num_classes) 58 | expected_names = ['overfeat/conv1', 59 | 'overfeat/pool1', 60 | 'overfeat/conv2', 61 | 'overfeat/pool2', 62 | 'overfeat/conv3', 63 | 'overfeat/conv4', 64 | 'overfeat/conv5', 65 | 'overfeat/pool5', 66 | 'overfeat/fc6', 67 | 'overfeat/fc7', 68 | 'overfeat/fc8' 69 | ] 70 | self.assertSetEqual(set(end_points.keys()), 
set(expected_names)) 71 | 72 | def testModelVariables(self): 73 | batch_size = 5 74 | height, width = 231, 231 75 | num_classes = 1000 76 | with self.test_session(): 77 | inputs = tf.random_uniform((batch_size, height, width, 3)) 78 | overfeat.overfeat(inputs, num_classes) 79 | expected_names = ['overfeat/conv1/weights', 80 | 'overfeat/conv1/biases', 81 | 'overfeat/conv2/weights', 82 | 'overfeat/conv2/biases', 83 | 'overfeat/conv3/weights', 84 | 'overfeat/conv3/biases', 85 | 'overfeat/conv4/weights', 86 | 'overfeat/conv4/biases', 87 | 'overfeat/conv5/weights', 88 | 'overfeat/conv5/biases', 89 | 'overfeat/fc6/weights', 90 | 'overfeat/fc6/biases', 91 | 'overfeat/fc7/weights', 92 | 'overfeat/fc7/biases', 93 | 'overfeat/fc8/weights', 94 | 'overfeat/fc8/biases', 95 | ] 96 | model_variables = [v.op.name for v in slim.get_model_variables()] 97 | self.assertSetEqual(set(model_variables), set(expected_names)) 98 | 99 | def testEvaluation(self): 100 | batch_size = 2 101 | height, width = 231, 231 102 | num_classes = 1000 103 | with self.test_session(): 104 | eval_inputs = tf.random_uniform((batch_size, height, width, 3)) 105 | logits, _ = overfeat.overfeat(eval_inputs, is_training=False) 106 | self.assertListEqual(logits.get_shape().as_list(), 107 | [batch_size, num_classes]) 108 | predictions = tf.argmax(logits, 1) 109 | self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) 110 | 111 | def testTrainEvalWithReuse(self): 112 | train_batch_size = 2 113 | eval_batch_size = 1 114 | train_height, train_width = 231, 231 115 | eval_height, eval_width = 281, 281 116 | num_classes = 1000 117 | with self.test_session(): 118 | train_inputs = tf.random_uniform( 119 | (train_batch_size, train_height, train_width, 3)) 120 | logits, _ = overfeat.overfeat(train_inputs) 121 | self.assertListEqual(logits.get_shape().as_list(), 122 | [train_batch_size, num_classes]) 123 | tf.get_variable_scope().reuse_variables() 124 | eval_inputs = tf.random_uniform( 125 | (eval_batch_size, eval_height, eval_width, 3)) 126 | logits, _ = overfeat.overfeat(eval_inputs, is_training=False, 127 | spatial_squeeze=False) 128 | self.assertListEqual(logits.get_shape().as_list(), 129 | [eval_batch_size, 2, 2, num_classes]) 130 | logits = tf.reduce_mean(logits, [1, 2]) 131 | predictions = tf.argmax(logits, 1) 132 | self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size]) 133 | 134 | def testForward(self): 135 | batch_size = 1 136 | height, width = 231, 231 137 | with self.test_session() as sess: 138 | inputs = tf.random_uniform((batch_size, height, width, 3)) 139 | logits, _ = overfeat.overfeat(inputs) 140 | sess.run(tf.global_variables_initializer()) 141 | output = sess.run(logits) 142 | self.assertTrue(output.any()) 143 | 144 | if __name__ == '__main__': 145 | tf.test.main() 146 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/resnet_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/resnet_utils.pyc -------------------------------------------------------------------------------- /libs/networks/slim_nets/resnet_v1.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/resnet_v1.pyc 
-------------------------------------------------------------------------------- /libs/networks/slim_nets/vgg.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/networks/slim_nets/vgg.pyc -------------------------------------------------------------------------------- /libs/val_libs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/val_libs/__init__.py -------------------------------------------------------------------------------- /libs/val_libs/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/val_libs/__init__.pyc -------------------------------------------------------------------------------- /libs/val_libs/voc_eval.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/libs/val_libs/voc_eval.pyc -------------------------------------------------------------------------------- /output/trained_weights/README.md: -------------------------------------------------------------------------------- 1 | Please download the [trained model](https://github.com/DetectionTeamUCAS/Models/tree/master/Faster-RCNN_Tensorflow) produced by this project, then put it here. -------------------------------------------------------------------------------- /scalars.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/scalars.png -------------------------------------------------------------------------------- /tools/FasterRCNN_20180516_mobile.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/FasterRCNN_20180516_mobile.jpg -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/__init__.py -------------------------------------------------------------------------------- /tools/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/__init__.pyc -------------------------------------------------------------------------------- /tools/demos/000058.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/demos/000058.jpg -------------------------------------------------------------------------------- /tools/demos/000108.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/demos/000108.jpg -------------------------------------------------------------------------------- /tools/demos/000237.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/demos/000237.jpg -------------------------------------------------------------------------------- /tools/demos/000449.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/demos/000449.jpg -------------------------------------------------------------------------------- /tools/demos/000611.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/demos/000611.jpg -------------------------------------------------------------------------------- /tools/demos/000706.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/demos/000706.jpg -------------------------------------------------------------------------------- /tools/demos/000719.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/demos/000719.jpg -------------------------------------------------------------------------------- /tools/demos/004640.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/demos/004640.jpg -------------------------------------------------------------------------------- /tools/inference.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | from __future__ import division 6 | 7 | import os, sys 8 | import tensorflow as tf 9 | import time 10 | import cv2 11 | import argparse 12 | import numpy as np 13 | sys.path.append("../") 14 | 15 | from data.io.image_preprocess import short_side_resize_for_inference_data 16 | from libs.configs import cfgs 17 | from libs.networks import build_whole_network 18 | from libs.box_utils import draw_box_in_img 19 | from help_utils import tools 20 | 21 | 22 | def detect(det_net, inference_save_path, real_test_imgname_list): 23 | 24 | # 1. preprocess img 25 | img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3]) # is RGB. 
not BGR 26 | img_batch = tf.cast(img_plac, tf.float32) 27 | img_batch = short_side_resize_for_inference_data(img_tensor=img_batch, 28 | target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN, 29 | length_limitation=cfgs.IMG_MAX_LENGTH) 30 | img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)  # zero-center using the per-channel mean from cfgs 31 | img_batch = tf.expand_dims(img_batch, axis=0) # [1, None, None, 3] 32 | 33 | detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network( 34 | input_img_batch=img_batch, 35 | gtboxes_batch=None) 36 | 37 | init_op = tf.group( 38 | tf.global_variables_initializer(), 39 | tf.local_variables_initializer() 40 | ) 41 | 42 | restorer, restore_ckpt = det_net.get_restorer() 43 | 44 | config = tf.ConfigProto() 45 | config.gpu_options.allow_growth = True 46 | 47 | with tf.Session(config=config) as sess: 48 | sess.run(init_op) 49 | if restorer is not None: 50 | restorer.restore(sess, restore_ckpt) 51 | print('restore model') 52 | 53 | for i, a_img_name in enumerate(real_test_imgname_list): 54 | 55 | raw_img = cv2.imread(a_img_name) 56 | start = time.time() 57 | resized_img, detected_boxes, detected_scores, detected_categories = \ 58 | sess.run( 59 | [img_batch, detection_boxes, detection_scores, detection_category], 60 | feed_dict={img_plac: raw_img[:, :, ::-1]} # cv is BGR. But need RGB 61 | ) 62 | end = time.time() 63 | # print("{} cost time : {} ".format(img_name, (end - start))) 64 | 65 | show_indices = detected_scores >= cfgs.SHOW_SCORE_THRSHOLD 66 | show_scores = detected_scores[show_indices] 67 | show_boxes = detected_boxes[show_indices] 68 | show_categories = detected_categories[show_indices] 69 | final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(np.squeeze(resized_img, 0), 70 | boxes=show_boxes, 71 | labels=show_categories, 72 | scores=show_scores) 73 | nake_name = a_img_name.split('/')[-1] 74 | # print (inference_save_path + '/' + nake_name) 75 | cv2.imwrite(inference_save_path + '/' + nake_name, 76 | final_detections[:, :, ::-1]) 77 | 78 | tools.view_bar('{} image cost {}s'.format(a_img_name, (end - start)), i + 1, len(real_test_imgname_list)) 79 | 80 | 81 | def inference(test_dir, inference_save_path): 82 | 83 | test_imgname_list = [os.path.join(test_dir, img_name) for img_name in os.listdir(test_dir) 84 | if img_name.endswith(('.jpg', '.png', '.jpeg', '.tif', '.tiff'))] 85 | assert len(test_imgname_list) != 0, 'test_dir has no imgs there.' 
\ 86 | ' Note that we only support image formats of (.jpg, .jpeg, .png, .tif, .tiff)' 87 | 88 | faster_rcnn = build_whole_network.DetectionNetwork(base_network_name=cfgs.NET_NAME, 89 | is_training=False) 90 | detect(det_net=faster_rcnn, inference_save_path=inference_save_path, real_test_imgname_list=test_imgname_list) 91 | 92 | 93 | def parse_args(): 94 | """ 95 | Parse input arguments 96 | """ 97 | parser = argparse.ArgumentParser(description='TestImgs... You need to provide the test dir') 98 | parser.add_argument('--data_dir', dest='data_dir', 99 | help='data path', 100 | default='demos', type=str) 101 | parser.add_argument('--save_dir', dest='save_dir', 102 | help='demo imgs to save', 103 | default='inference_results', type=str) 104 | parser.add_argument('--GPU', dest='GPU', 105 | help='gpu id ', 106 | default='0', type=str) 107 | 108 | if len(sys.argv) == 1: 109 | parser.print_help() 110 | sys.exit(1) 111 | 112 | args = parser.parse_args() 113 | 114 | return args 115 | if __name__ == '__main__': 116 | 117 | args = parse_args() 118 | print('Called with args:') 119 | print(args) 120 | os.environ["CUDA_VISIBLE_DEVICES"] = args.GPU 121 | inference(args.data_dir, 122 | inference_save_path=args.save_dir) 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | -------------------------------------------------------------------------------- /tools/inference_results/000058.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/inference_results/000058.jpg -------------------------------------------------------------------------------- /tools/inference_results/000108.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/inference_results/000108.jpg -------------------------------------------------------------------------------- /tools/inference_results/000237.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/inference_results/000237.jpg -------------------------------------------------------------------------------- /tools/inference_results/000449.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/inference_results/000449.jpg -------------------------------------------------------------------------------- /tools/inference_results/000611.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/inference_results/000611.jpg -------------------------------------------------------------------------------- /tools/inference_results/000706.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/inference_results/000706.jpg -------------------------------------------------------------------------------- /tools/inference_results/000719.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/inference_results/000719.jpg -------------------------------------------------------------------------------- /tools/inference_results/004640.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/tools/inference_results/004640.jpg -------------------------------------------------------------------------------- /tools/test.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | from __future__ import division 6 | 7 | import os, sys 8 | import tensorflow as tf 9 | import time 10 | import cv2 11 | import argparse 12 | import numpy as np 13 | sys.path.append("../") 14 | 15 | from data.io.image_preprocess import short_side_resize_for_inference_data 16 | from libs.configs import cfgs 17 | from libs.networks import build_whole_network 18 | from libs.box_utils import draw_box_in_img 19 | from help_utils import tools 20 | 21 | 22 | def detect(det_net, inference_save_path, real_test_imgname_list): 23 | 24 | # 1. preprocess img 25 | img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3]) # is RGB. not BGR 26 | img_batch = tf.cast(img_plac, tf.float32) 27 | img_batch = short_side_resize_for_inference_data(img_tensor=img_batch, 28 | target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN, 29 | length_limitation=cfgs.IMG_MAX_LENGTH) 30 | img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)  # zero-center using the per-channel mean from cfgs 31 | img_batch = tf.expand_dims(img_batch, axis=0) # [1, None, None, 3] 32 | 33 | detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network( 34 | input_img_batch=img_batch, 35 | gtboxes_batch=None) 36 | 37 | init_op = tf.group( 38 | tf.global_variables_initializer(), 39 | tf.local_variables_initializer() 40 | ) 41 | 42 | restorer, restore_ckpt = det_net.get_restorer() 43 | 44 | config = tf.ConfigProto() 45 | config.gpu_options.allow_growth = True 46 | 47 | with tf.Session(config=config) as sess: 48 | sess.run(init_op) 49 | if restorer is not None: 50 | restorer.restore(sess, restore_ckpt) 51 | print('restore model') 52 | 53 | for i, a_img_name in enumerate(real_test_imgname_list): 54 | 55 | raw_img = cv2.imread(a_img_name) 56 | start = time.time() 57 | resized_img, detected_boxes, detected_scores, detected_categories = \ 58 | sess.run( 59 | [img_batch, detection_boxes, detection_scores, detection_category], 60 | feed_dict={img_plac: raw_img[:, :, ::-1]} # cv is BGR. 
But need RGB 61 | ) 62 | end = time.time() 63 | # print("{} cost time : {} ".format(img_name, (end - start))) 64 | 65 | raw_h, raw_w = raw_img.shape[0], raw_img.shape[1] 66 | 67 | xmin, ymin, xmax, ymax = detected_boxes[:, 0], detected_boxes[:, 1], \ 68 | detected_boxes[:, 2], detected_boxes[:, 3] 69 | 70 | resized_h, resized_w = resized_img.shape[1], resized_img.shape[2] 71 | 72 | xmin = xmin * raw_w / resized_w  # map boxes from the resized image back to raw-image coordinates 73 | xmax = xmax * raw_w / resized_w 74 | 75 | ymin = ymin * raw_h / resized_h 76 | ymax = ymax * raw_h / resized_h 77 | 78 | detected_boxes = np.transpose(np.stack([xmin, ymin, xmax, ymax])) 79 | 80 | show_indices = detected_scores >= cfgs.SHOW_SCORE_THRSHOLD 81 | show_scores = detected_scores[show_indices] 82 | show_boxes = detected_boxes[show_indices] 83 | show_categories = detected_categories[show_indices] 84 | final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(raw_img - np.array(cfgs.PIXEL_MEAN), 85 | boxes=show_boxes, 86 | labels=show_categories, 87 | scores=show_scores) 88 | nake_name = a_img_name.split('/')[-1] 89 | # print (inference_save_path + '/' + nake_name) 90 | cv2.imwrite(inference_save_path + '/' + nake_name, 91 | final_detections[:, :, ::-1]) 92 | 93 | tools.view_bar('{} image cost {}s'.format(a_img_name, (end - start)), i + 1, len(real_test_imgname_list)) 94 | 95 | 96 | def test(test_dir, inference_save_path): 97 | 98 | test_imgname_list = [os.path.join(test_dir, img_name) for img_name in os.listdir(test_dir) 99 | if img_name.endswith(('.jpg', '.png', '.jpeg', '.tif', '.tiff'))] 100 | assert len(test_imgname_list) != 0, 'test_dir has no imgs there.' \ 101 | ' Note that we only support image formats of (.jpg, .jpeg, .png, .tif, .tiff)' 102 | 103 | faster_rcnn = build_whole_network.DetectionNetwork(base_network_name=cfgs.NET_NAME, 104 | is_training=False) 105 | detect(det_net=faster_rcnn, inference_save_path=inference_save_path, real_test_imgname_list=test_imgname_list) 106 | 107 | 108 | def parse_args(): 109 | """ 110 | Parse input arguments 111 | """ 112 | parser = argparse.ArgumentParser(description='TestImgs... You need to provide the test dir') 113 | parser.add_argument('--data_dir', dest='data_dir', 114 | help='data path', 115 | default='demos', type=str) 116 | parser.add_argument('--save_dir', dest='save_dir', 117 | help='demo imgs to save', 118 | default='inference_results', type=str) 119 | parser.add_argument('--GPU', dest='GPU', 120 | help='gpu id ', 121 | default='0', type=str) 122 | 123 | if len(sys.argv) == 1: 124 | parser.print_help() 125 | sys.exit(1) 126 | 127 | args = parser.parse_args() 128 | 129 | return args 130 | if __name__ == '__main__': 131 | 132 | args = parse_args() 133 | print('Called with args:') 134 | print(args) 135 | os.environ["CUDA_VISIBLE_DEVICES"] = args.GPU 136 | test(args.data_dir, 137 | inference_save_path=args.save_dir) 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | -------------------------------------------------------------------------------- /voc_2007.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/da7299926a25c293f58e662efb9a6ddcd29cce09/voc_2007.gif --------------------------------------------------------------------------------
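Both demo scripts above are driven by the same argparse flags; since `parse_args` prints help and exits when no flag is given, at least one flag must be passed. A minimal invocation sketch (the paths simply echo the argparse defaults, assuming the scripts are run from `tools/` so that `sys.path.append("../")` resolves):

```shell
$ cd tools
$ python inference.py --data_dir='demos' --save_dir='inference_results' --GPU='0'
$ python test.py --data_dir='demos' --save_dir='inference_results' --GPU='0'
```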