├── .gitignore ├── LICENSE ├── README.md ├── comparison.png ├── data ├── __init__.py ├── io │ ├── BDD100K │ │ ├── BDD2VOC.py │ │ └── get_bdd100k_next_batch.py │ ├── COCO │ │ └── get_coco_next_batch.py │ ├── DOAI2019 │ │ ├── __init__.py │ │ └── train_crop.py │ ├── DOTA │ │ ├── get_dota_next_batch.py │ │ ├── train_crop.py │ │ └── val_crop.py │ ├── __init__.py │ ├── convert_data_to_tfrecord.py │ ├── convert_data_to_tfrecord_coco.py │ ├── convert_data_to_tfrecord_voc2012.py │ ├── image_preprocess.py │ ├── image_preprocess_multi_gpu.py │ ├── image_preprocess_multi_gpu_aug.py │ ├── read_tfrecord.py │ ├── read_tfrecord_multi_gpu.py │ └── read_tfrecord_multi_gpu_aug.py ├── lib_coco │ ├── PythonAPI │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── pycocoDemo.ipynb │ │ ├── pycocoEvalDemo.ipynb │ │ ├── pycocotools │ │ │ ├── __init__.py │ │ │ ├── _mask.c │ │ │ ├── _mask.pyx │ │ │ ├── coco.py │ │ │ ├── cocoeval.py │ │ │ └── mask.py │ │ └── setup.py │ ├── __init__.py │ ├── common │ │ ├── gason.cpp │ │ ├── gason.h │ │ ├── maskApi.c │ │ └── maskApi.h │ └── get_coco_next_batch.py └── pretrained_weights │ ├── README.md │ └── mobilenet │ └── README.md ├── help_utils ├── __init__.py └── tools.py ├── images.png ├── libs ├── __init__.py ├── box_utils │ ├── __init__.py │ ├── anchor_utils.py │ ├── boxes_utils.py │ ├── coordinate_convert.py │ ├── cython_utils │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── bbox.c │ │ ├── bbox.pyx │ │ ├── nms.c │ │ ├── nms.pyx │ │ └── setup.py │ ├── draw_box_in_img.py │ ├── encode_and_decode.py │ ├── iou.py │ ├── nms.py │ ├── show_box_in_tensor.py │ └── tf_ops.py ├── configs │ ├── COCO │ │ ├── __init__.py │ │ ├── cfgs_res50_1x_coco_v1.py │ │ ├── cfgs_res50_1x_coco_v2.py │ │ ├── cfgs_res50_1x_coco_v3.py │ │ └── cfgs_res50_1x_coco_v4.py │ ├── __init__.py │ └── cfgs.py ├── detection_oprations │ ├── __init__.py │ ├── anchor_target_layer_without_boxweight.py │ ├── proposal_opr.py │ ├── proposal_target_layer.py │ ├── proposal_target_layer_cascade.py │ └── proposal_target_layer_cascade_.py ├── export_pbs │ ├── __init__.py │ ├── exportPb.py │ ├── test_TensorRT.py │ └── test_exportPb.py ├── gluon2TF │ ├── .gitignore │ ├── README.md │ ├── mxnet_weights │ │ ├── mxnet_weights_namefile.py │ │ └── readme.txt │ └── resnet │ │ ├── __init__.py │ │ ├── download_mxnet_resnet_weights.py │ │ ├── parse_mxnet_weights.py │ │ ├── resnet.py │ │ ├── resnet_utils.py │ │ ├── resnet_utils_NCHW.py │ │ ├── some_test.py │ │ ├── test_resnet.py │ │ └── weights_map.py ├── label_name_dict │ ├── __init__.py │ ├── coco_dict.py │ ├── label_dict.py │ └── remote_sensing_dict.py ├── losses │ ├── __init__.py │ ├── losses.py │ └── losses_cascade.py ├── networks │ ├── __init__.py │ ├── build_whole_network.py │ ├── build_whole_network_cascade.py │ ├── layer.py │ ├── mobilenet │ │ ├── README.md │ │ ├── __init__.py │ │ ├── conv_blocks.py │ │ ├── mobilenet.py │ │ ├── mobilenet_v2.py │ │ └── mobilenet_v2_test.py │ ├── mobilenet_v2.py │ ├── ops.py │ ├── resnet.py │ ├── resnet_gluoncv.py │ └── slim_nets │ │ ├── __init__.py │ │ ├── alexnet.py │ │ ├── alexnet_test.py │ │ ├── cifarnet.py │ │ ├── inception.py │ │ ├── inception_resnet_v2.py │ │ ├── inception_resnet_v2_test.py │ │ ├── inception_utils.py │ │ ├── inception_v1.py │ │ ├── inception_v1_test.py │ │ ├── inception_v2.py │ │ ├── inception_v2_test.py │ │ ├── inception_v3.py │ │ ├── inception_v3_test.py │ │ ├── inception_v4.py │ │ ├── inception_v4_test.py │ │ ├── lenet.py │ │ ├── mobilenet_v1.md │ │ ├── mobilenet_v1.png │ │ ├── mobilenet_v1.py │ │ ├── mobilenet_v1_test.py │ │ ├── 
nets_factory.py │ │ ├── nets_factory_test.py │ │ ├── overfeat.py │ │ ├── overfeat_test.py │ │ ├── resnet_utils.py │ │ ├── resnet_v1.py │ │ ├── resnet_v1_test.py │ │ ├── resnet_v2.py │ │ ├── resnet_v2_test.py │ │ ├── vgg.py │ │ └── vgg_test.py ├── setup.py └── val_libs │ ├── __init__.py │ └── voc_eval.py ├── output └── trained_weights │ └── README.md ├── scalars.png └── tools ├── __init__.py ├── cocoval.py ├── demo.py ├── eval.py ├── eval_bdd.py ├── eval_coco.py ├── eval_coco_pyramid.py ├── eval_voc2012.py ├── inference.py ├── inference_for_coco.py ├── multi_gpu_train.py ├── multi_gpu_train_aug.py ├── multi_gpu_train_cascade.py ├── multi_gpu_train_warmup_cosine.py ├── test.py ├── test_coco.py ├── test_coco_pyramid.py ├── test_pyramid_dota.py ├── train.py ├── train_for_coco.py └── train_with_placeholder.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | .pyc 104 | .so 105 | *.data-00000-of-00001 106 | *.index 107 | *.meta 108 | events.* 109 | checkpoint 110 | .idea/ 111 | __pycache__/ 112 | *.json 113 | *.zip 114 | 115 | */tools/demos/* 116 | */output/* 117 | */data/pretrained_weights/* 118 | */data/tfrecord/* 119 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 DetectionTeamUCAS 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice 
and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Cascade R-CNN: Delving into High Quality Object Detection 2 | 3 | ## Abstract 4 | This repo is based on [FPN](https://github.com/DetectionTeamUCAS/FPN_Tensorflow) and was completed by [YangXue](https://github.com/yangxue0827). 5 | 6 | ## Train on COCO train2017 and test on COCO val2017 (coco minival) 7 | |Model|Backbone|Train Schedule|GPU|Image/GPU|FP16|Box AP (Mask AP)|Test Stage| 8 | |-----|--------|--------------|---|---------|----|----------------|----------| 9 | |Faster (paper)|R50v1-FPN|1X|8X TITAN XP|1|no|38.3|3| 10 | |Faster (ours)|R50v1-FPN|1X|8X 2080 Ti|1|no|38.2|3| 11 | |Faster (Face++)|R50v1-FPN|1X|8X 2080 Ti|2|no|39.1|3| 12 | 13 | ![2](comparison.png) 14 | 15 | ## My Development Environment 16 | 1、python3.5 (anaconda recommended) 17 | 2、cuda9.0 **(If you want to use cuda8, please set CUDA9 = False in the cfgs.py file.)** 18 | 3、[opencv(cv2)](https://pypi.org/project/opencv-python/) 19 | 4、[tfplot](https://github.com/wookayin/tensorflow-plot) 20 | 5、tensorflow == 1.12 21 | 22 | ## Download Model 23 | ### Pretrained weights 24 | 1、Please download the [resnet50_v1](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz) and [resnet101_v1](http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz) models pre-trained on ImageNet, and put them in data/pretrained_weights. 25 | 2、Or you can choose a better backbone; refer to [gluon2TF](https://github.com/yangJirui/gluon2TF). [Pretrained Model Link](https://pan.baidu.com/s/1GpqKg0dOaaWmwshvv1qWGg), password: 5ht9. 26 | 27 | ### Trained weights 28 | **Select a configuration file in the folder ($PATH_ROOT/libs/configs/), copy its contents into cfgs.py, then download the corresponding [weights](https://github.com/DetectionTeamUCAS/Models/tree/master/Cascade_FPN_Tensorflow).** 29 | 30 | ## Compile 31 | ``` 32 | cd $PATH_ROOT/libs/box_utils/cython_utils 33 | python setup.py build_ext --inplace 34 | ``` 35 | 36 | ## Train 37 | 38 | 1、If you want to train on your own data, please note: 39 | ``` 40 | (1) Modify parameters (such as CLASS_NUM, DATASET_NAME, VERSION, etc.) in $PATH_ROOT/libs/configs/cfgs.py (see the sketch below) 41 | (2) Add category information in $PATH_ROOT/libs/label_name_dict/label_dict.py 42 | (3) Add the data_name to $PATH_ROOT/data/io/read_tfrecord_multi_gpu.py 43 | ``` 44 | 
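As a rough illustration of step (1): the parameters are plain module-level constants in cfgs.py. The field names below are the ones referenced in this README and in data/io/read_tfrecord.py; the values are placeholders, not a working configuration, so copy a complete config from $PATH_ROOT/libs/configs/ (e.g. the COCO examples) rather than writing one from scratch:

```
# cfgs.py, illustrative excerpt only; all values below are placeholders
NET_NAME = 'resnet50_v1d'          # backbone name; must match the pretrained weights you downloaded
VERSION = 'Cascade_FPN_Res50_v1'   # experiment tag (placeholder)
DATASET_NAME = 'coco'              # must be a dataset name known to read_tfrecord_multi_gpu.py
CLASS_NUM = 80                     # category count (check the shipped configs for the exact convention)
CUDA9 = True                       # set to False when building against cuda8
```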
45 | 2、make tfrecord 46 | ``` 47 | cd $PATH_ROOT/data/io/ 48 | python convert_data_to_tfrecord_coco.py --coco_dir='/PATH/TO/JSON/FILE/' 49 | --save_name='train' 50 | --dataset='coco' 51 | ``` 52 | 53 | 3、multi-gpu train 54 | ``` 55 | cd $PATH_ROOT/tools 56 | python multi_gpu_train.py 57 | ``` 58 | 59 | ## Eval 60 | ``` 61 | cd $PATH_ROOT/tools 62 | python eval_coco.py --eval_data='/PATH/TO/IMAGES/' 63 | --eval_gt='/PATH/TO/TEST/ANNOTATION/' 64 | --GPU='0' 65 | ``` 66 | 67 | ## Tensorboard 68 | ``` 69 | cd $PATH_ROOT/output/summary 70 | tensorboard --logdir=. 71 | ``` 72 | ![3](images.png) 73 | 74 | ![4](scalars.png) 75 | 76 | ## Reference 77 | 1、https://github.com/endernewton/tf-faster-rcnn 78 | 2、https://github.com/zengarden/light_head_rcnn 79 | 3、https://github.com/tensorflow/models/tree/master/research/object_detection 80 | 4、https://github.com/CharlesShang/FastMaskRCNN 81 | 5、https://github.com/matterport/Mask_RCNN 82 | 6、https://github.com/msracver/Deformable-ConvNets 83 | 7、https://github.com/tensorpack/tensorpack 84 | -------------------------------------------------------------------------------- /comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/comparison.png -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/data/__init__.py -------------------------------------------------------------------------------- /data/io/BDD100K/BDD2VOC.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import cv2 4 | from xml.dom.minidom import Document 5 | import xml.dom.minidom 6 | 7 | label_map = {'bus': 1, 'traffic light': 2, 'traffic sign': 3, 'person': 4, 'bike': 5, 8 | 'truck': 6, 'motor': 7, 'car': 8, 'train': 9, 'rider': 10} 9 | FLAG = ['train', 'val'] 10 | 11 | 12 | def write_xml(save_path, name, box_list, label_list, w, h, d): 13 | 14 | # dict_box[filename]=json_dict[filename] 15 | doc = xml.dom.minidom.Document() 16 | root = doc.createElement('annotation') 17 | doc.appendChild(root) 18 | 19 | foldername = doc.createElement("folder") 20 | foldername.appendChild(doc.createTextNode("JPEGImages")) 21 | root.appendChild(foldername) 22 | 23 | nodeFilename = doc.createElement('filename') 24 | nodeFilename.appendChild(doc.createTextNode(name)) 25 | root.appendChild(nodeFilename) 26 | 27 | pathname = doc.createElement("path") 28 | pathname.appendChild(doc.createTextNode("xxxx")) 29 | root.appendChild(pathname) 30 | 31 | sourcename = doc.createElement("source") 32 | 33 | databasename = doc.createElement("database") 34 | databasename.appendChild(doc.createTextNode("Unknown")) 35 | sourcename.appendChild(databasename) 36 | 37 | annotationname = doc.createElement("annotation") 38 | annotationname.appendChild(doc.createTextNode("xxx")) 39 | sourcename.appendChild(annotationname) 40 | 41 | imagename = doc.createElement("image") 42 | imagename.appendChild(doc.createTextNode("xxx")) 43 | sourcename.appendChild(imagename) 44 | 45 | 
flickridname = doc.createElement("flickrid") 46 | flickridname.appendChild(doc.createTextNode("0")) 47 | sourcename.appendChild(flickridname) 48 | 49 | root.appendChild(sourcename) 50 | 51 | nodesize = doc.createElement('size') 52 | nodewidth = doc.createElement('width') 53 | nodewidth.appendChild(doc.createTextNode(str(w))) 54 | nodesize.appendChild(nodewidth) 55 | nodeheight = doc.createElement('height') 56 | nodeheight.appendChild(doc.createTextNode(str(h))) 57 | nodesize.appendChild(nodeheight) 58 | nodedepth = doc.createElement('depth') 59 | nodedepth.appendChild(doc.createTextNode(str(d))) 60 | nodesize.appendChild(nodedepth) 61 | root.appendChild(nodesize) 62 | 63 | segname = doc.createElement("segmented") 64 | segname.appendChild(doc.createTextNode("0")) 65 | root.appendChild(segname) 66 | 67 | for (box, label) in zip(box_list, label_list): 68 | 69 | nodeobject = doc.createElement('object') 70 | nodename = doc.createElement('name') 71 | nodename.appendChild(doc.createTextNode(str(label))) 72 | nodeobject.appendChild(nodename) 73 | nodebndbox = doc.createElement('bndbox') 74 | nodex1 = doc.createElement('x1') 75 | nodex1.appendChild(doc.createTextNode(str(box[0]))) 76 | nodebndbox.appendChild(nodex1) 77 | nodey1 = doc.createElement('y1') 78 | nodey1.appendChild(doc.createTextNode(str(box[1]))) 79 | nodebndbox.appendChild(nodey1) 80 | nodex2 = doc.createElement('x2') 81 | nodex2.appendChild(doc.createTextNode(str(box[2]))) 82 | nodebndbox.appendChild(nodex2) 83 | nodey2 = doc.createElement('y2') 84 | nodey2.appendChild(doc.createTextNode(str(box[3]))) 85 | nodebndbox.appendChild(nodey2) 86 | 87 | nodeobject.appendChild(nodebndbox) 88 | root.appendChild(nodeobject) 89 | fp = open(save_path, 'w') 90 | doc.writexml(fp, indent='\n') 91 | fp.close() 92 | 93 | 94 | for flag in FLAG: 95 | BDD_path = '/unsullied/sharefs/_research_detection/GeneralDetection/BDD100K/bdd100k/' 96 | BDD_labels_dir = os.path.join(BDD_path, 'labels/bdd100k_labels_images_{}.json'.format(flag)) 97 | BDD_labels = json.load(open(BDD_labels_dir, 'r')) 98 | BDD_images_dir = os.path.join(BDD_path, 'images/100k/{}'.format(flag)) 99 | 100 | for cnt, bdd in enumerate(BDD_labels): 101 | img_name = bdd['name'] 102 | img_path = os.path.join(BDD_images_dir, img_name) 103 | # img = cv2.imread(img_path) 104 | # h, w, d = img.shape 105 | h, w, d = 720, 1280, 3 106 | bdd_boxes = bdd['labels'] 107 | box_list, label_list = [], [] 108 | for bb in bdd_boxes: 109 | if bb['category'] not in label_map.keys(): 110 | continue 111 | box = bb['box2d'] 112 | box_list.append([round(box['x1']), round(box['y1']), 113 | round(box['x2']), round(box['y2'])]) 114 | label_list.append(bb['category']) 115 | 116 | if len(box_list) != 0: 117 | save_path = os.path.join('/unsullied/sharefs/yangxue/isilon/yangxue/data/BDD100K/BDD100K_VOC/bdd100k_{}/Annotations'.format(flag), 118 | img_name.replace('.jpg', '.xml')) 119 | write_xml(save_path, img_name, box_list, label_list, w, h, d) 120 | if cnt % 100 == 0: 121 | print('{} process: {}/{}'.format(flag, cnt+1, len(BDD_labels))) 122 | print('Finish!') 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | -------------------------------------------------------------------------------- /data/io/BDD100K/get_bdd100k_next_batch.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | import xml.etree.cElementTree as ET 6 | import cv2 7 | import numpy as np 8 | import os 9 | 
from libs.label_name_dict import coco_dict 10 | from libs.label_name_dict.label_dict import * 11 | 12 | 13 | root_path = '/unsullied/sharefs/yangxue/isilon/yangxue/data/BDD100K/BDD100K_VOC/bdd100k_train/' 14 | xmls = os.listdir(os.path.join(root_path, 'Annotations')) 15 | total_imgs = len(xmls) 16 | 17 | # print (NAME_LABEL_DICT) 18 | 19 | 20 | def read_xml_gtbox_and_label(xml_path): 21 | """ 22 | :param xml_path: the path of voc xml 23 | :return: a list contains gtboxes and labels, shape is [num_of_gtboxes, 5], 24 | and has [xmin, ymin, xmax, ymax, label] in a per row 25 | """ 26 | 27 | tree = ET.parse(xml_path) 28 | root = tree.getroot() 29 | img_width = None 30 | img_height = None 31 | box_list = [] 32 | for child_of_root in root: 33 | # if child_of_root.tag == 'filename': 34 | # assert child_of_root.text == xml_path.split('/')[-1].split('.')[0] \ 35 | # + FLAGS.img_format, 'xml_name and img_name cannot match' 36 | 37 | if child_of_root.tag == 'size': 38 | for child_item in child_of_root: 39 | if child_item.tag == 'width': 40 | img_width = int(child_item.text) 41 | if child_item.tag == 'height': 42 | img_height = int(child_item.text) 43 | 44 | if child_of_root.tag == 'object': 45 | label = None 46 | for child_item in child_of_root: 47 | if child_item.tag == 'name': 48 | label = NAME_LABEL_MAP[child_item.text] 49 | if child_item.tag == 'bndbox': 50 | tmp_box = [] 51 | for node in child_item: 52 | tmp_box.append(int(node.text)) 53 | assert label is not None, 'label is none, error' 54 | tmp_box.append(label) 55 | box_list.append(tmp_box) 56 | 57 | gtbox_label = np.array(box_list, dtype=np.int32) 58 | 59 | return img_height, img_width, gtbox_label 60 | 61 | 62 | def next_img(step): 63 | 64 | if step % total_imgs == 0: 65 | np.random.shuffle(xmls) 66 | xml_name = xmls[step % total_imgs] 67 | img_name = xml_name.replace('.xml', '.jpg') 68 | 69 | img = cv2.imread(os.path.join(root_path, 'train', img_name)) 70 | 71 | img_height, img_width, gtbox_label = read_xml_gtbox_and_label(os.path.join(root_path, 'Annotations', xml_name)) 72 | 73 | gtbox_and_label_list = np.array(gtbox_label, dtype=np.int32) 74 | if gtbox_and_label_list.shape[0] == 0: 75 | return next_img(step+1) 76 | else: 77 | return img_name, img[:, :, ::-1], gtbox_and_label_list 78 | 79 | 80 | if __name__ == '__main__': 81 | 82 | imgid, img, gtbox = next_img(3234) 83 | 84 | print("::") 85 | from libs.box_utils.draw_box_in_img import draw_boxes_with_label_and_scores 86 | 87 | img = draw_boxes_with_label_and_scores(img_array=img, boxes=gtbox[:, :-1], labels=gtbox[:, -1], 88 | scores=np.ones(shape=(len(gtbox), ))) 89 | print ("_----") 90 | 91 | 92 | cv2.imshow("test", img) 93 | cv2.waitKey(0) 94 | 95 | 96 | -------------------------------------------------------------------------------- /data/io/COCO/get_coco_next_batch.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | import xml.etree.cElementTree as ET 6 | import cv2 7 | import numpy as np 8 | import json 9 | import os 10 | from libs.label_name_dict import coco_dict 11 | from libs.label_name_dict.label_dict import * 12 | 13 | 14 | coco_trainvalmini = '/unsullied/sharefs/_research_detection/GeneralDetection/COCO/data/MSCOCO/odformat/coco_trainvalmini.odgt' 15 | 16 | 17 | def next_img(step): 18 | with open(coco_trainvalmini) as f: 19 | files = f.readlines() 20 | 21 | total_imgs = len(files) 22 | if step % total_imgs == 0: 23 | 
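        # new pass over the dataset: reshuffle the sample order (note that
        # `files` is re-read from disk on every call, so this shuffled order
        # is not kept for the next call)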
np.random.shuffle(files) 24 | 25 | raw_line = files[step % total_imgs] 26 | file = json.loads(raw_line) 27 | img_name = file['ID'] 28 | # img_height, img_width = file['height'], file['width'] 29 | 30 | img = cv2.imread(file['fpath']) 31 | 32 | gtboxes = file['gtboxes'] 33 | 34 | gtbox_label = [] 35 | for gt in gtboxes: 36 | box = gt['box'] 37 | label = gt['tag'] 38 | gtbox_label.append([box[0], box[1], box[0]+box[2], box[1]+box[3], NAME_LABEL_MAP[label]]) 39 | 40 | gtbox_and_label_list = np.array(gtbox_label, dtype=np.int32) 41 | if gtbox_and_label_list.shape[0] == 0: 42 | return next_img(step+1) 43 | else: 44 | return img_name, img[:, :, ::-1], gtbox_and_label_list 45 | 46 | 47 | if __name__ == '__main__': 48 | 49 | imgid, img, gtbox = next_img(3234) 50 | 51 | print("::") 52 | from libs.box_utils.draw_box_in_img import draw_boxes_with_label_and_scores 53 | 54 | img = draw_boxes_with_label_and_scores(img_array=img, boxes=gtbox[:, :-1], labels=gtbox[:, -1], 55 | scores=np.ones(shape=(len(gtbox), ))) 56 | print("_----") 57 | 58 | cv2.imwrite("test.jpg", img) 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /data/io/DOAI2019/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/data/io/DOAI2019/__init__.py -------------------------------------------------------------------------------- /data/io/DOTA/get_dota_next_batch.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | import xml.etree.cElementTree as ET 6 | import cv2 7 | import numpy as np 8 | import os 9 | from libs.label_name_dict import coco_dict 10 | from libs.label_name_dict.label_dict import * 11 | 12 | 13 | root_path = '/unsullied/sharefs/yangxue/isilon/yangxue/data/DOTA/DOTA_TOTAL/' 14 | xmls = os.listdir(os.path.join(root_path, 'xml_h')) 15 | total_imgs = len(xmls) 16 | 17 | # print (NAME_LABEL_DICT) 18 | 19 | 20 | def read_xml_gtbox_and_label(xml_path): 21 | """ 22 | :param xml_path: the path of voc xml 23 | :return: a list contains gtboxes and labels, shape is [num_of_gtboxes, 5], 24 | and has [xmin, ymin, xmax, ymax, label] in a per row 25 | """ 26 | 27 | tree = ET.parse(xml_path) 28 | root = tree.getroot() 29 | img_width = None 30 | img_height = None 31 | box_list = [] 32 | for child_of_root in root: 33 | # if child_of_root.tag == 'filename': 34 | # assert child_of_root.text == xml_path.split('/')[-1].split('.')[0] \ 35 | # + FLAGS.img_format, 'xml_name and img_name cannot match' 36 | 37 | if child_of_root.tag == 'size': 38 | for child_item in child_of_root: 39 | if child_item.tag == 'width': 40 | img_width = int(child_item.text) 41 | if child_item.tag == 'height': 42 | img_height = int(child_item.text) 43 | 44 | if child_of_root.tag == 'object': 45 | label = None 46 | for child_item in child_of_root: 47 | if child_item.tag == 'name': 48 | label = NAME_LABEL_MAP[child_item.text] 49 | if child_item.tag == 'bndbox': 50 | tmp_box = [] 51 | for node in child_item: 52 | tmp_box.append(int(node.text)) 53 | assert label is not None, 'label is none, error' 54 | tmp_box.append(label) 55 | box_list.append(tmp_box) 56 | 57 | gtbox_label = np.array(box_list, dtype=np.int32) 58 | 59 | return img_height, img_width, gtbox_label 60 | 61 | 62 | def next_img(step): 63 | 64 | if step % total_imgs == 0: 
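        # new epoch: reshuffle the module-level `xmls` list so the next pass
        # visits the images in a different order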
65 | np.random.shuffle(xmls) 66 | xml_name = xmls[step % total_imgs] 67 | img_name = xml_name.replace('.xml', '.jpg') 68 | 69 | img = cv2.imread(os.path.join(root_path, 'img', img_name)) 70 | 71 | img_height, img_width, gtbox_label = read_xml_gtbox_and_label(os.path.join(root_path, 'xml_h', xml_name)) 72 | 73 | gtbox_and_label_list = np.array(gtbox_label, dtype=np.int32) 74 | if gtbox_and_label_list.shape[0] == 0: 75 | return next_img(step+1) 76 | else: 77 | return img_name, img[:, :, ::-1], gtbox_and_label_list 78 | 79 | 80 | if __name__ == '__main__': 81 | 82 | imgid, img, gtbox = next_img(3234) 83 | 84 | print("::") 85 | from libs.box_utils.draw_box_in_img import draw_boxes_with_label_and_scores 86 | 87 | img = draw_boxes_with_label_and_scores(img_array=img, boxes=gtbox[:, :-1], labels=gtbox[:, -1], 88 | scores=np.ones(shape=(len(gtbox), ))) 89 | print ("_----") 90 | 91 | 92 | cv2.imshow("test", img) 93 | cv2.waitKey(0) 94 | 95 | 96 | -------------------------------------------------------------------------------- /data/io/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/data/io/__init__.py -------------------------------------------------------------------------------- /data/io/convert_data_to_tfrecord.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import sys 4 | sys.path.append('../../') 5 | import xml.etree.cElementTree as ET 6 | import numpy as np 7 | import tensorflow as tf 8 | import glob 9 | import cv2 10 | from libs.label_name_dict.label_dict import * 11 | from help_utils.tools import * 12 | 13 | tf.app.flags.DEFINE_string('VOC_dir', '/data/DOTA/DOTA_TOTAL/', 'Voc dir') 14 | tf.app.flags.DEFINE_string('xml_dir', 'xml', 'xml dir') 15 | tf.app.flags.DEFINE_string('image_dir', 'img', 'image dir') 16 | tf.app.flags.DEFINE_string('save_name', 'train', 'save name') 17 | tf.app.flags.DEFINE_string('save_dir', '../tfrecord/', 'save name') 18 | tf.app.flags.DEFINE_string('img_format', '.png', 'format of image') 19 | tf.app.flags.DEFINE_string('dataset', 'DOAI2019', 'dataset') 20 | FLAGS = tf.app.flags.FLAGS 21 | 22 | 23 | def _int64_feature(value): 24 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 25 | 26 | 27 | def _bytes_feature(value): 28 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 29 | 30 | 31 | def read_xml_gtbox_and_label(xml_path): 32 | """ 33 | :param xml_path: the path of voc xml 34 | :return: a list contains gtboxes and labels, shape is [num_of_gtboxes, 5], 35 | and has [xmin, ymin, xmax, ymax, label] in a per row 36 | """ 37 | 38 | tree = ET.parse(xml_path) 39 | root = tree.getroot() 40 | img_width = None 41 | img_height = None 42 | box_list = [] 43 | for child_of_root in root: 44 | # if child_of_root.tag == 'filename': 45 | # assert child_of_root.text == xml_path.split('/')[-1].split('.')[0] \ 46 | # + FLAGS.img_format, 'xml_name and img_name cannot match' 47 | 48 | if child_of_root.tag == 'size': 49 | for child_item in child_of_root: 50 | if child_item.tag == 'width': 51 | img_width = int(child_item.text) 52 | if child_item.tag == 'height': 53 | img_height = int(child_item.text) 54 | 55 | if child_of_root.tag == 'object': 56 | label = None 57 | for child_item in child_of_root: 58 | if child_item.tag == 'name': 59 | label = 
NAME_LABEL_MAP[child_item.text] 60 | if child_item.tag == 'bndbox': 61 | tmp_box = [] 62 | for node in child_item: 63 | tmp_box.append(int(node.text)) 64 | assert label is not None, 'label is none, error' 65 | tmp_box.append(label) 66 | box_list.append(tmp_box) 67 | 68 | gtbox_label = np.array(box_list, dtype=np.int32) 69 | 70 | return img_height, img_width, gtbox_label 71 | 72 | 73 | def convert_pascal_to_tfrecord(): 74 | xml_path = FLAGS.VOC_dir + FLAGS.xml_dir 75 | image_path = FLAGS.VOC_dir + FLAGS.image_dir 76 | save_path = FLAGS.save_dir + FLAGS.dataset + '_' + FLAGS.save_name + '.tfrecord' 77 | mkdir(FLAGS.save_dir) 78 | 79 | # writer_options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB) 80 | # writer = tf.python_io.TFRecordWriter(path=save_path, options=writer_options) 81 | writer = tf.python_io.TFRecordWriter(path=save_path) 82 | for count, xml in enumerate(glob.glob(xml_path + '/*.xml')): 83 | # to avoid path error in different development platform 84 | xml = xml.replace('\\', '/') 85 | 86 | img_name = xml.split('/')[-1].split('.')[0] + FLAGS.img_format 87 | img_path = image_path + '/' + img_name 88 | 89 | if not os.path.exists(img_path): 90 | print('{} is not exist!'.format(img_path)) 91 | continue 92 | 93 | img_height, img_width, gtbox_label = read_xml_gtbox_and_label(xml) 94 | 95 | # img = np.array(Image.open(img_path)) 96 | img = cv2.imread(img_path)[:, :, ::-1] 97 | 98 | feature = tf.train.Features(feature={ 99 | # do not need encode() in linux 100 | 'img_name': _bytes_feature(img_name.encode()), 101 | # 'img_name': _bytes_feature(img_name), 102 | 'img_height': _int64_feature(img_height), 103 | 'img_width': _int64_feature(img_width), 104 | 'img': _bytes_feature(img.tostring()), 105 | 'gtboxes_and_label': _bytes_feature(gtbox_label.tostring()), 106 | 'num_objects': _int64_feature(gtbox_label.shape[0]) 107 | }) 108 | 109 | example = tf.train.Example(features=feature) 110 | 111 | writer.write(example.SerializeToString()) 112 | 113 | view_bar('Conversion progress', count + 1, len(glob.glob(xml_path + '/*.xml'))) 114 | 115 | print('\nConversion is complete!') 116 | 117 | 118 | if __name__ == '__main__': 119 | # xml_path = '../data/dataset/VOCdevkit/VOC2007/Annotations/000005.xml' 120 | # read_xml_gtbox_and_label(xml_path) 121 | 122 | convert_pascal_to_tfrecord() 123 | -------------------------------------------------------------------------------- /data/io/convert_data_to_tfrecord_coco.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import sys 4 | sys.path.append('../../') 5 | import xml.etree.cElementTree as ET 6 | import numpy as np 7 | import tensorflow as tf 8 | import glob 9 | import cv2 10 | import json 11 | from libs.label_name_dict.label_dict import * 12 | from help_utils.tools import * 13 | 14 | tf.app.flags.DEFINE_string('coco_dir', '/data/COCO/coco_trainvalmini.odgt', 'coco dir') 15 | tf.app.flags.DEFINE_string('save_name', 'train', 'save name') 16 | tf.app.flags.DEFINE_string('save_dir', '../tfrecord/', 'save name') 17 | tf.app.flags.DEFINE_string('dataset', 'coco', 'dataset') 18 | FLAGS = tf.app.flags.FLAGS 19 | 20 | 21 | def _int64_feature(value): 22 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 23 | 24 | 25 | def _bytes_feature(value): 26 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 27 | 28 | 29 | def convert_pascal_to_tfrecord(coco_trainvalmini): 30 
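    """
    Convert an odgt annotation file into a single tfrecord. Each input line is
    one JSON object with keys 'ID', 'fpath', 'height', 'width' and 'gtboxes';
    every gtbox carries an [x, y, w, h] 'box' and a class-name 'tag', which are
    converted below to [xmin, ymin, xmax, ymax, label] rows.
    """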
| save_path = FLAGS.save_dir + FLAGS.dataset + '_' + FLAGS.save_name + '.tfrecord' 31 | mkdir(FLAGS.save_dir) 32 | 33 | # writer_options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB) 34 | # writer = tf.python_io.TFRecordWriter(path=save_path, options=writer_options) 35 | writer = tf.python_io.TFRecordWriter(path=save_path) 36 | 37 | with open(coco_trainvalmini) as f: 38 | files = f.readlines() 39 | 40 | img_count = 0 41 | gt_count = 0 42 | 43 | for count, raw_line in enumerate(files): 44 | file = json.loads(raw_line) 45 | img_path = os.path.join('/data/COCO/train2017', file['fpath'].split('_')[-1]) 46 | img_name = file['ID'] 47 | 48 | if not os.path.exists(img_path): 49 | # print('{} is not exist!'.format(img_path)) 50 | img_count += 1 51 | continue 52 | # img = np.array(Image.open(img_path)) 53 | img = cv2.imread(img_path)[:, :, ::-1] 54 | 55 | if img is None: 56 | continue 57 | 58 | gtboxes = file['gtboxes'] 59 | img_height = file['height'] 60 | img_width = file['width'] 61 | 62 | if len(gtboxes) == 0: 63 | # print('{}: gt is not exist!'.format(img_path)) 64 | gt_count += 1 65 | continue 66 | 67 | gtbox_label = [] 68 | for gt in gtboxes: 69 | box = gt['box'] 70 | label = gt['tag'] 71 | gtbox_label.append([box[0], box[1], box[0]+box[2], box[1]+box[3], NAME_LABEL_MAP[label]]) 72 | 73 | gtbox_label = np.array(gtbox_label, np.int32) 74 | 75 | feature = tf.train.Features(feature={ 76 | # do not need encode() in linux 77 | 'img_name': _bytes_feature(img_name.encode()), 78 | # 'img_name': _bytes_feature(img_name), 79 | 'img_height': _int64_feature(img_height), 80 | 'img_width': _int64_feature(img_width), 81 | 'img': _bytes_feature(img.tostring()), 82 | 'gtboxes_and_label': _bytes_feature(gtbox_label.tostring()), 83 | 'num_objects': _int64_feature(gtbox_label.shape[0]) 84 | }) 85 | 86 | example = tf.train.Example(features=feature) 87 | 88 | writer.write(example.SerializeToString()) 89 | 90 | view_bar('Conversion progress', count + 1, len(files)) 91 | 92 | print('{} images not exist!'.format(img_count)) 93 | print('{} gts not exist!'.format(gt_count)) 94 | print('\nConversion is complete!') 95 | 96 | 97 | if __name__ == '__main__': 98 | # xml_path = '../data/dataset/VOCdevkit/VOC2007/Annotations/000005.xml' 99 | # read_xml_gtbox_and_label(xml_path) 100 | 101 | # coco_path = '/unsullied/sharefs/_research_detection/GeneralDetection/COCO/data/MSCOCO/odformat/coco_trainvalmini.odgt' 102 | # convert_pascal_to_tfrecord(coco_path) 103 | convert_pascal_to_tfrecord(FLAGS.coco_dir) 104 | -------------------------------------------------------------------------------- /data/io/convert_data_to_tfrecord_voc2012.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import sys 4 | sys.path.append('../../') 5 | import xml.etree.cElementTree as ET 6 | import numpy as np 7 | import tensorflow as tf 8 | import glob 9 | import cv2 10 | from libs.label_name_dict.label_dict import * 11 | from help_utils.tools import * 12 | 13 | tf.app.flags.DEFINE_string('VOC_dir', '/unsullied/sharefs/yangxue/isilon/yangxue/data/VOC2012/VOCdevkit/VOC2012/', 'Voc dir') 14 | tf.app.flags.DEFINE_string('xml_dir', 'Annotations', 'xml dir') 15 | tf.app.flags.DEFINE_string('image_dir', 'JPEGImages', 'image dir') 16 | tf.app.flags.DEFINE_string('save_name', 'train2012', 'save name') 17 | tf.app.flags.DEFINE_string('save_dir', '../tfrecord/', 'save name') 18 | 
tf.app.flags.DEFINE_string('img_format', '.jpg', 'format of image') 19 | tf.app.flags.DEFINE_string('dataset', 'pascal', 'dataset') 20 | FLAGS = tf.app.flags.FLAGS 21 | 22 | 23 | def _int64_feature(value): 24 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 25 | 26 | 27 | def _bytes_feature(value): 28 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 29 | 30 | 31 | def read_xml_gtbox_and_label(xml_path): 32 | """ 33 | :param xml_path: the path of voc xml 34 | :return: a list contains gtboxes and labels, shape is [num_of_gtboxes, 5], 35 | and has [xmin, ymin, xmax, ymax, label] in a per row 36 | """ 37 | 38 | tree = ET.parse(xml_path) 39 | root = tree.getroot() 40 | img_width = None 41 | img_height = None 42 | box_list = [] 43 | for child_of_root in root: 44 | # if child_of_root.tag == 'filename': 45 | # assert child_of_root.text == xml_path.split('/')[-1].split('.')[0] \ 46 | # + FLAGS.img_format, 'xml_name and img_name cannot match' 47 | 48 | if child_of_root.tag == 'size': 49 | for child_item in child_of_root: 50 | if child_item.tag == 'width': 51 | img_width = int(child_item.text) 52 | if child_item.tag == 'height': 53 | img_height = int(child_item.text) 54 | 55 | if child_of_root.tag == 'object': 56 | label = None 57 | for child_item in child_of_root: 58 | if child_item.tag == 'name': 59 | label = NAME_LABEL_MAP[child_item.text] 60 | if child_item.tag == 'bndbox': 61 | tmp_box = [0, 0, 0, 0] 62 | for node in child_item: 63 | if node.tag == 'xmin': 64 | tmp_box[0] = int(node.text) 65 | if node.tag == 'ymin': 66 | tmp_box[1] = int(node.text) 67 | if node.tag == 'xmax': 68 | tmp_box[2] = int(node.text) 69 | if node.tag == 'ymax': 70 | tmp_box[3] = int(node.text) 71 | assert label is not None, 'label is none, error' 72 | tmp_box.append(label) 73 | box_list.append(tmp_box) 74 | 75 | gtbox_label = np.array(box_list, dtype=np.int32) 76 | 77 | return img_height, img_width, gtbox_label 78 | 79 | 80 | def convert_pascal_to_tfrecord(): 81 | xml_path = FLAGS.VOC_dir + FLAGS.xml_dir 82 | image_path = FLAGS.VOC_dir + FLAGS.image_dir 83 | save_path = FLAGS.save_dir + FLAGS.dataset + '_' + FLAGS.save_name + '.tfrecord' 84 | mkdir(FLAGS.save_dir) 85 | 86 | # writer_options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB) 87 | # writer = tf.python_io.TFRecordWriter(path=save_path, options=writer_options) 88 | writer = tf.python_io.TFRecordWriter(path=save_path) 89 | 90 | fr = open('/unsullied/sharefs/yangxue/isilon/yangxue/data/VOC2012/VOCdevkit/VOC2012/ImageSets/Main/trainval.txt', 'r') 91 | lines = fr.readlines() 92 | 93 | real_cnt = 0 94 | 95 | for count, xml in enumerate(glob.glob(xml_path + '/*.xml')): 96 | # to avoid path error in different development platform 97 | xml = xml.replace('\\', '/') 98 | 99 | tmp = xml.split('/')[-1].split('.')[0] + "\n" 100 | if tmp not in lines: 101 | continue 102 | 103 | img_name = xml.split('/')[-1].split('.')[0] + FLAGS.img_format 104 | img_path = image_path + '/' + img_name 105 | 106 | if not os.path.exists(img_path): 107 | print('{} is not exist!'.format(img_path)) 108 | continue 109 | 110 | img_height, img_width, gtbox_label = read_xml_gtbox_and_label(xml) 111 | 112 | # img = np.array(Image.open(img_path)) 113 | img = cv2.imread(img_path)[:, :, ::-1] 114 | 115 | feature = tf.train.Features(feature={ 116 | # do not need encode() in linux 117 | 'img_name': _bytes_feature(img_name.encode()), 118 | # 'img_name': _bytes_feature(img_name), 119 | 'img_height': _int64_feature(img_height), 120 | 
'img_width': _int64_feature(img_width), 121 | 'img': _bytes_feature(img.tostring()), 122 | 'gtboxes_and_label': _bytes_feature(gtbox_label.tostring()), 123 | 'num_objects': _int64_feature(gtbox_label.shape[0]) 124 | }) 125 | 126 | example = tf.train.Example(features=feature) 127 | 128 | writer.write(example.SerializeToString()) 129 | real_cnt += 1 130 | 131 | view_bar('Conversion progress', count + 1, len(glob.glob(xml_path + '/*.xml'))) 132 | 133 | print('\nConversion is complete! {} images.'.format(real_cnt)) 134 | 135 | 136 | if __name__ == '__main__': 137 | # xml_path = '../data/dataset/VOCdevkit/VOC2007/Annotations/000005.xml' 138 | # read_xml_gtbox_and_label(xml_path) 139 | 140 | convert_pascal_to_tfrecord() 141 | -------------------------------------------------------------------------------- /data/io/image_preprocess.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | from __future__ import division 6 | 7 | import tensorflow as tf 8 | 9 | import numpy as np 10 | 11 | 12 | def max_length_limitation(length, length_limitation): 13 | return tf.cond(tf.less(length, length_limitation), 14 | true_fn=lambda: length, 15 | false_fn=lambda: length_limitation) 16 | 17 | 18 | def short_side_resize(img_tensor, gtboxes_and_label, target_shortside_len, length_limitation=1200): 19 | ''' 20 | 21 | :param img_tensor:[h, w, c], gtboxes_and_label:[-1, 5]. gtboxes: [xmin, ymin, xmax, ymax] 22 | :param target_shortside_len: 23 | :param length_limitation: set max length to avoid OUT OF MEMORY 24 | :return: 25 | ''' 26 | img_h, img_w = tf.shape(img_tensor)[0], tf.shape(img_tensor)[1] 27 | new_h, new_w = tf.cond(tf.less(img_h, img_w), 28 | true_fn=lambda: (target_shortside_len, 29 | max_length_limitation(target_shortside_len * img_w // img_h, length_limitation)), 30 | false_fn=lambda: (max_length_limitation(target_shortside_len * img_h // img_w, length_limitation), 31 | target_shortside_len)) 32 | 33 | img_tensor = tf.expand_dims(img_tensor, axis=0) 34 | img_tensor = tf.image.resize_bilinear(img_tensor, [new_h, new_w]) 35 | 36 | xmin, ymin, xmax, ymax, label = tf.unstack(gtboxes_and_label, axis=1) 37 | 38 | new_xmin, new_ymin = xmin * new_w // img_w, ymin * new_h // img_h 39 | new_xmax, new_ymax = xmax * new_w // img_w, ymax * new_h // img_h 40 | img_tensor = tf.squeeze(img_tensor, axis=0) # ensure image tensor rank is 3 41 | 42 | return img_tensor, tf.transpose(tf.stack([new_xmin, new_ymin, new_xmax, new_ymax, label], axis=0)) 43 | 44 | 45 | def short_side_resize_for_inference_data(img_tensor, target_shortside_len, length_limitation=1200, is_resize=True): 46 | if is_resize: 47 | img_h, img_w = tf.shape(img_tensor)[0], tf.shape(img_tensor)[1] 48 | 49 | new_h, new_w = tf.cond(tf.less(img_h, img_w), 50 | true_fn=lambda: (target_shortside_len, 51 | max_length_limitation(target_shortside_len * img_w // img_h, length_limitation)), 52 | false_fn=lambda: (max_length_limitation(target_shortside_len * img_h // img_w, length_limitation), 53 | target_shortside_len)) 54 | 55 | img_tensor = tf.expand_dims(img_tensor, axis=0) 56 | img_tensor = tf.image.resize_bilinear(img_tensor, [new_h, new_w]) 57 | 58 | img_tensor = tf.squeeze(img_tensor, axis=0) # ensure image tensor rank is 3 59 | return img_tensor 60 | 61 | 62 | def flip_left_to_right(img_tensor, gtboxes_and_label): 63 | 64 | h, w = tf.shape(img_tensor)[0], tf.shape(img_tensor)[1] 65 | 66 | img_tensor = 
tf.image.flip_left_right(img_tensor) 67 | 68 | xmin, ymin, xmax, ymax, label = tf.unstack(gtboxes_and_label, axis=1) 69 | new_xmax = w - xmin 70 | new_xmin = w - xmax 71 | 72 | return img_tensor, tf.transpose(tf.stack([new_xmin, ymin, new_xmax, ymax, label], axis=0)) 73 | 74 | 75 | def random_flip_left_right(img_tensor, gtboxes_and_label): 76 | img_tensor, gtboxes_and_label= tf.cond(tf.less(tf.random_uniform(shape=[], minval=0, maxval=1), 0.5), 77 | lambda: flip_left_to_right(img_tensor, gtboxes_and_label), 78 | lambda: (img_tensor, gtboxes_and_label)) 79 | 80 | return img_tensor, gtboxes_and_label 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /data/io/image_preprocess_multi_gpu.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | from __future__ import division 6 | 7 | import tensorflow as tf 8 | 9 | import numpy as np 10 | 11 | 12 | def max_length_limitation(length, length_limitation): 13 | return tf.cond(tf.less(length, length_limitation), 14 | true_fn=lambda: length, 15 | false_fn=lambda: length_limitation) 16 | 17 | 18 | def short_side_resize(img_tensor, gtboxes_and_label, target_shortside_len, length_limitation=1200): 19 | ''' 20 | 21 | :param img_tensor:[h, w, c], gtboxes_and_label:[-1, 5]. gtboxes: [xmin, ymin, xmax, ymax] 22 | :param target_shortside_len: 23 | :param length_limitation: set max length to avoid OUT OF MEMORY 24 | :return: 25 | ''' 26 | img_h, img_w = tf.shape(img_tensor)[0], tf.shape(img_tensor)[1] 27 | new_h, new_w = tf.cond(tf.less(img_h, img_w), 28 | true_fn=lambda: (target_shortside_len, 29 | max_length_limitation(target_shortside_len * img_w // img_h, length_limitation)), 30 | false_fn=lambda: (max_length_limitation(target_shortside_len * img_h // img_w, length_limitation), 31 | target_shortside_len)) 32 | 33 | img_tensor = tf.expand_dims(img_tensor, axis=0) 34 | img_tensor = tf.image.resize_bilinear(img_tensor, [new_h, new_w]) 35 | 36 | xmin, ymin, xmax, ymax, label = tf.unstack(gtboxes_and_label, axis=1) 37 | 38 | new_xmin, new_ymin = xmin * new_w // img_w, ymin * new_h // img_h 39 | new_xmax, new_ymax = xmax * new_w // img_w, ymax * new_h // img_h 40 | img_tensor = tf.squeeze(img_tensor, axis=0) # ensure image tensor rank is 3 41 | 42 | return img_tensor, tf.transpose(tf.stack([new_xmin, new_ymin, new_xmax, new_ymax, label], axis=0)), new_h, new_w 43 | 44 | 45 | def short_side_resize_for_inference_data(img_tensor, target_shortside_len, length_limitation=1200, is_resize=True): 46 | if is_resize: 47 | img_h, img_w = tf.shape(img_tensor)[0], tf.shape(img_tensor)[1] 48 | 49 | new_h, new_w = tf.cond(tf.less(img_h, img_w), 50 | true_fn=lambda: (target_shortside_len, 51 | max_length_limitation(target_shortside_len * img_w // img_h, length_limitation)), 52 | false_fn=lambda: (max_length_limitation(target_shortside_len * img_h // img_w, length_limitation), 53 | target_shortside_len)) 54 | 55 | img_tensor = tf.expand_dims(img_tensor, axis=0) 56 | img_tensor = tf.image.resize_bilinear(img_tensor, [new_h, new_w]) 57 | 58 | img_tensor = tf.squeeze(img_tensor, axis=0) # ensure image tensor rank is 3 59 | return img_tensor 60 | 61 | 62 | def flip_left_to_right(img_tensor, gtboxes_and_label): 63 | 64 | h, w = tf.shape(img_tensor)[0], tf.shape(img_tensor)[1] 65 | 66 | img_tensor = tf.image.flip_left_right(img_tensor) 67 | 68 | xmin, ymin, xmax, ymax, label = 
tf.unstack(gtboxes_and_label, axis=1) 69 | new_xmax = w - xmin 70 | new_xmin = w - xmax 71 | 72 | return img_tensor, tf.transpose(tf.stack([new_xmin, ymin, new_xmax, ymax, label], axis=0)) 73 | 74 | 75 | def random_flip_left_right(img_tensor, gtboxes_and_label): 76 | img_tensor, gtboxes_and_label= tf.cond(tf.less(tf.random_uniform(shape=[], minval=0, maxval=1), 0.5), 77 | lambda: flip_left_to_right(img_tensor, gtboxes_and_label), 78 | lambda: (img_tensor, gtboxes_and_label)) 79 | 80 | return img_tensor, gtboxes_and_label 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /data/io/read_tfrecord.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | from __future__ import division 6 | 7 | import numpy as np 8 | import tensorflow as tf 9 | import os 10 | from data.io import image_preprocess 11 | from libs.configs import cfgs 12 | 13 | def read_single_example_and_decode(filename_queue): 14 | 15 | # tfrecord_options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB) 16 | 17 | # reader = tf.TFRecordReader(options=tfrecord_options) 18 | reader = tf.TFRecordReader() 19 | _, serialized_example = reader.read(filename_queue) 20 | 21 | features = tf.parse_single_example( 22 | serialized=serialized_example, 23 | features={ 24 | 'img_name': tf.FixedLenFeature([], tf.string), 25 | 'img_height': tf.FixedLenFeature([], tf.int64), 26 | 'img_width': tf.FixedLenFeature([], tf.int64), 27 | 'img': tf.FixedLenFeature([], tf.string), 28 | 'gtboxes_and_label': tf.FixedLenFeature([], tf.string), 29 | 'num_objects': tf.FixedLenFeature([], tf.int64) 30 | } 31 | ) 32 | img_name = features['img_name'] 33 | img_height = tf.cast(features['img_height'], tf.int32) 34 | img_width = tf.cast(features['img_width'], tf.int32) 35 | img = tf.decode_raw(features['img'], tf.uint8) 36 | 37 | img = tf.reshape(img, shape=[img_height, img_width, 3]) 38 | 39 | gtboxes_and_label = tf.decode_raw(features['gtboxes_and_label'], tf.int32) 40 | gtboxes_and_label = tf.reshape(gtboxes_and_label, [-1, 5]) 41 | 42 | num_objects = tf.cast(features['num_objects'], tf.int32) 43 | return img_name, img, gtboxes_and_label, num_objects 44 | 45 | 46 | def read_and_prepocess_single_img(filename_queue, shortside_len, is_training): 47 | 48 | img_name, img, gtboxes_and_label, num_objects = read_single_example_and_decode(filename_queue) 49 | 50 | img = tf.cast(img, tf.float32) 51 | 52 | if is_training: 53 | img, gtboxes_and_label = image_preprocess.short_side_resize(img_tensor=img, gtboxes_and_label=gtboxes_and_label, 54 | target_shortside_len=shortside_len, 55 | length_limitation=cfgs.IMG_MAX_LENGTH) 56 | img, gtboxes_and_label = image_preprocess.random_flip_left_right(img_tensor=img, 57 | gtboxes_and_label=gtboxes_and_label) 58 | 59 | else: 60 | img, gtboxes_and_label = image_preprocess.short_side_resize(img_tensor=img, gtboxes_and_label=gtboxes_and_label, 61 | target_shortside_len=shortside_len, 62 | length_limitation=cfgs.IMG_MAX_LENGTH) 63 | if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']: 64 | img = img / 255 - tf.constant([[cfgs.PIXEL_MEAN_]]) 65 | else: 66 | img = img - tf.constant([[cfgs.PIXEL_MEAN]]) # sub pixel mean at last 67 | return img_name, img, gtboxes_and_label, num_objects 68 | 69 | 70 | def next_batch(dataset_name, batch_size, shortside_len, is_training): 71 | ''' 72 | :return: 73 | img_name_batch: shape(1, 1) 74 
| img_batch: shape:(1, new_imgH, new_imgW, C) 75 | gtboxes_and_label_batch: shape (1, Num_Of_objects, 5). Each row is [x1, y1, x2, y2, label] 76 | ''' 77 | assert batch_size == 1, "only batch_size = 1 is supported; larger batch sizes may be supported in the future" 78 | 79 | if dataset_name not in ['ship', 'spacenet', 'pascal', 'coco', 'bdd100k', 'DOTA', 'DOTA_H']: 80 | raise ValueError('dataset_name must be one of: ship, spacenet, pascal, coco, bdd100k, DOTA, DOTA_H') 81 | 82 | if is_training: 83 | pattern = os.path.join('../data/tfrecord', dataset_name + '_train*') 84 | else: 85 | pattern = os.path.join('../data/tfrecord', dataset_name + '_test*') 86 | 87 | print('tfrecord path is -->', os.path.abspath(pattern)) 88 | 89 | filename_tensorlist = tf.train.match_filenames_once(pattern) 90 | 91 | filename_queue = tf.train.string_input_producer(filename_tensorlist) 92 | 93 | # shortside_len = tf.constant(shortside_len) 94 | # shortside_len = tf.random_shuffle(shortside_len)[0] 95 | 96 | img_name, img, gtboxes_and_label, num_obs = read_and_prepocess_single_img(filename_queue, shortside_len, 97 | is_training=is_training) 98 | img_name_batch, img_batch, gtboxes_and_label_batch, num_obs_batch = \ 99 | tf.train.batch( 100 | [img_name, img, gtboxes_and_label, num_obs], 101 | batch_size=batch_size, 102 | capacity=1, 103 | num_threads=1, 104 | dynamic_pad=True) 105 | return img_name_batch, img_batch, gtboxes_and_label_batch, num_obs_batch 106 | 107 | 108 | if __name__ == '__main__': 109 | os.environ["CUDA_VISIBLE_DEVICES"] = '0' 110 | img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \ 111 | next_batch(dataset_name=cfgs.DATASET_NAME, # 'pascal', 'coco' 112 | batch_size=cfgs.BATCH_SIZE, 113 | shortside_len=cfgs.IMG_SHORT_SIDE_LEN, 114 | is_training=True) 115 | gtboxes_and_label = tf.reshape(gtboxes_and_label_batch, [-1, 5]) 116 | 117 | init_op = tf.group( 118 | tf.global_variables_initializer(), 119 | tf.local_variables_initializer() 120 | ) 121 | 122 | config = tf.ConfigProto() 123 | config.gpu_options.allow_growth = True 124 | 125 | with tf.Session(config=config) as sess: 126 | sess.run(init_op) 127 | 128 | coord = tf.train.Coordinator() 129 | threads = tf.train.start_queue_runners(sess, coord) 130 | 131 | img_name_batch_, img_batch_, gtboxes_and_label_batch_, num_objects_batch_ \ 132 | = sess.run([img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch]) 133 | 134 | print(img_name_batch_) 135 | 136 | print('debug') 137 | 138 | coord.request_stop() 139 | coord.join(threads) 140 | -------------------------------------------------------------------------------- /data/lib_coco/PythonAPI/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | # install pycocotools locally 3 | python setup.py build_ext --inplace 4 | rm -rf build 5 | 6 | install: 7 | # install pycocotools to the Python site-packages 8 | python setup.py build_ext install 9 | rm -rf build -------------------------------------------------------------------------------- /data/lib_coco/PythonAPI/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/data/lib_coco/PythonAPI/__init__.py -------------------------------------------------------------------------------- /data/lib_coco/PythonAPI/pycocoEvalDemo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 
| "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%matplotlib inline\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "from pycocotools.coco import COCO\n", 14 | "from pycocotools.cocoeval import COCOeval\n", 15 | "import numpy as np\n", 16 | "import skimage.io as io\n", 17 | "import pylab\n", 18 | "pylab.rcParams['figure.figsize'] = (10.0, 8.0)" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [ 28 | { 29 | "name": "stdout", 30 | "output_type": "stream", 31 | "text": [ 32 | "Running demo for *bbox* results.\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "annType = ['segm','bbox','keypoints']\n", 38 | "annType = annType[1] #specify type here\n", 39 | "prefix = 'person_keypoints' if annType=='keypoints' else 'instances'\n", 40 | "print 'Running demo for *%s* results.'%(annType)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [ 50 | { 51 | "name": "stdout", 52 | "output_type": "stream", 53 | "text": [ 54 | "loading annotations into memory...\n", 55 | "Done (t=8.01s)\n", 56 | "creating index...\n", 57 | "index created!\n" 58 | ] 59 | } 60 | ], 61 | "source": [ 62 | "#initialize COCO ground truth api\n", 63 | "dataDir='../'\n", 64 | "dataType='val2014'\n", 65 | "annFile = '%s/annotations/%s_%s.json'%(dataDir,prefix,dataType)\n", 66 | "cocoGt=COCO(annFile)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 4, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [ 76 | { 77 | "name": "stdout", 78 | "output_type": "stream", 79 | "text": [ 80 | "Loading and preparing results... \n", 81 | "DONE (t=0.05s)\n", 82 | "creating index...\n", 83 | "index created!\n" 84 | ] 85 | } 86 | ], 87 | "source": [ 88 | "#initialize COCO detections api\n", 89 | "resFile='%s/results/%s_%s_fake%s100_results.json'\n", 90 | "resFile = resFile%(dataDir, prefix, dataType, annType)\n", 91 | "cocoDt=cocoGt.loadRes(resFile)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 5, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "imgIds=sorted(cocoGt.getImgIds())\n", 103 | "imgIds=imgIds[0:100]\n", 104 | "imgId = imgIds[np.random.randint(100)]" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 6, 110 | "metadata": { 111 | "collapsed": false 112 | }, 113 | "outputs": [ 114 | { 115 | "name": "stdout", 116 | "output_type": "stream", 117 | "text": [ 118 | "Running per image evaluation... \n", 119 | "DONE (t=0.46s).\n", 120 | "Accumulating evaluation results... 
\n", 121 | "DONE (t=0.38s).\n", 122 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.505\n", 123 | " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.697\n", 124 | " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.573\n", 125 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.586\n", 126 | " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.519\n", 127 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.501\n", 128 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.387\n", 129 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.594\n", 130 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.595\n", 131 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.640\n", 132 | " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.566\n", 133 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.564\n" 134 | ] 135 | } 136 | ], 137 | "source": [ 138 | "# running evaluation\n", 139 | "cocoEval = COCOeval(cocoGt,cocoDt,annType)\n", 140 | "cocoEval.params.imgIds = imgIds\n", 141 | "cocoEval.evaluate()\n", 142 | "cocoEval.accumulate()\n", 143 | "cocoEval.summarize()" 144 | ] 145 | } 146 | ], 147 | "metadata": { 148 | "kernelspec": { 149 | "display_name": "Python 2", 150 | "language": "python", 151 | "name": "python2" 152 | }, 153 | "language_info": { 154 | "codemirror_mode": { 155 | "name": "ipython", 156 | "version": 2 157 | }, 158 | "file_extension": ".py", 159 | "mimetype": "text/x-python", 160 | "name": "python", 161 | "nbconvert_exporter": "python", 162 | "pygments_lexer": "ipython2", 163 | "version": "2.7.10" 164 | } 165 | }, 166 | "nbformat": 4, 167 | "nbformat_minor": 0 168 | } 169 | -------------------------------------------------------------------------------- /data/lib_coco/PythonAPI/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /data/lib_coco/PythonAPI/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import pycocotools._mask as _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 
22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | iou = _mask.iou 77 | merge = _mask.merge 78 | frPyObjects = _mask.frPyObjects 79 | 80 | def encode(bimask): 81 | if len(bimask.shape) == 3: 82 | return _mask.encode(bimask) 83 | elif len(bimask.shape) == 2: 84 | h, w = bimask.shape 85 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] 86 | 87 | def decode(rleObjs): 88 | if type(rleObjs) == list: 89 | return _mask.decode(rleObjs) 90 | else: 91 | return _mask.decode([rleObjs])[:,:,0] 92 | 93 | def area(rleObjs): 94 | if type(rleObjs) == list: 95 | return _mask.area(rleObjs) 96 | else: 97 | return _mask.area([rleObjs])[0] 98 | 99 | def toBbox(rleObjs): 100 | if type(rleObjs) == list: 101 | return _mask.toBbox(rleObjs) 102 | else: 103 | return _mask.toBbox([rleObjs])[0] -------------------------------------------------------------------------------- /data/lib_coco/PythonAPI/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | import numpy as np 3 | 4 | # To compile and install locally run "python setup.py build_ext --inplace" 5 | # To install library to Python site-packages run "python setup.py build_ext install" 6 | 7 | ext_modules = [ 8 | Extension( 9 | 'pycocotools._mask', 10 | sources=['../common/maskApi.c', 'pycocotools/_mask.pyx'], 11 | include_dirs = [np.get_include(), '../common'], 12 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'], 13 | ) 14 | ] 15 | 16 | setup( 17 | name='pycocotools', 18 | packages=['pycocotools'], 19 | package_dir = {'pycocotools': 'pycocotools'}, 20 | install_requires=[ 21 | 'setuptools>=18.0', 22 | 'cython>=0.27.3', 23 | 'matplotlib>=2.1.0' 24 | ], 25 | version='2.0', 26 | ext_modules= ext_modules 27 | ) 28 | -------------------------------------------------------------------------------- /data/lib_coco/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/data/lib_coco/__init__.py -------------------------------------------------------------------------------- /data/lib_coco/common/gason.h: -------------------------------------------------------------------------------- 1 | // https://github.com/vivkin/gason - pulled January 10, 2016 2 | #pragma once 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | enum JsonTag { 9 | JSON_NUMBER = 0, 10 | JSON_STRING, 11 | JSON_ARRAY, 12 | JSON_OBJECT, 13 | JSON_TRUE, 14 | JSON_FALSE, 15 | JSON_NULL = 0xF 16 | }; 17 | 18 | struct JsonNode; 19 | 20 | #define JSON_VALUE_PAYLOAD_MASK 0x00007FFFFFFFFFFFULL 21 | #define JSON_VALUE_NAN_MASK 0x7FF8000000000000ULL 22 | #define JSON_VALUE_TAG_MASK 0xF 23 | #define JSON_VALUE_TAG_SHIFT 47 24 | 25 | union JsonValue { 26 | uint64_t ival; 27 | double fval; 28 | 29 | JsonValue(double x) 30 | : fval(x) { 31 | } 32 | JsonValue(JsonTag tag = JSON_NULL, void *payload = nullptr) { 33 | assert((uintptr_t)payload <= JSON_VALUE_PAYLOAD_MASK); 34 | ival = JSON_VALUE_NAN_MASK | ((uint64_t)tag << JSON_VALUE_TAG_SHIFT) | (uintptr_t)payload; 35 | } 36 | bool isDouble() const { 37 | return (int64_t)ival <= (int64_t)JSON_VALUE_NAN_MASK; 38 | } 39 | JsonTag getTag() const { 40 | return isDouble() ? 
JSON_NUMBER : JsonTag((ival >> JSON_VALUE_TAG_SHIFT) & JSON_VALUE_TAG_MASK); 41 | } 42 | uint64_t getPayload() const { 43 | assert(!isDouble()); 44 | return ival & JSON_VALUE_PAYLOAD_MASK; 45 | } 46 | double toNumber() const { 47 | assert(getTag() == JSON_NUMBER); 48 | return fval; 49 | } 50 | char *toString() const { 51 | assert(getTag() == JSON_STRING); 52 | return (char *)getPayload(); 53 | } 54 | JsonNode *toNode() const { 55 | assert(getTag() == JSON_ARRAY || getTag() == JSON_OBJECT); 56 | return (JsonNode *)getPayload(); 57 | } 58 | }; 59 | 60 | struct JsonNode { 61 | JsonValue value; 62 | JsonNode *next; 63 | char *key; 64 | }; 65 | 66 | struct JsonIterator { 67 | JsonNode *p; 68 | 69 | void operator++() { 70 | p = p->next; 71 | } 72 | bool operator!=(const JsonIterator &x) const { 73 | return p != x.p; 74 | } 75 | JsonNode *operator*() const { 76 | return p; 77 | } 78 | JsonNode *operator->() const { 79 | return p; 80 | } 81 | }; 82 | 83 | inline JsonIterator begin(JsonValue o) { 84 | return JsonIterator{o.toNode()}; 85 | } 86 | inline JsonIterator end(JsonValue) { 87 | return JsonIterator{nullptr}; 88 | } 89 | 90 | #define JSON_ERRNO_MAP(XX) \ 91 | XX(OK, "ok") \ 92 | XX(BAD_NUMBER, "bad number") \ 93 | XX(BAD_STRING, "bad string") \ 94 | XX(BAD_IDENTIFIER, "bad identifier") \ 95 | XX(STACK_OVERFLOW, "stack overflow") \ 96 | XX(STACK_UNDERFLOW, "stack underflow") \ 97 | XX(MISMATCH_BRACKET, "mismatch bracket") \ 98 | XX(UNEXPECTED_CHARACTER, "unexpected character") \ 99 | XX(UNQUOTED_KEY, "unquoted key") \ 100 | XX(BREAKING_BAD, "breaking bad") \ 101 | XX(ALLOCATION_FAILURE, "allocation failure") 102 | 103 | enum JsonErrno { 104 | #define XX(no, str) JSON_##no, 105 | JSON_ERRNO_MAP(XX) 106 | #undef XX 107 | }; 108 | 109 | const char *jsonStrError(int err); 110 | 111 | class JsonAllocator { 112 | struct Zone { 113 | Zone *next; 114 | size_t used; 115 | } *head = nullptr; 116 | 117 | public: 118 | JsonAllocator() = default; 119 | JsonAllocator(const JsonAllocator &) = delete; 120 | JsonAllocator &operator=(const JsonAllocator &) = delete; 121 | JsonAllocator(JsonAllocator &&x) : head(x.head) { 122 | x.head = nullptr; 123 | } 124 | JsonAllocator &operator=(JsonAllocator &&x) { 125 | head = x.head; 126 | x.head = nullptr; 127 | return *this; 128 | } 129 | ~JsonAllocator() { 130 | deallocate(); 131 | } 132 | void *allocate(size_t size); 133 | void deallocate(); 134 | }; 135 | 136 | int jsonParse(char *str, char **endptr, JsonValue *value, JsonAllocator &allocator); 137 | -------------------------------------------------------------------------------- /data/lib_coco/common/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | 9 | typedef unsigned int uint; 10 | typedef unsigned long siz; 11 | typedef unsigned char byte; 12 | typedef double* BB; 13 | typedef struct { siz h, w, m; uint *cnts; } RLE; 14 | 15 | /* Initialize/destroy RLE. */ 16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 17 | void rleFree( RLE *R ); 18 | 19 | /* Initialize/destroy RLE array. 
*/ 20 | void rlesInit( RLE **R, siz n ); 21 | void rlesFree( RLE **R, siz n ); 22 | 23 | /* Encode binary masks using RLE. */ 24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 25 | 26 | /* Decode binary masks encoded via RLE. */ 27 | void rleDecode( const RLE *R, byte *mask, siz n ); 28 | 29 | /* Compute union or intersection of encoded masks. */ 30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); 31 | 32 | /* Compute area of encoded masks. */ 33 | void rleArea( const RLE *R, siz n, uint *a ); 34 | 35 | /* Compute intersection over union between masks. */ 36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 37 | 38 | /* Compute non-maximum suppression between bounding masks */ 39 | void rleNms( RLE *dt, siz n, uint *keep, double thr ); 40 | 41 | /* Compute intersection over union between bounding boxes. */ 42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 43 | 44 | /* Compute non-maximum suppression between bounding boxes */ 45 | void bbNms( BB dt, siz n, uint *keep, double thr ); 46 | 47 | /* Get bounding boxes surrounding encoded masks. */ 48 | void rleToBbox( const RLE *R, BB bb, siz n ); 49 | 50 | /* Convert bounding boxes to encoded masks. */ 51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 52 | 53 | /* Convert polygon to encoded mask. */ 54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 55 | 56 | /* Get compressed string representation of encoded mask. */ 57 | char* rleToString( const RLE *R ); 58 | 59 | /* Convert from compressed string representation of encoded mask. */ 60 | void rleFrString( RLE *R, char *s, siz h, siz w ); 61 | -------------------------------------------------------------------------------- /data/lib_coco/get_coco_next_batch.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | import sys, os 6 | # sys.path.insert(0, os.path.abspath('.')) 7 | sys.path.insert(0, './PythonAPI/') 8 | # sys.path.insert(0, os.path.abspath('data')) 9 | for _ in sys.path: 10 | print (_) 11 | from PythonAPI.pycocotools.coco import COCO 12 | import cv2 13 | import numpy as np 14 | import os 15 | from libs.label_name_dict import coco_dict 16 | 17 | 18 | annotation_path = '/home/yjr/DataSet/COCO/2017/annotations/instances_train2017.json' 19 | print ("load coco .... 
it will cost about 17s..") 20 | coco = COCO(annotation_path) 21 | 22 | imgId_list = coco.getImgIds() 23 | imgId_list = np.array(imgId_list) 24 | 25 | total_imgs = len(imgId_list) 26 | 27 | # print (NAME_LABEL_DICT) 28 | 29 | 30 | def next_img(step): 31 | 32 | if step % total_imgs == 0: 33 | np.random.shuffle(imgId_list) 34 | imgid = imgId_list[step % total_imgs] 35 | 36 | imgname = coco.loadImgs(ids=[imgid])[0]['file_name'] 37 | # print (type(imgname), imgname) 38 | img = cv2.imread(os.path.join("/home/yjr/DataSet/COCO/2017/train2017", imgname)) 39 | 40 | annotation = coco.imgToAnns[imgid] 41 | gtbox_and_label_list = [] 42 | for ann in annotation: 43 | box = ann['bbox'] 44 | 45 | box = [box[0], box[1], box[0]+box[2], box[1]+box[3]] # [xmin, ymin, xmax, ymax] 46 | cat_id = ann['category_id'] 47 | cat_name = coco_dict.originID_classes[cat_id] #ID_NAME_DICT[cat_id] 48 | label = coco_dict.NAME_LABEL_MAP[cat_name] 49 | gtbox_and_label_list.append(box + [label]) 50 | gtbox_and_label_list = np.array(gtbox_and_label_list, dtype=np.int32) 51 | # print (img.shape, gtbox_and_label_list.shape) 52 | if gtbox_and_label_list.shape[0] == 0: 53 | return next_img(step+1) # skip images that have no annotations 54 | else: 55 | return imgid, img[:, :, ::-1], gtbox_and_label_list # BGR -> RGB 56 | 57 | 58 | if __name__ == '__main__': 59 | 60 | imgid, img, gtbox = next_img(3234) 61 | 62 | print("::") 63 | from libs.box_utils.draw_box_in_img import draw_boxes_with_label_and_scores 64 | 65 | img = draw_boxes_with_label_and_scores(img_array=img, boxes=gtbox[:, :-1], labels=gtbox[:, -1], 66 | scores=np.ones(shape=(len(gtbox), ))) 67 | print ("_----") 68 | 69 | 70 | cv2.imshow("test", img) 71 | cv2.waitKey(0) 72 | 73 | 74 | -------------------------------------------------------------------------------- /data/pretrained_weights/README.md: -------------------------------------------------------------------------------- 1 | 1. Please download the [resnet50_v1](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz) and [resnet101_v1](http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz) models pre-trained on ImageNet and put them in data/pretrained_weights. 2 | 2. Alternatively, use a better backbone; refer to [gluon2TF](https://github.com/yangJirui/gluon2TF). [Pretrained model link](https://pan.baidu.com/s/1GpqKg0dOaaWmwshvv1qWGg), password: 5ht9. 3 | -------------------------------------------------------------------------------- /data/pretrained_weights/mobilenet/README.md: -------------------------------------------------------------------------------- 1 | Please download the [mobilenet_v2](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz) model pre-trained on ImageNet and put it in data/pretrained_weights/mobilenet.
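As a quick sanity check (a minimal sketch, assuming the tgz has been extracted in place so that the .index/.data files sit next to the checkpoint prefix below), list a few variables from the checkpoint:

    import tensorflow as tf

    ckpt = 'data/pretrained_weights/mobilenet/mobilenet_v2_1.0_224.ckpt'
    reader = tf.train.NewCheckpointReader(ckpt)
    shapes = reader.get_variable_to_shape_map()
    for name in sorted(shapes)[:5]:
        print(name, shapes[name])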
-------------------------------------------------------------------------------- /help_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/help_utils/__init__.py -------------------------------------------------------------------------------- /help_utils/tools.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import math 4 | import sys 5 | import os 6 | 7 | 8 | def view_bar(message, num, total): 9 | rate = num / total 10 | rate_num = int(rate * 40) 11 | rate_nums = math.ceil(rate * 100) 12 | r = '\r%s:[%s%s]%d%%\t%d/%d' % (message, ">" * rate_num, " " * (40 - rate_num), rate_nums, num, total,) 13 | sys.stdout.write(r) 14 | sys.stdout.flush() 15 | 16 | 17 | def mkdir(path): 18 | if not os.path.exists(path): 19 | os.makedirs(path) -------------------------------------------------------------------------------- /images.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/images.png -------------------------------------------------------------------------------- /libs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/__init__.py -------------------------------------------------------------------------------- /libs/box_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/box_utils/__init__.py -------------------------------------------------------------------------------- /libs/box_utils/anchor_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, print_function, division 3 | 4 | import tensorflow as tf 5 | from libs.configs import cfgs 6 | 7 | 8 | def make_anchors(base_anchor_size, anchor_scales, anchor_ratios, 9 | featuremap_height, featuremap_width, 10 | stride, name='make_anchors'): 11 | ''' 12 | :param base_anchor_size:256 13 | :param anchor_scales: 14 | :param anchor_ratios: 15 | :param featuremap_height: 16 | :param featuremap_width: 17 | :param stride: 18 | :return: 19 | ''' 20 | with tf.variable_scope(name): 21 | base_anchor = tf.constant([0, 0, base_anchor_size, base_anchor_size], tf.float32) # [x_center, y_center, w, h] 22 | 23 | ws, hs = enum_ratios(enum_scales(base_anchor, anchor_scales), 24 | anchor_ratios) # per locations ws and hs 25 | 26 | # featuremap_height = tf.Print(featuremap_height, 27 | # [featuremap_height, featuremap_width], summarize=10, 28 | # message=name+"_SHAPE***") 29 | 30 | x_centers = tf.range(featuremap_width, dtype=tf.float32) * stride 31 | y_centers = tf.range(featuremap_height, dtype=tf.float32) * stride 32 | 33 | if cfgs.USE_CENTER_OFFSET: 34 | x_centers = x_centers + stride/2. 35 | y_centers = y_centers + stride/2. 
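            # (Added note, not in the original: with stride=16 this shifts the
            #  first anchor centre from 0.0 to 8.0, i.e. to the middle of the
            #  first feature-map cell instead of its top-left corner.)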
36 | 37 | x_centers, y_centers = tf.meshgrid(x_centers, y_centers) 38 | 39 | ws, x_centers = tf.meshgrid(ws, x_centers) 40 | hs, y_centers = tf.meshgrid(hs, y_centers) 41 | 42 | anchor_centers = tf.stack([x_centers, y_centers], 2) 43 | anchor_centers = tf.reshape(anchor_centers, [-1, 2]) 44 | 45 | box_sizes = tf.stack([ws, hs], axis=2) 46 | box_sizes = tf.reshape(box_sizes, [-1, 2]) 47 | # anchors = tf.concat([anchor_centers, box_sizes], axis=1) 48 | anchors = tf.concat([anchor_centers - 0.5*box_sizes, 49 | anchor_centers + 0.5*box_sizes], axis=1) 50 | return anchors 51 | 52 | 53 | def enum_scales(base_anchor, anchor_scales): 54 | 55 | anchor_scales = base_anchor * tf.constant(anchor_scales, dtype=tf.float32, shape=(len(anchor_scales), 1)) 56 | 57 | return anchor_scales 58 | 59 | 60 | def enum_ratios(anchors, anchor_ratios): 61 | ''' 62 | ratio = h /w 63 | :param anchors: 64 | :param anchor_ratios: 65 | :return: 66 | ''' 67 | ws = anchors[:, 2] # for base anchor: w == h 68 | hs = anchors[:, 3] 69 | sqrt_ratios = tf.sqrt(tf.constant(anchor_ratios)) 70 | 71 | ws = tf.reshape(ws / sqrt_ratios[:, tf.newaxis], [-1, 1]) 72 | hs = tf.reshape(hs * sqrt_ratios[:, tf.newaxis], [-1, 1]) 73 | 74 | return ws, hs 75 | 76 | 77 | if __name__ == '__main__': 78 | import os 79 | os.environ["CUDA_VISIBLE_DEVICES"] = '0' 80 | base_anchor_size = 256 81 | anchor_scales = [1.0] 82 | anchor_ratios = [0.5, 2.0, 1.0] 83 | anchors = make_anchors(base_anchor_size=base_anchor_size, anchor_ratios=anchor_ratios, 84 | anchor_scales=anchor_scales, 85 | featuremap_width=32, 86 | featuremap_height=63, 87 | stride=16) 88 | init = tf.global_variables_initializer() 89 | with tf.Session() as sess: 90 | sess.run(init) 91 | anchor_result = sess.run(anchors) 92 | print (anchor_result.shape) 93 | -------------------------------------------------------------------------------- /libs/box_utils/coordinate_convert.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import cv2 8 | import numpy as np 9 | 10 | 11 | def forward_convert(coordinate, with_label=True): 12 | """ 13 | :param coordinate: format [x_c, y_c, w, h, theta] 14 | :return: format [x1, y1, x2, y2, x3, y3, x4, y4] 15 | """ 16 | boxes = [] 17 | if with_label: 18 | for rect in coordinate: 19 | box = cv2.boxPoints(((rect[0], rect[1]), (rect[2], rect[3]), rect[4])) 20 | box = np.reshape(box, [-1, ]) 21 | boxes.append([box[0], box[1], box[2], box[3], box[4], box[5], box[6], box[7], rect[5]]) 22 | else: 23 | for rect in coordinate: 24 | box = cv2.boxPoints(((rect[0], rect[1]), (rect[2], rect[3]), rect[4])) 25 | box = np.reshape(box, [-1, ]) 26 | boxes.append([box[0], box[1], box[2], box[3], box[4], box[5], box[6], box[7]]) 27 | 28 | return np.array(boxes, dtype=np.float32) 29 | 30 | 31 | def back_forward_convert(coordinate, with_label=True): 32 | """ 33 | :param coordinate: format [x1, y1, x2, y2, x3, y3, x4, y4, (label)] 34 | :param with_label: default True 35 | :return: format [x_c, y_c, w, h, theta, (label)] 36 | """ 37 | 38 | boxes = [] 39 | if with_label: 40 | for rect in coordinate: 41 | box = np.int0(rect[:-1]) 42 | box = box.reshape([4, 2]) 43 | rect1 = cv2.minAreaRect(box) 44 | 45 | x, y, w, h, theta = rect1[0][0], rect1[0][1], rect1[1][0], rect1[1][1], rect1[2] 46 | boxes.append([x, y, w, h, theta, rect[-1]]) 47 | 48 | else: 49 | for rect in coordinate: 50 | box = np.int0(rect) 
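            # (Added note: the theta produced below assumes the OpenCV 3.x
            #  convention, where cv2.minAreaRect returns an angle in [-90, 0);
            #  OpenCV >= 4.5 switched to (0, 90], which changes these outputs.)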
51 | box = box.reshape([4, 2]) 52 | rect1 = cv2.minAreaRect(box) 53 | 54 | x, y, w, h, theta = rect1[0][0], rect1[0][1], rect1[1][0], rect1[1][1], rect1[2] 55 | boxes.append([x, y, w, h, theta]) 56 | 57 | return np.array(boxes, dtype=np.float32) 58 | 59 | 60 | if __name__ == '__main__': 61 | coord = np.array([[150, 150, 50, 100, -90, 1], 62 | [150, 150, 100, 50, -90, 1], 63 | [150, 150, 50, 100, -45, 1], 64 | [150, 150, 100, 50, -45, 1]]) 65 | 66 | coord1 = np.array([[150, 150, 100, 50, 0], 67 | [150, 150, 100, 50, -90], 68 | [150, 150, 100, 50, 45], 69 | [150, 150, 100, 50, -45]]) 70 | 71 | coord2 = forward_convert(coord) 72 | # coord3 = forward_convert(coord1, mode=-1) 73 | print(coord2) 74 | # print(coord3-coord2) 75 | # coord_label = np.array([[167., 203., 96., 132., 132., 96., 203., 167., 1.]]) 76 | # 77 | # coord4 = back_forward_convert(coord_label, mode=1) 78 | # coord5 = back_forward_convert(coord_label) 79 | 80 | # print(coord4) 81 | # print(coord5) 82 | 83 | # coord3 = coordinate_present_convert(coord, -1) 84 | # print(coord3) 85 | # coord4 = coordinate_present_convert(coord3, mode=1) 86 | # print(coord4) -------------------------------------------------------------------------------- /libs/box_utils/cython_utils/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | clean: 5 | rm -rf */*.pyc 6 | rm -rf */*.so 7 | -------------------------------------------------------------------------------- /libs/box_utils/cython_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/box_utils/cython_utils/__init__.py -------------------------------------------------------------------------------- /libs/box_utils/cython_utils/nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, 
h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | 70 | def nms_new(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 71 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 72 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 73 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 74 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 75 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 76 | 77 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 78 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 79 | 80 | cdef int ndets = dets.shape[0] 81 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 82 | np.zeros((ndets), dtype=np.int) 83 | 84 | # nominal indices 85 | cdef int _i, _j 86 | # sorted indices 87 | cdef int i, j 88 | # temp variables for box i's (the box currently under consideration) 89 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 90 | # variables for computing overlap with box j (lower scoring box) 91 | cdef np.float32_t xx1, yy1, xx2, yy2 92 | cdef np.float32_t w, h 93 | cdef np.float32_t inter, ovr 94 | 95 | keep = [] 96 | for _i in range(ndets): 97 | i = order[_i] 98 | if suppressed[i] == 1: 99 | continue 100 | keep.append(i) 101 | ix1 = x1[i] 102 | iy1 = y1[i] 103 | ix2 = x2[i] 104 | iy2 = y2[i] 105 | iarea = areas[i] 106 | for _j in range(_i + 1, ndets): 107 | j = order[_j] 108 | if suppressed[j] == 1: 109 | continue 110 | xx1 = max(ix1, x1[j]) 111 | yy1 = max(iy1, y1[j]) 112 | xx2 = min(ix2, x2[j]) 113 | yy2 = min(iy2, y2[j]) 114 | w = max(0.0, xx2 - xx1 + 1) 115 | h = max(0.0, yy2 - yy1 + 1) 116 | inter = w * h 117 | ovr = inter / (iarea + areas[j] - inter) 118 | ovr1 = inter / iarea 119 | ovr2 = inter / areas[j] 120 | if ovr >= thresh or ovr1 > 0.95 or ovr2 > 0.95: 121 | suppressed[j] = 1 122 | 123 | return keep 124 | -------------------------------------------------------------------------------- /libs/box_utils/cython_utils/setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | import numpy as np 11 | from distutils.core import setup 12 | from distutils.extension import Extension 13 | from Cython.Distutils import build_ext 14 | 15 | def find_in_path(name, path): 16 | "Find a file in a search path" 17 | #adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 18 | for dir in path.split(os.pathsep): 19 | binpath = pjoin(dir, name) 20 | if os.path.exists(binpath): 21 | return os.path.abspath(binpath) 22 | return None 23 | 24 | def locate_cuda(): 25 | """Locate the CUDA environment on the system 26 | 27 | 
Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 28 | and values giving the absolute path to each directory. 29 | 30 | Starts by looking for the CUDAHOME env variable. If not found, everything 31 | is based on finding 'nvcc' in the PATH. 32 | """ 33 | 34 | # first check if the CUDAHOME env variable is in use 35 | if 'CUDAHOME' in os.environ: 36 | home = os.environ['CUDAHOME'] 37 | nvcc = pjoin(home, 'bin', 'nvcc') 38 | else: 39 | # otherwise, search the PATH for NVCC 40 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 41 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 42 | if nvcc is None: 43 | raise EnvironmentError('The nvcc binary could not be ' 44 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 45 | home = os.path.dirname(os.path.dirname(nvcc)) 46 | 47 | cudaconfig = {'home':home, 'nvcc':nvcc, 48 | 'include': pjoin(home, 'include'), 49 | 'lib64': pjoin(home, 'lib64')} 50 | for k, v in cudaconfig.items(): 51 | if not os.path.exists(v): 52 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 53 | 54 | return cudaconfig 55 | CUDA = locate_cuda() 56 | 57 | # Obtain the numpy include directory. This logic works across numpy versions. 58 | try: 59 | numpy_include = np.get_include() 60 | except AttributeError: 61 | numpy_include = np.get_numpy_include() 62 | 63 | def customize_compiler_for_nvcc(self): 64 | """inject deep into distutils to customize how the dispatch 65 | to gcc/nvcc works. 66 | 67 | If you subclass UnixCCompiler, it's not trivial to get your subclass 68 | injected in, and still have the right customizations (i.e. 69 | distutils.sysconfig.customize_compiler) run on it. So instead of going 70 | the OO route, I have this. Note, it's kind of like a weird functional 71 | subclassing going on.""" 72 | 73 | # tell the compiler it can process .cu source files 74 | self.src_extensions.append('.cu') 75 | 76 | # save references to the default compiler_so and _compile methods 77 | default_compiler_so = self.compiler_so 78 | super = self._compile 79 | 80 | # now redefine the _compile method. This gets executed for each 81 | # object but distutils doesn't have the ability to change compilers 82 | # based on source extension: we add it.
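    # (Added note: "super" above is simply the saved bound method, not
    #  Python's super(); the closure below shadows this compiler instance's
    #  _compile so that .cu sources are routed to nvcc.)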
83 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 84 | print(extra_postargs) 85 | if os.path.splitext(src)[1] == '.cu': 86 | # use the cuda for .cu files 87 | self.set_executable('compiler_so', CUDA['nvcc']) 88 | # use only a subset of the extra_postargs, which are 1-1 translated 89 | # from the extra_compile_args in the Extension class 90 | postargs = extra_postargs['nvcc'] 91 | else: 92 | postargs = extra_postargs['gcc'] 93 | 94 | super(obj, src, ext, cc_args, postargs, pp_opts) 95 | # reset the default compiler_so, which we might have changed for cuda 96 | self.compiler_so = default_compiler_so 97 | 98 | # inject our redefined _compile method into the class 99 | self._compile = _compile 100 | 101 | # run the customize_compiler 102 | class custom_build_ext(build_ext): 103 | def build_extensions(self): 104 | customize_compiler_for_nvcc(self.compiler) 105 | build_ext.build_extensions(self) 106 | 107 | ext_modules = [ 108 | Extension( 109 | "cython_bbox", 110 | ["bbox.pyx"], 111 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 112 | include_dirs = [numpy_include] 113 | ), 114 | Extension( 115 | "cython_nms", 116 | ["nms.pyx"], 117 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 118 | include_dirs = [numpy_include] 119 | ) 120 | # Extension( 121 | # "cpu_nms", 122 | # ["cpu_nms.pyx"], 123 | # extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 124 | # include_dirs = [numpy_include] 125 | # ) 126 | ] 127 | 128 | setup( 129 | name='tf_faster_rcnn', 130 | ext_modules=ext_modules, 131 | # inject our custom trigger 132 | cmdclass={'build_ext': custom_build_ext}, 133 | ) 134 | -------------------------------------------------------------------------------- /libs/box_utils/iou.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | 8 | import tensorflow as tf 9 | import numpy as np 10 | 11 | 12 | def iou_calculate(boxes_1, boxes_2): 13 | 14 | with tf.name_scope('iou_caculate'): 15 | xmin_1, ymin_1, xmax_1, ymax_1 = boxes_1[:, 0], boxes_1[:, 1], boxes_1[:, 2], boxes_1[:, 3] 16 | 17 | xmin_2, ymin_2, xmax_2, ymax_2 = boxes_2[:, 0], boxes_2[:, 1], boxes_2[:, 2], boxes_2[:, 3] 18 | 19 | max_xmin = tf.maximum(xmin_1, xmin_2) 20 | min_xmax = tf.minimum(xmax_1, xmax_2) 21 | 22 | max_ymin = tf.maximum(ymin_1, ymin_2) 23 | min_ymax = tf.minimum(ymax_1, ymax_2) 24 | 25 | overlap_h = tf.maximum(0., min_ymax - max_ymin) # avoid h < 0 26 | overlap_w = tf.maximum(0., min_xmax - max_xmin) 27 | 28 | overlaps = overlap_h * overlap_w 29 | 30 | area_1 = (xmax_1 - xmin_1) * (ymax_1 - ymin_1) # [N, 1] 31 | area_2 = (xmax_2 - xmin_2) * (ymax_2 - ymin_2) # [M, ] 32 | 33 | iou = overlaps / (area_1 + area_2 - overlaps) 34 | 35 | return iou 36 | 37 | 38 | def iou_calculate1(boxes_1, boxes_2): 39 | xmin_1, ymin_1, xmax_1, ymax_1 = boxes_1[:, 0], boxes_1[:, 1], boxes_1[:, 2], boxes_1[:, 3] 40 | 41 | xmin_2, ymin_2, xmax_2, ymax_2 = boxes_2[:, 0], boxes_2[:, 1], boxes_2[:, 2], boxes_2[:, 3] 42 | 43 | max_xmin = np.maximum(xmin_1, xmin_2) 44 | min_xmax = np.minimum(xmax_1, xmax_2) 45 | 46 | max_ymin = np.maximum(ymin_1, ymin_2) 47 | min_ymax = np.minimum(ymax_1, ymax_2) 48 | 49 | overlap_h = np.maximum(0., min_ymax - max_ymin) # avoid h < 0 50 | overlap_w = np.maximum(0., min_xmax - max_xmin) 51 | 52 | overlaps = overlap_h * overlap_w 53 | 54 | area_1 = (xmax_1 - xmin_1) 
* (ymax_1 - ymin_1) # [N, 1] 55 | area_2 = (xmax_2 - xmin_2) * (ymax_2 - ymin_2) # [M, ] 56 | 57 | iou = overlaps / (area_1 + area_2 - overlaps) 58 | 59 | return iou 60 | 61 | 62 | if __name__ == '__main__': 63 | import os 64 | os.environ["CUDA_VISIBLE_DEVICES"] = '13' 65 | boxes1 = np.array([[50, 50, 100, 300], 66 | [60, 60, 100, 200]], np.float32) 67 | 68 | boxes2 = np.array([[50, 50, 100, 300], 69 | [200, 200, 100, 200]], np.float32) 70 | 71 | print(iou_calculate1(boxes1, boxes2)) 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /libs/box_utils/nms.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import numpy as np 8 | 9 | 10 | def py_cpu_nms(dets, thresh, max_output_size): 11 | """Pure Python NMS baseline.""" 12 | x1 = dets[:, 0] 13 | y1 = dets[:, 1] 14 | x2 = dets[:, 2] 15 | y2 = dets[:, 3] 16 | scores = dets[:, 4] 17 | 18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 19 | order = scores.argsort()[::-1] 20 | keep = [] 21 | while order.size > 0: 22 | if len(keep) >= max_output_size: 23 | break 24 | i = order[0] 25 | keep.append(i) 26 | xx1 = np.maximum(x1[i], x1[order[1:]]) 27 | yy1 = np.maximum(y1[i], y1[order[1:]]) 28 | xx2 = np.minimum(x2[i], x2[order[1:]]) 29 | yy2 = np.minimum(y2[i], y2[order[1:]]) 30 | 31 | w = np.maximum(0.0, xx2 - xx1 + 1) 32 | h = np.maximum(0.0, yy2 - yy1 + 1) 33 | inter = w * h 34 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 35 | inds = np.where(ovr <= thresh)[0] 36 | order = order[inds + 1] 37 | 38 | return np.array(keep, np.int64) 39 | -------------------------------------------------------------------------------- /libs/box_utils/show_box_in_tensor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | import numpy as np 9 | import cv2 10 | from libs.label_name_dict.label_dict import LABEl_NAME_MAP 11 | 12 | from libs.configs import cfgs 13 | 14 | from libs.box_utils import draw_box_in_img 15 | 16 | def only_draw_boxes(img_batch, boxes): 17 | 18 | boxes = tf.stop_gradient(boxes) 19 | img_tensor = tf.squeeze(img_batch, 0) 20 | img_tensor = tf.cast(img_tensor, tf.float32) 21 | labels = tf.ones(shape=(tf.shape(boxes)[0], ), dtype=tf.int32) * draw_box_in_img.ONLY_DRAW_BOXES 22 | scores = tf.zeros_like(labels, dtype=tf.float32) 23 | img_tensor_with_boxes = tf.py_func(draw_box_in_img.draw_boxes_with_label_and_scores, 24 | inp=[img_tensor, boxes, labels, scores], 25 | Tout=tf.uint8) 26 | img_tensor_with_boxes = tf.reshape(img_tensor_with_boxes, tf.shape(img_batch)) # [batch_size, h, w, c] 27 | 28 | return img_tensor_with_boxes 29 | 30 | def draw_boxes_with_scores(img_batch, boxes, scores): 31 | 32 | boxes = tf.stop_gradient(boxes) 33 | scores = tf.stop_gradient(scores) 34 | 35 | img_tensor = tf.squeeze(img_batch, 0) 36 | img_tensor = tf.cast(img_tensor, tf.float32) 37 | labels = tf.ones(shape=(tf.shape(boxes)[0],), dtype=tf.int32) * draw_box_in_img.ONLY_DRAW_BOXES_WITH_SCORES 38 | img_tensor_with_boxes = tf.py_func(draw_box_in_img.draw_boxes_with_label_and_scores, 39 | inp=[img_tensor, boxes, labels, scores], 40 | Tout=[tf.uint8]) 41 | img_tensor_with_boxes = tf.reshape(img_tensor_with_boxes, 
tf.shape(img_batch)) 42 | return img_tensor_with_boxes 43 | 44 | def draw_boxes_with_categories(img_batch, boxes, labels): 45 | boxes = tf.stop_gradient(boxes) 46 | 47 | img_tensor = tf.squeeze(img_batch, 0) 48 | img_tensor = tf.cast(img_tensor, tf.float32) 49 | scores = tf.ones(shape=(tf.shape(boxes)[0],), dtype=tf.float32) 50 | img_tensor_with_boxes = tf.py_func(draw_box_in_img.draw_boxes_with_label_and_scores, 51 | inp=[img_tensor, boxes, labels, scores], 52 | Tout=[tf.uint8]) 53 | img_tensor_with_boxes = tf.reshape(img_tensor_with_boxes, tf.shape(img_batch)) 54 | return img_tensor_with_boxes 55 | 56 | def draw_boxes_with_categories_and_scores(img_batch, boxes, labels, scores): 57 | boxes = tf.stop_gradient(boxes) 58 | scores = tf.stop_gradient(scores) 59 | 60 | img_tensor = tf.squeeze(img_batch, 0) 61 | img_tensor = tf.cast(img_tensor, tf.float32) 62 | img_tensor_with_boxes = tf.py_func(draw_box_in_img.draw_boxes_with_label_and_scores, 63 | inp=[img_tensor, boxes, labels, scores], 64 | Tout=[tf.uint8]) 65 | img_tensor_with_boxes = tf.reshape(img_tensor_with_boxes, tf.shape(img_batch)) 66 | return img_tensor_with_boxes 67 | 68 | if __name__ == "__main__": 69 | print (1) 70 | 71 | -------------------------------------------------------------------------------- /libs/box_utils/tf_ops.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | import tensorflow as tf 6 | 7 | ''' 8 | All of these ops are derived from the TensorFlow Object Detection API. 9 | ''' 10 | def indices_to_dense_vector(indices, 11 | size, 12 | indices_value=1., 13 | default_value=0, 14 | dtype=tf.float32): 15 | """Creates a dense vector whose entries at the given indices are set to a specific value (the parameter "indices_value") and whose remaining entries are default_value. 16 | 17 | This function exists because it is unclear if it is safe to use 18 | tf.sparse_to_dense(indices, [size], 1, validate_indices=False) 19 | with indices which are not ordered. 20 | This function accepts a dynamic size (e.g. tf.shape(tensor)[0]). 21 | 22 | Args: 23 | indices: 1d Tensor with integer indices which are to be set to 24 | indices_value. 25 | size: scalar with size (integer) of output Tensor. 26 | indices_value: values of elements specified by indices in the output vector. 27 | default_value: values of other elements in the output vector. 28 | dtype: data type. 29 | 30 | Returns: 31 | dense 1D Tensor of shape [size] with indices set to indices_value and the 32 | rest set to default_value.
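      (Added worked example: indices=[1, 3], size=5, indices_value=1.
       gives [0., 1., 0., 1., 0.].)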
33 | """ 34 | size = tf.to_int32(size) 35 | zeros = tf.ones([size], dtype=dtype) * default_value 36 | values = tf.ones_like(indices, dtype=dtype) * indices_value 37 | 38 | return tf.dynamic_stitch([tf.range(size), tf.to_int32(indices)], 39 | [zeros, values]) 40 | 41 | 42 | 43 | 44 | def test_plt(): 45 | from PIL import Image 46 | import matplotlib.pyplot as plt 47 | import numpy as np 48 | 49 | a = np.random.rand(20, 30) 50 | print (a.shape) 51 | # plt.subplot() 52 | b = plt.imshow(a) 53 | plt.show() 54 | 55 | 56 | if __name__ == '__main__': 57 | test_plt() 58 | -------------------------------------------------------------------------------- /libs/configs/COCO/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/configs/COCO/__init__.py -------------------------------------------------------------------------------- /libs/configs/COCO/cfgs_res50_1x_coco_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | ''' 6 | gluoncv backbone + multi_gpu 7 | 8 | ''' 9 | 10 | # ------------------------------------------------ 11 | VERSION = 'Cascade_FPN_Res50_COCO_1x_20190416_v1' 12 | NET_NAME = 'resnet50_v1d' 13 | ADD_BOX_IN_TENSORBOARD = True 14 | 15 | # ---------------------------------------- System_config 16 | ROOT_PATH = os.path.abspath('../') 17 | print(20*"++--") 18 | print(ROOT_PATH) 19 | GPU_GROUP = "0,1,2,3,4,5,6,7" 20 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 21 | SHOW_TRAIN_INFO_INTE = 10 22 | SMRY_ITER = 200 23 | SAVE_WEIGHTS_INTE = 80000 24 | 25 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 26 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 27 | INFERENCE_IMAGE_PATH = ROOT_PATH + '/tools/inference_image' 28 | INFERENCE_SAVE_PATH = ROOT_PATH + '/tools/inference_results' 29 | 30 | if NET_NAME.startswith("resnet"): 31 | weights_name = NET_NAME 32 | elif NET_NAME.startswith("MobilenetV2"): 33 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 34 | else: 35 | raise NotImplementedError 36 | 37 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 38 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 39 | 40 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 41 | 42 | # ------------------------------------------ Train config 43 | RESTORE_FROM_RPN = False 44 | IS_FILTER_OUTSIDE_BOXES = False 45 | FIXED_BLOCKS = 0 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | CUDA9 = True 49 | EVAL_THRESHOLD = 0.5 50 | 51 | RPN_LOCATION_LOSS_WEIGHT = 1. 
52 | RPN_CLASSIFICATION_LOSS_WEIGHT = 1.0 53 | 54 | FAST_RCNN_LOCATION_LOSS_WEIGHT = 1.0 55 | FAST_RCNN_CLASSIFICATION_LOSS_WEIGHT = 1.0 56 | RPN_SIGMA = 3.0 57 | FASTRCNN_SIGMA = 1.0 58 | 59 | MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy 60 | GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip 61 | 62 | EPSILON = 1e-5 63 | MOMENTUM = 0.9 64 | BATCH_SIZE = 1 65 | WARM_SETP = int(0.25 * SAVE_WEIGHTS_INTE) 66 | LR = 5e-4 * 2 * 1.25 * NUM_GPU * BATCH_SIZE 67 | DECAY_STEP = [11*SAVE_WEIGHTS_INTE, 16*SAVE_WEIGHTS_INTE, 20*SAVE_WEIGHTS_INTE] # 50000, 70000 68 | MAX_ITERATION = 20*SAVE_WEIGHTS_INTE 69 | 70 | # -------------------------------------------- Data_preprocess_config 71 | DATASET_NAME = 'coco' # 'pascal', 'coco' 72 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 73 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 74 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 75 | IMG_SHORT_SIDE_LEN = 800 76 | IMG_MAX_LENGTH = 1333 77 | CLASS_NUM = 80 78 | 79 | 80 | # --------------------------------------------- Network_config 81 | INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01) 82 | BBOX_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.001) 83 | WEIGHT_DECAY = 0.00004 if NET_NAME.startswith('Mobilenet') else 0.0001 84 | IS_ASSIGN = True 85 | 86 | # ---------------------------------------------Anchor config 87 | USE_CENTER_OFFSET = True 88 | LEVLES = ['P2', 'P3', 'P4', 'P5', 'P6'] 89 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 90 | ANCHOR_STRIDE_LIST = [4, 8, 16, 32, 64] 91 | ANCHOR_SCALES = [1.0] 92 | ANCHOR_RATIOS = [0.5, 1., 2.0] 93 | ROI_SCALE_FACTORS = [[10., 10., 5.0, 5.0], [10., 10., 5.0, 5.0], [10., 10., 5.0, 5.0]] 94 | ANCHOR_SCALE_FACTORS = None # [10., 10., 5.0, 5.0] 95 | 96 | # --------------------------------------------FPN config 97 | SHARE_HEADS = True 98 | KERNEL_SIZE = 3 99 | RPN_IOU_POSITIVE_THRESHOLD = 0.7 100 | RPN_IOU_NEGATIVE_THRESHOLD = 0.3 101 | TRAIN_RPN_CLOOBER_POSITIVES = False 102 | 103 | RPN_MINIBATCH_SIZE = 256 104 | RPN_POSITIVE_RATE = 0.5 105 | RPN_NMS_IOU_THRESHOLD = 0.7 106 | RPN_TOP_K_NMS_TRAIN = 12000 107 | RPN_MAXIMUM_PROPOSAL_TARIN = 2000 108 | 109 | RPN_TOP_K_NMS_TEST = 6000 110 | RPN_MAXIMUM_PROPOSAL_TEST = 1000 111 | 112 | # -------------------------------------------Fast-RCNN config 113 | ROI_SIZE = 14 114 | ROI_POOL_KERNEL_SIZE = 2 115 | USE_DROPOUT = False 116 | KEEP_PROB = 1.0 117 | SHOW_SCORE_THRSHOLD = 0.6 # only show in tensorboard 118 | 119 | FAST_RCNN_NMS_IOU_THRESHOLD = 0.5 # 0.6 120 | FAST_RCNN_NMS_MAX_BOXES_PER_CLASS = 100 121 | FAST_RCNN_IOU_POSITIVE_THRESHOLD = 0.5 122 | FAST_RCNN_IOU_NEGATIVE_THRESHOLD = 0.0 # 0.1 < IOU < 0.5 is negative 123 | FAST_RCNN_MINIBATCH_SIZE = 512 # if is -1, that is train with OHEM 124 | FAST_RCNN_POSITIVE_RATE = 0.25 125 | 126 | ADD_GTBOXES_TO_TRAIN = False 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /libs/configs/COCO/cfgs_res50_1x_coco_v2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | ''' 6 | gluoncv backbone + multi_gpu 7 | 8 | ''' 9 | 10 | # ------------------------------------------------ 11 | VERSION = 'Cascade_FPN_Res50_COCO_1x_20190420_v2' 12 | NET_NAME = 'resnet50_v1d' 13 | ADD_BOX_IN_TENSORBOARD = True 14 | 15 | # 
---------------------------------------- System_config 16 | ROOT_PATH = os.path.abspath('../') 17 | print(20*"++--") 18 | print(ROOT_PATH) 19 | GPU_GROUP = "0,1,2,3,4,5,6,7" 20 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 21 | SHOW_TRAIN_INFO_INTE = 10 22 | SMRY_ITER = 200 23 | SAVE_WEIGHTS_INTE = 80000 24 | 25 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 26 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 27 | INFERENCE_IMAGE_PATH = ROOT_PATH + '/tools/inference_image' 28 | INFERENCE_SAVE_PATH = ROOT_PATH + '/tools/inference_results' 29 | 30 | if NET_NAME.startswith("resnet"): 31 | weights_name = NET_NAME 32 | elif NET_NAME.startswith("MobilenetV2"): 33 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 34 | else: 35 | raise NotImplementedError 36 | 37 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 38 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 39 | 40 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 41 | 42 | # ------------------------------------------ Train config 43 | RESTORE_FROM_RPN = False 44 | IS_FILTER_OUTSIDE_BOXES = False 45 | FIXED_BLOCKS = 0 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | CUDA9 = True 49 | EVAL_THRESHOLD = 0.5 50 | 51 | RPN_LOCATION_LOSS_WEIGHT = 1. 52 | RPN_CLASSIFICATION_LOSS_WEIGHT = 1.0 53 | 54 | FAST_RCNN_LOCATION_LOSS_WEIGHT = 1.0 55 | FAST_RCNN_CLASSIFICATION_LOSS_WEIGHT = 1.0 56 | RPN_SIGMA = 3.0 57 | FASTRCNN_SIGMA = 1.0 58 | 59 | MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy 60 | GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip 61 | 62 | EPSILON = 1e-5 63 | MOMENTUM = 0.9 64 | BATCH_SIZE = 1 65 | WARM_SETP = int(0.25 * SAVE_WEIGHTS_INTE) 66 | LR = 5e-4 * 2 * 1.25 * NUM_GPU * BATCH_SIZE 67 | DECAY_STEP = [11*SAVE_WEIGHTS_INTE, 16*SAVE_WEIGHTS_INTE, 20*SAVE_WEIGHTS_INTE] # 50000, 70000 68 | MAX_ITERATION = 20*SAVE_WEIGHTS_INTE 69 | 70 | # -------------------------------------------- Data_preprocess_config 71 | DATASET_NAME = 'coco' # 'pascal', 'coco' 72 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 73 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 74 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR 75 | IMG_SHORT_SIDE_LEN = 800 76 | IMG_MAX_LENGTH = 1333 77 | CLASS_NUM = 80 78 | 79 | 80 | # --------------------------------------------- Network_config 81 | INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01) 82 | BBOX_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.001) 83 | WEIGHT_DECAY = 0.00004 if NET_NAME.startswith('Mobilenet') else 0.0001 84 | IS_ASSIGN = True 85 | 86 | # ---------------------------------------------Anchor config 87 | USE_CENTER_OFFSET = True 88 | LEVLES = ['P2', 'P3', 'P4', 'P5', 'P6'] 89 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 90 | ANCHOR_STRIDE_LIST = [4, 8, 16, 32, 64] 91 | ANCHOR_SCALES = [1.0] 92 | ANCHOR_RATIOS = [0.5, 1., 2.0] 93 | ROI_SCALE_FACTORS = [[10., 10., 5.0, 5.0], [20., 20., 10.0, 10.0], [30., 30., 15.0, 15.0]] 94 | ANCHOR_SCALE_FACTORS = None # [10., 10., 5.0, 5.0] 95 | 96 | # --------------------------------------------FPN config 97 | SHARE_HEADS = True 98 | KERNEL_SIZE = 3 99 | RPN_IOU_POSITIVE_THRESHOLD = 0.7 100 | RPN_IOU_NEGATIVE_THRESHOLD = 0.3 101 | TRAIN_RPN_CLOOBER_POSITIVES = False 102 | 103 | RPN_MINIBATCH_SIZE = 256 104 | RPN_POSITIVE_RATE = 0.5 105 | RPN_NMS_IOU_THRESHOLD = 0.7 106 | RPN_TOP_K_NMS_TRAIN = 12000 107 | RPN_MAXIMUM_PROPOSAL_TARIN = 2000 108 | 109 | RPN_TOP_K_NMS_TEST = 6000 110 | RPN_MAXIMUM_PROPOSAL_TEST = 1000 111 | 112 | # -------------------------------------------Fast-RCNN config 113 | ROI_SIZE = 14 114 | ROI_POOL_KERNEL_SIZE = 2 115 | USE_DROPOUT = False 116 | KEEP_PROB = 1.0 117 | SHOW_SCORE_THRSHOLD = 0.6 # only show in tensorboard 118 | 119 | FAST_RCNN_NMS_IOU_THRESHOLD = 0.5 # 0.6 120 | FAST_RCNN_NMS_MAX_BOXES_PER_CLASS = 100 121 | FAST_RCNN_IOU_POSITIVE_THRESHOLD = 0.5 122 | FAST_RCNN_IOU_NEGATIVE_THRESHOLD = 0.0 # 0.1 < IOU < 0.5 is negative 123 | FAST_RCNN_MINIBATCH_SIZE = 512 # if is -1, that is train with OHEM 124 | FAST_RCNN_POSITIVE_RATE = 0.25 125 | 126 | ADD_GTBOXES_TO_TRAIN = False 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /libs/configs/COCO/cfgs_res50_1x_coco_v3.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | ''' 6 | gluoncv backbone + multi_gpu 7 | 8 | ''' 9 | 10 | # ------------------------------------------------ 11 | VERSION = 'Cascade_FPN_Res50_COCO_1x_20190421_v3' 12 | NET_NAME = 'resnet50_v1d' 13 | ADD_BOX_IN_TENSORBOARD = True 14 | 15 | # ---------------------------------------- System_config 16 | ROOT_PATH = os.path.abspath('../') 17 | print(20*"++--") 18 | print(ROOT_PATH) 19 | GPU_GROUP = "0,1,2,3,4,5,6,7" 20 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 21 | SHOW_TRAIN_INFO_INTE = 20 22 | SMRY_ITER = 200 23 | SAVE_WEIGHTS_INTE = 80000 24 | 25 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 26 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 27 | INFERENCE_IMAGE_PATH = ROOT_PATH + '/tools/inference_image' 28 | INFERENCE_SAVE_PATH = ROOT_PATH + '/tools/inference_results' 29 | 30 | if NET_NAME.startswith("resnet"): 31 | weights_name = NET_NAME 32 | elif NET_NAME.startswith("MobilenetV2"): 33 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 34 | else: 35 | raise NotImplementedError 36 | 37 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 38 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 39 | 40 | EVALUATE_DIR = ROOT_PATH + 
'/output/evaluate_result_pickle/' 41 | 42 | # ------------------------------------------ Train config 43 | RESTORE_FROM_RPN = False 44 | IS_FILTER_OUTSIDE_BOXES = False 45 | FIXED_BLOCKS = 0 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | CUDA9 = True 49 | EVAL_THRESHOLD = 0.5 50 | 51 | RPN_LOCATION_LOSS_WEIGHT = 1. 52 | RPN_CLASSIFICATION_LOSS_WEIGHT = 1.0 53 | 54 | FAST_RCNN_LOCATION_LOSS_WEIGHT = 1.0 55 | FAST_RCNN_CLASSIFICATION_LOSS_WEIGHT = 1.0 56 | RPN_SIGMA = 3.0 57 | FASTRCNN_SIGMA = 1.0 58 | 59 | MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy 60 | GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip 61 | 62 | EPSILON = 1e-5 63 | MOMENTUM = 0.9 64 | BATCH_SIZE = 1 65 | WARM_SETP = int(0.25 * SAVE_WEIGHTS_INTE) 66 | LR = 5e-4 * 2 * 1.25 * NUM_GPU * BATCH_SIZE 67 | DECAY_STEP = [11*SAVE_WEIGHTS_INTE, 16*SAVE_WEIGHTS_INTE, 20*SAVE_WEIGHTS_INTE] # 50000, 70000 68 | MAX_ITERATION = 20*SAVE_WEIGHTS_INTE 69 | 70 | # -------------------------------------------- Data_preprocess_config 71 | DATASET_NAME = 'coco' # 'pascal', 'coco' 72 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 73 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 74 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 75 | IMG_SHORT_SIDE_LEN = 800 76 | IMG_MAX_LENGTH = 1333 77 | CLASS_NUM = 80 78 | 79 | 80 | # --------------------------------------------- Network_config 81 | INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01) 82 | BBOX_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.001) 83 | WEIGHT_DECAY = 0.00004 if NET_NAME.startswith('Mobilenet') else 0.0001 84 | IS_ASSIGN = True 85 | 86 | # ---------------------------------------------Anchor config 87 | USE_CENTER_OFFSET = True 88 | LEVLES = ['P2', 'P3', 'P4', 'P5', 'P6'] 89 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 90 | ANCHOR_STRIDE_LIST = [4, 8, 16, 32, 64] 91 | ANCHOR_SCALES = [1.0] 92 | ANCHOR_RATIOS = [0.5, 1., 2.0] 93 | ROI_SCALE_FACTORS = [[10., 10., 5.0, 5.0], [20., 20., 10.0, 10.0], [40., 40., 20.0, 20.0]] 94 | ANCHOR_SCALE_FACTORS = [10., 10., 5.0, 5.0] 95 | 96 | # --------------------------------------------FPN config 97 | SHARE_HEADS = True 98 | KERNEL_SIZE = 3 99 | RPN_IOU_POSITIVE_THRESHOLD = 0.7 100 | RPN_IOU_NEGATIVE_THRESHOLD = 0.3 101 | TRAIN_RPN_CLOOBER_POSITIVES = False 102 | 103 | RPN_MINIBATCH_SIZE = 256 104 | RPN_POSITIVE_RATE = 0.5 105 | RPN_NMS_IOU_THRESHOLD = 0.7 106 | RPN_TOP_K_NMS_TRAIN = 12000 107 | RPN_MAXIMUM_PROPOSAL_TARIN = 2000 108 | 109 | RPN_TOP_K_NMS_TEST = 6000 110 | RPN_MAXIMUM_PROPOSAL_TEST = 1000 111 | 112 | # -------------------------------------------Fast-RCNN config 113 | ROI_SIZE = 14 114 | ROI_POOL_KERNEL_SIZE = 2 115 | USE_DROPOUT = False 116 | KEEP_PROB = 1.0 117 | SHOW_SCORE_THRSHOLD = 0.6 # only show in tensorboard 118 | 119 | FAST_RCNN_NMS_IOU_THRESHOLD = 0.5 # 0.6 120 | FAST_RCNN_NMS_MAX_BOXES_PER_CLASS = 100 121 | FAST_RCNN_IOU_POSITIVE_THRESHOLD = 0.5 122 | FAST_RCNN_IOU_NEGATIVE_THRESHOLD = 0.0 # 0.1 < IOU < 0.5 is negative 123 | FAST_RCNN_MINIBATCH_SIZE = 512 # if is -1, that is train with OHEM 124 | FAST_RCNN_POSITIVE_RATE = 0.25 125 | 126 | ADD_GTBOXES_TO_TRAIN = False 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /libs/configs/COCO/cfgs_res50_1x_coco_v4.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | ''' 6 | gluoncv backbone + multi_gpu 7 | 8 | ''' 9 | 10 | # ------------------------------------------------ 11 | VERSION = 'Cascade_FPN_Res50_COCO_1x_20190425_v4' 12 | NET_NAME = 'resnet50_v1d' 13 | ADD_BOX_IN_TENSORBOARD = True 14 | 15 | # ---------------------------------------- System_config 16 | ROOT_PATH = os.path.abspath('../') 17 | print(20*"++--") 18 | print(ROOT_PATH) 19 | GPU_GROUP = "0,1,2,3,4,5,6,7" 20 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 21 | SHOW_TRAIN_INFO_INTE = 10 22 | SMRY_ITER = 200 23 | SAVE_WEIGHTS_INTE = 80000 24 | 25 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 26 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 27 | INFERENCE_IMAGE_PATH = ROOT_PATH + '/tools/inference_image' 28 | INFERENCE_SAVE_PATH = ROOT_PATH + '/tools/inference_results' 29 | 30 | if NET_NAME.startswith("resnet"): 31 | weights_name = NET_NAME 32 | elif NET_NAME.startswith("MobilenetV2"): 33 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 34 | else: 35 | raise NotImplementedError 36 | 37 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 38 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 39 | 40 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 41 | 42 | # ------------------------------------------ Train config 43 | RESTORE_FROM_RPN = False 44 | IS_FILTER_OUTSIDE_BOXES = False 45 | FIXED_BLOCKS = 0 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | CUDA9 = True 49 | EVAL_THRESHOLD = 0.5 50 | 51 | RPN_LOCATION_LOSS_WEIGHT = 1. 52 | RPN_CLASSIFICATION_LOSS_WEIGHT = 1.0 53 | 54 | FAST_RCNN_LOCATION_LOSS_WEIGHT = 1.0 55 | FAST_RCNN_CLASSIFICATION_LOSS_WEIGHT = 1.0 56 | RPN_SIGMA = 3.0 57 | FASTRCNN_SIGMA = 1.0 58 | 59 | MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multiply 60 | GRADIENT_CLIPPING_BY_NORM = None # 10.0; if None, will not clip 61 | 62 | EPSILON = 1e-5 63 | MOMENTUM = 0.9 64 | BATCH_SIZE = 1 65 | WARM_SETP = int(0.25 * SAVE_WEIGHTS_INTE) 66 | LR = 5e-4 * 2 * 1.25 * NUM_GPU * BATCH_SIZE 67 | DECAY_STEP = [11*SAVE_WEIGHTS_INTE, 16*SAVE_WEIGHTS_INTE, 20*SAVE_WEIGHTS_INTE] # 50000, 70000 68 | MAX_ITERATION = 20*SAVE_WEIGHTS_INTE 69 | 70 | # -------------------------------------------- Data_preprocess_config 71 | DATASET_NAME = 'coco' # 'pascal', 'coco' 72 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 73 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 74 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 75 | IMG_SHORT_SIDE_LEN = 800 76 | IMG_MAX_LENGTH = 1333 77 | CLASS_NUM = 80 78 | 79 | 80 | # --------------------------------------------- Network_config 81 | INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01) 82 | BBOX_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.001) 83 | WEIGHT_DECAY = 0.00004 if NET_NAME.startswith('Mobilenet') else 0.0001 84 | IS_ASSIGN = True 85 | 86 | # ---------------------------------------------Anchor config 87 | USE_CENTER_OFFSET = True 88 | LEVLES = ['P2', 'P3', 'P4', 'P5', 'P6'] 89 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 90 | ANCHOR_STRIDE_LIST = [4, 8, 16, 32, 64] 91 | ANCHOR_SCALES = [1.0] 92 | ANCHOR_RATIOS = [0.5, 1., 2.0] 93 | ROI_SCALE_FACTORS = [[10., 10., 5.0, 5.0], [20., 20., 10.0, 10.0], [30., 30., 15.0, 15.0]] 94 | ANCHOR_SCALE_FACTORS = None # [10., 10., 5.0, 5.0] 95 | 96 | # --------------------------------------------FPN config 97 | SHARE_HEADS = True 98 | KERNEL_SIZE = 3 99 | RPN_IOU_POSITIVE_THRESHOLD = 0.7 100 | RPN_IOU_NEGATIVE_THRESHOLD = 0.3 101 | TRAIN_RPN_CLOOBER_POSITIVES = False 102 | 103 | RPN_MINIBATCH_SIZE = 256 104 | RPN_POSITIVE_RATE = 0.5 105 | RPN_NMS_IOU_THRESHOLD = 0.7 106 | RPN_TOP_K_NMS_TRAIN = 12000 107 | RPN_MAXIMUM_PROPOSAL_TARIN = 2000 108 | 109 | RPN_TOP_K_NMS_TEST = 6000 110 | RPN_MAXIMUM_PROPOSAL_TEST = 1000 111 | 112 | # -------------------------------------------Fast-RCNN config 113 | ROI_SIZE = 14 114 | ROI_POOL_KERNEL_SIZE = 2 115 | USE_DROPOUT = False 116 | KEEP_PROB = 1.0 117 | SHOW_SCORE_THRSHOLD = 0.6 # only show in tensorboard 118 | 119 | FAST_RCNN_NMS_IOU_THRESHOLD = 0.5 # 0.6 120 | FAST_RCNN_NMS_MAX_BOXES_PER_CLASS = 100 121 | FAST_RCNN_IOU_POSITIVE_THRESHOLD = 0.5 122 | FAST_RCNN_IOU_NEGATIVE_THRESHOLD = 0.0 # 0.0 <= IOU < 0.5 is negative 123 | FAST_RCNN_MINIBATCH_SIZE = 512 # if -1, train with OHEM 124 | FAST_RCNN_POSITIVE_RATE = 0.25 125 | 126 | ADD_GTBOXES_TO_TRAIN = False 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /libs/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/configs/__init__.py -------------------------------------------------------------------------------- /libs/configs/cfgs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | 6 | # ------------------------------------------------ 7 | VERSION = 'Cascade_FPN_Res50_COCO_1x_20190420_v2' 8 | NET_NAME = 'resnet_v1_50' 9 | ADD_BOX_IN_TENSORBOARD = True 10 | 11 | # ---------------------------------------- System_config 12 | ROOT_PATH = os.path.abspath('../') 13 | print(20*"++--") 14 | print(ROOT_PATH) 15 | GPU_GROUP = "0,1,2,3,4,5,6,7" 16 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 17 | SHOW_TRAIN_INFO_INTE = 10 18 | SMRY_ITER = 200 19 | SAVE_WEIGHTS_INTE = 80000 20 | 21 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 22 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 23 | INFERENCE_IMAGE_PATH = ROOT_PATH + '/tools/inference_image' 24 | INFERENCE_SAVE_PATH = ROOT_PATH + '/tools/inference_results' 25 | 26 | if NET_NAME.startswith("resnet"): 27 | weights_name = NET_NAME 28 | elif NET_NAME.startswith("MobilenetV2"): 29 | weights_name =
"mobilenet/mobilenet_v2_1.0_224" 30 | else: 31 | raise NotImplementedError 32 | 33 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 34 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 35 | 36 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 37 | 38 | # ------------------------------------------ Train config 39 | RESTORE_FROM_RPN = False 40 | IS_FILTER_OUTSIDE_BOXES = False 41 | FIXED_BLOCKS = 0 # allow 0~3 42 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 43 | USE_07_METRIC = True 44 | CUDA9 = True 45 | EVAL_THRESHOLD = 0.5 46 | 47 | RPN_LOCATION_LOSS_WEIGHT = 1. 48 | RPN_CLASSIFICATION_LOSS_WEIGHT = 1.0 49 | 50 | FAST_RCNN_LOCATION_LOSS_WEIGHT = 1.0 51 | FAST_RCNN_CLASSIFICATION_LOSS_WEIGHT = 1.0 52 | RPN_SIGMA = 3.0 53 | FASTRCNN_SIGMA = 1.0 54 | 55 | MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy 56 | GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip 57 | 58 | EPSILON = 1e-5 59 | MOMENTUM = 0.9 60 | BATCH_SIZE = 1 61 | WARM_SETP = int(0.25 * SAVE_WEIGHTS_INTE) 62 | LR = 5e-4 * 2 * 1.25 * NUM_GPU * BATCH_SIZE 63 | DECAY_STEP = [11*SAVE_WEIGHTS_INTE, 16*SAVE_WEIGHTS_INTE, 20*SAVE_WEIGHTS_INTE] # 50000, 70000 64 | MAX_ITERATION = 20*SAVE_WEIGHTS_INTE 65 | 66 | # -------------------------------------------- Data_preprocess_config 67 | DATASET_NAME = 'coco' # 'pascal', 'coco' 68 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 69 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 70 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 71 | IMG_SHORT_SIDE_LEN = 800 72 | IMG_MAX_LENGTH = 1333 73 | CLASS_NUM = 80 74 | 75 | 76 | # --------------------------------------------- Network_config 77 | INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01) 78 | BBOX_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.001) 79 | WEIGHT_DECAY = 0.00004 if NET_NAME.startswith('Mobilenet') else 0.0001 80 | IS_ASSIGN = True 81 | 82 | # ---------------------------------------------Anchor config 83 | USE_CENTER_OFFSET = True 84 | LEVLES = ['P2', 'P3', 'P4', 'P5', 'P6'] 85 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 86 | ANCHOR_STRIDE_LIST = [4, 8, 16, 32, 64] 87 | ANCHOR_SCALES = [1.0] 88 | ANCHOR_RATIOS = [0.5, 1., 2.0] 89 | ROI_SCALE_FACTORS = [[10., 10., 5.0, 5.0], [20., 20., 10.0, 10.0], [30., 30., 15.0, 15.0]] 90 | ANCHOR_SCALE_FACTORS = None # [10., 10., 5.0, 5.0] 91 | 92 | # --------------------------------------------FPN config 93 | SHARE_HEADS = True 94 | KERNEL_SIZE = 3 95 | RPN_IOU_POSITIVE_THRESHOLD = 0.7 96 | RPN_IOU_NEGATIVE_THRESHOLD = 0.3 97 | TRAIN_RPN_CLOOBER_POSITIVES = False 98 | 99 | RPN_MINIBATCH_SIZE = 256 100 | RPN_POSITIVE_RATE = 0.5 101 | RPN_NMS_IOU_THRESHOLD = 0.7 102 | RPN_TOP_K_NMS_TRAIN = 12000 103 | RPN_MAXIMUM_PROPOSAL_TARIN = 2000 104 | 105 | RPN_TOP_K_NMS_TEST = 6000 106 | RPN_MAXIMUM_PROPOSAL_TEST = 1000 107 | 108 | # -------------------------------------------Fast-RCNN config 109 | ROI_SIZE = 14 110 | ROI_POOL_KERNEL_SIZE = 2 111 | USE_DROPOUT = False 112 | KEEP_PROB = 1.0 113 | SHOW_SCORE_THRSHOLD = 0.6 # only show in tensorboard 114 | 115 | FAST_RCNN_NMS_IOU_THRESHOLD = 0.5 # 0.6 116 | FAST_RCNN_NMS_MAX_BOXES_PER_CLASS = 100 117 | FAST_RCNN_IOU_POSITIVE_THRESHOLD = 0.5 118 | FAST_RCNN_IOU_NEGATIVE_THRESHOLD = 0.0 # 0.1 < IOU < 0.5 is negative 119 | FAST_RCNN_MINIBATCH_SIZE = 512 # if is -1, that is train with OHEM 120 | 
FAST_RCNN_POSITIVE_RATE = 0.25 121 | 122 | ADD_GTBOXES_TO_TRAIN = False 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /libs/detection_oprations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/detection_oprations/__init__.py -------------------------------------------------------------------------------- /libs/detection_oprations/anchor_target_layer_without_boxweight.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | from libs.configs import cfgs 13 | import numpy as np 14 | import numpy.random as npr 15 | from libs.box_utils.cython_utils.cython_bbox import bbox_overlaps 16 | from libs.box_utils import encode_and_decode 17 | 18 | 19 | def anchor_target_layer( 20 | gt_boxes, img_shape, all_anchors, is_restrict_bg=False): 21 | """Same as the anchor target layer in original Fast/er RCNN """ 22 | 23 | total_anchors = all_anchors.shape[0] 24 | img_h, img_w = img_shape[1], img_shape[2] 25 | gt_boxes = gt_boxes[:, :-1] # remove class label 26 | 27 | # allow boxes to sit over the edge by a small amount 28 | _allowed_border = 0 29 | 30 | # only keep anchors inside the image 31 | if cfgs.IS_FILTER_OUTSIDE_BOXES: 32 | inds_inside = np.where( 33 | (all_anchors[:, 0] >= -_allowed_border) & 34 | (all_anchors[:, 1] >= -_allowed_border) & 35 | (all_anchors[:, 2] < img_w + _allowed_border) & # width 36 | (all_anchors[:, 3] < img_h + _allowed_border) # height 37 | )[0] 38 | else: 39 | inds_inside = range(all_anchors.shape[0]) 40 | 41 | anchors = all_anchors[inds_inside, :] 42 | 43 | # label: 1 is positive, 0 is negative, -1 is dont care 44 | labels = np.empty((len(inds_inside),), dtype=np.float32) 45 | labels.fill(-1) 46 | 47 | # overlaps between the anchors and the gt boxes 48 | overlaps = bbox_overlaps( 49 | np.ascontiguousarray(anchors, dtype=np.float), 50 | np.ascontiguousarray(gt_boxes, dtype=np.float)) 51 | 52 | argmax_overlaps = overlaps.argmax(axis=1) 53 | max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] 54 | gt_argmax_overlaps = overlaps.argmax(axis=0) 55 | gt_max_overlaps = overlaps[ 56 | gt_argmax_overlaps, np.arange(overlaps.shape[1])] 57 | gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] 58 | 59 | if not cfgs.TRAIN_RPN_CLOOBER_POSITIVES: 60 | labels[max_overlaps < cfgs.RPN_IOU_NEGATIVE_THRESHOLD] = 0 61 | 62 | labels[gt_argmax_overlaps] = 1 63 | labels[max_overlaps >= cfgs.RPN_IOU_POSITIVE_THRESHOLD] = 1 64 | 65 | if cfgs.TRAIN_RPN_CLOOBER_POSITIVES: 66 | labels[max_overlaps < cfgs.RPN_IOU_NEGATIVE_THRESHOLD] = 0 67 | 68 | num_fg = int(cfgs.RPN_MINIBATCH_SIZE * cfgs.RPN_POSITIVE_RATE) 69 | fg_inds = np.where(labels == 1)[0] 70 | if len(fg_inds) > num_fg: 71 | disable_inds = npr.choice( 72 | fg_inds, size=(len(fg_inds) - num_fg), replace=False) 73 | labels[disable_inds] = -1 74 | 75 | num_bg = cfgs.RPN_MINIBATCH_SIZE - np.sum(labels == 1) 76 | if is_restrict_bg: 77 | num_bg = 
max(num_bg, num_fg * 1.5) 78 | bg_inds = np.where(labels == 0)[0] 79 | if len(bg_inds) > num_bg: 80 | disable_inds = npr.choice( 81 | bg_inds, size=(len(bg_inds) - num_bg), replace=False) 82 | labels[disable_inds] = -1 83 | 84 | bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) 85 | 86 | # map up to original set of anchors 87 | labels = _unmap(labels, total_anchors, inds_inside, fill=-1) 88 | bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) 89 | 90 | # labels = labels.reshape((1, height, width, A)) 91 | rpn_labels = labels.reshape((-1, 1)) 92 | 93 | # bbox_targets 94 | bbox_targets = bbox_targets.reshape((-1, 4)) 95 | rpn_bbox_targets = bbox_targets 96 | 97 | return rpn_labels, rpn_bbox_targets 98 | 99 | 100 | def _unmap(data, count, inds, fill=0): 101 | """ Unmap a subset of item (data) back to the original set of items (of 102 | size count) """ 103 | if len(data.shape) == 1: 104 | ret = np.empty((count,), dtype=np.float32) 105 | ret.fill(fill) 106 | ret[inds] = data 107 | else: 108 | ret = np.empty((count,) + data.shape[1:], dtype=np.float32) 109 | ret.fill(fill) 110 | ret[inds, :] = data 111 | return ret 112 | 113 | 114 | def _compute_targets(ex_rois, gt_rois): 115 | """Compute bounding-box regression targets for an image.""" 116 | # targets = bbox_transform(ex_rois, gt_rois[:, :4]).astype( 117 | # np.float32, copy=False) 118 | targets = encode_and_decode.encode_boxes(unencode_boxes=gt_rois, 119 | reference_boxes=ex_rois, 120 | scale_factors=cfgs.ANCHOR_SCALE_FACTORS) 121 | # targets = encode_and_decode.encode_boxes(ex_rois=ex_rois, 122 | # gt_rois=gt_rois, 123 | # scale_factor=None) 124 | return targets 125 | -------------------------------------------------------------------------------- /libs/detection_oprations/proposal_opr.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | from libs.configs import cfgs 4 | from libs.box_utils import encode_and_decode 5 | from libs.box_utils import boxes_utils 6 | import tensorflow as tf 7 | import numpy as np 8 | 9 | 10 | def postprocess_rpn_proposals(rpn_bbox_pred, rpn_cls_prob, img_shape, anchors, is_training): 11 | ''' 12 | 13 | :param rpn_bbox_pred: [-1, 4] 14 | :param rpn_cls_prob: [-1, 2] 15 | :param img_shape: 16 | :param anchors:[-1, 4] 17 | :param is_training: 18 | :return: 19 | ''' 20 | 21 | if is_training: 22 | pre_nms_topN = cfgs.RPN_TOP_K_NMS_TRAIN 23 | post_nms_topN = cfgs.RPN_MAXIMUM_PROPOSAL_TARIN 24 | # pre_nms_topN = cfgs.FPN_TOP_K_PER_LEVEL_TRAIN 25 | # post_nms_topN = pre_nms_topN 26 | else: 27 | pre_nms_topN = cfgs.RPN_TOP_K_NMS_TEST 28 | post_nms_topN = cfgs.RPN_MAXIMUM_PROPOSAL_TEST 29 | # pre_nms_topN = cfgs.FPN_TOP_K_PER_LEVEL_TEST 30 | # post_nms_topN = pre_nms_topN 31 | 32 | nms_thresh = cfgs.RPN_NMS_IOU_THRESHOLD 33 | 34 | cls_prob = rpn_cls_prob[:, 1] 35 | 36 | # 1. decode boxes 37 | decode_boxes = encode_and_decode.decode_boxes(encoded_boxes=rpn_bbox_pred, 38 | reference_boxes=anchors, 39 | scale_factors=cfgs.ANCHOR_SCALE_FACTORS) 40 | 41 | # 2. clip to img boundaries 42 | decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(decode_boxes=decode_boxes, 43 | img_shape=img_shape) 44 | 45 | # 3. get top N to NMS 46 | if pre_nms_topN > 0: 47 | pre_nms_topN = tf.minimum(pre_nms_topN, tf.shape(decode_boxes)[0], name='avoid_unenough_boxes') 48 | cls_prob, top_k_indices = tf.nn.top_k(cls_prob, k=pre_nms_topN) 49 | decode_boxes = tf.gather(decode_boxes, top_k_indices) 50 | 51 | # 4. 
NMS 52 | keep = tf.image.non_max_suppression( 53 | boxes=decode_boxes, 54 | scores=cls_prob, 55 | max_output_size=post_nms_topN, 56 | iou_threshold=nms_thresh) 57 | 58 | final_boxes = tf.gather(decode_boxes, keep) 59 | final_probs = tf.gather(cls_prob, keep) 60 | 61 | return final_boxes, final_probs 62 | 63 | -------------------------------------------------------------------------------- /libs/export_pbs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/export_pbs/__init__.py -------------------------------------------------------------------------------- /libs/export_pbs/exportPb.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | import os, sys 6 | import tensorflow as tf 7 | import tensorflow.contrib.slim as slim 8 | from tensorflow.python.tools import freeze_graph 9 | 10 | sys.path.append('../../') 11 | from data.io.image_preprocess import short_side_resize_for_inference_data 12 | from libs.configs import cfgs 13 | from libs.networks import build_whole_network 14 | 15 | CKPT_PATH = '/home/yangxue/isilon/yangxue/code/yxdet/FPN_TF_DEV/output/trained_weights/FPN_Res50_COCO_20190211_v18/voc_1599999model.ckpt' 16 | OUT_DIR = '../../output/Pbs' 17 | PB_NAME = 'FPN_Res50_COCO.pb' 18 | 19 | 20 | def build_detection_graph(): 21 | # 1. preprocess img 22 | img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3], 23 | name='input_img') # is RGB, not BGR 24 | raw_shape = tf.shape(img_plac) 25 | raw_h, raw_w = tf.to_float(raw_shape[0]), tf.to_float(raw_shape[1]) 26 | 27 | img_batch = tf.cast(img_plac, tf.float32) 28 | img_batch = short_side_resize_for_inference_data(img_tensor=img_batch, 29 | target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN, 30 | length_limitation=cfgs.IMG_MAX_LENGTH) 31 | if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']: 32 | img_batch = (img_batch / 255 - tf.constant(cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD) 33 | else: 34 | img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN) 35 | 36 | img_batch = tf.expand_dims(img_batch, axis=0) # [1, None, None, 3] 37 | 38 | det_net = build_whole_network.DetectionNetwork(base_network_name=cfgs.NET_NAME, 39 | is_training=False) 40 | 41 | detected_boxes, detection_scores, detection_category = det_net.build_whole_detection_network( 42 | input_img_batch=img_batch, 43 | gtboxes_batch=None) 44 | 45 | xmin, ymin, xmax, ymax = detected_boxes[:, 0], detected_boxes[:, 1], \ 46 | detected_boxes[:, 2], detected_boxes[:, 3] 47 | 48 | resized_shape = tf.shape(img_batch) 49 | resized_h, resized_w = tf.to_float(resized_shape[1]), tf.to_float(resized_shape[2]) 50 | 51 | xmin = xmin * raw_w / resized_w 52 | xmax = xmax * raw_w / resized_w 53 | 54 | ymin = ymin * raw_h / resized_h 55 | ymax = ymax * raw_h / resized_h 56 | 57 | boxes = tf.transpose(tf.stack([xmin, ymin, xmax, ymax])) 58 | dets = tf.concat([tf.reshape(detection_category, [-1, 1]), 59 | tf.reshape(detection_scores, [-1, 1]), 60 | boxes], axis=1, name='DetResults') 61 | 62 | return dets 63 | 64 | 65 | def export_frozenPB(): 66 | 67 | tf.reset_default_graph() 68 | 69 | dets = build_detection_graph() 70 | 71 | saver = tf.train.Saver() 72 | 73 | with tf.Session() as sess: 74 | print("we have restored the weights from =====>>\n", CKPT_PATH) 75 | saver.restore(sess, CKPT_PATH) 76 | 77 | 
tf.train.write_graph(sess.graph_def, OUT_DIR, PB_NAME) 78 | freeze_graph.freeze_graph(input_graph=os.path.join(OUT_DIR, PB_NAME), 79 | input_saver='', 80 | input_binary=False, 81 | input_checkpoint=CKPT_PATH, 82 | output_node_names="DetResults", 83 | restore_op_name="save/restore_all", 84 | filename_tensor_name='save/Const:0', 85 | output_graph=os.path.join(OUT_DIR, PB_NAME.replace('.pb', '_Frozen.pb')), 86 | clear_devices=False, 87 | initializer_nodes='') 88 | 89 | 90 | if __name__ == '__main__': 91 | os.environ["CUDA_VISIBLE_DEVICES"] = '' 92 | export_frozenPB() 93 | -------------------------------------------------------------------------------- /libs/export_pbs/test_TensorRT.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | from __future__ import division 6 | 7 | import os, sys 8 | import tensorflow as tf 9 | import tensorflow.contrib.tensorrt as trt 10 | import time 11 | import cv2 12 | import argparse 13 | import numpy as np 14 | sys.path.append('../../') 15 | 16 | from data.io.image_preprocess import short_side_resize_for_inference_data 17 | from libs.configs import cfgs 18 | from libs.networks import build_whole_network 19 | from libs.box_utils import draw_box_in_img 20 | from help_utils import tools 21 | 22 | 23 | def load_graph(frozen_graph_file): 24 | 25 | # we parse the graph_def file 26 | with tf.gfile.GFile(frozen_graph_file, 'rb') as f: 27 | graph_def = tf.GraphDef() 28 | graph_def.ParseFromString(f.read()) 29 | 30 | # we load the graph_def in the default graph 31 | 32 | graph_def = trt.create_inference_graph(graph_def, ["DetResults"], 33 | max_batch_size=1000, 34 | max_workspace_size_bytes=(1 << 10)*10000, 35 | precision_mode="INT8", 36 | maximum_cached_engines=10) # Get optimized graph 37 | 38 | # graph_def = trt.calib_graph_to_infer_graph(graph_def) 39 | tf.reset_default_graph() 40 | with tf.Graph().as_default() as graph: 41 | tf.import_graph_def(graph_def, 42 | input_map=None, 43 | return_elements=None, 44 | name="", 45 | op_dict=None, 46 | producer_op_list=None) 47 | return graph 48 | 49 | 50 | def test(frozen_graph_path, test_dir): 51 | 52 | graph = load_graph(frozen_graph_path) 53 | print("we are testing ====>>>>", frozen_graph_path) 54 | 55 | img = graph.get_tensor_by_name("input_img:0") 56 | dets = graph.get_tensor_by_name("DetResults:0") 57 | 58 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5) 59 | config = tf.ConfigProto(gpu_options=gpu_options) 60 | 61 | with tf.Session(graph=graph, config=config) as sess: 62 | for img_path in os.listdir(test_dir): 63 | a_img = cv2.imread(os.path.join(test_dir, img_path))[:, :, ::-1] 64 | st = time.time() 65 | dets_val = sess.run(dets, feed_dict={img: a_img}) 66 | end = time.time() 67 | 68 | show_indices = dets_val[:, 1] >= 0.5 69 | dets_val = dets_val[show_indices] 70 | final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(a_img, 71 | boxes=dets_val[:, 2:], 72 | labels=dets_val[:, 0], 73 | scores=dets_val[:, 1]) 74 | cv2.imwrite(img_path, 75 | final_detections[:, :, ::-1]) 76 | print("%s cost time: %f" % (img_path, end - st)) 77 | 78 | 79 | if __name__ == '__main__': 80 | os.environ["CUDA_VISIBLE_DEVICES"] = '0' 81 | test('/home/yangxue/isilon/yangxue/code/yxdet/FPN_TF_DEV/output/Pbs/FPN_Res50_COCO_Frozen.pb', 82 | '/unsullied/sharefs/yangxue/isilon/yangxue/data/COCO/train2017') 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 
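# Added note (hedged, based on the TF-TRT 1.x contrib workflow used above): with
# precision_mode="INT8", trt.create_inference_graph() returns a *calibration* graph,
# which is normally run over representative images and then converted with
# trt.calib_graph_to_infer_graph() (left commented out in load_graph() above)
# before real inference; the "FP32"/"FP16" modes need no such calibration pass.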
-------------------------------------------------------------------------------- /libs/export_pbs/test_exportPb.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | from __future__ import division 6 | 7 | import os, sys 8 | import tensorflow as tf 9 | import tensorflow.contrib.tensorrt as trt 10 | import time 11 | import cv2 12 | import argparse 13 | import numpy as np 14 | sys.path.append('../../') 15 | 16 | from data.io.image_preprocess import short_side_resize_for_inference_data 17 | from libs.configs import cfgs 18 | from libs.networks import build_whole_network 19 | from libs.box_utils import draw_box_in_img 20 | from help_utils import tools 21 | 22 | 23 | def load_graph(frozen_graph_file): 24 | 25 | # we parse the graph_def file 26 | with tf.gfile.GFile(frozen_graph_file, 'rb') as f: 27 | graph_def = tf.GraphDef() 28 | graph_def.ParseFromString(f.read()) 29 | 30 | with tf.Graph().as_default() as graph: 31 | tf.import_graph_def(graph_def, 32 | input_map=None, 33 | return_elements=None, 34 | name="", 35 | op_dict=None, 36 | producer_op_list=None) 37 | return graph 38 | 39 | 40 | def test(frozen_graph_path, test_dir): 41 | 42 | graph = load_graph(frozen_graph_path) 43 | print("we are testing ====>>>>", frozen_graph_path) 44 | 45 | img = graph.get_tensor_by_name("input_img:0") 46 | dets = graph.get_tensor_by_name("DetResults:0") 47 | 48 | with tf.Session(graph=graph) as sess: 49 | for img_path in os.listdir(test_dir): 50 | a_img = cv2.imread(os.path.join(test_dir, img_path))[:, :, ::-1] 51 | st = time.time() 52 | dets_val = sess.run(dets, feed_dict={img: a_img}) 53 | 54 | show_indices = dets_val[:, 1] >= 0.5 55 | dets_val = dets_val[show_indices] 56 | final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(a_img, 57 | boxes=dets_val[:, 2:], 58 | labels=dets_val[:, 0], 59 | scores=dets_val[:, 1]) 60 | cv2.imwrite(img_path, 61 | final_detections[:, :, ::-1]) 62 | print("%s cost time: %f" % (img_path, time.time() - st)) 63 | 64 | 65 | if __name__ == '__main__': 66 | os.environ["CUDA_VISIBLE_DEVICES"] = '0' 67 | test('/home/yangxue/isilon/yangxue/code/yxdet/FPN_TF_DEV/output/Pbs/FPN_Res50_COCO_Frozen.pb', 68 | '/unsullied/sharefs/yangxue/isilon/yangxue/data/COCO/train2017') 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /libs/gluon2TF/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | .pyc 104 | 105 | # pycharm files 106 | .idea/* 107 | 108 | # vis files 109 | *.png* 110 | *.jpg* 111 | *events.out.tfevents* 112 | *.ckpt* 113 | *.tfrecord* 114 | *.params 115 | *.zip 116 | tf_ckpts/ 117 | 118 | # folder 119 | tools/demos/* 120 | tools/txt_output/* 121 | output/* 122 | 123 | -------------------------------------------------------------------------------- /libs/gluon2TF/README.md: -------------------------------------------------------------------------------- 1 | # Convert ResNets weights from GluonCV to Tensorflow 2 | 3 | ## Abstract 4 | GluonCV has released new ResNet pre-trained weights and designed some new ResNet variants (such as resnet_v1_b and resnet_v1_d; see [this paper](https://arxiv.org/pdf/1812.01187.pdf) for details). 5 | 6 | This project reproduces the ResNets in GluonCV with TensorFlow and converts the GluonCV pre-trained weights to TensorFlow checkpoints. 7 | At present we have completed the conversion of resnet50_v1_b, resnet101_v1_b, resnet50_v1_d and resnet101_v1_d, 8 | with the error rate of the 1000-dimensional logits kept within 1e-5. 9 | (We welcome you to submit a PR to support more models.) 10 | 11 | We also transfer these weights to object detection (using FPN as the baseline; the detection code is posted [here](https://github.com/DetectionTeamUCAS/FPN_Tensorflow_DEV)), 12 | and **train on voc07 trainval (excluding the voc2012 dataset) and test on voc07 test**.
The results are as follows: 13 | 14 | ## Comparison 15 | 16 | ### use_voc2007_metric 17 | | Models | mAP | sheep | horse | bicycle | bottle | cow | sofa | bus | dog | cat | person | train | diningtable | aeroplane | car | pottedplant | tvmonitor | chair | bird | boat | motorbike | 18 | |------------|:---:|:--:|:--:|:--:|:---:|:--:|:--:|:--:|:--:|:--:|:--:|:---:|:--:|:--:|:--:|:--:|:---:|:--:|:--:|:--:|:--:| 19 | |[Faster-RCNN](https://github.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow) resnet101_v1(original)|74.63|76.35|86.18|79.87|58.73|83.4|74.75|80.03|85.4|86.55|78.24|76.07|70.89|78.52|86.26|47.80|76.34|52.14|78.06|58.90|78.04| 20 | |FPN resnet101_v1(original)|76.14|74.63|85.13|81.67|63.79|82.43|77.83|83.07|86.45|85.82|81.08|81.01|71.22|80.01|86.30|48.05|73.89|56.99|78.33|62.91|82.24| 21 | |FPN resnet101_v1_d|77.98|78.01|87.48|85.34|65.42|84.56|74.42|82.97|87.87|87.34|82.14|84.44|70.32|80.64|88.6|51.9|76.59|59.31|81.19|67.84|83.1| 22 | 23 | 24 | **FPN_resnet101_v1_d is transferred from GluonCV** 25 | 26 | **FPN_resnet101_v1(original) is the official resnet in [tensorflow/models](https://github.com/tensorflow/models/tree/master/research/slim/nets)** 27 | 28 | ## My Development Environment 29 | 1. python2.7 (anaconda recommended) 30 | 31 | 2. cuda9.0 32 | 33 | 3. [opencv(cv2)](https://pypi.org/project/opencv-python/) 34 | 35 | 4. mxnet-cu90 (1.3.0) 36 | 37 | 5. tensorflow == 1.10 38 | 39 | 6. [GluonCV](https://gluon-cv.mxnet.io/) 40 | 41 | ## Download MxNet GluonCV PreTrained Weights 42 | 43 | ``` 44 | cd $PATH_ROOT/resnet 45 | (modify the resnet version in the main function of download_mxnet_resnet_weights.py) 46 | python download_mxnet_resnet_weights.py 47 | ``` 48 | 49 | 50 | ## Convert MxNet Weights to TensorFlow Checkpoints and Calculate Errors 51 | 52 | Modify the main function in gluon2TF/resnet/test_resnet.py as follows, then run it: 53 | ``` 54 | MODEL_NAME = 'resnet101_v1d' (modify the version as you want) 55 | Mxnet_Weights_PATH = '../mxnet_weights/resnet101_v1d-1b2b825f.params' (remember to modify the path) 56 | 57 | cal_erro(img_path='../demo_img/person.jpg', 58 | use_tf_ckpt=False, 59 | ckpt_path='../tf_ckpts/%s.ckpt' % MODEL_NAME, 60 | save_ckpt=True) 61 | ``` 62 | 63 | Then run: 64 | ``` 65 | cd $PATH_ROOT/resnet 66 | python test_resnet.py 67 | ``` 68 | 69 | ## Calculate Errors between the Converted TensorFlow Checkpoints and the MxNet GluonCV Weights 70 | 71 | Modify the main function in gluon2TF/resnet/test_resnet.py as follows, then run it: 72 | ``` 73 | MODEL_NAME = 'resnet101_v1d' (modify the version as you want) 74 | Mxnet_Weights_PATH = '../mxnet_weights/resnet101_v1d-1b2b825f.params' (remember to modify the path) 75 | 76 | cal_erro(img_path='../demo_img/person.jpg', 77 | use_tf_ckpt=True, 78 | ckpt_path='../tf_ckpts/%s.ckpt' % MODEL_NAME, 79 | save_ckpt=False) 80 | ``` 81 | 82 | Then run: 83 | ``` 84 | cd $PATH_ROOT/resnet 85 | python test_resnet.py 86 | ``` 87 | 88 | 
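## Load the Converted Checkpoint (a sketch)

A minimal, illustrative sketch of restoring a converted checkpoint for plain inference. This script is not shipped in the repo; `build_resnet` and the checkpoint path follow `resnet/resnet.py` and `test_resnet.py`, and it assumes `test_resnet.py` has already saved `../tf_ckpts/resnet50_v1d.ckpt`. Run it from `$PATH_ROOT/resnet`:

```
import tensorflow as tf
from resnet import build_resnet

# with img_batch=None, build_resnet() feeds a fixed random (1, 224, 224, 3) batch
logits = build_resnet(img_batch=None, scope='resnet50_v1d',
                      is_training=False, freeze_norm=True, num_cls=1000)
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, '../tf_ckpts/resnet50_v1d.ckpt')
    print(sess.run(logits).shape)  # expected: (1, 1000)
```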
-------------------------------------------------------------------------------- /libs/gluon2TF/mxnet_weights/readme.txt: -------------------------------------------------------------------------------- 1 | the dir to place mxnet_weights 2 | -------------------------------------------------------------------------------- /libs/gluon2TF/resnet/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /libs/gluon2TF/resnet/parse_mxnet_weights.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | import mxnet.ndarray as nd 5 | import numpy as np 6 | 7 | 8 | def read_mxnet_weights(path, show=False): 9 | 10 | # assert os.path.exists(path), "path error: {}".format(path) 11 | 12 | name_MxnetArray_dict = nd.load(path) 13 | 14 | name_array_dict = {} 15 | for name in sorted(name_MxnetArray_dict.keys()): 16 | mxnet_array = name_MxnetArray_dict[name] 17 | array = mxnet_array.asnumpy() 18 | 19 | if show: 20 | print ("name: {} || shape: {} || dtype: {}".format(name, array.shape, array.dtype)) 21 | 22 | if name.endswith("weight"): 23 | if name.endswith("fc.weight"): 24 | array = np.transpose(array, [1, 0]) 25 | else: 26 | array = np.transpose(array, [2, 3, 1, 0]) 27 | # (out_channel, in_channel, k, k)(mxnet) --> (k, k, in_channel, out_channel)(tf) 28 | # (32, 3, 3, 3)-->(3, 3, 3, 32) 29 | name_array_dict[name] = array 30 | 31 | return name_array_dict 32 | 33 | 34 | def check_mxnet_names(mxnet_tf_map, mxnetName_array_dict): 35 | 36 | for key1, key2 in zip(sorted(mxnet_tf_map.keys()), sorted(mxnetName_array_dict.keys())): 37 | assert key1 == key2, "keys in mxnet_array_dict and mxnet_tf_map are not equal, details:\n" \ 38 | "key1 in mxnet_tf_map: {}\n"\ 39 | "key2 in mxnet_array dict: {}".format(key1, key2) 40 | if len(mxnetName_array_dict) == len(mxnet_tf_map): 41 | print("all mxnet names are mapped") 42 | 43 | 44 | def check_tf_vars(tf_mxnet_map, mxnetName_array_dict, tf_model_vars, scope='resnet50_v1_d'): 45 | 46 | tf_nake_names = sorted([var.op.name.split("%s/" % scope)[1] for var in tf_model_vars]) 47 | # check names 48 | for tf_name, name2 in zip(tf_nake_names, sorted(tf_mxnet_map.keys())): 49 | assert tf_name == name2, "keys in tf_model_vars and tf_mxnet_map are not equal, details:\n" \ 50 | "tf_name in tf_model_vars: {}\n" \ 51 | "name2 in tf_mxnet_maps: {}".format(tf_name, name2) 52 | print("every tf_model_var has a matched name in tf_mxnet_map") 53 | 54 | # check shapes 55 | for var in tf_model_vars: 56 | name = var.op.name.split("%s/"%scope)[1] 57 | array = mxnetName_array_dict[tf_mxnet_map[name]] 58 | 59 | assert var.shape == array.shape, "shapes of var in tf_model_vars and the mxnet array are not equal, details:\n" \ 60 | "tf_var in tf_model_vars: {}\n" \ 61 | "name in tf_mxnet_maps: {}, shape is : {}".format(var, tf_mxnet_map[name], 62 | array.shape) 63 | print("All tf_model_var shapes match the shapes of arrays in mxnet_array_dict...") 64 | 65 | 66 | -------------------------------------------------------------------------------- /libs/gluon2TF/resnet/resnet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | from __future__ import absolute_import, print_function, division 5 | import numpy as np 6 | import tensorflow as tf 7 | import tensorflow.contrib.slim as slim 8 | from resnet_utils import get_resnet_v1_d, get_resnet_v1_b 9 | from parse_mxnet_weights import read_mxnet_weights, check_mxnet_names, check_tf_vars 10 | import weights_map 11 | import os 12 | 13 | BottleNeck_NUM_DICT = { 14 | 'resnet50_v1b': [3, 4, 6, 3], 15 | 'resnet101_v1b': [3, 4, 23, 3], 16 | 'resnet50_v1d': [3, 4, 6, 3], 17 | 'resnet101_v1d': [3, 4, 23, 3] 18 | } 19 | 20 | BASE_CHANNELS_DICT = { 21 | 'resnet50_v1b': [64, 128, 256, 512], 22 | 'resnet101_v1b': [64, 128, 256, 512], 23 | 'resnet50_v1d': [64, 128, 256, 512], 24 | 'resnet101_v1d': [64, 128, 256,
512] 25 | } 26 | 27 | 28 | def create_resotre_op(scope, mxnet_weights_path): 29 | 30 | mxnetName_array_dict = read_mxnet_weights(mxnet_weights_path, show=False) 31 | 32 | tf_mxnet_map, mxnet_tf_map = \ 33 | weights_map.get_map(scope=scope, 34 | bottleneck_nums=BottleNeck_NUM_DICT[scope], show_mxnettf=False, show_tfmxnet=False) 35 | 36 | tf_model_vars = slim.get_model_variables(scope) 37 | 38 | # # check name and var 39 | check_mxnet_names(mxnet_tf_map, mxnetName_array_dict=mxnetName_array_dict) 40 | check_tf_vars(tf_mxnet_map, mxnetName_array_dict, tf_model_vars, scope=scope) 41 | # # 42 | 43 | assign_ops = [] 44 | 45 | for var in tf_model_vars: 46 | name = var.op.name.split('%s/' % scope)[1] 47 | new_val = tf.constant(mxnetName_array_dict[tf_mxnet_map[name]]) 48 | sub_assign_op = tf.assign(var, value=new_val) 49 | 50 | assign_ops.append(sub_assign_op) 51 | 52 | assign_op = tf.group(*assign_ops) 53 | 54 | return assign_op 55 | 56 | 57 | def build_resnet(img_batch=None, scope='resnet50_v1d', is_training=True, freeze_norm=False, num_cls=1000): 58 | if img_batch is None: 59 | np.random.seed(30) 60 | img_batch = np.random.rand(1, 224, 224, 3) # N, H, W, C 61 | img_batch = tf.constant(img_batch, dtype=tf.float32) 62 | 63 | print("Please ensure the img is in NHWC") 64 | 65 | if scope.endswith('b'): 66 | get_resnet_fn = get_resnet_v1_b 67 | elif scope.endswith('d'): 68 | get_resnet_fn = get_resnet_v1_d 69 | else: raise NotImplementedError('scope must end with b or d') # guard: get_resnet_fn was undefined otherwise 70 | logits = get_resnet_fn(input_x=img_batch, scope=scope, 71 | bottleneck_nums=BottleNeck_NUM_DICT[scope], 72 | base_channels=BASE_CHANNELS_DICT[scope], 73 | is_training=is_training, freeze_norm=freeze_norm, num_cls=num_cls) 74 | 75 | return logits 76 | 77 | 78 | if __name__ == "__main__": 79 | build_resnet() 80 | create_resotre_op(scope='resnet50_v1d', mxnet_weights_path='../mxnet_weights/resnet50_v1d-117a384e.params') # example args; the bare call raised TypeError 81 | -------------------------------------------------------------------------------- /libs/gluon2TF/resnet/some_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import mxnet as mx 4 | from mxnet.gluon import nn 5 | from mxnet import ndarray as nd 6 | import tensorflow.contrib.slim as slim 7 | import tensorflow as tf 8 | import numpy as np 9 | from test_resnet import mxnet_process_img 10 | # Convolution layer 11 | # input/output data format: batch * channel * height * width 12 | # weight format: output_channels * in_channels * height * width 13 | np.random.seed(30) 14 | 15 | # w = nd.array(np.random.rand(2, 3, 3, 3)) 16 | w = nd.load('/home/yjr/MxNet_Codes/gluon-cv/scripts/gloun2TF/mxnet_weights/resnet50_v1b-0ecdba34.params')['conv1.weight'] # [64, 3, 7, 7] 17 | # w = nd.arange(9*2).reshape((2, 1, 3, 3)) 18 | data = nd.array(np.random.rand(1, 3, 224, 224)) 19 | # data, _ = mxnet_process_img('../demo_img/person.jpg') 20 | # data = nd.arange(6*6).reshape((1, 1, 6, 6)) 21 | 22 | # convolution operation 23 | out = nd.Convolution(data, w, no_bias=True, 24 | kernel=(7, 7), 25 | stride=(2, 2), 26 | num_filter=64, 27 | pad=(3, 3)) 28 | 29 | 30 | 31 | def tf_conv(data, w): 32 | 33 | data = tf.constant(data.asnumpy()) 34 | data = tf.pad(data, paddings=[[0, 0], [0, 0], [3, 3], [3, 3]]) 35 | tf_out = slim.conv2d(data, num_outputs=64, kernel_size=[7, 7], padding='VALID', stride=2, 36 | biases_initializer=None, data_format='NCHW', normalizer_fn=None, activation_fn=None) 37 | tf_w = tf.constant(np.transpose(w.asnumpy(), [2, 3, 1, 0])) 38 | # tf_w = 39 | model_vars = slim.get_model_variables() 40 | assign_op = tf.assign(model_vars[0], tf_w) 41 | 42 | with tf.Session() as sess: 43 | sess.run(assign_op) 44 | print(sess.run(tf_out)) 45 | 46 | 47 | if __name__ == '__main__': 48 | tf_conv(data, w=w) 49 | print "mxnet_out: ", out 50 | print 20 * "+" -------------------------------------------------------------------------------- /libs/gluon2TF/resnet/test_resnet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, division, print_function 4 | 5 | import tensorflow as tf 6 | import tensorflow.contrib.slim as slim 7 | 8 | from mxnet import nd, image 9 | import numpy as np 10 | from mxnet.gluon.data.vision import transforms 11 | from gluoncv.model_zoo import get_model 12 | from gluoncv.data.transforms.presets.imagenet import transform_eval 13 | from resnet import build_resnet, create_resotre_op 14 | from resnet_utils import DEBUG 15 | import os 16 | 17 | 18 | # MODEL_NAME = 'resnet50_v1d' 19 | # Mxnet_Weights_PATH = '../mxnet_weights/resnet50_v1d-117a384e.params' 20 | 21 | 22 | MODEL_NAME = 'resnet101_v1b' 23 | # Mxnet_Weights_PATH = '../mxnet_weights/resnet101_v1d-1b2b825f.params' 24 | Mxnet_Weights_PATH = '../mxnet_weights/resnet101_v1b-a455932a.params' 25 | 26 | def mxnet_process_img(path): 27 | # Load Images 28 | img = image.imread(path) 29 | 30 | # Transform 31 | img = transform_eval(img) 32 | img_arr = img.asnumpy() 33 | if len(img_arr) == 3: 34 | img_arr = np.expand_dims(img_arr, axis=0) 35 | img_tf_tensor = tf.constant(img_arr) 36 | 37 | # np.random.seed(30) 38 | # img = nd.array(np.random.randn(1, 3, 600, 800)) 39 | # img_tf_tensor = tf.constant(img.asnumpy()) 40 | img_tf_tensor = tf.transpose(img_tf_tensor, [0, 2, 3, 1]) 41 | return img, img_tf_tensor 42 | 43 | # 44 | def mxnet_infer(img): 45 | 46 | model_name = MODEL_NAME 47 | net = get_model(model_name, pretrained=False) 48 | net.load_parameters(Mxnet_Weights_PATH) 49 | pred = net(img) 50 | 51 | # print (pred.shape, pred.dtype) 52 | pred = pred.asnumpy() 53 | return pred 54 | 55 | 56 | def tf_infer(img, save_ckpt=True, restore_from_tfckpt=False, ckpt_path=None): 57 | 58 | pred_tensor = build_resnet(img_batch=img, scope=MODEL_NAME, 59 | is_training=False, freeze_norm=True, num_cls=1000) 60 | 61 | if restore_from_tfckpt: 62 | print("restore weights from tf_CKPT") 63 | assert ckpt_path is not None, "ckpt_path is None, Error" 64 | restore_op = tf.train.Saver() 65 | else: 66 | print('restore weights from MxnetWeights') 67 | restore_op = create_resotre_op(MODEL_NAME, Mxnet_Weights_PATH) 68 | 69 | if DEBUG: 70 | from resnet_utils import debug_dict 71 | print (debug_dict) 72 | assert len(debug_dict) >= 3, "debug_dict size error, len is: {}".format(len(debug_dict)) 73 | 74 | if save_ckpt: 75 | save_dir = '../tf_ckpts' 76 | if not os.path.exists(save_dir): 77 | os.mkdir(save_dir) 78 | saver = tf.train.Saver(max_to_keep=30) 79 | save_ckpt = os.path.join(save_dir, '%s.ckpt' % MODEL_NAME) 80 | 81 | with tf.Session() as sess: 82 | if restore_from_tfckpt: 83 | restore_op.restore(sess, ckpt_path) 84 | else: 85 | sess.run(restore_op) 86 | if DEBUG: 87 | name_val = {} 88 | for name in debug_dict.keys(): 89 | name_val[name] = sess.run(debug_dict[name]) 90 | pred = sess.run(pred_tensor) 91 | if save_ckpt: 92 | saver.save(sess, save_ckpt) 93 | 94 | return pred 95 | 96 | 97 | def cal_erro(img_path, use_tf_ckpt=False, ckpt_path=None, save_ckpt=False): 98 | 99 | mxnet_img, tf_img = mxnet_process_img(img_path) 100 | 101 | mxnet_pred = mxnet_infer(mxnet_img) 102 | 103 | mxnet_pred = np.squeeze(mxnet_pred, axis=0) 104 | tf_pred = tf_infer(tf_img, restore_from_tfckpt=use_tf_ckpt, ckpt_path=ckpt_path,
save_ckpt=save_ckpt) 105 | tf_pred = np.squeeze(tf_pred, axis=0) 106 | assert mxnet_pred.shape == tf_pred.shape, "mxnet_pred shape Do Not equal with tf_pred shape" 107 | 108 | argmax_mxnet = np.argmax(mxnet_pred) 109 | argmax_tf = np.argmax(tf_pred) 110 | 111 | erro = np.linalg.norm(tf_pred-mxnet_pred) 112 | for i, (m, t) in enumerate(zip(mxnet_pred, tf_pred)): 113 | if i == 5: 114 | break 115 | print ("mxnet|tf==>>{} | {} ".format(m, t)) 116 | 117 | print ('total_erro-->', erro) 118 | print ('erro_rate-->', erro/np.linalg.norm(mxnet_pred)) 119 | print ("argmax_mxnet: {} || tf_argmx: {}".format(argmax_mxnet, argmax_tf)) 120 | 121 | 122 | if __name__ == '__main__': 123 | 124 | # cal_erro(img_path='../demo_img/person.jpg', 125 | # use_tf_ckpt=False, 126 | # ckpt_path=None, 127 | # save_ckpt=True) 128 | cal_erro(img_path='../demo_img/person.jpg', 129 | use_tf_ckpt=True, 130 | ckpt_path='../tf_ckpts/%s.ckpt' % MODEL_NAME, 131 | save_ckpt=False) 132 | print (20*"++") 133 | -------------------------------------------------------------------------------- /libs/label_name_dict/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/label_name_dict/__init__.py -------------------------------------------------------------------------------- /libs/label_name_dict/coco_dict.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | class_names = [ 6 | 'back_ground', 'person', 'bicycle', 'car', 'motorcycle', 7 | 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 8 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 9 | 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 10 | 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 11 | 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 12 | 'sports ball', 'kite', 'baseball bat', 'baseball glove', 13 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 14 | 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 15 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 16 | 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 17 | 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 18 | 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 19 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 20 | 'book', 'clock', 'vase', 'scissors', 'teddy bear', 21 | 'hair drier', 'toothbrush'] 22 | 23 | 24 | classes_originID = { 25 | 'person': 1, 'bicycle': 2, 'car': 3, 'motorcycle': 4, 26 | 'airplane': 5, 'bus': 6, 'train': 7, 'truck': 8, 'boat': 9, 27 | 'traffic light': 10, 'fire hydrant': 11, 'stop sign': 13, 28 | 'parking meter': 14, 'bench': 15, 'bird': 16, 'cat': 17, 29 | 'dog': 18, 'horse': 19, 'sheep': 20, 'cow': 21, 'elephant': 22, 30 | 'bear': 23, 'zebra': 24, 'giraffe': 25, 'backpack': 27, 31 | 'umbrella': 28, 'handbag': 31, 'tie': 32, 'suitcase': 33, 32 | 'frisbee': 34, 'skis': 35, 'snowboard': 36, 'sports ball': 37, 33 | 'kite': 38, 'baseball bat': 39, 'baseball glove': 40, 34 | 'skateboard': 41, 'surfboard': 42, 'tennis racket': 43, 35 | 'bottle': 44, 'wine glass': 46, 'cup': 47, 'fork': 48, 36 | 'knife': 49, 'spoon': 50, 'bowl': 51, 'banana': 52, 'apple': 53, 37 | 'sandwich': 54, 'orange': 55, 'broccoli': 56, 'carrot': 57, 38 | 'hot dog': 58, 'pizza': 59, 'donut': 60, 'cake': 61, 39 | 'chair': 62, 'couch': 63, 
'potted plant': 64, 'bed': 65, 40 | 'dining table': 67, 'toilet': 70, 'tv': 72, 'laptop': 73, 41 | 'mouse': 74, 'remote': 75, 'keyboard': 76, 'cell phone': 77, 42 | 'microwave': 78, 'oven': 79, 'toaster': 80, 'sink': 81, 43 | 'refrigerator': 82, 'book': 84, 'clock': 85, 'vase': 86, 44 | 'scissors': 87, 'teddy bear': 88, 'hair drier': 89, 45 | 'toothbrush': 90} 46 | 47 | originID_classes = {item: key for key, item in classes_originID.items()} 48 | NAME_LABEL_MAP = dict(zip(class_names, range(len(class_names)))) 49 | LABEL_NAME_MAP = dict(zip(range(len(class_names)), class_names)) 50 | 51 | # print (originID_classes) 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /libs/label_name_dict/label_dict.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | 4 | from libs.configs import cfgs 5 | 6 | 7 | class_names = [ 8 | 'back_ground', 'person', 'bicycle', 'car', 'motorcycle', 9 | 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 10 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 11 | 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 12 | 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 13 | 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 14 | 'sports ball', 'kite', 'baseball bat', 'baseball glove', 15 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 16 | 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 17 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 18 | 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 19 | 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 20 | 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 21 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 22 | 'book', 'clock', 'vase', 'scissors', 'teddy bear', 23 | 'hair drier', 'toothbrush'] 24 | 25 | classes_originID = { 26 | 'person': 1, 'bicycle': 2, 'car': 3, 'motorcycle': 4, 27 | 'airplane': 5, 'bus': 6, 'train': 7, 'truck': 8, 'boat': 9, 28 | 'traffic light': 10, 'fire hydrant': 11, 'stop sign': 13, 29 | 'parking meter': 14, 'bench': 15, 'bird': 16, 'cat': 17, 30 | 'dog': 18, 'horse': 19, 'sheep': 20, 'cow': 21, 'elephant': 22, 31 | 'bear': 23, 'zebra': 24, 'giraffe': 25, 'backpack': 27, 32 | 'umbrella': 28, 'handbag': 31, 'tie': 32, 'suitcase': 33, 33 | 'frisbee': 34, 'skis': 35, 'snowboard': 36, 'sports ball': 37, 34 | 'kite': 38, 'baseball bat': 39, 'baseball glove': 40, 35 | 'skateboard': 41, 'surfboard': 42, 'tennis racket': 43, 36 | 'bottle': 44, 'wine glass': 46, 'cup': 47, 'fork': 48, 37 | 'knife': 49, 'spoon': 50, 'bowl': 51, 'banana': 52, 'apple': 53, 38 | 'sandwich': 54, 'orange': 55, 'broccoli': 56, 'carrot': 57, 39 | 'hot dog': 58, 'pizza': 59, 'donut': 60, 'cake': 61, 40 | 'chair': 62, 'couch': 63, 'potted plant': 64, 'bed': 65, 41 | 'dining table': 67, 'toilet': 70, 'tv': 72, 'laptop': 73, 42 | 'mouse': 74, 'remote': 75, 'keyboard': 76, 'cell phone': 77, 43 | 'microwave': 78, 'oven': 79, 'toaster': 80, 'sink': 81, 44 | 'refrigerator': 82, 'book': 84, 'clock': 85, 'vase': 86, 45 | 'scissors': 87, 'teddy bear': 88, 'hair drier': 89, 46 | 'toothbrush': 90} 47 | 48 | 49 | def get_coco_label_dict(): 50 | originID_classes = {item: key for key, item in classes_originID.items()} 51 | NAME_LABEL_MAP = dict(zip(class_names, range(len(class_names)))) 52 | return NAME_LABEL_MAP 53 | 54 | if cfgs.DATASET_NAME == 'ship': 55 | NAME_LABEL_MAP = { 56 | 
'back_ground': 0, 57 | 'ship': 1 58 | } 59 | elif cfgs.DATASET_NAME == 'aeroplane': 60 | NAME_LABEL_MAP = { 61 | 'back_ground': 0, 62 | 'aeroplane': 1 63 | } 64 | elif cfgs.DATASET_NAME == 'WIDER': 65 | NAME_LABEL_MAP = { 66 | 'back_ground': 0, 67 | 'face': 1 68 | } 69 | elif cfgs.DATASET_NAME == 'icdar': 70 | NAME_LABEL_MAP = { 71 | 'back_ground': 0, 72 | 'text': 1 73 | } 74 | elif cfgs.DATASET_NAME.startswith('DOTA'): 75 | NAME_LABEL_MAP = { 76 | 'back_ground': 0, 77 | 'roundabout': 1, 78 | 'tennis-court': 2, 79 | 'swimming-pool': 3, 80 | 'storage-tank': 4, 81 | 'soccer-ball-field': 5, 82 | 'small-vehicle': 6, 83 | 'ship': 7, 84 | 'plane': 8, 85 | 'large-vehicle': 9, 86 | 'helicopter': 10, 87 | 'harbor': 11, 88 | 'ground-track-field': 12, 89 | 'bridge': 13, 90 | 'basketball-court': 14, 91 | 'baseball-diamond': 15 92 | } 93 | elif cfgs.DATASET_NAME.startswith('DOAI2019'): 94 | NAME_LABEL_MAP = { 95 | 'back_ground': 0, 96 | 'turntable': 1, 97 | 'tennis-court': 2, 98 | 'swimming-pool': 3, 99 | 'storage-tank': 4, 100 | 'soccer-ball-field': 5, 101 | 'small-vehicle': 6, 102 | 'ship': 7, 103 | 'plane': 8, 104 | 'large-vehicle': 9, 105 | 'helicopter': 10, 106 | 'harbor': 11, 107 | 'ground-track-field': 12, 108 | 'bridge': 13, 109 | 'basketball-court': 14, 110 | 'baseball-diamond': 15, 111 | 'container-crane': 16 112 | } 113 | elif cfgs.DATASET_NAME == 'coco': 114 | NAME_LABEL_MAP = get_coco_label_dict() 115 | elif cfgs.DATASET_NAME == 'pascal': 116 | NAME_LABEL_MAP = { 117 | 'back_ground': 0, 118 | 'aeroplane': 1, 119 | 'bicycle': 2, 120 | 'bird': 3, 121 | 'boat': 4, 122 | 'bottle': 5, 123 | 'bus': 6, 124 | 'car': 7, 125 | 'cat': 8, 126 | 'chair': 9, 127 | 'cow': 10, 128 | 'diningtable': 11, 129 | 'dog': 12, 130 | 'horse': 13, 131 | 'motorbike': 14, 132 | 'person': 15, 133 | 'pottedplant': 16, 134 | 'sheep': 17, 135 | 'sofa': 18, 136 | 'train': 19, 137 | 'tvmonitor': 20 138 | } 139 | elif cfgs.DATASET_NAME == 'bdd100k': 140 | NAME_LABEL_MAP = { 141 | 'back_ground': 0, 142 | 'bus': 1, 143 | 'traffic light': 2, 144 | 'traffic sign': 3, 145 | 'person': 4, 146 | 'bike': 5, 147 | 'truck': 6, 148 | 'motor': 7, 149 | 'car': 8, 150 | 'train': 9, 151 | 'rider': 10 152 | } 153 | else: 154 | raise NotImplementedError('please set label dict!') # the old `assert 'please set label dict!'` was always true and never failed
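# Illustrative example (added): with cfgs.DATASET_NAME == 'coco',
#   NAME_LABEL_MAP['person'] -> 1
#   get_label_name_map()[1]  -> 'person'   (reverse map built below)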
155 | 156 | 157 | def get_label_name_map(): 158 | reverse_dict = {} 159 | for name, label in NAME_LABEL_MAP.items(): 160 | reverse_dict[label] = name 161 | return reverse_dict 162 | 163 | 164 | LABEl_NAME_MAP = get_label_name_map() -------------------------------------------------------------------------------- /libs/label_name_dict/remote_sensing_dict.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | NAME_LABEL_MAP = { 4 | 'back_ground': 0, 5 | 'building': 1 6 | } 7 | 8 | 9 | def get_label_name_map(): 10 | reverse_dict = {} 11 | for name, label in NAME_LABEL_MAP.items(): 12 | reverse_dict[label] = name 13 | return reverse_dict 14 | 15 | LABEl_NAME_MAP = get_label_name_map() -------------------------------------------------------------------------------- /libs/losses/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/losses/__init__.py -------------------------------------------------------------------------------- /libs/networks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/networks/__init__.py -------------------------------------------------------------------------------- /libs/networks/mobilenet/README.md: -------------------------------------------------------------------------------- 1 | # Mobilenet V2 2 | This folder contains building code for Mobilenet V2, based on 3 | [Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation](https://arxiv.org/abs/1801.04381) 4 | 5 | 6 | # Pretrained model 7 | TODO 8 | 9 | # Example 10 | TODO 11 | 12 | 13 | -------------------------------------------------------------------------------- /libs/networks/mobilenet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/networks/mobilenet/__init__.py -------------------------------------------------------------------------------- /libs/networks/mobilenet_v2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | import tensorflow.contrib.slim as slim 5 | import tensorflow as tf 6 | 7 | from libs.networks.mobilenet import mobilenet_v2 8 | from libs.networks.mobilenet.mobilenet import training_scope 9 | from libs.networks.mobilenet.mobilenet_v2 import op 10 | from libs.networks.mobilenet.mobilenet_v2 import ops 11 | expand_input = ops.expand_input_by_factor 12 | 13 | V2_BASE_DEF = dict( 14 | defaults={ 15 | # Note: these parameters of batch norm affect the architecture 16 | # that's why they are here and not in training_scope.
17 | (slim.batch_norm,): {'center': True, 'scale': True}, 18 | (slim.conv2d, slim.fully_connected, slim.separable_conv2d): { 19 | 'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6 20 | }, 21 | (ops.expanded_conv,): { 22 | 'expansion_size': expand_input(6), 23 | 'split_expansion': 1, 24 | 'normalizer_fn': slim.batch_norm, 25 | 'residual': True 26 | }, 27 | (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME'} 28 | }, 29 | spec=[ 30 | op(slim.conv2d, stride=2, num_outputs=32, kernel_size=[3, 3]), 31 | op(ops.expanded_conv, 32 | expansion_size=expand_input(1, divisible_by=1), 33 | num_outputs=16, scope='expanded_conv'), 34 | op(ops.expanded_conv, stride=2, num_outputs=24, scope='expanded_conv_1'), 35 | op(ops.expanded_conv, stride=1, num_outputs=24, scope='expanded_conv_2'), 36 | op(ops.expanded_conv, stride=2, num_outputs=32, scope='expanded_conv_3'), 37 | op(ops.expanded_conv, stride=1, num_outputs=32, scope='expanded_conv_4'), 38 | op(ops.expanded_conv, stride=1, num_outputs=32, scope='expanded_conv_5'), 39 | op(ops.expanded_conv, stride=2, num_outputs=64, scope='expanded_conv_6'), 40 | op(ops.expanded_conv, stride=1, num_outputs=64, scope='expanded_conv_7'), 41 | op(ops.expanded_conv, stride=1, num_outputs=64, scope='expanded_conv_8'), 42 | op(ops.expanded_conv, stride=1, num_outputs=64, scope='expanded_conv_9'), 43 | op(ops.expanded_conv, stride=1, num_outputs=96, scope='expanded_conv_10'), 44 | op(ops.expanded_conv, stride=1, num_outputs=96, scope='expanded_conv_11'), 45 | op(ops.expanded_conv, stride=1, num_outputs=96, scope='expanded_conv_12') 46 | ], 47 | ) 48 | 49 | 50 | V2_HEAD_DEF = dict( 51 | defaults={ 52 | # Note: these parameters of batch norm affect the architecture 53 | # that's why they are here and not in training_scope. 54 | (slim.batch_norm,): {'center': True, 'scale': True}, 55 | (slim.conv2d, slim.fully_connected, slim.separable_conv2d): { 56 | 'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6 57 | }, 58 | (ops.expanded_conv,): { 59 | 'expansion_size': expand_input(6), 60 | 'split_expansion': 1, 61 | 'normalizer_fn': slim.batch_norm, 62 | 'residual': True 63 | }, 64 | (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME'} 65 | }, 66 | spec=[ 67 | op(ops.expanded_conv, stride=2, num_outputs=160, scope='expanded_conv_13'), 68 | op(ops.expanded_conv, stride=1, num_outputs=160, scope='expanded_conv_14'), 69 | op(ops.expanded_conv, stride=1, num_outputs=160, scope='expanded_conv_15'), 70 | op(ops.expanded_conv, stride=1, num_outputs=320, scope='expanded_conv_16'), 71 | op(slim.conv2d, stride=1, kernel_size=[1, 1], num_outputs=1280, scope='Conv_1') 72 | ], 73 | ) 74 | def mobilenetv2_scope(is_training=True, 75 | trainable=True, 76 | weight_decay=0.00004, 77 | stddev=0.09, 78 | dropout_keep_prob=0.8, 79 | bn_decay=0.997): 80 | """Defines the MobileNet training scope. 81 | By default we do not use BN (batch norm is frozen; see batch_norm_params below). 82 | 83 | Rewrites the upstream training scope.
84 | """ 85 | batch_norm_params = { 86 | 'is_training': False, 87 | 'trainable': False, 88 | 'decay': bn_decay, 89 | } 90 | with slim.arg_scope(training_scope(is_training=is_training, weight_decay=weight_decay)): 91 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.separable_conv2d], 92 | trainable=trainable): 93 | with slim.arg_scope([slim.batch_norm], **batch_norm_params) as sc: 94 | return sc 95 | 96 | 97 | 98 | def mobilenetv2_base(img_batch, is_training=True): 99 | 100 | with slim.arg_scope(mobilenetv2_scope(is_training=is_training, trainable=True)): 101 | 102 | feature_to_crop, endpoints = mobilenet_v2.mobilenet_base(input_tensor=img_batch, 103 | num_classes=None, 104 | is_training=False, 105 | depth_multiplier=1.0, 106 | scope='MobilenetV2', 107 | conv_defs=V2_BASE_DEF, 108 | finegrain_classification_mode=False) 109 | 110 | # feature_to_crop = tf.Print(feature_to_crop, [tf.shape(feature_to_crop)], summarize=10, message='rpn_shape') 111 | return feature_to_crop 112 | 113 | 114 | def mobilenetv2_head(inputs, is_training=True): 115 | with slim.arg_scope(mobilenetv2_scope(is_training=is_training, trainable=True)): 116 | net, _ = mobilenet_v2.mobilenet(input_tensor=inputs, 117 | num_classes=None, 118 | is_training=False, 119 | depth_multiplier=1.0, 120 | scope='MobilenetV2', 121 | conv_defs=V2_HEAD_DEF, 122 | finegrain_classification_mode=False) 123 | 124 | net = tf.squeeze(net, [1, 2]) 125 | 126 | return net -------------------------------------------------------------------------------- /libs/networks/ops.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | 4 | 5 | def norm(x, norm_type, is_train, G=32, esp=1e-5): 6 | with tf.variable_scope('{}_norm'.format(norm_type)): 7 | if norm_type == 'none': 8 | output = x 9 | elif norm_type == 'batch': 10 | output = tf.contrib.layers.batch_norm( 11 | x, center=True, scale=True, decay=0.999, 12 | is_training=is_train, updates_collections=None 13 | ) 14 | elif norm_type == 'group': 15 | # normalize 16 | # tranpose: [bs, h, w, c] to [bs, c, h, w] following the paper 17 | x = tf.transpose(x, [0, 3, 1, 2]) 18 | N, C, H, W = x.get_shape().as_list() 19 | G = min(G, C) 20 | x = tf.reshape(x, [-1, G, C // G, H, W]) 21 | mean, var = tf.nn.moments(x, [2, 3, 4], keep_dims=True) 22 | x = (x - mean) / tf.sqrt(var + esp) 23 | # per channel gamma and beta 24 | gamma = tf.Variable(tf.constant(1.0, shape=[C]), dtype=tf.float32, name='gamma') 25 | beta = tf.Variable(tf.constant(0.0, shape=[C]), dtype=tf.float32, name='beta') 26 | gamma = tf.reshape(gamma, [1, C, 1, 1]) 27 | beta = tf.reshape(beta, [1, C, 1, 1]) 28 | 29 | output = tf.reshape(x, [-1, C, H, W]) * gamma + beta 30 | # tranpose: [bs, c, h, w, c] to [bs, h, w, c] following the paper 31 | output = tf.transpose(output, [0, 2, 3, 1]) 32 | else: 33 | raise NotImplementedError 34 | return output 35 | 36 | 37 | def lrelu(x, leak=0.2, name="lrelu"): 38 | with tf.variable_scope(name): 39 | f1 = 0.5 * (1 + leak) 40 | f2 = 0.5 * (1 - leak) 41 | return f1 * x + f2 * abs(x) 42 | 43 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/alexnet.py: -------------------------------------------------------------------------------- 1 | # Copyright 
2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a model definition for AlexNet. 16 | 17 | This work was first described in: 18 | ImageNet Classification with Deep Convolutional Neural Networks 19 | Alex Krizhevsky, Ilya Sutskever and Geoffrey E. Hinton 20 | 21 | and later refined in: 22 | One weird trick for parallelizing convolutional neural networks 23 | Alex Krizhevsky, 2014 24 | 25 | Here we provide the implementation proposed in "One weird trick" and not 26 | "ImageNet Classification"; as per the paper, the LRN layers have been removed. 27 | 28 | Usage: 29 | with slim.arg_scope(alexnet.alexnet_v2_arg_scope()): 30 | outputs, end_points = alexnet.alexnet_v2(inputs) 31 | 32 | @@alexnet_v2 33 | """ 34 | 35 | from __future__ import absolute_import 36 | from __future__ import division 37 | from __future__ import print_function 38 | 39 | import tensorflow as tf 40 | 41 | slim = tf.contrib.slim 42 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) 43 | 44 | 45 | def alexnet_v2_arg_scope(weight_decay=0.0005): 46 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 47 | activation_fn=tf.nn.relu, 48 | biases_initializer=tf.constant_initializer(0.1), 49 | weights_regularizer=slim.l2_regularizer(weight_decay)): 50 | with slim.arg_scope([slim.conv2d], padding='SAME'): 51 | with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc: 52 | return arg_sc 53 | 54 | 55 | def alexnet_v2(inputs, 56 | num_classes=1000, 57 | is_training=True, 58 | dropout_keep_prob=0.5, 59 | spatial_squeeze=True, 60 | scope='alexnet_v2'): 61 | """AlexNet version 2. 62 | 63 | Described in: http://arxiv.org/pdf/1404.5997v2.pdf 64 | Parameters from: 65 | github.com/akrizhevsky/cuda-convnet2/blob/master/layers/ 66 | layers-imagenet-1gpu.cfg 67 | 68 | Note: All the fully_connected layers have been transformed to conv2d layers. 69 | To use in classification mode, resize input to 224x224. To use in fully 70 | convolutional mode, set spatial_squeeze to false. 71 | The LRN layers have been removed and the initializers changed from 72 | random_normal_initializer to xavier_initializer. 73 | 74 | Args: 75 | inputs: a tensor of size [batch_size, height, width, channels]. 76 | num_classes: number of predicted classes. 77 | is_training: whether or not the model is being trained. 78 | dropout_keep_prob: the probability that activations are kept in the dropout 79 | layers during training. 80 | spatial_squeeze: whether or not the model should squeeze the spatial dimensions 81 | of the outputs. Useful to remove unnecessary dimensions for classification. 82 | scope: Optional scope for the variables. 83 | 84 | Returns: 85 | the last op containing the log predictions and end_points dict.
86 | """ 87 | with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc: 88 | end_points_collection = sc.name + '_end_points' 89 | # Collect outputs for conv2d, fully_connected and max_pool2d. 90 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], 91 | outputs_collections=[end_points_collection]): 92 | net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID', 93 | scope='conv1') 94 | net = slim.max_pool2d(net, [3, 3], 2, scope='pool1') 95 | net = slim.conv2d(net, 192, [5, 5], scope='conv2') 96 | net = slim.max_pool2d(net, [3, 3], 2, scope='pool2') 97 | net = slim.conv2d(net, 384, [3, 3], scope='conv3') 98 | net = slim.conv2d(net, 384, [3, 3], scope='conv4') 99 | net = slim.conv2d(net, 256, [3, 3], scope='conv5') 100 | net = slim.max_pool2d(net, [3, 3], 2, scope='pool5') 101 | 102 | # Use conv2d instead of fully_connected layers. 103 | with slim.arg_scope([slim.conv2d], 104 | weights_initializer=trunc_normal(0.005), 105 | biases_initializer=tf.constant_initializer(0.1)): 106 | net = slim.conv2d(net, 4096, [5, 5], padding='VALID', 107 | scope='fc6') 108 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 109 | scope='dropout6') 110 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7') 111 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 112 | scope='dropout7') 113 | net = slim.conv2d(net, num_classes, [1, 1], 114 | activation_fn=None, 115 | normalizer_fn=None, 116 | biases_initializer=tf.zeros_initializer(), 117 | scope='fc8') 118 | 119 | # Convert end_points_collection into a end_point dict. 120 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 121 | if spatial_squeeze: 122 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed') 123 | end_points[sc.name + '/fc8'] = net 124 | return net, end_points 125 | alexnet_v2.default_image_size = 224 126 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/cifarnet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a variant of the CIFAR-10 model definition.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | slim = tf.contrib.slim 24 | 25 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(stddev=stddev) 26 | 27 | 28 | def cifarnet(images, num_classes=10, is_training=False, 29 | dropout_keep_prob=0.5, 30 | prediction_fn=slim.softmax, 31 | scope='CifarNet'): 32 | """Creates a variant of the CifarNet model. 33 | 34 | Note that since the output is a set of 'logits', the values fall in the 35 | interval of (-infinity, infinity). 
Consequently, to convert the outputs to a 36 | probability distribution over the classes, one will need to convert them 37 | using the softmax function: 38 | 39 | logits = cifarnet.cifarnet(images, is_training=False) 40 | probabilities = tf.nn.softmax(logits) 41 | predictions = tf.argmax(logits, 1) 42 | 43 | Args: 44 | images: A batch of `Tensors` of size [batch_size, height, width, channels]. 45 | num_classes: the number of classes in the dataset. 46 | is_training: specifies whether or not we're currently training the model. 47 | This variable will determine the behaviour of the dropout layer. 48 | dropout_keep_prob: the percentage of activation values that are retained. 49 | prediction_fn: a function to get predictions out of logits. 50 | scope: Optional variable_scope. 51 | 52 | Returns: 53 | logits: the pre-softmax activations, a tensor of size 54 | [batch_size, `num_classes`] 55 | end_points: a dictionary from components of the network to the corresponding 56 | activation. 57 | """ 58 | end_points = {} 59 | 60 | with tf.variable_scope(scope, 'CifarNet', [images, num_classes]): 61 | net = slim.conv2d(images, 64, [5, 5], scope='conv1') 62 | end_points['conv1'] = net 63 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool1') 64 | end_points['pool1'] = net 65 | net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm1') 66 | net = slim.conv2d(net, 64, [5, 5], scope='conv2') 67 | end_points['conv2'] = net 68 | net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm2') 69 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool2') 70 | end_points['pool2'] = net 71 | net = slim.flatten(net) 72 | end_points['Flatten'] = net 73 | net = slim.fully_connected(net, 384, scope='fc3') 74 | end_points['fc3'] = net 75 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 76 | scope='dropout3') 77 | net = slim.fully_connected(net, 192, scope='fc4') 78 | end_points['fc4'] = net 79 | logits = slim.fully_connected(net, num_classes, 80 | biases_initializer=tf.zeros_initializer(), 81 | weights_initializer=trunc_normal(1/192.0), 82 | weights_regularizer=None, 83 | activation_fn=None, 84 | scope='logits') 85 | 86 | end_points['Logits'] = logits 87 | end_points['Predictions'] = prediction_fn(logits, scope='Predictions') 88 | 89 | return logits, end_points 90 | cifarnet.default_image_size = 32 91 | 92 | 93 | def cifarnet_arg_scope(weight_decay=0.004): 94 | """Defines the default cifarnet argument scope. 95 | 96 | Args: 97 | weight_decay: The weight decay to use for regularizing the model. 98 | 99 | Returns: 100 | An `arg_scope` to use for the cifarnet model. 101 | """ 102 | with slim.arg_scope( 103 | [slim.conv2d], 104 | weights_initializer=tf.truncated_normal_initializer(stddev=5e-2), 105 | activation_fn=tf.nn.relu): 106 | with slim.arg_scope( 107 | [slim.fully_connected], 108 | biases_initializer=tf.constant_initializer(0.1), 109 | weights_initializer=trunc_normal(0.04), 110 | weights_regularizer=slim.l2_regularizer(weight_decay), 111 | activation_fn=tf.nn.relu) as sc: 112 | return sc 113 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/inception.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Brings all inception models under one namespace.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | # pylint: disable=unused-import 22 | from nets.inception_resnet_v2 import inception_resnet_v2 23 | from nets.inception_resnet_v2 import inception_resnet_v2_arg_scope 24 | from nets.inception_resnet_v2 import inception_resnet_v2_base 25 | from nets.inception_v1 import inception_v1 26 | from nets.inception_v1 import inception_v1_arg_scope 27 | from nets.inception_v1 import inception_v1_base 28 | from nets.inception_v2 import inception_v2 29 | from nets.inception_v2 import inception_v2_arg_scope 30 | from nets.inception_v2 import inception_v2_base 31 | from nets.inception_v3 import inception_v3 32 | from nets.inception_v3 import inception_v3_arg_scope 33 | from nets.inception_v3 import inception_v3_base 34 | from nets.inception_v4 import inception_v4 35 | from nets.inception_v4 import inception_v4_arg_scope 36 | from nets.inception_v4 import inception_v4_base 37 | # pylint: enable=unused-import 38 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/inception_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains common code shared by all inception models. 16 | 17 | Usage of arg scope: 18 | with slim.arg_scope(inception_arg_scope()): 19 | logits, end_points = inception.inception_v3(images, num_classes, 20 | is_training=is_training) 21 | 22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | import tensorflow as tf 28 | 29 | slim = tf.contrib.slim 30 | 31 | 32 | def inception_arg_scope(weight_decay=0.00004, 33 | use_batch_norm=True, 34 | batch_norm_decay=0.9997, 35 | batch_norm_epsilon=0.001): 36 | """Defines the default arg scope for inception models. 37 | 38 | Args: 39 | weight_decay: The weight decay to use for regularizing the model. 40 | use_batch_norm: If `True`, batch_norm is applied after each convolution. 41 | batch_norm_decay: Decay for batch norm moving average.
42 | batch_norm_epsilon: Small float added to variance to avoid dividing by zero 43 | in batch norm. 44 | 45 | Returns: 46 | An `arg_scope` to use for the inception models. 47 | """ 48 | batch_norm_params = { 49 | # Decay for the moving averages. 50 | 'decay': batch_norm_decay, 51 | # epsilon to prevent 0s in variance. 52 | 'epsilon': batch_norm_epsilon, 53 | # collection containing update_ops. 54 | 'updates_collections': tf.GraphKeys.UPDATE_OPS, 55 | } 56 | if use_batch_norm: 57 | normalizer_fn = slim.batch_norm 58 | normalizer_params = batch_norm_params 59 | else: 60 | normalizer_fn = None 61 | normalizer_params = {} 62 | # Set weight_decay for weights in Conv and FC layers. 63 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 64 | weights_regularizer=slim.l2_regularizer(weight_decay)): 65 | with slim.arg_scope( 66 | [slim.conv2d], 67 | weights_initializer=slim.variance_scaling_initializer(), 68 | activation_fn=tf.nn.relu, 69 | normalizer_fn=normalizer_fn, 70 | normalizer_params=normalizer_params) as sc: 71 | return sc 72 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/lenet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a variant of the LeNet model definition.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | slim = tf.contrib.slim 24 | 25 | 26 | def lenet(images, num_classes=10, is_training=False, 27 | dropout_keep_prob=0.5, 28 | prediction_fn=slim.softmax, 29 | scope='LeNet'): 30 | """Creates a variant of the LeNet model. 31 | 32 | Note that since the output is a set of 'logits', the values fall in the 33 | interval of (-infinity, infinity). Consequently, to convert the outputs to a 34 | probability distribution over the characters, one will need to convert them 35 | using the softmax function: 36 | 37 | logits = lenet.lenet(images, is_training=False) 38 | probabilities = tf.nn.softmax(logits) 39 | predictions = tf.argmax(logits, 1) 40 | 41 | Args: 42 | images: A batch of `Tensors` of size [batch_size, height, width, channels]. 43 | num_classes: the number of classes in the dataset. 44 | is_training: specifies whether or not we're currently training the model. 45 | This variable will determine the behaviour of the dropout layer. 46 | dropout_keep_prob: the percentage of activation values that are retained. 47 | prediction_fn: a function to get predictions out of logits. 48 | scope: Optional variable_scope. 
49 | 50 | Returns: 51 | logits: the pre-softmax activations, a tensor of size 52 | [batch_size, `num_classes`] 53 | end_points: a dictionary from components of the network to the corresponding 54 | activation. 55 | """ 56 | end_points = {} 57 | 58 | with tf.variable_scope(scope, 'LeNet', [images, num_classes]): 59 | net = slim.conv2d(images, 32, [5, 5], scope='conv1') 60 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool1') 61 | net = slim.conv2d(net, 64, [5, 5], scope='conv2') 62 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool2') 63 | net = slim.flatten(net) 64 | end_points['Flatten'] = net 65 | 66 | net = slim.fully_connected(net, 1024, scope='fc3') 67 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 68 | scope='dropout3') 69 | logits = slim.fully_connected(net, num_classes, activation_fn=None, 70 | scope='fc4') 71 | 72 | end_points['Logits'] = logits 73 | end_points['Predictions'] = prediction_fn(logits, scope='Predictions') 74 | 75 | return logits, end_points 76 | lenet.default_image_size = 28 77 | 78 | 79 | def lenet_arg_scope(weight_decay=0.0): 80 | """Defines the default lenet argument scope. 81 | 82 | Args: 83 | weight_decay: The weight decay to use for regularizing the model. 84 | 85 | Returns: 86 | An `arg_scope` to use for the lenet model. 87 | """ 88 | with slim.arg_scope( 89 | [slim.conv2d, slim.fully_connected], 90 | weights_regularizer=slim.l2_regularizer(weight_decay), 91 | weights_initializer=tf.truncated_normal_initializer(stddev=0.1), 92 | activation_fn=tf.nn.relu) as sc: 93 | return sc 94 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/mobilenet_v1.md: -------------------------------------------------------------------------------- 1 | # MobileNet_v1 2 | 3 | [MobileNets](https://arxiv.org/abs/1704.04861) are small, low-latency, low-power models parameterized to meet the resource constraints of a variety of use cases. They can be built upon for classification, detection, embeddings and segmentation similar to how other popular large scale models, such as Inception, are used. MobileNets can be run efficiently on mobile devices with [TensorFlow Mobile](https://www.tensorflow.org/mobile/). 4 | 5 | MobileNets trade off between latency, size and accuracy while comparing favorably with popular models from the literature. 6 | 7 | ![alt text](mobilenet_v1.png "MobileNet Graph") 8 | 9 | # Pre-trained Models 10 | 11 | Choose the right MobileNet model to fit your latency and size budget. The size of the network in memory and on disk is proportional to the number of parameters. The latency and power usage of the network scales with the number of Multiply-Accumulates (MACs) which measures the number of fused Multiplication and Addition operations. These MobileNet models have been trained on the 12 | [ILSVRC-2012-CLS](http://www.image-net.org/challenges/LSVRC/2012/) 13 | image classification dataset. Accuracies were computed by evaluating using a single image crop.
14 | 15 | Model Checkpoint | Million MACs | Million Parameters | Top-1 Accuracy| Top-5 Accuracy | 16 | :----:|:------------:|:----------:|:-------:|:-------:| 17 | [MobileNet_v1_1.0_224](http://download.tensorflow.org/models/mobilenet_v1_1.0_224_2017_06_14.tar.gz)|569|4.24|70.7|89.5| 18 | [MobileNet_v1_1.0_192](http://download.tensorflow.org/models/mobilenet_v1_1.0_192_2017_06_14.tar.gz)|418|4.24|69.3|88.9| 19 | [MobileNet_v1_1.0_160](http://download.tensorflow.org/models/mobilenet_v1_1.0_160_2017_06_14.tar.gz)|291|4.24|67.2|87.5| 20 | [MobileNet_v1_1.0_128](http://download.tensorflow.org/models/mobilenet_v1_1.0_128_2017_06_14.tar.gz)|186|4.24|64.1|85.3| 21 | [MobileNet_v1_0.75_224](http://download.tensorflow.org/models/mobilenet_v1_0.75_224_2017_06_14.tar.gz)|317|2.59|68.4|88.2| 22 | [MobileNet_v1_0.75_192](http://download.tensorflow.org/models/mobilenet_v1_0.75_192_2017_06_14.tar.gz)|233|2.59|67.4|87.3| 23 | [MobileNet_v1_0.75_160](http://download.tensorflow.org/models/mobilenet_v1_0.75_160_2017_06_14.tar.gz)|162|2.59|65.2|86.1| 24 | [MobileNet_v1_0.75_128](http://download.tensorflow.org/models/mobilenet_v1_0.75_128_2017_06_14.tar.gz)|104|2.59|61.8|83.6| 25 | [MobileNet_v1_0.50_224](http://download.tensorflow.org/models/mobilenet_v1_0.50_224_2017_06_14.tar.gz)|150|1.34|64.0|85.4| 26 | [MobileNet_v1_0.50_192](http://download.tensorflow.org/models/mobilenet_v1_0.50_192_2017_06_14.tar.gz)|110|1.34|62.1|84.0| 27 | [MobileNet_v1_0.50_160](http://download.tensorflow.org/models/mobilenet_v1_0.50_160_2017_06_14.tar.gz)|77|1.34|59.9|82.5| 28 | [MobileNet_v1_0.50_128](http://download.tensorflow.org/models/mobilenet_v1_0.50_128_2017_06_14.tar.gz)|49|1.34|56.2|79.6| 29 | [MobileNet_v1_0.25_224](http://download.tensorflow.org/models/mobilenet_v1_0.25_224_2017_06_14.tar.gz)|41|0.47|50.6|75.0| 30 | [MobileNet_v1_0.25_192](http://download.tensorflow.org/models/mobilenet_v1_0.25_192_2017_06_14.tar.gz)|34|0.47|49.0|73.6| 31 | [MobileNet_v1_0.25_160](http://download.tensorflow.org/models/mobilenet_v1_0.25_160_2017_06_14.tar.gz)|21|0.47|46.0|70.7| 32 | [MobileNet_v1_0.25_128](http://download.tensorflow.org/models/mobilenet_v1_0.25_128_2017_06_14.tar.gz)|14|0.47|41.3|66.2| 33 | 34 | 35 | Here is an example of how to download the MobileNet_v1_1.0_224 checkpoint: 36 | 37 | ```shell 38 | $ CHECKPOINT_DIR=/tmp/checkpoints 39 | $ mkdir ${CHECKPOINT_DIR} 40 | $ wget http://download.tensorflow.org/models/mobilenet_v1_1.0_224_2017_06_14.tar.gz 41 | $ tar -xvf mobilenet_v1_1.0_224_2017_06_14.tar.gz 42 | $ mv mobilenet_v1_1.0_224.ckpt.* ${CHECKPOINT_DIR} 43 | $ rm mobilenet_v1_1.0_224_2017_06_14.tar.gz 44 | ``` 45 | More information on integrating MobileNets into your project can be found at the [TF-Slim Image Classification Library](https://github.com/tensorflow/models/blob/master/slim/README.md). 46 | 47 | To get started running models on-device go to [TensorFlow Mobile](https://www.tensorflow.org/mobile/). 48 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/mobilenet_v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/networks/slim_nets/mobilenet_v1.png -------------------------------------------------------------------------------- /libs/networks/slim_nets/nets_factory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. 
All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a factory for building various models.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | import functools 21 | 22 | import tensorflow as tf 23 | 24 | from nets import alexnet 25 | from nets import cifarnet 26 | from nets import inception 27 | from nets import lenet 28 | from nets import mobilenet_v1 29 | from nets import overfeat 30 | from nets import resnet_v1 31 | from nets import resnet_v2 32 | from nets import vgg 33 | 34 | slim = tf.contrib.slim 35 | 36 | networks_map = {'alexnet_v2': alexnet.alexnet_v2, 37 | 'cifarnet': cifarnet.cifarnet, 38 | 'overfeat': overfeat.overfeat, 39 | 'vgg_a': vgg.vgg_a, 40 | 'vgg_16': vgg.vgg_16, 41 | 'vgg_19': vgg.vgg_19, 42 | 'inception_v1': inception.inception_v1, 43 | 'inception_v2': inception.inception_v2, 44 | 'inception_v3': inception.inception_v3, 45 | 'inception_v4': inception.inception_v4, 46 | 'inception_resnet_v2': inception.inception_resnet_v2, 47 | 'lenet': lenet.lenet, 48 | 'resnet_v1_50': resnet_v1.resnet_v1_50, 49 | 'resnet_v1_101': resnet_v1.resnet_v1_101, 50 | 'resnet_v1_152': resnet_v1.resnet_v1_152, 51 | 'resnet_v1_200': resnet_v1.resnet_v1_200, 52 | 'resnet_v2_50': resnet_v2.resnet_v2_50, 53 | 'resnet_v2_101': resnet_v2.resnet_v2_101, 54 | 'resnet_v2_152': resnet_v2.resnet_v2_152, 55 | 'resnet_v2_200': resnet_v2.resnet_v2_200, 56 | 'mobilenet_v1': mobilenet_v1.mobilenet_v1, 57 | } 58 | 59 | arg_scopes_map = {'alexnet_v2': alexnet.alexnet_v2_arg_scope, 60 | 'cifarnet': cifarnet.cifarnet_arg_scope, 61 | 'overfeat': overfeat.overfeat_arg_scope, 62 | 'vgg_a': vgg.vgg_arg_scope, 63 | 'vgg_16': vgg.vgg_arg_scope, 64 | 'vgg_19': vgg.vgg_arg_scope, 65 | 'inception_v1': inception.inception_v3_arg_scope, 66 | 'inception_v2': inception.inception_v3_arg_scope, 67 | 'inception_v3': inception.inception_v3_arg_scope, 68 | 'inception_v4': inception.inception_v4_arg_scope, 69 | 'inception_resnet_v2': 70 | inception.inception_resnet_v2_arg_scope, 71 | 'lenet': lenet.lenet_arg_scope, 72 | 'resnet_v1_50': resnet_v1.resnet_arg_scope, 73 | 'resnet_v1_101': resnet_v1.resnet_arg_scope, 74 | 'resnet_v1_152': resnet_v1.resnet_arg_scope, 75 | 'resnet_v1_200': resnet_v1.resnet_arg_scope, 76 | 'resnet_v2_50': resnet_v2.resnet_arg_scope, 77 | 'resnet_v2_101': resnet_v2.resnet_arg_scope, 78 | 'resnet_v2_152': resnet_v2.resnet_arg_scope, 79 | 'resnet_v2_200': resnet_v2.resnet_arg_scope, 80 | 'mobilenet_v1': mobilenet_v1.mobilenet_v1_arg_scope, 81 | } 82 | 83 | 84 | def get_network_fn(name, num_classes, weight_decay=0.0, is_training=False): 85 | """Returns a network_fn such as `logits, end_points = network_fn(images)`. 86 | 87 | Args: 88 | name: The name of the network. 89 | num_classes: The number of classes to use for classification. 
90 | weight_decay: The l2 coefficient for the model weights. 91 | is_training: `True` if the model is being used for training and `False` 92 | otherwise. 93 | 94 | Returns: 95 | network_fn: A function that applies the model to a batch of images. It has 96 | the following signature: 97 | logits, end_points = network_fn(images) 98 | Raises: 99 | ValueError: If network `name` is not recognized. 100 | """ 101 | if name not in networks_map: 102 | raise ValueError('Unknown network name: %s' % name) 103 | arg_scope = arg_scopes_map[name](weight_decay=weight_decay) 104 | func = networks_map[name] 105 | @functools.wraps(func) 106 | def network_fn(images): 107 | with slim.arg_scope(arg_scope): 108 | return func(images, num_classes, is_training=is_training) 109 | if hasattr(func, 'default_image_size'): 110 | network_fn.default_image_size = func.default_image_size 111 | 112 | return network_fn 113 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/nets_factory_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for nets_factory.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import tensorflow as tf 23 | 24 | from nets import nets_factory 25 | 26 | slim = tf.contrib.slim 27 | 28 | 29 | class NetworksTest(tf.test.TestCase): 30 | 31 | def testGetNetworkFn(self): 32 | batch_size = 5 33 | num_classes = 1000 34 | for net in nets_factory.networks_map: 35 | with self.test_session(): 36 | net_fn = nets_factory.get_network_fn(net, num_classes) 37 | # Most networks use 224 as their default_image_size 38 | image_size = getattr(net_fn, 'default_image_size', 224) 39 | inputs = tf.random_uniform((batch_size, image_size, image_size, 3)) 40 | logits, end_points = net_fn(inputs) 41 | self.assertTrue(isinstance(logits, tf.Tensor)) 42 | self.assertTrue(isinstance(end_points, dict)) 43 | self.assertEqual(logits.get_shape().as_list()[0], batch_size) 44 | self.assertEqual(logits.get_shape().as_list()[-1], num_classes) 45 | 46 | def testGetNetworkFnArgScope(self): 47 | batch_size = 5 48 | num_classes = 10 49 | net = 'cifarnet' 50 | with self.test_session(use_gpu=True): 51 | net_fn = nets_factory.get_network_fn(net, num_classes) 52 | image_size = getattr(net_fn, 'default_image_size', 224) 53 | with slim.arg_scope([slim.model_variable, slim.variable], 54 | device='/CPU:0'): 55 | inputs = tf.random_uniform((batch_size, image_size, image_size, 3)) 56 | net_fn(inputs) 57 | weights = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, 'CifarNet/conv1')[0] 58 | self.assertDeviceEqual('/CPU:0', weights.device) 59 | 60 | if __name__ == '__main__': 61 | tf.test.main() 62 |
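A quick usage sketch to complement the tests above (minimal and illustrative only: the network name and placeholder shape are arbitrary choices, and the `from nets import nets_factory` import simply mirrors the path layout these vendored files already assume):

```python
import tensorflow as tf

from nets import nets_factory

# Resolve the model function plus its arg_scope from the maps defined above.
network_fn = nets_factory.get_network_fn('resnet_v1_50',
                                         num_classes=1000,
                                         weight_decay=0.0001,
                                         is_training=False)

# get_network_fn copies default_image_size over from the wrapped model function.
image_size = network_fn.default_image_size  # 224 for resnet_v1_50

images = tf.placeholder(tf.float32, [None, image_size, image_size, 3])
logits, end_points = network_fn(images)  # the signature documented in get_network_fn
```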
-------------------------------------------------------------------------------- /libs/networks/slim_nets/overfeat.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains the model definition for the OverFeat network. 16 | 17 | The definition for the network was obtained from: 18 | OverFeat: Integrated Recognition, Localization and Detection using 19 | Convolutional Networks 20 | Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and 21 | Yann LeCun, 2014 22 | http://arxiv.org/abs/1312.6229 23 | 24 | Usage: 25 | with slim.arg_scope(overfeat.overfeat_arg_scope()): 26 | outputs, end_points = overfeat.overfeat(inputs) 27 | 28 | @@overfeat 29 | """ 30 | from __future__ import absolute_import 31 | from __future__ import division 32 | from __future__ import print_function 33 | 34 | import tensorflow as tf 35 | 36 | slim = tf.contrib.slim 37 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) 38 | 39 | 40 | def overfeat_arg_scope(weight_decay=0.0005): 41 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 42 | activation_fn=tf.nn.relu, 43 | weights_regularizer=slim.l2_regularizer(weight_decay), 44 | biases_initializer=tf.zeros_initializer()): 45 | with slim.arg_scope([slim.conv2d], padding='SAME'): 46 | with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc: 47 | return arg_sc 48 | 49 | 50 | def overfeat(inputs, 51 | num_classes=1000, 52 | is_training=True, 53 | dropout_keep_prob=0.5, 54 | spatial_squeeze=True, 55 | scope='overfeat'): 56 | """Contains the model definition for the OverFeat network. 57 | 58 | The definition for the network was obtained from: 59 | OverFeat: Integrated Recognition, Localization and Detection using 60 | Convolutional Networks 61 | Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and 62 | Yann LeCun, 2014 63 | http://arxiv.org/abs/1312.6229 64 | 65 | Note: All the fully_connected layers have been transformed to conv2d layers. 66 | To use in classification mode, resize input to 231x231. To use in fully 67 | convolutional mode, set spatial_squeeze to false. 68 | 69 | Args: 70 | inputs: a tensor of size [batch_size, height, width, channels]. 71 | num_classes: number of predicted classes. 72 | is_training: whether or not the model is being trained. 73 | dropout_keep_prob: the probability that activations are kept in the dropout 74 | layers during training. 75 | spatial_squeeze: whether or not the model should squeeze the spatial dimensions 76 | of the outputs. Useful to remove unnecessary dimensions for classification. 77 | scope: Optional scope for the variables. 78 | 79 | Returns: 80 | the last op containing the log predictions and end_points dict.
81 | 82 | """ 83 | with tf.variable_scope(scope, 'overfeat', [inputs]) as sc: 84 | end_points_collection = sc.name + '_end_points' 85 | # Collect outputs for conv2d, fully_connected and max_pool2d 86 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], 87 | outputs_collections=end_points_collection): 88 | net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID', 89 | scope='conv1') 90 | net = slim.max_pool2d(net, [2, 2], scope='pool1') 91 | net = slim.conv2d(net, 256, [5, 5], padding='VALID', scope='conv2') 92 | net = slim.max_pool2d(net, [2, 2], scope='pool2') 93 | net = slim.conv2d(net, 512, [3, 3], scope='conv3') 94 | net = slim.conv2d(net, 1024, [3, 3], scope='conv4') 95 | net = slim.conv2d(net, 1024, [3, 3], scope='conv5') 96 | net = slim.max_pool2d(net, [2, 2], scope='pool5') 97 | with slim.arg_scope([slim.conv2d], 98 | weights_initializer=trunc_normal(0.005), 99 | biases_initializer=tf.constant_initializer(0.1)): 100 | # Use conv2d instead of fully_connected layers. 101 | net = slim.conv2d(net, 3072, [6, 6], padding='VALID', scope='fc6') 102 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 103 | scope='dropout6') 104 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7') 105 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 106 | scope='dropout7') 107 | net = slim.conv2d(net, num_classes, [1, 1], 108 | activation_fn=None, 109 | normalizer_fn=None, 110 | biases_initializer=tf.zeros_initializer(), 111 | scope='fc8') 112 | # Convert end_points_collection into a end_point dict. 113 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 114 | if spatial_squeeze: 115 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed') 116 | end_points[sc.name + '/fc8'] = net 117 | return net, end_points 118 | overfeat.default_image_size = 231 119 | -------------------------------------------------------------------------------- /libs/val_libs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/val_libs/__init__.py -------------------------------------------------------------------------------- /output/trained_weights/README.md: -------------------------------------------------------------------------------- 1 | Please download [trained model](https://github.com/DetectionTeamUCAS/Models/tree/master/FPN_Tensorflow) by this project, then put it here. 
-------------------------------------------------------------------------------- /scalars.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/scalars.png -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/tools/__init__.py -------------------------------------------------------------------------------- /tools/cocoval.py: -------------------------------------------------------------------------------- 1 | from data.lib_coco.PythonAPI.pycocotools.coco import COCO 2 | from data.lib_coco.PythonAPI.pycocotools.cocoeval import COCOeval 3 | 4 | 5 | def cocoval(detected_json, eval_json): 6 | eval_gt = COCO(eval_json) 7 | 8 | eval_dt = eval_gt.loadRes(detected_json) 9 | cocoEval = COCOeval(eval_gt, eval_dt, iouType='bbox') 10 | 11 | # cocoEval.params.imgIds = eval_gt.getImgIds() 12 | cocoEval.evaluate() 13 | cocoEval.accumulate() 14 | cocoEval.summarize() 15 | 16 | 17 | detected_json = '/home/yangxue/isilon/yangxue/code/ADAS/output/yangxue/fpn/fpn.res50.coco.roialign.2x.detectron.new.concat/eval_dump/epoch-2.coco' 18 | eval_gt = '/unsullied/sharefs/_research_detection/GeneralDetection/COCO/data/MSCOCO/instances_minival2014.json' 19 | cocoval(detected_json, eval_gt) -------------------------------------------------------------------------------- /tools/inference.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | from __future__ import division 6 | 7 | import os, sys 8 | import tensorflow as tf 9 | import time 10 | import cv2 11 | import argparse 12 | import numpy as np 13 | sys.path.append("../") 14 | 15 | from data.io.image_preprocess import short_side_resize_for_inference_data 16 | from libs.configs import cfgs 17 | from libs.networks import build_whole_network 18 | from libs.box_utils import draw_box_in_img 19 | from help_utils import tools 20 | 21 | 22 | def detect(det_net, inference_save_path, real_test_imgname_list): 23 | 24 | # 1. preprocess img 25 | img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3]) # is RGB. 
not BGR 26 | img_batch = tf.cast(img_plac, tf.float32) 27 | img_batch = short_side_resize_for_inference_data(img_tensor=img_batch, 28 | target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN, 29 | length_limitation=cfgs.IMG_MAX_LENGTH) 30 | 31 | if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']: 32 | img_batch = (img_batch / 255 - tf.constant(cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD) 33 | else: 34 | img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN) 35 | img_batch = tf.expand_dims(img_batch, axis=0) # [1, None, None, 3] 36 | 37 | detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network( 38 | input_img_batch=img_batch, 39 | gtboxes_batch=None) 40 | 41 | init_op = tf.group( 42 | tf.global_variables_initializer(), 43 | tf.local_variables_initializer() 44 | ) 45 | 46 | restorer, restore_ckpt = det_net.get_restorer() 47 | 48 | config = tf.ConfigProto() 49 | config.gpu_options.allow_growth = True 50 | 51 | with tf.Session(config=config) as sess: 52 | sess.run(init_op) 53 | if restorer is not None: 54 | restorer.restore(sess, restore_ckpt) 55 | print('restore model') 56 | 57 | for i, a_img_name in enumerate(real_test_imgname_list): 58 | 59 | raw_img = cv2.imread(a_img_name) 60 | start = time.time() 61 | resized_img, detected_boxes, detected_scores, detected_categories = \ 62 | sess.run( 63 | [img_batch, detection_boxes, detection_scores, detection_category], 64 | feed_dict={img_plac: raw_img[:, :, ::-1]} # cv is BGR. But need RGB 65 | ) 66 | end = time.time() 67 | # print("{} cost time : {} ".format(img_name, (end - start))) 68 | 69 | show_indices = detected_scores >= cfgs.SHOW_SCORE_THRSHOLD 70 | show_scores = detected_scores[show_indices] 71 | show_boxes = detected_boxes[show_indices] 72 | show_categories = detected_categories[show_indices] 73 | 74 | draw_img = np.squeeze(resized_img, 0) 75 | 76 | if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']: 77 | draw_img = (draw_img * np.array(cfgs.PIXEL_STD) + np.array(cfgs.PIXEL_MEAN_)) * 255 78 | else: 79 | draw_img = draw_img + np.array(cfgs.PIXEL_MEAN) 80 | final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(draw_img, 81 | boxes=show_boxes, 82 | labels=show_categories, 83 | scores=show_scores, 84 | in_graph=False) 85 | nake_name = a_img_name.split('/')[-1] 86 | # print (inference_save_path + '/' + nake_name) 87 | cv2.imwrite(inference_save_path + '/' + nake_name, 88 | final_detections[:, :, ::-1]) 89 | 90 | tools.view_bar('{} image cost {}s'.format(a_img_name, (end - start)), i + 1, len(real_test_imgname_list)) 91 | 92 | 93 | def inference(test_dir, inference_save_path): 94 | 95 | test_imgname_list = [os.path.join(test_dir, img_name) for img_name in os.listdir(test_dir) 96 | if img_name.endswith(('.jpg', '.png', '.jpeg', '.tif', '.tiff'))] 97 | assert len(test_imgname_list) != 0, 'test_dir has no imgs there.'
\ 98 | ' Note that we only support image formats (.jpg, .jpeg, .png, .tif, .tiff).' 99 | 100 | faster_rcnn = build_whole_network.DetectionNetwork(base_network_name=cfgs.NET_NAME, 101 | is_training=False) 102 | detect(det_net=faster_rcnn, inference_save_path=inference_save_path, real_test_imgname_list=test_imgname_list) 103 | 104 | 105 | def parse_args(): 106 | """ 107 | Parse input arguments 108 | """ 109 | parser = argparse.ArgumentParser(description='TestImgs... You need to provide the test dir') 110 | parser.add_argument('--data_dir', dest='data_dir', 111 | help='data path', 112 | default='demos', type=str) 113 | parser.add_argument('--save_dir', dest='save_dir', 114 | help='demo imgs to save', 115 | default='inference_results', type=str) 116 | parser.add_argument('--GPU', dest='GPU', 117 | help='gpu id', 118 | default='0', type=str) 119 | 120 | if len(sys.argv) == 1: 121 | parser.print_help() 122 | sys.exit(1) 123 | 124 | args = parser.parse_args() 125 | 126 | return args 127 | 128 | 129 | if __name__ == '__main__': 130 | 131 | args = parse_args() 132 | print('Called with args:') 133 | print(args) 134 | os.environ["CUDA_VISIBLE_DEVICES"] = args.GPU 135 | inference(args.data_dir, 136 | inference_save_path=args.save_dir) 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | --------------------------------------------------------------------------------
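Given the argument parser defined in `tools/inference.py` above, a typical run (using the parser's own defaults for the demo and output directories) looks like:

```shell
$ cd tools
$ python inference.py --data_dir demos --save_dir inference_results --GPU 0
```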