├── .gitignore ├── LICENSE ├── README.md ├── comparison.png ├── data ├── __init__.py ├── io │ ├── BDD100K │ │ ├── BDD2VOC.py │ │ └── get_bdd100k_next_batch.py │ ├── COCO │ │ └── get_coco_next_batch.py │ ├── DOAI2019 │ │ ├── __init__.py │ │ └── train_crop.py │ ├── DOTA │ │ ├── get_dota_next_batch.py │ │ ├── train_crop.py │ │ └── val_crop.py │ ├── __init__.py │ ├── convert_data_to_tfrecord.py │ ├── convert_data_to_tfrecord_coco.py │ ├── convert_data_to_tfrecord_voc2012.py │ ├── image_preprocess.py │ ├── image_preprocess_multi_gpu.py │ ├── image_preprocess_multi_gpu_aug.py │ ├── read_tfrecord.py │ ├── read_tfrecord_multi_gpu.py │ └── read_tfrecord_multi_gpu_aug.py ├── lib_coco │ ├── PythonAPI │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── pycocoDemo.ipynb │ │ ├── pycocoEvalDemo.ipynb │ │ ├── pycocotools │ │ │ ├── __init__.py │ │ │ ├── _mask.c │ │ │ ├── _mask.pyx │ │ │ ├── coco.py │ │ │ ├── cocoeval.py │ │ │ └── mask.py │ │ └── setup.py │ ├── __init__.py │ ├── common │ │ ├── gason.cpp │ │ ├── gason.h │ │ ├── maskApi.c │ │ └── maskApi.h │ └── get_coco_next_batch.py └── pretrained_weights │ ├── README.md │ └── mobilenet │ └── README.md ├── help_utils ├── __init__.py └── tools.py ├── images.png ├── libs ├── __init__.py ├── box_utils │ ├── __init__.py │ ├── anchor_utils.py │ ├── boxes_utils.py │ ├── coordinate_convert.py │ ├── cython_utils │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── bbox.c │ │ ├── bbox.pyx │ │ ├── nms.c │ │ ├── nms.pyx │ │ └── setup.py │ ├── draw_box_in_img.py │ ├── encode_and_decode.py │ ├── iou.py │ ├── nms.py │ ├── show_box_in_tensor.py │ └── tf_ops.py ├── configs │ ├── COCO │ │ ├── __init__.py │ │ ├── cfgs_res50_1x_coco_v1.py │ │ ├── cfgs_res50_1x_coco_v2.py │ │ ├── cfgs_res50_1x_coco_v3.py │ │ └── cfgs_res50_1x_coco_v4.py │ ├── __init__.py │ └── cfgs.py ├── detection_oprations │ ├── __init__.py │ ├── anchor_target_layer_without_boxweight.py │ ├── proposal_opr.py │ ├── proposal_target_layer.py │ ├── proposal_target_layer_cascade.py │ └── proposal_target_layer_cascade_.py ├── export_pbs │ ├── __init__.py │ ├── exportPb.py │ ├── test_TensorRT.py │ └── test_exportPb.py ├── gluon2TF │ ├── .gitignore │ ├── README.md │ ├── mxnet_weights │ │ ├── mxnet_weights_namefile.py │ │ └── readme.txt │ └── resnet │ │ ├── __init__.py │ │ ├── download_mxnet_resnet_weights.py │ │ ├── parse_mxnet_weights.py │ │ ├── resnet.py │ │ ├── resnet_utils.py │ │ ├── resnet_utils_NCHW.py │ │ ├── some_test.py │ │ ├── test_resnet.py │ │ └── weights_map.py ├── label_name_dict │ ├── __init__.py │ ├── coco_dict.py │ ├── label_dict.py │ └── remote_sensing_dict.py ├── losses │ ├── __init__.py │ ├── losses.py │ └── losses_cascade.py ├── networks │ ├── __init__.py │ ├── build_whole_network.py │ ├── build_whole_network_cascade.py │ ├── layer.py │ ├── mobilenet │ │ ├── README.md │ │ ├── __init__.py │ │ ├── conv_blocks.py │ │ ├── mobilenet.py │ │ ├── mobilenet_v2.py │ │ └── mobilenet_v2_test.py │ ├── mobilenet_v2.py │ ├── ops.py │ ├── resnet.py │ ├── resnet_gluoncv.py │ └── slim_nets │ │ ├── __init__.py │ │ ├── alexnet.py │ │ ├── alexnet_test.py │ │ ├── cifarnet.py │ │ ├── inception.py │ │ ├── inception_resnet_v2.py │ │ ├── inception_resnet_v2_test.py │ │ ├── inception_utils.py │ │ ├── inception_v1.py │ │ ├── inception_v1_test.py │ │ ├── inception_v2.py │ │ ├── inception_v2_test.py │ │ ├── inception_v3.py │ │ ├── inception_v3_test.py │ │ ├── inception_v4.py │ │ ├── inception_v4_test.py │ │ ├── lenet.py │ │ ├── mobilenet_v1.md │ │ ├── mobilenet_v1.png │ │ ├── mobilenet_v1.py │ │ ├── mobilenet_v1_test.py │ │ ├── 
nets_factory.py │ │ ├── nets_factory_test.py │ │ ├── overfeat.py │ │ ├── overfeat_test.py │ │ ├── resnet_utils.py │ │ ├── resnet_v1.py │ │ ├── resnet_v1_test.py │ │ ├── resnet_v2.py │ │ ├── resnet_v2_test.py │ │ ├── vgg.py │ │ └── vgg_test.py ├── setup.py └── val_libs │ ├── __init__.py │ └── voc_eval.py ├── output └── trained_weights │ └── README.md ├── scalars.png └── tools ├── __init__.py ├── cocoval.py ├── demo.py ├── eval.py ├── eval_bdd.py ├── eval_coco.py ├── eval_coco_pyramid.py ├── eval_voc2012.py ├── inference.py ├── inference_for_coco.py ├── multi_gpu_train.py ├── multi_gpu_train_aug.py ├── multi_gpu_train_cascade.py ├── multi_gpu_train_warmup_cosine.py ├── test.py ├── test_coco.py ├── test_coco_pyramid.py ├── test_pyramid_dota.py ├── train.py ├── train_for_coco.py └── train_with_placeholder.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | .pyc 104 | .so 105 | *.data-00000-of-00001 106 | *.index 107 | *.meta 108 | events.* 109 | checkpoint 110 | .idea/ 111 | __pycache__/ 112 | *.json 113 | *.zip 114 | 115 | */tools/demos/* 116 | */output/* 117 | */data/pretrained_weights/* 118 | */data/tfrecord/* 119 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 DetectionTeamUCAS 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice 
and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Cascade R-CNN: Delving into High Quality Object Detection 2 | 3 | ## Abstract 4 | This repo is based on [FPN](https://github.com/DetectionTeamUCAS/FPN_Tensorflow) and was completed by [YangXue](https://github.com/yangxue0827). 5 | 6 | ## Train on COCO train2017 and test on COCO val2017 (coco minival) 7 | |Model|Backbone|Train Schedule|GPU|Image/GPU|FP16|Box AP (Mask AP)|Test Stage| 8 | |-----|--------|--------------|---|---------|----|----------------|----------| 9 | |Faster (paper)|R50v1-FPN|1X|8X TITAN XP|1|no|38.3|3| 10 | |Faster (ours)|R50v1-FPN|1X|8X 2080 Ti|1|no|38.2|3| 11 | |Faster (Face++)|R50v1-FPN|1X|8X 2080 Ti|2|no|39.1|3| 12 | 13 | ![2](comparison.png) 14 | 15 | ## My Development Environment 16 | 1、python3.5 (anaconda recommended) 17 | 2、cuda9.0 **(If you want to use cuda8, please set CUDA9 = False in the cfgs.py file.)** 18 | 3、[opencv(cv2)](https://pypi.org/project/opencv-python/) 19 | 4、[tfplot](https://github.com/wookayin/tensorflow-plot) 20 | 5、tensorflow == 1.12 21 | 22 | ## Download Model 23 | ### Pretrained weights 24 | 1、Please download the [resnet50_v1](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz) and [resnet101_v1](http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz) models pre-trained on ImageNet, and put them in data/pretrained_weights. 25 | 2、Or you can choose a better backbone; refer to [gluon2TF](https://github.com/yangJirui/gluon2TF). [Pretrained Model Link](https://pan.baidu.com/s/1GpqKg0dOaaWmwshvv1qWGg), password: 5ht9. 26 | 27 | ### Trained weights 28 | **Select a configuration file in the folder ($PATH_ROOT/libs/configs/), copy its contents into cfgs.py, then download the corresponding [weights](https://github.com/DetectionTeamUCAS/Models/tree/master/Cascade_FPN_Tensorflow).** 29 | 30 | ## Compile 31 | ``` 32 | cd $PATH_ROOT/libs/box_utils/cython_utils 33 | python setup.py build_ext --inplace 34 | ``` 35 | 36 | ## Train 37 | 38 | 1、If you want to train on your own data, please note: 39 | ``` 40 | (1) Modify parameters (such as CLASS_NUM, DATASET_NAME, VERSION, etc.) in $PATH_ROOT/libs/configs/cfgs.py (see the sketch below) 41 | (2) Add category information in $PATH_ROOT/libs/label_name_dict/label_dict.py 42 | (3) Add the data_name to $PATH_ROOT/data/io/read_tfrecord_multi_gpu.py 43 | ``` 44 | 
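As a rough illustration of step (1): the parameters are plain module-level constants in cfgs.py. The field names below are the ones referenced in this README and in data/io/read_tfrecord.py; the values are placeholders, not a working configuration, so copy a complete config from $PATH_ROOT/libs/configs/ (e.g. the COCO examples) rather than writing one from scratch:

```
# cfgs.py, illustrative excerpt only; all values below are placeholders
NET_NAME = 'resnet50_v1d'          # backbone name; must match the pretrained weights you downloaded
VERSION = 'Cascade_FPN_Res50_v1'   # experiment tag (placeholder)
DATASET_NAME = 'coco'              # must be a dataset name known to read_tfrecord_multi_gpu.py
CLASS_NUM = 80                     # category count (check the shipped configs for the exact convention)
CUDA9 = True                       # set to False when building against cuda8
```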
45 | 2、make tfrecord 46 | ``` 47 | cd $PATH_ROOT/data/io/ 48 | python convert_data_to_tfrecord_coco.py --coco_dir='/PATH/TO/JSON/FILE/' 49 | --save_name='train' 50 | --dataset='coco' 51 | ``` 52 | 53 | 3、multi-gpu train 54 | ``` 55 | cd $PATH_ROOT/tools 56 | python multi_gpu_train.py 57 | ``` 58 | 59 | ## Eval 60 | ``` 61 | cd $PATH_ROOT/tools 62 | python eval_coco.py --eval_data='/PATH/TO/IMAGES/' 63 | --eval_gt='/PATH/TO/TEST/ANNOTATION/' 64 | --GPU='0' 65 | ``` 66 | 67 | ## Tensorboard 68 | ``` 69 | cd $PATH_ROOT/output/summary 70 | tensorboard --logdir=. 71 | ``` 72 | ![3](images.png) 73 | 74 | ![4](scalars.png) 75 | 76 | ## Reference 77 | 1、https://github.com/endernewton/tf-faster-rcnn 78 | 2、https://github.com/zengarden/light_head_rcnn 79 | 3、https://github.com/tensorflow/models/tree/master/research/object_detection 80 | 4、https://github.com/CharlesShang/FastMaskRCNN 81 | 5、https://github.com/matterport/Mask_RCNN 82 | 6、https://github.com/msracver/Deformable-ConvNets 83 | 7、https://github.com/tensorpack/tensorpack 84 | -------------------------------------------------------------------------------- /comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/comparison.png -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/data/__init__.py -------------------------------------------------------------------------------- /data/io/BDD100K/BDD2VOC.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import cv2 4 | from xml.dom.minidom import Document 5 | import xml.dom.minidom 6 | 7 | label_map = {'bus': 1, 'traffic light': 2, 'traffic sign': 3, 'person': 4, 'bike': 5, 8 | 'truck': 6, 'motor': 7, 'car': 8, 'train': 9, 'rider': 10} 9 | FLAG = ['train', 'val'] 10 | 11 | 12 | def write_xml(save_path, name, box_list, label_list, w, h, d): 13 | 14 | # dict_box[filename]=json_dict[filename] 15 | doc = xml.dom.minidom.Document() 16 | root = doc.createElement('annotation') 17 | doc.appendChild(root) 18 | 19 | foldername = doc.createElement("folder") 20 | foldername.appendChild(doc.createTextNode("JPEGImages")) 21 | root.appendChild(foldername) 22 | 23 | nodeFilename = doc.createElement('filename') 24 | nodeFilename.appendChild(doc.createTextNode(name)) 25 | root.appendChild(nodeFilename) 26 | 27 | pathname = doc.createElement("path") 28 | pathname.appendChild(doc.createTextNode("xxxx")) 29 | root.appendChild(pathname) 30 | 31 | sourcename = doc.createElement("source") 32 | 33 | databasename = doc.createElement("database") 34 | databasename.appendChild(doc.createTextNode("Unknown")) 35 | sourcename.appendChild(databasename) 36 | 37 | annotationname = doc.createElement("annotation") 38 | annotationname.appendChild(doc.createTextNode("xxx")) 39 | sourcename.appendChild(annotationname) 40 | 41 | imagename = doc.createElement("image") 42 | imagename.appendChild(doc.createTextNode("xxx")) 43 | sourcename.appendChild(imagename) 44 | 45 | 
flickridname = doc.createElement("flickrid") 46 | flickridname.appendChild(doc.createTextNode("0")) 47 | sourcename.appendChild(flickridname) 48 | 49 | root.appendChild(sourcename) 50 | 51 | nodesize = doc.createElement('size') 52 | nodewidth = doc.createElement('width') 53 | nodewidth.appendChild(doc.createTextNode(str(w))) 54 | nodesize.appendChild(nodewidth) 55 | nodeheight = doc.createElement('height') 56 | nodeheight.appendChild(doc.createTextNode(str(h))) 57 | nodesize.appendChild(nodeheight) 58 | nodedepth = doc.createElement('depth') 59 | nodedepth.appendChild(doc.createTextNode(str(d))) 60 | nodesize.appendChild(nodedepth) 61 | root.appendChild(nodesize) 62 | 63 | segname = doc.createElement("segmented") 64 | segname.appendChild(doc.createTextNode("0")) 65 | root.appendChild(segname) 66 | 67 | for (box, label) in zip(box_list, label_list): 68 | 69 | nodeobject = doc.createElement('object') 70 | nodename = doc.createElement('name') 71 | nodename.appendChild(doc.createTextNode(str(label))) 72 | nodeobject.appendChild(nodename) 73 | nodebndbox = doc.createElement('bndbox') 74 | nodex1 = doc.createElement('x1') 75 | nodex1.appendChild(doc.createTextNode(str(box[0]))) 76 | nodebndbox.appendChild(nodex1) 77 | nodey1 = doc.createElement('y1') 78 | nodey1.appendChild(doc.createTextNode(str(box[1]))) 79 | nodebndbox.appendChild(nodey1) 80 | nodex2 = doc.createElement('x2') 81 | nodex2.appendChild(doc.createTextNode(str(box[2]))) 82 | nodebndbox.appendChild(nodex2) 83 | nodey2 = doc.createElement('y2') 84 | nodey2.appendChild(doc.createTextNode(str(box[3]))) 85 | nodebndbox.appendChild(nodey2) 86 | 87 | nodeobject.appendChild(nodebndbox) 88 | root.appendChild(nodeobject) 89 | fp = open(save_path, 'w') 90 | doc.writexml(fp, indent='\n') 91 | fp.close() 92 | 93 | 94 | for flag in FLAG: 95 | BDD_path = '/unsullied/sharefs/_research_detection/GeneralDetection/BDD100K/bdd100k/' 96 | BDD_labels_dir = os.path.join(BDD_path, 'labels/bdd100k_labels_images_{}.json'.format(flag)) 97 | BDD_labels = json.load(open(BDD_labels_dir, 'r')) 98 | BDD_images_dir = os.path.join(BDD_path, 'images/100k/{}'.format(flag)) 99 | 100 | for cnt, bdd in enumerate(BDD_labels): 101 | img_name = bdd['name'] 102 | img_path = os.path.join(BDD_images_dir, img_name) 103 | # img = cv2.imread(img_path) 104 | # h, w, d = img.shape 105 | h, w, d = 720, 1280, 3 106 | bdd_boxes = bdd['labels'] 107 | box_list, label_list = [], [] 108 | for bb in bdd_boxes: 109 | if bb['category'] not in label_map.keys(): 110 | continue 111 | box = bb['box2d'] 112 | box_list.append([round(box['x1']), round(box['y1']), 113 | round(box['x2']), round(box['y2'])]) 114 | label_list.append(bb['category']) 115 | 116 | if len(box_list) != 0: 117 | save_path = os.path.join('/unsullied/sharefs/yangxue/isilon/yangxue/data/BDD100K/BDD100K_VOC/bdd100k_{}/Annotations'.format(flag), 118 | img_name.replace('.jpg', '.xml')) 119 | write_xml(save_path, img_name, box_list, label_list, w, h, d) 120 | if cnt % 100 == 0: 121 | print('{} process: {}/{}'.format(flag, cnt+1, len(BDD_labels))) 122 | print('Finish!') 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | -------------------------------------------------------------------------------- /data/io/BDD100K/get_bdd100k_next_batch.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | import xml.etree.cElementTree as ET 6 | import cv2 7 | import numpy as np 8 | import os 9 | 
from libs.label_name_dict import coco_dict 10 | from libs.label_name_dict.label_dict import * 11 | 12 | 13 | root_path = '/unsullied/sharefs/yangxue/isilon/yangxue/data/BDD100K/BDD100K_VOC/bdd100k_train/' 14 | xmls = os.listdir(os.path.join(root_path, 'Annotations')) 15 | total_imgs = len(xmls) 16 | 17 | # print (NAME_LABEL_DICT) 18 | 19 | 20 | def read_xml_gtbox_and_label(xml_path): 21 | """ 22 | :param xml_path: the path of voc xml 23 | :return: a list contains gtboxes and labels, shape is [num_of_gtboxes, 5], 24 | and has [xmin, ymin, xmax, ymax, label] in a per row 25 | """ 26 | 27 | tree = ET.parse(xml_path) 28 | root = tree.getroot() 29 | img_width = None 30 | img_height = None 31 | box_list = [] 32 | for child_of_root in root: 33 | # if child_of_root.tag == 'filename': 34 | # assert child_of_root.text == xml_path.split('/')[-1].split('.')[0] \ 35 | # + FLAGS.img_format, 'xml_name and img_name cannot match' 36 | 37 | if child_of_root.tag == 'size': 38 | for child_item in child_of_root: 39 | if child_item.tag == 'width': 40 | img_width = int(child_item.text) 41 | if child_item.tag == 'height': 42 | img_height = int(child_item.text) 43 | 44 | if child_of_root.tag == 'object': 45 | label = None 46 | for child_item in child_of_root: 47 | if child_item.tag == 'name': 48 | label = NAME_LABEL_MAP[child_item.text] 49 | if child_item.tag == 'bndbox': 50 | tmp_box = [] 51 | for node in child_item: 52 | tmp_box.append(int(node.text)) 53 | assert label is not None, 'label is none, error' 54 | tmp_box.append(label) 55 | box_list.append(tmp_box) 56 | 57 | gtbox_label = np.array(box_list, dtype=np.int32) 58 | 59 | return img_height, img_width, gtbox_label 60 | 61 | 62 | def next_img(step): 63 | 64 | if step % total_imgs == 0: 65 | np.random.shuffle(xmls) 66 | xml_name = xmls[step % total_imgs] 67 | img_name = xml_name.replace('.xml', '.jpg') 68 | 69 | img = cv2.imread(os.path.join(root_path, 'train', img_name)) 70 | 71 | img_height, img_width, gtbox_label = read_xml_gtbox_and_label(os.path.join(root_path, 'Annotations', xml_name)) 72 | 73 | gtbox_and_label_list = np.array(gtbox_label, dtype=np.int32) 74 | if gtbox_and_label_list.shape[0] == 0: 75 | return next_img(step+1) 76 | else: 77 | return img_name, img[:, :, ::-1], gtbox_and_label_list 78 | 79 | 80 | if __name__ == '__main__': 81 | 82 | imgid, img, gtbox = next_img(3234) 83 | 84 | print("::") 85 | from libs.box_utils.draw_box_in_img import draw_boxes_with_label_and_scores 86 | 87 | img = draw_boxes_with_label_and_scores(img_array=img, boxes=gtbox[:, :-1], labels=gtbox[:, -1], 88 | scores=np.ones(shape=(len(gtbox), ))) 89 | print ("_----") 90 | 91 | 92 | cv2.imshow("test", img) 93 | cv2.waitKey(0) 94 | 95 | 96 | -------------------------------------------------------------------------------- /data/io/COCO/get_coco_next_batch.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | import xml.etree.cElementTree as ET 6 | import cv2 7 | import numpy as np 8 | import json 9 | import os 10 | from libs.label_name_dict import coco_dict 11 | from libs.label_name_dict.label_dict import * 12 | 13 | 14 | coco_trainvalmini = '/unsullied/sharefs/_research_detection/GeneralDetection/COCO/data/MSCOCO/odformat/coco_trainvalmini.odgt' 15 | 16 | 17 | def next_img(step): 18 | with open(coco_trainvalmini) as f: 19 | files = f.readlines() 20 | 21 | total_imgs = len(files) 22 | if step % total_imgs == 0: 23 | 
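        # new pass over the dataset: reshuffle the sample order (note that
        # `files` is re-read from disk on every call, so this shuffled order
        # is not kept for the next call)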
np.random.shuffle(files) 24 | 25 | raw_line = files[step % total_imgs] 26 | file = json.loads(raw_line) 27 | img_name = file['ID'] 28 | # img_height, img_width = file['height'], file['width'] 29 | 30 | img = cv2.imread(file['fpath']) 31 | 32 | gtboxes = file['gtboxes'] 33 | 34 | gtbox_label = [] 35 | for gt in gtboxes: 36 | box = gt['box'] 37 | label = gt['tag'] 38 | gtbox_label.append([box[0], box[1], box[0]+box[2], box[1]+box[3], NAME_LABEL_MAP[label]]) 39 | 40 | gtbox_and_label_list = np.array(gtbox_label, dtype=np.int32) 41 | if gtbox_and_label_list.shape[0] == 0: 42 | return next_img(step+1) 43 | else: 44 | return img_name, img[:, :, ::-1], gtbox_and_label_list 45 | 46 | 47 | if __name__ == '__main__': 48 | 49 | imgid, img, gtbox = next_img(3234) 50 | 51 | print("::") 52 | from libs.box_utils.draw_box_in_img import draw_boxes_with_label_and_scores 53 | 54 | img = draw_boxes_with_label_and_scores(img_array=img, boxes=gtbox[:, :-1], labels=gtbox[:, -1], 55 | scores=np.ones(shape=(len(gtbox), ))) 56 | print("_----") 57 | 58 | cv2.imwrite("test.jpg", img) 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /data/io/DOAI2019/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/data/io/DOAI2019/__init__.py -------------------------------------------------------------------------------- /data/io/DOTA/get_dota_next_batch.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | import xml.etree.cElementTree as ET 6 | import cv2 7 | import numpy as np 8 | import os 9 | from libs.label_name_dict import coco_dict 10 | from libs.label_name_dict.label_dict import * 11 | 12 | 13 | root_path = '/unsullied/sharefs/yangxue/isilon/yangxue/data/DOTA/DOTA_TOTAL/' 14 | xmls = os.listdir(os.path.join(root_path, 'xml_h')) 15 | total_imgs = len(xmls) 16 | 17 | # print (NAME_LABEL_DICT) 18 | 19 | 20 | def read_xml_gtbox_and_label(xml_path): 21 | """ 22 | :param xml_path: the path of voc xml 23 | :return: a list contains gtboxes and labels, shape is [num_of_gtboxes, 5], 24 | and has [xmin, ymin, xmax, ymax, label] in a per row 25 | """ 26 | 27 | tree = ET.parse(xml_path) 28 | root = tree.getroot() 29 | img_width = None 30 | img_height = None 31 | box_list = [] 32 | for child_of_root in root: 33 | # if child_of_root.tag == 'filename': 34 | # assert child_of_root.text == xml_path.split('/')[-1].split('.')[0] \ 35 | # + FLAGS.img_format, 'xml_name and img_name cannot match' 36 | 37 | if child_of_root.tag == 'size': 38 | for child_item in child_of_root: 39 | if child_item.tag == 'width': 40 | img_width = int(child_item.text) 41 | if child_item.tag == 'height': 42 | img_height = int(child_item.text) 43 | 44 | if child_of_root.tag == 'object': 45 | label = None 46 | for child_item in child_of_root: 47 | if child_item.tag == 'name': 48 | label = NAME_LABEL_MAP[child_item.text] 49 | if child_item.tag == 'bndbox': 50 | tmp_box = [] 51 | for node in child_item: 52 | tmp_box.append(int(node.text)) 53 | assert label is not None, 'label is none, error' 54 | tmp_box.append(label) 55 | box_list.append(tmp_box) 56 | 57 | gtbox_label = np.array(box_list, dtype=np.int32) 58 | 59 | return img_height, img_width, gtbox_label 60 | 61 | 62 | def next_img(step): 63 | 64 | if step % total_imgs == 0: 
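        # new epoch: reshuffle the module-level `xmls` list so the next pass
        # visits the images in a different order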
65 | np.random.shuffle(xmls) 66 | xml_name = xmls[step % total_imgs] 67 | img_name = xml_name.replace('.xml', '.jpg') 68 | 69 | img = cv2.imread(os.path.join(root_path, 'img', img_name)) 70 | 71 | img_height, img_width, gtbox_label = read_xml_gtbox_and_label(os.path.join(root_path, 'xml_h', xml_name)) 72 | 73 | gtbox_and_label_list = np.array(gtbox_label, dtype=np.int32) 74 | if gtbox_and_label_list.shape[0] == 0: 75 | return next_img(step+1) 76 | else: 77 | return img_name, img[:, :, ::-1], gtbox_and_label_list 78 | 79 | 80 | if __name__ == '__main__': 81 | 82 | imgid, img, gtbox = next_img(3234) 83 | 84 | print("::") 85 | from libs.box_utils.draw_box_in_img import draw_boxes_with_label_and_scores 86 | 87 | img = draw_boxes_with_label_and_scores(img_array=img, boxes=gtbox[:, :-1], labels=gtbox[:, -1], 88 | scores=np.ones(shape=(len(gtbox), ))) 89 | print ("_----") 90 | 91 | 92 | cv2.imshow("test", img) 93 | cv2.waitKey(0) 94 | 95 | 96 | -------------------------------------------------------------------------------- /data/io/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/data/io/__init__.py -------------------------------------------------------------------------------- /data/io/convert_data_to_tfrecord.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import sys 4 | sys.path.append('../../') 5 | import xml.etree.cElementTree as ET 6 | import numpy as np 7 | import tensorflow as tf 8 | import glob 9 | import cv2 10 | from libs.label_name_dict.label_dict import * 11 | from help_utils.tools import * 12 | 13 | tf.app.flags.DEFINE_string('VOC_dir', '/data/DOTA/DOTA_TOTAL/', 'Voc dir') 14 | tf.app.flags.DEFINE_string('xml_dir', 'xml', 'xml dir') 15 | tf.app.flags.DEFINE_string('image_dir', 'img', 'image dir') 16 | tf.app.flags.DEFINE_string('save_name', 'train', 'save name') 17 | tf.app.flags.DEFINE_string('save_dir', '../tfrecord/', 'save name') 18 | tf.app.flags.DEFINE_string('img_format', '.png', 'format of image') 19 | tf.app.flags.DEFINE_string('dataset', 'DOAI2019', 'dataset') 20 | FLAGS = tf.app.flags.FLAGS 21 | 22 | 23 | def _int64_feature(value): 24 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 25 | 26 | 27 | def _bytes_feature(value): 28 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 29 | 30 | 31 | def read_xml_gtbox_and_label(xml_path): 32 | """ 33 | :param xml_path: the path of voc xml 34 | :return: a list contains gtboxes and labels, shape is [num_of_gtboxes, 5], 35 | and has [xmin, ymin, xmax, ymax, label] in a per row 36 | """ 37 | 38 | tree = ET.parse(xml_path) 39 | root = tree.getroot() 40 | img_width = None 41 | img_height = None 42 | box_list = [] 43 | for child_of_root in root: 44 | # if child_of_root.tag == 'filename': 45 | # assert child_of_root.text == xml_path.split('/')[-1].split('.')[0] \ 46 | # + FLAGS.img_format, 'xml_name and img_name cannot match' 47 | 48 | if child_of_root.tag == 'size': 49 | for child_item in child_of_root: 50 | if child_item.tag == 'width': 51 | img_width = int(child_item.text) 52 | if child_item.tag == 'height': 53 | img_height = int(child_item.text) 54 | 55 | if child_of_root.tag == 'object': 56 | label = None 57 | for child_item in child_of_root: 58 | if child_item.tag == 'name': 59 | label = 
NAME_LABEL_MAP[child_item.text] 60 | if child_item.tag == 'bndbox': 61 | tmp_box = [] 62 | for node in child_item: 63 | tmp_box.append(int(node.text)) 64 | assert label is not None, 'label is none, error' 65 | tmp_box.append(label) 66 | box_list.append(tmp_box) 67 | 68 | gtbox_label = np.array(box_list, dtype=np.int32) 69 | 70 | return img_height, img_width, gtbox_label 71 | 72 | 73 | def convert_pascal_to_tfrecord(): 74 | xml_path = FLAGS.VOC_dir + FLAGS.xml_dir 75 | image_path = FLAGS.VOC_dir + FLAGS.image_dir 76 | save_path = FLAGS.save_dir + FLAGS.dataset + '_' + FLAGS.save_name + '.tfrecord' 77 | mkdir(FLAGS.save_dir) 78 | 79 | # writer_options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB) 80 | # writer = tf.python_io.TFRecordWriter(path=save_path, options=writer_options) 81 | writer = tf.python_io.TFRecordWriter(path=save_path) 82 | for count, xml in enumerate(glob.glob(xml_path + '/*.xml')): 83 | # to avoid path error in different development platform 84 | xml = xml.replace('\\', '/') 85 | 86 | img_name = xml.split('/')[-1].split('.')[0] + FLAGS.img_format 87 | img_path = image_path + '/' + img_name 88 | 89 | if not os.path.exists(img_path): 90 | print('{} is not exist!'.format(img_path)) 91 | continue 92 | 93 | img_height, img_width, gtbox_label = read_xml_gtbox_and_label(xml) 94 | 95 | # img = np.array(Image.open(img_path)) 96 | img = cv2.imread(img_path)[:, :, ::-1] 97 | 98 | feature = tf.train.Features(feature={ 99 | # do not need encode() in linux 100 | 'img_name': _bytes_feature(img_name.encode()), 101 | # 'img_name': _bytes_feature(img_name), 102 | 'img_height': _int64_feature(img_height), 103 | 'img_width': _int64_feature(img_width), 104 | 'img': _bytes_feature(img.tostring()), 105 | 'gtboxes_and_label': _bytes_feature(gtbox_label.tostring()), 106 | 'num_objects': _int64_feature(gtbox_label.shape[0]) 107 | }) 108 | 109 | example = tf.train.Example(features=feature) 110 | 111 | writer.write(example.SerializeToString()) 112 | 113 | view_bar('Conversion progress', count + 1, len(glob.glob(xml_path + '/*.xml'))) 114 | 115 | print('\nConversion is complete!') 116 | 117 | 118 | if __name__ == '__main__': 119 | # xml_path = '../data/dataset/VOCdevkit/VOC2007/Annotations/000005.xml' 120 | # read_xml_gtbox_and_label(xml_path) 121 | 122 | convert_pascal_to_tfrecord() 123 | -------------------------------------------------------------------------------- /data/io/convert_data_to_tfrecord_coco.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import sys 4 | sys.path.append('../../') 5 | import xml.etree.cElementTree as ET 6 | import numpy as np 7 | import tensorflow as tf 8 | import glob 9 | import cv2 10 | import json 11 | from libs.label_name_dict.label_dict import * 12 | from help_utils.tools import * 13 | 14 | tf.app.flags.DEFINE_string('coco_dir', '/data/COCO/coco_trainvalmini.odgt', 'coco dir') 15 | tf.app.flags.DEFINE_string('save_name', 'train', 'save name') 16 | tf.app.flags.DEFINE_string('save_dir', '../tfrecord/', 'save name') 17 | tf.app.flags.DEFINE_string('dataset', 'coco', 'dataset') 18 | FLAGS = tf.app.flags.FLAGS 19 | 20 | 21 | def _int64_feature(value): 22 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 23 | 24 | 25 | def _bytes_feature(value): 26 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 27 | 28 | 29 | def convert_pascal_to_tfrecord(coco_trainvalmini): 30 
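    """
    Convert an odgt annotation file into a single tfrecord. Each input line is
    one JSON object with keys 'ID', 'fpath', 'height', 'width' and 'gtboxes';
    every gtbox carries an [x, y, w, h] 'box' and a class-name 'tag', which are
    converted below to [xmin, ymin, xmax, ymax, label] rows.
    """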
| save_path = FLAGS.save_dir + FLAGS.dataset + '_' + FLAGS.save_name + '.tfrecord' 31 | mkdir(FLAGS.save_dir) 32 | 33 | # writer_options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB) 34 | # writer = tf.python_io.TFRecordWriter(path=save_path, options=writer_options) 35 | writer = tf.python_io.TFRecordWriter(path=save_path) 36 | 37 | with open(coco_trainvalmini) as f: 38 | files = f.readlines() 39 | 40 | img_count = 0 41 | gt_count = 0 42 | 43 | for count, raw_line in enumerate(files): 44 | file = json.loads(raw_line) 45 | img_path = os.path.join('/data/COCO/train2017', file['fpath'].split('_')[-1]) 46 | img_name = file['ID'] 47 | 48 | if not os.path.exists(img_path): 49 | # print('{} is not exist!'.format(img_path)) 50 | img_count += 1 51 | continue 52 | # img = np.array(Image.open(img_path)) 53 | img = cv2.imread(img_path)[:, :, ::-1] 54 | 55 | if img is None: 56 | continue 57 | 58 | gtboxes = file['gtboxes'] 59 | img_height = file['height'] 60 | img_width = file['width'] 61 | 62 | if len(gtboxes) == 0: 63 | # print('{}: gt is not exist!'.format(img_path)) 64 | gt_count += 1 65 | continue 66 | 67 | gtbox_label = [] 68 | for gt in gtboxes: 69 | box = gt['box'] 70 | label = gt['tag'] 71 | gtbox_label.append([box[0], box[1], box[0]+box[2], box[1]+box[3], NAME_LABEL_MAP[label]]) 72 | 73 | gtbox_label = np.array(gtbox_label, np.int32) 74 | 75 | feature = tf.train.Features(feature={ 76 | # do not need encode() in linux 77 | 'img_name': _bytes_feature(img_name.encode()), 78 | # 'img_name': _bytes_feature(img_name), 79 | 'img_height': _int64_feature(img_height), 80 | 'img_width': _int64_feature(img_width), 81 | 'img': _bytes_feature(img.tostring()), 82 | 'gtboxes_and_label': _bytes_feature(gtbox_label.tostring()), 83 | 'num_objects': _int64_feature(gtbox_label.shape[0]) 84 | }) 85 | 86 | example = tf.train.Example(features=feature) 87 | 88 | writer.write(example.SerializeToString()) 89 | 90 | view_bar('Conversion progress', count + 1, len(files)) 91 | 92 | print('{} images not exist!'.format(img_count)) 93 | print('{} gts not exist!'.format(gt_count)) 94 | print('\nConversion is complete!') 95 | 96 | 97 | if __name__ == '__main__': 98 | # xml_path = '../data/dataset/VOCdevkit/VOC2007/Annotations/000005.xml' 99 | # read_xml_gtbox_and_label(xml_path) 100 | 101 | # coco_path = '/unsullied/sharefs/_research_detection/GeneralDetection/COCO/data/MSCOCO/odformat/coco_trainvalmini.odgt' 102 | # convert_pascal_to_tfrecord(coco_path) 103 | convert_pascal_to_tfrecord(FLAGS.coco_dir) 104 | -------------------------------------------------------------------------------- /data/io/convert_data_to_tfrecord_voc2012.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import sys 4 | sys.path.append('../../') 5 | import xml.etree.cElementTree as ET 6 | import numpy as np 7 | import tensorflow as tf 8 | import glob 9 | import cv2 10 | from libs.label_name_dict.label_dict import * 11 | from help_utils.tools import * 12 | 13 | tf.app.flags.DEFINE_string('VOC_dir', '/unsullied/sharefs/yangxue/isilon/yangxue/data/VOC2012/VOCdevkit/VOC2012/', 'Voc dir') 14 | tf.app.flags.DEFINE_string('xml_dir', 'Annotations', 'xml dir') 15 | tf.app.flags.DEFINE_string('image_dir', 'JPEGImages', 'image dir') 16 | tf.app.flags.DEFINE_string('save_name', 'train2012', 'save name') 17 | tf.app.flags.DEFINE_string('save_dir', '../tfrecord/', 'save name') 18 | 
tf.app.flags.DEFINE_string('img_format', '.jpg', 'format of image') 19 | tf.app.flags.DEFINE_string('dataset', 'pascal', 'dataset') 20 | FLAGS = tf.app.flags.FLAGS 21 | 22 | 23 | def _int64_feature(value): 24 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 25 | 26 | 27 | def _bytes_feature(value): 28 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 29 | 30 | 31 | def read_xml_gtbox_and_label(xml_path): 32 | """ 33 | :param xml_path: the path of voc xml 34 | :return: a list contains gtboxes and labels, shape is [num_of_gtboxes, 5], 35 | and has [xmin, ymin, xmax, ymax, label] in a per row 36 | """ 37 | 38 | tree = ET.parse(xml_path) 39 | root = tree.getroot() 40 | img_width = None 41 | img_height = None 42 | box_list = [] 43 | for child_of_root in root: 44 | # if child_of_root.tag == 'filename': 45 | # assert child_of_root.text == xml_path.split('/')[-1].split('.')[0] \ 46 | # + FLAGS.img_format, 'xml_name and img_name cannot match' 47 | 48 | if child_of_root.tag == 'size': 49 | for child_item in child_of_root: 50 | if child_item.tag == 'width': 51 | img_width = int(child_item.text) 52 | if child_item.tag == 'height': 53 | img_height = int(child_item.text) 54 | 55 | if child_of_root.tag == 'object': 56 | label = None 57 | for child_item in child_of_root: 58 | if child_item.tag == 'name': 59 | label = NAME_LABEL_MAP[child_item.text] 60 | if child_item.tag == 'bndbox': 61 | tmp_box = [0, 0, 0, 0] 62 | for node in child_item: 63 | if node.tag == 'xmin': 64 | tmp_box[0] = int(node.text) 65 | if node.tag == 'ymin': 66 | tmp_box[1] = int(node.text) 67 | if node.tag == 'xmax': 68 | tmp_box[2] = int(node.text) 69 | if node.tag == 'ymax': 70 | tmp_box[3] = int(node.text) 71 | assert label is not None, 'label is none, error' 72 | tmp_box.append(label) 73 | box_list.append(tmp_box) 74 | 75 | gtbox_label = np.array(box_list, dtype=np.int32) 76 | 77 | return img_height, img_width, gtbox_label 78 | 79 | 80 | def convert_pascal_to_tfrecord(): 81 | xml_path = FLAGS.VOC_dir + FLAGS.xml_dir 82 | image_path = FLAGS.VOC_dir + FLAGS.image_dir 83 | save_path = FLAGS.save_dir + FLAGS.dataset + '_' + FLAGS.save_name + '.tfrecord' 84 | mkdir(FLAGS.save_dir) 85 | 86 | # writer_options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB) 87 | # writer = tf.python_io.TFRecordWriter(path=save_path, options=writer_options) 88 | writer = tf.python_io.TFRecordWriter(path=save_path) 89 | 90 | fr = open('/unsullied/sharefs/yangxue/isilon/yangxue/data/VOC2012/VOCdevkit/VOC2012/ImageSets/Main/trainval.txt', 'r') 91 | lines = fr.readlines() 92 | 93 | real_cnt = 0 94 | 95 | for count, xml in enumerate(glob.glob(xml_path + '/*.xml')): 96 | # to avoid path error in different development platform 97 | xml = xml.replace('\\', '/') 98 | 99 | tmp = xml.split('/')[-1].split('.')[0] + "\n" 100 | if tmp not in lines: 101 | continue 102 | 103 | img_name = xml.split('/')[-1].split('.')[0] + FLAGS.img_format 104 | img_path = image_path + '/' + img_name 105 | 106 | if not os.path.exists(img_path): 107 | print('{} is not exist!'.format(img_path)) 108 | continue 109 | 110 | img_height, img_width, gtbox_label = read_xml_gtbox_and_label(xml) 111 | 112 | # img = np.array(Image.open(img_path)) 113 | img = cv2.imread(img_path)[:, :, ::-1] 114 | 115 | feature = tf.train.Features(feature={ 116 | # do not need encode() in linux 117 | 'img_name': _bytes_feature(img_name.encode()), 118 | # 'img_name': _bytes_feature(img_name), 119 | 'img_height': _int64_feature(img_height), 120 | 
'img_width': _int64_feature(img_width), 121 | 'img': _bytes_feature(img.tostring()), 122 | 'gtboxes_and_label': _bytes_feature(gtbox_label.tostring()), 123 | 'num_objects': _int64_feature(gtbox_label.shape[0]) 124 | }) 125 | 126 | example = tf.train.Example(features=feature) 127 | 128 | writer.write(example.SerializeToString()) 129 | real_cnt += 1 130 | 131 | view_bar('Conversion progress', count + 1, len(glob.glob(xml_path + '/*.xml'))) 132 | 133 | print('\nConversion is complete! {} images.'.format(real_cnt)) 134 | 135 | 136 | if __name__ == '__main__': 137 | # xml_path = '../data/dataset/VOCdevkit/VOC2007/Annotations/000005.xml' 138 | # read_xml_gtbox_and_label(xml_path) 139 | 140 | convert_pascal_to_tfrecord() 141 | -------------------------------------------------------------------------------- /data/io/image_preprocess.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | from __future__ import division 6 | 7 | import tensorflow as tf 8 | 9 | import numpy as np 10 | 11 | 12 | def max_length_limitation(length, length_limitation): 13 | return tf.cond(tf.less(length, length_limitation), 14 | true_fn=lambda: length, 15 | false_fn=lambda: length_limitation) 16 | 17 | 18 | def short_side_resize(img_tensor, gtboxes_and_label, target_shortside_len, length_limitation=1200): 19 | ''' 20 | 21 | :param img_tensor:[h, w, c], gtboxes_and_label:[-1, 5]. gtboxes: [xmin, ymin, xmax, ymax] 22 | :param target_shortside_len: 23 | :param length_limitation: set max length to avoid OUT OF MEMORY 24 | :return: 25 | ''' 26 | img_h, img_w = tf.shape(img_tensor)[0], tf.shape(img_tensor)[1] 27 | new_h, new_w = tf.cond(tf.less(img_h, img_w), 28 | true_fn=lambda: (target_shortside_len, 29 | max_length_limitation(target_shortside_len * img_w // img_h, length_limitation)), 30 | false_fn=lambda: (max_length_limitation(target_shortside_len * img_h // img_w, length_limitation), 31 | target_shortside_len)) 32 | 33 | img_tensor = tf.expand_dims(img_tensor, axis=0) 34 | img_tensor = tf.image.resize_bilinear(img_tensor, [new_h, new_w]) 35 | 36 | xmin, ymin, xmax, ymax, label = tf.unstack(gtboxes_and_label, axis=1) 37 | 38 | new_xmin, new_ymin = xmin * new_w // img_w, ymin * new_h // img_h 39 | new_xmax, new_ymax = xmax * new_w // img_w, ymax * new_h // img_h 40 | img_tensor = tf.squeeze(img_tensor, axis=0) # ensure image tensor rank is 3 41 | 42 | return img_tensor, tf.transpose(tf.stack([new_xmin, new_ymin, new_xmax, new_ymax, label], axis=0)) 43 | 44 | 45 | def short_side_resize_for_inference_data(img_tensor, target_shortside_len, length_limitation=1200, is_resize=True): 46 | if is_resize: 47 | img_h, img_w = tf.shape(img_tensor)[0], tf.shape(img_tensor)[1] 48 | 49 | new_h, new_w = tf.cond(tf.less(img_h, img_w), 50 | true_fn=lambda: (target_shortside_len, 51 | max_length_limitation(target_shortside_len * img_w // img_h, length_limitation)), 52 | false_fn=lambda: (max_length_limitation(target_shortside_len * img_h // img_w, length_limitation), 53 | target_shortside_len)) 54 | 55 | img_tensor = tf.expand_dims(img_tensor, axis=0) 56 | img_tensor = tf.image.resize_bilinear(img_tensor, [new_h, new_w]) 57 | 58 | img_tensor = tf.squeeze(img_tensor, axis=0) # ensure image tensor rank is 3 59 | return img_tensor 60 | 61 | 62 | def flip_left_to_right(img_tensor, gtboxes_and_label): 63 | 64 | h, w = tf.shape(img_tensor)[0], tf.shape(img_tensor)[1] 65 | 66 | img_tensor = 
tf.image.flip_left_right(img_tensor) 67 | 68 | xmin, ymin, xmax, ymax, label = tf.unstack(gtboxes_and_label, axis=1) 69 | new_xmax = w - xmin 70 | new_xmin = w - xmax 71 | 72 | return img_tensor, tf.transpose(tf.stack([new_xmin, ymin, new_xmax, ymax, label], axis=0)) 73 | 74 | 75 | def random_flip_left_right(img_tensor, gtboxes_and_label): 76 | img_tensor, gtboxes_and_label= tf.cond(tf.less(tf.random_uniform(shape=[], minval=0, maxval=1), 0.5), 77 | lambda: flip_left_to_right(img_tensor, gtboxes_and_label), 78 | lambda: (img_tensor, gtboxes_and_label)) 79 | 80 | return img_tensor, gtboxes_and_label 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /data/io/image_preprocess_multi_gpu.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | from __future__ import division 6 | 7 | import tensorflow as tf 8 | 9 | import numpy as np 10 | 11 | 12 | def max_length_limitation(length, length_limitation): 13 | return tf.cond(tf.less(length, length_limitation), 14 | true_fn=lambda: length, 15 | false_fn=lambda: length_limitation) 16 | 17 | 18 | def short_side_resize(img_tensor, gtboxes_and_label, target_shortside_len, length_limitation=1200): 19 | ''' 20 | 21 | :param img_tensor:[h, w, c], gtboxes_and_label:[-1, 5]. gtboxes: [xmin, ymin, xmax, ymax] 22 | :param target_shortside_len: 23 | :param length_limitation: set max length to avoid OUT OF MEMORY 24 | :return: 25 | ''' 26 | img_h, img_w = tf.shape(img_tensor)[0], tf.shape(img_tensor)[1] 27 | new_h, new_w = tf.cond(tf.less(img_h, img_w), 28 | true_fn=lambda: (target_shortside_len, 29 | max_length_limitation(target_shortside_len * img_w // img_h, length_limitation)), 30 | false_fn=lambda: (max_length_limitation(target_shortside_len * img_h // img_w, length_limitation), 31 | target_shortside_len)) 32 | 33 | img_tensor = tf.expand_dims(img_tensor, axis=0) 34 | img_tensor = tf.image.resize_bilinear(img_tensor, [new_h, new_w]) 35 | 36 | xmin, ymin, xmax, ymax, label = tf.unstack(gtboxes_and_label, axis=1) 37 | 38 | new_xmin, new_ymin = xmin * new_w // img_w, ymin * new_h // img_h 39 | new_xmax, new_ymax = xmax * new_w // img_w, ymax * new_h // img_h 40 | img_tensor = tf.squeeze(img_tensor, axis=0) # ensure image tensor rank is 3 41 | 42 | return img_tensor, tf.transpose(tf.stack([new_xmin, new_ymin, new_xmax, new_ymax, label], axis=0)), new_h, new_w 43 | 44 | 45 | def short_side_resize_for_inference_data(img_tensor, target_shortside_len, length_limitation=1200, is_resize=True): 46 | if is_resize: 47 | img_h, img_w = tf.shape(img_tensor)[0], tf.shape(img_tensor)[1] 48 | 49 | new_h, new_w = tf.cond(tf.less(img_h, img_w), 50 | true_fn=lambda: (target_shortside_len, 51 | max_length_limitation(target_shortside_len * img_w // img_h, length_limitation)), 52 | false_fn=lambda: (max_length_limitation(target_shortside_len * img_h // img_w, length_limitation), 53 | target_shortside_len)) 54 | 55 | img_tensor = tf.expand_dims(img_tensor, axis=0) 56 | img_tensor = tf.image.resize_bilinear(img_tensor, [new_h, new_w]) 57 | 58 | img_tensor = tf.squeeze(img_tensor, axis=0) # ensure image tensor rank is 3 59 | return img_tensor 60 | 61 | 62 | def flip_left_to_right(img_tensor, gtboxes_and_label): 63 | 64 | h, w = tf.shape(img_tensor)[0], tf.shape(img_tensor)[1] 65 | 66 | img_tensor = tf.image.flip_left_right(img_tensor) 67 | 68 | xmin, ymin, xmax, ymax, label = 
tf.unstack(gtboxes_and_label, axis=1) 69 | new_xmax = w - xmin 70 | new_xmin = w - xmax 71 | 72 | return img_tensor, tf.transpose(tf.stack([new_xmin, ymin, new_xmax, ymax, label], axis=0)) 73 | 74 | 75 | def random_flip_left_right(img_tensor, gtboxes_and_label): 76 | img_tensor, gtboxes_and_label= tf.cond(tf.less(tf.random_uniform(shape=[], minval=0, maxval=1), 0.5), 77 | lambda: flip_left_to_right(img_tensor, gtboxes_and_label), 78 | lambda: (img_tensor, gtboxes_and_label)) 79 | 80 | return img_tensor, gtboxes_and_label 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /data/io/read_tfrecord.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | from __future__ import division 6 | 7 | import numpy as np 8 | import tensorflow as tf 9 | import os 10 | from data.io import image_preprocess 11 | from libs.configs import cfgs 12 | 13 | def read_single_example_and_decode(filename_queue): 14 | 15 | # tfrecord_options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB) 16 | 17 | # reader = tf.TFRecordReader(options=tfrecord_options) 18 | reader = tf.TFRecordReader() 19 | _, serialized_example = reader.read(filename_queue) 20 | 21 | features = tf.parse_single_example( 22 | serialized=serialized_example, 23 | features={ 24 | 'img_name': tf.FixedLenFeature([], tf.string), 25 | 'img_height': tf.FixedLenFeature([], tf.int64), 26 | 'img_width': tf.FixedLenFeature([], tf.int64), 27 | 'img': tf.FixedLenFeature([], tf.string), 28 | 'gtboxes_and_label': tf.FixedLenFeature([], tf.string), 29 | 'num_objects': tf.FixedLenFeature([], tf.int64) 30 | } 31 | ) 32 | img_name = features['img_name'] 33 | img_height = tf.cast(features['img_height'], tf.int32) 34 | img_width = tf.cast(features['img_width'], tf.int32) 35 | img = tf.decode_raw(features['img'], tf.uint8) 36 | 37 | img = tf.reshape(img, shape=[img_height, img_width, 3]) 38 | 39 | gtboxes_and_label = tf.decode_raw(features['gtboxes_and_label'], tf.int32) 40 | gtboxes_and_label = tf.reshape(gtboxes_and_label, [-1, 5]) 41 | 42 | num_objects = tf.cast(features['num_objects'], tf.int32) 43 | return img_name, img, gtboxes_and_label, num_objects 44 | 45 | 46 | def read_and_prepocess_single_img(filename_queue, shortside_len, is_training): 47 | 48 | img_name, img, gtboxes_and_label, num_objects = read_single_example_and_decode(filename_queue) 49 | 50 | img = tf.cast(img, tf.float32) 51 | 52 | if is_training: 53 | img, gtboxes_and_label = image_preprocess.short_side_resize(img_tensor=img, gtboxes_and_label=gtboxes_and_label, 54 | target_shortside_len=shortside_len, 55 | length_limitation=cfgs.IMG_MAX_LENGTH) 56 | img, gtboxes_and_label = image_preprocess.random_flip_left_right(img_tensor=img, 57 | gtboxes_and_label=gtboxes_and_label) 58 | 59 | else: 60 | img, gtboxes_and_label = image_preprocess.short_side_resize(img_tensor=img, gtboxes_and_label=gtboxes_and_label, 61 | target_shortside_len=shortside_len, 62 | length_limitation=cfgs.IMG_MAX_LENGTH) 63 | if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']: 64 | img = img / 255 - tf.constant([[cfgs.PIXEL_MEAN_]]) 65 | else: 66 | img = img - tf.constant([[cfgs.PIXEL_MEAN]]) # sub pixel mean at last 67 | return img_name, img, gtboxes_and_label, num_objects 68 | 69 | 70 | def next_batch(dataset_name, batch_size, shortside_len, is_training): 71 | ''' 72 | :return: 73 | img_name_batch: shape(1, 1) 74 
| img_batch: shape:(1, new_imgH, new_imgW, C) 75 | gtboxes_and_label_batch: shape (1, Num_Of_objects, 5). Each row is [x1, y1, x2, y2, label] 76 | ''' 77 | assert batch_size == 1, "only batch_size = 1 is supported; larger batch sizes may be supported in the future" 78 | 79 | if dataset_name not in ['ship', 'spacenet', 'pascal', 'coco', 'bdd100k', 'DOTA', 'DOTA_H']: 80 | raise ValueError('dataset_name must be one of: ship, spacenet, pascal, coco, bdd100k, DOTA, DOTA_H') 81 | 82 | if is_training: 83 | pattern = os.path.join('../data/tfrecord', dataset_name + '_train*') 84 | else: 85 | pattern = os.path.join('../data/tfrecord', dataset_name + '_test*') 86 | 87 | print('tfrecord path is -->', os.path.abspath(pattern)) 88 | 89 | filename_tensorlist = tf.train.match_filenames_once(pattern) 90 | 91 | filename_queue = tf.train.string_input_producer(filename_tensorlist) 92 | 93 | # shortside_len = tf.constant(shortside_len) 94 | # shortside_len = tf.random_shuffle(shortside_len)[0] 95 | 96 | img_name, img, gtboxes_and_label, num_obs = read_and_prepocess_single_img(filename_queue, shortside_len, 97 | is_training=is_training) 98 | img_name_batch, img_batch, gtboxes_and_label_batch, num_obs_batch = \ 99 | tf.train.batch( 100 | [img_name, img, gtboxes_and_label, num_obs], 101 | batch_size=batch_size, 102 | capacity=1, 103 | num_threads=1, 104 | dynamic_pad=True) 105 | return img_name_batch, img_batch, gtboxes_and_label_batch, num_obs_batch 106 | 107 | 108 | if __name__ == '__main__': 109 | os.environ["CUDA_VISIBLE_DEVICES"] = '0' 110 | img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \ 111 | next_batch(dataset_name=cfgs.DATASET_NAME, # 'pascal', 'coco' 112 | batch_size=cfgs.BATCH_SIZE, 113 | shortside_len=cfgs.IMG_SHORT_SIDE_LEN, 114 | is_training=True) 115 | gtboxes_and_label = tf.reshape(gtboxes_and_label_batch, [-1, 5]) 116 | 117 | init_op = tf.group( 118 | tf.global_variables_initializer(), 119 | tf.local_variables_initializer() 120 | ) 121 | 122 | config = tf.ConfigProto() 123 | config.gpu_options.allow_growth = True 124 | 125 | with tf.Session(config=config) as sess: 126 | sess.run(init_op) 127 | 128 | coord = tf.train.Coordinator() 129 | threads = tf.train.start_queue_runners(sess, coord) 130 | 131 | img_name_batch_, img_batch_, gtboxes_and_label_batch_, num_objects_batch_ \ 132 | = sess.run([img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch]) 133 | 134 | print(img_name_batch_) 135 | 136 | print('debug') 137 | 138 | coord.request_stop() 139 | coord.join(threads) 140 | -------------------------------------------------------------------------------- /data/lib_coco/PythonAPI/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | # install pycocotools locally 3 | python setup.py build_ext --inplace 4 | rm -rf build 5 | 6 | install: 7 | # install pycocotools to the Python site-packages 8 | python setup.py build_ext install 9 | rm -rf build -------------------------------------------------------------------------------- /data/lib_coco/PythonAPI/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/data/lib_coco/PythonAPI/__init__.py -------------------------------------------------------------------------------- /data/lib_coco/PythonAPI/pycocoEvalDemo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 
| "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%matplotlib inline\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "from pycocotools.coco import COCO\n", 14 | "from pycocotools.cocoeval import COCOeval\n", 15 | "import numpy as np\n", 16 | "import skimage.io as io\n", 17 | "import pylab\n", 18 | "pylab.rcParams['figure.figsize'] = (10.0, 8.0)" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [ 28 | { 29 | "name": "stdout", 30 | "output_type": "stream", 31 | "text": [ 32 | "Running demo for *bbox* results.\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "annType = ['segm','bbox','keypoints']\n", 38 | "annType = annType[1] #specify type here\n", 39 | "prefix = 'person_keypoints' if annType=='keypoints' else 'instances'\n", 40 | "print 'Running demo for *%s* results.'%(annType)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [ 50 | { 51 | "name": "stdout", 52 | "output_type": "stream", 53 | "text": [ 54 | "loading annotations into memory...\n", 55 | "Done (t=8.01s)\n", 56 | "creating index...\n", 57 | "index created!\n" 58 | ] 59 | } 60 | ], 61 | "source": [ 62 | "#initialize COCO ground truth api\n", 63 | "dataDir='../'\n", 64 | "dataType='val2014'\n", 65 | "annFile = '%s/annotations/%s_%s.json'%(dataDir,prefix,dataType)\n", 66 | "cocoGt=COCO(annFile)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 4, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [ 76 | { 77 | "name": "stdout", 78 | "output_type": "stream", 79 | "text": [ 80 | "Loading and preparing results... \n", 81 | "DONE (t=0.05s)\n", 82 | "creating index...\n", 83 | "index created!\n" 84 | ] 85 | } 86 | ], 87 | "source": [ 88 | "#initialize COCO detections api\n", 89 | "resFile='%s/results/%s_%s_fake%s100_results.json'\n", 90 | "resFile = resFile%(dataDir, prefix, dataType, annType)\n", 91 | "cocoDt=cocoGt.loadRes(resFile)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 5, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "imgIds=sorted(cocoGt.getImgIds())\n", 103 | "imgIds=imgIds[0:100]\n", 104 | "imgId = imgIds[np.random.randint(100)]" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 6, 110 | "metadata": { 111 | "collapsed": false 112 | }, 113 | "outputs": [ 114 | { 115 | "name": "stdout", 116 | "output_type": "stream", 117 | "text": [ 118 | "Running per image evaluation... \n", 119 | "DONE (t=0.46s).\n", 120 | "Accumulating evaluation results... 
\n", 121 | "DONE (t=0.38s).\n", 122 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.505\n", 123 | " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.697\n", 124 | " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.573\n", 125 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.586\n", 126 | " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.519\n", 127 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.501\n", 128 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.387\n", 129 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.594\n", 130 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.595\n", 131 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.640\n", 132 | " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.566\n", 133 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.564\n" 134 | ] 135 | } 136 | ], 137 | "source": [ 138 | "# running evaluation\n", 139 | "cocoEval = COCOeval(cocoGt,cocoDt,annType)\n", 140 | "cocoEval.params.imgIds = imgIds\n", 141 | "cocoEval.evaluate()\n", 142 | "cocoEval.accumulate()\n", 143 | "cocoEval.summarize()" 144 | ] 145 | } 146 | ], 147 | "metadata": { 148 | "kernelspec": { 149 | "display_name": "Python 2", 150 | "language": "python", 151 | "name": "python2" 152 | }, 153 | "language_info": { 154 | "codemirror_mode": { 155 | "name": "ipython", 156 | "version": 2 157 | }, 158 | "file_extension": ".py", 159 | "mimetype": "text/x-python", 160 | "name": "python", 161 | "nbconvert_exporter": "python", 162 | "pygments_lexer": "ipython2", 163 | "version": "2.7.10" 164 | } 165 | }, 166 | "nbformat": 4, 167 | "nbformat_minor": 0 168 | } 169 | -------------------------------------------------------------------------------- /data/lib_coco/PythonAPI/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /data/lib_coco/PythonAPI/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import pycocotools._mask as _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 
22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | iou = _mask.iou 77 | merge = _mask.merge 78 | frPyObjects = _mask.frPyObjects 79 | 80 | def encode(bimask): 81 | if len(bimask.shape) == 3: 82 | return _mask.encode(bimask) 83 | elif len(bimask.shape) == 2: 84 | h, w = bimask.shape 85 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] 86 | 87 | def decode(rleObjs): 88 | if type(rleObjs) == list: 89 | return _mask.decode(rleObjs) 90 | else: 91 | return _mask.decode([rleObjs])[:,:,0] 92 | 93 | def area(rleObjs): 94 | if type(rleObjs) == list: 95 | return _mask.area(rleObjs) 96 | else: 97 | return _mask.area([rleObjs])[0] 98 | 99 | def toBbox(rleObjs): 100 | if type(rleObjs) == list: 101 | return _mask.toBbox(rleObjs) 102 | else: 103 | return _mask.toBbox([rleObjs])[0] -------------------------------------------------------------------------------- /data/lib_coco/PythonAPI/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | import numpy as np 3 | 4 | # To compile and install locally run "python setup.py build_ext --inplace" 5 | # To install library to Python site-packages run "python setup.py build_ext install" 6 | 7 | ext_modules = [ 8 | Extension( 9 | 'pycocotools._mask', 10 | sources=['../common/maskApi.c', 'pycocotools/_mask.pyx'], 11 | include_dirs = [np.get_include(), '../common'], 12 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'], 13 | ) 14 | ] 15 | 16 | setup( 17 | name='pycocotools', 18 | packages=['pycocotools'], 19 | package_dir = {'pycocotools': 'pycocotools'}, 20 | install_requires=[ 21 | 'setuptools>=18.0', 22 | 'cython>=0.27.3', 23 | 'matplotlib>=2.1.0' 24 | ], 25 | version='2.0', 26 | ext_modules= ext_modules 27 | ) 28 | -------------------------------------------------------------------------------- /data/lib_coco/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/data/lib_coco/__init__.py -------------------------------------------------------------------------------- /data/lib_coco/common/gason.h: -------------------------------------------------------------------------------- 1 | // https://github.com/vivkin/gason - pulled January 10, 2016 2 | #pragma once 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | enum JsonTag { 9 | JSON_NUMBER = 0, 10 | JSON_STRING, 11 | JSON_ARRAY, 12 | JSON_OBJECT, 13 | JSON_TRUE, 14 | JSON_FALSE, 15 | JSON_NULL = 0xF 16 | }; 17 | 18 | struct JsonNode; 19 | 20 | #define JSON_VALUE_PAYLOAD_MASK 0x00007FFFFFFFFFFFULL 21 | #define JSON_VALUE_NAN_MASK 0x7FF8000000000000ULL 22 | #define JSON_VALUE_TAG_MASK 0xF 23 | #define JSON_VALUE_TAG_SHIFT 47 24 | 25 | union JsonValue { 26 | uint64_t ival; 27 | double fval; 28 | 29 | JsonValue(double x) 30 | : fval(x) { 31 | } 32 | JsonValue(JsonTag tag = JSON_NULL, void *payload = nullptr) { 33 | assert((uintptr_t)payload <= JSON_VALUE_PAYLOAD_MASK); 34 | ival = JSON_VALUE_NAN_MASK | ((uint64_t)tag << JSON_VALUE_TAG_SHIFT) | (uintptr_t)payload; 35 | } 36 | bool isDouble() const { 37 | return (int64_t)ival <= (int64_t)JSON_VALUE_NAN_MASK; 38 | } 39 | JsonTag getTag() const { 40 | return isDouble() ? 
JSON_NUMBER : JsonTag((ival >> JSON_VALUE_TAG_SHIFT) & JSON_VALUE_TAG_MASK); 41 | } 42 | uint64_t getPayload() const { 43 | assert(!isDouble()); 44 | return ival & JSON_VALUE_PAYLOAD_MASK; 45 | } 46 | double toNumber() const { 47 | assert(getTag() == JSON_NUMBER); 48 | return fval; 49 | } 50 | char *toString() const { 51 | assert(getTag() == JSON_STRING); 52 | return (char *)getPayload(); 53 | } 54 | JsonNode *toNode() const { 55 | assert(getTag() == JSON_ARRAY || getTag() == JSON_OBJECT); 56 | return (JsonNode *)getPayload(); 57 | } 58 | }; 59 | 60 | struct JsonNode { 61 | JsonValue value; 62 | JsonNode *next; 63 | char *key; 64 | }; 65 | 66 | struct JsonIterator { 67 | JsonNode *p; 68 | 69 | void operator++() { 70 | p = p->next; 71 | } 72 | bool operator!=(const JsonIterator &x) const { 73 | return p != x.p; 74 | } 75 | JsonNode *operator*() const { 76 | return p; 77 | } 78 | JsonNode *operator->() const { 79 | return p; 80 | } 81 | }; 82 | 83 | inline JsonIterator begin(JsonValue o) { 84 | return JsonIterator{o.toNode()}; 85 | } 86 | inline JsonIterator end(JsonValue) { 87 | return JsonIterator{nullptr}; 88 | } 89 | 90 | #define JSON_ERRNO_MAP(XX) \ 91 | XX(OK, "ok") \ 92 | XX(BAD_NUMBER, "bad number") \ 93 | XX(BAD_STRING, "bad string") \ 94 | XX(BAD_IDENTIFIER, "bad identifier") \ 95 | XX(STACK_OVERFLOW, "stack overflow") \ 96 | XX(STACK_UNDERFLOW, "stack underflow") \ 97 | XX(MISMATCH_BRACKET, "mismatch bracket") \ 98 | XX(UNEXPECTED_CHARACTER, "unexpected character") \ 99 | XX(UNQUOTED_KEY, "unquoted key") \ 100 | XX(BREAKING_BAD, "breaking bad") \ 101 | XX(ALLOCATION_FAILURE, "allocation failure") 102 | 103 | enum JsonErrno { 104 | #define XX(no, str) JSON_##no, 105 | JSON_ERRNO_MAP(XX) 106 | #undef XX 107 | }; 108 | 109 | const char *jsonStrError(int err); 110 | 111 | class JsonAllocator { 112 | struct Zone { 113 | Zone *next; 114 | size_t used; 115 | } *head = nullptr; 116 | 117 | public: 118 | JsonAllocator() = default; 119 | JsonAllocator(const JsonAllocator &) = delete; 120 | JsonAllocator &operator=(const JsonAllocator &) = delete; 121 | JsonAllocator(JsonAllocator &&x) : head(x.head) { 122 | x.head = nullptr; 123 | } 124 | JsonAllocator &operator=(JsonAllocator &&x) { 125 | head = x.head; 126 | x.head = nullptr; 127 | return *this; 128 | } 129 | ~JsonAllocator() { 130 | deallocate(); 131 | } 132 | void *allocate(size_t size); 133 | void deallocate(); 134 | }; 135 | 136 | int jsonParse(char *str, char **endptr, JsonValue *value, JsonAllocator &allocator); 137 | -------------------------------------------------------------------------------- /data/lib_coco/common/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | 9 | typedef unsigned int uint; 10 | typedef unsigned long siz; 11 | typedef unsigned char byte; 12 | typedef double* BB; 13 | typedef struct { siz h, w, m; uint *cnts; } RLE; 14 | 15 | /* Initialize/destroy RLE. */ 16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 17 | void rleFree( RLE *R ); 18 | 19 | /* Initialize/destroy RLE array. 
*/ 20 | void rlesInit( RLE **R, siz n ); 21 | void rlesFree( RLE **R, siz n ); 22 | 23 | /* Encode binary masks using RLE. */ 24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 25 | 26 | /* Decode binary masks encoded via RLE. */ 27 | void rleDecode( const RLE *R, byte *mask, siz n ); 28 | 29 | /* Compute union or intersection of encoded masks. */ 30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); 31 | 32 | /* Compute area of encoded masks. */ 33 | void rleArea( const RLE *R, siz n, uint *a ); 34 | 35 | /* Compute intersection over union between masks. */ 36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 37 | 38 | /* Compute non-maximum suppression between bounding masks */ 39 | void rleNms( RLE *dt, siz n, uint *keep, double thr ); 40 | 41 | /* Compute intersection over union between bounding boxes. */ 42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 43 | 44 | /* Compute non-maximum suppression between bounding boxes */ 45 | void bbNms( BB dt, siz n, uint *keep, double thr ); 46 | 47 | /* Get bounding boxes surrounding encoded masks. */ 48 | void rleToBbox( const RLE *R, BB bb, siz n ); 49 | 50 | /* Convert bounding boxes to encoded masks. */ 51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 52 | 53 | /* Convert polygon to encoded mask. */ 54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 55 | 56 | /* Get compressed string representation of encoded mask. */ 57 | char* rleToString( const RLE *R ); 58 | 59 | /* Convert from compressed string representation of encoded mask. */ 60 | void rleFrString( RLE *R, char *s, siz h, siz w ); 61 | -------------------------------------------------------------------------------- /data/lib_coco/get_coco_next_batch.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | import sys, os 6 | # sys.path.insert(0, os.path.abspath('.')) 7 | sys.path.insert(0, './PythonAPI/') 8 | # sys.path.insert(0, os.path.abspath('data')) 9 | for _ in sys.path: 10 | print (_) 11 | from PythonAPI.pycocotools.coco import COCO 12 | import cv2 13 | import numpy as np 14 | import os 15 | from libs.label_name_dict import coco_dict 16 | 17 | 18 | annotation_path = '/home/yjr/DataSet/COCO/2017/annotations/instances_train2017.json' 19 | print ("load coco .... 
it will cost about 17s..") 20 | coco = COCO(annotation_path) 21 | 22 | imgId_list = coco.getImgIds() 23 | imgId_list = np.array(imgId_list) 24 | 25 | total_imgs = len(imgId_list) 26 | 27 | # print (NAME_LABEL_DICT) 28 | 29 | 30 | def next_img(step): 31 | 32 | if step % total_imgs == 0: 33 | np.random.shuffle(imgId_list) 34 | imgid = imgId_list[step % total_imgs] 35 | 36 | imgname = coco.loadImgs(ids=[imgid])[0]['file_name'] 37 | # print (type(imgname), imgname) 38 | img = cv2.imread(os.path.join("/home/yjr/DataSet/COCO/2017/train2017", imgname)) 39 | 40 | annotation = coco.imgToAnns[imgid] 41 | gtbox_and_label_list = [] 42 | for ann in annotation: 43 | box = ann['bbox'] 44 | 45 | box = [box[0], box[1], box[0]+box[2], box[1]+box[3]] # [xmin, ymin, xmax, ymax] 46 | cat_id = ann['category_id'] 47 | cat_name = coco_dict.originID_classes[cat_id] #ID_NAME_DICT[cat_id] 48 | label = coco_dict.NAME_LABEL_MAP[cat_name] 49 | gtbox_and_label_list.append(box + [label]) 50 | gtbox_and_label_list = np.array(gtbox_and_label_list, dtype=np.int32) 51 | # print (img.shape, gtbox_and_label_list.shape) 52 | if gtbox_and_label_list.shape[0] == 0: 53 | return next_img(step+1) # skip images that have no annotations 54 | else: 55 | return imgid, img[:, :, ::-1], gtbox_and_label_list # BGR -> RGB 56 | 57 | 58 | if __name__ == '__main__': 59 | 60 | imgid, img, gtbox = next_img(3234) 61 | 62 | print("::") 63 | from libs.box_utils.draw_box_in_img import draw_boxes_with_label_and_scores 64 | 65 | img = draw_boxes_with_label_and_scores(img_array=img, boxes=gtbox[:, :-1], labels=gtbox[:, -1], 66 | scores=np.ones(shape=(len(gtbox), ))) 67 | print ("_----") 68 | 69 | 70 | cv2.imshow("test", img) 71 | cv2.waitKey(0) 72 | 73 | 74 | -------------------------------------------------------------------------------- /data/pretrained_weights/README.md: -------------------------------------------------------------------------------- 1 | 1. Please download the [resnet50_v1](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz) and [resnet101_v1](http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz) models pre-trained on ImageNet and put them in data/pretrained_weights. 2 | 2. Alternatively, use a better backbone; refer to [gluon2TF](https://github.com/yangJirui/gluon2TF). [Pretrained model link](https://pan.baidu.com/s/1GpqKg0dOaaWmwshvv1qWGg), password: 5ht9. 3 | -------------------------------------------------------------------------------- /data/pretrained_weights/mobilenet/README.md: -------------------------------------------------------------------------------- 1 | Please download the [mobilenet_v2](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz) model pre-trained on ImageNet and put it in data/pretrained_weights/mobilenet.
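As a quick sanity check (a minimal sketch, assuming the tgz has been extracted in place so that the .index/.data files sit next to the checkpoint prefix below), list a few variables from the checkpoint:

    import tensorflow as tf

    ckpt = 'data/pretrained_weights/mobilenet/mobilenet_v2_1.0_224.ckpt'
    reader = tf.train.NewCheckpointReader(ckpt)
    shapes = reader.get_variable_to_shape_map()
    for name in sorted(shapes)[:5]:
        print(name, shapes[name])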
-------------------------------------------------------------------------------- /help_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/help_utils/__init__.py -------------------------------------------------------------------------------- /help_utils/tools.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import math 4 | import sys 5 | import os 6 | 7 | 8 | def view_bar(message, num, total): 9 | rate = num / total 10 | rate_num = int(rate * 40) 11 | rate_nums = math.ceil(rate * 100) 12 | r = '\r%s:[%s%s]%d%%\t%d/%d' % (message, ">" * rate_num, " " * (40 - rate_num), rate_nums, num, total,) 13 | sys.stdout.write(r) 14 | sys.stdout.flush() 15 | 16 | 17 | def mkdir(path): 18 | if not os.path.exists(path): 19 | os.makedirs(path) -------------------------------------------------------------------------------- /images.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/images.png -------------------------------------------------------------------------------- /libs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/__init__.py -------------------------------------------------------------------------------- /libs/box_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/box_utils/__init__.py -------------------------------------------------------------------------------- /libs/box_utils/anchor_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, print_function, division 3 | 4 | import tensorflow as tf 5 | from libs.configs import cfgs 6 | 7 | 8 | def make_anchors(base_anchor_size, anchor_scales, anchor_ratios, 9 | featuremap_height, featuremap_width, 10 | stride, name='make_anchors'): 11 | ''' 12 | :param base_anchor_size:256 13 | :param anchor_scales: 14 | :param anchor_ratios: 15 | :param featuremap_height: 16 | :param featuremap_width: 17 | :param stride: 18 | :return: 19 | ''' 20 | with tf.variable_scope(name): 21 | base_anchor = tf.constant([0, 0, base_anchor_size, base_anchor_size], tf.float32) # [x_center, y_center, w, h] 22 | 23 | ws, hs = enum_ratios(enum_scales(base_anchor, anchor_scales), 24 | anchor_ratios) # per locations ws and hs 25 | 26 | # featuremap_height = tf.Print(featuremap_height, 27 | # [featuremap_height, featuremap_width], summarize=10, 28 | # message=name+"_SHAPE***") 29 | 30 | x_centers = tf.range(featuremap_width, dtype=tf.float32) * stride 31 | y_centers = tf.range(featuremap_height, dtype=tf.float32) * stride 32 | 33 | if cfgs.USE_CENTER_OFFSET: 34 | x_centers = x_centers + stride/2. 35 | y_centers = y_centers + stride/2. 
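            # (Added note, not in the original: with stride=16 this shifts the
            #  first anchor centre from 0.0 to 8.0, i.e. to the middle of the
            #  first feature-map cell instead of its top-left corner.)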
36 | 37 | x_centers, y_centers = tf.meshgrid(x_centers, y_centers) 38 | 39 | ws, x_centers = tf.meshgrid(ws, x_centers) 40 | hs, y_centers = tf.meshgrid(hs, y_centers) 41 | 42 | anchor_centers = tf.stack([x_centers, y_centers], 2) 43 | anchor_centers = tf.reshape(anchor_centers, [-1, 2]) 44 | 45 | box_sizes = tf.stack([ws, hs], axis=2) 46 | box_sizes = tf.reshape(box_sizes, [-1, 2]) 47 | # anchors = tf.concat([anchor_centers, box_sizes], axis=1) 48 | anchors = tf.concat([anchor_centers - 0.5*box_sizes, 49 | anchor_centers + 0.5*box_sizes], axis=1) 50 | return anchors 51 | 52 | 53 | def enum_scales(base_anchor, anchor_scales): 54 | 55 | anchor_scales = base_anchor * tf.constant(anchor_scales, dtype=tf.float32, shape=(len(anchor_scales), 1)) 56 | 57 | return anchor_scales 58 | 59 | 60 | def enum_ratios(anchors, anchor_ratios): 61 | ''' 62 | ratio = h /w 63 | :param anchors: 64 | :param anchor_ratios: 65 | :return: 66 | ''' 67 | ws = anchors[:, 2] # for base anchor: w == h 68 | hs = anchors[:, 3] 69 | sqrt_ratios = tf.sqrt(tf.constant(anchor_ratios)) 70 | 71 | ws = tf.reshape(ws / sqrt_ratios[:, tf.newaxis], [-1, 1]) 72 | hs = tf.reshape(hs * sqrt_ratios[:, tf.newaxis], [-1, 1]) 73 | 74 | return ws, hs 75 | 76 | 77 | if __name__ == '__main__': 78 | import os 79 | os.environ["CUDA_VISIBLE_DEVICES"] = '0' 80 | base_anchor_size = 256 81 | anchor_scales = [1.0] 82 | anchor_ratios = [0.5, 2.0, 1.0] 83 | anchors = make_anchors(base_anchor_size=base_anchor_size, anchor_ratios=anchor_ratios, 84 | anchor_scales=anchor_scales, 85 | featuremap_width=32, 86 | featuremap_height=63, 87 | stride=16) 88 | init = tf.global_variables_initializer() 89 | with tf.Session() as sess: 90 | sess.run(init) 91 | anchor_result = sess.run(anchors) 92 | print (anchor_result.shape) 93 | -------------------------------------------------------------------------------- /libs/box_utils/coordinate_convert.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import cv2 8 | import numpy as np 9 | 10 | 11 | def forward_convert(coordinate, with_label=True): 12 | """ 13 | :param coordinate: format [x_c, y_c, w, h, theta] 14 | :return: format [x1, y1, x2, y2, x3, y3, x4, y4] 15 | """ 16 | boxes = [] 17 | if with_label: 18 | for rect in coordinate: 19 | box = cv2.boxPoints(((rect[0], rect[1]), (rect[2], rect[3]), rect[4])) 20 | box = np.reshape(box, [-1, ]) 21 | boxes.append([box[0], box[1], box[2], box[3], box[4], box[5], box[6], box[7], rect[5]]) 22 | else: 23 | for rect in coordinate: 24 | box = cv2.boxPoints(((rect[0], rect[1]), (rect[2], rect[3]), rect[4])) 25 | box = np.reshape(box, [-1, ]) 26 | boxes.append([box[0], box[1], box[2], box[3], box[4], box[5], box[6], box[7]]) 27 | 28 | return np.array(boxes, dtype=np.float32) 29 | 30 | 31 | def back_forward_convert(coordinate, with_label=True): 32 | """ 33 | :param coordinate: format [x1, y1, x2, y2, x3, y3, x4, y4, (label)] 34 | :param with_label: default True 35 | :return: format [x_c, y_c, w, h, theta, (label)] 36 | """ 37 | 38 | boxes = [] 39 | if with_label: 40 | for rect in coordinate: 41 | box = np.int0(rect[:-1]) 42 | box = box.reshape([4, 2]) 43 | rect1 = cv2.minAreaRect(box) 44 | 45 | x, y, w, h, theta = rect1[0][0], rect1[0][1], rect1[1][0], rect1[1][1], rect1[2] 46 | boxes.append([x, y, w, h, theta, rect[-1]]) 47 | 48 | else: 49 | for rect in coordinate: 50 | box = np.int0(rect) 
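            # (Added note: the theta produced below assumes the OpenCV 3.x
            #  convention, where cv2.minAreaRect returns an angle in [-90, 0);
            #  OpenCV >= 4.5 switched to (0, 90], which changes these outputs.)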
51 | box = box.reshape([4, 2]) 52 | rect1 = cv2.minAreaRect(box) 53 | 54 | x, y, w, h, theta = rect1[0][0], rect1[0][1], rect1[1][0], rect1[1][1], rect1[2] 55 | boxes.append([x, y, w, h, theta]) 56 | 57 | return np.array(boxes, dtype=np.float32) 58 | 59 | 60 | if __name__ == '__main__': 61 | coord = np.array([[150, 150, 50, 100, -90, 1], 62 | [150, 150, 100, 50, -90, 1], 63 | [150, 150, 50, 100, -45, 1], 64 | [150, 150, 100, 50, -45, 1]]) 65 | 66 | coord1 = np.array([[150, 150, 100, 50, 0], 67 | [150, 150, 100, 50, -90], 68 | [150, 150, 100, 50, 45], 69 | [150, 150, 100, 50, -45]]) 70 | 71 | coord2 = forward_convert(coord) 72 | # coord3 = forward_convert(coord1, mode=-1) 73 | print(coord2) 74 | # print(coord3-coord2) 75 | # coord_label = np.array([[167., 203., 96., 132., 132., 96., 203., 167., 1.]]) 76 | # 77 | # coord4 = back_forward_convert(coord_label, mode=1) 78 | # coord5 = back_forward_convert(coord_label) 79 | 80 | # print(coord4) 81 | # print(coord5) 82 | 83 | # coord3 = coordinate_present_convert(coord, -1) 84 | # print(coord3) 85 | # coord4 = coordinate_present_convert(coord3, mode=1) 86 | # print(coord4) -------------------------------------------------------------------------------- /libs/box_utils/cython_utils/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | clean: 5 | rm -rf */*.pyc 6 | rm -rf */*.so 7 | -------------------------------------------------------------------------------- /libs/box_utils/cython_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/box_utils/cython_utils/__init__.py -------------------------------------------------------------------------------- /libs/box_utils/cython_utils/nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, 
h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | 70 | def nms_new(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 71 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 72 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 73 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 74 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 75 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 76 | 77 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 78 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 79 | 80 | cdef int ndets = dets.shape[0] 81 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 82 | np.zeros((ndets), dtype=np.int) 83 | 84 | # nominal indices 85 | cdef int _i, _j 86 | # sorted indices 87 | cdef int i, j 88 | # temp variables for box i's (the box currently under consideration) 89 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 90 | # variables for computing overlap with box j (lower scoring box) 91 | cdef np.float32_t xx1, yy1, xx2, yy2 92 | cdef np.float32_t w, h 93 | cdef np.float32_t inter, ovr 94 | 95 | keep = [] 96 | for _i in range(ndets): 97 | i = order[_i] 98 | if suppressed[i] == 1: 99 | continue 100 | keep.append(i) 101 | ix1 = x1[i] 102 | iy1 = y1[i] 103 | ix2 = x2[i] 104 | iy2 = y2[i] 105 | iarea = areas[i] 106 | for _j in range(_i + 1, ndets): 107 | j = order[_j] 108 | if suppressed[j] == 1: 109 | continue 110 | xx1 = max(ix1, x1[j]) 111 | yy1 = max(iy1, y1[j]) 112 | xx2 = min(ix2, x2[j]) 113 | yy2 = min(iy2, y2[j]) 114 | w = max(0.0, xx2 - xx1 + 1) 115 | h = max(0.0, yy2 - yy1 + 1) 116 | inter = w * h 117 | ovr = inter / (iarea + areas[j] - inter) 118 | ovr1 = inter / iarea 119 | ovr2 = inter / areas[j] 120 | if ovr >= thresh or ovr1 > 0.95 or ovr2 > 0.95: 121 | suppressed[j] = 1 122 | 123 | return keep 124 | -------------------------------------------------------------------------------- /libs/box_utils/cython_utils/setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | import numpy as np 11 | from distutils.core import setup 12 | from distutils.extension import Extension 13 | from Cython.Distutils import build_ext 14 | 15 | def find_in_path(name, path): 16 | "Find a file in a search path" 17 | #adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 18 | for dir in path.split(os.pathsep): 19 | binpath = pjoin(dir, name) 20 | if os.path.exists(binpath): 21 | return os.path.abspath(binpath) 22 | return None 23 | 24 | def locate_cuda(): 25 | """Locate the CUDA environment on the system 26 | 27 | 
Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 28 | and values giving the absolute path to each directory. 29 | 30 | Starts by looking for the CUDAHOME env variable. If not found, everything 31 | is based on finding 'nvcc' in the PATH. 32 | """ 33 | 34 | # first check if the CUDAHOME env variable is in use 35 | if 'CUDAHOME' in os.environ: 36 | home = os.environ['CUDAHOME'] 37 | nvcc = pjoin(home, 'bin', 'nvcc') 38 | else: 39 | # otherwise, search the PATH for NVCC 40 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 41 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 42 | if nvcc is None: 43 | raise EnvironmentError('The nvcc binary could not be ' 44 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 45 | home = os.path.dirname(os.path.dirname(nvcc)) 46 | 47 | cudaconfig = {'home':home, 'nvcc':nvcc, 48 | 'include': pjoin(home, 'include'), 49 | 'lib64': pjoin(home, 'lib64')} 50 | for k, v in cudaconfig.items(): 51 | if not os.path.exists(v): 52 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 53 | 54 | return cudaconfig 55 | CUDA = locate_cuda() 56 | 57 | # Obtain the numpy include directory. This logic works across numpy versions. 58 | try: 59 | numpy_include = np.get_include() 60 | except AttributeError: 61 | numpy_include = np.get_numpy_include() 62 | 63 | def customize_compiler_for_nvcc(self): 64 | """inject deep into distutils to customize how the dispatch 65 | to gcc/nvcc works. 66 | 67 | If you subclass UnixCCompiler, it's not trivial to get your subclass 68 | injected in, and still have the right customizations (i.e. 69 | distutils.sysconfig.customize_compiler) run on it. So instead of going 70 | the OO route, I have this. Note, it's kind of like a weird functional 71 | subclassing going on.""" 72 | 73 | # tell the compiler it can process .cu source files 74 | self.src_extensions.append('.cu') 75 | 76 | # save references to the default compiler_so and _compile methods 77 | default_compiler_so = self.compiler_so 78 | super = self._compile 79 | 80 | # now redefine the _compile method. This gets executed for each 81 | # object but distutils doesn't have the ability to change compilers 82 | # based on source extension: we add it.
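    # (Added note: "super" above is simply the saved bound method, not
    #  Python's super(); the closure below shadows this compiler instance's
    #  _compile so that .cu sources are routed to nvcc.)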
83 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 84 | print(extra_postargs) 85 | if os.path.splitext(src)[1] == '.cu': 86 | # use the cuda for .cu files 87 | self.set_executable('compiler_so', CUDA['nvcc']) 88 | # use only a subset of the extra_postargs, which are 1-1 translated 89 | # from the extra_compile_args in the Extension class 90 | postargs = extra_postargs['nvcc'] 91 | else: 92 | postargs = extra_postargs['gcc'] 93 | 94 | super(obj, src, ext, cc_args, postargs, pp_opts) 95 | # reset the default compiler_so, which we might have changed for cuda 96 | self.compiler_so = default_compiler_so 97 | 98 | # inject our redefined _compile method into the class 99 | self._compile = _compile 100 | 101 | # run the customize_compiler 102 | class custom_build_ext(build_ext): 103 | def build_extensions(self): 104 | customize_compiler_for_nvcc(self.compiler) 105 | build_ext.build_extensions(self) 106 | 107 | ext_modules = [ 108 | Extension( 109 | "cython_bbox", 110 | ["bbox.pyx"], 111 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 112 | include_dirs = [numpy_include] 113 | ), 114 | Extension( 115 | "cython_nms", 116 | ["nms.pyx"], 117 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 118 | include_dirs = [numpy_include] 119 | ) 120 | # Extension( 121 | # "cpu_nms", 122 | # ["cpu_nms.pyx"], 123 | # extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 124 | # include_dirs = [numpy_include] 125 | # ) 126 | ] 127 | 128 | setup( 129 | name='tf_faster_rcnn', 130 | ext_modules=ext_modules, 131 | # inject our custom trigger 132 | cmdclass={'build_ext': custom_build_ext}, 133 | ) 134 | -------------------------------------------------------------------------------- /libs/box_utils/iou.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | 8 | import tensorflow as tf 9 | import numpy as np 10 | 11 | 12 | def iou_calculate(boxes_1, boxes_2): 13 | 14 | with tf.name_scope('iou_caculate'): 15 | xmin_1, ymin_1, xmax_1, ymax_1 = boxes_1[:, 0], boxes_1[:, 1], boxes_1[:, 2], boxes_1[:, 3] 16 | 17 | xmin_2, ymin_2, xmax_2, ymax_2 = boxes_2[:, 0], boxes_2[:, 1], boxes_2[:, 2], boxes_2[:, 3] 18 | 19 | max_xmin = tf.maximum(xmin_1, xmin_2) 20 | min_xmax = tf.minimum(xmax_1, xmax_2) 21 | 22 | max_ymin = tf.maximum(ymin_1, ymin_2) 23 | min_ymax = tf.minimum(ymax_1, ymax_2) 24 | 25 | overlap_h = tf.maximum(0., min_ymax - max_ymin) # avoid h < 0 26 | overlap_w = tf.maximum(0., min_xmax - max_xmin) 27 | 28 | overlaps = overlap_h * overlap_w 29 | 30 | area_1 = (xmax_1 - xmin_1) * (ymax_1 - ymin_1) # [N, 1] 31 | area_2 = (xmax_2 - xmin_2) * (ymax_2 - ymin_2) # [M, ] 32 | 33 | iou = overlaps / (area_1 + area_2 - overlaps) 34 | 35 | return iou 36 | 37 | 38 | def iou_calculate1(boxes_1, boxes_2): 39 | xmin_1, ymin_1, xmax_1, ymax_1 = boxes_1[:, 0], boxes_1[:, 1], boxes_1[:, 2], boxes_1[:, 3] 40 | 41 | xmin_2, ymin_2, xmax_2, ymax_2 = boxes_2[:, 0], boxes_2[:, 1], boxes_2[:, 2], boxes_2[:, 3] 42 | 43 | max_xmin = np.maximum(xmin_1, xmin_2) 44 | min_xmax = np.minimum(xmax_1, xmax_2) 45 | 46 | max_ymin = np.maximum(ymin_1, ymin_2) 47 | min_ymax = np.minimum(ymax_1, ymax_2) 48 | 49 | overlap_h = np.maximum(0., min_ymax - max_ymin) # avoid h < 0 50 | overlap_w = np.maximum(0., min_xmax - max_xmin) 51 | 52 | overlaps = overlap_h * overlap_w 53 | 54 | area_1 = (xmax_1 - xmin_1) 
* (ymax_1 - ymin_1) # [N, 1] 55 | area_2 = (xmax_2 - xmin_2) * (ymax_2 - ymin_2) # [M, ] 56 | 57 | iou = overlaps / (area_1 + area_2 - overlaps) 58 | 59 | return iou 60 | 61 | 62 | if __name__ == '__main__': 63 | import os 64 | os.environ["CUDA_VISIBLE_DEVICES"] = '13' 65 | boxes1 = np.array([[50, 50, 100, 300], 66 | [60, 60, 100, 200]], np.float32) 67 | 68 | boxes2 = np.array([[50, 50, 100, 300], 69 | [200, 200, 100, 200]], np.float32) 70 | 71 | print(iou_calculate1(boxes1, boxes2)) 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /libs/box_utils/nms.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import numpy as np 8 | 9 | 10 | def py_cpu_nms(dets, thresh, max_output_size): 11 | """Pure Python NMS baseline.""" 12 | x1 = dets[:, 0] 13 | y1 = dets[:, 1] 14 | x2 = dets[:, 2] 15 | y2 = dets[:, 3] 16 | scores = dets[:, 4] 17 | 18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 19 | order = scores.argsort()[::-1] 20 | keep = [] 21 | while order.size > 0: 22 | if len(keep) >= max_output_size: 23 | break 24 | i = order[0] 25 | keep.append(i) 26 | xx1 = np.maximum(x1[i], x1[order[1:]]) 27 | yy1 = np.maximum(y1[i], y1[order[1:]]) 28 | xx2 = np.minimum(x2[i], x2[order[1:]]) 29 | yy2 = np.minimum(y2[i], y2[order[1:]]) 30 | 31 | w = np.maximum(0.0, xx2 - xx1 + 1) 32 | h = np.maximum(0.0, yy2 - yy1 + 1) 33 | inter = w * h 34 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 35 | inds = np.where(ovr <= thresh)[0] 36 | order = order[inds + 1] 37 | 38 | return np.array(keep, np.int64) 39 | -------------------------------------------------------------------------------- /libs/box_utils/show_box_in_tensor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | import numpy as np 9 | import cv2 10 | from libs.label_name_dict.label_dict import LABEl_NAME_MAP 11 | 12 | from libs.configs import cfgs 13 | 14 | from libs.box_utils import draw_box_in_img 15 | 16 | def only_draw_boxes(img_batch, boxes): 17 | 18 | boxes = tf.stop_gradient(boxes) 19 | img_tensor = tf.squeeze(img_batch, 0) 20 | img_tensor = tf.cast(img_tensor, tf.float32) 21 | labels = tf.ones(shape=(tf.shape(boxes)[0], ), dtype=tf.int32) * draw_box_in_img.ONLY_DRAW_BOXES 22 | scores = tf.zeros_like(labels, dtype=tf.float32) 23 | img_tensor_with_boxes = tf.py_func(draw_box_in_img.draw_boxes_with_label_and_scores, 24 | inp=[img_tensor, boxes, labels, scores], 25 | Tout=tf.uint8) 26 | img_tensor_with_boxes = tf.reshape(img_tensor_with_boxes, tf.shape(img_batch)) # [batch_size, h, w, c] 27 | 28 | return img_tensor_with_boxes 29 | 30 | def draw_boxes_with_scores(img_batch, boxes, scores): 31 | 32 | boxes = tf.stop_gradient(boxes) 33 | scores = tf.stop_gradient(scores) 34 | 35 | img_tensor = tf.squeeze(img_batch, 0) 36 | img_tensor = tf.cast(img_tensor, tf.float32) 37 | labels = tf.ones(shape=(tf.shape(boxes)[0],), dtype=tf.int32) * draw_box_in_img.ONLY_DRAW_BOXES_WITH_SCORES 38 | img_tensor_with_boxes = tf.py_func(draw_box_in_img.draw_boxes_with_label_and_scores, 39 | inp=[img_tensor, boxes, labels, scores], 40 | Tout=[tf.uint8]) 41 | img_tensor_with_boxes = tf.reshape(img_tensor_with_boxes, 
tf.shape(img_batch)) 42 | return img_tensor_with_boxes 43 | 44 | def draw_boxes_with_categories(img_batch, boxes, labels): 45 | boxes = tf.stop_gradient(boxes) 46 | 47 | img_tensor = tf.squeeze(img_batch, 0) 48 | img_tensor = tf.cast(img_tensor, tf.float32) 49 | scores = tf.ones(shape=(tf.shape(boxes)[0],), dtype=tf.float32) 50 | img_tensor_with_boxes = tf.py_func(draw_box_in_img.draw_boxes_with_label_and_scores, 51 | inp=[img_tensor, boxes, labels, scores], 52 | Tout=[tf.uint8]) 53 | img_tensor_with_boxes = tf.reshape(img_tensor_with_boxes, tf.shape(img_batch)) 54 | return img_tensor_with_boxes 55 | 56 | def draw_boxes_with_categories_and_scores(img_batch, boxes, labels, scores): 57 | boxes = tf.stop_gradient(boxes) 58 | scores = tf.stop_gradient(scores) 59 | 60 | img_tensor = tf.squeeze(img_batch, 0) 61 | img_tensor = tf.cast(img_tensor, tf.float32) 62 | img_tensor_with_boxes = tf.py_func(draw_box_in_img.draw_boxes_with_label_and_scores, 63 | inp=[img_tensor, boxes, labels, scores], 64 | Tout=[tf.uint8]) 65 | img_tensor_with_boxes = tf.reshape(img_tensor_with_boxes, tf.shape(img_batch)) 66 | return img_tensor_with_boxes 67 | 68 | if __name__ == "__main__": 69 | print (1) 70 | 71 | -------------------------------------------------------------------------------- /libs/box_utils/tf_ops.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | import tensorflow as tf 6 | 7 | ''' 8 | All of these ops are derived from the TensorFlow Object Detection API. 9 | ''' 10 | def indices_to_dense_vector(indices, 11 | size, 12 | indices_value=1., 13 | default_value=0, 14 | dtype=tf.float32): 15 | """Creates a dense vector whose entries at the given indices are set to a specific value (the parameter "indices_value") and whose remaining entries are default_value. 16 | 17 | This function exists because it is unclear if it is safe to use 18 | tf.sparse_to_dense(indices, [size], 1, validate_indices=False) 19 | with indices which are not ordered. 20 | This function accepts a dynamic size (e.g. tf.shape(tensor)[0]). 21 | 22 | Args: 23 | indices: 1d Tensor with integer indices which are to be set to 24 | indices_value. 25 | size: scalar with size (integer) of output Tensor. 26 | indices_value: values of elements specified by indices in the output vector. 27 | default_value: values of other elements in the output vector. 28 | dtype: data type. 29 | 30 | Returns: 31 | dense 1D Tensor of shape [size] with indices set to indices_value and the 32 | rest set to default_value.
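      (Added worked example: indices=[1, 3], size=5, indices_value=1.
       gives [0., 1., 0., 1., 0.].)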
33 | """ 34 | size = tf.to_int32(size) 35 | zeros = tf.ones([size], dtype=dtype) * default_value 36 | values = tf.ones_like(indices, dtype=dtype) * indices_value 37 | 38 | return tf.dynamic_stitch([tf.range(size), tf.to_int32(indices)], 39 | [zeros, values]) 40 | 41 | 42 | 43 | 44 | def test_plt(): 45 | from PIL import Image 46 | import matplotlib.pyplot as plt 47 | import numpy as np 48 | 49 | a = np.random.rand(20, 30) 50 | print (a.shape) 51 | # plt.subplot() 52 | b = plt.imshow(a) 53 | plt.show() 54 | 55 | 56 | if __name__ == '__main__': 57 | test_plt() 58 | -------------------------------------------------------------------------------- /libs/configs/COCO/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/configs/COCO/__init__.py -------------------------------------------------------------------------------- /libs/configs/COCO/cfgs_res50_1x_coco_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | ''' 6 | gluoncv backbone + multi_gpu 7 | 8 | ''' 9 | 10 | # ------------------------------------------------ 11 | VERSION = 'Cascade_FPN_Res50_COCO_1x_20190416_v1' 12 | NET_NAME = 'resnet50_v1d' 13 | ADD_BOX_IN_TENSORBOARD = True 14 | 15 | # ---------------------------------------- System_config 16 | ROOT_PATH = os.path.abspath('../') 17 | print(20*"++--") 18 | print(ROOT_PATH) 19 | GPU_GROUP = "0,1,2,3,4,5,6,7" 20 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 21 | SHOW_TRAIN_INFO_INTE = 10 22 | SMRY_ITER = 200 23 | SAVE_WEIGHTS_INTE = 80000 24 | 25 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 26 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 27 | INFERENCE_IMAGE_PATH = ROOT_PATH + '/tools/inference_image' 28 | INFERENCE_SAVE_PATH = ROOT_PATH + '/tools/inference_results' 29 | 30 | if NET_NAME.startswith("resnet"): 31 | weights_name = NET_NAME 32 | elif NET_NAME.startswith("MobilenetV2"): 33 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 34 | else: 35 | raise NotImplementedError 36 | 37 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 38 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 39 | 40 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 41 | 42 | # ------------------------------------------ Train config 43 | RESTORE_FROM_RPN = False 44 | IS_FILTER_OUTSIDE_BOXES = False 45 | FIXED_BLOCKS = 0 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | CUDA9 = True 49 | EVAL_THRESHOLD = 0.5 50 | 51 | RPN_LOCATION_LOSS_WEIGHT = 1. 
52 | RPN_CLASSIFICATION_LOSS_WEIGHT = 1.0 53 | 54 | FAST_RCNN_LOCATION_LOSS_WEIGHT = 1.0 55 | FAST_RCNN_CLASSIFICATION_LOSS_WEIGHT = 1.0 56 | RPN_SIGMA = 3.0 57 | FASTRCNN_SIGMA = 1.0 58 | 59 | MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy 60 | GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip 61 | 62 | EPSILON = 1e-5 63 | MOMENTUM = 0.9 64 | BATCH_SIZE = 1 65 | WARM_SETP = int(0.25 * SAVE_WEIGHTS_INTE) 66 | LR = 5e-4 * 2 * 1.25 * NUM_GPU * BATCH_SIZE 67 | DECAY_STEP = [11*SAVE_WEIGHTS_INTE, 16*SAVE_WEIGHTS_INTE, 20*SAVE_WEIGHTS_INTE] # 50000, 70000 68 | MAX_ITERATION = 20*SAVE_WEIGHTS_INTE 69 | 70 | # -------------------------------------------- Data_preprocess_config 71 | DATASET_NAME = 'coco' # 'pascal', 'coco' 72 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 73 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 74 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 75 | IMG_SHORT_SIDE_LEN = 800 76 | IMG_MAX_LENGTH = 1333 77 | CLASS_NUM = 80 78 | 79 | 80 | # --------------------------------------------- Network_config 81 | INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01) 82 | BBOX_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.001) 83 | WEIGHT_DECAY = 0.00004 if NET_NAME.startswith('Mobilenet') else 0.0001 84 | IS_ASSIGN = True 85 | 86 | # ---------------------------------------------Anchor config 87 | USE_CENTER_OFFSET = True 88 | LEVLES = ['P2', 'P3', 'P4', 'P5', 'P6'] 89 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 90 | ANCHOR_STRIDE_LIST = [4, 8, 16, 32, 64] 91 | ANCHOR_SCALES = [1.0] 92 | ANCHOR_RATIOS = [0.5, 1., 2.0] 93 | ROI_SCALE_FACTORS = [[10., 10., 5.0, 5.0], [10., 10., 5.0, 5.0], [10., 10., 5.0, 5.0]] 94 | ANCHOR_SCALE_FACTORS = None # [10., 10., 5.0, 5.0] 95 | 96 | # --------------------------------------------FPN config 97 | SHARE_HEADS = True 98 | KERNEL_SIZE = 3 99 | RPN_IOU_POSITIVE_THRESHOLD = 0.7 100 | RPN_IOU_NEGATIVE_THRESHOLD = 0.3 101 | TRAIN_RPN_CLOOBER_POSITIVES = False 102 | 103 | RPN_MINIBATCH_SIZE = 256 104 | RPN_POSITIVE_RATE = 0.5 105 | RPN_NMS_IOU_THRESHOLD = 0.7 106 | RPN_TOP_K_NMS_TRAIN = 12000 107 | RPN_MAXIMUM_PROPOSAL_TARIN = 2000 108 | 109 | RPN_TOP_K_NMS_TEST = 6000 110 | RPN_MAXIMUM_PROPOSAL_TEST = 1000 111 | 112 | # -------------------------------------------Fast-RCNN config 113 | ROI_SIZE = 14 114 | ROI_POOL_KERNEL_SIZE = 2 115 | USE_DROPOUT = False 116 | KEEP_PROB = 1.0 117 | SHOW_SCORE_THRSHOLD = 0.6 # only show in tensorboard 118 | 119 | FAST_RCNN_NMS_IOU_THRESHOLD = 0.5 # 0.6 120 | FAST_RCNN_NMS_MAX_BOXES_PER_CLASS = 100 121 | FAST_RCNN_IOU_POSITIVE_THRESHOLD = 0.5 122 | FAST_RCNN_IOU_NEGATIVE_THRESHOLD = 0.0 # 0.1 < IOU < 0.5 is negative 123 | FAST_RCNN_MINIBATCH_SIZE = 512 # if is -1, that is train with OHEM 124 | FAST_RCNN_POSITIVE_RATE = 0.25 125 | 126 | ADD_GTBOXES_TO_TRAIN = False 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /libs/configs/COCO/cfgs_res50_1x_coco_v2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | ''' 6 | gluoncv backbone + multi_gpu 7 | 8 | ''' 9 | 10 | # ------------------------------------------------ 11 | VERSION = 'Cascade_FPN_Res50_COCO_1x_20190420_v2' 12 | NET_NAME = 'resnet50_v1d' 13 | ADD_BOX_IN_TENSORBOARD = True 14 | 15 | # 
---------------------------------------- System_config 16 | ROOT_PATH = os.path.abspath('../') 17 | print(20*"++--") 18 | print(ROOT_PATH) 19 | GPU_GROUP = "0,1,2,3,4,5,6,7" 20 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 21 | SHOW_TRAIN_INFO_INTE = 10 22 | SMRY_ITER = 200 23 | SAVE_WEIGHTS_INTE = 80000 24 | 25 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 26 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 27 | INFERENCE_IMAGE_PATH = ROOT_PATH + '/tools/inference_image' 28 | INFERENCE_SAVE_PATH = ROOT_PATH + '/tools/inference_results' 29 | 30 | if NET_NAME.startswith("resnet"): 31 | weights_name = NET_NAME 32 | elif NET_NAME.startswith("MobilenetV2"): 33 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 34 | else: 35 | raise NotImplementedError 36 | 37 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 38 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 39 | 40 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 41 | 42 | # ------------------------------------------ Train config 43 | RESTORE_FROM_RPN = False 44 | IS_FILTER_OUTSIDE_BOXES = False 45 | FIXED_BLOCKS = 0 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | CUDA9 = True 49 | EVAL_THRESHOLD = 0.5 50 | 51 | RPN_LOCATION_LOSS_WEIGHT = 1. 52 | RPN_CLASSIFICATION_LOSS_WEIGHT = 1.0 53 | 54 | FAST_RCNN_LOCATION_LOSS_WEIGHT = 1.0 55 | FAST_RCNN_CLASSIFICATION_LOSS_WEIGHT = 1.0 56 | RPN_SIGMA = 3.0 57 | FASTRCNN_SIGMA = 1.0 58 | 59 | MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy 60 | GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip 61 | 62 | EPSILON = 1e-5 63 | MOMENTUM = 0.9 64 | BATCH_SIZE = 1 65 | WARM_SETP = int(0.25 * SAVE_WEIGHTS_INTE) 66 | LR = 5e-4 * 2 * 1.25 * NUM_GPU * BATCH_SIZE 67 | DECAY_STEP = [11*SAVE_WEIGHTS_INTE, 16*SAVE_WEIGHTS_INTE, 20*SAVE_WEIGHTS_INTE] # 50000, 70000 68 | MAX_ITERATION = 20*SAVE_WEIGHTS_INTE 69 | 70 | # -------------------------------------------- Data_preprocess_config 71 | DATASET_NAME = 'coco' # 'pascal', 'coco' 72 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 73 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 74 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR 75 | IMG_SHORT_SIDE_LEN = 800 76 | IMG_MAX_LENGTH = 1333 77 | CLASS_NUM = 80 78 | 79 | 80 | # --------------------------------------------- Network_config 81 | INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01) 82 | BBOX_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.001) 83 | WEIGHT_DECAY = 0.00004 if NET_NAME.startswith('Mobilenet') else 0.0001 84 | IS_ASSIGN = True 85 | 86 | # ---------------------------------------------Anchor config 87 | USE_CENTER_OFFSET = True 88 | LEVLES = ['P2', 'P3', 'P4', 'P5', 'P6'] 89 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 90 | ANCHOR_STRIDE_LIST = [4, 8, 16, 32, 64] 91 | ANCHOR_SCALES = [1.0] 92 | ANCHOR_RATIOS = [0.5, 1., 2.0] 93 | ROI_SCALE_FACTORS = [[10., 10., 5.0, 5.0], [20., 20., 10.0, 10.0], [30., 30., 15.0, 15.0]] 94 | ANCHOR_SCALE_FACTORS = None # [10., 10., 5.0, 5.0] 95 | 96 | # --------------------------------------------FPN config 97 | SHARE_HEADS = True 98 | KERNEL_SIZE = 3 99 | RPN_IOU_POSITIVE_THRESHOLD = 0.7 100 | RPN_IOU_NEGATIVE_THRESHOLD = 0.3 101 | TRAIN_RPN_CLOOBER_POSITIVES = False 102 | 103 | RPN_MINIBATCH_SIZE = 256 104 | RPN_POSITIVE_RATE = 0.5 105 | RPN_NMS_IOU_THRESHOLD = 0.7 106 | RPN_TOP_K_NMS_TRAIN = 12000 107 | RPN_MAXIMUM_PROPOSAL_TARIN = 2000 108 | 109 | RPN_TOP_K_NMS_TEST = 6000 110 | RPN_MAXIMUM_PROPOSAL_TEST = 1000 111 | 112 | # -------------------------------------------Fast-RCNN config 113 | ROI_SIZE = 14 114 | ROI_POOL_KERNEL_SIZE = 2 115 | USE_DROPOUT = False 116 | KEEP_PROB = 1.0 117 | SHOW_SCORE_THRSHOLD = 0.6 # only show in tensorboard 118 | 119 | FAST_RCNN_NMS_IOU_THRESHOLD = 0.5 # 0.6 120 | FAST_RCNN_NMS_MAX_BOXES_PER_CLASS = 100 121 | FAST_RCNN_IOU_POSITIVE_THRESHOLD = 0.5 122 | FAST_RCNN_IOU_NEGATIVE_THRESHOLD = 0.0 # 0.1 < IOU < 0.5 is negative 123 | FAST_RCNN_MINIBATCH_SIZE = 512 # if is -1, that is train with OHEM 124 | FAST_RCNN_POSITIVE_RATE = 0.25 125 | 126 | ADD_GTBOXES_TO_TRAIN = False 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /libs/configs/COCO/cfgs_res50_1x_coco_v3.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | ''' 6 | gluoncv backbone + multi_gpu 7 | 8 | ''' 9 | 10 | # ------------------------------------------------ 11 | VERSION = 'Cascade_FPN_Res50_COCO_1x_20190421_v3' 12 | NET_NAME = 'resnet50_v1d' 13 | ADD_BOX_IN_TENSORBOARD = True 14 | 15 | # ---------------------------------------- System_config 16 | ROOT_PATH = os.path.abspath('../') 17 | print(20*"++--") 18 | print(ROOT_PATH) 19 | GPU_GROUP = "0,1,2,3,4,5,6,7" 20 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 21 | SHOW_TRAIN_INFO_INTE = 20 22 | SMRY_ITER = 200 23 | SAVE_WEIGHTS_INTE = 80000 24 | 25 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 26 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 27 | INFERENCE_IMAGE_PATH = ROOT_PATH + '/tools/inference_image' 28 | INFERENCE_SAVE_PATH = ROOT_PATH + '/tools/inference_results' 29 | 30 | if NET_NAME.startswith("resnet"): 31 | weights_name = NET_NAME 32 | elif NET_NAME.startswith("MobilenetV2"): 33 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 34 | else: 35 | raise NotImplementedError 36 | 37 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 38 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 39 | 40 | EVALUATE_DIR = ROOT_PATH + 
'/output/evaluate_result_pickle/' 41 | 42 | # ------------------------------------------ Train config 43 | RESTORE_FROM_RPN = False 44 | IS_FILTER_OUTSIDE_BOXES = False 45 | FIXED_BLOCKS = 0 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | CUDA9 = True 49 | EVAL_THRESHOLD = 0.5 50 | 51 | RPN_LOCATION_LOSS_WEIGHT = 1. 52 | RPN_CLASSIFICATION_LOSS_WEIGHT = 1.0 53 | 54 | FAST_RCNN_LOCATION_LOSS_WEIGHT = 1.0 55 | FAST_RCNN_CLASSIFICATION_LOSS_WEIGHT = 1.0 56 | RPN_SIGMA = 3.0 57 | FASTRCNN_SIGMA = 1.0 58 | 59 | MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy 60 | GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip 61 | 62 | EPSILON = 1e-5 63 | MOMENTUM = 0.9 64 | BATCH_SIZE = 1 65 | WARM_SETP = int(0.25 * SAVE_WEIGHTS_INTE) 66 | LR = 5e-4 * 2 * 1.25 * NUM_GPU * BATCH_SIZE 67 | DECAY_STEP = [11*SAVE_WEIGHTS_INTE, 16*SAVE_WEIGHTS_INTE, 20*SAVE_WEIGHTS_INTE] # 50000, 70000 68 | MAX_ITERATION = 20*SAVE_WEIGHTS_INTE 69 | 70 | # -------------------------------------------- Data_preprocess_config 71 | DATASET_NAME = 'coco' # 'pascal', 'coco' 72 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 73 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 74 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 75 | IMG_SHORT_SIDE_LEN = 800 76 | IMG_MAX_LENGTH = 1333 77 | CLASS_NUM = 80 78 | 79 | 80 | # --------------------------------------------- Network_config 81 | INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01) 82 | BBOX_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.001) 83 | WEIGHT_DECAY = 0.00004 if NET_NAME.startswith('Mobilenet') else 0.0001 84 | IS_ASSIGN = True 85 | 86 | # ---------------------------------------------Anchor config 87 | USE_CENTER_OFFSET = True 88 | LEVLES = ['P2', 'P3', 'P4', 'P5', 'P6'] 89 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 90 | ANCHOR_STRIDE_LIST = [4, 8, 16, 32, 64] 91 | ANCHOR_SCALES = [1.0] 92 | ANCHOR_RATIOS = [0.5, 1., 2.0] 93 | ROI_SCALE_FACTORS = [[10., 10., 5.0, 5.0], [20., 20., 10.0, 10.0], [40., 40., 20.0, 20.0]] 94 | ANCHOR_SCALE_FACTORS = [10., 10., 5.0, 5.0] 95 | 96 | # --------------------------------------------FPN config 97 | SHARE_HEADS = True 98 | KERNEL_SIZE = 3 99 | RPN_IOU_POSITIVE_THRESHOLD = 0.7 100 | RPN_IOU_NEGATIVE_THRESHOLD = 0.3 101 | TRAIN_RPN_CLOOBER_POSITIVES = False 102 | 103 | RPN_MINIBATCH_SIZE = 256 104 | RPN_POSITIVE_RATE = 0.5 105 | RPN_NMS_IOU_THRESHOLD = 0.7 106 | RPN_TOP_K_NMS_TRAIN = 12000 107 | RPN_MAXIMUM_PROPOSAL_TARIN = 2000 108 | 109 | RPN_TOP_K_NMS_TEST = 6000 110 | RPN_MAXIMUM_PROPOSAL_TEST = 1000 111 | 112 | # -------------------------------------------Fast-RCNN config 113 | ROI_SIZE = 14 114 | ROI_POOL_KERNEL_SIZE = 2 115 | USE_DROPOUT = False 116 | KEEP_PROB = 1.0 117 | SHOW_SCORE_THRSHOLD = 0.6 # only show in tensorboard 118 | 119 | FAST_RCNN_NMS_IOU_THRESHOLD = 0.5 # 0.6 120 | FAST_RCNN_NMS_MAX_BOXES_PER_CLASS = 100 121 | FAST_RCNN_IOU_POSITIVE_THRESHOLD = 0.5 122 | FAST_RCNN_IOU_NEGATIVE_THRESHOLD = 0.0 # 0.1 < IOU < 0.5 is negative 123 | FAST_RCNN_MINIBATCH_SIZE = 512 # if is -1, that is train with OHEM 124 | FAST_RCNN_POSITIVE_RATE = 0.25 125 | 126 | ADD_GTBOXES_TO_TRAIN = False 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /libs/configs/COCO/cfgs_res50_1x_coco_v4.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | ''' 6 | gluoncv backbone + multi_gpu 7 | 8 | ''' 9 | 10 | # ------------------------------------------------ 11 | VERSION = 'Cascade_FPN_Res50_COCO_1x_20190425_v4' 12 | NET_NAME = 'resnet50_v1d' 13 | ADD_BOX_IN_TENSORBOARD = True 14 | 15 | # ---------------------------------------- System_config 16 | ROOT_PATH = os.path.abspath('../') 17 | print(20*"++--") 18 | print(ROOT_PATH) 19 | GPU_GROUP = "0,1,2,3,4,5,6,7" 20 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 21 | SHOW_TRAIN_INFO_INTE = 10 22 | SMRY_ITER = 200 23 | SAVE_WEIGHTS_INTE = 80000 24 | 25 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 26 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 27 | INFERENCE_IMAGE_PATH = ROOT_PATH + '/tools/inference_image' 28 | INFERENCE_SAVE_PATH = ROOT_PATH + '/tools/inference_results' 29 | 30 | if NET_NAME.startswith("resnet"): 31 | weights_name = NET_NAME 32 | elif NET_NAME.startswith("MobilenetV2"): 33 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 34 | else: 35 | raise NotImplementedError 36 | 37 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 38 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 39 | 40 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 41 | 42 | # ------------------------------------------ Train config 43 | RESTORE_FROM_RPN = False 44 | IS_FILTER_OUTSIDE_BOXES = False 45 | FIXED_BLOCKS = 0 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | CUDA9 = True 49 | EVAL_THRESHOLD = 0.5 50 | 51 | RPN_LOCATION_LOSS_WEIGHT = 1. 52 | RPN_CLASSIFICATION_LOSS_WEIGHT = 1.0 53 | 54 | FAST_RCNN_LOCATION_LOSS_WEIGHT = 1.0 55 | FAST_RCNN_CLASSIFICATION_LOSS_WEIGHT = 1.0 56 | RPN_SIGMA = 3.0 57 | FASTRCNN_SIGMA = 1.0 58 | 59 | MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multiply 60 | GRADIENT_CLIPPING_BY_NORM = None # 10.0; if None, will not clip 61 | 62 | EPSILON = 1e-5 63 | MOMENTUM = 0.9 64 | BATCH_SIZE = 1 65 | WARM_SETP = int(0.25 * SAVE_WEIGHTS_INTE) 66 | LR = 5e-4 * 2 * 1.25 * NUM_GPU * BATCH_SIZE 67 | DECAY_STEP = [11*SAVE_WEIGHTS_INTE, 16*SAVE_WEIGHTS_INTE, 20*SAVE_WEIGHTS_INTE] # 50000, 70000 68 | MAX_ITERATION = 20*SAVE_WEIGHTS_INTE 69 | 70 | # -------------------------------------------- Data_preprocess_config 71 | DATASET_NAME = 'coco' # 'pascal', 'coco' 72 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 73 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 74 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 75 | IMG_SHORT_SIDE_LEN = 800 76 | IMG_MAX_LENGTH = 1333 77 | CLASS_NUM = 80 78 | 79 | 80 | # --------------------------------------------- Network_config 81 | INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01) 82 | BBOX_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.001) 83 | WEIGHT_DECAY = 0.00004 if NET_NAME.startswith('Mobilenet') else 0.0001 84 | IS_ASSIGN = True 85 | 86 | # ---------------------------------------------Anchor config 87 | USE_CENTER_OFFSET = True 88 | LEVLES = ['P2', 'P3', 'P4', 'P5', 'P6'] 89 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 90 | ANCHOR_STRIDE_LIST = [4, 8, 16, 32, 64] 91 | ANCHOR_SCALES = [1.0] 92 | ANCHOR_RATIOS = [0.5, 1., 2.0] 93 | ROI_SCALE_FACTORS = [[10., 10., 5.0, 5.0], [20., 20., 10.0, 10.0], [30., 30., 15.0, 15.0]] 94 | ANCHOR_SCALE_FACTORS = None # [10., 10., 5.0, 5.0] 95 | 96 | # --------------------------------------------FPN config 97 | SHARE_HEADS = True 98 | KERNEL_SIZE = 3 99 | RPN_IOU_POSITIVE_THRESHOLD = 0.7 100 | RPN_IOU_NEGATIVE_THRESHOLD = 0.3 101 | TRAIN_RPN_CLOOBER_POSITIVES = False 102 | 103 | RPN_MINIBATCH_SIZE = 256 104 | RPN_POSITIVE_RATE = 0.5 105 | RPN_NMS_IOU_THRESHOLD = 0.7 106 | RPN_TOP_K_NMS_TRAIN = 12000 107 | RPN_MAXIMUM_PROPOSAL_TARIN = 2000 108 | 109 | RPN_TOP_K_NMS_TEST = 6000 110 | RPN_MAXIMUM_PROPOSAL_TEST = 1000 111 | 112 | # -------------------------------------------Fast-RCNN config 113 | ROI_SIZE = 14 114 | ROI_POOL_KERNEL_SIZE = 2 115 | USE_DROPOUT = False 116 | KEEP_PROB = 1.0 117 | SHOW_SCORE_THRSHOLD = 0.6 # only show in tensorboard 118 | 119 | FAST_RCNN_NMS_IOU_THRESHOLD = 0.5 # 0.6 120 | FAST_RCNN_NMS_MAX_BOXES_PER_CLASS = 100 121 | FAST_RCNN_IOU_POSITIVE_THRESHOLD = 0.5 122 | FAST_RCNN_IOU_NEGATIVE_THRESHOLD = 0.0 # 0.0 <= IOU < 0.5 is negative 123 | FAST_RCNN_MINIBATCH_SIZE = 512 # if -1, train with OHEM 124 | FAST_RCNN_POSITIVE_RATE = 0.25 125 | 126 | ADD_GTBOXES_TO_TRAIN = False 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /libs/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/configs/__init__.py -------------------------------------------------------------------------------- /libs/configs/cfgs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | 6 | # ------------------------------------------------ 7 | VERSION = 'Cascade_FPN_Res50_COCO_1x_20190420_v2' 8 | NET_NAME = 'resnet_v1_50' 9 | ADD_BOX_IN_TENSORBOARD = True 10 | 11 | # ---------------------------------------- System_config 12 | ROOT_PATH = os.path.abspath('../') 13 | print(20*"++--") 14 | print(ROOT_PATH) 15 | GPU_GROUP = "0,1,2,3,4,5,6,7" 16 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 17 | SHOW_TRAIN_INFO_INTE = 10 18 | SMRY_ITER = 200 19 | SAVE_WEIGHTS_INTE = 80000 20 | 21 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 22 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 23 | INFERENCE_IMAGE_PATH = ROOT_PATH + '/tools/inference_image' 24 | INFERENCE_SAVE_PATH = ROOT_PATH + '/tools/inference_results' 25 | 26 | if NET_NAME.startswith("resnet"): 27 | weights_name = NET_NAME 28 | elif NET_NAME.startswith("MobilenetV2"): 29 | weights_name =
"mobilenet/mobilenet_v2_1.0_224" 30 | else: 31 | raise NotImplementedError 32 | 33 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 34 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 35 | 36 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 37 | 38 | # ------------------------------------------ Train config 39 | RESTORE_FROM_RPN = False 40 | IS_FILTER_OUTSIDE_BOXES = False 41 | FIXED_BLOCKS = 0 # allow 0~3 42 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 43 | USE_07_METRIC = True 44 | CUDA9 = True 45 | EVAL_THRESHOLD = 0.5 46 | 47 | RPN_LOCATION_LOSS_WEIGHT = 1. 48 | RPN_CLASSIFICATION_LOSS_WEIGHT = 1.0 49 | 50 | FAST_RCNN_LOCATION_LOSS_WEIGHT = 1.0 51 | FAST_RCNN_CLASSIFICATION_LOSS_WEIGHT = 1.0 52 | RPN_SIGMA = 3.0 53 | FASTRCNN_SIGMA = 1.0 54 | 55 | MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy 56 | GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip 57 | 58 | EPSILON = 1e-5 59 | MOMENTUM = 0.9 60 | BATCH_SIZE = 1 61 | WARM_SETP = int(0.25 * SAVE_WEIGHTS_INTE) 62 | LR = 5e-4 * 2 * 1.25 * NUM_GPU * BATCH_SIZE 63 | DECAY_STEP = [11*SAVE_WEIGHTS_INTE, 16*SAVE_WEIGHTS_INTE, 20*SAVE_WEIGHTS_INTE] # 50000, 70000 64 | MAX_ITERATION = 20*SAVE_WEIGHTS_INTE 65 | 66 | # -------------------------------------------- Data_preprocess_config 67 | DATASET_NAME = 'coco' # 'pascal', 'coco' 68 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 69 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 70 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 71 | IMG_SHORT_SIDE_LEN = 800 72 | IMG_MAX_LENGTH = 1333 73 | CLASS_NUM = 80 74 | 75 | 76 | # --------------------------------------------- Network_config 77 | INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01) 78 | BBOX_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.001) 79 | WEIGHT_DECAY = 0.00004 if NET_NAME.startswith('Mobilenet') else 0.0001 80 | IS_ASSIGN = True 81 | 82 | # ---------------------------------------------Anchor config 83 | USE_CENTER_OFFSET = True 84 | LEVLES = ['P2', 'P3', 'P4', 'P5', 'P6'] 85 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 86 | ANCHOR_STRIDE_LIST = [4, 8, 16, 32, 64] 87 | ANCHOR_SCALES = [1.0] 88 | ANCHOR_RATIOS = [0.5, 1., 2.0] 89 | ROI_SCALE_FACTORS = [[10., 10., 5.0, 5.0], [20., 20., 10.0, 10.0], [30., 30., 15.0, 15.0]] 90 | ANCHOR_SCALE_FACTORS = None # [10., 10., 5.0, 5.0] 91 | 92 | # --------------------------------------------FPN config 93 | SHARE_HEADS = True 94 | KERNEL_SIZE = 3 95 | RPN_IOU_POSITIVE_THRESHOLD = 0.7 96 | RPN_IOU_NEGATIVE_THRESHOLD = 0.3 97 | TRAIN_RPN_CLOOBER_POSITIVES = False 98 | 99 | RPN_MINIBATCH_SIZE = 256 100 | RPN_POSITIVE_RATE = 0.5 101 | RPN_NMS_IOU_THRESHOLD = 0.7 102 | RPN_TOP_K_NMS_TRAIN = 12000 103 | RPN_MAXIMUM_PROPOSAL_TARIN = 2000 104 | 105 | RPN_TOP_K_NMS_TEST = 6000 106 | RPN_MAXIMUM_PROPOSAL_TEST = 1000 107 | 108 | # -------------------------------------------Fast-RCNN config 109 | ROI_SIZE = 14 110 | ROI_POOL_KERNEL_SIZE = 2 111 | USE_DROPOUT = False 112 | KEEP_PROB = 1.0 113 | SHOW_SCORE_THRSHOLD = 0.6 # only show in tensorboard 114 | 115 | FAST_RCNN_NMS_IOU_THRESHOLD = 0.5 # 0.6 116 | FAST_RCNN_NMS_MAX_BOXES_PER_CLASS = 100 117 | FAST_RCNN_IOU_POSITIVE_THRESHOLD = 0.5 118 | FAST_RCNN_IOU_NEGATIVE_THRESHOLD = 0.0 # 0.1 < IOU < 0.5 is negative 119 | FAST_RCNN_MINIBATCH_SIZE = 512 # if is -1, that is train with OHEM 120 | 
FAST_RCNN_POSITIVE_RATE = 0.25 121 | 122 | ADD_GTBOXES_TO_TRAIN = False 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /libs/detection_oprations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/detection_oprations/__init__.py -------------------------------------------------------------------------------- /libs/detection_oprations/anchor_target_layer_without_boxweight.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | from libs.configs import cfgs 13 | import numpy as np 14 | import numpy.random as npr 15 | from libs.box_utils.cython_utils.cython_bbox import bbox_overlaps 16 | from libs.box_utils import encode_and_decode 17 | 18 | 19 | def anchor_target_layer( 20 | gt_boxes, img_shape, all_anchors, is_restrict_bg=False): 21 | """Same as the anchor target layer in original Fast/er RCNN """ 22 | 23 | total_anchors = all_anchors.shape[0] 24 | img_h, img_w = img_shape[1], img_shape[2] 25 | gt_boxes = gt_boxes[:, :-1] # remove class label 26 | 27 | # allow boxes to sit over the edge by a small amount 28 | _allowed_border = 0 29 | 30 | # only keep anchors inside the image 31 | if cfgs.IS_FILTER_OUTSIDE_BOXES: 32 | inds_inside = np.where( 33 | (all_anchors[:, 0] >= -_allowed_border) & 34 | (all_anchors[:, 1] >= -_allowed_border) & 35 | (all_anchors[:, 2] < img_w + _allowed_border) & # width 36 | (all_anchors[:, 3] < img_h + _allowed_border) # height 37 | )[0] 38 | else: 39 | inds_inside = range(all_anchors.shape[0]) 40 | 41 | anchors = all_anchors[inds_inside, :] 42 | 43 | # label: 1 is positive, 0 is negative, -1 is dont care 44 | labels = np.empty((len(inds_inside),), dtype=np.float32) 45 | labels.fill(-1) 46 | 47 | # overlaps between the anchors and the gt boxes 48 | overlaps = bbox_overlaps( 49 | np.ascontiguousarray(anchors, dtype=np.float), 50 | np.ascontiguousarray(gt_boxes, dtype=np.float)) 51 | 52 | argmax_overlaps = overlaps.argmax(axis=1) 53 | max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] 54 | gt_argmax_overlaps = overlaps.argmax(axis=0) 55 | gt_max_overlaps = overlaps[ 56 | gt_argmax_overlaps, np.arange(overlaps.shape[1])] 57 | gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] 58 | 59 | if not cfgs.TRAIN_RPN_CLOOBER_POSITIVES: 60 | labels[max_overlaps < cfgs.RPN_IOU_NEGATIVE_THRESHOLD] = 0 61 | 62 | labels[gt_argmax_overlaps] = 1 63 | labels[max_overlaps >= cfgs.RPN_IOU_POSITIVE_THRESHOLD] = 1 64 | 65 | if cfgs.TRAIN_RPN_CLOOBER_POSITIVES: 66 | labels[max_overlaps < cfgs.RPN_IOU_NEGATIVE_THRESHOLD] = 0 67 | 68 | num_fg = int(cfgs.RPN_MINIBATCH_SIZE * cfgs.RPN_POSITIVE_RATE) 69 | fg_inds = np.where(labels == 1)[0] 70 | if len(fg_inds) > num_fg: 71 | disable_inds = npr.choice( 72 | fg_inds, size=(len(fg_inds) - num_fg), replace=False) 73 | labels[disable_inds] = -1 74 | 75 | num_bg = cfgs.RPN_MINIBATCH_SIZE - np.sum(labels == 1) 76 | if is_restrict_bg: 77 | num_bg = 
max(num_bg, num_fg * 1.5) 78 | bg_inds = np.where(labels == 0)[0] 79 | if len(bg_inds) > num_bg: 80 | disable_inds = npr.choice( 81 | bg_inds, size=(len(bg_inds) - num_bg), replace=False) 82 | labels[disable_inds] = -1 83 | 84 | bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) 85 | 86 | # map up to original set of anchors 87 | labels = _unmap(labels, total_anchors, inds_inside, fill=-1) 88 | bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) 89 | 90 | # labels = labels.reshape((1, height, width, A)) 91 | rpn_labels = labels.reshape((-1, 1)) 92 | 93 | # bbox_targets 94 | bbox_targets = bbox_targets.reshape((-1, 4)) 95 | rpn_bbox_targets = bbox_targets 96 | 97 | return rpn_labels, rpn_bbox_targets 98 | 99 | 100 | def _unmap(data, count, inds, fill=0): 101 | """ Unmap a subset of item (data) back to the original set of items (of 102 | size count) """ 103 | if len(data.shape) == 1: 104 | ret = np.empty((count,), dtype=np.float32) 105 | ret.fill(fill) 106 | ret[inds] = data 107 | else: 108 | ret = np.empty((count,) + data.shape[1:], dtype=np.float32) 109 | ret.fill(fill) 110 | ret[inds, :] = data 111 | return ret 112 | 113 | 114 | def _compute_targets(ex_rois, gt_rois): 115 | """Compute bounding-box regression targets for an image.""" 116 | # targets = bbox_transform(ex_rois, gt_rois[:, :4]).astype( 117 | # np.float32, copy=False) 118 | targets = encode_and_decode.encode_boxes(unencode_boxes=gt_rois, 119 | reference_boxes=ex_rois, 120 | scale_factors=cfgs.ANCHOR_SCALE_FACTORS) 121 | # targets = encode_and_decode.encode_boxes(ex_rois=ex_rois, 122 | # gt_rois=gt_rois, 123 | # scale_factor=None) 124 | return targets 125 | -------------------------------------------------------------------------------- /libs/detection_oprations/proposal_opr.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | from libs.configs import cfgs 4 | from libs.box_utils import encode_and_decode 5 | from libs.box_utils import boxes_utils 6 | import tensorflow as tf 7 | import numpy as np 8 | 9 | 10 | def postprocess_rpn_proposals(rpn_bbox_pred, rpn_cls_prob, img_shape, anchors, is_training): 11 | ''' 12 | 13 | :param rpn_bbox_pred: [-1, 4] 14 | :param rpn_cls_prob: [-1, 2] 15 | :param img_shape: 16 | :param anchors:[-1, 4] 17 | :param is_training: 18 | :return: 19 | ''' 20 | 21 | if is_training: 22 | pre_nms_topN = cfgs.RPN_TOP_K_NMS_TRAIN 23 | post_nms_topN = cfgs.RPN_MAXIMUM_PROPOSAL_TARIN 24 | # pre_nms_topN = cfgs.FPN_TOP_K_PER_LEVEL_TRAIN 25 | # post_nms_topN = pre_nms_topN 26 | else: 27 | pre_nms_topN = cfgs.RPN_TOP_K_NMS_TEST 28 | post_nms_topN = cfgs.RPN_MAXIMUM_PROPOSAL_TEST 29 | # pre_nms_topN = cfgs.FPN_TOP_K_PER_LEVEL_TEST 30 | # post_nms_topN = pre_nms_topN 31 | 32 | nms_thresh = cfgs.RPN_NMS_IOU_THRESHOLD 33 | 34 | cls_prob = rpn_cls_prob[:, 1] 35 | 36 | # 1. decode boxes 37 | decode_boxes = encode_and_decode.decode_boxes(encoded_boxes=rpn_bbox_pred, 38 | reference_boxes=anchors, 39 | scale_factors=cfgs.ANCHOR_SCALE_FACTORS) 40 | 41 | # 2. clip to img boundaries 42 | decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(decode_boxes=decode_boxes, 43 | img_shape=img_shape) 44 | 45 | # 3. get top N to NMS 46 | if pre_nms_topN > 0: 47 | pre_nms_topN = tf.minimum(pre_nms_topN, tf.shape(decode_boxes)[0], name='avoid_unenough_boxes') 48 | cls_prob, top_k_indices = tf.nn.top_k(cls_prob, k=pre_nms_topN) 49 | decode_boxes = tf.gather(decode_boxes, top_k_indices) 50 | 51 | # 4. 
NMS 52 | keep = tf.image.non_max_suppression( 53 | boxes=decode_boxes, 54 | scores=cls_prob, 55 | max_output_size=post_nms_topN, 56 | iou_threshold=nms_thresh) 57 | 58 | final_boxes = tf.gather(decode_boxes, keep) 59 | final_probs = tf.gather(cls_prob, keep) 60 | 61 | return final_boxes, final_probs 62 | 63 | -------------------------------------------------------------------------------- /libs/export_pbs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/export_pbs/__init__.py -------------------------------------------------------------------------------- /libs/export_pbs/exportPb.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | import os, sys 6 | import tensorflow as tf 7 | import tensorflow.contrib.slim as slim 8 | from tensorflow.python.tools import freeze_graph 9 | 10 | sys.path.append('../../') 11 | from data.io.image_preprocess import short_side_resize_for_inference_data 12 | from libs.configs import cfgs 13 | from libs.networks import build_whole_network 14 | 15 | CKPT_PATH = '/home/yangxue/isilon/yangxue/code/yxdet/FPN_TF_DEV/output/trained_weights/FPN_Res50_COCO_20190211_v18/voc_1599999model.ckpt' 16 | OUT_DIR = '../../output/Pbs' 17 | PB_NAME = 'FPN_Res50_COCO.pb' 18 | 19 | 20 | def build_detection_graph(): 21 | # 1. preprocess img 22 | img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3], 23 | name='input_img') # is RGB, not BGR 24 | raw_shape = tf.shape(img_plac) 25 | raw_h, raw_w = tf.to_float(raw_shape[0]), tf.to_float(raw_shape[1]) 26 | 27 | img_batch = tf.cast(img_plac, tf.float32) 28 | img_batch = short_side_resize_for_inference_data(img_tensor=img_batch, 29 | target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN, 30 | length_limitation=cfgs.IMG_MAX_LENGTH) 31 | if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']: 32 | img_batch = (img_batch / 255 - tf.constant(cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD) 33 | else: 34 | img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN) 35 | 36 | img_batch = tf.expand_dims(img_batch, axis=0) # [1, None, None, 3] 37 | 38 | det_net = build_whole_network.DetectionNetwork(base_network_name=cfgs.NET_NAME, 39 | is_training=False) 40 | 41 | detected_boxes, detection_scores, detection_category = det_net.build_whole_detection_network( 42 | input_img_batch=img_batch, 43 | gtboxes_batch=None) 44 | 45 | xmin, ymin, xmax, ymax = detected_boxes[:, 0], detected_boxes[:, 1], \ 46 | detected_boxes[:, 2], detected_boxes[:, 3] 47 | 48 | resized_shape = tf.shape(img_batch) 49 | resized_h, resized_w = tf.to_float(resized_shape[1]), tf.to_float(resized_shape[2]) 50 | 51 | xmin = xmin * raw_w / resized_w 52 | xmax = xmax * raw_w / resized_w 53 | 54 | ymin = ymin * raw_h / resized_h 55 | ymax = ymax * raw_h / resized_h 56 | 57 | boxes = tf.transpose(tf.stack([xmin, ymin, xmax, ymax])) 58 | dets = tf.concat([tf.reshape(detection_category, [-1, 1]), 59 | tf.reshape(detection_scores, [-1, 1]), 60 | boxes], axis=1, name='DetResults') 61 | 62 | return dets 63 | 64 | 65 | def export_frozenPB(): 66 | 67 | tf.reset_default_graph() 68 | 69 | dets = build_detection_graph() 70 | 71 | saver = tf.train.Saver() 72 | 73 | with tf.Session() as sess: 74 | print("we have restored the weights from =====>>\n", CKPT_PATH) 75 | saver.restore(sess, CKPT_PATH) 76 | 77 | 
tf.train.write_graph(sess.graph_def, OUT_DIR, PB_NAME) 78 | freeze_graph.freeze_graph(input_graph=os.path.join(OUT_DIR, PB_NAME), 79 | input_saver='', 80 | input_binary=False, 81 | input_checkpoint=CKPT_PATH, 82 | output_node_names="DetResults", 83 | restore_op_name="save/restore_all", 84 | filename_tensor_name='save/Const:0', 85 | output_graph=os.path.join(OUT_DIR, PB_NAME.replace('.pb', '_Frozen.pb')), 86 | clear_devices=False, 87 | initializer_nodes='') 88 | 89 | 90 | if __name__ == '__main__': 91 | os.environ["CUDA_VISIBLE_DEVICES"] = '' 92 | export_frozenPB() 93 | -------------------------------------------------------------------------------- /libs/export_pbs/test_TensorRT.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | from __future__ import division 6 | 7 | import os, sys 8 | import tensorflow as tf 9 | import tensorflow.contrib.tensorrt as trt 10 | import time 11 | import cv2 12 | import argparse 13 | import numpy as np 14 | sys.path.append('../../') 15 | 16 | from data.io.image_preprocess import short_side_resize_for_inference_data 17 | from libs.configs import cfgs 18 | from libs.networks import build_whole_network 19 | from libs.box_utils import draw_box_in_img 20 | from help_utils import tools 21 | 22 | 23 | def load_graph(frozen_graph_file): 24 | 25 | # we parse the graph_def file 26 | with tf.gfile.GFile(frozen_graph_file, 'rb') as f: 27 | graph_def = tf.GraphDef() 28 | graph_def.ParseFromString(f.read()) 29 | 30 | # we load the graph_def in the default graph 31 | 32 | graph_def = trt.create_inference_graph(graph_def, ["DetResults"], 33 | max_batch_size=1000, 34 | max_workspace_size_bytes=(1 << 10)*10000, 35 | precision_mode="INT8", 36 | maximum_cached_engines=10) # Get optimized graph 37 | 38 | # graph_def = trt.calib_graph_to_infer_graph(graph_def) 39 | tf.reset_default_graph() 40 | with tf.Graph().as_default() as graph: 41 | tf.import_graph_def(graph_def, 42 | input_map=None, 43 | return_elements=None, 44 | name="", 45 | op_dict=None, 46 | producer_op_list=None) 47 | return graph 48 | 49 | 50 | def test(frozen_graph_path, test_dir): 51 | 52 | graph = load_graph(frozen_graph_path) 53 | print("we are testing ====>>>>", frozen_graph_path) 54 | 55 | img = graph.get_tensor_by_name("input_img:0") 56 | dets = graph.get_tensor_by_name("DetResults:0") 57 | 58 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5) 59 | config = tf.ConfigProto(gpu_options=gpu_options) 60 | 61 | with tf.Session(graph=graph, config=config) as sess: 62 | for img_path in os.listdir(test_dir): 63 | a_img = cv2.imread(os.path.join(test_dir, img_path))[:, :, ::-1] 64 | st = time.time() 65 | dets_val = sess.run(dets, feed_dict={img: a_img}) 66 | end = time.time() 67 | 68 | show_indices = dets_val[:, 1] >= 0.5 69 | dets_val = dets_val[show_indices] 70 | final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(a_img, 71 | boxes=dets_val[:, 2:], 72 | labels=dets_val[:, 0], 73 | scores=dets_val[:, 1]) 74 | cv2.imwrite(img_path, 75 | final_detections[:, :, ::-1]) 76 | print("%s cost time: %f" % (img_path, end - st)) 77 | 78 | 79 | if __name__ == '__main__': 80 | os.environ["CUDA_VISIBLE_DEVICES"] = '0' 81 | test('/home/yangxue/isilon/yangxue/code/yxdet/FPN_TF_DEV/output/Pbs/FPN_Res50_COCO_Frozen.pb', 82 | '/unsullied/sharefs/yangxue/isilon/yangxue/data/COCO/train2017') 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 
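# Added note (hedged, based on the TF-TRT 1.x contrib workflow used above): with
# precision_mode="INT8", trt.create_inference_graph() returns a *calibration* graph,
# which is normally run over representative images and then converted with
# trt.calib_graph_to_infer_graph() (left commented out in load_graph() above)
# before real inference; the "FP32"/"FP16" modes need no such calibration pass.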
-------------------------------------------------------------------------------- /libs/export_pbs/test_exportPb.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | from __future__ import division 6 | 7 | import os, sys 8 | import tensorflow as tf 9 | import tensorflow.contrib.tensorrt as trt 10 | import time 11 | import cv2 12 | import argparse 13 | import numpy as np 14 | sys.path.append('../../') 15 | 16 | from data.io.image_preprocess import short_side_resize_for_inference_data 17 | from libs.configs import cfgs 18 | from libs.networks import build_whole_network 19 | from libs.box_utils import draw_box_in_img 20 | from help_utils import tools 21 | 22 | 23 | def load_graph(frozen_graph_file): 24 | 25 | # we parse the graph_def file 26 | with tf.gfile.GFile(frozen_graph_file, 'rb') as f: 27 | graph_def = tf.GraphDef() 28 | graph_def.ParseFromString(f.read()) 29 | 30 | with tf.Graph().as_default() as graph: 31 | tf.import_graph_def(graph_def, 32 | input_map=None, 33 | return_elements=None, 34 | name="", 35 | op_dict=None, 36 | producer_op_list=None) 37 | return graph 38 | 39 | 40 | def test(frozen_graph_path, test_dir): 41 | 42 | graph = load_graph(frozen_graph_path) 43 | print("we are testing ====>>>>", frozen_graph_path) 44 | 45 | img = graph.get_tensor_by_name("input_img:0") 46 | dets = graph.get_tensor_by_name("DetResults:0") 47 | 48 | with tf.Session(graph=graph) as sess: 49 | for img_path in os.listdir(test_dir): 50 | a_img = cv2.imread(os.path.join(test_dir, img_path))[:, :, ::-1] 51 | st = time.time() 52 | dets_val = sess.run(dets, feed_dict={img: a_img}) 53 | 54 | show_indices = dets_val[:, 1] >= 0.5 55 | dets_val = dets_val[show_indices] 56 | final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(a_img, 57 | boxes=dets_val[:, 2:], 58 | labels=dets_val[:, 0], 59 | scores=dets_val[:, 1]) 60 | cv2.imwrite(img_path, 61 | final_detections[:, :, ::-1]) 62 | print("%s cost time: %f" % (img_path, time.time() - st)) 63 | 64 | 65 | if __name__ == '__main__': 66 | os.environ["CUDA_VISIBLE_DEVICES"] = '0' 67 | test('/home/yangxue/isilon/yangxue/code/yxdet/FPN_TF_DEV/output/Pbs/FPN_Res50_COCO_Frozen.pb', 68 | '/unsullied/sharefs/yangxue/isilon/yangxue/data/COCO/train2017') 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /libs/gluon2TF/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | .pyc 104 | 105 | # pycharm files 106 | .idea/* 107 | 108 | # vis files 109 | *.png* 110 | *.jpg* 111 | *events.out.tfevents* 112 | *.ckpt* 113 | *.tfrecord* 114 | *.params 115 | *.zip 116 | tf_ckpts/ 117 | 118 | # folder 119 | tools/demos/* 120 | tools/txt_output/* 121 | output/* 122 | 123 | -------------------------------------------------------------------------------- /libs/gluon2TF/README.md: -------------------------------------------------------------------------------- 1 | # Convert ResNets weights from GluonCV to Tensorflow 2 | 3 | ## Abstract 4 | GluonCV has released new ResNet pre-trained weights and designed some new ResNet variants (such as resnet_v1_b and resnet_v1_d; see [this paper](https://arxiv.org/pdf/1812.01187.pdf) for details). 5 | 6 | This project reproduces the ResNets in GluonCV with TensorFlow and converts the GluonCV pre-trained weights to TensorFlow checkpoints. 7 | At present we have completed the conversion of resnet50_v1_b, resnet101_v1_b, resnet50_v1_d and resnet101_v1_d, 8 | with the error rate of the 1000-dimensional logits kept within 1e-5. 9 | (We welcome you to submit a PR to support more models.) 10 | 11 | We also transfer these weights to object detection (using FPN as the baseline; the detection code is posted [here](https://github.com/DetectionTeamUCAS/FPN_Tensorflow_DEV)), 12 | and **train on voc07 trainval (excluding the voc2012 dataset) and test on voc07 test**.
The results are as follows: 13 | 14 | ## Comparison 15 | 16 | ### use_voc2007_metric 17 | | Models | mAP | sheep | horse | bicycle | bottle | cow | sofa | bus | dog | cat | person | train | diningtable | aeroplane | car | pottedplant | tvmonitor | chair | bird | boat | motorbike | 18 | |------------|:---:|:--:|:--:|:--:|:---:|:--:|:--:|:--:|:--:|:--:|:--:|:---:|:--:|:--:|:--:|:--:|:---:|:--:|:--:|:--:|:--:| 19 | |[Faster-RCNN](https://github.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow) resnet101_v1(original)|74.63|76.35|86.18|79.87|58.73|83.4|74.75|80.03|85.4|86.55|78.24|76.07|70.89|78.52|86.26|47.80|76.34|52.14|78.06|58.90|78.04| 20 | |FPN resnet101_v1(original)|76.14|74.63|85.13|81.67|63.79|82.43|77.83|83.07|86.45|85.82|81.08|81.01|71.22|80.01|86.30|48.05|73.89|56.99|78.33|62.91|82.24| 21 | |FPN resnet101_v1_d|77.98|78.01|87.48|85.34|65.42|84.56|74.42|82.97|87.87|87.34|82.14|84.44|70.32|80.64|88.6|51.9|76.59|59.31|81.19|67.84|83.1| 22 | 23 | 24 | **FPN_resnet101_v1_d is transferred from GluonCV** 25 | 26 | **FPN_resnet101_v1(original) is the official resnet in [tensorflow/models](https://github.com/tensorflow/models/tree/master/research/slim/nets)** 27 | 28 | ## My Development Environment 29 | 1. python2.7 (anaconda recommended) 30 | 31 | 2. cuda9.0 32 | 33 | 3. [opencv(cv2)](https://pypi.org/project/opencv-python/) 34 | 35 | 4. mxnet-cu90 (1.3.0) 36 | 37 | 5. tensorflow == 1.10 38 | 39 | 6. [GluonCV](https://gluon-cv.mxnet.io/) 40 | 41 | ## Download MxNet GluonCV PreTrained Weights 42 | 43 | ``` 44 | cd $PATH_ROOT/resnet 45 | (modify the resnet version in the main function of download_mxnet_resnet_weights.py) 46 | python download_mxnet_resnet_weights.py 47 | ``` 48 | 49 | 50 | ## Convert MxNet Weights to TensorFlow Checkpoints and Calculate Errors 51 | 52 | Modify the main function in gluon2TF/resnet/test_resnet.py as follows, then run it: 53 | ``` 54 | MODEL_NAME = 'resnet101_v1d' (modify the version as you want) 55 | Mxnet_Weights_PATH = '../mxnet_weights/resnet101_v1d-1b2b825f.params' (remember to modify the path) 56 | 57 | cal_erro(img_path='../demo_img/person.jpg', 58 | use_tf_ckpt=False, 59 | ckpt_path='../tf_ckpts/%s.ckpt' % MODEL_NAME, 60 | save_ckpt=True) 61 | ``` 62 | 63 | Then run: 64 | ``` 65 | cd $PATH_ROOT/resnet 66 | python test_resnet.py 67 | ``` 68 | 69 | ## Calculate Errors between the Converted TensorFlow Checkpoints and the MxNet GluonCV Weights 70 | 71 | Modify the main function in gluon2TF/resnet/test_resnet.py as follows, then run it: 72 | ``` 73 | MODEL_NAME = 'resnet101_v1d' (modify the version as you want) 74 | Mxnet_Weights_PATH = '../mxnet_weights/resnet101_v1d-1b2b825f.params' (remember to modify the path) 75 | 76 | cal_erro(img_path='../demo_img/person.jpg', 77 | use_tf_ckpt=True, 78 | ckpt_path='../tf_ckpts/%s.ckpt' % MODEL_NAME, 79 | save_ckpt=False) 80 | ``` 81 | 82 | Then run: 83 | ``` 84 | cd $PATH_ROOT/resnet 85 | python test_resnet.py 86 | ``` 87 | 88 | 
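## Load the Converted Checkpoint (a sketch)

A minimal, illustrative sketch of restoring a converted checkpoint for plain inference. This script is not shipped in the repo; `build_resnet` and the checkpoint path follow `resnet/resnet.py` and `test_resnet.py`, and it assumes `test_resnet.py` has already saved `../tf_ckpts/resnet50_v1d.ckpt`. Run it from `$PATH_ROOT/resnet`:

```
import tensorflow as tf
from resnet import build_resnet

# with img_batch=None, build_resnet() feeds a fixed random (1, 224, 224, 3) batch
logits = build_resnet(img_batch=None, scope='resnet50_v1d',
                      is_training=False, freeze_norm=True, num_cls=1000)
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, '../tf_ckpts/resnet50_v1d.ckpt')
    print(sess.run(logits).shape)  # expected: (1, 1000)
```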
-------------------------------------------------------------------------------- /libs/gluon2TF/mxnet_weights/readme.txt: -------------------------------------------------------------------------------- 1 | the dir to place mxnet_weights 2 | -------------------------------------------------------------------------------- /libs/gluon2TF/resnet/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /libs/gluon2TF/resnet/parse_mxnet_weights.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | import mxnet.ndarray as nd 5 | import numpy as np 6 | 7 | 8 | def read_mxnet_weights(path, show=False): 9 | 10 | # assert os.path.exists(path), "path error: {}".format(path) 11 | 12 | name_MxnetArray_dict = nd.load(path) 13 | 14 | name_array_dict = {} 15 | for name in sorted(name_MxnetArray_dict.keys()): 16 | mxnet_array = name_MxnetArray_dict[name] 17 | array = mxnet_array.asnumpy() 18 | 19 | if show: 20 | print ("name: {} || shape: {} || dtype: {}".format(name, array.shape, array.dtype)) 21 | 22 | if name.endswith("weight"): 23 | if name.endswith("fc.weight"): 24 | array = np.transpose(array, [1, 0]) 25 | else: 26 | array = np.transpose(array, [2, 3, 1, 0]) 27 | # (out_channel, in_channel, k, k)(mxnet) --> (k, k, in_channel, out_channel)(tf) 28 | # (32, 3, 3, 3)-->(3, 3, 3, 32) 29 | name_array_dict[name] = array 30 | 31 | return name_array_dict 32 | 33 | 34 | def check_mxnet_names(mxnet_tf_map, mxnetName_array_dict): 35 | 36 | for key1, key2 in zip(sorted(mxnet_tf_map.keys()), sorted(mxnetName_array_dict.keys())): 37 | assert key1 == key2, "keys in mxnet_array_dict and mxnet_tf_map are not equal, details:\n" \ 38 | "key1 in mxnet_tf_map: {}\n"\ 39 | "key2 in mxnet_array dict: {}".format(key1, key2) 40 | if len(mxnetName_array_dict) == len(mxnet_tf_map): 41 | print("all mxnet names are mapped") 42 | 43 | 44 | def check_tf_vars(tf_mxnet_map, mxnetName_array_dict, tf_model_vars, scope='resnet50_v1_d'): 45 | 46 | tf_nake_names = sorted([var.op.name.split("%s/" % scope)[1] for var in tf_model_vars]) 47 | # check names 48 | for tf_name, name2 in zip(tf_nake_names, sorted(tf_mxnet_map.keys())): 49 | assert tf_name == name2, "keys in tf_model_vars and tf_mxnet_map are not equal, details:\n" \ 50 | "tf_name in tf_model_vars: {}\n" \ 51 | "name2 in tf_mxnet_maps: {}".format(tf_name, name2) 52 | print("every tf_model_var has a matched name in tf_mxnet_map") 53 | 54 | # check shapes 55 | for var in tf_model_vars: 56 | name = var.op.name.split("%s/"%scope)[1] 57 | array = mxnetName_array_dict[tf_mxnet_map[name]] 58 | 59 | assert var.shape == array.shape, "shapes of var in tf_model_vars and the mxnet array are not equal, details:\n" \ 60 | "tf_var in tf_model_vars: {}\n" \ 61 | "name in tf_mxnet_maps: {}, shape is : {}".format(var, tf_mxnet_map[name], 62 | array.shape) 63 | print("All tf_model_var shapes match the shapes of arrays in mxnet_array_dict...") 64 | 65 | 66 | -------------------------------------------------------------------------------- /libs/gluon2TF/resnet/resnet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | from __future__ import absolute_import, print_function, division 5 | import numpy as np 6 | import tensorflow as tf 7 | import tensorflow.contrib.slim as slim 8 | from resnet_utils import get_resnet_v1_d, get_resnet_v1_b 9 | from parse_mxnet_weights import read_mxnet_weights, check_mxnet_names, check_tf_vars 10 | import weights_map 11 | import os 12 | 13 | BottleNeck_NUM_DICT = { 14 | 'resnet50_v1b': [3, 4, 6, 3], 15 | 'resnet101_v1b': [3, 4, 23, 3], 16 | 'resnet50_v1d': [3, 4, 6, 3], 17 | 'resnet101_v1d': [3, 4, 23, 3] 18 | } 19 | 20 | BASE_CHANNELS_DICT = { 21 | 'resnet50_v1b': [64, 128, 256, 512], 22 | 'resnet101_v1b': [64, 128, 256, 512], 23 | 'resnet50_v1d': [64, 128, 256, 512], 24 | 'resnet101_v1d': [64, 128, 256,
512] 25 | } 26 | 27 | 28 | def create_resotre_op(scope, mxnet_weights_path): 29 | 30 | mxnetName_array_dict = read_mxnet_weights(mxnet_weights_path, show=False) 31 | 32 | tf_mxnet_map, mxnet_tf_map = \ 33 | weights_map.get_map(scope=scope, 34 | bottleneck_nums=BottleNeck_NUM_DICT[scope], show_mxnettf=False, show_tfmxnet=False) 35 | 36 | tf_model_vars = slim.get_model_variables(scope) 37 | 38 | # # check name and var 39 | check_mxnet_names(mxnet_tf_map, mxnetName_array_dict=mxnetName_array_dict) 40 | check_tf_vars(tf_mxnet_map, mxnetName_array_dict, tf_model_vars, scope=scope) 41 | # # 42 | 43 | assign_ops = [] 44 | 45 | for var in tf_model_vars: 46 | name = var.op.name.split('%s/' % scope)[1] 47 | new_val = tf.constant(mxnetName_array_dict[tf_mxnet_map[name]]) 48 | sub_assign_op = tf.assign(var, value=new_val) 49 | 50 | assign_ops.append(sub_assign_op) 51 | 52 | assign_op = tf.group(*assign_ops) 53 | 54 | return assign_op 55 | 56 | 57 | def build_resnet(img_batch=None, scope='resnet50_v1d', is_training=True, freeze_norm=False, num_cls=1000): 58 | if img_batch is None: 59 | np.random.seed(30) 60 | img_batch = np.random.rand(1, 224, 224, 3) # N, H, W, C 61 | img_batch = tf.constant(img_batch, dtype=tf.float32) 62 | 63 | print("Please ensure the img is in NHWC") 64 | 65 | if scope.endswith('b'): 66 | get_resnet_fn = get_resnet_v1_b 67 | elif scope.endswith('d'): 68 | get_resnet_fn = get_resnet_v1_d 69 | else: raise NotImplementedError('scope must end with b or d') # guard: get_resnet_fn was undefined otherwise 70 | logits = get_resnet_fn(input_x=img_batch, scope=scope, 71 | bottleneck_nums=BottleNeck_NUM_DICT[scope], 72 | base_channels=BASE_CHANNELS_DICT[scope], 73 | is_training=is_training, freeze_norm=freeze_norm, num_cls=num_cls) 74 | 75 | return logits 76 | 77 | 78 | if __name__ == "__main__": 79 | build_resnet() 80 | create_resotre_op(scope='resnet50_v1d', mxnet_weights_path='../mxnet_weights/resnet50_v1d-117a384e.params') # example args; the bare call raised TypeError 81 | -------------------------------------------------------------------------------- /libs/gluon2TF/resnet/some_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import mxnet as mx 4 | from mxnet.gluon import nn 5 | from mxnet import ndarray as nd 6 | import tensorflow.contrib.slim as slim 7 | import tensorflow as tf 8 | import numpy as np 9 | from test_resnet import mxnet_process_img 10 | # Convolution layer 11 | # input/output data format: batch * channel * height * width 12 | # weight format: output_channels * in_channels * height * width 13 | np.random.seed(30) 14 | 15 | # w = nd.array(np.random.rand(2, 3, 3, 3)) 16 | w = nd.load('/home/yjr/MxNet_Codes/gluon-cv/scripts/gloun2TF/mxnet_weights/resnet50_v1b-0ecdba34.params')['conv1.weight'] # [64, 3, 7, 7] 17 | # w = nd.arange(9*2).reshape((2, 1, 3, 3)) 18 | data = nd.array(np.random.rand(1, 3, 224, 224)) 19 | # data, _ = mxnet_process_img('../demo_img/person.jpg') 20 | # data = nd.arange(6*6).reshape((1, 1, 6, 6)) 21 | 22 | # convolution operation 23 | out = nd.Convolution(data, w, no_bias=True, 24 | kernel=(7, 7), 25 | stride=(2, 2), 26 | num_filter=64, 27 | pad=(3, 3)) 28 | 29 | 30 | 31 | def tf_conv(data, w): 32 | 33 | data = tf.constant(data.asnumpy()) 34 | data = tf.pad(data, paddings=[[0, 0], [0, 0], [3, 3], [3, 3]]) 35 | tf_out = slim.conv2d(data, num_outputs=64, kernel_size=[7, 7], padding='VALID', stride=2, 36 | biases_initializer=None, data_format='NCHW', normalizer_fn=None, activation_fn=None) 37 | tf_w = tf.constant(np.transpose(w.asnumpy(), [2, 3, 1, 0])) 38 | # tf_w = 39 | model_vars = slim.get_model_variables() 40 | assign_op = tf.assign(model_vars[0], tf_w) 41 | 42 | with tf.Session() as sess: 43 | sess.run(assign_op) 44 | print(sess.run(tf_out)) 45 | 46 | 47 | if __name__ == '__main__': 48 | tf_conv(data, w=w) 49 | print "mxnet_out: ", out 50 | print 20 * "+" -------------------------------------------------------------------------------- /libs/gluon2TF/resnet/test_resnet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, division, print_function 4 | 5 | import tensorflow as tf 6 | import tensorflow.contrib.slim as slim 7 | 8 | from mxnet import nd, image 9 | import numpy as np 10 | from mxnet.gluon.data.vision import transforms 11 | from gluoncv.model_zoo import get_model 12 | from gluoncv.data.transforms.presets.imagenet import transform_eval 13 | from resnet import build_resnet, create_resotre_op 14 | from resnet_utils import DEBUG 15 | import os 16 | 17 | 18 | # MODEL_NAME = 'resnet50_v1d' 19 | # Mxnet_Weights_PATH = '../mxnet_weights/resnet50_v1d-117a384e.params' 20 | 21 | 22 | MODEL_NAME = 'resnet101_v1b' 23 | # Mxnet_Weights_PATH = '../mxnet_weights/resnet101_v1d-1b2b825f.params' 24 | Mxnet_Weights_PATH = '../mxnet_weights/resnet101_v1b-a455932a.params' 25 | 26 | def mxnet_process_img(path): 27 | # Load Images 28 | img = image.imread(path) 29 | 30 | # Transform 31 | img = transform_eval(img) 32 | img_arr = img.asnumpy() 33 | if len(img_arr) == 3: 34 | img_arr = np.expand_dims(img_arr, axis=0) 35 | img_tf_tensor = tf.constant(img_arr) 36 | 37 | # np.random.seed(30) 38 | # img = nd.array(np.random.randn(1, 3, 600, 800)) 39 | # img_tf_tensor = tf.constant(img.asnumpy()) 40 | img_tf_tensor = tf.transpose(img_tf_tensor, [0, 2, 3, 1]) 41 | return img, img_tf_tensor 42 | 43 | # 44 | def mxnet_infer(img): 45 | 46 | model_name = MODEL_NAME 47 | net = get_model(model_name, pretrained=False) 48 | net.load_parameters(Mxnet_Weights_PATH) 49 | pred = net(img) 50 | 51 | # print (pred.shape, pred.dtype) 52 | pred = pred.asnumpy() 53 | return pred 54 | 55 | 56 | def tf_infer(img, save_ckpt=True, restore_from_tfckpt=False, ckpt_path=None): 57 | 58 | pred_tensor = build_resnet(img_batch=img, scope=MODEL_NAME, 59 | is_training=False, freeze_norm=True, num_cls=1000) 60 | 61 | if restore_from_tfckpt: 62 | print("restore weights from tf_CKPT") 63 | assert ckpt_path is not None, "ckpt_path is None, Error" 64 | restore_op = tf.train.Saver() 65 | else: 66 | print('restore weights from MxnetWeights') 67 | restore_op = create_resotre_op(MODEL_NAME, Mxnet_Weights_PATH) 68 | 69 | if DEBUG: 70 | from resnet_utils import debug_dict 71 | print (debug_dict) 72 | assert len(debug_dict) >= 3, "debug_dict size error, len is: {}".format(len(debug_dict)) 73 | 74 | if save_ckpt: 75 | save_dir = '../tf_ckpts' 76 | if not os.path.exists(save_dir): 77 | os.mkdir(save_dir) 78 | saver = tf.train.Saver(max_to_keep=30) 79 | save_ckpt = os.path.join(save_dir, '%s.ckpt' % MODEL_NAME) 80 | 81 | with tf.Session() as sess: 82 | if restore_from_tfckpt: 83 | restore_op.restore(sess, ckpt_path) 84 | else: 85 | sess.run(restore_op) 86 | if DEBUG: 87 | name_val = {} 88 | for name in debug_dict.keys(): 89 | name_val[name] = sess.run(debug_dict[name]) 90 | pred = sess.run(pred_tensor) 91 | if save_ckpt: 92 | saver.save(sess, save_ckpt) 93 | 94 | return pred 95 | 96 | 97 | def cal_erro(img_path, use_tf_ckpt=False, ckpt_path=None, save_ckpt=False): 98 | 99 | mxnet_img, tf_img = mxnet_process_img(img_path) 100 | 101 | mxnet_pred = mxnet_infer(mxnet_img) 102 | 103 | mxnet_pred = np.squeeze(mxnet_pred, axis=0) 104 | tf_pred = tf_infer(tf_img, restore_from_tfckpt=use_tf_ckpt, ckpt_path=ckpt_path,
save_ckpt=save_ckpt) 105 | tf_pred = np.squeeze(tf_pred, axis=0) 106 | assert mxnet_pred.shape == tf_pred.shape, "mxnet_pred shape Do Not equal with tf_pred shape" 107 | 108 | argmax_mxnet = np.argmax(mxnet_pred) 109 | argmax_tf = np.argmax(tf_pred) 110 | 111 | erro = np.linalg.norm(tf_pred-mxnet_pred) 112 | for i, (m, t) in enumerate(zip(mxnet_pred, tf_pred)): 113 | if i == 5: 114 | break 115 | print ("mxnet|tf==>>{} | {} ".format(m, t)) 116 | 117 | print ('total_erro-->', erro) 118 | print ('erro_rate-->', erro/np.linalg.norm(mxnet_pred)) 119 | print ("argmax_mxnet: {} || tf_argmx: {}".format(argmax_mxnet, argmax_tf)) 120 | 121 | 122 | if __name__ == '__main__': 123 | 124 | # cal_erro(img_path='../demo_img/person.jpg', 125 | # use_tf_ckpt=False, 126 | # ckpt_path=None, 127 | # save_ckpt=True) 128 | cal_erro(img_path='../demo_img/person.jpg', 129 | use_tf_ckpt=True, 130 | ckpt_path='../tf_ckpts/%s.ckpt' % MODEL_NAME, 131 | save_ckpt=False) 132 | print (20*"++") 133 | -------------------------------------------------------------------------------- /libs/label_name_dict/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/label_name_dict/__init__.py -------------------------------------------------------------------------------- /libs/label_name_dict/coco_dict.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | class_names = [ 6 | 'back_ground', 'person', 'bicycle', 'car', 'motorcycle', 7 | 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 8 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 9 | 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 10 | 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 11 | 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 12 | 'sports ball', 'kite', 'baseball bat', 'baseball glove', 13 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 14 | 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 15 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 16 | 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 17 | 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 18 | 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 19 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 20 | 'book', 'clock', 'vase', 'scissors', 'teddy bear', 21 | 'hair drier', 'toothbrush'] 22 | 23 | 24 | classes_originID = { 25 | 'person': 1, 'bicycle': 2, 'car': 3, 'motorcycle': 4, 26 | 'airplane': 5, 'bus': 6, 'train': 7, 'truck': 8, 'boat': 9, 27 | 'traffic light': 10, 'fire hydrant': 11, 'stop sign': 13, 28 | 'parking meter': 14, 'bench': 15, 'bird': 16, 'cat': 17, 29 | 'dog': 18, 'horse': 19, 'sheep': 20, 'cow': 21, 'elephant': 22, 30 | 'bear': 23, 'zebra': 24, 'giraffe': 25, 'backpack': 27, 31 | 'umbrella': 28, 'handbag': 31, 'tie': 32, 'suitcase': 33, 32 | 'frisbee': 34, 'skis': 35, 'snowboard': 36, 'sports ball': 37, 33 | 'kite': 38, 'baseball bat': 39, 'baseball glove': 40, 34 | 'skateboard': 41, 'surfboard': 42, 'tennis racket': 43, 35 | 'bottle': 44, 'wine glass': 46, 'cup': 47, 'fork': 48, 36 | 'knife': 49, 'spoon': 50, 'bowl': 51, 'banana': 52, 'apple': 53, 37 | 'sandwich': 54, 'orange': 55, 'broccoli': 56, 'carrot': 57, 38 | 'hot dog': 58, 'pizza': 59, 'donut': 60, 'cake': 61, 39 | 'chair': 62, 'couch': 63, 
'potted plant': 64, 'bed': 65, 40 | 'dining table': 67, 'toilet': 70, 'tv': 72, 'laptop': 73, 41 | 'mouse': 74, 'remote': 75, 'keyboard': 76, 'cell phone': 77, 42 | 'microwave': 78, 'oven': 79, 'toaster': 80, 'sink': 81, 43 | 'refrigerator': 82, 'book': 84, 'clock': 85, 'vase': 86, 44 | 'scissors': 87, 'teddy bear': 88, 'hair drier': 89, 45 | 'toothbrush': 90} 46 | 47 | originID_classes = {item: key for key, item in classes_originID.items()} 48 | NAME_LABEL_MAP = dict(zip(class_names, range(len(class_names)))) 49 | LABEL_NAME_MAP = dict(zip(range(len(class_names)), class_names)) 50 | 51 | # print (originID_classes) 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /libs/label_name_dict/label_dict.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | 4 | from libs.configs import cfgs 5 | 6 | 7 | class_names = [ 8 | 'back_ground', 'person', 'bicycle', 'car', 'motorcycle', 9 | 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 10 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 11 | 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 12 | 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 13 | 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 14 | 'sports ball', 'kite', 'baseball bat', 'baseball glove', 15 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 16 | 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 17 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 18 | 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 19 | 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 20 | 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 21 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 22 | 'book', 'clock', 'vase', 'scissors', 'teddy bear', 23 | 'hair drier', 'toothbrush'] 24 | 25 | classes_originID = { 26 | 'person': 1, 'bicycle': 2, 'car': 3, 'motorcycle': 4, 27 | 'airplane': 5, 'bus': 6, 'train': 7, 'truck': 8, 'boat': 9, 28 | 'traffic light': 10, 'fire hydrant': 11, 'stop sign': 13, 29 | 'parking meter': 14, 'bench': 15, 'bird': 16, 'cat': 17, 30 | 'dog': 18, 'horse': 19, 'sheep': 20, 'cow': 21, 'elephant': 22, 31 | 'bear': 23, 'zebra': 24, 'giraffe': 25, 'backpack': 27, 32 | 'umbrella': 28, 'handbag': 31, 'tie': 32, 'suitcase': 33, 33 | 'frisbee': 34, 'skis': 35, 'snowboard': 36, 'sports ball': 37, 34 | 'kite': 38, 'baseball bat': 39, 'baseball glove': 40, 35 | 'skateboard': 41, 'surfboard': 42, 'tennis racket': 43, 36 | 'bottle': 44, 'wine glass': 46, 'cup': 47, 'fork': 48, 37 | 'knife': 49, 'spoon': 50, 'bowl': 51, 'banana': 52, 'apple': 53, 38 | 'sandwich': 54, 'orange': 55, 'broccoli': 56, 'carrot': 57, 39 | 'hot dog': 58, 'pizza': 59, 'donut': 60, 'cake': 61, 40 | 'chair': 62, 'couch': 63, 'potted plant': 64, 'bed': 65, 41 | 'dining table': 67, 'toilet': 70, 'tv': 72, 'laptop': 73, 42 | 'mouse': 74, 'remote': 75, 'keyboard': 76, 'cell phone': 77, 43 | 'microwave': 78, 'oven': 79, 'toaster': 80, 'sink': 81, 44 | 'refrigerator': 82, 'book': 84, 'clock': 85, 'vase': 86, 45 | 'scissors': 87, 'teddy bear': 88, 'hair drier': 89, 46 | 'toothbrush': 90} 47 | 48 | 49 | def get_coco_label_dict(): 50 | originID_classes = {item: key for key, item in classes_originID.items()} 51 | NAME_LABEL_MAP = dict(zip(class_names, range(len(class_names)))) 52 | return NAME_LABEL_MAP 53 | 54 | if cfgs.DATASET_NAME == 'ship': 55 | NAME_LABEL_MAP = { 56 | 
'back_ground': 0, 57 | 'ship': 1 58 | } 59 | elif cfgs.DATASET_NAME == 'aeroplane': 60 | NAME_LABEL_MAP = { 61 | 'back_ground': 0, 62 | 'aeroplane': 1 63 | } 64 | elif cfgs.DATASET_NAME == 'WIDER': 65 | NAME_LABEL_MAP = { 66 | 'back_ground': 0, 67 | 'face': 1 68 | } 69 | elif cfgs.DATASET_NAME == 'icdar': 70 | NAME_LABEL_MAP = { 71 | 'back_ground': 0, 72 | 'text': 1 73 | } 74 | elif cfgs.DATASET_NAME.startswith('DOTA'): 75 | NAME_LABEL_MAP = { 76 | 'back_ground': 0, 77 | 'roundabout': 1, 78 | 'tennis-court': 2, 79 | 'swimming-pool': 3, 80 | 'storage-tank': 4, 81 | 'soccer-ball-field': 5, 82 | 'small-vehicle': 6, 83 | 'ship': 7, 84 | 'plane': 8, 85 | 'large-vehicle': 9, 86 | 'helicopter': 10, 87 | 'harbor': 11, 88 | 'ground-track-field': 12, 89 | 'bridge': 13, 90 | 'basketball-court': 14, 91 | 'baseball-diamond': 15 92 | } 93 | elif cfgs.DATASET_NAME.startswith('DOAI2019'): 94 | NAME_LABEL_MAP = { 95 | 'back_ground': 0, 96 | 'turntable': 1, 97 | 'tennis-court': 2, 98 | 'swimming-pool': 3, 99 | 'storage-tank': 4, 100 | 'soccer-ball-field': 5, 101 | 'small-vehicle': 6, 102 | 'ship': 7, 103 | 'plane': 8, 104 | 'large-vehicle': 9, 105 | 'helicopter': 10, 106 | 'harbor': 11, 107 | 'ground-track-field': 12, 108 | 'bridge': 13, 109 | 'basketball-court': 14, 110 | 'baseball-diamond': 15, 111 | 'container-crane': 16 112 | } 113 | elif cfgs.DATASET_NAME == 'coco': 114 | NAME_LABEL_MAP = get_coco_label_dict() 115 | elif cfgs.DATASET_NAME == 'pascal': 116 | NAME_LABEL_MAP = { 117 | 'back_ground': 0, 118 | 'aeroplane': 1, 119 | 'bicycle': 2, 120 | 'bird': 3, 121 | 'boat': 4, 122 | 'bottle': 5, 123 | 'bus': 6, 124 | 'car': 7, 125 | 'cat': 8, 126 | 'chair': 9, 127 | 'cow': 10, 128 | 'diningtable': 11, 129 | 'dog': 12, 130 | 'horse': 13, 131 | 'motorbike': 14, 132 | 'person': 15, 133 | 'pottedplant': 16, 134 | 'sheep': 17, 135 | 'sofa': 18, 136 | 'train': 19, 137 | 'tvmonitor': 20 138 | } 139 | elif cfgs.DATASET_NAME == 'bdd100k': 140 | NAME_LABEL_MAP = { 141 | 'back_ground': 0, 142 | 'bus': 1, 143 | 'traffic light': 2, 144 | 'traffic sign': 3, 145 | 'person': 4, 146 | 'bike': 5, 147 | 'truck': 6, 148 | 'motor': 7, 149 | 'car': 8, 150 | 'train': 9, 151 | 'rider': 10 152 | } 153 | else: 154 | raise NotImplementedError('please set label dict!') # the old `assert 'please set label dict!'` was always true and never failed
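# Illustrative example (added): with cfgs.DATASET_NAME == 'coco',
#   NAME_LABEL_MAP['person'] -> 1
#   get_label_name_map()[1]  -> 'person'   (reverse map built below)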
155 | 156 | 157 | def get_label_name_map(): 158 | reverse_dict = {} 159 | for name, label in NAME_LABEL_MAP.items(): 160 | reverse_dict[label] = name 161 | return reverse_dict 162 | 163 | 164 | LABEl_NAME_MAP = get_label_name_map() -------------------------------------------------------------------------------- /libs/label_name_dict/remote_sensing_dict.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | NAME_LABEL_MAP = { 4 | 'back_ground': 0, 5 | 'building': 1 6 | } 7 | 8 | 9 | def get_label_name_map(): 10 | reverse_dict = {} 11 | for name, label in NAME_LABEL_MAP.items(): 12 | reverse_dict[label] = name 13 | return reverse_dict 14 | 15 | LABEl_NAME_MAP = get_label_name_map() -------------------------------------------------------------------------------- /libs/losses/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/losses/__init__.py -------------------------------------------------------------------------------- /libs/networks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/networks/__init__.py -------------------------------------------------------------------------------- /libs/networks/mobilenet/README.md: -------------------------------------------------------------------------------- 1 | # Mobilenet V2 2 | This folder contains building code for Mobilenet V2, based on 3 | [Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation](https://arxiv.org/abs/1801.04381) 4 | 5 | 6 | # Pretrained model 7 | TODO 8 | 9 | # Example 10 | TODO 11 | 12 | 13 | -------------------------------------------------------------------------------- /libs/networks/mobilenet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/networks/mobilenet/__init__.py -------------------------------------------------------------------------------- /libs/networks/mobilenet_v2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | import tensorflow.contrib.slim as slim 5 | import tensorflow as tf 6 | 7 | from libs.networks.mobilenet import mobilenet_v2 8 | from libs.networks.mobilenet.mobilenet import training_scope 9 | from libs.networks.mobilenet.mobilenet_v2 import op 10 | from libs.networks.mobilenet.mobilenet_v2 import ops 11 | expand_input = ops.expand_input_by_factor 12 | 13 | V2_BASE_DEF = dict( 14 | defaults={ 15 | # Note: these parameters of batch norm affect the architecture 16 | # that's why they are here and not in training_scope.
17 | (slim.batch_norm,): {'center': True, 'scale': True}, 18 | (slim.conv2d, slim.fully_connected, slim.separable_conv2d): { 19 | 'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6 20 | }, 21 | (ops.expanded_conv,): { 22 | 'expansion_size': expand_input(6), 23 | 'split_expansion': 1, 24 | 'normalizer_fn': slim.batch_norm, 25 | 'residual': True 26 | }, 27 | (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME'} 28 | }, 29 | spec=[ 30 | op(slim.conv2d, stride=2, num_outputs=32, kernel_size=[3, 3]), 31 | op(ops.expanded_conv, 32 | expansion_size=expand_input(1, divisible_by=1), 33 | num_outputs=16, scope='expanded_conv'), 34 | op(ops.expanded_conv, stride=2, num_outputs=24, scope='expanded_conv_1'), 35 | op(ops.expanded_conv, stride=1, num_outputs=24, scope='expanded_conv_2'), 36 | op(ops.expanded_conv, stride=2, num_outputs=32, scope='expanded_conv_3'), 37 | op(ops.expanded_conv, stride=1, num_outputs=32, scope='expanded_conv_4'), 38 | op(ops.expanded_conv, stride=1, num_outputs=32, scope='expanded_conv_5'), 39 | op(ops.expanded_conv, stride=2, num_outputs=64, scope='expanded_conv_6'), 40 | op(ops.expanded_conv, stride=1, num_outputs=64, scope='expanded_conv_7'), 41 | op(ops.expanded_conv, stride=1, num_outputs=64, scope='expanded_conv_8'), 42 | op(ops.expanded_conv, stride=1, num_outputs=64, scope='expanded_conv_9'), 43 | op(ops.expanded_conv, stride=1, num_outputs=96, scope='expanded_conv_10'), 44 | op(ops.expanded_conv, stride=1, num_outputs=96, scope='expanded_conv_11'), 45 | op(ops.expanded_conv, stride=1, num_outputs=96, scope='expanded_conv_12') 46 | ], 47 | ) 48 | 49 | 50 | V2_HEAD_DEF = dict( 51 | defaults={ 52 | # Note: these parameters of batch norm affect the architecture 53 | # that's why they are here and not in training_scope. 54 | (slim.batch_norm,): {'center': True, 'scale': True}, 55 | (slim.conv2d, slim.fully_connected, slim.separable_conv2d): { 56 | 'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6 57 | }, 58 | (ops.expanded_conv,): { 59 | 'expansion_size': expand_input(6), 60 | 'split_expansion': 1, 61 | 'normalizer_fn': slim.batch_norm, 62 | 'residual': True 63 | }, 64 | (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME'} 65 | }, 66 | spec=[ 67 | op(ops.expanded_conv, stride=2, num_outputs=160, scope='expanded_conv_13'), 68 | op(ops.expanded_conv, stride=1, num_outputs=160, scope='expanded_conv_14'), 69 | op(ops.expanded_conv, stride=1, num_outputs=160, scope='expanded_conv_15'), 70 | op(ops.expanded_conv, stride=1, num_outputs=320, scope='expanded_conv_16'), 71 | op(slim.conv2d, stride=1, kernel_size=[1, 1], num_outputs=1280, scope='Conv_1') 72 | ], 73 | ) 74 | def mobilenetv2_scope(is_training=True, 75 | trainable=True, 76 | weight_decay=0.00004, 77 | stddev=0.09, 78 | dropout_keep_prob=0.8, 79 | bn_decay=0.997): 80 | """Defines the MobileNet training scope. 81 | By default we do not use BN (batch norm is frozen; see batch_norm_params below). 82 | 83 | Rewrites the upstream training scope.
84 | """ 85 | batch_norm_params = { 86 | 'is_training': False, 87 | 'trainable': False, 88 | 'decay': bn_decay, 89 | } 90 | with slim.arg_scope(training_scope(is_training=is_training, weight_decay=weight_decay)): 91 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.separable_conv2d], 92 | trainable=trainable): 93 | with slim.arg_scope([slim.batch_norm], **batch_norm_params) as sc: 94 | return sc 95 | 96 | 97 | 98 | def mobilenetv2_base(img_batch, is_training=True): 99 | 100 | with slim.arg_scope(mobilenetv2_scope(is_training=is_training, trainable=True)): 101 | 102 | feature_to_crop, endpoints = mobilenet_v2.mobilenet_base(input_tensor=img_batch, 103 | num_classes=None, 104 | is_training=False, 105 | depth_multiplier=1.0, 106 | scope='MobilenetV2', 107 | conv_defs=V2_BASE_DEF, 108 | finegrain_classification_mode=False) 109 | 110 | # feature_to_crop = tf.Print(feature_to_crop, [tf.shape(feature_to_crop)], summarize=10, message='rpn_shape') 111 | return feature_to_crop 112 | 113 | 114 | def mobilenetv2_head(inputs, is_training=True): 115 | with slim.arg_scope(mobilenetv2_scope(is_training=is_training, trainable=True)): 116 | net, _ = mobilenet_v2.mobilenet(input_tensor=inputs, 117 | num_classes=None, 118 | is_training=False, 119 | depth_multiplier=1.0, 120 | scope='MobilenetV2', 121 | conv_defs=V2_HEAD_DEF, 122 | finegrain_classification_mode=False) 123 | 124 | net = tf.squeeze(net, [1, 2]) 125 | 126 | return net -------------------------------------------------------------------------------- /libs/networks/ops.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | 4 | 5 | def norm(x, norm_type, is_train, G=32, esp=1e-5): 6 | with tf.variable_scope('{}_norm'.format(norm_type)): 7 | if norm_type == 'none': 8 | output = x 9 | elif norm_type == 'batch': 10 | output = tf.contrib.layers.batch_norm( 11 | x, center=True, scale=True, decay=0.999, 12 | is_training=is_train, updates_collections=None 13 | ) 14 | elif norm_type == 'group': 15 | # normalize 16 | # tranpose: [bs, h, w, c] to [bs, c, h, w] following the paper 17 | x = tf.transpose(x, [0, 3, 1, 2]) 18 | N, C, H, W = x.get_shape().as_list() 19 | G = min(G, C) 20 | x = tf.reshape(x, [-1, G, C // G, H, W]) 21 | mean, var = tf.nn.moments(x, [2, 3, 4], keep_dims=True) 22 | x = (x - mean) / tf.sqrt(var + esp) 23 | # per channel gamma and beta 24 | gamma = tf.Variable(tf.constant(1.0, shape=[C]), dtype=tf.float32, name='gamma') 25 | beta = tf.Variable(tf.constant(0.0, shape=[C]), dtype=tf.float32, name='beta') 26 | gamma = tf.reshape(gamma, [1, C, 1, 1]) 27 | beta = tf.reshape(beta, [1, C, 1, 1]) 28 | 29 | output = tf.reshape(x, [-1, C, H, W]) * gamma + beta 30 | # tranpose: [bs, c, h, w, c] to [bs, h, w, c] following the paper 31 | output = tf.transpose(output, [0, 2, 3, 1]) 32 | else: 33 | raise NotImplementedError 34 | return output 35 | 36 | 37 | def lrelu(x, leak=0.2, name="lrelu"): 38 | with tf.variable_scope(name): 39 | f1 = 0.5 * (1 + leak) 40 | f2 = 0.5 * (1 - leak) 41 | return f1 * x + f2 * abs(x) 42 | 43 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/alexnet.py: -------------------------------------------------------------------------------- 1 | # Copyright 
2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a model definition for AlexNet. 16 | 17 | This work was first described in: 18 | ImageNet Classification with Deep Convolutional Neural Networks 19 | Alex Krizhevsky, Ilya Sutskever and Geoffrey E. Hinton 20 | 21 | and later refined in: 22 | One weird trick for parallelizing convolutional neural networks 23 | Alex Krizhevsky, 2014 24 | 25 | Here we provide the implementation proposed in "One weird trick" and not 26 | "ImageNet Classification"; as per the paper, the LRN layers have been removed. 27 | 28 | Usage: 29 | with slim.arg_scope(alexnet.alexnet_v2_arg_scope()): 30 | outputs, end_points = alexnet.alexnet_v2(inputs) 31 | 32 | @@alexnet_v2 33 | """ 34 | 35 | from __future__ import absolute_import 36 | from __future__ import division 37 | from __future__ import print_function 38 | 39 | import tensorflow as tf 40 | 41 | slim = tf.contrib.slim 42 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) 43 | 44 | 45 | def alexnet_v2_arg_scope(weight_decay=0.0005): 46 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 47 | activation_fn=tf.nn.relu, 48 | biases_initializer=tf.constant_initializer(0.1), 49 | weights_regularizer=slim.l2_regularizer(weight_decay)): 50 | with slim.arg_scope([slim.conv2d], padding='SAME'): 51 | with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc: 52 | return arg_sc 53 | 54 | 55 | def alexnet_v2(inputs, 56 | num_classes=1000, 57 | is_training=True, 58 | dropout_keep_prob=0.5, 59 | spatial_squeeze=True, 60 | scope='alexnet_v2'): 61 | """AlexNet version 2. 62 | 63 | Described in: http://arxiv.org/pdf/1404.5997v2.pdf 64 | Parameters from: 65 | github.com/akrizhevsky/cuda-convnet2/blob/master/layers/ 66 | layers-imagenet-1gpu.cfg 67 | 68 | Note: All the fully_connected layers have been transformed to conv2d layers. 69 | To use in classification mode, resize input to 224x224. To use in fully 70 | convolutional mode, set spatial_squeeze to false. 71 | The LRN layers have been removed and the initializers changed from 72 | random_normal_initializer to xavier_initializer. 73 | 74 | Args: 75 | inputs: a tensor of size [batch_size, height, width, channels]. 76 | num_classes: number of predicted classes. 77 | is_training: whether or not the model is being trained. 78 | dropout_keep_prob: the probability that activations are kept in the dropout 79 | layers during training. 80 | spatial_squeeze: whether or not the model should squeeze the spatial dimensions 81 | of the outputs. Useful to remove unnecessary dimensions for classification. 82 | scope: Optional scope for the variables. 83 | 84 | Returns: 85 | the last op containing the log predictions and end_points dict.
86 | """ 87 | with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc: 88 | end_points_collection = sc.name + '_end_points' 89 | # Collect outputs for conv2d, fully_connected and max_pool2d. 90 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], 91 | outputs_collections=[end_points_collection]): 92 | net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID', 93 | scope='conv1') 94 | net = slim.max_pool2d(net, [3, 3], 2, scope='pool1') 95 | net = slim.conv2d(net, 192, [5, 5], scope='conv2') 96 | net = slim.max_pool2d(net, [3, 3], 2, scope='pool2') 97 | net = slim.conv2d(net, 384, [3, 3], scope='conv3') 98 | net = slim.conv2d(net, 384, [3, 3], scope='conv4') 99 | net = slim.conv2d(net, 256, [3, 3], scope='conv5') 100 | net = slim.max_pool2d(net, [3, 3], 2, scope='pool5') 101 | 102 | # Use conv2d instead of fully_connected layers. 103 | with slim.arg_scope([slim.conv2d], 104 | weights_initializer=trunc_normal(0.005), 105 | biases_initializer=tf.constant_initializer(0.1)): 106 | net = slim.conv2d(net, 4096, [5, 5], padding='VALID', 107 | scope='fc6') 108 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 109 | scope='dropout6') 110 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7') 111 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 112 | scope='dropout7') 113 | net = slim.conv2d(net, num_classes, [1, 1], 114 | activation_fn=None, 115 | normalizer_fn=None, 116 | biases_initializer=tf.zeros_initializer(), 117 | scope='fc8') 118 | 119 | # Convert end_points_collection into a end_point dict. 120 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 121 | if spatial_squeeze: 122 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed') 123 | end_points[sc.name + '/fc8'] = net 124 | return net, end_points 125 | alexnet_v2.default_image_size = 224 126 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/cifarnet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a variant of the CIFAR-10 model definition.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | slim = tf.contrib.slim 24 | 25 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(stddev=stddev) 26 | 27 | 28 | def cifarnet(images, num_classes=10, is_training=False, 29 | dropout_keep_prob=0.5, 30 | prediction_fn=slim.softmax, 31 | scope='CifarNet'): 32 | """Creates a variant of the CifarNet model. 33 | 34 | Note that since the output is a set of 'logits', the values fall in the 35 | interval of (-infinity, infinity). 
Consequently, to convert the outputs to a 36 | probability distribution over the classes, one will need to convert them 37 | using the softmax function: 38 | 39 | logits = cifarnet.cifarnet(images, is_training=False) 40 | probabilities = tf.nn.softmax(logits) 41 | predictions = tf.argmax(logits, 1) 42 | 43 | Args: 44 | images: A batch of `Tensors` of size [batch_size, height, width, channels]. 45 | num_classes: the number of classes in the dataset. 46 | is_training: specifies whether or not we're currently training the model. 47 | This variable will determine the behaviour of the dropout layer. 48 | dropout_keep_prob: the percentage of activation values that are retained. 49 | prediction_fn: a function to get predictions out of logits. 50 | scope: Optional variable_scope. 51 | 52 | Returns: 53 | logits: the pre-softmax activations, a tensor of size 54 | [batch_size, `num_classes`] 55 | end_points: a dictionary from components of the network to the corresponding 56 | activation. 57 | """ 58 | end_points = {} 59 | 60 | with tf.variable_scope(scope, 'CifarNet', [images, num_classes]): 61 | net = slim.conv2d(images, 64, [5, 5], scope='conv1') 62 | end_points['conv1'] = net 63 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool1') 64 | end_points['pool1'] = net 65 | net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm1') 66 | net = slim.conv2d(net, 64, [5, 5], scope='conv2') 67 | end_points['conv2'] = net 68 | net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm2') 69 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool2') 70 | end_points['pool2'] = net 71 | net = slim.flatten(net) 72 | end_points['Flatten'] = net 73 | net = slim.fully_connected(net, 384, scope='fc3') 74 | end_points['fc3'] = net 75 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 76 | scope='dropout3') 77 | net = slim.fully_connected(net, 192, scope='fc4') 78 | end_points['fc4'] = net 79 | logits = slim.fully_connected(net, num_classes, 80 | biases_initializer=tf.zeros_initializer(), 81 | weights_initializer=trunc_normal(1/192.0), 82 | weights_regularizer=None, 83 | activation_fn=None, 84 | scope='logits') 85 | 86 | end_points['Logits'] = logits 87 | end_points['Predictions'] = prediction_fn(logits, scope='Predictions') 88 | 89 | return logits, end_points 90 | cifarnet.default_image_size = 32 91 | 92 | 93 | def cifarnet_arg_scope(weight_decay=0.004): 94 | """Defines the default cifarnet argument scope. 95 | 96 | Args: 97 | weight_decay: The weight decay to use for regularizing the model. 98 | 99 | Returns: 100 | An `arg_scope` to use for the cifarnet model. 101 | """ 102 | with slim.arg_scope( 103 | [slim.conv2d], 104 | weights_initializer=tf.truncated_normal_initializer(stddev=5e-2), 105 | activation_fn=tf.nn.relu): 106 | with slim.arg_scope( 107 | [slim.fully_connected], 108 | biases_initializer=tf.constant_initializer(0.1), 109 | weights_initializer=trunc_normal(0.04), 110 | weights_regularizer=slim.l2_regularizer(weight_decay), 111 | activation_fn=tf.nn.relu) as sc: 112 | return sc 113 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/inception.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Brings all inception models under one namespace.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | # pylint: disable=unused-import 22 | from nets.inception_resnet_v2 import inception_resnet_v2 23 | from nets.inception_resnet_v2 import inception_resnet_v2_arg_scope 24 | from nets.inception_resnet_v2 import inception_resnet_v2_base 25 | from nets.inception_v1 import inception_v1 26 | from nets.inception_v1 import inception_v1_arg_scope 27 | from nets.inception_v1 import inception_v1_base 28 | from nets.inception_v2 import inception_v2 29 | from nets.inception_v2 import inception_v2_arg_scope 30 | from nets.inception_v2 import inception_v2_base 31 | from nets.inception_v3 import inception_v3 32 | from nets.inception_v3 import inception_v3_arg_scope 33 | from nets.inception_v3 import inception_v3_base 34 | from nets.inception_v4 import inception_v4 35 | from nets.inception_v4 import inception_v4_arg_scope 36 | from nets.inception_v4 import inception_v4_base 37 | # pylint: enable=unused-import 38 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/inception_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains common code shared by all inception models. 16 | 17 | Usage of arg scope: 18 | with slim.arg_scope(inception_arg_scope()): 19 | logits, end_points = inception.inception_v3(images, num_classes, 20 | is_training=is_training) 21 | 22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | import tensorflow as tf 28 | 29 | slim = tf.contrib.slim 30 | 31 | 32 | def inception_arg_scope(weight_decay=0.00004, 33 | use_batch_norm=True, 34 | batch_norm_decay=0.9997, 35 | batch_norm_epsilon=0.001): 36 | """Defines the default arg scope for inception models. 37 | 38 | Args: 39 | weight_decay: The weight decay to use for regularizing the model. 40 | use_batch_norm: If `True`, batch_norm is applied after each convolution. 41 | batch_norm_decay: Decay for batch norm moving average.
42 | batch_norm_epsilon: Small float added to variance to avoid dividing by zero 43 | in batch norm. 44 | 45 | Returns: 46 | An `arg_scope` to use for the inception models. 47 | """ 48 | batch_norm_params = { 49 | # Decay for the moving averages. 50 | 'decay': batch_norm_decay, 51 | # epsilon to prevent 0s in variance. 52 | 'epsilon': batch_norm_epsilon, 53 | # collection containing update_ops. 54 | 'updates_collections': tf.GraphKeys.UPDATE_OPS, 55 | } 56 | if use_batch_norm: 57 | normalizer_fn = slim.batch_norm 58 | normalizer_params = batch_norm_params 59 | else: 60 | normalizer_fn = None 61 | normalizer_params = {} 62 | # Set weight_decay for weights in Conv and FC layers. 63 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 64 | weights_regularizer=slim.l2_regularizer(weight_decay)): 65 | with slim.arg_scope( 66 | [slim.conv2d], 67 | weights_initializer=slim.variance_scaling_initializer(), 68 | activation_fn=tf.nn.relu, 69 | normalizer_fn=normalizer_fn, 70 | normalizer_params=normalizer_params) as sc: 71 | return sc 72 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/lenet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a variant of the LeNet model definition.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | slim = tf.contrib.slim 24 | 25 | 26 | def lenet(images, num_classes=10, is_training=False, 27 | dropout_keep_prob=0.5, 28 | prediction_fn=slim.softmax, 29 | scope='LeNet'): 30 | """Creates a variant of the LeNet model. 31 | 32 | Note that since the output is a set of 'logits', the values fall in the 33 | interval of (-infinity, infinity). Consequently, to convert the outputs to a 34 | probability distribution over the characters, one will need to convert them 35 | using the softmax function: 36 | 37 | logits = lenet.lenet(images, is_training=False) 38 | probabilities = tf.nn.softmax(logits) 39 | predictions = tf.argmax(logits, 1) 40 | 41 | Args: 42 | images: A batch of `Tensors` of size [batch_size, height, width, channels]. 43 | num_classes: the number of classes in the dataset. 44 | is_training: specifies whether or not we're currently training the model. 45 | This variable will determine the behaviour of the dropout layer. 46 | dropout_keep_prob: the percentage of activation values that are retained. 47 | prediction_fn: a function to get predictions out of logits. 48 | scope: Optional variable_scope. 
49 | 50 | Returns: 51 | logits: the pre-softmax activations, a tensor of size 52 | [batch_size, `num_classes`] 53 | end_points: a dictionary from components of the network to the corresponding 54 | activation. 55 | """ 56 | end_points = {} 57 | 58 | with tf.variable_scope(scope, 'LeNet', [images, num_classes]): 59 | net = slim.conv2d(images, 32, [5, 5], scope='conv1') 60 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool1') 61 | net = slim.conv2d(net, 64, [5, 5], scope='conv2') 62 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool2') 63 | net = slim.flatten(net) 64 | end_points['Flatten'] = net 65 | 66 | net = slim.fully_connected(net, 1024, scope='fc3') 67 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 68 | scope='dropout3') 69 | logits = slim.fully_connected(net, num_classes, activation_fn=None, 70 | scope='fc4') 71 | 72 | end_points['Logits'] = logits 73 | end_points['Predictions'] = prediction_fn(logits, scope='Predictions') 74 | 75 | return logits, end_points 76 | lenet.default_image_size = 28 77 | 78 | 79 | def lenet_arg_scope(weight_decay=0.0): 80 | """Defines the default lenet argument scope. 81 | 82 | Args: 83 | weight_decay: The weight decay to use for regularizing the model. 84 | 85 | Returns: 86 | An `arg_scope` to use for the lenet model. 87 | """ 88 | with slim.arg_scope( 89 | [slim.conv2d, slim.fully_connected], 90 | weights_regularizer=slim.l2_regularizer(weight_decay), 91 | weights_initializer=tf.truncated_normal_initializer(stddev=0.1), 92 | activation_fn=tf.nn.relu) as sc: 93 | return sc 94 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/mobilenet_v1.md: -------------------------------------------------------------------------------- 1 | # MobileNet_v1 2 | 3 | [MobileNets](https://arxiv.org/abs/1704.04861) are small, low-latency, low-power models parameterized to meet the resource constraints of a variety of use cases. They can be built upon for classification, detection, embeddings and segmentation similar to how other popular large scale models, such as Inception, are used. MobileNets can be run efficiently on mobile devices with [TensorFlow Mobile](https://www.tensorflow.org/mobile/). 4 | 5 | MobileNets trade off between latency, size and accuracy while comparing favorably with popular models from the literature. 6 | 7 | ![alt text](mobilenet_v1.png "MobileNet Graph") 8 | 9 | # Pre-trained Models 10 | 11 | Choose the right MobileNet model to fit your latency and size budget. The size of the network in memory and on disk is proportional to the number of parameters. The latency and power usage of the network scales with the number of Multiply-Accumulates (MACs) which measures the number of fused Multiplication and Addition operations. These MobileNet models have been trained on the 12 | [ILSVRC-2012-CLS](http://www.image-net.org/challenges/LSVRC/2012/) 13 | image classification dataset. Accuracies were computed by evaluating using a single image crop.
14 | 15 | Model Checkpoint | Million MACs | Million Parameters | Top-1 Accuracy| Top-5 Accuracy | 16 | :----:|:------------:|:----------:|:-------:|:-------:| 17 | [MobileNet_v1_1.0_224](http://download.tensorflow.org/models/mobilenet_v1_1.0_224_2017_06_14.tar.gz)|569|4.24|70.7|89.5| 18 | [MobileNet_v1_1.0_192](http://download.tensorflow.org/models/mobilenet_v1_1.0_192_2017_06_14.tar.gz)|418|4.24|69.3|88.9| 19 | [MobileNet_v1_1.0_160](http://download.tensorflow.org/models/mobilenet_v1_1.0_160_2017_06_14.tar.gz)|291|4.24|67.2|87.5| 20 | [MobileNet_v1_1.0_128](http://download.tensorflow.org/models/mobilenet_v1_1.0_128_2017_06_14.tar.gz)|186|4.24|64.1|85.3| 21 | [MobileNet_v1_0.75_224](http://download.tensorflow.org/models/mobilenet_v1_0.75_224_2017_06_14.tar.gz)|317|2.59|68.4|88.2| 22 | [MobileNet_v1_0.75_192](http://download.tensorflow.org/models/mobilenet_v1_0.75_192_2017_06_14.tar.gz)|233|2.59|67.4|87.3| 23 | [MobileNet_v1_0.75_160](http://download.tensorflow.org/models/mobilenet_v1_0.75_160_2017_06_14.tar.gz)|162|2.59|65.2|86.1| 24 | [MobileNet_v1_0.75_128](http://download.tensorflow.org/models/mobilenet_v1_0.75_128_2017_06_14.tar.gz)|104|2.59|61.8|83.6| 25 | [MobileNet_v1_0.50_224](http://download.tensorflow.org/models/mobilenet_v1_0.50_224_2017_06_14.tar.gz)|150|1.34|64.0|85.4| 26 | [MobileNet_v1_0.50_192](http://download.tensorflow.org/models/mobilenet_v1_0.50_192_2017_06_14.tar.gz)|110|1.34|62.1|84.0| 27 | [MobileNet_v1_0.50_160](http://download.tensorflow.org/models/mobilenet_v1_0.50_160_2017_06_14.tar.gz)|77|1.34|59.9|82.5| 28 | [MobileNet_v1_0.50_128](http://download.tensorflow.org/models/mobilenet_v1_0.50_128_2017_06_14.tar.gz)|49|1.34|56.2|79.6| 29 | [MobileNet_v1_0.25_224](http://download.tensorflow.org/models/mobilenet_v1_0.25_224_2017_06_14.tar.gz)|41|0.47|50.6|75.0| 30 | [MobileNet_v1_0.25_192](http://download.tensorflow.org/models/mobilenet_v1_0.25_192_2017_06_14.tar.gz)|34|0.47|49.0|73.6| 31 | [MobileNet_v1_0.25_160](http://download.tensorflow.org/models/mobilenet_v1_0.25_160_2017_06_14.tar.gz)|21|0.47|46.0|70.7| 32 | [MobileNet_v1_0.25_128](http://download.tensorflow.org/models/mobilenet_v1_0.25_128_2017_06_14.tar.gz)|14|0.47|41.3|66.2| 33 | 34 | 35 | Here is an example of how to download the MobileNet_v1_1.0_224 checkpoint: 36 | 37 | ```shell 38 | $ CHECKPOINT_DIR=/tmp/checkpoints 39 | $ mkdir ${CHECKPOINT_DIR} 40 | $ wget http://download.tensorflow.org/models/mobilenet_v1_1.0_224_2017_06_14.tar.gz 41 | $ tar -xvf mobilenet_v1_1.0_224_2017_06_14.tar.gz 42 | $ mv mobilenet_v1_1.0_224.ckpt.* ${CHECKPOINT_DIR} 43 | $ rm mobilenet_v1_1.0_224_2017_06_14.tar.gz 44 | ``` 45 | More information on integrating MobileNets into your project can be found at the [TF-Slim Image Classification Library](https://github.com/tensorflow/models/blob/master/slim/README.md). 46 | 47 | To get started running models on-device go to [TensorFlow Mobile](https://www.tensorflow.org/mobile/). 48 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/mobilenet_v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/networks/slim_nets/mobilenet_v1.png -------------------------------------------------------------------------------- /libs/networks/slim_nets/nets_factory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. 
All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a factory for building various models.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | import functools 21 | 22 | import tensorflow as tf 23 | 24 | from nets import alexnet 25 | from nets import cifarnet 26 | from nets import inception 27 | from nets import lenet 28 | from nets import mobilenet_v1 29 | from nets import overfeat 30 | from nets import resnet_v1 31 | from nets import resnet_v2 32 | from nets import vgg 33 | 34 | slim = tf.contrib.slim 35 | 36 | networks_map = {'alexnet_v2': alexnet.alexnet_v2, 37 | 'cifarnet': cifarnet.cifarnet, 38 | 'overfeat': overfeat.overfeat, 39 | 'vgg_a': vgg.vgg_a, 40 | 'vgg_16': vgg.vgg_16, 41 | 'vgg_19': vgg.vgg_19, 42 | 'inception_v1': inception.inception_v1, 43 | 'inception_v2': inception.inception_v2, 44 | 'inception_v3': inception.inception_v3, 45 | 'inception_v4': inception.inception_v4, 46 | 'inception_resnet_v2': inception.inception_resnet_v2, 47 | 'lenet': lenet.lenet, 48 | 'resnet_v1_50': resnet_v1.resnet_v1_50, 49 | 'resnet_v1_101': resnet_v1.resnet_v1_101, 50 | 'resnet_v1_152': resnet_v1.resnet_v1_152, 51 | 'resnet_v1_200': resnet_v1.resnet_v1_200, 52 | 'resnet_v2_50': resnet_v2.resnet_v2_50, 53 | 'resnet_v2_101': resnet_v2.resnet_v2_101, 54 | 'resnet_v2_152': resnet_v2.resnet_v2_152, 55 | 'resnet_v2_200': resnet_v2.resnet_v2_200, 56 | 'mobilenet_v1': mobilenet_v1.mobilenet_v1, 57 | } 58 | 59 | arg_scopes_map = {'alexnet_v2': alexnet.alexnet_v2_arg_scope, 60 | 'cifarnet': cifarnet.cifarnet_arg_scope, 61 | 'overfeat': overfeat.overfeat_arg_scope, 62 | 'vgg_a': vgg.vgg_arg_scope, 63 | 'vgg_16': vgg.vgg_arg_scope, 64 | 'vgg_19': vgg.vgg_arg_scope, 65 | 'inception_v1': inception.inception_v3_arg_scope, 66 | 'inception_v2': inception.inception_v3_arg_scope, 67 | 'inception_v3': inception.inception_v3_arg_scope, 68 | 'inception_v4': inception.inception_v4_arg_scope, 69 | 'inception_resnet_v2': 70 | inception.inception_resnet_v2_arg_scope, 71 | 'lenet': lenet.lenet_arg_scope, 72 | 'resnet_v1_50': resnet_v1.resnet_arg_scope, 73 | 'resnet_v1_101': resnet_v1.resnet_arg_scope, 74 | 'resnet_v1_152': resnet_v1.resnet_arg_scope, 75 | 'resnet_v1_200': resnet_v1.resnet_arg_scope, 76 | 'resnet_v2_50': resnet_v2.resnet_arg_scope, 77 | 'resnet_v2_101': resnet_v2.resnet_arg_scope, 78 | 'resnet_v2_152': resnet_v2.resnet_arg_scope, 79 | 'resnet_v2_200': resnet_v2.resnet_arg_scope, 80 | 'mobilenet_v1': mobilenet_v1.mobilenet_v1_arg_scope, 81 | } 82 | 83 | 84 | def get_network_fn(name, num_classes, weight_decay=0.0, is_training=False): 85 | """Returns a network_fn such as `logits, end_points = network_fn(images)`. 86 | 87 | Args: 88 | name: The name of the network. 89 | num_classes: The number of classes to use for classification. 
90 | weight_decay: The l2 coefficient for the model weights. 91 | is_training: `True` if the model is being used for training and `False` 92 | otherwise. 93 | 94 | Returns: 95 | network_fn: A function that applies the model to a batch of images. It has 96 | the following signature: 97 | logits, end_points = network_fn(images) 98 | Raises: 99 | ValueError: If network `name` is not recognized. 100 | """ 101 | if name not in networks_map: 102 | raise ValueError('Unknown network name: %s' % name) 103 | arg_scope = arg_scopes_map[name](weight_decay=weight_decay) 104 | func = networks_map[name] 105 | @functools.wraps(func) 106 | def network_fn(images): 107 | with slim.arg_scope(arg_scope): 108 | return func(images, num_classes, is_training=is_training) 109 | if hasattr(func, 'default_image_size'): 110 | network_fn.default_image_size = func.default_image_size 111 | 112 | return network_fn 113 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/nets_factory_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for nets_factory.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import tensorflow as tf 23 | 24 | from nets import nets_factory 25 | 26 | slim = tf.contrib.slim 27 | 28 | 29 | class NetworksTest(tf.test.TestCase): 30 | 31 | def testGetNetworkFn(self): 32 | batch_size = 5 33 | num_classes = 1000 34 | for net in nets_factory.networks_map: 35 | with self.test_session(): 36 | net_fn = nets_factory.get_network_fn(net, num_classes) 37 | # Most networks use 224 as their default_image_size 38 | image_size = getattr(net_fn, 'default_image_size', 224) 39 | inputs = tf.random_uniform((batch_size, image_size, image_size, 3)) 40 | logits, end_points = net_fn(inputs) 41 | self.assertTrue(isinstance(logits, tf.Tensor)) 42 | self.assertTrue(isinstance(end_points, dict)) 43 | self.assertEqual(logits.get_shape().as_list()[0], batch_size) 44 | self.assertEqual(logits.get_shape().as_list()[-1], num_classes) 45 | 46 | def testGetNetworkFnArgScope(self): 47 | batch_size = 5 48 | num_classes = 10 49 | net = 'cifarnet' 50 | with self.test_session(use_gpu=True): 51 | net_fn = nets_factory.get_network_fn(net, num_classes) 52 | image_size = getattr(net_fn, 'default_image_size', 224) 53 | with slim.arg_scope([slim.model_variable, slim.variable], 54 | device='/CPU:0'): 55 | inputs = tf.random_uniform((batch_size, image_size, image_size, 3)) 56 | net_fn(inputs) 57 | weights = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, 'CifarNet/conv1')[0] 58 | self.assertDeviceEqual('/CPU:0', weights.device) 59 | 60 | if __name__ == '__main__': 61 | tf.test.main() 62 |
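A quick usage sketch to complement the tests above (minimal and illustrative only: the network name and placeholder shape are arbitrary choices, and the `from nets import nets_factory` import simply mirrors the path layout these vendored files already assume):

```python
import tensorflow as tf

from nets import nets_factory

# Resolve the model function plus its arg_scope from the maps defined above.
network_fn = nets_factory.get_network_fn('resnet_v1_50',
                                         num_classes=1000,
                                         weight_decay=0.0001,
                                         is_training=False)

# get_network_fn copies default_image_size over from the wrapped model function.
image_size = network_fn.default_image_size  # 224 for resnet_v1_50

images = tf.placeholder(tf.float32, [None, image_size, image_size, 3])
logits, end_points = network_fn(images)  # the signature documented in get_network_fn
```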
-------------------------------------------------------------------------------- /libs/networks/slim_nets/overfeat.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains the model definition for the OverFeat network. 16 | 17 | The definition for the network was obtained from: 18 | OverFeat: Integrated Recognition, Localization and Detection using 19 | Convolutional Networks 20 | Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and 21 | Yann LeCun, 2014 22 | http://arxiv.org/abs/1312.6229 23 | 24 | Usage: 25 | with slim.arg_scope(overfeat.overfeat_arg_scope()): 26 | outputs, end_points = overfeat.overfeat(inputs) 27 | 28 | @@overfeat 29 | """ 30 | from __future__ import absolute_import 31 | from __future__ import division 32 | from __future__ import print_function 33 | 34 | import tensorflow as tf 35 | 36 | slim = tf.contrib.slim 37 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) 38 | 39 | 40 | def overfeat_arg_scope(weight_decay=0.0005): 41 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 42 | activation_fn=tf.nn.relu, 43 | weights_regularizer=slim.l2_regularizer(weight_decay), 44 | biases_initializer=tf.zeros_initializer()): 45 | with slim.arg_scope([slim.conv2d], padding='SAME'): 46 | with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc: 47 | return arg_sc 48 | 49 | 50 | def overfeat(inputs, 51 | num_classes=1000, 52 | is_training=True, 53 | dropout_keep_prob=0.5, 54 | spatial_squeeze=True, 55 | scope='overfeat'): 56 | """Contains the model definition for the OverFeat network. 57 | 58 | The definition for the network was obtained from: 59 | OverFeat: Integrated Recognition, Localization and Detection using 60 | Convolutional Networks 61 | Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and 62 | Yann LeCun, 2014 63 | http://arxiv.org/abs/1312.6229 64 | 65 | Note: All the fully_connected layers have been transformed to conv2d layers. 66 | To use in classification mode, resize input to 231x231. To use in fully 67 | convolutional mode, set spatial_squeeze to false. 68 | 69 | Args: 70 | inputs: a tensor of size [batch_size, height, width, channels]. 71 | num_classes: number of predicted classes. 72 | is_training: whether or not the model is being trained. 73 | dropout_keep_prob: the probability that activations are kept in the dropout 74 | layers during training. 75 | spatial_squeeze: whether or not the model should squeeze the spatial dimensions 76 | of the outputs. Useful to remove unnecessary dimensions for classification. 77 | scope: Optional scope for the variables. 78 | 79 | Returns: 80 | the last op containing the log predictions and end_points dict.
81 | 82 | """ 83 | with tf.variable_scope(scope, 'overfeat', [inputs]) as sc: 84 | end_points_collection = sc.name + '_end_points' 85 | # Collect outputs for conv2d, fully_connected and max_pool2d 86 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], 87 | outputs_collections=end_points_collection): 88 | net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID', 89 | scope='conv1') 90 | net = slim.max_pool2d(net, [2, 2], scope='pool1') 91 | net = slim.conv2d(net, 256, [5, 5], padding='VALID', scope='conv2') 92 | net = slim.max_pool2d(net, [2, 2], scope='pool2') 93 | net = slim.conv2d(net, 512, [3, 3], scope='conv3') 94 | net = slim.conv2d(net, 1024, [3, 3], scope='conv4') 95 | net = slim.conv2d(net, 1024, [3, 3], scope='conv5') 96 | net = slim.max_pool2d(net, [2, 2], scope='pool5') 97 | with slim.arg_scope([slim.conv2d], 98 | weights_initializer=trunc_normal(0.005), 99 | biases_initializer=tf.constant_initializer(0.1)): 100 | # Use conv2d instead of fully_connected layers. 101 | net = slim.conv2d(net, 3072, [6, 6], padding='VALID', scope='fc6') 102 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 103 | scope='dropout6') 104 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7') 105 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 106 | scope='dropout7') 107 | net = slim.conv2d(net, num_classes, [1, 1], 108 | activation_fn=None, 109 | normalizer_fn=None, 110 | biases_initializer=tf.zeros_initializer(), 111 | scope='fc8') 112 | # Convert end_points_collection into a end_point dict. 113 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 114 | if spatial_squeeze: 115 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed') 116 | end_points[sc.name + '/fc8'] = net 117 | return net, end_points 118 | overfeat.default_image_size = 231 119 | -------------------------------------------------------------------------------- /libs/val_libs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/libs/val_libs/__init__.py -------------------------------------------------------------------------------- /output/trained_weights/README.md: -------------------------------------------------------------------------------- 1 | Please download [trained model](https://github.com/DetectionTeamUCAS/Models/tree/master/FPN_Tensorflow) by this project, then put it here. 
-------------------------------------------------------------------------------- /scalars.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/scalars.png -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionTeamUCAS/Cascade_FPN_Tensorflow/796dab177cb9d06038c7b4b27c35d704d890a1e8/tools/__init__.py -------------------------------------------------------------------------------- /tools/cocoval.py: -------------------------------------------------------------------------------- 1 | from data.lib_coco.PythonAPI.pycocotools.coco import COCO 2 | from data.lib_coco.PythonAPI.pycocotools.cocoeval import COCOeval 3 | 4 | 5 | def cocoval(detected_json, eval_json): 6 | eval_gt = COCO(eval_json) 7 | 8 | eval_dt = eval_gt.loadRes(detected_json) 9 | cocoEval = COCOeval(eval_gt, eval_dt, iouType='bbox') 10 | 11 | # cocoEval.params.imgIds = eval_gt.getImgIds() 12 | cocoEval.evaluate() 13 | cocoEval.accumulate() 14 | cocoEval.summarize() 15 | 16 | 17 | detected_json = '/home/yangxue/isilon/yangxue/code/ADAS/output/yangxue/fpn/fpn.res50.coco.roialign.2x.detectron.new.concat/eval_dump/epoch-2.coco' 18 | eval_gt = '/unsullied/sharefs/_research_detection/GeneralDetection/COCO/data/MSCOCO/instances_minival2014.json' 19 | cocoval(detected_json, eval_gt) -------------------------------------------------------------------------------- /tools/inference.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | from __future__ import division 6 | 7 | import os, sys 8 | import tensorflow as tf 9 | import time 10 | import cv2 11 | import argparse 12 | import numpy as np 13 | sys.path.append("../") 14 | 15 | from data.io.image_preprocess import short_side_resize_for_inference_data 16 | from libs.configs import cfgs 17 | from libs.networks import build_whole_network 18 | from libs.box_utils import draw_box_in_img 19 | from help_utils import tools 20 | 21 | 22 | def detect(det_net, inference_save_path, real_test_imgname_list): 23 | 24 | # 1. preprocess img 25 | img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3]) # is RGB. 
not BGR 26 | img_batch = tf.cast(img_plac, tf.float32) 27 | img_batch = short_side_resize_for_inference_data(img_tensor=img_batch, 28 | target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN, 29 | length_limitation=cfgs.IMG_MAX_LENGTH) 30 | 31 | if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']: 32 | img_batch = (img_batch / 255 - tf.constant(cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD) 33 | else: 34 | img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN) 35 | img_batch = tf.expand_dims(img_batch, axis=0) # [1, None, None, 3] 36 | 37 | detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network( 38 | input_img_batch=img_batch, 39 | gtboxes_batch=None) 40 | 41 | init_op = tf.group( 42 | tf.global_variables_initializer(), 43 | tf.local_variables_initializer() 44 | ) 45 | 46 | restorer, restore_ckpt = det_net.get_restorer() 47 | 48 | config = tf.ConfigProto() 49 | config.gpu_options.allow_growth = True 50 | 51 | with tf.Session(config=config) as sess: 52 | sess.run(init_op) 53 | if restorer is not None: 54 | restorer.restore(sess, restore_ckpt) 55 | print('restore model') 56 | 57 | for i, a_img_name in enumerate(real_test_imgname_list): 58 | 59 | raw_img = cv2.imread(a_img_name) 60 | start = time.time() 61 | resized_img, detected_boxes, detected_scores, detected_categories = \ 62 | sess.run( 63 | [img_batch, detection_boxes, detection_scores, detection_category], 64 | feed_dict={img_plac: raw_img[:, :, ::-1]} # cv is BGR. But need RGB 65 | ) 66 | end = time.time() 67 | # print("{} cost time : {} ".format(img_name, (end - start))) 68 | 69 | show_indices = detected_scores >= cfgs.SHOW_SCORE_THRSHOLD 70 | show_scores = detected_scores[show_indices] 71 | show_boxes = detected_boxes[show_indices] 72 | show_categories = detected_categories[show_indices] 73 | 74 | draw_img = np.squeeze(resized_img, 0) 75 | 76 | if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']: 77 | draw_img = (draw_img * np.array(cfgs.PIXEL_STD) + np.array(cfgs.PIXEL_MEAN_)) * 255 78 | else: 79 | draw_img = draw_img + np.array(cfgs.PIXEL_MEAN) 80 | final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(draw_img, 81 | boxes=show_boxes, 82 | labels=show_categories, 83 | scores=show_scores, 84 | in_graph=False) 85 | nake_name = a_img_name.split('/')[-1] 86 | # print (inference_save_path + '/' + nake_name) 87 | cv2.imwrite(inference_save_path + '/' + nake_name, 88 | final_detections[:, :, ::-1]) 89 | 90 | tools.view_bar('{} image cost {}s'.format(a_img_name, (end - start)), i + 1, len(real_test_imgname_list)) 91 | 92 | 93 | def inference(test_dir, inference_save_path): 94 | 95 | test_imgname_list = [os.path.join(test_dir, img_name) for img_name in os.listdir(test_dir) 96 | if img_name.endswith(('.jpg', '.png', '.jpeg', '.tif', '.tiff'))] 97 | assert len(test_imgname_list) != 0, 'test_dir has no imgs there.'
\ 98 | ' Note that we only support image formats (.jpg, .jpeg, .png, .tif, .tiff).' 99 | 100 | faster_rcnn = build_whole_network.DetectionNetwork(base_network_name=cfgs.NET_NAME, 101 | is_training=False) 102 | detect(det_net=faster_rcnn, inference_save_path=inference_save_path, real_test_imgname_list=test_imgname_list) 103 | 104 | 105 | def parse_args(): 106 | """ 107 | Parse input arguments 108 | """ 109 | parser = argparse.ArgumentParser(description='TestImgs... You need to provide the test dir') 110 | parser.add_argument('--data_dir', dest='data_dir', 111 | help='data path', 112 | default='demos', type=str) 113 | parser.add_argument('--save_dir', dest='save_dir', 114 | help='demo imgs to save', 115 | default='inference_results', type=str) 116 | parser.add_argument('--GPU', dest='GPU', 117 | help='gpu id', 118 | default='0', type=str) 119 | 120 | if len(sys.argv) == 1: 121 | parser.print_help() 122 | sys.exit(1) 123 | 124 | args = parser.parse_args() 125 | 126 | return args 127 | 128 | 129 | if __name__ == '__main__': 130 | 131 | args = parse_args() 132 | print('Called with args:') 133 | print(args) 134 | os.environ["CUDA_VISIBLE_DEVICES"] = args.GPU 135 | inference(args.data_dir, 136 | inference_save_path=args.save_dir) 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | --------------------------------------------------------------------------------
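Given the argument parser defined in `tools/inference.py` above, a typical run (using the parser's own defaults for the demo and output directories) looks like:

```shell
$ cd tools
$ python inference.py --data_dir demos --save_dir inference_results --GPU 0
```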