├── .DS_Store ├── .gitignore ├── CSL_DCL.png ├── LICENSE ├── README.md ├── data ├── .DS_Store ├── __init__.py ├── io │ ├── .DS_Store │ ├── DOTA │ │ ├── data_crop.py │ │ └── val_set.txt │ ├── FDDB │ │ ├── fddb.py │ │ └── txt2xml.py │ ├── HRSC2016 │ │ ├── __init__.py │ │ └── make_test_xml.py │ ├── ICDAR2015 │ │ └── txt2xml.py │ ├── MLT │ │ ├── filter_box.py │ │ └── txt2xml.py │ ├── OHD-SJTU │ │ ├── .DS_Store │ │ ├── data_crop.py │ │ ├── divide_data.py │ │ ├── filter_dota.py │ │ ├── ohd-sjtu-all-testset.txt │ │ ├── ohd-sjtu-testset.txt │ │ └── vis_op.py │ ├── SSDD++ │ │ └── divide_data.py │ ├── UCAS-AOD │ │ ├── split_data.py │ │ └── txt2xml.py │ ├── __init__.py │ ├── convert_data_to_tfrecord.py │ ├── image_preprocess.py │ ├── image_preprocess_multi_gpu.py │ ├── image_preprocess_multi_gpu_ohdet.py │ ├── read_tfrecord.py │ ├── read_tfrecord_multi_gpu.py │ └── read_tfrecord_multi_gpu_ohdet.py └── pretrained_weights │ ├── README.md │ ├── efficientnet │ ├── README.md │ ├── noisy-student-efficientnet-b1 │ │ └── results.txt │ └── noisy_student_efficientnet-b0 │ │ └── results.txt │ └── mobilenet │ └── README.md ├── demo1.png ├── eval_devkit ├── OHD_SJTU_evaluation_OBB.py ├── OHD_SJTU_evaluation_OHD.py ├── dota_evaluation_task1.py ├── poly_nms_gpu │ ├── Makefile │ ├── __init__.py │ ├── nms_wrapper.py │ ├── poly_nms.hpp │ ├── poly_nms.pyx │ ├── poly_nms_kernel.cu │ ├── poly_nms_test.py │ ├── poly_overlaps.hpp │ ├── poly_overlaps.pyx │ ├── poly_overlaps_kernel.cu │ └── setup.py ├── polyiou.cpp ├── polyiou.h ├── polyiou.i ├── polyiou.py ├── readme.md └── setup.py ├── feature_vis.png ├── help_utils ├── __init__.py ├── densely_coded_label.py ├── smooth_label.py └── tools.py ├── images.png ├── libs ├── .DS_Store ├── __init__.py ├── box_utils │ ├── __init__.py │ ├── anchor_utils.py │ ├── bbox_transform.py │ ├── boxes_utils.py │ ├── coordinate_convert.py │ ├── cython_utils │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── bbox.c │ │ ├── bbox.pyx │ │ ├── nms.c │ │ ├── nms.pyx │ │ └── setup.py │ ├── draw_box_in_img.py │ ├── generate_anchors.py │ ├── generate_rotate_anchors.py │ ├── iou.py │ ├── iou_cpu.cpp │ ├── iou_cpu.pyx │ ├── iou_rotate.py │ ├── mask_utils.py │ ├── nms_rotate.py │ ├── rbbox_overlaps.cpp │ ├── rbbox_overlaps.hpp │ ├── rbbox_overlaps.pyx │ ├── rbbox_overlaps_kernel.cu │ ├── rotate_anchors.jpg │ ├── rotate_gpu_nms.hpp │ ├── rotate_polygon_nms.cpp │ ├── rotate_polygon_nms.hpp │ ├── rotate_polygon_nms.pyx │ ├── rotate_polygon_nms_kernel.cu │ ├── setup.py │ ├── show_box_in_tensor.py │ └── tf_ops.py ├── configs │ ├── DOTA1.0 │ │ ├── __init__.py │ │ ├── baseline │ │ │ ├── __init__.py │ │ │ ├── cfgs_res50_dota_v15.py │ │ │ ├── cfgs_res50_dota_v4.py │ │ │ └── cfgs_res50_dota_win_v19.py │ │ ├── csl │ │ │ ├── cfgs_res50_dota_v1.py │ │ │ ├── cfgs_res50_dota_v41.py │ │ │ └── cfgs_res50_dota_v45.py │ │ ├── dcl │ │ │ ├── __init__.py │ │ │ ├── cfgs_res101_dota_dcl_v1.py │ │ │ ├── cfgs_res152_dota_dcl_v1.py │ │ │ ├── cfgs_res50_dota_dcl_v10.py │ │ │ └── cfgs_res50_dota_dcl_v5.py │ │ ├── dota_train │ │ │ ├── cfgs_res50_dotatrain_baseline_v1.py │ │ │ ├── cfgs_res50_dotatrain_baseline_v2.py │ │ │ ├── cfgs_res50_dotatrain_csl_v1.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v1.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v10.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v11.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v12.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v13.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v14.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v15.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v16.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v17.py │ │ │ ├── 
cfgs_res50_dotatrain_dcl_v18.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v2.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v3.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v4.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v5.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v6.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v7.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v8.py │ │ │ └── cfgs_res50_dotatrain_dcl_v9.py │ │ └── r3det_dcl │ │ │ ├── cfgs_res152_dota_r3det_dcl_v1.py │ │ │ ├── cfgs_res50_dota_r3det_dcl_v1.py │ │ │ ├── cfgs_res50_dota_refine_dcl_v1.py │ │ │ └── cfgs_res50_dota_refine_dcl_v2.py │ ├── HRSC2016 │ │ ├── __init__.py │ │ ├── dcl │ │ │ └── cfgs_res101_hrsc2016_dcl_v1.py │ │ └── r3det_dcl │ │ │ ├── __init__.py │ │ │ ├── cfgs_res101_hrsc2016_r3det_dcl_v1.py │ │ │ ├── cfgs_res101_hrsc2016_r3det_dcl_v2.py │ │ │ └── cfgs_res152_hrsc2016_r3det_dcl_v1.py │ ├── ICDAR2015 │ │ ├── baseline │ │ │ ├── cfgs_res101_icdar2015_baseline_v2.py │ │ │ └── cfgs_res50_icdar2015_baseline_v1.py │ │ ├── csl │ │ │ └── cfgs_res101_icdar2015_csl_v1.py │ │ └── dcl │ │ │ ├── cfgs_res101_icdar2015_dcl_v4.py │ │ │ ├── cfgs_res50_icdar2015_dcl_v1.py │ │ │ ├── cfgs_res50_icdar2015_dcl_v2.py │ │ │ └── cfgs_res50_icdar2015_dcl_v3.py │ ├── MLT │ │ ├── baseline │ │ │ └── cfgs_res101_icdar2015_baseline_v1.py │ │ ├── csl │ │ │ └── cfgs_res101_mlt_csl_v1.py │ │ └── dcl │ │ │ └── cfgs_res101_mlt_dcl_v1.py │ ├── OHD-SJTU │ │ ├── cfgs_res101_ohd-sjtu-all_r3det_csl_ohdet_v1.py │ │ ├── cfgs_res101_ohd-sjtu-all_r3det_csl_ohdet_v2.py │ │ ├── cfgs_res101_ohd-sjtu-all_r3det_csl_v1.py │ │ ├── cfgs_res101_ohd-sjtu-all_r3det_csl_v2.py │ │ ├── cfgs_res101_ohd-sjtu-all_r3det_csl_v3.py │ │ ├── cfgs_res101_ohd-sjtu-all_r3det_v1.py │ │ ├── cfgs_res101_ohd-sjtu-all_v1.py │ │ ├── cfgs_res101_ohd-sjtu-all_v2.py │ │ ├── cfgs_res101_ohd-sjtu_gwd_v1.py │ │ ├── cfgs_res101_ohd-sjtu_r3det_csl_ohdet_v1.py │ │ ├── cfgs_res101_ohd-sjtu_r3det_csl_ohdet_v2.py │ │ ├── cfgs_res101_ohd-sjtu_r3det_csl_v1.py │ │ ├── cfgs_res101_ohd-sjtu_r3det_v1.py │ │ ├── cfgs_res101_ohd-sjtu_r3det_v2.py │ │ ├── cfgs_res101_ohd-sjtu_v1.py │ │ ├── cfgs_res101_ohd-sjtu_v2.py │ │ └── dcl │ │ │ └── cfgs_res101_ohd-sjtu-all_dcl_v1.py │ ├── SSDD++ │ │ ├── baseline │ │ │ └── cfgs_res101_ssdd++_baseline_v1.py │ │ └── dcl │ │ │ ├── cfgs_res101_ssdd++_dcl_v1.py │ │ │ └── cfgs_res101_ssdd++_dcl_v2.py │ ├── UCAS-AOD │ │ ├── baseline │ │ │ └── cfgs_res101_ucas-aod_baseline_v1.py │ │ ├── csl │ │ │ └── cfgs_res101_ucas-aod_csl_v1.py │ │ ├── dcl │ │ │ ├── cfgs_res101_ucas-aod_dcl_v1.py │ │ │ └── cfgs_res50_ucas-aod_dcl_v1.py │ │ └── r3det_dcl │ │ │ ├── __init__.py │ │ │ └── cfgs_res152_ucas-aod_r3det_dcl_v1.py │ ├── __init__.py │ └── cfgs.py ├── detection_oprations │ ├── __init__.py │ ├── anchor_target_layer_without_boxweight.py │ ├── anchor_target_layer_without_boxweight_.py │ ├── anchor_target_layer_without_boxweight_csl.py │ ├── anchor_target_layer_without_boxweight_dcl.py │ ├── anchor_target_layer_without_boxweight_dcl_batch.py │ ├── anchor_target_layer_without_boxweight_win.py │ ├── proposal_opr.py │ ├── proposal_opr_.py │ ├── proposal_opr_csl.py │ ├── proposal_opr_csl_tsne.py │ ├── proposal_opr_dcl.py │ ├── proposal_opr_dcl_tsne.py │ ├── proposal_opr_win.py │ ├── refine_proposal_opr_dcl.py │ ├── refine_proposal_opr_dcl_.py │ └── refinebox_target_layer_without_boxweight_dcl.py ├── label_name_dict │ ├── __init__.py │ ├── coco_dict.py │ └── label_dict.py ├── losses │ ├── __init__.py │ ├── losses.py │ ├── losses_dcl.py │ └── losses_win.py ├── networks │ ├── __init__.py │ ├── build_whole_network.py │ ├── build_whole_network_csl.py │ 
├── build_whole_network_csl_tsne.py │ ├── build_whole_network_dcl.py │ ├── build_whole_network_dcl_batch.py │ ├── build_whole_network_dcl_tsne.py │ ├── build_whole_network_r3det_dcl.py │ ├── build_whole_network_refine_dcl.py │ ├── build_whole_network_win.py │ ├── efficientnet │ │ ├── __init__.py │ │ ├── condconv │ │ │ ├── condconv_layers.py │ │ │ └── efficientnet_condconv_builder.py │ │ ├── demo.py │ │ ├── efficientnet_builder.py │ │ ├── efficientnet_lite_builder.py │ │ ├── efficientnet_model.py │ │ ├── panda.jpg │ │ ├── test.py │ │ └── utils.py │ ├── mobilenet │ │ ├── README.md │ │ ├── __init__.py │ │ ├── conv_blocks.py │ │ ├── mobilenet.py │ │ ├── mobilenet_v2.py │ │ └── mobilenet_v2_test.py │ ├── mobilenet_v2.py │ ├── opts.py │ ├── resnet.py │ ├── resnet_gluoncv.py │ ├── slim_nets │ │ ├── __init__.py │ │ ├── alexnet.py │ │ ├── alexnet_test.py │ │ ├── cifarnet.py │ │ ├── inception.py │ │ ├── inception_resnet_v2.py │ │ ├── inception_resnet_v2_test.py │ │ ├── inception_utils.py │ │ ├── inception_v1.py │ │ ├── inception_v1_test.py │ │ ├── inception_v2.py │ │ ├── inception_v2_test.py │ │ ├── inception_v3.py │ │ ├── inception_v3_test.py │ │ ├── inception_v4.py │ │ ├── inception_v4_test.py │ │ ├── lenet.py │ │ ├── mobilenet_v1.md │ │ ├── mobilenet_v1.png │ │ ├── mobilenet_v1.py │ │ ├── mobilenet_v1_test.py │ │ ├── nets_factory.py │ │ ├── nets_factory_test.py │ │ ├── overfeat.py │ │ ├── overfeat_test.py │ │ ├── resnet_utils.py │ │ ├── resnet_v1.py │ │ ├── resnet_v1_test.py │ │ ├── resnet_v2.py │ │ ├── resnet_v2_test.py │ │ ├── vgg.py │ │ └── vgg_test.py │ ├── xception.bak.py │ └── xception.py ├── setup.py └── val_libs │ ├── __init__.py │ ├── voc_eval.py │ └── voc_eval_r.py ├── output ├── .DS_Store └── trained_weights │ ├── .DS_Store │ └── README.md ├── pipeline.png ├── scalars.png ├── tools ├── .DS_Store ├── __init__.py ├── inference.py ├── multi_gpu_train.py ├── multi_gpu_train_csl.py ├── multi_gpu_train_dcl.py ├── multi_gpu_train_dcl_batch.py ├── multi_gpu_train_r3det_dcl.py ├── multi_gpu_train_refine_dcl.py ├── multi_gpu_train_win.py ├── test_dota_csl_ms.py ├── test_dota_dcl_ms.py ├── test_dota_ms.py ├── test_dota_r3det_dcl_ms.py ├── test_dota_refine_dcl_ms.py ├── test_hrsc2016.py ├── test_hrsc2016_dcl.py ├── test_hrsc2016_r3det_dcl_ms.py ├── test_icdar2015_csl_ms.py ├── test_icdar2015_dcl_ms.py ├── test_icdar2015_ms.py ├── test_mlt_csl_ms.py ├── test_mlt_dcl_ms.py ├── test_mlt_ms.py ├── test_ohd-sjtu_dcl_ms.py ├── test_ohd-sjtu_ms.py ├── test_ssdd_dcl_ms.py ├── test_ssdd_ms.py ├── test_ucas-aod_csl_ms.py ├── test_ucas-aod_dcl_ms.py ├── test_ucas-aod_ms.py └── test_ucas-aod_r3det_dcl_ms.py └── tsne ├── feature_extract_csl.py ├── feature_extract_dcl.py ├── tsne.py └── tsv_radius.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are 
written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | .idea/ 104 | .pyc 105 | .so 106 | *.data-00000-of-00001 107 | *.index 108 | *.meta 109 | events.* 110 | checkpoint 111 | __pycache__/ 112 | *.json 113 | *.zip 114 | *.pb 115 | *.pbtxt 116 | *.tflite 117 | *info.txt 118 | *events.out.tfevents* 119 | *.ckpt 120 | *.pb 121 | *.tfrecord* 122 | *plusplus_.py* 123 | *flops_param* 124 | *r3det_loss** 125 | *refine_feature_multi_stage* 126 | *mining_loss* 127 | *ablation_study* 128 | *tmp* 129 | *head* 130 | *.mp4* 131 | *.MP4* 132 | *inld* 133 | 134 | demo 135 | tools/demo.py 136 | tools/demos/* 137 | tools/test_dota/* 138 | tools/test_icdar2015/* 139 | tools/test_ohd_sjtu/* 140 | output/summary/* 141 | data/tfrecord/* 142 | -------------------------------------------------------------------------------- /CSL_DCL.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/CSL_DCL.png -------------------------------------------------------------------------------- /data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/data/.DS_Store -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/data/__init__.py -------------------------------------------------------------------------------- /data/io/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/data/io/.DS_Store -------------------------------------------------------------------------------- /data/io/HRSC2016/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/data/io/HRSC2016/__init__.py -------------------------------------------------------------------------------- 
/data/io/MLT/filter_box.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | filter_thres = 0.45 5 | res_files = os.listdir('../../../tools/test_mlt/RetinaNet_MLT_CSL_2x_20201007') 6 | filter_res_path = '../../../tools/test_mlt/RetinaNet_MLT_CSL_2x_20201007_{}'.format(filter_thres) 7 | 8 | if not os.path.exists(filter_res_path): 9 | os.makedirs(filter_res_path) 10 | 11 | for rf in res_files: 12 | fr = open('../../../tools/test_mlt/RetinaNet_MLT_CSL_2x_20201007/{}'.format(rf), 'r') 13 | fw = open('{}/{}'.format(filter_res_path, rf), 'w') 14 | lines = fr.readlines() 15 | for line in lines: 16 | if float(line.split(',')[-1].split('\n')[0]) > filter_thres: # the last comma-separated field of each detection line is its confidence score 17 | fw.write(line) 18 | fr.close() 19 | fw.close() -------------------------------------------------------------------------------- /data/io/OHD-SJTU/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/data/io/OHD-SJTU/.DS_Store -------------------------------------------------------------------------------- /data/io/OHD-SJTU/divide_data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import sys 4 | sys.path.append('../../') 5 | import shutil 6 | import os 7 | import random 8 | import math 9 | 10 | 11 | def mkdir(path): 12 | if not os.path.exists(path): 13 | os.makedirs(path) 14 | 15 | 16 | divide_rate = 0.7 17 | 18 | root_path = '/data/yangxue/dataset/OHD-SJTU' 19 | 20 | image_path = root_path + '/all_data/images' 21 | rotation_txt_path = root_path + '/all_data/rotation_txt' 22 | polygon_txt_path = root_path + '/all_data/polygon_txt' 23 | 24 | image_list = os.listdir(image_path) 25 | 26 | image_name = [n.split('.')[0] for n in image_list] 27 | 28 | random.shuffle(image_name) 29 | 30 | train_image = image_name[:int(math.ceil(len(image_name) * divide_rate))] # ceil must wrap the whole product; math.ceil(len(image_name)) alone is a no-op on an int 31 | test_image = image_name[int(math.ceil(len(image_name) * divide_rate)):] 32 | 33 | image_output_train = os.path.join(root_path, 'trainval/images') 34 | mkdir(image_output_train) 35 | image_output_test = os.path.join(root_path, 'test/images') 36 | mkdir(image_output_test) 37 | 38 | polygon_txt_output_train = os.path.join(root_path, 'trainval/polygon_txt') 39 | mkdir(polygon_txt_output_train) 40 | polygon_txt_output_test = os.path.join(root_path, 'test/polygon_txt') 41 | mkdir(polygon_txt_output_test) 42 | 43 | rotation_txt_output_train = os.path.join(root_path, 'trainval/rotation_txt') 44 | mkdir(rotation_txt_output_train) 45 | rotation_txt_output_test = os.path.join(root_path, 'test/rotation_txt') 46 | mkdir(rotation_txt_output_test) 47 | 48 | 49 | count = 0 50 | for i in train_image: 51 | shutil.copy(os.path.join(image_path, i + '.jpg'), os.path.join(image_output_train, 'P{}.jpg'.format(count))) 52 | shutil.copy(os.path.join(polygon_txt_path, i + '.txt'), os.path.join(polygon_txt_output_train, 'P{}.txt'.format(count))) 53 | shutil.copy(os.path.join(rotation_txt_path, i + '.txt'), os.path.join(rotation_txt_output_train, 'P{}.txt'.format(count))) 54 | if count % 10 == 0: 55 | print("process step {}".format(count)) 56 | count += 1 57 | 58 | for i in test_image: # count keeps increasing here, so test files continue the P{n} numbering without clashing with trainval 59 |
shutil.copy(os.path.join(image_path, i + '.jpg'), os.path.join(image_output_test, 'P{}.jpg'.format(count))) 60 | shutil.copy(os.path.join(polygon_txt_path, i + '.txt'), os.path.join(polygon_txt_output_test, 'P{}.txt'.format(count))) 61 | shutil.copy(os.path.join(rotation_txt_path, i + '.txt'), os.path.join(rotation_txt_output_test, 'P{}.txt'.format(count))) 62 | if count % 10 == 0: 63 | print("process step {}".format(count)) 64 | count += 1 65 | 66 | -------------------------------------------------------------------------------- /data/io/OHD-SJTU/filter_dota.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | 4 | image_dir = '/data/yangxue/dataset/DOTA/val/images/images' 5 | txt_dir = '/data/yangxue/dataset/DOTA/val/labelTxt/labelTxt' 6 | 7 | save_image_dir = '/data/yangxue/dataset/OHD-SJTU-LARGE/test/images' 8 | save_txt_dir = '/data/yangxue/dataset/OHD-SJTU-LARGE/test/rotation_txt' 9 | 10 | 11 | class_list = ['plane', 'small-vehicle', 'large-vehicle', 'ship', 'harbor', 'helicopter'] 12 | 13 | all_txt = os.listdir(txt_dir) 14 | 15 | for t in all_txt: 16 | fr = open(os.path.join(txt_dir, t), 'r') 17 | lines = fr.readlines() 18 | fw = open(os.path.join(save_txt_dir, t), 'w') 19 | cnt = 0 20 | for line in lines: 21 | if len(line.split(' ')) < 9: 22 | continue 23 | 24 | label = line.split(' ')[8] 25 | if label not in class_list: 26 | continue 27 | 28 | box = [int(xy) for xy in line.split(' ')[:8]] 29 | 30 | difficult = line.split(' ')[-1] 31 | 32 | new_line = '{} {} {} {} {} {} {} {} {} {} {} {}'.format(box[0], box[1], box[2], box[3], 33 | box[4], box[5], box[6], box[7], 34 | (box[0] + box[2]) // 2, 35 | (box[1] + box[3]) // 2, 36 | label, difficult) 37 | fw.write(new_line) 38 | cnt += 1 39 | fw.close() 40 | fr.close() 41 | 42 | if cnt == 0: 43 | os.remove(os.path.join(save_txt_dir, t)) 44 | else: 45 | shutil.copy(os.path.join(image_dir, t.replace('.txt', '.png')), os.path.join(save_image_dir, t.replace('.txt', '.jpg'))) -------------------------------------------------------------------------------- /data/io/OHD-SJTU/ohd-sjtu-all-testset.txt: -------------------------------------------------------------------------------- 1 | P1529 2 | P0964 3 | P1007 4 | P40 5 | P1598 6 | P1122 7 | P0970 8 | P0019 9 | P0249 10 | P0060 11 | P0199 12 | P0962 13 | P0998 14 | P0168 15 | P2059 16 | P0764 17 | P1266 18 | P2068 19 | P1090 20 | P1604 21 | P2789 22 | P1095 23 | P0564 24 | P0623 25 | P2587 26 | P2429 27 | P0189 28 | P2050 29 | P1940 30 | P1384 31 | P1738 32 | P2608 33 | P0837 34 | P2582 35 | P1283 36 | P0486 37 | P1091 38 | P2126 39 | P0559 40 | P2598 41 | P1212 42 | P0348 43 | P0929 44 | P1178 45 | P0261 46 | P2155 47 | P1398 48 | P1904 49 | P0382 50 | P1982 51 | P0550 52 | P2097 53 | P1219 54 | P1957 55 | P1390 56 | P0420 57 | P1512 58 | P0841 59 | P0712 60 | P0056 61 | P0414 62 | P1742 63 | P0795 64 | P0342 65 | P0003 66 | P1278 67 | P0329 68 | P1257 69 | P0007 70 | P2181 71 | P1825 72 | P2111 73 | P2625 74 | P1478 75 | P0454 76 | P1143 77 | P0004 78 | P2124 79 | P0704 80 | P1128 81 | P39 82 | P0796 83 | P0660 84 | P1133 85 | P1315 86 | P0683 87 | P2197 88 | P1088 89 | P0989 90 | P0725 91 | P1860 92 | P1518 93 | P0117 94 | P0128 95 | P1386 96 | P1376 97 | P2198 98 | P0969 99 | P2689 100 | P2678 101 | P1023 102 | P0763 103 | P1099 104 | P2236 105 | P1138 106 | P0259 107 | P1137 108 | P1838 109 | P1492 110 | P0961 111 | P1268 112 | P1179 113 | P0476 114 | P0524 115 | P0968 116 | P2082 117 | P0300 118 | P1147 119 | P1051 120 | 
P1275 121 | P2802 122 | P2027 123 | P0990 124 | P0237 125 | P0647 126 | P2285 127 | P0932 128 | P0816 129 | P0170 130 | P0590 131 | P0557 132 | P1878 133 | P2630 134 | P0547 135 | P1101 136 | P0262 137 | P2218 138 | P1513 139 | P1065 140 | P2239 141 | P0086 142 | P0974 143 | P2617 144 | P1672 145 | P31 146 | P1473 147 | P0173 148 | P1410 149 | P0305 150 | P2610 151 | P0801 152 | P1880 153 | P1156 154 | P2645 155 | P2771 156 | P1601 157 | P0949 158 | P1269 159 | P0684 160 | P1983 161 | P1332 162 | P2781 163 | P1809 164 | P1995 165 | P1134 166 | P2310 167 | P0309 168 | P0347 169 | P1541 170 | P2701 171 | P0385 172 | P2754 173 | P0936 174 | P0613 175 | P0526 176 | P0411 177 | P1476 178 | P34 179 | P0518 180 | P1234 181 | P0665 182 | P37 183 | P0053 184 | P1314 185 | P1397 186 | P0027 187 | P0336 188 | P0352 189 | P1508 190 | P0706 191 | P1452 192 | P0179 193 | P2331 194 | P2599 195 | P0217 196 | P1911 197 | P1066 198 | P1029 199 | P1787 200 | P0643 201 | P36 202 | P0577 203 | P1992 204 | P1474 205 | P2322 206 | P2042 207 | P0206 208 | P38 209 | P0761 210 | P1610 211 | P2271 212 | P1184 213 | P0353 214 | P42 215 | P1005 216 | P0368 217 | P1950 218 | P1903 219 | P0904 220 | P0110 221 | P0787 222 | P1014 223 | P0715 224 | P0791 225 | P2093 226 | P1075 227 | P0161 228 | P1021 229 | P0833 230 | P1342 231 | P0882 232 | P1189 233 | P2242 234 | P1924 235 | P35 236 | P2726 237 | P1623 238 | P32 239 | P0910 240 | P0589 241 | P2709 242 | P0858 243 | P0615 244 | P0374 245 | P0789 246 | P2791 247 | P2166 248 | P2230 249 | P0977 250 | P0375 251 | P2220 252 | P0525 253 | P1958 254 | P1973 255 | P1225 256 | P0945 257 | P1030 258 | P2733 259 | P2779 260 | P2214 261 | P2286 262 | P0583 263 | P1356 264 | P1246 265 | P1213 266 | P0814 267 | P41 268 | P33 269 | P1434 270 | P2294 271 | P2215 272 | P0579 273 | P2003 274 | P1806 275 | P1566 276 | P1149 277 | P0729 278 | P1009 279 | P1022 280 | P0749 281 | P1960 282 | P1483 283 | P1786 284 | P1770 285 | P0887 286 | P2758 287 | P1829 288 | P1749 289 | P0457 290 | P1103 291 | P2778 292 | P2766 293 | P0551 294 | P0654 295 | P1877 296 | P1978 297 | P0622 298 | P0432 299 | P1905 300 | P1373 301 | P2570 302 | P0897 303 | P0604 304 | P0839 305 | P0650 306 | P0213 307 | P2231 308 | P30 309 | P0558 310 | P1154 311 | P0175 312 | P0246 313 | P0331 314 | P0543 315 | P0123 316 | P1854 317 | P1230 318 | P1273 319 | P0104 320 | P0953 321 | P0799 322 | P1242 323 | P2241 324 | P2794 325 | P0257 326 | P1821 327 | P0864 328 | P0194 329 | P0838 330 | P2088 331 | P1429 332 | P1583 333 | P1888 334 | P0696 335 | P0499 336 | -------------------------------------------------------------------------------- /data/io/OHD-SJTU/ohd-sjtu-testset.txt: -------------------------------------------------------------------------------- 1 | P34 2 | P32 3 | P30 4 | P42 5 | P40 6 | P31 7 | P35 8 | P33 9 | P41 10 | P38 11 | P37 12 | P39 13 | P36 14 | -------------------------------------------------------------------------------- /data/io/OHD-SJTU/vis_op.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | 5 | 6 | img_path = '/data/yangxue/dataset/OHD-SJTU/all_data/images' 7 | images_vis_path = '/data/yangxue/dataset/OHD-SJTU/all_data/images_vis' 8 | txt_path = '/data/yangxue/dataset/OHD-SJTU/all_data/polygon_txt' 9 | 10 | 11 | all_txt = os.listdir(txt_path) 12 | 13 | 14 | for t in all_txt: 15 | 16 | img = cv2.imread(os.path.join(img_path, t.replace('txt', 'jpg'))) 17 | fr = open(os.path.join(txt_path, t), 'r') 18 | data 
= fr.readlines() 19 | fr.close() 20 | print(len(data)) 21 | 22 | for d in data: 23 | dd = [int(float(xy)) for xy in d.split(' ')[:-1]] 24 | 25 | if d.split(' ')[-1] == 'ship\n': 26 | color = (0, 255, 0) 27 | else: 28 | color = (255, 0, 0) 29 | dd_ = np.array(dd).reshape(-1, 2) 30 | cv2.polylines(img, [dd_], thickness=3, color=color, isClosed=True) 31 | cv2.line(img, (dd[0], dd[1]), (dd[0], dd[1]), thickness=10, color=(0, 0, 255)) # zero-length line with thickness 10, i.e. a red dot marking the first vertex (the annotated head point) 32 | cv2.imwrite(os.path.join(images_vis_path, t.replace('txt', 'jpg')), img) 33 | -------------------------------------------------------------------------------- /data/io/SSDD++/divide_data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import sys 4 | sys.path.append('../../') 5 | import shutil 6 | import os 7 | import random 8 | import math 9 | 10 | 11 | def mkdir(path): 12 | if not os.path.exists(path): 13 | os.makedirs(path) 14 | 15 | 16 | divide_rate = 0.8 17 | 18 | root_path = '/data2/yangxue/dataset' 19 | 20 | image_path = root_path + '/SSDD++/JPEGImages' 21 | xml_path = root_path + '/SSDD++/Annotations' 22 | 23 | image_list = os.listdir(image_path) 24 | 25 | image_name = [n.split('.')[0] for n in image_list] 26 | 27 | random.shuffle(image_name) 28 | 29 | train_image = image_name[:int(math.ceil(len(image_name) * divide_rate))] # ceil must wrap the whole product, as in the other split scripts 30 | print('train image number:', len(train_image)) 31 | test_image = image_name[int(math.ceil(len(image_name) * divide_rate)):] 32 | print('test image number:', len(test_image)) 33 | 34 | image_output_train = os.path.join(root_path, 'SSDD++/train/JPEGImages') 35 | mkdir(image_output_train) 36 | image_output_test = os.path.join(root_path, 'SSDD++/test/JPEGImages') 37 | mkdir(image_output_test) 38 | 39 | xml_train = os.path.join(root_path, 'SSDD++/train/Annotations') 40 | mkdir(xml_train) 41 | xml_test = os.path.join(root_path, 'SSDD++/test/Annotations') 42 | mkdir(xml_test) 43 | 44 | 45 | count = 0 46 | for i in train_image: 47 | shutil.copy(os.path.join(image_path, i + '.jpg'), image_output_train) 48 | shutil.copy(os.path.join(xml_path, i + '.xml'), xml_train) 49 | if count % 1000 == 0: 50 | print("process step {}".format(count)) 51 | count += 1 52 | 53 | for i in test_image: 54 | shutil.copy(os.path.join(image_path, i + '.jpg'), image_output_test) 55 | shutil.copy(os.path.join(xml_path, i + '.xml'), xml_test) 56 | if count % 1000 == 0: 57 | print("process step {}".format(count)) 58 | count += 1 59 | -------------------------------------------------------------------------------- /data/io/UCAS-AOD/split_data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import sys 4 | sys.path.append('../../') 5 | import shutil 6 | import os 7 | import random 8 | import math 9 | 10 | 11 | def mkdir(path): 12 | if not os.path.exists(path): 13 | os.makedirs(path) 14 | 15 | 16 | divide_rate = 0.7351 17 | 18 | root_path = '/data/dataset/UCAS-AOD' 19 | 20 | image_path = root_path + '/images' 21 | xml_path = root_path + '/label-xml' 22 | 23 | image_list = os.listdir(image_path) 24 | 25 | image_name = [n.split('.')[0] for n in image_list] 26 | 27 | random.shuffle(image_name) 28 | 29 | train_image = image_name[:int(math.ceil(len(image_name) * divide_rate))] 30 | test_image = image_name[int(math.ceil(len(image_name) * divide_rate)):] 31 | 32 | image_output_train =
os.path.join(root_path, 'VOCdevkit_train/JPEGImages') 33 | mkdir(image_output_train) 34 | image_output_test = os.path.join(root_path, 'VOCdevkit_test/JPEGImages') 35 | mkdir(image_output_test) 36 | 37 | xml_train = os.path.join(root_path, 'VOCdevkit_train/Annotations') 38 | mkdir(xml_train) 39 | xml_test = os.path.join(root_path, 'VOCdevkit_test/Annotations') 40 | mkdir(xml_test) 41 | 42 | 43 | count = 0 44 | for i in train_image: 45 | shutil.copy(os.path.join(image_path, i + '.png'), image_output_train) 46 | shutil.copy(os.path.join(xml_path, i + '.xml'), xml_train) 47 | if count % 100 == 0: 48 | print("process step {}".format(count)) 49 | count += 1 50 | 51 | for i in test_image: 52 | shutil.copy(os.path.join(image_path, i + '.png'), image_output_test) 53 | shutil.copy(os.path.join(xml_path, i + '.xml'), xml_test) 54 | if count % 100 == 0: 55 | print("process step {}".format(count)) 56 | count += 1 57 | -------------------------------------------------------------------------------- /data/io/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/data/io/__init__.py -------------------------------------------------------------------------------- /data/io/image_preprocess.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | from __future__ import division 6 | 7 | import tensorflow as tf 8 | import numpy as np 9 | 10 | from libs.configs import cfgs 11 | 12 | 13 | def max_length_limitation(length, length_limitation): 14 | return tf.cond(tf.less(length, length_limitation), 15 | true_fn=lambda: length, 16 | false_fn=lambda: length_limitation) 17 | 18 | 19 | def short_side_resize(img_tensor, gtboxes_and_label, target_shortside_len, length_limitation=1200): 20 | ''' 21 | 22 | :param img_tensor:[h, w, c], gtboxes_and_label:[-1, 5]. 
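each row holds the 4 box coordinates followed by the class label, i.e.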
gtboxes: [xmin, ymin, xmax, ymax] 23 | :param target_shortside_len: 24 | :param length_limitation: set max length to avoid OUT OF MEMORY 25 | :return: 26 | ''' 27 | img_h, img_w = tf.shape(img_tensor)[0], tf.shape(img_tensor)[1] 28 | new_h, new_w = tf.cond(tf.less(img_h, img_w), 29 | true_fn=lambda: (target_shortside_len, 30 | max_length_limitation(target_shortside_len * img_w // img_h, length_limitation)), 31 | false_fn=lambda: (max_length_limitation(target_shortside_len * img_h // img_w, length_limitation), 32 | target_shortside_len)) 33 | 34 | img_tensor = tf.expand_dims(img_tensor, axis=0) 35 | img_tensor = tf.image.resize_bilinear(img_tensor, [new_h, new_w]) 36 | 37 | xmin, ymin, xmax, ymax, label = tf.unstack(gtboxes_and_label, axis=1) 38 | 39 | new_xmin, new_ymin = xmin * new_w // img_w, ymin * new_h // img_h 40 | new_xmax, new_ymax = xmax * new_w // img_w, ymax * new_h // img_h 41 | img_tensor = tf.squeeze(img_tensor, axis=0) # ensure image tensor rank is 3 42 | 43 | return img_tensor, tf.transpose(tf.stack([new_xmin, new_ymin, new_xmax, new_ymax, label], axis=0)) 44 | 45 | 46 | def short_side_resize_for_inference_data(img_tensor, target_shortside_len, length_limitation=1200, is_resize=True): 47 | if is_resize: 48 | img_h, img_w = tf.shape(img_tensor)[0], tf.shape(img_tensor)[1] 49 | 50 | new_h, new_w = tf.cond(tf.less(img_h, img_w), 51 | true_fn=lambda: (target_shortside_len, 52 | max_length_limitation(target_shortside_len * img_w // img_h, length_limitation)), 53 | false_fn=lambda: (max_length_limitation(target_shortside_len * img_h // img_w, length_limitation), 54 | target_shortside_len)) 55 | 56 | img_tensor = tf.expand_dims(img_tensor, axis=0) 57 | img_tensor = tf.image.resize_bilinear(img_tensor, [new_h, new_w]) 58 | 59 | img_tensor = tf.squeeze(img_tensor, axis=0) # ensure image tensor rank is 3 60 | return img_tensor 61 | 62 | 63 | def flip_left_to_right(img_tensor, gtboxes_and_label): 64 | 65 | h, w = tf.shape(img_tensor)[0], tf.shape(img_tensor)[1] 66 | 67 | img_tensor = tf.image.flip_left_right(img_tensor) 68 | 69 | xmin, ymin, xmax, ymax, label = tf.unstack(gtboxes_and_label, axis=1) 70 | new_xmax = w - xmin 71 | new_xmin = w - xmax 72 | 73 | return img_tensor, tf.transpose(tf.stack([new_xmin, ymin, new_xmax, ymax, label], axis=0)) 74 | 75 | 76 | def random_flip_left_right(img_tensor, gtboxes_and_label): 77 | img_tensor, gtboxes_and_label = tf.cond(tf.less(tf.random_uniform(shape=[], minval=0, maxval=1), 0.5), 78 | lambda: flip_left_to_right(img_tensor, gtboxes_and_label), 79 | lambda: (img_tensor, gtboxes_and_label)) 80 | 81 | return img_tensor, gtboxes_and_label 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /data/pretrained_weights/README.md: -------------------------------------------------------------------------------- 1 | 1. Please download [resnet50_v1](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz), [resnet101_v1](http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz), [resnet152_v1](http://download.tensorflow.org/models/resnet_v1_152_2016_08_28.tar.gz), [efficientnet](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet), [mobilenet_v2](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz) pre-trained models on Imagenet. 2 | 2. **(Recommend in this repo)** Or you can choose to use a better backbone (resnet_v1d), refer to [gluon2TF](https://github.com/yangJirui/gluon2TF). 
3 | * [Baidu Drive](https://pan.baidu.com/s/1GpqKg0dOaaWmwshvv1qWGg), password: 5ht9. 4 | * [Google Drive](https://drive.google.com/drive/folders/1BM8ffn1WnsRRb5RcuAcyJAHX8NS2M1Gz?usp=sharing) 5 | 3. Path tree of pretrained_weight 6 | ``` 7 | ├── pretrained_weight 8 | │   ├── efficientnet 9 | │   ├── efficientnet-b0 10 | │   ├── checkpoint 11 | │   ├── model.ckpt.data-00000-of-00001 12 | │   ├── model.ckpt.index 13 | │   ├── model.ckpt.meta 14 | │ ├── mobilenet 15 | │   ├── mobilenet_v1_0.25_128.ckpt.data-00000-of-00001 16 | │   ├── mobilenet_v1_0.25_128.ckpt.index 17 | │   ├── mobilenet_v1_0.25_128.ckpt.meta 18 | │   ├── mobilenet_v1_0.25_128.tflite 19 | │   ├── mobilenet_v1_0.25_128_eval.pbtxt 20 | │   ├── mobilenet_v1_0.25_128_frozen.pb 21 | │   ├── mobilenet_v1_0.25_128_info.txt 22 | │ ├── resnet_v1_50.ckpt 23 | │ ├── resnet50_v1d.ckpt.index 24 | │ ├── resnet50_v1d.ckpt.data-00000-of-00001 25 | │ ├── resnet50_v1d.ckpt.meta 26 | ``` 27 | -------------------------------------------------------------------------------- /data/pretrained_weights/efficientnet/README.md: -------------------------------------------------------------------------------- 1 | 1. Please download [resnet50_v1](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz), [resnet101_v1](http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz), [resnet152_v1](http://download.tensorflow.org/models/resnet_v1_152_2016_08_28.tar.gz), [efficientnet](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet), [mobilenet_v2](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz) pre-trained models on Imagenet. 2 | 2. **(Recommend in this repo)** Or you can choose to use a better backbone (resnet_v1d), refer to [gluon2TF](https://github.com/yangJirui/gluon2TF). 3 | * [Baidu Drive](https://pan.baidu.com/s/1GpqKg0dOaaWmwshvv1qWGg), password: 5ht9. 4 | * [Google Drive](https://drive.google.com/drive/folders/1BM8ffn1WnsRRb5RcuAcyJAHX8NS2M1Gz?usp=sharing) 5 | 3. 
Path tree of pretrained_weight 6 | ``` 7 | ├── pretrained_weight 8 | │   ├── efficientnet 9 | │   ├── efficientnet-b0 10 | │   ├── checkpoint 11 | │   ├── model.ckpt.data-00000-of-00001 12 | │   ├── model.ckpt.index 13 | │   ├── model.ckpt.meta 14 | │ ├── mobilenet 15 | │   ├── mobilenet_v1_0.25_128.ckpt.data-00000-of-00001 16 | │   ├── mobilenet_v1_0.25_128.ckpt.index 17 | │   ├── mobilenet_v1_0.25_128.ckpt.meta 18 | │   ├── mobilenet_v1_0.25_128.tflite 19 | │   ├── mobilenet_v1_0.25_128_eval.pbtxt 20 | │   ├── mobilenet_v1_0.25_128_frozen.pb 21 | │   ├── mobilenet_v1_0.25_128_info.txt 22 | │ ├── resnet_v1_50.ckpt 23 | │ ├── resnet50_v1d.ckpt.index 24 | │ ├── resnet50_v1d.ckpt.data-00000-of-00001 25 | │ ├── resnet50_v1d.ckpt.meta 26 | ``` 27 | -------------------------------------------------------------------------------- /data/pretrained_weights/efficientnet/noisy-student-efficientnet-b1/results.txt: -------------------------------------------------------------------------------- 1 | {'top_1_accuracy': 0.81528, 'top_5_accuracy': 0.95792} 2 | -------------------------------------------------------------------------------- /data/pretrained_weights/efficientnet/noisy_student_efficientnet-b0/results.txt: -------------------------------------------------------------------------------- 1 | {'top_1_accuracy': 0.78844, 'top_5_accuracy': 0.9451} 2 | -------------------------------------------------------------------------------- /data/pretrained_weights/mobilenet/README.md: -------------------------------------------------------------------------------- 1 | 1. Please download [resnet50_v1](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz), [resnet101_v1](http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz), [resnet152_v1](http://download.tensorflow.org/models/resnet_v1_152_2016_08_28.tar.gz), [efficientnet](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet), [mobilenet_v2](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz) pre-trained models on Imagenet. 2 | 2. **(Recommend in this repo)** Or you can choose to use a better backbone (resnet_v1d), refer to [gluon2TF](https://github.com/yangJirui/gluon2TF). 3 | * [Baidu Drive](https://pan.baidu.com/s/1GpqKg0dOaaWmwshvv1qWGg), password: 5ht9. 4 | * [Google Drive](https://drive.google.com/drive/folders/1BM8ffn1WnsRRb5RcuAcyJAHX8NS2M1Gz?usp=sharing) 5 | 3. 
Path tree of pretrained_weight 6 | ``` 7 | ├── pretrained_weight 8 | │   ├── efficientnet 9 | │   ├── efficientnet-b0 10 | │   ├── checkpoint 11 | │   ├── model.ckpt.data-00000-of-00001 12 | │   ├── model.ckpt.index 13 | │   ├── model.ckpt.meta 14 | │ ├── mobilenet 15 | │   ├── mobilenet_v1_0.25_128.ckpt.data-00000-of-00001 16 | │   ├── mobilenet_v1_0.25_128.ckpt.index 17 | │   ├── mobilenet_v1_0.25_128.ckpt.meta 18 | │   ├── mobilenet_v1_0.25_128.tflite 19 | │   ├── mobilenet_v1_0.25_128_eval.pbtxt 20 | │   ├── mobilenet_v1_0.25_128_frozen.pb 21 | │   ├── mobilenet_v1_0.25_128_info.txt 22 | │ ├── resnet_v1_50.ckpt 23 | │ ├── resnet50_v1d.ckpt.index 24 | │ ├── resnet50_v1d.ckpt.data-00000-of-00001 25 | │ ├── resnet50_v1d.ckpt.meta 26 | ``` 27 | -------------------------------------------------------------------------------- /demo1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/demo1.png -------------------------------------------------------------------------------- /eval_devkit/poly_nms_gpu/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /eval_devkit/poly_nms_gpu/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/eval_devkit/poly_nms_gpu/__init__.py -------------------------------------------------------------------------------- /eval_devkit/poly_nms_gpu/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | # from nms.gpu_nms import gpu_nms 9 | # from nms.cpu_nms import cpu_nms 10 | from .poly_nms import poly_gpu_nms 11 | def poly_nms_gpu(dets, thresh, force_cpu=False): 12 | """Dispatch to either CPU or GPU NMS implementations.""" 13 | 14 | if dets.shape[0] == 0: 15 | return [] 16 | return poly_gpu_nms(dets, thresh, device_id=0) 17 | 18 | -------------------------------------------------------------------------------- /eval_devkit/poly_nms_gpu/poly_nms.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by dingjian on 18-5-24. 
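// GPU polygon NMS: _poly_nms (declared below) fills keep_out with the indices of the
// polygons kept at nms_overlap_thresh, relative to the score-sorted input, and writes
// the number kept to num_out; the poly_nms.pyx wrapper that follows consumes it exactly this way.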
3 | // 4 | 5 | #ifndef DOTA_DEVKIT_POLY_NMS_HPP 6 | #define DOTA_DEVKIT_POLY_NMS_HPP 7 | 8 | 9 | void _poly_nms(int* keep_out, int* num_out, const float* polys_host, int polys_num, 10 | int polys_dim, float nms_overlap_thresh, int device_id); 11 | 12 | #endif //DOTA_DEVKIT_POLY_NMS_HPP 13 | -------------------------------------------------------------------------------- /eval_devkit/poly_nms_gpu/poly_nms.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 4 | assert sizeof(int) == sizeof(np.int32_t) 5 | 6 | cdef extern from "poly_nms.hpp": 7 | void _poly_nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 8 | 9 | def poly_gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 10 | np.int32_t device_id=0): 11 | cdef int boxes_num = dets.shape[0] 12 | cdef int boxes_dim = dets.shape[1] 13 | cdef int num_out 14 | cdef np.ndarray[np.int32_t, ndim=1] \ 15 | keep = np.zeros(boxes_num, dtype=np.int32) 16 | cdef np.ndarray[np.float32_t, ndim=1] \ 17 | scores = dets[:, 8] 18 | cdef np.ndarray[np.int_t, ndim=1] \ 19 | order = scores.argsort()[::-1] 20 | cdef np.ndarray[np.float32_t, ndim=2] \ 21 | sorted_dets = dets[order, :] 22 | _poly_nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 23 | keep = keep[:num_out] 24 | return list(order[keep]) 25 | -------------------------------------------------------------------------------- /eval_devkit/poly_nms_gpu/poly_nms_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/eval_devkit/poly_nms_gpu/poly_nms_test.py -------------------------------------------------------------------------------- /eval_devkit/poly_nms_gpu/poly_overlaps.hpp: -------------------------------------------------------------------------------- 1 | void _overlaps(float* overlaps,const float* boxes,const float* query_boxes, int n, int k, int device_id); 2 | -------------------------------------------------------------------------------- /eval_devkit/poly_nms_gpu/poly_overlaps.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 4 | cdef extern from "poly_overlaps.hpp": 5 | void _overlaps(np.float32_t*, np.float32_t*, np.float32_t*, int, int, int) 6 | 7 | def poly_overlaps (np.ndarray[np.float32_t, ndim=2] boxes, np.ndarray[np.float32_t, ndim=2] query_boxes, np.int32_t device_id=0): 8 | cdef int N = boxes.shape[0] 9 | cdef int K = query_boxes.shape[0] 10 | cdef np.ndarray[np.float32_t, ndim=2] overlaps = np.zeros((N, K), dtype = np.float32) 11 | _overlaps(&overlaps[0, 0], &boxes[0, 0], &query_boxes[0, 0], N, K, device_id) 12 | return overlaps 13 | 14 | 15 | -------------------------------------------------------------------------------- /eval_devkit/polyiou.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by dingjian on 18-2-3. 
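// iou_poly (declared below) returns the IoU of two polygons passed as flat x/y
// coordinate vectors; in this devkit the polygons are quadrilaterals, i.e. eight values each.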
3 | // 4 | 5 | #ifndef POLYIOU_POLYIOU_H 6 | #define POLYIOU_POLYIOU_H 7 | 8 | #include <vector> 9 | double iou_poly(std::vector<double> p, std::vector<double> q); 10 | #endif //POLYIOU_POLYIOU_H 11 | -------------------------------------------------------------------------------- /eval_devkit/polyiou.i: -------------------------------------------------------------------------------- 1 | %module polyiou 2 | %include "std_vector.i" 3 | 4 | namespace std { 5 | %template(VectorDouble) vector<double>; 6 | }; 7 | 8 | %{ 9 | #define SWIG_FILE_WITH_INIT 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "polyiou.h" 16 | %} 17 | 18 | %include "polyiou.h" 19 | 20 | -------------------------------------------------------------------------------- /eval_devkit/readme.md: -------------------------------------------------------------------------------- 1 | ## Installation 2 | 1. install swig 3 | ``` 4 | sudo apt-get install swig 5 | ``` 6 | 2. create the c++ extension for python 7 | ``` 8 | swig -c++ -python polyiou.i 9 | python setup.py build_ext --inplace 10 | ``` 11 | -------------------------------------------------------------------------------- /eval_devkit/setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | setup.py file for SWIG example 3 | """ 4 | from distutils.core import setup, Extension 5 | import numpy 6 | 7 | polyiou_module = Extension('_polyiou', 8 | sources=['polyiou_wrap.cxx', 'polyiou.cpp'], 9 | ) 10 | setup(name = 'polyiou', 11 | version = '0.1', 12 | author = "SWIG Docs", 13 | description = """Simple swig example from docs""", 14 | ext_modules = [polyiou_module], 15 | py_modules = ["polyiou"], 16 | ) 17 | -------------------------------------------------------------------------------- /feature_vis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/feature_vis.png -------------------------------------------------------------------------------- /help_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/help_utils/__init__.py -------------------------------------------------------------------------------- /help_utils/smooth_label.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from __future__ import absolute_import, division, print_function 4 | import numpy as np 5 | import math 6 | 7 | 8 | def gaussian_label(label, num_class, u=0, sig=4.0): 9 | x = np.array(range(math.floor(-num_class/2), math.ceil(num_class/2), 1)) 10 | y_sig = np.exp(-(x - u) ** 2 / (2 * sig ** 2)) 11 | return np.concatenate([y_sig[math.ceil(num_class/2)-label:], 12 | y_sig[:math.ceil(num_class/2)-label]], axis=0) 13 | 14 | 15 | def rectangular_label(label, num_class, raduius=4): 16 | x = np.zeros([num_class]) 17 | x[:raduius+1] = 1 18 | x[-raduius:] = 1 19 | y_sig = np.concatenate([x[-label:], x[:-label]], axis=0) 20 | return y_sig 21 | 22 | 23 | def pulse_label(label, num_class): 24 | x = np.zeros([num_class]) 25 | x[label] = 1 26 | return x 27 | 28 | 29 | def triangle_label(label, num_class, raduius=4): 30 | y_sig = np.zeros([num_class]) 31 | x = np.array(range(raduius+1)) 32 | y = -1/(raduius+1) * x + 1 33 | y_sig[:raduius+1] = y 34 | y_sig[-raduius:] = y[-1:0:-1] 35 | 36 | return
np.concatenate([y_sig[-label:], y_sig[:-label]], axis=0) 37 | 38 | 39 | def get_all_smooth_label(num_label, label_type=0, raduius=4): 40 | all_smooth_label = [] 41 | 42 | if label_type == 0: 43 | for i in range(num_label): 44 | all_smooth_label.append(gaussian_label(i, num_label, sig=raduius)) 45 | elif label_type == 1: 46 | for i in range(num_label): 47 | all_smooth_label.append(rectangular_label(i, num_label, raduius=raduius)) 48 | elif label_type == 2: 49 | for i in range(num_label): 50 | all_smooth_label.append(pulse_label(i, num_label)) 51 | elif label_type == 3: 52 | for i in range(num_label): 53 | all_smooth_label.append(triangle_label(i, num_label, raduius=raduius)) 54 | else: 55 | raise Exception('Only gaussian, rectangular, triangle and pulse labels are supported') 56 | return np.array(all_smooth_label) 57 | 58 | 59 | def angle_smooth_label(angle_label, angle_range=90, label_type=0, raduius=4, omega=1): 60 | """ 61 | :param angle_label: [-90, 0) when angle_range is 90, or [-180, 0) when angle_range is 180 62 | :param angle_range: 90 or 180 63 | :return: per-angle smooth label vectors of length angle_range / omega 64 | """ 65 | 66 | assert angle_range % omega == 0, 'wrong omega' 67 | 68 | angle_range /= omega 69 | angle_label /= omega 70 | 71 | angle_label = np.array(-np.round(angle_label), np.int32) 72 | all_smooth_label = get_all_smooth_label(int(angle_range), label_type, raduius) 73 | inx = angle_label == angle_range 74 | angle_label[inx] = angle_range - 1 75 | smooth_label = all_smooth_label[angle_label] 76 | return np.array(smooth_label, np.float32) 77 | 78 | 79 | if __name__ == '__main__': 80 | import matplotlib.pyplot as plt 81 | 82 | # angle_label = np.array([-89.9, -45.2, -0.3, -1.9]) 83 | # smooth_label = angle_smooth_label(angle_label) 84 | 85 | # y_sig = triangle_label(30, 180, raduius=8) 86 | # y_sig = gaussian_label(30, 180, sig=0.1) 87 | # y_sig = pulse_label(30, 180) 88 | y_sig = triangle_label(0, 90) 89 | x = np.array(range(0, 90, 1)) 90 | plt.plot(x, y_sig, "r-", linewidth=2) 91 | plt.grid(True) 92 | plt.show() 93 | print(y_sig) 94 | -------------------------------------------------------------------------------- /help_utils/tools.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import math 4 | import sys 5 | import os 6 | import cv2 7 | 8 | from libs.configs import cfgs 9 | 10 | 11 | def view_bar(message, num, total): 12 | rate = num / total 13 | rate_num = int(rate * 40) 14 | rate_nums = math.ceil(rate * 100) 15 | r = '\r%s:[%s%s]%d%%\t%d/%d' % (message, ">" * rate_num, " " * (40 - rate_num), rate_nums, num, total,) 16 | sys.stdout.write(r) 17 | sys.stdout.flush() 18 | 19 | 20 | def mkdir(path): 21 | if not os.path.exists(path): 22 | os.makedirs(path) 23 | 24 | 25 | def get_feature_map_size(src_len): 26 | feature_map_size = [] 27 | src_len /= 2 ** (int(cfgs.LEVEL[0][-1])-1) 28 | for _ in range(len(cfgs.LEVEL)): 29 | src_len = math.ceil(src_len / 2) 30 | feature_map_size.append((src_len, src_len)) 31 | 32 | return feature_map_size 33 | 34 | 35 | def get_dota_short_names(label): 36 | DOTA_SHORT_NAMES = { 37 | 'roundabout': 'RA', 38 | 'tennis-court': 'TC', 39 | 'swimming-pool': 'SP', 40 | 'storage-tank': 'ST', 41 | 'soccer-ball-field': 'SBF', 42 | 'small-vehicle': 'SV', 43 | 'ship': 'SH', 44 | 'plane': 'PL', 45 | 'large-vehicle': 'LV', 46 | 'helicopter': 'HC', 47 | 'harbor': 'HA', 48 | 'ground-track-field': 'GTF', 49 | 'bridge': 'BR', 50 | 'basketball-court': 'BC', 51 | 'baseball-diamond': 'BD' 52 | } 53 | 54 | return DOTA_SHORT_NAMES[label] 55
| 56 | 57 | def read_dota_gt_and_vis(img, gt_txt): 58 | txt_data = open(gt_txt, 'r').readlines() 59 | for i in txt_data: 60 | if len(i.split(' ')) < 9: 61 | continue 62 | 63 | gt_box = [int(xy) for xy in i.split(' ')[:8]] 64 | # gt_label = i.split(' ')[8] 65 | cv2.line(img, (gt_box[0], gt_box[1]), (gt_box[2], gt_box[3]), color=(0, 0, 255), thickness=3) 66 | cv2.line(img, (gt_box[2], gt_box[3]), (gt_box[4], gt_box[5]), color=(0, 0, 255), thickness=3) 67 | cv2.line(img, (gt_box[4], gt_box[5]), (gt_box[6], gt_box[7]), color=(0, 0, 255), thickness=3) 68 | cv2.line(img, (gt_box[6], gt_box[7]), (gt_box[0], gt_box[1]), color=(0, 0, 255), thickness=3) 69 | return img 70 | -------------------------------------------------------------------------------- /images.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/images.png -------------------------------------------------------------------------------- /libs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/.DS_Store -------------------------------------------------------------------------------- /libs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/__init__.py -------------------------------------------------------------------------------- /libs/box_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/box_utils/__init__.py -------------------------------------------------------------------------------- /libs/box_utils/anchor_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, print_function, division 3 | 4 | import tensorflow as tf 5 | import sys 6 | sys.path.append('../..') 7 | 8 | from libs.configs import cfgs 9 | 10 | 11 | def make_anchors(base_anchor_size, anchor_scales, anchor_ratios, 12 | featuremap_height, featuremap_width, 13 | stride, name='make_anchors'): 14 | ''' 15 | :param base_anchor_size:256 16 | :param anchor_scales: 17 | :param anchor_ratios: 18 | :param featuremap_height: 19 | :param featuremap_width: 20 | :param stride: 21 | :return: 22 | ''' 23 | with tf.variable_scope(name): 24 | base_anchor = tf.constant([0, 0, base_anchor_size, base_anchor_size], tf.float32) # [x_center, y_center, w, h] 25 | 26 | ws, hs = enum_ratios(enum_scales(base_anchor, anchor_scales), 27 | anchor_ratios) # per locations ws and hs 28 | 29 | x_centers = tf.range(featuremap_width, dtype=tf.float32) * stride 30 | y_centers = tf.range(featuremap_height, dtype=tf.float32) * stride 31 | 32 | if cfgs.USE_CENTER_OFFSET: 33 | x_centers += stride / 2. 34 | y_centers += stride / 2. 
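# the meshgrid pairing below broadcasts the per-location (w, h) shapes across the grid of centers, so the reshaped result enumerates all featuremap_height * featuremap_width * (num scales * num ratios) anchors in [xmin, ymin, xmax, ymax] order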
35 | 36 | x_centers, y_centers = tf.meshgrid(x_centers, y_centers) 37 | 38 | ws, x_centers = tf.meshgrid(ws, x_centers) 39 | hs, y_centers = tf.meshgrid(hs, y_centers) 40 | 41 | anchor_centers = tf.stack([x_centers, y_centers], 2) 42 | anchor_centers = tf.reshape(anchor_centers, [-1, 2]) 43 | 44 | box_sizes = tf.stack([ws, hs], axis=2) 45 | box_sizes = tf.reshape(box_sizes, [-1, 2]) 46 | # anchors = tf.concat([anchor_centers, box_sizes], axis=1) 47 | anchors = tf.concat([anchor_centers - 0.5*box_sizes, 48 | anchor_centers + 0.5*box_sizes], axis=1) 49 | return anchors 50 | 51 | 52 | def enum_scales(base_anchor, anchor_scales): 53 | 54 | anchor_scales = base_anchor * tf.constant(anchor_scales, dtype=tf.float32, shape=(len(anchor_scales), 1)) 55 | 56 | return anchor_scales 57 | 58 | 59 | def enum_ratios(anchors, anchor_ratios): 60 | ''' 61 | ratio = h /w 62 | :param anchors: 63 | :param anchor_ratios: 64 | :return: 65 | ''' 66 | ws = anchors[:, 2] # for base anchor: w == h 67 | hs = anchors[:, 3] 68 | sqrt_ratios = tf.sqrt(tf.constant(anchor_ratios)) 69 | 70 | ws = tf.reshape(ws / sqrt_ratios[:, tf.newaxis], [-1, 1]) 71 | hs = tf.reshape(hs * sqrt_ratios[:, tf.newaxis], [-1, 1]) 72 | 73 | return ws, hs 74 | 75 | 76 | if __name__ == '__main__': 77 | import os 78 | os.environ["CUDA_VISIBLE_DEVICES"] = '0' 79 | base_anchor_size = 32 80 | anchor_scales = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 81 | anchor_ratios = [0.5, 2.0, 1.0] 82 | anchors = make_anchors(base_anchor_size=base_anchor_size, anchor_ratios=anchor_ratios, 83 | anchor_scales=anchor_scales, 84 | featuremap_width=512, 85 | featuremap_height=512, 86 | stride=8) 87 | init = tf.global_variables_initializer() 88 | with tf.Session() as sess: 89 | sess.run(init) 90 | anchor_result = sess.run(anchors) 91 | print(anchor_result[:10]) 92 | 93 | -------------------------------------------------------------------------------- /libs/box_utils/boxes_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | 10 | def ious_calu(boxes_1, boxes_2): 11 | ''' 12 | 13 | :param boxes_1: [N, 4] [xmin, ymin, xmax, ymax] 14 | :param boxes_2: [M, 4] [xmin, ymin. xmax, ymax] 15 | :return: 16 | ''' 17 | boxes_1 = tf.cast(boxes_1, tf.float32) 18 | boxes_2 = tf.cast(boxes_2, tf.float32) 19 | xmin_1, ymin_1, xmax_1, ymax_1 = tf.split(boxes_1, 4, axis=1) # xmin_1 shape is [N, 1].. 20 | xmin_2, ymin_2, xmax_2, ymax_2 = tf.unstack(boxes_2, axis=1) # xmin_2 shape is [M, ].. 
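# keeping boxes_1 coordinates as [N, 1] columns and boxes_2 as [M] rows lets every tf.maximum / tf.minimum below broadcast to a pairwise [N, M] matrix, so ious_calu scores every box in boxes_1 against every box in boxes_2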
21 | 22 | max_xmin = tf.maximum(xmin_1, xmin_2) 23 | min_xmax = tf.minimum(xmax_1, xmax_2) 24 | 25 | max_ymin = tf.maximum(ymin_1, ymin_2) 26 | min_ymax = tf.minimum(ymax_1, ymax_2) 27 | 28 | overlap_h = tf.maximum(0., min_ymax - max_ymin) # avoid h < 0 29 | overlap_w = tf.maximum(0., min_xmax - max_xmin) 30 | 31 | overlaps = overlap_h * overlap_w 32 | 33 | area_1 = (xmax_1 - xmin_1) * (ymax_1 - ymin_1) # [N, 1] 34 | area_2 = (xmax_2 - xmin_2) * (ymax_2 - ymin_2) # [M, ] 35 | 36 | ious = overlaps / (area_1 + area_2 - overlaps) 37 | 38 | return ious 39 | 40 | 41 | def clip_boxes_to_img_boundaries(boxes, img_shape): 42 | ''' 43 | 44 | :param boxes: [N, 4] boxes; img_shape is [batch, h, w, c] 45 | :return: boxes clipped to the image boundaries 46 | ''' 47 | 48 | with tf.name_scope('clip_boxes_to_img_boundaries'): 49 | 50 | # xmin, ymin, xmax, ymax = tf.unstack(boxes, axis=1) 51 | xmin = boxes[:, 0] 52 | ymin = boxes[:, 1] 53 | xmax = boxes[:, 2] 54 | ymax = boxes[:, 3] 55 | img_h, img_w = img_shape[1], img_shape[2] 56 | 57 | img_h, img_w = tf.cast(img_h, tf.float32), tf.cast(img_w, tf.float32) 58 | 59 | xmin = tf.maximum(tf.minimum(xmin, img_w-1.), 0.) 60 | ymin = tf.maximum(tf.minimum(ymin, img_h-1.), 0.) 61 | 62 | xmax = tf.maximum(tf.minimum(xmax, img_w-1.), 0.) 63 | ymax = tf.maximum(tf.minimum(ymax, img_h-1.), 0.) 64 | 65 | return tf.transpose(tf.stack([xmin, ymin, xmax, ymax])) 66 | 67 | 68 | def filter_outside_boxes(boxes, img_h, img_w): 69 | ''' 70 | :param boxes: boxes with format [xmin, ymin, xmax, ymax] 71 | :param img_h: height of image 72 | :param img_w: width of image 73 | :return: indices of boxes that are inside the image boundary 74 | ''' 75 | 76 | with tf.name_scope('filter_outside_boxes'): 77 | xmin, ymin, xmax, ymax = tf.unstack(boxes, axis=1) 78 | 79 | xmin_index = tf.greater_equal(xmin, 0) 80 | ymin_index = tf.greater_equal(ymin, 0) 81 | xmax_index = tf.less_equal(xmax, tf.cast(img_w, tf.float32)) 82 | ymax_index = tf.less_equal(ymax, tf.cast(img_h, tf.float32)) 83 | 84 | indices = tf.transpose(tf.stack([xmin_index, ymin_index, xmax_index, ymax_index])) 85 | indices = tf.cast(indices, dtype=tf.int32) 86 | indices = tf.reduce_sum(indices, axis=1) 87 | indices = tf.where(tf.equal(indices, 4)) 88 | # indices = tf.equal(indices, 4) 89 | return tf.reshape(indices, [-1]) 90 | 91 | 92 | def padd_boxes_with_zeros(boxes, scores, max_num_of_boxes): 93 | 94 | ''' 95 | the num of boxes is less than max_num_of_boxes, so pad both with zeros ([0, 0, 0, 0] boxes, 0. scores) 96 | :param boxes: 97 | :param scores: [-1] 98 | :param max_num_of_boxes: 99 | :return: 100 | ''' 101 | 102 | pad_num = tf.cast(max_num_of_boxes, tf.int32) - tf.shape(boxes)[0] 103 | 104 | zero_boxes = tf.zeros(shape=[pad_num, 4], dtype=boxes.dtype) 105 | zero_scores = tf.zeros(shape=[pad_num], dtype=scores.dtype) 106 | 107 | final_boxes = tf.concat([boxes, zero_boxes], axis=0) 108 | 109 | final_scores = tf.concat([scores, zero_scores], axis=0) 110 | 111 | return final_boxes, final_scores -------------------------------------------------------------------------------- /libs/box_utils/cython_utils/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | clean: 5 | rm -rf */*.pyc 6 | rm -rf */*.so 7 | -------------------------------------------------------------------------------- /libs/box_utils/cython_utils/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/box_utils/cython_utils/__init__.py -------------------------------------------------------------------------------- /libs/box_utils/iou.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | 8 | import tensorflow as tf 9 | import numpy as np 10 | 11 | 12 | def iou_calculate(boxes_1, boxes_2): 13 | 14 | with tf.name_scope('iou_calculate'): 15 | 16 | xmin_1, ymin_1, xmax_1, ymax_1 = tf.unstack(boxes_1, axis=1) # each shape is [N, ] after unstack 17 | 18 | xmin_2, ymin_2, xmax_2, ymax_2 = tf.unstack(boxes_2, axis=1) # each shape is [M, ]; the ops below are element-wise, so M must equal N 19 | 20 | max_xmin = tf.maximum(xmin_1, xmin_2) 21 | min_xmax = tf.minimum(xmax_1, xmax_2) 22 | 23 | max_ymin = tf.maximum(ymin_1, ymin_2) 24 | min_ymax = tf.minimum(ymax_1, ymax_2) 25 | 26 | overlap_h = tf.maximum(0., min_ymax - max_ymin) # avoid h < 0 27 | overlap_w = tf.maximum(0., min_xmax - max_xmin) 28 | 29 | overlaps = overlap_h * overlap_w 30 | 31 | area_1 = (xmax_1 - xmin_1) * (ymax_1 - ymin_1) # [N, ] 32 | area_2 = (xmax_2 - xmin_2) * (ymax_2 - ymin_2) # [M, ] 33 | 34 | iou = overlaps / (area_1 + area_2 - overlaps) 35 | 36 | return iou 37 | 38 | 39 | def iou_calculate_np(boxes_1, boxes_2): 40 | xmin_1, ymin_1, xmax_1, ymax_1 = np.split(boxes_1, 4, axis=1) 41 | # xmin_1, ymin_1, xmax_1, ymax_1 = boxes_1[:, 0], boxes_1[:, 1], boxes_1[:, 2], boxes_1[:, 3] 42 | 43 | xmin_2, ymin_2, xmax_2, ymax_2 = boxes_2[:, 0], boxes_2[:, 1], boxes_2[:, 2], boxes_2[:, 3] 44 | 45 | max_xmin = np.maximum(xmin_1, xmin_2) 46 | min_xmax = np.minimum(xmax_1, xmax_2) 47 | 48 | max_ymin = np.maximum(ymin_1, ymin_2) 49 | min_ymax = np.minimum(ymax_1, ymax_2) 50 | 51 | overlap_h = np.maximum(0., min_ymax - max_ymin) # avoid h < 0 52 | overlap_w = np.maximum(0., min_xmax - max_xmin) 53 | 54 | overlaps = overlap_h * overlap_w 55 | 56 | area_1 = (xmax_1 - xmin_1) * (ymax_1 - ymin_1) # [N, 1] 57 | area_2 = (xmax_2 - xmin_2) * (ymax_2 - ymin_2) # [M, ] 58 | 59 | iou = overlaps / (area_1 + area_2 - overlaps) 60 | 61 | return iou 62 | 63 | 64 | def iou_calculate1(boxes_1, boxes_2): 65 | 66 | xmin_1, ymin_1, xmax_1, ymax_1 = boxes_1[:, 0], boxes_1[:, 1], boxes_1[:, 2], boxes_1[:, 3] 67 | 68 | xmin_2, ymin_2, xmax_2, ymax_2 = boxes_2[:, 0], boxes_2[:, 1], boxes_2[:, 2], boxes_2[:, 3] 69 | 70 | max_xmin = np.maximum(xmin_1, xmin_2) 71 | min_xmax = np.minimum(xmax_1, xmax_2) 72 | 73 | max_ymin = np.maximum(ymin_1, ymin_2) 74 | min_ymax = np.minimum(ymax_1, ymax_2) 75 | 76 | overlap_h = np.maximum(0., min_ymax - max_ymin) # avoid h < 0 77 | overlap_w = np.maximum(0., min_xmax - max_xmin) 78 | 79 | overlaps = overlap_h * overlap_w 80 | 81 | area_1 = (xmax_1 - xmin_1) * (ymax_1 - ymin_1) # [N, ] 82 | area_2 = (xmax_2 - xmin_2) * (ymax_2 - ymin_2) # [M, ] 83 | 84 | iou = overlaps / (area_1 + area_2 - overlaps) 85 | 86 | return iou 87 | 88 | 89 | if __name__ == '__main__': 90 | import os 91 | os.environ["CUDA_VISIBLE_DEVICES"] = '13' 92 | boxes1 = np.array([[50, 50, 100, 300], 93 | [60, 60, 100, 200]], np.float32) 94 | 95 | boxes2 = np.array([[50, 50, 100, 300], 96 | [200, 200, 100, 200]], np.float32) 97 | 98 | print(iou_calculate_np(boxes1, boxes2)) 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /libs/box_utils/iou_cpu.pyx:
-------------------------------------------------------------------------------- 1 | # written by yjr 2 | 3 | cimport cython 4 | import numpy as np 5 | cimport numpy as np 6 | import cv2 7 | import time 8 | 9 | DTYPE = np.float32 10 | ctypedef np.float32_t DTYPE_t 11 | ctypedef bint BOOL 12 | 13 | cdef DTYPE_t two_boxes_iou(np.ndarray[DTYPE_t, ndim=1] rectangle_1, np.ndarray[DTYPE_t, ndim=1] rectangle_2): 14 | 15 | """ 16 | calu rectangle_1 and rectangle_2 iou 17 | :param rectangle_1: [x, y, w, h, theta]. shape: (5, ) 18 | :param rectangle_2: 19 | :return: 20 | """ 21 | cdef DTYPE_t area1 = rectangle_1[2] * rectangle_1[3] 22 | cdef DTYPE_t area2 = rectangle_2[2] * rectangle_2[3] 23 | 24 | rect_1 = ((rectangle_1[0], rectangle_1[1]), (rectangle_1[3], rectangle_1[2]), rectangle_1[-1]) 25 | rect_2 = ((rectangle_2[0], rectangle_2[1]), (rectangle_2[3], rectangle_2[2]), rectangle_2[-1]) 26 | 27 | inter_points = cv2.rotatedRectangleIntersection(rect_1, rect_2)[1] 28 | 29 | cdef np.ndarray[DTYPE_t, ndim=3] order_points 30 | cdef float inter_area, iou 31 | if inter_points is not None: 32 | order_points = cv2.convexHull(inter_points, returnPoints=True) 33 | 34 | inter_area = cv2.contourArea(order_points) 35 | if area1 + area2 == inter_area: 36 | print ("area1-->", area1) 37 | print ("area2-->", area2) 38 | print ("inter_area-->", inter_area) 39 | iou = inter_area *1.0 / (area1 + area2 - inter_area) 40 | return iou 41 | else: 42 | return 0.0 43 | 44 | cpdef np.ndarray[DTYPE_t, ndim=2] get_iou_matrix( 45 | np.ndarray[DTYPE_t, ndim=2] boxes1, # (N, 5) 46 | np.ndarray[DTYPE_t, ndim=2] boxes2): # (M, 5) 47 | 48 | cdef unsigned int num_of_boxes1 = boxes1.shape[0] 49 | cdef unsigned int num_of_boxes2 = boxes2.shape[0] 50 | 51 | cdef np.ndarray[DTYPE_t, ndim=2] iou_matrix = np.zeros((num_of_boxes1, num_of_boxes2), dtype=DTYPE) 52 | # cdef DTYPE_t box_iou 53 | cdef unsigned int n, m 54 | # st = time.time() 55 | for n in range(num_of_boxes1): 56 | for m in range(num_of_boxes2): 57 | 58 | iou_matrix[n, m] = two_boxes_iou(boxes1[n], boxes2[m]) 59 | # print "iou_matrix cost time: ", time.time() - st 60 | return iou_matrix 61 | 62 | -------------------------------------------------------------------------------- /libs/box_utils/mask_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | import numpy as np 5 | import tfplot as tfp 6 | import cv2 7 | 8 | 9 | def make_gt_mask(fet_h, fet_w, img_h, img_w, gtboxes): 10 | ''' 11 | :param fet_h: 12 | :param fet_w: 13 | :param img_h: 14 | :param img_w: 15 | :param gtboxes: [xmin, ymin, xmax, ymax, label]. 
shape is (N, 5) 16 | :return: 17 | ''' 18 | gtboxes = np.reshape(gtboxes, [-1, 5]) 19 | # xmin, ymin, xmax, ymax, label = gtboxes[:, 0], gtboxes[:, 1], gtboxes[:, 2], gtboxes[:, 3], gtboxes[:, 4] 20 | 21 | areas = (gtboxes[:, 2]-gtboxes[:, 0])*(gtboxes[:, 3]-gtboxes[:, 1]) 22 | arg_areas = np.argsort(-1*areas) # sort from large to small 23 | gtboxes = gtboxes[arg_areas] 24 | 25 | fet_h, fet_w = int(fet_h), int(fet_w) 26 | mask = np.zeros(shape=[fet_h, fet_w], dtype=np.int32) 27 | for a_box in gtboxes: 28 | xmin, ymin, xmax, ymax, label = a_box[0], a_box[1], a_box[2], a_box[3], a_box[4] 29 | 30 | new_xmin, new_ymin, new_xmax, new_ymax = int(xmin*fet_w/float(img_w)), int(ymin*fet_h/float(img_h)),\ 31 | int(xmax*fet_w/float(img_w)), int(ymax*fet_h/float(img_h)) 32 | 33 | new_xmin, new_ymin = max(0, new_xmin), max(0, new_ymin) 34 | new_xmax, new_ymax = min(fet_w, new_xmax), min(fet_h, new_ymax) 35 | 36 | mask[new_ymin:new_ymax, new_xmin:new_xmax] = np.int32(label) 37 | return mask 38 | 39 | 40 | def make_r_gt_mask(fet_h, fet_w, img_h, img_w, gtboxes): 41 | gtboxes = np.reshape(gtboxes, [-1, 6]) # [x, y, w, h, theta, label] 42 | 43 | areas = gtboxes[:, 2] * gtboxes[:, 3] 44 | arg_areas = np.argsort(-1 * areas) # sort from large to small 45 | gtboxes = gtboxes[arg_areas] 46 | 47 | fet_h, fet_w = int(fet_h), int(fet_w) 48 | mask = np.zeros(shape=[fet_h, fet_w], dtype=np.int32) 49 | for a_box in gtboxes: 50 | # print(a_box) 51 | box = cv2.boxPoints(((a_box[0], a_box[1]), (a_box[2], a_box[3]), a_box[4])) 52 | box = np.reshape(box, [-1, ]) 53 | label = a_box[-1] 54 | new_box = [] 55 | for i in range(8): 56 | if i % 2 == 0: 57 | x = box[i] 58 | new_x = int(x * fet_w / float(img_w)) 59 | new_box.append(new_x) 60 | else: 61 | y = box[i] 62 | new_y = int(y*fet_h/float(img_h)) 63 | new_box.append(new_y) 64 | 65 | new_box = np.int0(new_box).reshape([4, 2]) 66 | color = int(label) 67 | # print(type(color), color) 68 | cv2.fillConvexPoly(mask, new_box, color=color) 69 | # print (mask.dtype) 70 | return mask 71 | 72 | 73 | def vis_mask_tfsmry(mask, name): 74 | ''' 75 | :param mask:[H, W]. 
It's a tensor, not array 76 | :return: 77 | ''' 78 | 79 | def figure_attention(activation): 80 | fig, ax = tfp.subplots() 81 | im = ax.imshow(activation, cmap='jet') 82 | fig.colorbar(im) 83 | return fig 84 | 85 | heatmap = mask*10 86 | 87 | tfp.summary.plot(name, figure_attention, [heatmap]) -------------------------------------------------------------------------------- /libs/box_utils/rbbox_overlaps.hpp: -------------------------------------------------------------------------------- 1 | void _overlaps(float* overlaps,const float* boxes,const float* query_boxes, int n, int k, int device_id); 2 | 3 | -------------------------------------------------------------------------------- /libs/box_utils/rbbox_overlaps.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 4 | cdef extern from "rbbox_overlaps.hpp": 5 | void _overlaps(np.float32_t*, np.float32_t*, np.float32_t*, int, int, int) 6 | 7 | def rbbx_overlaps (np.ndarray[np.float32_t, ndim=2] boxes, np.ndarray[np.float32_t, ndim=2] query_boxes, np.int32_t device_id=0): 8 | # boxes: [x, y, w, h, theta] 9 | cdef int N = boxes.shape[0] 10 | cdef int K = query_boxes.shape[0] 11 | cdef np.ndarray[np.float32_t, ndim=2] overlaps = np.zeros((N, K), dtype = np.float32) 12 | _overlaps(&overlaps[0, 0], &boxes[0, 0], &query_boxes[0, 0], N, K, device_id) 13 | return overlaps 14 | 15 | 16 | -------------------------------------------------------------------------------- /libs/box_utils/rotate_anchors.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/box_utils/rotate_anchors.jpg -------------------------------------------------------------------------------- /libs/box_utils/rotate_gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _rotate_nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /libs/box_utils/rotate_polygon_nms.hpp: -------------------------------------------------------------------------------- 1 | void _rotate_nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /libs/box_utils/rotate_polygon_nms.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 4 | assert sizeof(int) == sizeof(np.int32_t) 5 | 6 | cdef extern from "rotate_gpu_nms.hpp": 7 | void _rotate_nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 8 | 9 | def rotate_gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float_t thresh, np.int32_t device_id=0): 10 | cdef int boxes_num = dets.shape[0] 11 | cdef int boxes_dim = dets.shape[1] 12 | cdef int num_out 13 | cdef np.ndarray[np.int32_t, ndim=1] \ 14 | keep = np.zeros(boxes_num, dtype=np.int32) 15 | cdef np.ndarray[np.float32_t, ndim=1] \ 16 | scores = dets[:, 5] 17 | cdef np.ndarray[np.int_t, ndim=1] \ 18 | order = scores.argsort()[::-1] 19 | cdef np.ndarray[np.float32_t, ndim=2] \ 20 | sorted_dets = dets[order, :] 21 | thresh = thresh 22 | _rotate_nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, 
device_id) 23 | keep = keep[:num_out] 24 | return order[keep] 25 | -------------------------------------------------------------------------------- /libs/box_utils/show_box_in_tensor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | from libs.box_utils import draw_box_in_img 10 | 11 | 12 | def only_draw_boxes(img_batch, boxes, method, head=None, is_csl=False): 13 | 14 | boxes = tf.stop_gradient(boxes) 15 | img_tensor = tf.squeeze(img_batch, 0) 16 | img_tensor = tf.cast(img_tensor, tf.float32) 17 | labels = tf.ones(shape=(tf.shape(boxes)[0], ), dtype=tf.int32) * draw_box_in_img.ONLY_DRAW_BOXES 18 | scores = tf.zeros_like(labels, dtype=tf.float32) 19 | 20 | if head is None: 21 | head = tf.ones_like(scores) * -1 22 | 23 | img_tensor_with_boxes = tf.py_func(draw_box_in_img.draw_boxes_with_label_and_scores, 24 | inp=[img_tensor, boxes, labels, scores, method, head, is_csl], 25 | Tout=tf.uint8) 26 | img_tensor_with_boxes = tf.reshape(img_tensor_with_boxes, tf.shape(img_batch)) # [batch_size, h, w, c] 27 | 28 | return img_tensor_with_boxes 29 | 30 | 31 | def draw_boxes_with_scores(img_batch, boxes, scores, method, head, is_csl=False): 32 | 33 | if head is None: 34 | head = tf.ones_like(scores) * -1 35 | 36 | boxes = tf.stop_gradient(boxes) 37 | scores = tf.stop_gradient(scores) 38 | 39 | img_tensor = tf.squeeze(img_batch, 0) 40 | img_tensor = tf.cast(img_tensor, tf.float32) 41 | labels = tf.ones(shape=(tf.shape(boxes)[0],), dtype=tf.int32) * draw_box_in_img.ONLY_DRAW_BOXES_WITH_SCORES 42 | img_tensor_with_boxes = tf.py_func(draw_box_in_img.draw_boxes_with_label_and_scores, 43 | inp=[img_tensor, boxes, labels, scores, method, head, is_csl], 44 | Tout=[tf.uint8]) 45 | img_tensor_with_boxes = tf.reshape(img_tensor_with_boxes, tf.shape(img_batch)) 46 | return img_tensor_with_boxes 47 | 48 | 49 | def draw_boxes_with_categories(img_batch, boxes, labels, method, head=None, is_csl=False): 50 | 51 | if head is None: 52 | head = tf.ones_like(labels) * -1 53 | 54 | boxes = tf.stop_gradient(boxes) 55 | 56 | img_tensor = tf.squeeze(img_batch, 0) 57 | img_tensor = tf.cast(img_tensor, tf.float32) 58 | scores = tf.ones(shape=(tf.shape(boxes)[0],), dtype=tf.float32) 59 | 60 | img_tensor_with_boxes = tf.py_func(draw_box_in_img.draw_boxes_with_label_and_scores, 61 | inp=[img_tensor, boxes, labels, scores, method, head, is_csl], 62 | Tout=[tf.uint8]) 63 | img_tensor_with_boxes = tf.reshape(img_tensor_with_boxes, tf.shape(img_batch)) 64 | return img_tensor_with_boxes 65 | 66 | 67 | def draw_boxes_with_categories_and_scores(img_batch, boxes, labels, scores, method, head=None, is_csl=False): 68 | 69 | if head is None: 70 | head = tf.ones_like(labels) * -1 71 | 72 | boxes = tf.stop_gradient(boxes) 73 | scores = tf.stop_gradient(scores) 74 | 75 | img_tensor = tf.squeeze(img_batch, 0) 76 | img_tensor = tf.cast(img_tensor, tf.float32) 77 | img_tensor_with_boxes = tf.py_func(draw_box_in_img.draw_boxes_with_label_and_scores, 78 | inp=[img_tensor, boxes, labels, scores, method, head, is_csl], 79 | Tout=[tf.uint8]) 80 | img_tensor_with_boxes = tf.reshape(img_tensor_with_boxes, tf.shape(img_batch)) 81 | return img_tensor_with_boxes 82 | 83 | 84 | if __name__ == "__main__": 85 | print (1) 86 | 87 | -------------------------------------------------------------------------------- /libs/box_utils/tf_ops.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | import tensorflow as tf 6 | 7 | ''' 8 | all of these ops are derived from the tensorflow Object Detection API 9 | ''' 10 | def indices_to_dense_vector(indices, 11 | size, 12 | indices_value=1., 13 | default_value=0, 14 | dtype=tf.float32): 15 | """Creates a dense vector with the entries at `indices` set to `indices_value` and the rest set to `default_value`. 16 | 17 | This function exists because it is unclear if it is safe to use 18 | tf.sparse_to_dense(indices, [size], 1, validate_indices=False) 19 | with indices which are not ordered. 20 | This function accepts a dynamic size (e.g. tf.shape(tensor)[0]) 21 | 22 | Args: 23 | indices: 1d Tensor with integer indices which are to be set to 24 | indices_values. 25 | size: scalar with size (integer) of output Tensor. 26 | indices_value: values of elements specified by indices in the output vector 27 | default_value: values of other elements in the output vector. 28 | dtype: data type. 29 | 30 | Returns: 31 | dense 1D Tensor of shape [size] with indices set to indices_values and the 32 | rest set to default_value. 33 | """ 34 | size = tf.to_int32(size) 35 | zeros = tf.ones([size], dtype=dtype) * default_value # baseline vector filled with default_value 36 | values = tf.ones_like(indices, dtype=dtype) * indices_value 37 | 38 | return tf.dynamic_stitch([tf.range(size), tf.to_int32(indices)], 39 | [zeros, values]) -------------------------------------------------------------------------------- /libs/configs/DOTA1.0/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/configs/DOTA1.0/__init__.py -------------------------------------------------------------------------------- /libs/configs/DOTA1.0/baseline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/configs/DOTA1.0/baseline/__init__.py -------------------------------------------------------------------------------- /libs/configs/DOTA1.0/baseline/cfgs_res50_dota_win_v19.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | v4 + windows version 9 | 10 | """ 11 | 12 | # ------------------------------------------------ 13 | VERSION = 'RetinaNet_DOTA_1x_20200607' 14 | NET_NAME = 'resnet50_v1d' # 'MobilenetV2' 15 | ADD_BOX_IN_TENSORBOARD = True 16 | 17 | # ---------------------------------------- System_config 18 | ROOT_PATH = os.path.abspath('../') 19 | print(20*"++--") 20 | print(ROOT_PATH) 21 | GPU_GROUP = "0" 22 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 23 | SHOW_TRAIN_INFO_INTE = 20 24 | SMRY_ITER = 200 25 | SAVE_WEIGHTS_INTE = 27000 26 | 27 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 28 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 29 | 30 | if NET_NAME.startswith("resnet"): 31 | weights_name = NET_NAME 32 | elif NET_NAME.startswith("MobilenetV2"): 33 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 34 | else: 35 | raise Exception('net name must be in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 36 | 37 | PRETRAINED_CKPT = ROOT_PATH +
'/data/pretrained_weights/' + weights_name + '.ckpt' 38 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 39 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 40 | 41 | # ------------------------------------------ Train config 42 | RESTORE_FROM_RPN = False 43 | FIXED_BLOCKS = 1 # allow 0~3 44 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 45 | USE_07_METRIC = True 46 | 47 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 48 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 49 | 50 | CLS_WEIGHT = 1.0 51 | REG_WEIGHT = 1.0 / 5.0 52 | REG_LOSS_MODE = None 53 | 54 | BATCH_SIZE = 1 55 | EPSILON = 1e-5 56 | MOMENTUM = 0.9 57 | LR = 5e-4 58 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 59 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 60 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 61 | 62 | # -------------------------------------------- Data_preprocess_config 63 | DATASET_NAME = 'DOTA' # 'pascal', 'coco' 64 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 65 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 66 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 67 | IMG_SHORT_SIDE_LEN = 800 68 | IMG_MAX_LENGTH = 800 69 | CLASS_NUM = 15 70 | 71 | IMG_ROTATE = False 72 | RGB2GRAY = False 73 | VERTICAL_FLIP = False 74 | HORIZONTAL_FLIP = True 75 | IMAGE_PYRAMID = False 76 | 77 | # --------------------------------------------- Network_config 78 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 79 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 80 | PROBABILITY = 0.01 81 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 82 | WEIGHT_DECAY = 1e-4 83 | USE_GN = False 84 | FPN_CHANNEL = 256 85 | 86 | # ---------------------------------------------Anchor config 87 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 88 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 89 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 90 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 91 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 92 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 93 | ANCHOR_SCALE_FACTORS = None 94 | USE_CENTER_OFFSET = True 95 | METHOD = 'H' 96 | USE_ANGLE_COND = False 97 | ANGLE_RANGE = 90 # or 180 98 | 99 | # --------------------------------------------RPN config 100 | SHARE_NET = True 101 | USE_P5 = True 102 | IOU_POSITIVE_THRESHOLD = 0.5 103 | IOU_NEGATIVE_THRESHOLD = 0.4 104 | 105 | NMS = True 106 | NMS_IOU_THRESHOLD = 0.1 107 | MAXIMUM_DETECTIONS = 100 108 | FILTERED_SCORE = 0.05 109 | VIS_SCORE = 0.4 110 | 111 | 112 | -------------------------------------------------------------------------------- /libs/configs/DOTA1.0/dcl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/configs/DOTA1.0/dcl/__init__.py -------------------------------------------------------------------------------- /libs/configs/DOTA1.0/dota_train/cfgs_res50_dotatrain_dcl_v8.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | BCL + OMEGA = 180 / 32. 
+ period loss 9 | 10 | 11 | 12 | """ 13 | 14 | # ------------------------------------------------ 15 | VERSION = 'RetinaNet_DOTA_DCL_B_2x_20200921' 16 | NET_NAME = 'resnet50_v1d' # 'MobilenetV2' 17 | ADD_BOX_IN_TENSORBOARD = True 18 | 19 | # ---------------------------------------- System_config 20 | ROOT_PATH = os.path.abspath('../') 21 | print(20*"++--") 22 | print(ROOT_PATH) 23 | GPU_GROUP = "0,1,2" 24 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 25 | SHOW_TRAIN_INFO_INTE = 20 26 | SMRY_ITER = 2000 27 | SAVE_WEIGHTS_INTE = 20673 * 2 28 | 29 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 30 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 31 | 32 | if NET_NAME.startswith("resnet"): 33 | weights_name = NET_NAME 34 | elif NET_NAME.startswith("MobilenetV2"): 35 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 36 | else: 37 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 38 | 39 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 40 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 41 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 42 | 43 | # ------------------------------------------ Train config 44 | RESTORE_FROM_RPN = False 45 | FIXED_BLOCKS = 1 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | 49 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 50 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 51 | 52 | CLS_WEIGHT = 1.0 53 | REG_WEIGHT = 1.0 54 | ANGLE_WEIGHT = 0.5 55 | REG_LOSS_MODE = None 56 | ALPHA = 1.0 57 | BETA = 1.0 58 | 59 | BATCH_SIZE = 1 60 | EPSILON = 1e-5 61 | MOMENTUM = 0.9 62 | LR = 5e-4 63 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 64 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 65 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 66 | 67 | # -------------------------------------------- Data_preprocess_config 68 | DATASET_NAME = 'DOTATrain' # 'pascal', 'coco' 69 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 70 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 71 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 72 | IMG_SHORT_SIDE_LEN = 800 73 | IMG_MAX_LENGTH = 800 74 | CLASS_NUM = 15 75 | OMEGA = 180 / 32. 76 | ANGLE_MODE = 0 77 | 78 | IMG_ROTATE = False 79 | RGB2GRAY = False 80 | VERTICAL_FLIP = False 81 | HORIZONTAL_FLIP = True 82 | IMAGE_PYRAMID = False 83 | 84 | # --------------------------------------------- Network_config 85 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 86 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 87 | PROBABILITY = 0.01 88 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 89 | WEIGHT_DECAY = 1e-4 90 | USE_GN = False 91 | FPN_CHANNEL = 256 92 | 93 | # ---------------------------------------------Anchor config 94 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 95 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 96 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 97 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 98 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 
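# (added note) With METHOD = 'H' below, anchors are axis-aligned: 3 ANCHOR_SCALES x
# 7 ANCHOR_RATIOS = 21 anchors per location; ANCHOR_ANGLES presumably only takes
# effect on the rotated-anchor path (METHOD = 'R'), which would multiply that by
# its 6 angles.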
99 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 100 | ANCHOR_SCALE_FACTORS = None 101 | USE_CENTER_OFFSET = True 102 | METHOD = 'H' 103 | USE_ANGLE_COND = False 104 | ANGLE_RANGE = 180 # 90 or 180 105 | 106 | # --------------------------------------------RPN config 107 | SHARE_NET = True 108 | USE_P5 = True 109 | IOU_POSITIVE_THRESHOLD = 0.5 110 | IOU_NEGATIVE_THRESHOLD = 0.4 111 | 112 | NMS = True 113 | NMS_IOU_THRESHOLD = 0.1 114 | MAXIMUM_DETECTIONS = 100 115 | FILTERED_SCORE = 0.05 116 | VIS_SCORE = 0.4 117 | 118 | 119 | -------------------------------------------------------------------------------- /libs/configs/DOTA1.0/r3det_dcl/cfgs_res50_dota_refine_dcl_v2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 9 | 10 | 11 | 12 | """ 13 | 14 | # ------------------------------------------------ 15 | VERSION = 'RetinaNet_DOTA_Refine_DCL_G_2x_20201026' 16 | NET_NAME = 'resnet50_v1d' # 'MobilenetV2' 17 | ADD_BOX_IN_TENSORBOARD = True 18 | 19 | # ---------------------------------------- System_config 20 | ROOT_PATH = os.path.abspath('../') 21 | print(20*"++--") 22 | print(ROOT_PATH) 23 | GPU_GROUP = "0,1,2" 24 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 25 | SHOW_TRAIN_INFO_INTE = 20 26 | SMRY_ITER = 200 27 | SAVE_WEIGHTS_INTE = 27000 * 2 28 | 29 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 30 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 31 | 32 | if NET_NAME.startswith("resnet"): 33 | weights_name = NET_NAME 34 | elif NET_NAME.startswith("MobilenetV2"): 35 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 36 | else: 37 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 38 | 39 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 40 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 41 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 42 | 43 | # ------------------------------------------ Train config 44 | RESTORE_FROM_RPN = False 45 | FIXED_BLOCKS = 1 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | 49 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 50 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 51 | 52 | CLS_WEIGHT = 1.0 53 | REG_WEIGHT = 1.0 54 | ANGLE_WEIGHT = 0.5 55 | USE_IOU_FACTOR = True 56 | REG_LOSS_MODE = None 57 | ALPHA = 1.0 58 | BETA = 1.0 59 | 60 | BATCH_SIZE = 1 61 | EPSILON = 1e-5 62 | MOMENTUM = 0.9 63 | LR = 5e-4 64 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 65 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 66 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 67 | 68 | # -------------------------------------------- Data_preprocess_config 69 | DATASET_NAME = 'DOTA' # 'pascal', 'coco' 70 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 71 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 72 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 73 | IMG_SHORT_SIDE_LEN = 800 74 | IMG_MAX_LENGTH = 800 75 | CLASS_NUM = 15 76 | OMEGA = 180 / 256. 
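# (added note) OMEGA is the angle discretization step, so this config uses
# ANGLE_RANGE / OMEGA = 180 / (180/256.) = 256 angle bins, i.e. a log2(256) = 8-bit
# dense code per anchor. Judging by the '_G' / '_B' tags in the VERSION strings,
# ANGLE_MODE = 1 appears to select the gray coded label (GCL) and 0 the binary
# coded label (BCL).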
77 | ANGLE_MODE = 1 78 | 79 | IMG_ROTATE = False 80 | RGB2GRAY = False 81 | VERTICAL_FLIP = False 82 | HORIZONTAL_FLIP = True 83 | IMAGE_PYRAMID = False 84 | 85 | # --------------------------------------------- Network_config 86 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 87 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 88 | PROBABILITY = 0.01 89 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 90 | WEIGHT_DECAY = 1e-4 91 | USE_GN = False 92 | FPN_CHANNEL = 256 93 | 94 | # ---------------------------------------------Anchor config 95 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 96 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 97 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 98 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 99 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 100 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 101 | ANCHOR_SCALE_FACTORS = None 102 | USE_CENTER_OFFSET = True 103 | METHOD = 'H' 104 | USE_ANGLE_COND = False 105 | ANGLE_RANGE = 180 # 90 or 180 106 | 107 | # --------------------------------------------RPN config 108 | SHARE_NET = True 109 | USE_P5 = True 110 | IOU_POSITIVE_THRESHOLD = 0.5 111 | IOU_NEGATIVE_THRESHOLD = 0.4 112 | REFINE_IOU_POSITIVE_THRESHOLD = [0.6, 0.7] 113 | REFINE_IOU_NEGATIVE_THRESHOLD = [0.5, 0.6] 114 | 115 | NMS = True 116 | NMS_IOU_THRESHOLD = 0.1 117 | MAXIMUM_DETECTIONS = 100 118 | FILTERED_SCORE = 0.05 119 | VIS_SCORE = 0.4 120 | 121 | 122 | -------------------------------------------------------------------------------- /libs/configs/HRSC2016/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/configs/HRSC2016/__init__.py -------------------------------------------------------------------------------- /libs/configs/HRSC2016/dcl/cfgs_res101_hrsc2016_dcl_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 9 | 10 | """ 11 | 12 | # ------------------------------------------------ 13 | VERSION = 'RetinaNet_HRSC2016_DCL_B_2x_20200930' 14 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 15 | ADD_BOX_IN_TENSORBOARD = True 16 | 17 | # ---------------------------------------- System_config 18 | ROOT_PATH = os.path.abspath('../') 19 | print(20*"++--") 20 | print(ROOT_PATH) 21 | GPU_GROUP = "0,1,2" 22 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 23 | SHOW_TRAIN_INFO_INTE = 20 24 | SMRY_ITER = 200 25 | SAVE_WEIGHTS_INTE = 10000 26 | 27 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 28 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 29 | 30 | if NET_NAME.startswith("resnet"): 31 | weights_name = NET_NAME 32 | elif NET_NAME.startswith("MobilenetV2"): 33 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 34 | else: 35 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 36 | 37 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 38 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 39 | EVALUATE_R_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 40 | 41 | # ------------------------------------------ Train config 42 | RESTORE_FROM_RPN = False 43 | FIXED_BLOCKS = 1 # 
allow 0~3 44 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 45 | USE_07_METRIC = True 46 | EVAL_THRESHOLD = 0.5 47 | 48 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 49 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 50 | 51 | CLS_WEIGHT = 1.0 52 | REG_WEIGHT = 1.0 53 | ANGLE_WEIGHT = 0.5 54 | REG_LOSS_MODE = None 55 | ALPHA = 1.0 56 | BETA = 1.0 57 | 58 | BATCH_SIZE = 1 59 | EPSILON = 1e-5 60 | MOMENTUM = 0.9 61 | LR = 5e-4 62 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 63 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 64 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 65 | 66 | # -------------------------------------------- Data_preprocess_config 67 | DATASET_NAME = 'HRSC2016' # 'pascal', 'coco' 68 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 69 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 70 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 71 | IMG_SHORT_SIDE_LEN = 600 72 | IMG_MAX_LENGTH = 1000 73 | CLASS_NUM = 1 74 | OMEGA = 180 / 128. 75 | ANGLE_MODE = 0 76 | 77 | IMG_ROTATE = True 78 | RGB2GRAY = True 79 | VERTICAL_FLIP = True 80 | HORIZONTAL_FLIP = True 81 | IMAGE_PYRAMID = False 82 | 83 | # --------------------------------------------- Network_config 84 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 85 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 86 | PROBABILITY = 0.01 87 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 88 | WEIGHT_DECAY = 1e-4 89 | USE_GN = False 90 | FPN_CHANNEL = 256 91 | 92 | # ---------------------------------------------Anchor config 93 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 94 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 95 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 96 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 97 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 
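# (added note) The OMEGA = 180 / 128. set above corresponds to 128 angle bins,
# i.e. a 7-bit binary coded label (ANGLE_MODE = 0); a coarser OMEGA shrinks the
# angle classifier at the cost of angular precision, the trade-off the DCL configs
# in this repo ablate.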
98 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 99 | ANCHOR_SCALE_FACTORS = None 100 | USE_CENTER_OFFSET = True 101 | METHOD = 'H' 102 | USE_ANGLE_COND = False 103 | ANGLE_RANGE = 180 # 90 or 180 104 | 105 | # --------------------------------------------RPN config 106 | SHARE_NET = True 107 | USE_P5 = True 108 | IOU_POSITIVE_THRESHOLD = 0.5 109 | IOU_NEGATIVE_THRESHOLD = 0.4 110 | 111 | NMS = True 112 | NMS_IOU_THRESHOLD = 0.1 113 | MAXIMUM_DETECTIONS = 100 114 | FILTERED_SCORE = 0.05 115 | VIS_SCORE = 0.4 116 | 117 | 118 | -------------------------------------------------------------------------------- /libs/configs/HRSC2016/r3det_dcl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/configs/HRSC2016/r3det_dcl/__init__.py -------------------------------------------------------------------------------- /libs/configs/HRSC2016/r3det_dcl/cfgs_res101_hrsc2016_r3det_dcl_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | FLOPs: 1461458647; Trainable params: 56011145 9 | cls : ship|| Recall: 0.9714983713355049 || Precison: 0.2847255369928401|| AP: 0.8846049339336871 10 | F1:0.9243833400727861 P:0.9180722891566265 R:0.9307817589576547 11 | mAP is : 0.8846049339336871 12 | """ 13 | 14 | # ------------------------------------------------ 15 | VERSION = 'RetinaNet_HRSC2016_R3Det_DCL_B_2x_20201108' 16 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 17 | ADD_BOX_IN_TENSORBOARD = True 18 | 19 | # ---------------------------------------- System_config 20 | ROOT_PATH = os.path.abspath('../') 21 | print(20*"++--") 22 | print(ROOT_PATH) 23 | GPU_GROUP = "1,2,3" 24 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 25 | SHOW_TRAIN_INFO_INTE = 20 26 | SMRY_ITER = 200 27 | SAVE_WEIGHTS_INTE = 7500 * 2 28 | 29 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 30 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 31 | 32 | if NET_NAME.startswith("resnet"): 33 | weights_name = NET_NAME 34 | elif NET_NAME.startswith("MobilenetV2"): 35 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 36 | else: 37 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 38 | 39 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 40 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 41 | EVALUATE_R_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 42 | 43 | # ------------------------------------------ Train config 44 | RESTORE_FROM_RPN = False 45 | FIXED_BLOCKS = 1 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | EVAL_THRESHOLD = 0.5 49 | 50 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 51 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 52 | 53 | CLS_WEIGHT = 1.0 54 | REG_WEIGHT = 1.0 55 | ANGLE_WEIGHT = 0.5 56 | USE_IOU_FACTOR = True 57 | REG_LOSS_MODE = None 58 | ALPHA = 1.0 59 | BETA = 1.0 60 | 61 | BATCH_SIZE = 1 62 | EPSILON = 1e-5 63 | MOMENTUM = 0.9 64 | LR = 5e-4 65 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 66 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 67 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 68 | 69 | # -------------------------------------------- 
Data_preprocess_config 70 | DATASET_NAME = 'HRSC2016' # 'pascal', 'coco' 71 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 72 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 73 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 74 | IMG_SHORT_SIDE_LEN = [800, 400, 600, 1000, 1200] 75 | IMG_MAX_LENGTH = 1200 76 | CLASS_NUM = 1 77 | OMEGA = 180 / 64. 78 | ANGLE_MODE = 0 79 | 80 | IMG_ROTATE = True 81 | RGB2GRAY = True 82 | VERTICAL_FLIP = True 83 | HORIZONTAL_FLIP = True 84 | IMAGE_PYRAMID = True 85 | 86 | # --------------------------------------------- Network_config 87 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 88 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 89 | PROBABILITY = 0.01 90 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 91 | WEIGHT_DECAY = 1e-4 92 | USE_GN = False 93 | FPN_CHANNEL = 256 94 | 95 | # ---------------------------------------------Anchor config 96 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 97 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 98 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 99 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 100 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 101 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 102 | ANCHOR_SCALE_FACTORS = None 103 | USE_CENTER_OFFSET = True 104 | METHOD = 'H' 105 | USE_ANGLE_COND = False 106 | ANGLE_RANGE = 180 # 90 or 180 107 | 108 | # --------------------------------------------RPN config 109 | SHARE_NET = True 110 | USE_P5 = True 111 | IOU_POSITIVE_THRESHOLD = 0.5 112 | IOU_NEGATIVE_THRESHOLD = 0.4 113 | REFINE_IOU_POSITIVE_THRESHOLD = [0.6, 0.7] 114 | REFINE_IOU_NEGATIVE_THRESHOLD = [0.5, 0.6] 115 | 116 | NMS = True 117 | NMS_IOU_THRESHOLD = 0.1 118 | MAXIMUM_DETECTIONS = 100 119 | FILTERED_SCORE = 0.05 120 | VIS_SCORE = 0.4 121 | 122 | 123 | -------------------------------------------------------------------------------- /libs/configs/ICDAR2015/baseline/cfgs_res101_icdar2015_baseline_v2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 2020-10-01 retinanet 81.49% 83.29% 82.38% 9 | 10 | """ 11 | 12 | # ------------------------------------------------ 13 | VERSION = 'RetinaNet_ICDAR2015_Baseline_2x_20200929' 14 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 15 | ADD_BOX_IN_TENSORBOARD = True 16 | 17 | # ---------------------------------------- System_config 18 | ROOT_PATH = os.path.abspath('../') 19 | print(20*"++--") 20 | print(ROOT_PATH) 21 | GPU_GROUP = "0,1,2" 22 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 23 | SHOW_TRAIN_INFO_INTE = 20 24 | SMRY_ITER = 200 25 | SAVE_WEIGHTS_INTE = 10000 * 2 26 | 27 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 28 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 29 | 30 | if NET_NAME.startswith("resnet"): 31 | weights_name = NET_NAME 32 | elif NET_NAME.startswith("MobilenetV2"): 33 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 34 | else: 35 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 36 | 37 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 38 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 39 | EVALUATE_DIR = ROOT_PATH + 
'/output/evaluate_result_pickle/' 40 | 41 | # ------------------------------------------ Train config 42 | RESTORE_FROM_RPN = False 43 | FIXED_BLOCKS = 1 # allow 0~3 44 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 45 | USE_07_METRIC = True 46 | 47 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 48 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 49 | 50 | CLS_WEIGHT = 1.0 51 | REG_WEIGHT = 1.0 52 | ANGLE_WEIGHT = 0.5 53 | REG_LOSS_MODE = None 54 | ALPHA = 1.0 55 | BETA = 1.0 56 | 57 | BATCH_SIZE = 1 58 | EPSILON = 1e-5 59 | MOMENTUM = 0.9 60 | LR = 5e-4 61 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 62 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 63 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 64 | 65 | # -------------------------------------------- Data_preprocess_config 66 | DATASET_NAME = 'ICDAR2015' # 'pascal', 'coco' 67 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 68 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 69 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 70 | IMG_SHORT_SIDE_LEN = [800, 600, 1000, 1200] 71 | IMG_MAX_LENGTH = 1500 72 | CLASS_NUM = 1 73 | 74 | IMG_ROTATE = True 75 | RGB2GRAY = True 76 | VERTICAL_FLIP = True 77 | HORIZONTAL_FLIP = True 78 | IMAGE_PYRAMID = True 79 | 80 | # --------------------------------------------- Network_config 81 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 82 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 83 | PROBABILITY = 0.01 84 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 85 | WEIGHT_DECAY = 1e-4 86 | USE_GN = False 87 | FPN_CHANNEL = 256 88 | 89 | # ---------------------------------------------Anchor config 90 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 91 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 92 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 93 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 94 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 
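# (added note) IMG_SHORT_SIDE_LEN above is a list because IMAGE_PYRAMID = True; the
# preprocessing presumably samples one short-side length per image for multi-scale
# training, and ANGLE_RANGE = 180 below would select the long-edge 180-degree angle
# definition rather than the OpenCV-style 90-degree one.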
95 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 96 | ANCHOR_SCALE_FACTORS = None 97 | USE_CENTER_OFFSET = True 98 | METHOD = 'H' 99 | USE_ANGLE_COND = False 100 | ANGLE_RANGE = 180 # 90 or 180 101 | 102 | # --------------------------------------------RPN config 103 | SHARE_NET = True 104 | USE_P5 = True 105 | IOU_POSITIVE_THRESHOLD = 0.5 106 | IOU_NEGATIVE_THRESHOLD = 0.4 107 | 108 | NMS = True 109 | NMS_IOU_THRESHOLD = 0.1 110 | MAXIMUM_DETECTIONS = 100 111 | FILTERED_SCORE = 0.05 112 | VIS_SCORE = 0.7 113 | 114 | 115 | -------------------------------------------------------------------------------- /libs/configs/ICDAR2015/baseline/cfgs_res50_icdar2015_baseline_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 2020-09-25 retinanet 72.32% 66.23% 69.14% 9 | 10 | """ 11 | 12 | # ------------------------------------------------ 13 | VERSION = 'RetinaNet_ICDAR2015_Baseline_2x_20200925' 14 | NET_NAME = 'resnet50_v1d' # 'MobilenetV2' 15 | ADD_BOX_IN_TENSORBOARD = True 16 | 17 | # ---------------------------------------- System_config 18 | ROOT_PATH = os.path.abspath('../') 19 | print(20*"++--") 20 | print(ROOT_PATH) 21 | GPU_GROUP = "3" 22 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 23 | SHOW_TRAIN_INFO_INTE = 20 24 | SMRY_ITER = 200 25 | SAVE_WEIGHTS_INTE = 10000 26 | 27 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 28 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 29 | 30 | if NET_NAME.startswith("resnet"): 31 | weights_name = NET_NAME 32 | elif NET_NAME.startswith("MobilenetV2"): 33 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 34 | else: 35 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 36 | 37 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 38 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 39 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 40 | 41 | # ------------------------------------------ Train config 42 | RESTORE_FROM_RPN = False 43 | FIXED_BLOCKS = 1 # allow 0~3 44 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 45 | USE_07_METRIC = True 46 | 47 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 48 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 49 | 50 | CLS_WEIGHT = 1.0 51 | REG_WEIGHT = 1.0 52 | ANGLE_WEIGHT = 0.5 53 | REG_LOSS_MODE = None 54 | ALPHA = 1.0 55 | BETA = 1.0 56 | 57 | BATCH_SIZE = 1 58 | EPSILON = 1e-5 59 | MOMENTUM = 0.9 60 | LR = 5e-4 61 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 62 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 63 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 64 | 65 | # -------------------------------------------- Data_preprocess_config 66 | DATASET_NAME = 'ICDAR2015' # 'pascal', 'coco' 67 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 68 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 69 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR 70 | IMG_SHORT_SIDE_LEN = 800 71 | IMG_MAX_LENGTH = 1000 72 | CLASS_NUM = 1 73 | 74 | IMG_ROTATE = False 75 | RGB2GRAY = False 76 | VERTICAL_FLIP = False 77 | HORIZONTAL_FLIP = True 78 | IMAGE_PYRAMID = False 79 | 80 | # --------------------------------------------- Network_config 81 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 82 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 83 | PROBABILITY = 0.01 84 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 85 | WEIGHT_DECAY = 1e-4 86 | USE_GN = False 87 | FPN_CHANNEL = 256 88 | 89 | # ---------------------------------------------Anchor config 90 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 91 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 92 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 93 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 94 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 95 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 96 | ANCHOR_SCALE_FACTORS = None 97 | USE_CENTER_OFFSET = True 98 | METHOD = 'H' 99 | USE_ANGLE_COND = False 100 | ANGLE_RANGE = 180 # 90 or 180 101 | 102 | # --------------------------------------------RPN config 103 | SHARE_NET = True 104 | USE_P5 = True 105 | IOU_POSITIVE_THRESHOLD = 0.5 106 | IOU_NEGATIVE_THRESHOLD = 0.4 107 | 108 | NMS = True 109 | NMS_IOU_THRESHOLD = 0.1 110 | MAXIMUM_DETECTIONS = 100 111 | FILTERED_SCORE = 0.05 112 | VIS_SCORE = 0.8 113 | 114 | 115 | -------------------------------------------------------------------------------- /libs/configs/ICDAR2015/csl/cfgs_res101_icdar2015_csl_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | CSL + gaussian label, omega=1, r=6 9 | 2020-10-04 CSL 80.50% 87.40% 83.81% 10 | 11 | """ 12 | 13 | # ------------------------------------------------ 14 | VERSION = 'RetinaNet_ICDAR2015_CSL_2x_20201001' 15 | NET_NAME = 'resnet50_v1d' # 'MobilenetV2' 16 | ADD_BOX_IN_TENSORBOARD = True 17 | 18 | # ---------------------------------------- System_config 19 | ROOT_PATH = os.path.abspath('../') 20 | print(20*"++--") 21 | print(ROOT_PATH) 22 | GPU_GROUP = "0,1,2" 23 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 24 | SHOW_TRAIN_INFO_INTE = 20 25 | SMRY_ITER = 2000 26 | SAVE_WEIGHTS_INTE = 10000 * 2 27 | 28 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 29 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 30 | 31 | if NET_NAME.startswith("resnet"): 32 | weights_name = NET_NAME 33 | elif NET_NAME.startswith("MobilenetV2"): 34 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 35 | else: 36 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 37 | 38 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 39 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 40 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 41 | 42 | # ------------------------------------------ Train config 43 | RESTORE_FROM_RPN = False 44 | FIXED_BLOCKS = 1 # allow 0~3 45 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 46 | USE_07_METRIC = True 47 | 48 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 49 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 50 | 51 | CLS_WEIGHT = 1.0 52 | REG_WEIGHT = 
1.0 53 | ANGLE_WEIGHT = 0.5 54 | REG_LOSS_MODE = None 55 | 56 | BATCH_SIZE = 1 57 | EPSILON = 1e-5 58 | MOMENTUM = 0.9 59 | LR = 5e-4 60 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 61 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 62 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 63 | 64 | # -------------------------------------------- Data_preprocess_config 65 | DATASET_NAME = 'ICDAR2015' # 'pascal', 'coco' 66 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 67 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 68 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 69 | IMG_SHORT_SIDE_LEN = [800, 600, 1000, 1200] 70 | IMG_MAX_LENGTH = 1500 71 | CLASS_NUM = 1 72 | LABEL_TYPE = 0 73 | RADUIUS = 6 74 | OMEGA = 1 75 | 76 | IMG_ROTATE = True 77 | RGB2GRAY = True 78 | VERTICAL_FLIP = True 79 | HORIZONTAL_FLIP = True 80 | IMAGE_PYRAMID = True 81 | 82 | # --------------------------------------------- Network_config 83 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 84 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 85 | PROBABILITY = 0.01 86 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 87 | WEIGHT_DECAY = 1e-4 88 | USE_GN = False 89 | FPN_CHANNEL = 256 90 | 91 | # ---------------------------------------------Anchor config 92 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 93 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 94 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 95 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 96 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 97 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 98 | ANCHOR_SCALE_FACTORS = None 99 | USE_CENTER_OFFSET = True 100 | METHOD = 'H' 101 | USE_ANGLE_COND = False 102 | ANGLE_RANGE = 180 # 90 or 180 103 | 104 | # --------------------------------------------RPN config 105 | SHARE_NET = True 106 | USE_P5 = True 107 | IOU_POSITIVE_THRESHOLD = 0.5 108 | IOU_NEGATIVE_THRESHOLD = 0.4 109 | 110 | NMS = True 111 | NMS_IOU_THRESHOLD = 0.1 112 | MAXIMUM_DETECTIONS = 100 113 | FILTERED_SCORE = 0.05 114 | VIS_SCORE = 0.7 115 | 116 | -------------------------------------------------------------------------------- /libs/configs/ICDAR2015/dcl/cfgs_res101_icdar2015_dcl_v4.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 180 / 64. 
9 | 10 | 2020-09-29 DCL 81.61% 84.79% 83.17% 11 | 12 | """ 13 | 14 | # ------------------------------------------------ 15 | VERSION = 'RetinaNet_ICDAR2015_DCL_B_2x_20200928' 16 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 17 | ADD_BOX_IN_TENSORBOARD = True 18 | 19 | # ---------------------------------------- System_config 20 | ROOT_PATH = os.path.abspath('../') 21 | print(20*"++--") 22 | print(ROOT_PATH) 23 | GPU_GROUP = "0,1,2" 24 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 25 | SHOW_TRAIN_INFO_INTE = 20 26 | SMRY_ITER = 200 27 | SAVE_WEIGHTS_INTE = 10000 * 2 28 | 29 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 30 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 31 | 32 | if NET_NAME.startswith("resnet"): 33 | weights_name = NET_NAME 34 | elif NET_NAME.startswith("MobilenetV2"): 35 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 36 | else: 37 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 38 | 39 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 40 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 41 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 42 | 43 | # ------------------------------------------ Train config 44 | RESTORE_FROM_RPN = False 45 | FIXED_BLOCKS = 1 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | 49 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 50 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 51 | 52 | CLS_WEIGHT = 1.0 53 | REG_WEIGHT = 1.0 54 | ANGLE_WEIGHT = 0.5 55 | REG_LOSS_MODE = None 56 | ALPHA = 1.0 57 | BETA = 1.0 58 | 59 | BATCH_SIZE = 1 60 | EPSILON = 1e-5 61 | MOMENTUM = 0.9 62 | LR = 5e-4 63 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 64 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 65 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 66 | 67 | # -------------------------------------------- Data_preprocess_config 68 | DATASET_NAME = 'ICDAR2015' # 'pascal', 'coco' 69 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 70 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 71 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 72 | IMG_SHORT_SIDE_LEN = [800, 600, 1000, 1200] 73 | IMG_MAX_LENGTH = 1500 74 | CLASS_NUM = 1 75 | OMEGA = 180 / 64. 76 | ANGLE_MODE = 0 77 | 78 | IMG_ROTATE = True 79 | RGB2GRAY = True 80 | VERTICAL_FLIP = True 81 | HORIZONTAL_FLIP = True 82 | IMAGE_PYRAMID = True 83 | 84 | # --------------------------------------------- Network_config 85 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 86 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 87 | PROBABILITY = 0.01 88 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 89 | WEIGHT_DECAY = 1e-4 90 | USE_GN = False 91 | FPN_CHANNEL = 256 92 | 93 | # ---------------------------------------------Anchor config 94 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 95 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 96 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 97 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 98 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 
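# A minimal sanity check, not part of the original config: with METHOD = 'H'
# the detector lays down len(ANCHOR_SCALES) * len(ANCHOR_RATIOS) horizontal
# anchors per feature-map location; a config with METHOD = 'R' (see the
# OHD-SJTU v2 file further below) additionally enumerates ANCHOR_ANGLES.
# The local names below mirror the values in this config.
_scales = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]
_ratios = [1., 1 / 2., 2., 1 / 3., 3., 5., 1 / 5.]
_angles = [-90, -75, -60, -45, -30, -15]
anchors_per_loc_h = len(_scales) * len(_ratios)          # 3 * 7 = 21
anchors_per_loc_r = anchors_per_loc_h * len(_angles)     # 21 * 6 = 126 under METHOD = 'R'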
99 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 100 | ANCHOR_SCALE_FACTORS = None 101 | USE_CENTER_OFFSET = True 102 | METHOD = 'H' 103 | USE_ANGLE_COND = False 104 | ANGLE_RANGE = 180 # 90 or 180 105 | 106 | # --------------------------------------------RPN config 107 | SHARE_NET = True 108 | USE_P5 = True 109 | IOU_POSITIVE_THRESHOLD = 0.5 110 | IOU_NEGATIVE_THRESHOLD = 0.4 111 | 112 | NMS = True 113 | NMS_IOU_THRESHOLD = 0.1 114 | MAXIMUM_DETECTIONS = 100 115 | FILTERED_SCORE = 0.05 116 | VIS_SCORE = 0.6 117 | 118 | 119 | -------------------------------------------------------------------------------- /libs/configs/ICDAR2015/dcl/cfgs_res50_icdar2015_dcl_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 180 / 256. 9 | FLOPs: 478813602; Trainable params: 32664081 10 | 2020-09-25 DCL 67.21% 71.15% 69.13% 11 | """ 12 | 13 | # ------------------------------------------------ 14 | VERSION = 'RetinaNet_ICDAR2015_DCL_B_2x_20200924' 15 | NET_NAME = 'resnet50_v1d' # 'MobilenetV2' 16 | ADD_BOX_IN_TENSORBOARD = True 17 | 18 | # ---------------------------------------- System_config 19 | ROOT_PATH = os.path.abspath('../') 20 | print(20*"++--") 21 | print(ROOT_PATH) 22 | GPU_GROUP = "3" 23 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 24 | SHOW_TRAIN_INFO_INTE = 20 25 | SMRY_ITER = 200 26 | SAVE_WEIGHTS_INTE = 10000 27 | 28 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 29 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 30 | 31 | if NET_NAME.startswith("resnet"): 32 | weights_name = NET_NAME 33 | elif NET_NAME.startswith("MobilenetV2"): 34 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 35 | else: 36 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 37 | 38 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 39 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 40 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 41 | 42 | # ------------------------------------------ Train config 43 | RESTORE_FROM_RPN = False 44 | FIXED_BLOCKS = 1 # allow 0~3 45 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 46 | USE_07_METRIC = True 47 | 48 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 49 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 50 | 51 | CLS_WEIGHT = 1.0 52 | REG_WEIGHT = 1.0 53 | ANGLE_WEIGHT = 0.5 54 | REG_LOSS_MODE = None 55 | ALPHA = 1.0 56 | BETA = 1.0 57 | 58 | BATCH_SIZE = 4 59 | EPSILON = 1e-5 60 | MOMENTUM = 0.9 61 | LR = 5e-4 62 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 63 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 64 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 65 | 66 | # -------------------------------------------- Data_preprocess_config 67 | DATASET_NAME = 'ICDAR2015' # 'pascal', 'coco' 68 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 69 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 70 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 71 | IMG_SHORT_SIDE_LEN = 800 72 | IMG_MAX_LENGTH = 1000 73 | CLASS_NUM = 1 74 | OMEGA = 180 / 256. 
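# What OMEGA controls, sketched under the DCL reading of this config: the
# angle range is cut into ANGLE_RANGE / OMEGA bins and each bin index is
# emitted as a binary code (the 'B' in the VERSION string). The repo's actual
# encoder is help_utils/densely_coded_label.py; the helper below is an
# illustrative assumption about the encoding, not a copy of it.
import math

_num_bins = int(round(180 / (180 / 256.)))     # 256 angle classes
_code_len = int(math.log2(_num_bins))          # 8-bit code per box

def encode_angle(angle, omega=180 / 256.):
    """Hypothetical helper: map an angle in [-90, 90) to its binary code."""
    bin_idx = int((angle + 90) // omega) % _num_bins
    return [int(b) for b in format(bin_idx, '0%db' % _code_len)]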
75 | ANGLE_MODE = 0 76 | 77 | IMG_ROTATE = False 78 | RGB2GRAY = False 79 | VERTICAL_FLIP = False 80 | HORIZONTAL_FLIP = True 81 | IMAGE_PYRAMID = False 82 | 83 | # --------------------------------------------- Network_config 84 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 85 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 86 | PROBABILITY = 0.01 87 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 88 | WEIGHT_DECAY = 1e-4 89 | USE_GN = False 90 | FPN_CHANNEL = 256 91 | 92 | # ---------------------------------------------Anchor config 93 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 94 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 95 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 96 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 97 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 98 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 99 | ANCHOR_SCALE_FACTORS = None 100 | USE_CENTER_OFFSET = True 101 | METHOD = 'H' 102 | USE_ANGLE_COND = False 103 | ANGLE_RANGE = 180 # 90 or 180 104 | 105 | # --------------------------------------------RPN config 106 | SHARE_NET = True 107 | USE_P5 = True 108 | IOU_POSITIVE_THRESHOLD = 0.5 109 | IOU_NEGATIVE_THRESHOLD = 0.4 110 | 111 | NMS = True 112 | NMS_IOU_THRESHOLD = 0.1 113 | MAXIMUM_DETECTIONS = 100 114 | FILTERED_SCORE = 0.05 115 | VIS_SCORE = 0.75 116 | 117 | 118 | -------------------------------------------------------------------------------- /libs/configs/ICDAR2015/dcl/cfgs_res50_icdar2015_dcl_v2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 180 / 256. 
9 | 10 | 2020-09-26 DCL 71.79% 68.58% 70.15% 11 | 12 | """ 13 | 14 | # ------------------------------------------------ 15 | VERSION = 'RetinaNet_ICDAR2015_DCL_B_2x_20200926' 16 | NET_NAME = 'resnet50_v1d' # 'MobilenetV2' 17 | ADD_BOX_IN_TENSORBOARD = True 18 | 19 | # ---------------------------------------- System_config 20 | ROOT_PATH = os.path.abspath('../') 21 | print(20*"++--") 22 | print(ROOT_PATH) 23 | GPU_GROUP = "3" 24 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 25 | SHOW_TRAIN_INFO_INTE = 20 26 | SMRY_ITER = 200 27 | SAVE_WEIGHTS_INTE = 10000 28 | 29 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 30 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 31 | 32 | if NET_NAME.startswith("resnet"): 33 | weights_name = NET_NAME 34 | elif NET_NAME.startswith("MobilenetV2"): 35 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 36 | else: 37 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 38 | 39 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 40 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 41 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 42 | 43 | # ------------------------------------------ Train config 44 | RESTORE_FROM_RPN = False 45 | FIXED_BLOCKS = 1 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | 49 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 50 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 51 | 52 | CLS_WEIGHT = 1.0 53 | REG_WEIGHT = 1.0 54 | ANGLE_WEIGHT = 0.5 55 | REG_LOSS_MODE = None 56 | ALPHA = 1.0 57 | BETA = 1.0 58 | 59 | BATCH_SIZE = 1 60 | EPSILON = 1e-5 61 | MOMENTUM = 0.9 62 | LR = 5e-4 63 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 64 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 65 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 66 | 67 | # -------------------------------------------- Data_preprocess_config 68 | DATASET_NAME = 'ICDAR2015' # 'pascal', 'coco' 69 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 70 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 71 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 72 | IMG_SHORT_SIDE_LEN = 800 73 | IMG_MAX_LENGTH = 1000 74 | CLASS_NUM = 1 75 | OMEGA = 180 / 256. 76 | ANGLE_MODE = 0 77 | 78 | IMG_ROTATE = False 79 | RGB2GRAY = False 80 | VERTICAL_FLIP = False 81 | HORIZONTAL_FLIP = True 82 | IMAGE_PYRAMID = False 83 | 84 | # --------------------------------------------- Network_config 85 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 86 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 87 | PROBABILITY = 0.01 88 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 89 | WEIGHT_DECAY = 1e-4 90 | USE_GN = False 91 | FPN_CHANNEL = 256 92 | 93 | # ---------------------------------------------Anchor config 94 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 95 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 96 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 97 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 98 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 
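# Why FINAL_CONV_BIAS_INITIALIZER above is -log((1 - PROBABILITY) / PROBABILITY):
# this is the RetinaNet focal-loss prior trick -- the last classification bias
# is set so that every anchor starts with sigmoid output ~= PROBABILITY (0.01),
# which stops training from being swamped by easy negatives early on.
# A quick check that the bias recovers the prior:
import math

_prior = 0.01
_bias = -math.log((1.0 - _prior) / _prior)                    # ~= -4.595
assert abs(1.0 / (1.0 + math.exp(-_bias)) - _prior) < 1e-12   # sigmoid(_bias) == prior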
99 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 100 | ANCHOR_SCALE_FACTORS = None 101 | USE_CENTER_OFFSET = True 102 | METHOD = 'H' 103 | USE_ANGLE_COND = False 104 | ANGLE_RANGE = 180 # 90 or 180 105 | 106 | # --------------------------------------------RPN config 107 | SHARE_NET = True 108 | USE_P5 = True 109 | IOU_POSITIVE_THRESHOLD = 0.5 110 | IOU_NEGATIVE_THRESHOLD = 0.4 111 | 112 | NMS = True 113 | NMS_IOU_THRESHOLD = 0.1 114 | MAXIMUM_DETECTIONS = 100 115 | FILTERED_SCORE = 0.05 116 | VIS_SCORE = 0.78 117 | 118 | 119 | -------------------------------------------------------------------------------- /libs/configs/ICDAR2015/dcl/cfgs_res50_icdar2015_dcl_v3.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 180 / 64. 9 | 10 | 2020-09-27 DCL 71.40% 69.53% 70.45% 11 | 12 | """ 13 | 14 | # ------------------------------------------------ 15 | VERSION = 'RetinaNet_ICDAR2015_DCL_B_2x_20200927' 16 | NET_NAME = 'resnet50_v1d' # 'MobilenetV2' 17 | ADD_BOX_IN_TENSORBOARD = True 18 | 19 | # ---------------------------------------- System_config 20 | ROOT_PATH = os.path.abspath('../') 21 | print(20*"++--") 22 | print(ROOT_PATH) 23 | GPU_GROUP = "3" 24 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 25 | SHOW_TRAIN_INFO_INTE = 20 26 | SMRY_ITER = 200 27 | SAVE_WEIGHTS_INTE = 10000 28 | 29 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 30 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 31 | 32 | if NET_NAME.startswith("resnet"): 33 | weights_name = NET_NAME 34 | elif NET_NAME.startswith("MobilenetV2"): 35 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 36 | else: 37 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 38 | 39 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 40 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 41 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 42 | 43 | # ------------------------------------------ Train config 44 | RESTORE_FROM_RPN = False 45 | FIXED_BLOCKS = 1 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | 49 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 50 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 51 | 52 | CLS_WEIGHT = 1.0 53 | REG_WEIGHT = 1.0 54 | ANGLE_WEIGHT = 0.5 55 | REG_LOSS_MODE = None 56 | ALPHA = 1.0 57 | BETA = 1.0 58 | 59 | BATCH_SIZE = 1 60 | EPSILON = 1e-5 61 | MOMENTUM = 0.9 62 | LR = 5e-4 63 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 64 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 65 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 66 | 67 | # -------------------------------------------- Data_preprocess_config 68 | DATASET_NAME = 'ICDAR2015' # 'pascal', 'coco' 69 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 70 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 71 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 72 | IMG_SHORT_SIDE_LEN = 800 73 | IMG_MAX_LENGTH = 1000 74 | CLASS_NUM = 1 75 | OMEGA = 180 / 64. 
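# With OMEGA = 180 / 64. there are 64 angle bins, i.e. a 6-bit code instead of
# the 8-bit one in the 180 / 256. variants. ANGLE_MODE on the next line appears
# to select the coding: the 'DCL_B' configs in this dump use ANGLE_MODE = 0,
# while the 'DCL_G' OHD-SJTU config further below uses ANGLE_MODE = 1 -- an
# inference from the VERSION strings, not a documented contract. For reference,
# the standard binary-to-Gray conversion such a 'G' variant would rely on:
def binary_to_gray(bin_idx):
    # Adjacent bin indices differ in exactly one bit after Gray coding, so a
    # near-miss angle prediction flips only one output unit.
    return bin_idx ^ (bin_idx >> 1)

assert binary_to_gray(3) == 2 and binary_to_gray(4) == 6   # 011 -> 010, 100 -> 110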
76 | ANGLE_MODE = 0 77 | 78 | IMG_ROTATE = False 79 | RGB2GRAY = False 80 | VERTICAL_FLIP = False 81 | HORIZONTAL_FLIP = True 82 | IMAGE_PYRAMID = False 83 | 84 | # --------------------------------------------- Network_config 85 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 86 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 87 | PROBABILITY = 0.01 88 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 89 | WEIGHT_DECAY = 1e-4 90 | USE_GN = False 91 | FPN_CHANNEL = 256 92 | 93 | # ---------------------------------------------Anchor config 94 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 95 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 96 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 97 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 98 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 99 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 100 | ANCHOR_SCALE_FACTORS = None 101 | USE_CENTER_OFFSET = True 102 | METHOD = 'H' 103 | USE_ANGLE_COND = False 104 | ANGLE_RANGE = 180 # 90 or 180 105 | 106 | # --------------------------------------------RPN config 107 | SHARE_NET = True 108 | USE_P5 = True 109 | IOU_POSITIVE_THRESHOLD = 0.5 110 | IOU_NEGATIVE_THRESHOLD = 0.4 111 | 112 | NMS = True 113 | NMS_IOU_THRESHOLD = 0.1 114 | MAXIMUM_DETECTIONS = 100 115 | FILTERED_SCORE = 0.05 116 | VIS_SCORE = 0.85 117 | 118 | 119 | -------------------------------------------------------------------------------- /libs/configs/MLT/baseline/cfgs_res101_icdar2015_baseline_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 2020-10-11 CSL 64.01% 75.24% 55.70% 51.13% (0.45) 9 | 2020-10-11 CSL 63.81% 71.78% 57.43% 52.40% (0.4) 10 | 2020-10-11 CSL 61.25% 62.30% 60.24% 54.30% (0.3) 11 | 12 | """ 13 | 14 | # ------------------------------------------------ 15 | VERSION = 'RetinaNet_MLT_Baseline_2x_20201002' 16 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 17 | ADD_BOX_IN_TENSORBOARD = True 18 | 19 | # ---------------------------------------- System_config 20 | ROOT_PATH = os.path.abspath('../') 21 | print(20*"++--") 22 | print(ROOT_PATH) 23 | GPU_GROUP = "0,1,2" 24 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 25 | SHOW_TRAIN_INFO_INTE = 20 26 | SMRY_ITER = 200 27 | SAVE_WEIGHTS_INTE = 10000 * 2 28 | 29 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 30 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 31 | 32 | if NET_NAME.startswith("resnet"): 33 | weights_name = NET_NAME 34 | elif NET_NAME.startswith("MobilenetV2"): 35 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 36 | else: 37 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 38 | 39 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 40 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 41 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 42 | 43 | # ------------------------------------------ Train config 44 | RESTORE_FROM_RPN = False 45 | FIXED_BLOCKS = 1 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | 49 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 50 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 51 | 52 | CLS_WEIGHT = 1.0 53 | 
REG_WEIGHT = 1.0 54 | ANGLE_WEIGHT = 0.5 55 | REG_LOSS_MODE = None 56 | ALPHA = 1.0 57 | BETA = 1.0 58 | 59 | BATCH_SIZE = 1 60 | EPSILON = 1e-5 61 | MOMENTUM = 0.9 62 | LR = 5e-4 63 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 64 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 65 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 66 | 67 | # -------------------------------------------- Data_preprocess_config 68 | DATASET_NAME = 'MLT' # 'pascal', 'coco' 69 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 70 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 71 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 72 | IMG_SHORT_SIDE_LEN = [800, 600, 1000, 1200] 73 | IMG_MAX_LENGTH = 1500 74 | CLASS_NUM = 1 75 | 76 | IMG_ROTATE = True 77 | RGB2GRAY = True 78 | VERTICAL_FLIP = True 79 | HORIZONTAL_FLIP = True 80 | IMAGE_PYRAMID = True 81 | 82 | # --------------------------------------------- Network_config 83 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 84 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 85 | PROBABILITY = 0.01 86 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 87 | WEIGHT_DECAY = 1e-4 88 | USE_GN = False 89 | FPN_CHANNEL = 256 90 | 91 | # ---------------------------------------------Anchor config 92 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 93 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 94 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 95 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 96 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 97 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 98 | ANCHOR_SCALE_FACTORS = None 99 | USE_CENTER_OFFSET = True 100 | METHOD = 'H' 101 | USE_ANGLE_COND = False 102 | ANGLE_RANGE = 180 # 90 or 180 103 | 104 | # --------------------------------------------RPN config 105 | SHARE_NET = True 106 | USE_P5 = True 107 | IOU_POSITIVE_THRESHOLD = 0.5 108 | IOU_NEGATIVE_THRESHOLD = 0.4 109 | 110 | NMS = True 111 | NMS_IOU_THRESHOLD = 0.1 112 | MAXIMUM_DETECTIONS = 100 113 | FILTERED_SCORE = 0.05 114 | VIS_SCORE = 0.1 115 | 116 | 117 | -------------------------------------------------------------------------------- /libs/configs/MLT/csl/cfgs_res101_mlt_csl_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 2020-10-07 DCL 62.67% 64.24% 61.18% 55.51% (0.3) 9 | 2020-10-07 DCL 65.08% 73.62% 58.32% 53.52% (0.4) 10 | 2020-10-07 DCL 65.23% 77.12% 56.52% 52.16% (0.45) 11 | 12 | """ 13 | 14 | # ------------------------------------------------ 15 | VERSION = 'RetinaNet_MLT_CSL_2x_20201007' 16 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 17 | ADD_BOX_IN_TENSORBOARD = True 18 | 19 | # ---------------------------------------- System_config 20 | ROOT_PATH = os.path.abspath('../') 21 | print(20*"++--") 22 | print(ROOT_PATH) 23 | GPU_GROUP = "0,1,2" 24 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 25 | SHOW_TRAIN_INFO_INTE = 20 26 | SMRY_ITER = 200 27 | SAVE_WEIGHTS_INTE = 10000 * 2 28 | 29 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 30 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 31 | 32 | if NET_NAME.startswith("resnet"): 33 | weights_name = NET_NAME 34 | elif NET_NAME.startswith("MobilenetV2"): 35 | 
weights_name = "mobilenet/mobilenet_v2_1.0_224" 36 | else: 37 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 38 | 39 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 40 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 41 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 42 | 43 | # ------------------------------------------ Train config 44 | RESTORE_FROM_RPN = False 45 | FIXED_BLOCKS = 1 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | 49 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 50 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 51 | 52 | CLS_WEIGHT = 1.0 53 | REG_WEIGHT = 1.0 54 | ANGLE_WEIGHT = 0.5 55 | REG_LOSS_MODE = None 56 | ALPHA = 1.0 57 | BETA = 1.0 58 | 59 | BATCH_SIZE = 1 60 | EPSILON = 1e-5 61 | MOMENTUM = 0.9 62 | LR = 5e-4 63 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 64 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 65 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 66 | 67 | # -------------------------------------------- Data_preprocess_config 68 | DATASET_NAME = 'MLT' # 'pascal', 'coco' 69 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 70 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 71 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 72 | IMG_SHORT_SIDE_LEN = [800, 600, 1000, 1200] 73 | IMG_MAX_LENGTH = 1500 74 | CLASS_NUM = 1 75 | LABEL_TYPE = 0 76 | RADUIUS = 6 77 | OMEGA = 1 78 | 79 | IMG_ROTATE = True 80 | RGB2GRAY = True 81 | VERTICAL_FLIP = True 82 | HORIZONTAL_FLIP = True 83 | IMAGE_PYRAMID = True 84 | 85 | # --------------------------------------------- Network_config 86 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 87 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 88 | PROBABILITY = 0.01 89 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 90 | WEIGHT_DECAY = 1e-4 91 | USE_GN = False 92 | FPN_CHANNEL = 256 93 | 94 | # ---------------------------------------------Anchor config 95 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 96 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 97 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 98 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 99 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 100 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 101 | ANCHOR_SCALE_FACTORS = None 102 | USE_CENTER_OFFSET = True 103 | METHOD = 'H' 104 | USE_ANGLE_COND = False 105 | ANGLE_RANGE = 180 # 90 or 180 106 | 107 | # --------------------------------------------RPN config 108 | SHARE_NET = True 109 | USE_P5 = True 110 | IOU_POSITIVE_THRESHOLD = 0.5 111 | IOU_NEGATIVE_THRESHOLD = 0.4 112 | 113 | NMS = True 114 | NMS_IOU_THRESHOLD = 0.1 115 | MAXIMUM_DETECTIONS = 100 116 | FILTERED_SCORE = 0.05 117 | VIS_SCORE = 0.1 118 | 119 | 120 | -------------------------------------------------------------------------------- /libs/configs/MLT/dcl/cfgs_res101_mlt_dcl_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 180 / 64. 
9 | 10 | 2020-10-06 retinanet 62.60% 63.34% 61.88% 56.45% (0.3) 11 | 2020-10-06 retinanet 65.26% 73.14% 58.91% 54.41% (0.4) 12 | """ 13 | 14 | # ------------------------------------------------ 15 | VERSION = 'RetinaNet_MLT_DCL_B_2x_20200928' 16 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 17 | ADD_BOX_IN_TENSORBOARD = True 18 | 19 | # ---------------------------------------- System_config 20 | ROOT_PATH = os.path.abspath('../') 21 | print(20*"++--") 22 | print(ROOT_PATH) 23 | GPU_GROUP = "0,1,2" 24 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 25 | SHOW_TRAIN_INFO_INTE = 20 26 | SMRY_ITER = 200 27 | SAVE_WEIGHTS_INTE = 10000 * 2 28 | 29 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 30 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 31 | 32 | if NET_NAME.startswith("resnet"): 33 | weights_name = NET_NAME 34 | elif NET_NAME.startswith("MobilenetV2"): 35 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 36 | else: 37 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 38 | 39 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 40 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 41 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 42 | 43 | # ------------------------------------------ Train config 44 | RESTORE_FROM_RPN = False 45 | FIXED_BLOCKS = 1 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | 49 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 50 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 51 | 52 | CLS_WEIGHT = 1.0 53 | REG_WEIGHT = 1.0 54 | ANGLE_WEIGHT = 0.5 55 | REG_LOSS_MODE = None 56 | ALPHA = 1.0 57 | BETA = 1.0 58 | 59 | BATCH_SIZE = 1 60 | EPSILON = 1e-5 61 | MOMENTUM = 0.9 62 | LR = 5e-4 63 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 64 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 65 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 66 | 67 | # -------------------------------------------- Data_preprocess_config 68 | DATASET_NAME = 'MLT' # 'pascal', 'coco' 69 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 70 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 71 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 72 | IMG_SHORT_SIDE_LEN = [800, 600, 1000, 1200] 73 | IMG_MAX_LENGTH = 1500 74 | CLASS_NUM = 1 75 | OMEGA = 180 / 64. 76 | ANGLE_MODE = 0 77 | 78 | IMG_ROTATE = True 79 | RGB2GRAY = True 80 | VERTICAL_FLIP = True 81 | HORIZONTAL_FLIP = True 82 | IMAGE_PYRAMID = True 83 | 84 | # --------------------------------------------- Network_config 85 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 86 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 87 | PROBABILITY = 0.01 88 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 89 | WEIGHT_DECAY = 1e-4 90 | USE_GN = False 91 | FPN_CHANNEL = 256 92 | 93 | # ---------------------------------------------Anchor config 94 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 95 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 96 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 97 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 98 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 
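# How a (scale, ratio) pair becomes an anchor shape, assuming the standard
# area-preserving RetinaNet construction (libs/box_utils/generate_anchors.py
# is authoritative; this is an illustrative sketch):
def anchor_shapes(base_size,
                  scales=(2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)),
                  ratios=(1., 1 / 2., 2., 1 / 3., 3., 5., 1 / 5.)):
    """Return (w, h) for every anchor at one pyramid level."""
    shapes = []
    for scale in scales:
        size = base_size * scale
        for ratio in ratios:                      # ratio read as h / w
            shapes.append((size / ratio ** 0.5,   # keeps w * h == size ** 2
                           size * ratio ** 0.5))
    return shapes

# e.g. P3, where BASE_ANCHOR_SIZE_LIST[0] = 32 and ANCHOR_STRIDE[0] = 8:
assert len(anchor_shapes(32)) == 21               # scales x ratios per location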
99 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 100 | ANCHOR_SCALE_FACTORS = None 101 | USE_CENTER_OFFSET = True 102 | METHOD = 'H' 103 | USE_ANGLE_COND = False 104 | ANGLE_RANGE = 180 # 90 or 180 105 | 106 | # --------------------------------------------RPN config 107 | SHARE_NET = True 108 | USE_P5 = True 109 | IOU_POSITIVE_THRESHOLD = 0.5 110 | IOU_NEGATIVE_THRESHOLD = 0.4 111 | 112 | NMS = True 113 | NMS_IOU_THRESHOLD = 0.1 114 | MAXIMUM_DETECTIONS = 100 115 | FILTERED_SCORE = 0.05 116 | VIS_SCORE = 0.1 117 | 118 | 119 | -------------------------------------------------------------------------------- /libs/configs/OHD-SJTU/cfgs_res101_ohd-sjtu_gwd_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 9 | 10 | """ 11 | 12 | # ------------------------------------------------ 13 | VERSION = 'RetinaNet_OHD-SJTU_GWD_2x_20200728' 14 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 15 | ADD_BOX_IN_TENSORBOARD = True 16 | 17 | # ---------------------------------------- System_config 18 | ROOT_PATH = os.path.abspath('../') 19 | print(20*"++--") 20 | print(ROOT_PATH) 21 | GPU_GROUP = "1,2,3" 22 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 23 | SHOW_TRAIN_INFO_INTE = 20 24 | SMRY_ITER = 200 25 | SAVE_WEIGHTS_INTE = 5000 * 2 26 | 27 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 28 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 29 | 30 | if NET_NAME.startswith("resnet"): 31 | weights_name = NET_NAME 32 | elif NET_NAME.startswith("MobilenetV2"): 33 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 34 | else: 35 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 36 | 37 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 38 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 39 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 40 | 41 | # ------------------------------------------ Train config 42 | RESTORE_FROM_RPN = False 43 | FIXED_BLOCKS = 1 # allow 0~3 44 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 45 | USE_07_METRIC = True 46 | 47 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 48 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 49 | 50 | CLS_WEIGHT = 1.0 51 | REG_WEIGHT = 1.0 52 | REG_LOSS_MODE = 4 53 | ALPHA = 1.0 54 | BETA = 1.0 55 | 56 | BATCH_SIZE = 1 57 | EPSILON = 1e-5 58 | MOMENTUM = 0.9 59 | LR = 1e-4 60 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 61 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 62 | WARM_SETP = int(1.0 / 8.0 * SAVE_WEIGHTS_INTE) 63 | 64 | # -------------------------------------------- Data_preprocess_config 65 | DATASET_NAME = 'OHD-SJTU-600' # 'pascal', 'coco' 66 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 67 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 68 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR 69 | IMG_SHORT_SIDE_LEN = 800 70 | IMG_MAX_LENGTH = 800 71 | CLASS_NUM = 2 72 | 73 | IMG_ROTATE = True 74 | RGB2GRAY = True 75 | VERTICAL_FLIP = True 76 | HORIZONTAL_FLIP = True 77 | IMAGE_PYRAMID = False 78 | 79 | # --------------------------------------------- Network_config 80 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 81 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 82 | PROBABILITY = 0.01 83 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 84 | WEIGHT_DECAY = 1e-4 85 | USE_GN = False 86 | FPN_CHANNEL = 256 87 | 88 | # ---------------------------------------------Anchor config 89 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 90 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 91 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 92 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 93 | ANCHOR_RATIOS = [1, 1 / 3., 3., 5., 1 / 5.] 94 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 95 | ANCHOR_SCALE_FACTORS = None 96 | USE_CENTER_OFFSET = True 97 | METHOD = 'H' 98 | USE_ANGLE_COND = False 99 | ANGLE_RANGE = 90 # or 180 100 | 101 | # --------------------------------------------RPN config 102 | SHARE_NET = True 103 | USE_P5 = True 104 | IOU_POSITIVE_THRESHOLD = 0.5 105 | IOU_NEGATIVE_THRESHOLD = 0.4 106 | 107 | NMS = True 108 | NMS_IOU_THRESHOLD = 0.1 109 | MAXIMUM_DETECTIONS = 100 110 | FILTERED_SCORE = 0.05 111 | VIS_SCORE = 0.4 112 | 113 | 114 | -------------------------------------------------------------------------------- /libs/configs/OHD-SJTU/cfgs_res101_ohd-sjtu_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | classname: plane 9 | npos num: 223 10 | ap: 0.9086680761099367 11 | classname: ship 12 | npos num: 1025 13 | ap: 0.7680000964611735 14 | map: 0.8383340862855551 15 | classaps: [90.86680761 76.80000965] 16 | 17 | [0.8383340862855551, 0.8294369391086178, 0.8213729593604102, 0.7846857109313947, 0.7710299710411961, 18 | 0.6591302619122246, 0.5440679304087981, 0.34149128761647274, 0.11706834092408161, 0.006581439393939394] 19 | 0.571319892698269 20 | 21 | """ 22 | 23 | # ------------------------------------------------ 24 | VERSION = 'RetinaNet_OHD-SJTU_2x_20200728' 25 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 26 | ADD_BOX_IN_TENSORBOARD = True 27 | 28 | # ---------------------------------------- System_config 29 | ROOT_PATH = os.path.abspath('../') 30 | print(20*"++--") 31 | print(ROOT_PATH) 32 | GPU_GROUP = "0,1" 33 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 34 | SHOW_TRAIN_INFO_INTE = 20 35 | SMRY_ITER = 200 36 | SAVE_WEIGHTS_INTE = 5000 * 2 37 | 38 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 39 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 40 | 41 | if NET_NAME.startswith("resnet"): 42 | weights_name = NET_NAME 43 | elif NET_NAME.startswith("MobilenetV2"): 44 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 45 | else: 46 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 47 | 48 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 49 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 50 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 51 | 52 | # ------------------------------------------ Train config 53 | 
RESTORE_FROM_RPN = False 54 | FIXED_BLOCKS = 1 # allow 0~3 55 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 56 | USE_07_METRIC = True 57 | 58 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 59 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 60 | 61 | CLS_WEIGHT = 1.0 62 | REG_WEIGHT = 1.0 63 | REG_LOSS_MODE = None 64 | 65 | BATCH_SIZE = 1 66 | EPSILON = 1e-5 67 | MOMENTUM = 0.9 68 | LR = 5e-4 # * NUM_GPU * BATCH_SIZE 69 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 70 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 71 | WARM_SETP = int(1.0 / 8.0 * SAVE_WEIGHTS_INTE) 72 | 73 | # -------------------------------------------- Data_preprocess_config 74 | DATASET_NAME = 'OHD-SJTU-600' # 'pascal', 'coco' 75 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 76 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 77 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 78 | IMG_SHORT_SIDE_LEN = 800 79 | IMG_MAX_LENGTH = 800 80 | CLASS_NUM = 2 81 | 82 | IMG_ROTATE = True 83 | RGB2GRAY = True 84 | VERTICAL_FLIP = True 85 | HORIZONTAL_FLIP = True 86 | IMAGE_PYRAMID = False 87 | 88 | # --------------------------------------------- Network_config 89 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 90 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 91 | PROBABILITY = 0.01 92 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 93 | WEIGHT_DECAY = 1e-4 94 | USE_GN = False 95 | FPN_CHANNEL = 256 96 | 97 | # ---------------------------------------------Anchor config 98 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 99 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 100 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 101 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 102 | ANCHOR_RATIOS = [1, 1 / 3., 3., 5., 1 / 5.] 
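# The bracketed list in this file's header docstring is AP at IoU thresholds
# 0.50:0.05:0.95 (its first entry matches the map quoted at 0.5), and the
# trailing 0.571319892698269 is their mean, i.e. a COCO-style mAP.
# Reproducing the arithmetic from the quoted numbers:
_aps_50_95 = [0.8383340862855551, 0.8294369391086178, 0.8213729593604102,
              0.7846857109313947, 0.7710299710411961, 0.6591302619122246,
              0.5440679304087981, 0.34149128761647274, 0.11706834092408161,
              0.006581439393939394]
assert abs(sum(_aps_50_95) / len(_aps_50_95) - 0.571319892698269) < 1e-12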
103 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 104 | ANCHOR_SCALE_FACTORS = None 105 | USE_CENTER_OFFSET = True 106 | METHOD = 'H' 107 | USE_ANGLE_COND = False 108 | ANGLE_RANGE = 90 # or 180 109 | 110 | # --------------------------------------------RPN config 111 | SHARE_NET = True 112 | USE_P5 = True 113 | IOU_POSITIVE_THRESHOLD = 0.5 114 | IOU_NEGATIVE_THRESHOLD = 0.4 115 | 116 | NMS = True 117 | NMS_IOU_THRESHOLD = 0.1 118 | MAXIMUM_DETECTIONS = 100 119 | FILTERED_SCORE = 0.05 120 | VIS_SCORE = 0.4 121 | 122 | 123 | -------------------------------------------------------------------------------- /libs/configs/OHD-SJTU/cfgs_res101_ohd-sjtu_v2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | classname: plane 9 | npos num: 223 10 | ap: 0.907399577167019 11 | classname: ship 12 | npos num: 1025 13 | ap: 0.8853244888009996 14 | map: 0.8963620329840093 15 | classaps: [90.73995772 88.53244888] 16 | 17 | [0.8963620329840093, 0.896220113688575, 0.894407255037192, 0.8910001332998732, 0.836621416086192, 18 | 0.7461668460602382, 0.532126970665127, 0.29881340556372865, 0.06862263630283386, 0.004134429400386847] 19 | 0.6064475239088156 20 | 21 | """ 22 | 23 | # ------------------------------------------------ 24 | VERSION = 'RetinaNet_OHD-SJTU_2x_20200729' 25 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 26 | ADD_BOX_IN_TENSORBOARD = True 27 | 28 | # ---------------------------------------- System_config 29 | ROOT_PATH = os.path.abspath('../') 30 | print(20*"++--") 31 | print(ROOT_PATH) 32 | GPU_GROUP = "1,2,3" 33 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 34 | SHOW_TRAIN_INFO_INTE = 20 35 | SMRY_ITER = 200 36 | SAVE_WEIGHTS_INTE = 5000 * 2 37 | 38 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 39 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 40 | 41 | if NET_NAME.startswith("resnet"): 42 | weights_name = NET_NAME 43 | elif NET_NAME.startswith("MobilenetV2"): 44 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 45 | else: 46 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 47 | 48 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 49 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 50 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 51 | 52 | # ------------------------------------------ Train config 53 | RESTORE_FROM_RPN = False 54 | FIXED_BLOCKS = 1 # allow 0~3 55 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 56 | USE_07_METRIC = True 57 | 58 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 59 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 60 | 61 | CLS_WEIGHT = 1.0 62 | REG_WEIGHT = 1.0 63 | REG_LOSS_MODE = None 64 | 65 | BATCH_SIZE = 1 66 | EPSILON = 1e-5 67 | MOMENTUM = 0.9 68 | LR = 5e-4 # * NUM_GPU * BATCH_SIZE 69 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 70 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 71 | WARM_SETP = int(1.0 / 8.0 * SAVE_WEIGHTS_INTE) 72 | 73 | # -------------------------------------------- Data_preprocess_config 74 | DATASET_NAME = 'OHD-SJTU-600' # 'pascal', 'coco' 75 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 76 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 77 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. 
In tf, channel is RGB. In openCV, channel is BGR 78 | IMG_SHORT_SIDE_LEN = 800 79 | IMG_MAX_LENGTH = 800 80 | CLASS_NUM = 2 81 | 82 | IMG_ROTATE = True 83 | RGB2GRAY = True 84 | VERTICAL_FLIP = True 85 | HORIZONTAL_FLIP = True 86 | IMAGE_PYRAMID = False 87 | 88 | # --------------------------------------------- Network_config 89 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 90 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 91 | PROBABILITY = 0.01 92 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 93 | WEIGHT_DECAY = 1e-4 94 | USE_GN = False 95 | FPN_CHANNEL = 256 96 | 97 | # ---------------------------------------------Anchor config 98 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 99 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 100 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 101 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 102 | ANCHOR_RATIOS = [1, 1 / 3., 3., 5., 1 / 5.] 103 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 104 | ANCHOR_SCALE_FACTORS = None 105 | USE_CENTER_OFFSET = True 106 | METHOD = 'R' 107 | USE_ANGLE_COND = False 108 | ANGLE_RANGE = 90 # or 180 109 | 110 | # --------------------------------------------RPN config 111 | SHARE_NET = True 112 | USE_P5 = True 113 | IOU_POSITIVE_THRESHOLD = 0.5 114 | IOU_NEGATIVE_THRESHOLD = 0.4 115 | 116 | NMS = True 117 | NMS_IOU_THRESHOLD = 0.1 118 | MAXIMUM_DETECTIONS = 100 119 | FILTERED_SCORE = 0.05 120 | VIS_SCORE = 0.4 121 | 122 | 123 | -------------------------------------------------------------------------------- /libs/configs/OHD-SJTU/dcl/cfgs_res101_ohd-sjtu-all_dcl_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 9 | 10 | """ 11 | 12 | # ------------------------------------------------ 13 | VERSION = 'RetinaNet_OHD-SJTU-ALL_DCL_G_2x_20200910' 14 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 15 | ADD_BOX_IN_TENSORBOARD = True 16 | 17 | # ---------------------------------------- System_config 18 | ROOT_PATH = os.path.abspath('../') 19 | print(20*"++--") 20 | print(ROOT_PATH) 21 | GPU_GROUP = "0,1,2" 22 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 23 | SHOW_TRAIN_INFO_INTE = 20 24 | SMRY_ITER = 200 25 | SAVE_WEIGHTS_INTE = 20000 * 2 26 | 27 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 28 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 29 | 30 | if NET_NAME.startswith("resnet"): 31 | weights_name = NET_NAME 32 | elif NET_NAME.startswith("MobilenetV2"): 33 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 34 | else: 35 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 36 | 37 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 38 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 39 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 40 | 41 | # ------------------------------------------ Train config 42 | RESTORE_FROM_RPN = False 43 | FIXED_BLOCKS = 1 # allow 0~3 44 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 45 | USE_07_METRIC = True 46 | 47 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 48 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 49 | 50 | CLS_WEIGHT = 1.0 51 | REG_WEIGHT = 1.0 52 | ANGLE_WEIGHT = 0.5 53 | REG_LOSS_MODE = None 54 | 
ALPHA = 1.0 55 | BETA = 1.0 56 | 57 | BATCH_SIZE = 1 58 | EPSILON = 1e-5 59 | MOMENTUM = 0.9 60 | LR = 5e-4 61 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 62 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 63 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 64 | 65 | # -------------------------------------------- Data_preprocess_config 66 | DATASET_NAME = 'OHD-SJTU-ALL-600' # 'pascal', 'coco' 67 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 68 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 69 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 70 | IMG_SHORT_SIDE_LEN = 800 71 | IMG_MAX_LENGTH = 800 72 | CLASS_NUM = 6 73 | OMEGA = 180 / 256. 74 | ANGLE_MODE = 1 75 | 76 | IMG_ROTATE = False 77 | RGB2GRAY = False 78 | VERTICAL_FLIP = False 79 | HORIZONTAL_FLIP = True 80 | IMAGE_PYRAMID = False 81 | 82 | # --------------------------------------------- Network_config 83 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 84 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 85 | PROBABILITY = 0.01 86 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 87 | WEIGHT_DECAY = 1e-4 88 | USE_GN = False 89 | FPN_CHANNEL = 256 90 | 91 | # ---------------------------------------------Anchor config 92 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 93 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 94 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 95 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 96 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 97 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 98 | ANCHOR_SCALE_FACTORS = None 99 | USE_CENTER_OFFSET = True 100 | METHOD = 'H' 101 | USE_ANGLE_COND = False 102 | ANGLE_RANGE = 180 # 90 or 180 103 | 104 | # --------------------------------------------RPN config 105 | SHARE_NET = True 106 | USE_P5 = True 107 | IOU_POSITIVE_THRESHOLD = 0.5 108 | IOU_NEGATIVE_THRESHOLD = 0.4 109 | 110 | NMS = True 111 | NMS_IOU_THRESHOLD = 0.1 112 | MAXIMUM_DETECTIONS = 100 113 | FILTERED_SCORE = 0.05 114 | VIS_SCORE = 0.4 115 | 116 | 117 | -------------------------------------------------------------------------------- /libs/configs/SSDD++/baseline/cfgs_res101_ssdd++_baseline_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | cls : ship|| Recall: 0.8970873786407767 || Precison: 0.6834319526627219|| AP: 0.7866073337921715 9 | F1:0.9000989119683481 P:0.9173387096774194 R:0.883495145631068 10 | mAP is : 0.7866073337921715 11 | 12 | 13 | """ 14 | 15 | # ------------------------------------------------ 16 | VERSION = 'RetinaNet_SSDD++_Baseline_2x_20201012' 17 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 18 | ADD_BOX_IN_TENSORBOARD = True 19 | 20 | # ---------------------------------------- System_config 21 | ROOT_PATH = os.path.abspath('../') 22 | print(20*"++--") 23 | print(ROOT_PATH) 24 | GPU_GROUP = "0,1,2" 25 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 26 | SHOW_TRAIN_INFO_INTE = 20 27 | SMRY_ITER = 200 28 | SAVE_WEIGHTS_INTE = 2000 * 2 29 | 30 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 31 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 32 | 33 | if NET_NAME.startswith("resnet"): 34 | weights_name = NET_NAME 35 | elif 
NET_NAME.startswith("MobilenetV2"): 36 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 37 | else: 38 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 39 | 40 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 41 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 42 | EVALUATE_R_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 43 | 44 | # ------------------------------------------ Train config 45 | RESTORE_FROM_RPN = False 46 | FIXED_BLOCKS = 1 # allow 0~3 47 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 48 | USE_07_METRIC = True 49 | EVAL_THRESHOLD = 0.5 50 | 51 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 52 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 53 | 54 | CLS_WEIGHT = 1.0 55 | REG_WEIGHT = 1.0 56 | ANGLE_WEIGHT = 0.5 57 | REG_LOSS_MODE = None 58 | ALPHA = 1.0 59 | BETA = 1.0 60 | 61 | BATCH_SIZE = 1 62 | EPSILON = 1e-5 63 | MOMENTUM = 0.9 64 | LR = 5e-4 65 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 66 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 67 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 68 | 69 | # -------------------------------------------- Data_preprocess_config 70 | DATASET_NAME = 'SSDD++' # 'pascal', 'coco' 71 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 72 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 73 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 74 | IMG_SHORT_SIDE_LEN = 600 75 | IMG_MAX_LENGTH = 1200 76 | CLASS_NUM = 1 77 | 78 | IMG_ROTATE = False 79 | RGB2GRAY = False 80 | VERTICAL_FLIP = False 81 | HORIZONTAL_FLIP = True 82 | IMAGE_PYRAMID = False 83 | 84 | # --------------------------------------------- Network_config 85 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 86 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 87 | PROBABILITY = 0.01 88 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 89 | WEIGHT_DECAY = 1e-4 90 | USE_GN = False 91 | FPN_CHANNEL = 256 92 | 93 | # ---------------------------------------------Anchor config 94 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 95 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 96 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 97 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 98 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 99 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 100 | ANCHOR_SCALE_FACTORS = None 101 | USE_CENTER_OFFSET = True 102 | METHOD = 'H' 103 | USE_ANGLE_COND = False 104 | ANGLE_RANGE = 180 # 90 or 180 105 | 106 | # --------------------------------------------RPN config 107 | SHARE_NET = True 108 | USE_P5 = True 109 | IOU_POSITIVE_THRESHOLD = 0.5 110 | IOU_NEGATIVE_THRESHOLD = 0.4 111 | 112 | NMS = True 113 | NMS_IOU_THRESHOLD = 0.1 114 | MAXIMUM_DETECTIONS = 100 115 | FILTERED_SCORE = 0.05 116 | VIS_SCORE = 0.1 117 | 118 | 119 | -------------------------------------------------------------------------------- /libs/configs/SSDD++/dcl/cfgs_res101_ssdd++_dcl_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 180 / 64. 
9 | FLOPs: 1299327161; Trainable params: 51507175 10 | 11 | """ 12 | 13 | # ------------------------------------------------ 14 | VERSION = 'RetinaNet_SSDD++_DCL_B_2x_20201011' 15 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 16 | ADD_BOX_IN_TENSORBOARD = True 17 | 18 | # ---------------------------------------- System_config 19 | ROOT_PATH = os.path.abspath('../') 20 | print(20*"++--") 21 | print(ROOT_PATH) 22 | GPU_GROUP = "0,1,2" 23 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 24 | SHOW_TRAIN_INFO_INTE = 20 25 | SMRY_ITER = 200 26 | SAVE_WEIGHTS_INTE = 2000 * 2 27 | 28 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 29 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 30 | 31 | if NET_NAME.startswith("resnet"): 32 | weights_name = NET_NAME 33 | elif NET_NAME.startswith("MobilenetV2"): 34 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 35 | else: 36 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 37 | 38 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 39 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 40 | EVALUATE_R_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 41 | 42 | # ------------------------------------------ Train config 43 | RESTORE_FROM_RPN = False 44 | FIXED_BLOCKS = 1 # allow 0~3 45 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 46 | USE_07_METRIC = True 47 | EVAL_THRESHOLD = 0.5 48 | 49 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 50 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 51 | 52 | CLS_WEIGHT = 1.0 53 | REG_WEIGHT = 1.0 54 | ANGLE_WEIGHT = 0.5 55 | REG_LOSS_MODE = None 56 | ALPHA = 1.0 57 | BETA = 1.0 58 | 59 | BATCH_SIZE = 1 60 | EPSILON = 1e-5 61 | MOMENTUM = 0.9 62 | LR = 5e-4 63 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 64 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 65 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 66 | 67 | # -------------------------------------------- Data_preprocess_config 68 | DATASET_NAME = 'SSDD++' # 'pascal', 'coco' 69 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 70 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 71 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 72 | IMG_SHORT_SIDE_LEN = 600 73 | IMG_MAX_LENGTH = 1200 74 | CLASS_NUM = 1 75 | OMEGA = 180 / 64. 76 | ANGLE_MODE = 0 77 | 78 | IMG_ROTATE = True 79 | RGB2GRAY = True 80 | VERTICAL_FLIP = True 81 | HORIZONTAL_FLIP = True 82 | IMAGE_PYRAMID = False 83 | 84 | # --------------------------------------------- Network_config 85 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 86 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 87 | PROBABILITY = 0.01 88 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 89 | WEIGHT_DECAY = 1e-4 90 | USE_GN = False 91 | FPN_CHANNEL = 256 92 | 93 | # ---------------------------------------------Anchor config 94 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 95 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 96 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 97 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 98 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 
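# A hedged reading of the score/NMS knobs in the RPN config block that follows,
# sketched in plain NumPy (the repo's real pipeline lives in
# libs/detection_oprations/proposal_opr*.py and may differ in details):
# FILTERED_SCORE drops boxes before NMS, NMS_IOU_THRESHOLD and
# MAXIMUM_DETECTIONS bound what survives, and VIS_SCORE only gates drawing.
import numpy as np

def postprocess(boxes, scores, rotated_nms,
                filtered_score=0.05, nms_iou=0.1, max_dets=100):
    """boxes: (N, 5) rotated boxes; scores: (N,); rotated_nms returns kept indices."""
    keep = np.where(scores >= filtered_score)[0]                  # 1) score filter
    keep = keep[rotated_nms(boxes[keep], scores[keep], nms_iou)]  # 2) rotated NMS
    keep = keep[np.argsort(-scores[keep])][:max_dets]             # 3) top-k cap
    return keep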
99 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 100 | ANCHOR_SCALE_FACTORS = None 101 | USE_CENTER_OFFSET = True 102 | METHOD = 'H' 103 | USE_ANGLE_COND = False 104 | ANGLE_RANGE = 180 # 90 or 180 105 | 106 | # --------------------------------------------RPN config 107 | SHARE_NET = True 108 | USE_P5 = True 109 | IOU_POSITIVE_THRESHOLD = 0.5 110 | IOU_NEGATIVE_THRESHOLD = 0.4 111 | 112 | NMS = True 113 | NMS_IOU_THRESHOLD = 0.1 114 | MAXIMUM_DETECTIONS = 100 115 | FILTERED_SCORE = 0.05 116 | VIS_SCORE = 0.4 117 | 118 | 119 | -------------------------------------------------------------------------------- /libs/configs/UCAS-AOD/r3det_dcl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/configs/UCAS-AOD/r3det_dcl/__init__.py -------------------------------------------------------------------------------- /libs/configs/UCAS-AOD/r3det_dcl/cfgs_res152_ucas-aod_r3det_dcl_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | FLOPs: 1440221956; Trainable params: 71664033 9 | 10 | """ 11 | 12 | # ------------------------------------------------ 13 | VERSION = 'RetinaNet_UCAS-AOD_R3Det_DCL_B_2x_20201026' 14 | NET_NAME = 'resnet152_v1d' # 'MobilenetV2' 15 | ADD_BOX_IN_TENSORBOARD = True 16 | 17 | # ---------------------------------------- System_config 18 | ROOT_PATH = os.path.abspath('../') 19 | print(20*"++--") 20 | print(ROOT_PATH) 21 | GPU_GROUP = "2,3" 22 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 23 | SHOW_TRAIN_INFO_INTE = 20 24 | SMRY_ITER = 200 25 | SAVE_WEIGHTS_INTE = 5000 * 2 26 | 27 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 28 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 29 | 30 | if NET_NAME.startswith("resnet"): 31 | weights_name = NET_NAME 32 | elif NET_NAME.startswith("MobilenetV2"): 33 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 34 | else: 35 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 36 | 37 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 38 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 39 | EVALUATE_R_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 40 | 41 | # ------------------------------------------ Train config 42 | RESTORE_FROM_RPN = False 43 | FIXED_BLOCKS = 1 # allow 0~3 44 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 45 | USE_07_METRIC = True 46 | EVAL_THRESHOLD = 0.5 47 | 48 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 49 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 50 | 51 | CLS_WEIGHT = 1.0 52 | REG_WEIGHT = 1.0 53 | ANGLE_WEIGHT = 0.5 54 | USE_IOU_FACTOR = True 55 | REG_LOSS_MODE = None 56 | ALPHA = 1.0 57 | BETA = 1.0 58 | 59 | BATCH_SIZE = 1 60 | EPSILON = 1e-5 61 | MOMENTUM = 0.9 62 | LR = 5e-4 63 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 64 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 65 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 66 | 67 | # -------------------------------------------- Data_preprocess_config 68 | DATASET_NAME = 'UCAS-AOD' # 'pascal', 'coco' 69 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR 70 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 71 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 72 | IMG_SHORT_SIDE_LEN = [800, 400, 600, 1000, 1200] 73 | IMG_MAX_LENGTH = 1200 74 | CLASS_NUM = 2 75 | OMEGA = 180 / 256. 76 | ANGLE_MODE = 0 77 | 78 | IMG_ROTATE = True 79 | RGB2GRAY = True 80 | VERTICAL_FLIP = True 81 | HORIZONTAL_FLIP = True 82 | IMAGE_PYRAMID = True 83 | 84 | # --------------------------------------------- Network_config 85 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 86 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 87 | PROBABILITY = 0.01 88 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 89 | WEIGHT_DECAY = 1e-4 90 | USE_GN = False 91 | FPN_CHANNEL = 256 92 | 93 | # ---------------------------------------------Anchor config 94 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 95 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 96 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 97 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 98 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 99 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 100 | ANCHOR_SCALE_FACTORS = None 101 | USE_CENTER_OFFSET = True 102 | METHOD = 'H' 103 | USE_ANGLE_COND = False 104 | ANGLE_RANGE = 180 # 90 or 180 105 | 106 | # --------------------------------------------RPN config 107 | SHARE_NET = True 108 | USE_P5 = True 109 | IOU_POSITIVE_THRESHOLD = 0.5 110 | IOU_NEGATIVE_THRESHOLD = 0.4 111 | REFINE_IOU_POSITIVE_THRESHOLD = [0.6, 0.7] 112 | REFINE_IOU_NEGATIVE_THRESHOLD = [0.5, 0.6] 113 | 114 | NMS = True 115 | NMS_IOU_THRESHOLD = 0.1 116 | MAXIMUM_DETECTIONS = 100 117 | FILTERED_SCORE = 0.05 118 | VIS_SCORE = 0.4 119 | 120 | 121 | -------------------------------------------------------------------------------- /libs/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/configs/__init__.py -------------------------------------------------------------------------------- /libs/detection_oprations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/detection_oprations/__init__.py -------------------------------------------------------------------------------- /libs/detection_oprations/anchor_target_layer_without_boxweight.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from libs.configs import cfgs 12 | import numpy as np 13 | from libs.box_utils.cython_utils.cython_bbox import bbox_overlaps 14 | from libs.box_utils.rbbox_overlaps import rbbx_overlaps 15 | from libs.box_utils.iou_cpu import get_iou_matrix 16 | from libs.box_utils import bbox_transform 17 | from libs.box_utils.coordinate_convert import coordinate_present_convert 18 | 19 
| 20 | def anchor_target_layer(gt_boxes_h, gt_boxes_r, anchors, gpu_id=0): 21 | 22 | anchor_states = np.zeros((anchors.shape[0],)) 23 | labels = np.zeros((anchors.shape[0], cfgs.CLASS_NUM)) 24 | if gt_boxes_r.shape[0]: 25 | # [N, M] 26 | 27 | if cfgs.METHOD == 'H': 28 | overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float), 29 | np.ascontiguousarray(gt_boxes_h, dtype=np.float)) 30 | else: 31 | overlaps = rbbx_overlaps(np.ascontiguousarray(anchors, dtype=np.float32), 32 | np.ascontiguousarray(gt_boxes_r[:, :-1], dtype=np.float32), gpu_id) 33 | 34 | # overlaps = get_iou_matrix(np.ascontiguousarray(anchors, dtype=np.float32), 35 | # np.ascontiguousarray(gt_boxes_r[:, :-1], dtype=np.float32)) 36 | 37 | argmax_overlaps_inds = np.argmax(overlaps, axis=1) 38 | max_overlaps = overlaps[np.arange(overlaps.shape[0]), argmax_overlaps_inds] 39 | 40 | # compute box regression targets 41 | target_boxes = gt_boxes_r[argmax_overlaps_inds] 42 | 43 | if cfgs.USE_ANGLE_COND: 44 | if cfgs.METHOD == 'R': 45 | delta_theta = np.abs(target_boxes[:, -2] - anchors[:, -1]) 46 | theta_indices = delta_theta < 15 47 | positive_indices = (max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD) & theta_indices 48 | else: 49 | positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD 50 | 51 | ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & (max_overlaps < cfgs.IOU_POSITIVE_THRESHOLD) 52 | 53 | else: 54 | positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD 55 | ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & ~positive_indices 56 | 57 | anchor_states[ignore_indices] = -1 58 | anchor_states[positive_indices] = 1 59 | 60 | # compute target class labels 61 | labels[positive_indices, target_boxes[positive_indices, -1].astype(int) - 1] = 1 62 | else: 63 | # no annotations? 
then everything is background 64 | target_boxes = np.zeros((anchors.shape[0], gt_boxes_r.shape[1])) 65 | 66 | if cfgs.METHOD == 'H': 67 | x_c = (anchors[:, 2] + anchors[:, 0]) / 2 68 | y_c = (anchors[:, 3] + anchors[:, 1]) / 2 69 | h = anchors[:, 2] - anchors[:, 0] + 1 70 | w = anchors[:, 3] - anchors[:, 1] + 1 71 | theta = -90 * np.ones_like(x_c) 72 | anchors = np.vstack([x_c, y_c, w, h, theta]).transpose() 73 | 74 | if cfgs.ANGLE_RANGE == 180: 75 | anchors = coordinate_present_convert(anchors, mode=-1) 76 | target_boxes = coordinate_present_convert(target_boxes, mode=-1) 77 | target_delta = bbox_transform.rbbox_transform(ex_rois=anchors, gt_rois=target_boxes) 78 | 79 | return np.array(labels, np.float32), np.array(target_delta, np.float32), \ 80 | np.array(anchor_states, np.float32), np.array(target_boxes, np.float32) 81 | 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /libs/detection_oprations/anchor_target_layer_without_boxweight_.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from libs.configs import cfgs 12 | import numpy as np 13 | from libs.box_utils.cython_utils.cython_bbox import bbox_overlaps 14 | from libs.box_utils.rbbox_overlaps import rbbx_overlaps 15 | from libs.box_utils.iou_cpu import get_iou_matrix 16 | from libs.box_utils import bbox_transform 17 | from libs.box_utils.coordinate_convert import coordinate_present_convert 18 | 19 | 20 | def anchor_target_layer(gt_boxes_h, gt_boxes_r, anchors, gpu_id=0): 21 | 22 | anchor_states = np.zeros((anchors.shape[0],)) 23 | labels = np.zeros((anchors.shape[0], cfgs.CLASS_NUM)) 24 | if gt_boxes_r.shape[0]: 25 | # [N, M] 26 | 27 | if cfgs.METHOD == 'H': 28 | overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float), 29 | np.ascontiguousarray(gt_boxes_h, dtype=np.float)) 30 | else: 31 | overlaps = rbbx_overlaps(np.ascontiguousarray(anchors, dtype=np.float32), 32 | np.ascontiguousarray(gt_boxes_r[:, :-1], dtype=np.float32), gpu_id) 33 | 34 | # overlaps = get_iou_matrix(np.ascontiguousarray(anchors, dtype=np.float32), 35 | # np.ascontiguousarray(gt_boxes_r[:, :-1], dtype=np.float32)) 36 | 37 | argmax_overlaps_inds = np.argmax(overlaps, axis=1) 38 | max_overlaps = overlaps[np.arange(overlaps.shape[0]), argmax_overlaps_inds] 39 | 40 | # compute box regression targets 41 | target_boxes = gt_boxes_r[argmax_overlaps_inds] 42 | 43 | if cfgs.USE_ANGLE_COND: 44 | if cfgs.METHOD == 'R': 45 | delta_theta = np.abs(target_boxes[:, -2] - anchors[:, -1]) 46 | theta_indices = delta_theta < 15 47 | positive_indices = (max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD) & theta_indices 48 | else: 49 | positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD 50 | 51 | ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & (max_overlaps < cfgs.IOU_POSITIVE_THRESHOLD) 52 | 53 | else: 54 | positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD 55 | ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & ~positive_indices 56 | 57 | anchor_states[ignore_indices] = -1 58 | anchor_states[positive_indices] = 1 59 | 60 | # 
compute target class labels 61 | labels[positive_indices, target_boxes[positive_indices, -1].astype(int) - 1] = 1 62 | else: 63 | # no annotations? then everything is background 64 | target_boxes = np.zeros((anchors.shape[0], gt_boxes_r.shape[1])) 65 | 66 | if cfgs.METHOD == 'H': 67 | x_c = (anchors[:, 2] + anchors[:, 0]) / 2 68 | y_c = (anchors[:, 3] + anchors[:, 1]) / 2 69 | h = anchors[:, 2] - anchors[:, 0] + 1 70 | w = anchors[:, 3] - anchors[:, 1] + 1 71 | theta = -90 * np.ones_like(x_c) 72 | anchors = np.vstack([x_c, y_c, w, h, theta]).transpose() 73 | 74 | target_delta = bbox_transform.rbbox_transform(ex_rois=anchors, gt_rois=target_boxes) 75 | 76 | return np.array(labels, np.float32), np.array(target_delta, np.float32), \ 77 | np.array(anchor_states, np.float32), np.array(target_boxes, np.float32) 78 | 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /libs/detection_oprations/anchor_target_layer_without_boxweight_csl.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from libs.configs import cfgs 12 | import numpy as np 13 | from libs.box_utils.cython_utils.cython_bbox import bbox_overlaps 14 | from libs.box_utils.rbbox_overlaps import rbbx_overlaps 15 | from libs.box_utils.iou_cpu import get_iou_matrix 16 | from libs.box_utils import bbox_transform 17 | from libs.box_utils.coordinate_convert import coordinate_present_convert 18 | 19 | 20 | def anchor_target_layer(gt_boxes_h, gt_boxes_r, gt_smooth_label, anchors, gpu_id=0): 21 | 22 | anchor_states = np.zeros((anchors.shape[0],)) 23 | labels = np.zeros((anchors.shape[0], cfgs.CLASS_NUM)) 24 | if gt_boxes_r.shape[0]: 25 | # [N, M] 26 | 27 | if cfgs.METHOD == 'H': 28 | overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float), 29 | np.ascontiguousarray(gt_boxes_h, dtype=np.float)) 30 | else: 31 | overlaps = rbbx_overlaps(np.ascontiguousarray(anchors, dtype=np.float32), 32 | np.ascontiguousarray(gt_boxes_r[:, :-1], dtype=np.float32), gpu_id) 33 | 34 | # overlaps = get_iou_matrix(np.ascontiguousarray(anchors, dtype=np.float32), 35 | # np.ascontiguousarray(gt_boxes_r[:, :-1], dtype=np.float32)) 36 | 37 | argmax_overlaps_inds = np.argmax(overlaps, axis=1) 38 | max_overlaps = overlaps[np.arange(overlaps.shape[0]), argmax_overlaps_inds] 39 | 40 | # compute box regression targets 41 | target_boxes = gt_boxes_r[argmax_overlaps_inds] 42 | target_smooth_label = gt_smooth_label[argmax_overlaps_inds] 43 | 44 | if cfgs.USE_ANGLE_COND: 45 | if cfgs.METHOD == 'R': 46 | delta_theta = np.abs(target_boxes[:, -2] - anchors[:, -1]) 47 | theta_indices = delta_theta < 15 48 | positive_indices = (max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD) & theta_indices 49 | else: 50 | positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD 51 | 52 | ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & (max_overlaps < cfgs.IOU_POSITIVE_THRESHOLD) 53 | 54 | else: 55 | positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD 56 | ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & ~positive_indices 57 | 58 | 
anchor_states[ignore_indices] = -1 59 | anchor_states[positive_indices] = 1 60 | 61 | # compute target class labels 62 | labels[positive_indices, target_boxes[positive_indices, -1].astype(int) - 1] = 1 63 | else: 64 | # no annotations? then everything is background 65 | target_boxes = np.zeros((anchors.shape[0], gt_boxes_r.shape[1])) 66 | target_smooth_label = np.zeros((anchors.shape[0], gt_smooth_label.shape[1])) 67 | 68 | if cfgs.METHOD == 'H': 69 | x_c = (anchors[:, 2] + anchors[:, 0]) / 2 70 | y_c = (anchors[:, 3] + anchors[:, 1]) / 2 71 | h = anchors[:, 2] - anchors[:, 0] + 1 72 | w = anchors[:, 3] - anchors[:, 1] + 1 73 | theta = -90 * np.ones_like(x_c) 74 | anchors = np.vstack([x_c, y_c, w, h, theta]).transpose() 75 | 76 | if cfgs.ANGLE_RANGE == 180: 77 | anchors = coordinate_present_convert(anchors, mode=-1) 78 | target_boxes = coordinate_present_convert(target_boxes, mode=-1) 79 | target_delta = bbox_transform.rbbox_transform(ex_rois=anchors, gt_rois=target_boxes) 80 | 81 | return np.array(labels, np.float32), np.array(target_delta, np.float32), \ 82 | np.array(anchor_states, np.float32), np.array(target_boxes, np.float32),\ 83 | np.array(target_smooth_label, np.float32) 84 | 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /libs/detection_oprations/anchor_target_layer_without_boxweight_dcl.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from libs.configs import cfgs 12 | import numpy as np 13 | from libs.box_utils.cython_utils.cython_bbox import bbox_overlaps 14 | from libs.box_utils.rbbox_overlaps import rbbx_overlaps 15 | from libs.box_utils.iou_cpu import get_iou_matrix 16 | from libs.box_utils import bbox_transform 17 | from libs.box_utils.coordinate_convert import coordinate_present_convert 18 | 19 | 20 | def anchor_target_layer(gt_boxes_h, gt_boxes_r, gt_encode_label, anchors, gpu_id=0): 21 | 22 | anchor_states = np.zeros((anchors.shape[0],)) 23 | labels = np.zeros((anchors.shape[0], cfgs.CLASS_NUM)) 24 | if gt_boxes_r.shape[0]: 25 | # [N, M] 26 | 27 | if cfgs.METHOD == 'H': 28 | overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float), 29 | np.ascontiguousarray(gt_boxes_h, dtype=np.float)) 30 | else: 31 | overlaps = rbbx_overlaps(np.ascontiguousarray(anchors, dtype=np.float32), 32 | np.ascontiguousarray(gt_boxes_r[:, :-1], dtype=np.float32), gpu_id) 33 | 34 | # overlaps = get_iou_matrix(np.ascontiguousarray(anchors, dtype=np.float32), 35 | # np.ascontiguousarray(gt_boxes_r[:, :-1], dtype=np.float32)) 36 | 37 | argmax_overlaps_inds = np.argmax(overlaps, axis=1) 38 | max_overlaps = overlaps[np.arange(overlaps.shape[0]), argmax_overlaps_inds] 39 | 40 | # compute box regression targets 41 | target_boxes = gt_boxes_r[argmax_overlaps_inds] 42 | target_encode_label = gt_encode_label[argmax_overlaps_inds] 43 | 44 | if cfgs.USE_ANGLE_COND: 45 | if cfgs.METHOD == 'R': 46 | delta_theta = np.abs(target_boxes[:, -2] - anchors[:, -1]) 47 | theta_indices = delta_theta < 15 48 | positive_indices = (max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD) & theta_indices 49 | else: 50 | 
positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD 51 | 52 | ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & (max_overlaps < cfgs.IOU_POSITIVE_THRESHOLD) 53 | 54 | else: 55 | positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD 56 | ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & ~positive_indices 57 | 58 | anchor_states[ignore_indices] = -1 59 | anchor_states[positive_indices] = 1 60 | 61 | # compute target class labels 62 | labels[positive_indices, target_boxes[positive_indices, -1].astype(int) - 1] = 1 63 | else: 64 | # no annotations? then everything is background 65 | target_boxes = np.zeros((anchors.shape[0], gt_boxes_r.shape[1])) 66 | target_encode_label = np.zeros((anchors.shape[0], gt_encode_label.shape[1])) 67 | 68 | if cfgs.METHOD == 'H': 69 | x_c = (anchors[:, 2] + anchors[:, 0]) / 2 70 | y_c = (anchors[:, 3] + anchors[:, 1]) / 2 71 | h = anchors[:, 2] - anchors[:, 0] + 1 72 | w = anchors[:, 3] - anchors[:, 1] + 1 73 | theta = -90 * np.ones_like(x_c) 74 | anchors = np.vstack([x_c, y_c, w, h, theta]).transpose() 75 | 76 | if cfgs.ANGLE_RANGE == 180: 77 | anchors = coordinate_present_convert(anchors, mode=-1) 78 | target_boxes = coordinate_present_convert(target_boxes, mode=-1) 79 | target_delta = bbox_transform.rbbox_transform(ex_rois=anchors, gt_rois=target_boxes) 80 | 81 | return np.array(labels, np.float32), np.array(target_delta[:, :-1], np.float32), \ 82 | np.array(anchor_states, np.float32), np.array(target_boxes, np.float32), \ 83 | np.array(target_encode_label, np.float32) 84 | 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /libs/detection_oprations/anchor_target_layer_without_boxweight_dcl_batch.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from libs.configs import cfgs 12 | import numpy as np 13 | from libs.box_utils.cython_utils.cython_bbox import bbox_overlaps 14 | from libs.box_utils.rbbox_overlaps import rbbx_overlaps 15 | from libs.box_utils import bbox_transform 16 | from libs.box_utils.coordinate_convert import coordinate_present_convert 17 | 18 | 19 | def anchor_target_layer(gt_boxes_h_batch, gt_boxes_r_batch, gt_encode_label_batch, anchor_batch, gpu_id=0): 20 | 21 | all_labels, all_target_delta, all_anchor_states, all_target_boxes, all_target_encode_label = [], [], [], [], [] 22 | for i in range(cfgs.BATCH_SIZE): 23 | anchors = np.array(anchor_batch[i], np.float32) 24 | gt_boxes_h = gt_boxes_h_batch[i, :, :] 25 | gt_boxes_r = gt_boxes_r_batch[i, :, :] 26 | gt_encode_label = gt_encode_label_batch[i, :, :] 27 | anchor_states = np.zeros((anchors.shape[0],)) 28 | labels = np.zeros((anchors.shape[0], cfgs.CLASS_NUM)) 29 | if gt_boxes_r.shape[0]: 30 | # [N, M] 31 | 32 | if cfgs.METHOD == 'H': 33 | overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float), 34 | np.ascontiguousarray(gt_boxes_h, dtype=np.float)) 35 | else: 36 | overlaps = rbbx_overlaps(np.ascontiguousarray(anchors, dtype=np.float32), 37 | np.ascontiguousarray(gt_boxes_r[:, :-1], dtype=np.float32), gpu_id) 38 | 39 | 
argmax_overlaps_inds = np.argmax(overlaps, axis=1) 40 | max_overlaps = overlaps[np.arange(overlaps.shape[0]), argmax_overlaps_inds] 41 | 42 | # compute box regression targets 43 | target_boxes = gt_boxes_r[argmax_overlaps_inds] 44 | target_encode_label = gt_encode_label[argmax_overlaps_inds] 45 | 46 | positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD 47 | ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & ~positive_indices 48 | 49 | anchor_states[ignore_indices] = -1 50 | anchor_states[positive_indices] = 1 51 | 52 | # compute target class labels 53 | labels[positive_indices, target_boxes[positive_indices, -1].astype(int) - 1] = 1 54 | else: 55 | # no annotations? then everything is background 56 | target_boxes = np.zeros((anchors.shape[0], gt_boxes_r.shape[1])) 57 | target_encode_label = np.zeros((anchors.shape[0], gt_encode_label.shape[1])) 58 | 59 | if cfgs.METHOD == 'H': 60 | x_c = (anchors[:, 2] + anchors[:, 0]) / 2 61 | y_c = (anchors[:, 3] + anchors[:, 1]) / 2 62 | h = anchors[:, 2] - anchors[:, 0] + 1 63 | w = anchors[:, 3] - anchors[:, 1] + 1 64 | theta = -90 * np.ones_like(x_c) 65 | anchors = np.vstack([x_c, y_c, w, h, theta]).transpose() 66 | 67 | if cfgs.ANGLE_RANGE == 180: 68 | anchors = coordinate_present_convert(anchors, mode=-1) 69 | target_boxes = coordinate_present_convert(target_boxes, mode=-1) 70 | target_delta = bbox_transform.rbbox_transform(ex_rois=anchors, gt_rois=target_boxes) 71 | 72 | all_labels.append(labels) 73 | all_target_delta.append(target_delta) 74 | all_anchor_states.append(anchor_states) 75 | all_target_boxes.append(target_boxes) 76 | all_target_encode_label.append(target_encode_label) 77 | 78 | return np.array(all_labels, np.float32), np.array(all_target_delta, np.float32)[:, :, :-1], \ 79 | np.array(all_anchor_states, np.float32), np.array(all_target_boxes, np.float32), \ 80 | np.array(all_target_encode_label, np.float32) 81 | 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /libs/detection_oprations/anchor_target_layer_without_boxweight_win.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from libs.configs import cfgs 12 | import numpy as np 13 | from libs.box_utils.iou import iou_calculate_np 14 | from libs.box_utils import bbox_transform 15 | from libs.box_utils.coordinate_convert import coordinate_present_convert 16 | 17 | 18 | def anchor_target_layer(gt_boxes_h, gt_boxes_r, anchors, gpu_id=0): 19 | 20 | anchor_states = np.zeros((anchors.shape[0],)) 21 | labels = np.zeros((anchors.shape[0], cfgs.CLASS_NUM)) 22 | if gt_boxes_r.shape[0]: 23 | # [N, M] 24 | 25 | if cfgs.METHOD == 'H': 26 | overlaps = iou_calculate_np(np.ascontiguousarray(anchors, dtype=np.float), 27 | np.ascontiguousarray(gt_boxes_h, dtype=np.float)) 28 | else: 29 | raise Exception('Do not support mode=R in windows version') 30 | argmax_overlaps_inds = np.argmax(overlaps, axis=1) 31 | max_overlaps = overlaps[np.arange(overlaps.shape[0]), argmax_overlaps_inds] 32 | 33 | # compute box regression targets 34 | target_boxes = gt_boxes_r[argmax_overlaps_inds] 35 | 
36 | if cfgs.USE_ANGLE_COND: 37 | if cfgs.METHOD == 'R': 38 | delta_theta = np.abs(target_boxes[:, -2] - anchors[:, -1]) 39 | theta_indices = delta_theta < 15 40 | positive_indices = (max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD) & theta_indices 41 | else: 42 | positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD 43 | 44 | ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & (max_overlaps < cfgs.IOU_POSITIVE_THRESHOLD) 45 | 46 | else: 47 | positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD 48 | ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & ~positive_indices 49 | 50 | anchor_states[ignore_indices] = -1 51 | anchor_states[positive_indices] = 1 52 | 53 | # compute target class labels 54 | labels[positive_indices, target_boxes[positive_indices, -1].astype(int) - 1] = 1 55 | else: 56 | # no annotations? then everything is background 57 | target_boxes = np.zeros((anchors.shape[0], gt_boxes_r.shape[1])) 58 | 59 | if cfgs.METHOD == 'H': 60 | x_c = (anchors[:, 2] + anchors[:, 0]) / 2 61 | y_c = (anchors[:, 3] + anchors[:, 1]) / 2 62 | h = anchors[:, 2] - anchors[:, 0] + 1 63 | w = anchors[:, 3] - anchors[:, 1] + 1 64 | theta = -90 * np.ones_like(x_c) 65 | anchors = np.vstack([x_c, y_c, w, h, theta]).transpose() 66 | 67 | if cfgs.ANGLE_RANGE == 180: 68 | anchors = coordinate_present_convert(anchors, mode=-1) 69 | target_boxes = coordinate_present_convert(target_boxes, mode=-1) 70 | target_delta = bbox_transform.rbbox_transform(ex_rois=anchors, gt_rois=target_boxes) 71 | 72 | return np.array(labels, np.float32), np.array(target_delta, np.float32), \ 73 | np.array(anchor_states, np.float32), np.array(target_boxes, np.float32) 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /libs/detection_oprations/proposal_opr.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | from libs.configs import cfgs 3 | from libs.box_utils import bbox_transform 4 | from libs.box_utils import nms_rotate 5 | import tensorflow as tf 6 | 7 | from libs.box_utils.coordinate_convert import coordinate_present_convert, coords_regular 8 | 9 | 10 | def filter_detections(boxes, scores, is_training): 11 | """ 12 | :param boxes: [-1, 4] 13 | :param scores: [-1, ] 14 | :param labels: [-1, ] 15 | :return: 16 | """ 17 | if is_training: 18 | indices = tf.reshape(tf.where(tf.greater(scores, cfgs.VIS_SCORE)), [-1, ]) 19 | else: 20 | indices = tf.reshape(tf.where(tf.greater(scores, cfgs.FILTERED_SCORE)), [-1, ]) 21 | 22 | if cfgs.NMS: 23 | filtered_boxes = tf.gather(boxes, indices) 24 | filtered_scores = tf.gather(scores, indices) 25 | 26 | # perform NMS 27 | 28 | nms_indices = nms_rotate.nms_rotate(decode_boxes=filtered_boxes, 29 | scores=filtered_scores, 30 | iou_threshold=cfgs.NMS_IOU_THRESHOLD, 31 | max_output_size=100 if is_training else 1000, 32 | use_angle_condition=False, 33 | angle_threshold=15, 34 | use_gpu=False) 35 | 36 | # filter indices based on NMS 37 | indices = tf.gather(indices, nms_indices) 38 | 39 | # add indices to list of all indices 40 | return indices 41 | 42 | 43 | def postprocess_detctions(rpn_bbox_pred, rpn_cls_prob, anchors, is_training): 44 | 45 | if cfgs.METHOD == 'H': 46 | x_c = (anchors[:, 2] + anchors[:, 0]) / 2 47 | y_c = (anchors[:, 3] + anchors[:, 1]) / 2 48 | h = anchors[:, 2] - anchors[:, 0] + 1 49 | w = anchors[:, 3] - anchors[:, 1] + 1 50 | theta = -90 * tf.ones_like(x_c) 51 | anchors = tf.transpose(tf.stack([x_c, y_c, w, h, theta])) 52 | 53 | 
if cfgs.ANGLE_RANGE == 180: 54 | anchors = tf.py_func(coordinate_present_convert, 55 | inp=[anchors, -1], 56 | Tout=[tf.float32]) 57 | anchors = tf.reshape(anchors, [-1, 5]) 58 | 59 | boxes_pred = bbox_transform.rbbox_transform_inv(boxes=anchors, deltas=rpn_bbox_pred) 60 | 61 | if cfgs.ANGLE_RANGE == 180: 62 | # boxes_pred = tf.py_func(coords_regular, 63 | # inp=[boxes_pred], 64 | # Tout=[tf.float32]) 65 | # boxes_pred = tf.reshape(boxes_pred, [-1, 5]) 66 | 67 | _, _, _, _, theta = tf.unstack(boxes_pred, axis=1) 68 | indx = tf.reshape(tf.where(tf.logical_and(tf.less(theta, 0), tf.greater_equal(theta, -180))), [-1, ]) 69 | boxes_pred = tf.gather(boxes_pred, indx) 70 | rpn_cls_prob = tf.gather(rpn_cls_prob, indx) 71 | 72 | boxes_pred = tf.py_func(coordinate_present_convert, 73 | inp=[boxes_pred, 1], 74 | Tout=[tf.float32]) 75 | boxes_pred = tf.reshape(boxes_pred, [-1, 5]) 76 | 77 | return_boxes_pred = [] 78 | return_scores = [] 79 | return_labels = [] 80 | for j in range(0, cfgs.CLASS_NUM): 81 | indices = filter_detections(boxes_pred, rpn_cls_prob[:, j], is_training) 82 | tmp_boxes_pred = tf.reshape(tf.gather(boxes_pred, indices), [-1, 5]) 83 | tmp_scores = tf.reshape(tf.gather(rpn_cls_prob[:, j], indices), [-1, ]) 84 | 85 | return_boxes_pred.append(tmp_boxes_pred) 86 | return_scores.append(tmp_scores) 87 | return_labels.append(tf.ones_like(tmp_scores)*(j+1)) 88 | 89 | return_boxes_pred = tf.concat(return_boxes_pred, axis=0) 90 | return_scores = tf.concat(return_scores, axis=0) 91 | return_labels = tf.concat(return_labels, axis=0) 92 | 93 | return return_boxes_pred, return_scores, return_labels 94 | -------------------------------------------------------------------------------- /libs/detection_oprations/proposal_opr_.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | from libs.configs import cfgs 3 | from libs.box_utils import bbox_transform 4 | from libs.box_utils import nms_rotate 5 | import tensorflow as tf 6 | 7 | from libs.box_utils.coordinate_convert import coordinate_present_convert, coords_regular 8 | 9 | 10 | def postprocess_detctions(rpn_bbox_pred, rpn_cls_prob, anchors, is_training, gpu_id): 11 | 12 | return_boxes_pred = [] 13 | return_scores = [] 14 | return_labels = [] 15 | for j in range(0, cfgs.CLASS_NUM): 16 | scores = rpn_cls_prob[:, j] 17 | if is_training: 18 | indices = tf.reshape(tf.where(tf.greater(scores, cfgs.VIS_SCORE)), [-1, ]) 19 | else: 20 | indices = tf.reshape(tf.where(tf.greater(scores, cfgs.FILTERED_SCORE)), [-1, ]) 21 | 22 | anchors_ = tf.gather(anchors, indices) 23 | rpn_bbox_pred_ = tf.gather(rpn_bbox_pred, indices) 24 | scores = tf.gather(scores, indices) 25 | 26 | if cfgs.METHOD == 'H': 27 | x_c = (anchors_[:, 2] + anchors_[:, 0]) / 2 28 | y_c = (anchors_[:, 3] + anchors_[:, 1]) / 2 29 | h = anchors_[:, 2] - anchors_[:, 0] + 1 30 | w = anchors_[:, 3] - anchors_[:, 1] + 1 31 | theta = -90 * tf.ones_like(x_c) 32 | anchors_ = tf.transpose(tf.stack([x_c, y_c, w, h, theta])) 33 | 34 | if cfgs.ANGLE_RANGE == 180: 35 | anchors_ = tf.py_func(coordinate_present_convert, 36 | inp=[anchors_, -1], 37 | Tout=[tf.float32]) 38 | anchors_ = tf.reshape(anchors_, [-1, 5]) 39 | 40 | boxes_pred = bbox_transform.rbbox_transform_inv(boxes=anchors_, deltas=rpn_bbox_pred_) 41 | 42 | if cfgs.ANGLE_RANGE == 180: 43 | 44 | _, _, _, _, theta = tf.unstack(boxes_pred, axis=1) 45 | indx = tf.reshape(tf.where(tf.logical_and(tf.less(theta, 0), tf.greater_equal(theta, -180))), [-1, ]) 46 | boxes_pred = tf.gather(boxes_pred, indx) 
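            # Note on the ANGLE_RANGE == 180 branch (DCL convention in this repo family):
            # the boxes were decoded in the long-edge representation produced by
            # coordinate_present_convert(..., mode=-1), so only predictions whose theta
            # falls in [-180, 0) are kept; the scores are gathered with the same indices
            # just below, and coordinate_present_convert(..., 1) then maps the surviving
            # boxes back to the OpenCV-style [-90, 0) definition expected by the rotated NMS.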
47 | scores = tf.gather(scores, indx) 48 | 49 | boxes_pred = tf.py_func(coordinate_present_convert, 50 | inp=[boxes_pred, 1], 51 | Tout=[tf.float32]) 52 | boxes_pred = tf.reshape(boxes_pred, [-1, 5]) 53 | 54 | max_output_size = 4000 if 'DOTA' in cfgs.DATASET_NAME else 200 55 | nms_indices = nms_rotate.nms_rotate(decode_boxes=boxes_pred, 56 | scores=scores, 57 | iou_threshold=cfgs.NMS_IOU_THRESHOLD, 58 | max_output_size=100 if is_training else max_output_size, 59 | use_angle_condition=False, 60 | angle_threshold=15, 61 | use_gpu=True, 62 | gpu_id=gpu_id) 63 | 64 | tmp_boxes_pred = tf.reshape(tf.gather(boxes_pred, nms_indices), [-1, 5]) 65 | tmp_scores = tf.reshape(tf.gather(scores, nms_indices), [-1, ]) 66 | 67 | return_boxes_pred.append(tmp_boxes_pred) 68 | return_scores.append(tmp_scores) 69 | return_labels.append(tf.ones_like(tmp_scores)*(j+1)) 70 | 71 | return_boxes_pred = tf.concat(return_boxes_pred, axis=0) 72 | return_scores = tf.concat(return_scores, axis=0) 73 | return_labels = tf.concat(return_labels, axis=0) 74 | 75 | return return_boxes_pred, return_scores, return_labels 76 | -------------------------------------------------------------------------------- /libs/detection_oprations/refinebox_target_layer_without_boxweight_dcl.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from libs.configs import cfgs 12 | import numpy as np 13 | from libs.box_utils.rbbox_overlaps import rbbx_overlaps 14 | from libs.box_utils import bbox_transform 15 | 16 | from libs.box_utils.coordinate_convert import coordinate_present_convert 17 | 18 | 19 | def refinebox_target_layer(gt_boxes_r, gt_encode_label, anchors, pos_threshold, neg_threshold, gpu_id=0): 20 | 21 | anchor_states = np.zeros((anchors.shape[0],)) 22 | labels = np.zeros((anchors.shape[0], cfgs.CLASS_NUM)) 23 | if gt_boxes_r.shape[0]: 24 | # [N, M] 25 | 26 | # if cfgs.ANGLE_RANGE == 180: 27 | # gt_boxes_r_ = coordinate_present_convert(gt_boxes_r[:, :-1], 1) 28 | # 29 | # overlaps = rbbx_overlaps(np.ascontiguousarray(anchors, dtype=np.float32), 30 | # np.ascontiguousarray(gt_boxes_r_, dtype=np.float32), gpu_id) 31 | # else: 32 | overlaps = rbbx_overlaps(np.ascontiguousarray(anchors, dtype=np.float32), 33 | np.ascontiguousarray(gt_boxes_r[:, :-1], dtype=np.float32), gpu_id) 34 | 35 | # overlaps = np.clip(overlaps, 0.0, 1.0) 36 | 37 | argmax_overlaps_inds = np.argmax(overlaps, axis=1) 38 | max_overlaps = overlaps[np.arange(overlaps.shape[0]), argmax_overlaps_inds] 39 | 40 | # compute box regression targets 41 | target_boxes = gt_boxes_r[argmax_overlaps_inds] 42 | target_encode_label = gt_encode_label[argmax_overlaps_inds] 43 | 44 | positive_indices = max_overlaps >= pos_threshold 45 | ignore_indices = (max_overlaps > neg_threshold) & ~positive_indices 46 | anchor_states[ignore_indices] = -1 47 | anchor_states[positive_indices] = 1 48 | 49 | # compute target class labels 50 | labels[positive_indices, target_boxes[positive_indices, -1].astype(int) - 1] = 1 51 | else: 52 | # no annotations?
then everything is background 53 | target_boxes = np.zeros((anchors.shape[0], gt_boxes_r.shape[1])) 54 | target_encode_label = np.zeros((anchors.shape[0], gt_encode_label.shape[1])) 55 | 56 | if cfgs.ANGLE_RANGE == 180: 57 | anchors = coordinate_present_convert(anchors, mode=-1) 58 | target_boxes = coordinate_present_convert(target_boxes, mode=-1) 59 | 60 | target_delta = bbox_transform.rbbox_transform(ex_rois=anchors, gt_rois=target_boxes, 61 | scale_factors=cfgs.ANCHOR_SCALE_FACTORS) 62 | 63 | return np.array(labels, np.float32), np.array(target_delta[:, :-1], np.float32), \ 64 | np.array(anchor_states, np.float32), np.array(target_boxes, np.float32), \ 65 | np.array(target_encode_label, np.float32) 66 | 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /libs/label_name_dict/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/label_name_dict/__init__.py -------------------------------------------------------------------------------- /libs/label_name_dict/coco_dict.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | class_names = [ 6 | 'back_ground', 'person', 'bicycle', 'car', 'motorcycle', 7 | 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 8 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 9 | 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 10 | 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 11 | 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 12 | 'sports ball', 'kite', 'baseball bat', 'baseball glove', 13 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 14 | 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 15 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 16 | 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 17 | 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 18 | 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 19 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 20 | 'book', 'clock', 'vase', 'scissors', 'teddy bear', 21 | 'hair drier', 'toothbrush'] 22 | 23 | 24 | classes_originID = { 25 | 'person': 1, 'bicycle': 2, 'car': 3, 'motorcycle': 4, 26 | 'airplane': 5, 'bus': 6, 'train': 7, 'truck': 8, 'boat': 9, 27 | 'traffic light': 10, 'fire hydrant': 11, 'stop sign': 13, 28 | 'parking meter': 14, 'bench': 15, 'bird': 16, 'cat': 17, 29 | 'dog': 18, 'horse': 19, 'sheep': 20, 'cow': 21, 'elephant': 22, 30 | 'bear': 23, 'zebra': 24, 'giraffe': 25, 'backpack': 27, 31 | 'umbrella': 28, 'handbag': 31, 'tie': 32, 'suitcase': 33, 32 | 'frisbee': 34, 'skis': 35, 'snowboard': 36, 'sports ball': 37, 33 | 'kite': 38, 'baseball bat': 39, 'baseball glove': 40, 34 | 'skateboard': 41, 'surfboard': 42, 'tennis racket': 43, 35 | 'bottle': 44, 'wine glass': 46, 'cup': 47, 'fork': 48, 36 | 'knife': 49, 'spoon': 50, 'bowl': 51, 'banana': 52, 'apple': 53, 37 | 'sandwich': 54, 'orange': 55, 'broccoli': 56, 'carrot': 57, 38 | 'hot dog': 58, 'pizza': 59, 'donut': 60, 'cake': 61, 39 | 'chair': 62, 'couch': 63, 'potted plant': 64, 'bed': 65, 40 | 'dining table': 67, 'toilet': 70, 'tv': 72, 'laptop': 73, 41 | 'mouse': 74, 'remote': 75, 'keyboard': 76, 'cell phone': 77, 42 | 'microwave': 78, 'oven': 79, 'toaster': 80, 'sink': 81, 43 | 'refrigerator': 82, 
'book': 84, 'clock': 85, 'vase': 86, 44 | 'scissors': 87, 'teddy bear': 88, 'hair drier': 89, 45 | 'toothbrush': 90} 46 | 47 | originID_classes = {item: key for key, item in classes_originID.items()} 48 | NAME_LABEL_MAP = dict(zip(class_names, range(len(class_names)))) 49 | LABEL_NAME_MAP = dict(zip(range(len(class_names)), class_names)) 50 | 51 | # print (originID_classes) 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /libs/losses/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/losses/__init__.py -------------------------------------------------------------------------------- /libs/networks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/networks/__init__.py -------------------------------------------------------------------------------- /libs/networks/efficientnet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/networks/efficientnet/__init__.py -------------------------------------------------------------------------------- /libs/networks/efficientnet/panda.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/networks/efficientnet/panda.jpg -------------------------------------------------------------------------------- /libs/networks/efficientnet/test.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import tensorflow as tf 3 | import os 4 | import sys 5 | 6 | sys.path.append('../../..') 7 | from libs.networks.efficientnet import efficientnet_builder 8 | 9 | os.environ["CUDA_VISIBLE_DEVICES"] = '2' 10 | 11 | def restore_model(sess, ckpt_dir): 12 | """Restore variables from checkpoint dir.""" 13 | checkpoint = tf.train.latest_checkpoint(ckpt_dir) 14 | ema = tf.train.ExponentialMovingAverage(decay=0.9999) 15 | ema_vars = tf.trainable_variables() + tf.get_collection('moving_vars') 16 | for v in tf.global_variables(): 17 | if 'moving_mean' in v.name or 'moving_variance' in v.name: 18 | ema_vars.append(v) 19 | ema_vars = list(set(ema_vars)) 20 | var_dict = ema.variables_to_restore(ema_vars) 21 | saver = tf.train.Saver(var_dict, max_to_keep=1) 22 | saver.restore(sess, checkpoint) 23 | 24 | 25 | images = cv2.imread('/data/yangxue/code/R3Det_Tensorflow/libs/networks/efficientnet/panda.jpg') 26 | images = cv2.resize(images, (112, 112)) 27 | images = tf.expand_dims(tf.constant(images, tf.float32), axis=0) 28 | features, endpoints = efficientnet_builder.build_model_base(images, 'efficientnet-b0', training=True) 29 | print(endpoints.keys()) 30 | 31 | init_op = tf.group( 32 | tf.global_variables_initializer(), 33 | tf.local_variables_initializer() 34 | ) 35 | 36 | tfconfig = tf.ConfigProto( 37 | allow_soft_placement=True, log_device_placement=False) 38 | tfconfig.gpu_options.allow_growth = True 39 | with tf.Session(config=tfconfig) as sess: 40 | sess.run(init_op) 41 | restore_model(sess, '/data/yangxue/code/R3Det_Tensorflow/libs/networks/efficientnet/efficientnet-b0')
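    # Illustrative sanity check (assumes build_model_base names its feature endpoints
    # 'reduction_1' ... 'reduction_5', one per stride level): for the 112x112 input above,
    # 'reduction_5' should come out at stride 32, i.e. a 4x4 spatial map.
    for name in sorted(endpoints.keys()):
        if name.startswith('reduction'):
            print(name, endpoints[name].get_shape().as_list())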
42 | features_, endpoints_ = sess.run([features, endpoints]) 43 | print(endpoints_['reduction_1']) 44 | print(endpoints_['reduction_2']) 45 | print(endpoints_['reduction_3']) 46 | print(endpoints_['reduction_4']) 47 | print(endpoints_['reduction_5']) -------------------------------------------------------------------------------- /libs/networks/mobilenet/README.md: -------------------------------------------------------------------------------- 1 | # Mobilenet V2 2 | This folder contains building code for Mobilenet V2, based on 3 | [Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation] 4 | (https://arxiv.org/abs/1801.04381) 5 | 6 | # Pretrained model 7 | TODO 8 | 9 | # Example 10 | TODO 11 | 12 | 13 | -------------------------------------------------------------------------------- /libs/networks/mobilenet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/networks/mobilenet/__init__.py -------------------------------------------------------------------------------- /libs/networks/opts.py: -------------------------------------------------------------------------------- 1 | # -*-coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, division, print_function 4 | 5 | import tensorflow as tf 6 | 7 | 8 | def norm(x, norm_type, is_train, name, G=32, esp=1e-5): 9 | with tf.variable_scope('{}_norm_{}'.format(norm_type, name)): 10 | if norm_type == 'none': 11 | output = x 12 | elif norm_type == 'batch': 13 | output = tf.contrib.layers.batch_norm( 14 | x, center=True, scale=True, decay=0.999, 15 | is_training=is_train, updates_collections=None 16 | ) 17 | elif norm_type == 'group': 18 | # normalize 19 | # transpose: [bs, h, w, c] to [bs, c, h, w] following the paper 20 | x = tf.transpose(x, [0, 3, 1, 2]) 21 | N, C, H, W = x.get_shape().as_list() 22 | G = min(G, C) 23 | x = tf.reshape(x, [N, G, C // G, H, W]) 24 | mean, var = tf.nn.moments(x, [2, 3, 4], keep_dims=True) 25 | x = (x - mean) / tf.sqrt(var + esp) 26 | # per channel gamma and beta 27 | gamma = tf.get_variable('gamma', [C], 28 | initializer=tf.constant_initializer(1.0)) 29 | beta = tf.get_variable('beta', [C], 30 | initializer=tf.constant_initializer(0.0)) 31 | gamma = tf.reshape(gamma, [1, C, 1, 1]) 32 | beta = tf.reshape(beta, [1, C, 1, 1]) 33 | 34 | output = tf.reshape(x, [N, C, H, W]) * gamma + beta 35 | # transpose: [bs, c, h, w] back to [bs, h, w, c] following the paper 36 | output = tf.transpose(output, [0, 2, 3, 1]) 37 | else: 38 | raise NotImplementedError 39 | return output 40 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/inception.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Brings all inception models under one namespace.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | # pylint: disable=unused-import 22 | from nets.inception_resnet_v2 import inception_resnet_v2 23 | from nets.inception_resnet_v2 import inception_resnet_v2_arg_scope 24 | from nets.inception_resnet_v2 import inception_resnet_v2_base 25 | from nets.inception_v1 import inception_v1 26 | from nets.inception_v1 import inception_v1_arg_scope 27 | from nets.inception_v1 import inception_v1_base 28 | from nets.inception_v2 import inception_v2 29 | from nets.inception_v2 import inception_v2_arg_scope 30 | from nets.inception_v2 import inception_v2_base 31 | from nets.inception_v3 import inception_v3 32 | from nets.inception_v3 import inception_v3_arg_scope 33 | from nets.inception_v3 import inception_v3_base 34 | from nets.inception_v4 import inception_v4 35 | from nets.inception_v4 import inception_v4_arg_scope 36 | from nets.inception_v4 import inception_v4_base 37 | # pylint: enable=unused-import 38 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/inception_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains common code shared by all inception models. 16 | 17 | Usage of arg scope: 18 | with slim.arg_scope(inception_arg_scope()): 19 | logits, end_points = inception.inception_v3(images, num_classes, 20 | is_training=is_training) 21 | 22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | import tensorflow as tf 28 | 29 | slim = tf.contrib.slim 30 | 31 | 32 | def inception_arg_scope(weight_decay=0.00004, 33 | use_batch_norm=True, 34 | batch_norm_decay=0.9997, 35 | batch_norm_epsilon=0.001): 36 | """Defines the default arg scope for inception models. 37 | 38 | Args: 39 | weight_decay: The weight decay to use for regularizing the model. 40 | use_batch_norm: "If `True`, batch_norm is applied after each convolution. 41 | batch_norm_decay: Decay for batch norm moving average. 
42 | batch_norm_epsilon: Small float added to variance to avoid dividing by zero 43 | in batch norm. 44 | 45 | Returns: 46 | An `arg_scope` to use for the inception models. 47 | """ 48 | batch_norm_params = { 49 | # Decay for the moving averages. 50 | 'decay': batch_norm_decay, 51 | # epsilon to prevent 0s in variance. 52 | 'epsilon': batch_norm_epsilon, 53 | # collection containing update_ops. 54 | 'updates_collections': tf.GraphKeys.UPDATE_OPS, 55 | } 56 | if use_batch_norm: 57 | normalizer_fn = slim.batch_norm 58 | normalizer_params = batch_norm_params 59 | else: 60 | normalizer_fn = None 61 | normalizer_params = {} 62 | # Set weight_decay for weights in Conv and FC layers. 63 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 64 | weights_regularizer=slim.l2_regularizer(weight_decay)): 65 | with slim.arg_scope( 66 | [slim.conv2d], 67 | weights_initializer=slim.variance_scaling_initializer(), 68 | activation_fn=tf.nn.relu, 69 | normalizer_fn=normalizer_fn, 70 | normalizer_params=normalizer_params) as sc: 71 | return sc 72 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/lenet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a variant of the LeNet model definition.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | slim = tf.contrib.slim 24 | 25 | 26 | def lenet(images, num_classes=10, is_training=False, 27 | dropout_keep_prob=0.5, 28 | prediction_fn=slim.softmax, 29 | scope='LeNet'): 30 | """Creates a variant of the LeNet model. 31 | 32 | Note that since the output is a set of 'logits', the values fall in the 33 | interval of (-infinity, infinity). Consequently, to convert the outputs to a 34 | probability distribution over the characters, one will need to convert them 35 | using the softmax function: 36 | 37 | logits = lenet.lenet(images, is_training=False) 38 | probabilities = tf.nn.softmax(logits) 39 | predictions = tf.argmax(logits, 1) 40 | 41 | Args: 42 | images: A batch of `Tensors` of size [batch_size, height, width, channels]. 43 | num_classes: the number of classes in the dataset. 44 | is_training: specifies whether or not we're currently training the model. 45 | This variable will determine the behaviour of the dropout layer. 46 | dropout_keep_prob: the percentage of activation values that are retained. 47 | prediction_fn: a function to get predictions out of logits. 48 | scope: Optional variable_scope. 
49 | 50 | Returns: 51 | logits: the pre-softmax activations, a tensor of size 52 | [batch_size, `num_classes`] 53 | end_points: a dictionary from components of the network to the corresponding 54 | activation. 55 | """ 56 | end_points = {} 57 | 58 | with tf.variable_scope(scope, 'LeNet', [images, num_classes]): 59 | net = slim.conv2d(images, 32, [5, 5], scope='conv1') 60 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool1') 61 | net = slim.conv2d(net, 64, [5, 5], scope='conv2') 62 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool2') 63 | net = slim.flatten(net) 64 | end_points['Flatten'] = net 65 | 66 | net = slim.fully_connected(net, 1024, scope='fc3') 67 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 68 | scope='dropout3') 69 | logits = slim.fully_connected(net, num_classes, activation_fn=None, 70 | scope='fc4') 71 | 72 | end_points['Logits'] = logits 73 | end_points['Predictions'] = prediction_fn(logits, scope='Predictions') 74 | 75 | return logits, end_points 76 | lenet.default_image_size = 28 77 | 78 | 79 | def lenet_arg_scope(weight_decay=0.0): 80 | """Defines the default lenet argument scope. 81 | 82 | Args: 83 | weight_decay: The weight decay to use for regularizing the model. 84 | 85 | Returns: 86 | An `arg_scope` to use for the inception v3 model. 87 | """ 88 | with slim.arg_scope( 89 | [slim.conv2d, slim.fully_connected], 90 | weights_regularizer=slim.l2_regularizer(weight_decay), 91 | weights_initializer=tf.truncated_normal_initializer(stddev=0.1), 92 | activation_fn=tf.nn.relu) as sc: 93 | return sc 94 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/mobilenet_v1.md: -------------------------------------------------------------------------------- 1 | # MobileNet_v1 2 | 3 | [MobileNets](https://arxiv.org/abs/1704.04861) are small, low-latency, low-power models parameterized to meet the resource constraints of a variety of use cases. They can be built upon for classification, detection, embeddings and segmentation similar to how other popular large scale models, such as Inception, are used. MobileNets can be run efficiently on mobile devices with [TensorFlow Mobile](https://www.tensorflow.org/mobile/). 4 | 5 | MobileNets trade off between latency, size and accuracy while comparing favorably with popular models from the literature. 6 | 7 | ![alt text](mobilenet_v1.png "MobileNet Graph") 8 | 9 | # Pre-trained Models 10 | 11 | Choose the right MobileNet model to fit your latency and size budget. The size of the network in memory and on disk is proportional to the number of parameters. The latency and power usage of the network scales with the number of Multiply-Accumulates (MACs) which measures the number of fused Multiplication and Addition operations. These MobileNet models have been trained on the 12 | [ILSVRC-2012-CLS](http://www.image-net.org/challenges/LSVRC/2012/) 13 | image classification dataset. Accuracies were computed by evaluating using a single image crop. 
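For a quick orientation, here is a minimal TF-Slim sketch of restoring one of the checkpoints from the table below for inference (a sketch only: the `nets` import path and the checkpoint location under `/tmp/checkpoints` are assumptions, matching the download example further down):

```python
import tensorflow as tf
from nets import mobilenet_v1  # assumes the TF-Slim models repo is on PYTHONPATH

slim = tf.contrib.slim

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
# The released checkpoints were trained with 1001 classes (background + 1000 ILSVRC classes).
with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope(is_training=False)):
    logits, end_points = mobilenet_v1.mobilenet_v1(images, num_classes=1001, is_training=False)

saver = tf.train.Saver(slim.get_model_variables('MobilenetV1'))
with tf.Session() as sess:
    saver.restore(sess, '/tmp/checkpoints/mobilenet_v1_1.0_224.ckpt')
```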
14 | 15 | Model Checkpoint | Million MACs | Million Parameters | Top-1 Accuracy| Top-5 Accuracy | 16 | :----:|:------------:|:----------:|:-------:|:-------:| 17 | [MobileNet_v1_1.0_224](http://download.tensorflow.org/models/mobilenet_v1_1.0_224_2017_06_14.tar.gz)|569|4.24|70.7|89.5| 18 | [MobileNet_v1_1.0_192](http://download.tensorflow.org/models/mobilenet_v1_1.0_192_2017_06_14.tar.gz)|418|4.24|69.3|88.9| 19 | [MobileNet_v1_1.0_160](http://download.tensorflow.org/models/mobilenet_v1_1.0_160_2017_06_14.tar.gz)|291|4.24|67.2|87.5| 20 | [MobileNet_v1_1.0_128](http://download.tensorflow.org/models/mobilenet_v1_1.0_128_2017_06_14.tar.gz)|186|4.24|64.1|85.3| 21 | [MobileNet_v1_0.75_224](http://download.tensorflow.org/models/mobilenet_v1_0.75_224_2017_06_14.tar.gz)|317|2.59|68.4|88.2| 22 | [MobileNet_v1_0.75_192](http://download.tensorflow.org/models/mobilenet_v1_0.75_192_2017_06_14.tar.gz)|233|2.59|67.4|87.3| 23 | [MobileNet_v1_0.75_160](http://download.tensorflow.org/models/mobilenet_v1_0.75_160_2017_06_14.tar.gz)|162|2.59|65.2|86.1| 24 | [MobileNet_v1_0.75_128](http://download.tensorflow.org/models/mobilenet_v1_0.75_128_2017_06_14.tar.gz)|104|2.59|61.8|83.6| 25 | [MobileNet_v1_0.50_224](http://download.tensorflow.org/models/mobilenet_v1_0.50_224_2017_06_14.tar.gz)|150|1.34|64.0|85.4| 26 | [MobileNet_v1_0.50_192](http://download.tensorflow.org/models/mobilenet_v1_0.50_192_2017_06_14.tar.gz)|110|1.34|62.1|84.0| 27 | [MobileNet_v1_0.50_160](http://download.tensorflow.org/models/mobilenet_v1_0.50_160_2017_06_14.tar.gz)|77|1.34|59.9|82.5| 28 | [MobileNet_v1_0.50_128](http://download.tensorflow.org/models/mobilenet_v1_0.50_128_2017_06_14.tar.gz)|49|1.34|56.2|79.6| 29 | [MobileNet_v1_0.25_224](http://download.tensorflow.org/models/mobilenet_v1_0.25_224_2017_06_14.tar.gz)|41|0.47|50.6|75.0| 30 | [MobileNet_v1_0.25_192](http://download.tensorflow.org/models/mobilenet_v1_0.25_192_2017_06_14.tar.gz)|34|0.47|49.0|73.6| 31 | [MobileNet_v1_0.25_160](http://download.tensorflow.org/models/mobilenet_v1_0.25_160_2017_06_14.tar.gz)|21|0.47|46.0|70.7| 32 | [MobileNet_v1_0.25_128](http://download.tensorflow.org/models/mobilenet_v1_0.25_128_2017_06_14.tar.gz)|14|0.47|41.3|66.2| 33 | 34 | 35 | Here is an example of how to download the MobileNet_v1_1.0_224 checkpoint: 36 | 37 | ```shell 38 | $ CHECKPOINT_DIR=/tmp/checkpoints 39 | $ mkdir ${CHECKPOINT_DIR} 40 | $ wget http://download.tensorflow.org/models/mobilenet_v1_1.0_224_2017_06_14.tar.gz 41 | $ tar -xvf mobilenet_v1_1.0_224_2017_06_14.tar.gz 42 | $ mv mobilenet_v1_1.0_224.ckpt.* ${CHECKPOINT_DIR} 43 | $ rm mobilenet_v1_1.0_224_2017_06_14.tar.gz 44 | ``` 45 | More information on integrating MobileNets into your project can be found at the [TF-Slim Image Classification Library](https://github.com/tensorflow/models/blob/master/slim/README.md). 46 | 47 | To get started running models on-device go to [TensorFlow Mobile](https://www.tensorflow.org/mobile/). 48 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/mobilenet_v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/networks/slim_nets/mobilenet_v1.png -------------------------------------------------------------------------------- /libs/networks/slim_nets/nets_factory_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. 
--------------------------------------------------------------------------------
/libs/networks/slim_nets/nets_factory_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 Google Inc. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | """Tests for the slim nets_factory."""
17 | 
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | 
22 | import tensorflow as tf
23 | 
24 | from nets import nets_factory
25 | 
26 | slim = tf.contrib.slim
27 | 
28 | 
29 | class NetworksTest(tf.test.TestCase):
30 | 
31 |   def testGetNetworkFn(self):
32 |     batch_size = 5
33 |     num_classes = 1000
34 |     for net in nets_factory.networks_map:
35 |       with self.test_session():
36 |         net_fn = nets_factory.get_network_fn(net, num_classes)
37 |         # Most networks use 224 as their default_image_size
38 |         image_size = getattr(net_fn, 'default_image_size', 224)
39 |         inputs = tf.random_uniform((batch_size, image_size, image_size, 3))
40 |         logits, end_points = net_fn(inputs)
41 |         self.assertTrue(isinstance(logits, tf.Tensor))
42 |         self.assertTrue(isinstance(end_points, dict))
43 |         self.assertEqual(logits.get_shape().as_list()[0], batch_size)
44 |         self.assertEqual(logits.get_shape().as_list()[-1], num_classes)
45 | 
46 |   def testGetNetworkFnArgScope(self):
47 |     batch_size = 5
48 |     num_classes = 10
49 |     net = 'cifarnet'
50 |     with self.test_session(use_gpu=True):
51 |       net_fn = nets_factory.get_network_fn(net, num_classes)
52 |       image_size = getattr(net_fn, 'default_image_size', 224)
53 |       with slim.arg_scope([slim.model_variable, slim.variable],
54 |                           device='/CPU:0'):
55 |         inputs = tf.random_uniform((batch_size, image_size, image_size, 3))
56 |         net_fn(inputs)
57 |       weights = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, 'CifarNet/conv1')[0]
58 |       self.assertDeviceEqual('/CPU:0', weights.device)
59 | 
60 | if __name__ == '__main__':
61 |   tf.test.main()
62 | 
--------------------------------------------------------------------------------
/libs/val_libs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/val_libs/__init__.py
--------------------------------------------------------------------------------
/output/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/output/.DS_Store
--------------------------------------------------------------------------------
/output/trained_weights/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/output/trained_weights/.DS_Store
--------------------------------------------------------------------------------
/output/trained_weights/README.md:
--------------------------------------------------------------------------------
1 | Please download the models trained by this project and put them here.
--------------------------------------------------------------------------------
/pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/pipeline.png
--------------------------------------------------------------------------------
/scalars.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/scalars.png
--------------------------------------------------------------------------------
/tools/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/tools/.DS_Store
--------------------------------------------------------------------------------
/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/tools/__init__.py
--------------------------------------------------------------------------------
/tsne/tsne.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | import tensorflow as tf
4 | from tensorflow.contrib.tensorboard.plugins import projector
5 | from tensorflow.examples.tutorials.mnist import input_data
6 | import sys
7 | sys.path.append("../")
8 | 
9 | from libs.configs import cfgs
10 | 
11 | 
12 | LOG_DIR = './dcl_log/{}'.format(cfgs.VERSION)
13 | SPRITE_FILE = 'dota_sprite.jpg'
14 | META_FILE = "dcl_meta.tsv"
15 | TENSOR_NAME = "FINAL_LOGITS"
16 | 
17 | 
18 | # Generate the log files needed to visualize the final output-layer vectors.
19 | def visualisation(final_result):
20 |     # Store the final output-layer vectors in a new variable: embeddings are implemented through
21 |     # TensorFlow variables, and PROJECTOR can only visualize TensorFlow variables, so a new variable is defined here to hold the output values.
22 |     y = tf.Variable(final_result, name=TENSOR_NAME)
23 |     summary_writer = tf.summary.FileWriter(LOG_DIR)
24 | 
25 |     # Use the projector.ProjectorConfig class to help generate the log file.
26 |     config = projector.ProjectorConfig()
27 |     # Add an embedding result to be visualized.
28 |     embedding = config.embeddings.add()
29 |     # Specify the name of the TensorFlow variable that holds this embedding.
30 |     embedding.tensor_name = y.name
31 | 
32 |     # Specify where you find the metadata
33 |     # The metadata describes the raw data behind each embedding, e.g. the ground-truth class of each
34 |     # MNIST test image, or the word behind each word ID for word vectors. It is optional; without it the vectors carry no labels.
35 |     embedding.metadata_path = META_FILE
36 | 
37 |     # Specify where you find the sprite (we will create this later)
38 |     # The sprite image is also optional; if none is provided, each point is rendered
39 |     # as a small dot instead of the actual image.
40 |     # embedding.sprite.image_path = SPRITE_FILE
41 |     # When a sprite image is provided, single_image_dim gives the size of a single image,
42 |     # which is used to crop the correct original image out of the sprite.
43 |     # embedding.sprite.single_image_dim.extend([28, 28])
44 | 
45 |     # Say that you want to visualise the embeddings
46 |     # Write the content required by PROJECTOR into the log file.
47 |     projector.visualize_embeddings(summary_writer, config)
48 | 
49 |     # Create a session, initialize the newly declared variable, and write the required log information to disk.
50 |     sess = tf.InteractiveSession()
51 |     sess.run(tf.global_variables_initializer())
52 |     saver = tf.train.Saver()
53 |     saver.save(sess, os.path.join(LOG_DIR, "model"))
54 | 
55 |     summary_writer.close()
56 | 
57 | 
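58 | # Note (added): the projector expects META_FILE to be a plain TSV with one row
59 | # per embedding vector; when it has a single unnamed column, as here and in
60 | # tsne/tsv_radius.py, no header row is used.
61 | 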
62 | # The main function loads the output-layer (logits) matrix saved during evaluation
63 | # and writes it to the log files required by PROJECTOR.
64 | def main(argv=None):
65 | 
66 |     final_result = np.load(os.path.join(LOG_DIR, "final_logits.npy"))  # [:25000, :]
67 |     print(final_result.shape)
68 |     final_result = tf.constant(final_result, tf.float32)
69 |     visualisation(final_result)
70 | 
71 | 
72 | if __name__ == '__main__':
73 |     os.environ["CUDA_VISIBLE_DEVICES"] = '3'
74 |     main()
--------------------------------------------------------------------------------
/tsne/tsv_radius.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | 
4 | sys.path.append("../")
5 | 
6 | from libs.configs import cfgs
7 | 
8 | 
9 | # Quantize the angle labels in the t-SNE metadata file into bins of `omega`
10 | # degrees, producing a coarser metadata file for the TensorBoard projector.
11 | scr_tsv = './dcl_log/{}/dcl_meta.tsv'.format(cfgs.VERSION)
12 | omega = 180 / 4  # bin width in degrees
13 | 
14 | 
15 | fr = open(scr_tsv, 'r')
16 | lines = fr.readlines()
17 | fr.close()
18 | 
19 | fw_tsv = open(os.path.join('dcl_log/{}'.format(cfgs.VERSION), 'dcl_meta_{}.tsv'.format(omega)), 'w')
20 | # fw_tsv.write("Index\tLabel\n")
21 | for ii, l in enumerate(lines):
22 |     # The angle label is the last tab-separated field; with omega = 45, e.g.,
23 |     # an angle of 100 falls into bin 2 and is written out as 90.0.
24 |     index = int(l.split('\t')[-1].strip()) // (omega + 5e-5)
25 |     # index = min(int(l.split('\t')[-1].split('\n')[0]) // radius, 89)
26 |     fw_tsv.write("%.1f\n" % (index * omega))
27 | fw_tsv.close()
28 | 
--------------------------------------------------------------------------------