├── .DS_Store ├── .gitignore ├── CSL_DCL.png ├── LICENSE ├── README.md ├── data ├── .DS_Store ├── __init__.py ├── io │ ├── .DS_Store │ ├── DOTA │ │ ├── data_crop.py │ │ └── val_set.txt │ ├── FDDB │ │ ├── fddb.py │ │ └── txt2xml.py │ ├── HRSC2016 │ │ ├── __init__.py │ │ └── make_test_xml.py │ ├── ICDAR2015 │ │ └── txt2xml.py │ ├── MLT │ │ ├── filter_box.py │ │ └── txt2xml.py │ ├── OHD-SJTU │ │ ├── .DS_Store │ │ ├── data_crop.py │ │ ├── divide_data.py │ │ ├── filter_dota.py │ │ ├── ohd-sjtu-all-testset.txt │ │ ├── ohd-sjtu-testset.txt │ │ └── vis_op.py │ ├── SSDD++ │ │ └── divide_data.py │ ├── UCAS-AOD │ │ ├── split_data.py │ │ └── txt2xml.py │ ├── __init__.py │ ├── convert_data_to_tfrecord.py │ ├── image_preprocess.py │ ├── image_preprocess_multi_gpu.py │ ├── image_preprocess_multi_gpu_ohdet.py │ ├── read_tfrecord.py │ ├── read_tfrecord_multi_gpu.py │ └── read_tfrecord_multi_gpu_ohdet.py └── pretrained_weights │ ├── README.md │ ├── efficientnet │ ├── README.md │ ├── noisy-student-efficientnet-b1 │ │ └── results.txt │ └── noisy_student_efficientnet-b0 │ │ └── results.txt │ └── mobilenet │ └── README.md ├── demo1.png ├── eval_devkit ├── OHD_SJTU_evaluation_OBB.py ├── OHD_SJTU_evaluation_OHD.py ├── dota_evaluation_task1.py ├── poly_nms_gpu │ ├── Makefile │ ├── __init__.py │ ├── nms_wrapper.py │ ├── poly_nms.hpp │ ├── poly_nms.pyx │ ├── poly_nms_kernel.cu │ ├── poly_nms_test.py │ ├── poly_overlaps.hpp │ ├── poly_overlaps.pyx │ ├── poly_overlaps_kernel.cu │ └── setup.py ├── polyiou.cpp ├── polyiou.h ├── polyiou.i ├── polyiou.py ├── readme.md └── setup.py ├── feature_vis.png ├── help_utils ├── __init__.py ├── densely_coded_label.py ├── smooth_label.py └── tools.py ├── images.png ├── libs ├── .DS_Store ├── __init__.py ├── box_utils │ ├── __init__.py │ ├── anchor_utils.py │ ├── bbox_transform.py │ ├── boxes_utils.py │ ├── coordinate_convert.py │ ├── cython_utils │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── bbox.c │ │ ├── bbox.pyx │ │ ├── nms.c │ │ ├── nms.pyx │ │ └── setup.py │ ├── draw_box_in_img.py │ ├── generate_anchors.py │ ├── generate_rotate_anchors.py │ ├── iou.py │ ├── iou_cpu.cpp │ ├── iou_cpu.pyx │ ├── iou_rotate.py │ ├── mask_utils.py │ ├── nms_rotate.py │ ├── rbbox_overlaps.cpp │ ├── rbbox_overlaps.hpp │ ├── rbbox_overlaps.pyx │ ├── rbbox_overlaps_kernel.cu │ ├── rotate_anchors.jpg │ ├── rotate_gpu_nms.hpp │ ├── rotate_polygon_nms.cpp │ ├── rotate_polygon_nms.hpp │ ├── rotate_polygon_nms.pyx │ ├── rotate_polygon_nms_kernel.cu │ ├── setup.py │ ├── show_box_in_tensor.py │ └── tf_ops.py ├── configs │ ├── DOTA1.0 │ │ ├── __init__.py │ │ ├── baseline │ │ │ ├── __init__.py │ │ │ ├── cfgs_res50_dota_v15.py │ │ │ ├── cfgs_res50_dota_v4.py │ │ │ └── cfgs_res50_dota_win_v19.py │ │ ├── csl │ │ │ ├── cfgs_res50_dota_v1.py │ │ │ ├── cfgs_res50_dota_v41.py │ │ │ └── cfgs_res50_dota_v45.py │ │ ├── dcl │ │ │ ├── __init__.py │ │ │ ├── cfgs_res101_dota_dcl_v1.py │ │ │ ├── cfgs_res152_dota_dcl_v1.py │ │ │ ├── cfgs_res50_dota_dcl_v10.py │ │ │ └── cfgs_res50_dota_dcl_v5.py │ │ ├── dota_train │ │ │ ├── cfgs_res50_dotatrain_baseline_v1.py │ │ │ ├── cfgs_res50_dotatrain_baseline_v2.py │ │ │ ├── cfgs_res50_dotatrain_csl_v1.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v1.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v10.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v11.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v12.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v13.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v14.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v15.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v16.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v17.py │ │ │ ├── 
cfgs_res50_dotatrain_dcl_v18.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v2.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v3.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v4.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v5.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v6.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v7.py │ │ │ ├── cfgs_res50_dotatrain_dcl_v8.py │ │ │ └── cfgs_res50_dotatrain_dcl_v9.py │ │ └── r3det_dcl │ │ │ ├── cfgs_res152_dota_r3det_dcl_v1.py │ │ │ ├── cfgs_res50_dota_r3det_dcl_v1.py │ │ │ ├── cfgs_res50_dota_refine_dcl_v1.py │ │ │ └── cfgs_res50_dota_refine_dcl_v2.py │ ├── HRSC2016 │ │ ├── __init__.py │ │ ├── dcl │ │ │ └── cfgs_res101_hrsc2016_dcl_v1.py │ │ └── r3det_dcl │ │ │ ├── __init__.py │ │ │ ├── cfgs_res101_hrsc2016_r3det_dcl_v1.py │ │ │ ├── cfgs_res101_hrsc2016_r3det_dcl_v2.py │ │ │ └── cfgs_res152_hrsc2016_r3det_dcl_v1.py │ ├── ICDAR2015 │ │ ├── baseline │ │ │ ├── cfgs_res101_icdar2015_baseline_v2.py │ │ │ └── cfgs_res50_icdar2015_baseline_v1.py │ │ ├── csl │ │ │ └── cfgs_res101_icdar2015_csl_v1.py │ │ └── dcl │ │ │ ├── cfgs_res101_icdar2015_dcl_v4.py │ │ │ ├── cfgs_res50_icdar2015_dcl_v1.py │ │ │ ├── cfgs_res50_icdar2015_dcl_v2.py │ │ │ └── cfgs_res50_icdar2015_dcl_v3.py │ ├── MLT │ │ ├── baseline │ │ │ └── cfgs_res101_icdar2015_baseline_v1.py │ │ ├── csl │ │ │ └── cfgs_res101_mlt_csl_v1.py │ │ └── dcl │ │ │ └── cfgs_res101_mlt_dcl_v1.py │ ├── OHD-SJTU │ │ ├── cfgs_res101_ohd-sjtu-all_r3det_csl_ohdet_v1.py │ │ ├── cfgs_res101_ohd-sjtu-all_r3det_csl_ohdet_v2.py │ │ ├── cfgs_res101_ohd-sjtu-all_r3det_csl_v1.py │ │ ├── cfgs_res101_ohd-sjtu-all_r3det_csl_v2.py │ │ ├── cfgs_res101_ohd-sjtu-all_r3det_csl_v3.py │ │ ├── cfgs_res101_ohd-sjtu-all_r3det_v1.py │ │ ├── cfgs_res101_ohd-sjtu-all_v1.py │ │ ├── cfgs_res101_ohd-sjtu-all_v2.py │ │ ├── cfgs_res101_ohd-sjtu_gwd_v1.py │ │ ├── cfgs_res101_ohd-sjtu_r3det_csl_ohdet_v1.py │ │ ├── cfgs_res101_ohd-sjtu_r3det_csl_ohdet_v2.py │ │ ├── cfgs_res101_ohd-sjtu_r3det_csl_v1.py │ │ ├── cfgs_res101_ohd-sjtu_r3det_v1.py │ │ ├── cfgs_res101_ohd-sjtu_r3det_v2.py │ │ ├── cfgs_res101_ohd-sjtu_v1.py │ │ ├── cfgs_res101_ohd-sjtu_v2.py │ │ └── dcl │ │ │ └── cfgs_res101_ohd-sjtu-all_dcl_v1.py │ ├── SSDD++ │ │ ├── baseline │ │ │ └── cfgs_res101_ssdd++_baseline_v1.py │ │ └── dcl │ │ │ ├── cfgs_res101_ssdd++_dcl_v1.py │ │ │ └── cfgs_res101_ssdd++_dcl_v2.py │ ├── UCAS-AOD │ │ ├── baseline │ │ │ └── cfgs_res101_ucas-aod_baseline_v1.py │ │ ├── csl │ │ │ └── cfgs_res101_ucas-aod_csl_v1.py │ │ ├── dcl │ │ │ ├── cfgs_res101_ucas-aod_dcl_v1.py │ │ │ └── cfgs_res50_ucas-aod_dcl_v1.py │ │ └── r3det_dcl │ │ │ ├── __init__.py │ │ │ └── cfgs_res152_ucas-aod_r3det_dcl_v1.py │ ├── __init__.py │ └── cfgs.py ├── detection_oprations │ ├── __init__.py │ ├── anchor_target_layer_without_boxweight.py │ ├── anchor_target_layer_without_boxweight_.py │ ├── anchor_target_layer_without_boxweight_csl.py │ ├── anchor_target_layer_without_boxweight_dcl.py │ ├── anchor_target_layer_without_boxweight_dcl_batch.py │ ├── anchor_target_layer_without_boxweight_win.py │ ├── proposal_opr.py │ ├── proposal_opr_.py │ ├── proposal_opr_csl.py │ ├── proposal_opr_csl_tsne.py │ ├── proposal_opr_dcl.py │ ├── proposal_opr_dcl_tsne.py │ ├── proposal_opr_win.py │ ├── refine_proposal_opr_dcl.py │ ├── refine_proposal_opr_dcl_.py │ └── refinebox_target_layer_without_boxweight_dcl.py ├── label_name_dict │ ├── __init__.py │ ├── coco_dict.py │ └── label_dict.py ├── losses │ ├── __init__.py │ ├── losses.py │ ├── losses_dcl.py │ └── losses_win.py ├── networks │ ├── __init__.py │ ├── build_whole_network.py │ ├── build_whole_network_csl.py │ 
├── build_whole_network_csl_tsne.py │ ├── build_whole_network_dcl.py │ ├── build_whole_network_dcl_batch.py │ ├── build_whole_network_dcl_tsne.py │ ├── build_whole_network_r3det_dcl.py │ ├── build_whole_network_refine_dcl.py │ ├── build_whole_network_win.py │ ├── efficientnet │ │ ├── __init__.py │ │ ├── condconv │ │ │ ├── condconv_layers.py │ │ │ └── efficientnet_condconv_builder.py │ │ ├── demo.py │ │ ├── efficientnet_builder.py │ │ ├── efficientnet_lite_builder.py │ │ ├── efficientnet_model.py │ │ ├── panda.jpg │ │ ├── test.py │ │ └── utils.py │ ├── mobilenet │ │ ├── README.md │ │ ├── __init__.py │ │ ├── conv_blocks.py │ │ ├── mobilenet.py │ │ ├── mobilenet_v2.py │ │ └── mobilenet_v2_test.py │ ├── mobilenet_v2.py │ ├── opts.py │ ├── resnet.py │ ├── resnet_gluoncv.py │ ├── slim_nets │ │ ├── __init__.py │ │ ├── alexnet.py │ │ ├── alexnet_test.py │ │ ├── cifarnet.py │ │ ├── inception.py │ │ ├── inception_resnet_v2.py │ │ ├── inception_resnet_v2_test.py │ │ ├── inception_utils.py │ │ ├── inception_v1.py │ │ ├── inception_v1_test.py │ │ ├── inception_v2.py │ │ ├── inception_v2_test.py │ │ ├── inception_v3.py │ │ ├── inception_v3_test.py │ │ ├── inception_v4.py │ │ ├── inception_v4_test.py │ │ ├── lenet.py │ │ ├── mobilenet_v1.md │ │ ├── mobilenet_v1.png │ │ ├── mobilenet_v1.py │ │ ├── mobilenet_v1_test.py │ │ ├── nets_factory.py │ │ ├── nets_factory_test.py │ │ ├── overfeat.py │ │ ├── overfeat_test.py │ │ ├── resnet_utils.py │ │ ├── resnet_v1.py │ │ ├── resnet_v1_test.py │ │ ├── resnet_v2.py │ │ ├── resnet_v2_test.py │ │ ├── vgg.py │ │ └── vgg_test.py │ ├── xception.bak.py │ └── xception.py ├── setup.py └── val_libs │ ├── __init__.py │ ├── voc_eval.py │ └── voc_eval_r.py ├── output ├── .DS_Store └── trained_weights │ ├── .DS_Store │ └── README.md ├── pipeline.png ├── scalars.png ├── tools ├── .DS_Store ├── __init__.py ├── inference.py ├── multi_gpu_train.py ├── multi_gpu_train_csl.py ├── multi_gpu_train_dcl.py ├── multi_gpu_train_dcl_batch.py ├── multi_gpu_train_r3det_dcl.py ├── multi_gpu_train_refine_dcl.py ├── multi_gpu_train_win.py ├── test_dota_csl_ms.py ├── test_dota_dcl_ms.py ├── test_dota_ms.py ├── test_dota_r3det_dcl_ms.py ├── test_dota_refine_dcl_ms.py ├── test_hrsc2016.py ├── test_hrsc2016_dcl.py ├── test_hrsc2016_r3det_dcl_ms.py ├── test_icdar2015_csl_ms.py ├── test_icdar2015_dcl_ms.py ├── test_icdar2015_ms.py ├── test_mlt_csl_ms.py ├── test_mlt_dcl_ms.py ├── test_mlt_ms.py ├── test_ohd-sjtu_dcl_ms.py ├── test_ohd-sjtu_ms.py ├── test_ssdd_dcl_ms.py ├── test_ssdd_ms.py ├── test_ucas-aod_csl_ms.py ├── test_ucas-aod_dcl_ms.py ├── test_ucas-aod_ms.py └── test_ucas-aod_r3det_dcl_ms.py └── tsne ├── feature_extract_csl.py ├── feature_extract_dcl.py ├── tsne.py └── tsv_radius.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are 
written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | .idea/ 104 | .pyc 105 | .so 106 | *.data-00000-of-00001 107 | *.index 108 | *.meta 109 | events.* 110 | checkpoint 111 | __pycache__/ 112 | *.json 113 | *.zip 114 | *.pb 115 | *.pbtxt 116 | *.tflite 117 | *info.txt 118 | *events.out.tfevents* 119 | *.ckpt 120 | *.pb 121 | *.tfrecord* 122 | *plusplus_.py* 123 | *flops_param* 124 | *r3det_loss** 125 | *refine_feature_multi_stage* 126 | *mining_loss* 127 | *ablation_study* 128 | *tmp* 129 | *head* 130 | *.mp4* 131 | *.MP4* 132 | *inld* 133 | 134 | demo 135 | tools/demo.py 136 | tools/demos/* 137 | tools/test_dota/* 138 | tools/test_icdar2015/* 139 | tools/test_ohd_sjtu/* 140 | output/summary/* 141 | data/tfrecord/* 142 | -------------------------------------------------------------------------------- /CSL_DCL.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/CSL_DCL.png -------------------------------------------------------------------------------- /data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/data/.DS_Store -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/data/__init__.py -------------------------------------------------------------------------------- /data/io/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/data/io/.DS_Store -------------------------------------------------------------------------------- /data/io/HRSC2016/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/data/io/HRSC2016/__init__.py -------------------------------------------------------------------------------- 
/data/io/MLT/filter_box.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | filter_thres = 0.45 5 | res_files = os.listdir('../../../tools/test_mlt/RetinaNet_MLT_CSL_2x_20201007') 6 | filter_res_path = '../../../tools/test_mlt/RetinaNet_MLT_CSL_2x_20201007_{}'.format(filter_thres) 7 | 8 | if not os.path.exists(filter_res_path): 9 | os.makedirs(filter_res_path) 10 | 11 | for rf in res_files: 12 | fr = open('../../../tools/test_mlt/RetinaNet_MLT_CSL_2x_20201007/{}'.format(rf), 'r') 13 | fw = open('{}/{}'.format(filter_res_path, rf), 'w') 14 | lines = fr.readlines() 15 | for line in lines: 16 | if float(line.split(',')[-1].split('\n')[0]) > filter_thres: # the last comma-separated field of each detection line is its confidence score 17 | fw.write(line) 18 | fr.close() 19 | fw.close() -------------------------------------------------------------------------------- /data/io/OHD-SJTU/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/data/io/OHD-SJTU/.DS_Store -------------------------------------------------------------------------------- /data/io/OHD-SJTU/divide_data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import sys 4 | sys.path.append('../../') 5 | import shutil 6 | import os 7 | import random 8 | import math 9 | 10 | 11 | def mkdir(path): 12 | if not os.path.exists(path): 13 | os.makedirs(path) 14 | 15 | 16 | divide_rate = 0.7 17 | 18 | root_path = '/data/yangxue/dataset/OHD-SJTU' 19 | 20 | image_path = root_path + '/all_data/images' 21 | rotation_txt_path = root_path + '/all_data/rotation_txt' 22 | polygon_txt_path = root_path + '/all_data/polygon_txt' 23 | 24 | image_list = os.listdir(image_path) 25 | 26 | image_name = [n.split('.')[0] for n in image_list] 27 | 28 | random.shuffle(image_name) 29 | 30 | train_image = image_name[:int(math.ceil(len(image_name) * divide_rate))] # ceil must wrap the whole product; math.ceil(len(image_name)) alone is a no-op on an int 31 | test_image = image_name[int(math.ceil(len(image_name) * divide_rate)):] 32 | 33 | image_output_train = os.path.join(root_path, 'trainval/images') 34 | mkdir(image_output_train) 35 | image_output_test = os.path.join(root_path, 'test/images') 36 | mkdir(image_output_test) 37 | 38 | polygon_txt_output_train = os.path.join(root_path, 'trainval/polygon_txt') 39 | mkdir(polygon_txt_output_train) 40 | polygon_txt_output_test = os.path.join(root_path, 'test/polygon_txt') 41 | mkdir(polygon_txt_output_test) 42 | 43 | rotation_txt_output_train = os.path.join(root_path, 'trainval/rotation_txt') 44 | mkdir(rotation_txt_output_train) 45 | rotation_txt_output_test = os.path.join(root_path, 'test/rotation_txt') 46 | mkdir(rotation_txt_output_test) 47 | 48 | 49 | count = 0 50 | for i in train_image: 51 | shutil.copy(os.path.join(image_path, i + '.jpg'), os.path.join(image_output_train, 'P{}.jpg'.format(count))) 52 | shutil.copy(os.path.join(polygon_txt_path, i + '.txt'), os.path.join(polygon_txt_output_train, 'P{}.txt'.format(count))) 53 | shutil.copy(os.path.join(rotation_txt_path, i + '.txt'), os.path.join(rotation_txt_output_train, 'P{}.txt'.format(count))) 54 | if count % 10 == 0: 55 | print("process step {}".format(count)) 56 | count += 1 57 | 58 | for i in test_image: # count keeps increasing here, so test files continue the P{n} numbering without clashing with trainval 59 |
shutil.copy(os.path.join(image_path, i + '.jpg'), os.path.join(image_output_test, 'P{}.jpg'.format(count))) 60 | shutil.copy(os.path.join(polygon_txt_path, i + '.txt'), os.path.join(polygon_txt_output_test, 'P{}.txt'.format(count))) 61 | shutil.copy(os.path.join(rotation_txt_path, i + '.txt'), os.path.join(rotation_txt_output_test, 'P{}.txt'.format(count))) 62 | if count % 10 == 0: 63 | print("process step {}".format(count)) 64 | count += 1 65 | 66 | -------------------------------------------------------------------------------- /data/io/OHD-SJTU/filter_dota.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | 4 | image_dir = '/data/yangxue/dataset/DOTA/val/images/images' 5 | txt_dir = '/data/yangxue/dataset/DOTA/val/labelTxt/labelTxt' 6 | 7 | save_image_dir = '/data/yangxue/dataset/OHD-SJTU-LARGE/test/images' 8 | save_txt_dir = '/data/yangxue/dataset/OHD-SJTU-LARGE/test/rotation_txt' 9 | 10 | 11 | class_list = ['plane', 'small-vehicle', 'large-vehicle', 'ship', 'harbor', 'helicopter'] 12 | 13 | all_txt = os.listdir(txt_dir) 14 | 15 | for t in all_txt: 16 | fr = open(os.path.join(txt_dir, t), 'r') 17 | lines = fr.readlines() 18 | fw = open(os.path.join(save_txt_dir, t), 'w') 19 | cnt = 0 20 | for line in lines: 21 | if len(line.split(' ')) < 9: 22 | continue 23 | 24 | label = line.split(' ')[8] 25 | if label not in class_list: 26 | continue 27 | 28 | box = [int(xy) for xy in line.split(' ')[:8]] 29 | 30 | difficult = line.split(' ')[-1] 31 | 32 | new_line = '{} {} {} {} {} {} {} {} {} {} {} {}'.format(box[0], box[1], box[2], box[3], 33 | box[4], box[5], box[6], box[7], 34 | (box[0] + box[2]) // 2, 35 | (box[1] + box[3]) // 2, 36 | label, difficult) 37 | fw.write(new_line) 38 | cnt += 1 39 | fw.close() 40 | fr.close() 41 | 42 | if cnt == 0: 43 | os.remove(os.path.join(save_txt_dir, t)) 44 | else: 45 | shutil.copy(os.path.join(image_dir, t.replace('.txt', '.png')), os.path.join(save_image_dir, t.replace('.txt', '.jpg'))) -------------------------------------------------------------------------------- /data/io/OHD-SJTU/ohd-sjtu-all-testset.txt: -------------------------------------------------------------------------------- 1 | P1529 2 | P0964 3 | P1007 4 | P40 5 | P1598 6 | P1122 7 | P0970 8 | P0019 9 | P0249 10 | P0060 11 | P0199 12 | P0962 13 | P0998 14 | P0168 15 | P2059 16 | P0764 17 | P1266 18 | P2068 19 | P1090 20 | P1604 21 | P2789 22 | P1095 23 | P0564 24 | P0623 25 | P2587 26 | P2429 27 | P0189 28 | P2050 29 | P1940 30 | P1384 31 | P1738 32 | P2608 33 | P0837 34 | P2582 35 | P1283 36 | P0486 37 | P1091 38 | P2126 39 | P0559 40 | P2598 41 | P1212 42 | P0348 43 | P0929 44 | P1178 45 | P0261 46 | P2155 47 | P1398 48 | P1904 49 | P0382 50 | P1982 51 | P0550 52 | P2097 53 | P1219 54 | P1957 55 | P1390 56 | P0420 57 | P1512 58 | P0841 59 | P0712 60 | P0056 61 | P0414 62 | P1742 63 | P0795 64 | P0342 65 | P0003 66 | P1278 67 | P0329 68 | P1257 69 | P0007 70 | P2181 71 | P1825 72 | P2111 73 | P2625 74 | P1478 75 | P0454 76 | P1143 77 | P0004 78 | P2124 79 | P0704 80 | P1128 81 | P39 82 | P0796 83 | P0660 84 | P1133 85 | P1315 86 | P0683 87 | P2197 88 | P1088 89 | P0989 90 | P0725 91 | P1860 92 | P1518 93 | P0117 94 | P0128 95 | P1386 96 | P1376 97 | P2198 98 | P0969 99 | P2689 100 | P2678 101 | P1023 102 | P0763 103 | P1099 104 | P2236 105 | P1138 106 | P0259 107 | P1137 108 | P1838 109 | P1492 110 | P0961 111 | P1268 112 | P1179 113 | P0476 114 | P0524 115 | P0968 116 | P2082 117 | P0300 118 | P1147 119 | P1051 120 | 
P1275 121 | P2802 122 | P2027 123 | P0990 124 | P0237 125 | P0647 126 | P2285 127 | P0932 128 | P0816 129 | P0170 130 | P0590 131 | P0557 132 | P1878 133 | P2630 134 | P0547 135 | P1101 136 | P0262 137 | P2218 138 | P1513 139 | P1065 140 | P2239 141 | P0086 142 | P0974 143 | P2617 144 | P1672 145 | P31 146 | P1473 147 | P0173 148 | P1410 149 | P0305 150 | P2610 151 | P0801 152 | P1880 153 | P1156 154 | P2645 155 | P2771 156 | P1601 157 | P0949 158 | P1269 159 | P0684 160 | P1983 161 | P1332 162 | P2781 163 | P1809 164 | P1995 165 | P1134 166 | P2310 167 | P0309 168 | P0347 169 | P1541 170 | P2701 171 | P0385 172 | P2754 173 | P0936 174 | P0613 175 | P0526 176 | P0411 177 | P1476 178 | P34 179 | P0518 180 | P1234 181 | P0665 182 | P37 183 | P0053 184 | P1314 185 | P1397 186 | P0027 187 | P0336 188 | P0352 189 | P1508 190 | P0706 191 | P1452 192 | P0179 193 | P2331 194 | P2599 195 | P0217 196 | P1911 197 | P1066 198 | P1029 199 | P1787 200 | P0643 201 | P36 202 | P0577 203 | P1992 204 | P1474 205 | P2322 206 | P2042 207 | P0206 208 | P38 209 | P0761 210 | P1610 211 | P2271 212 | P1184 213 | P0353 214 | P42 215 | P1005 216 | P0368 217 | P1950 218 | P1903 219 | P0904 220 | P0110 221 | P0787 222 | P1014 223 | P0715 224 | P0791 225 | P2093 226 | P1075 227 | P0161 228 | P1021 229 | P0833 230 | P1342 231 | P0882 232 | P1189 233 | P2242 234 | P1924 235 | P35 236 | P2726 237 | P1623 238 | P32 239 | P0910 240 | P0589 241 | P2709 242 | P0858 243 | P0615 244 | P0374 245 | P0789 246 | P2791 247 | P2166 248 | P2230 249 | P0977 250 | P0375 251 | P2220 252 | P0525 253 | P1958 254 | P1973 255 | P1225 256 | P0945 257 | P1030 258 | P2733 259 | P2779 260 | P2214 261 | P2286 262 | P0583 263 | P1356 264 | P1246 265 | P1213 266 | P0814 267 | P41 268 | P33 269 | P1434 270 | P2294 271 | P2215 272 | P0579 273 | P2003 274 | P1806 275 | P1566 276 | P1149 277 | P0729 278 | P1009 279 | P1022 280 | P0749 281 | P1960 282 | P1483 283 | P1786 284 | P1770 285 | P0887 286 | P2758 287 | P1829 288 | P1749 289 | P0457 290 | P1103 291 | P2778 292 | P2766 293 | P0551 294 | P0654 295 | P1877 296 | P1978 297 | P0622 298 | P0432 299 | P1905 300 | P1373 301 | P2570 302 | P0897 303 | P0604 304 | P0839 305 | P0650 306 | P0213 307 | P2231 308 | P30 309 | P0558 310 | P1154 311 | P0175 312 | P0246 313 | P0331 314 | P0543 315 | P0123 316 | P1854 317 | P1230 318 | P1273 319 | P0104 320 | P0953 321 | P0799 322 | P1242 323 | P2241 324 | P2794 325 | P0257 326 | P1821 327 | P0864 328 | P0194 329 | P0838 330 | P2088 331 | P1429 332 | P1583 333 | P1888 334 | P0696 335 | P0499 336 | -------------------------------------------------------------------------------- /data/io/OHD-SJTU/ohd-sjtu-testset.txt: -------------------------------------------------------------------------------- 1 | P34 2 | P32 3 | P30 4 | P42 5 | P40 6 | P31 7 | P35 8 | P33 9 | P41 10 | P38 11 | P37 12 | P39 13 | P36 14 | -------------------------------------------------------------------------------- /data/io/OHD-SJTU/vis_op.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | 5 | 6 | img_path = '/data/yangxue/dataset/OHD-SJTU/all_data/images' 7 | images_vis_path = '/data/yangxue/dataset/OHD-SJTU/all_data/images_vis' 8 | txt_path = '/data/yangxue/dataset/OHD-SJTU/all_data/polygon_txt' 9 | 10 | 11 | all_txt = os.listdir(txt_path) 12 | 13 | 14 | for t in all_txt: 15 | 16 | img = cv2.imread(os.path.join(img_path, t.replace('txt', 'jpg'))) 17 | fr = open(os.path.join(txt_path, t), 'r') 18 | data 
= fr.readlines() 19 | fr.close() 20 | print(len(data)) 21 | 22 | for d in data: 23 | dd = [int(float(xy)) for xy in d.split(' ')[:-1]] 24 | 25 | if d.split(' ')[-1] == 'ship\n': 26 | color = (0, 255, 0) 27 | else: 28 | color = (255, 0, 0) 29 | dd_ = np.array(dd).reshape(-1, 2) 30 | cv2.polylines(img, [dd_], thickness=3, color=color, isClosed=True) 31 | cv2.line(img, (dd[0], dd[1]), (dd[0], dd[1]), thickness=10, color=(0, 0, 255)) # zero-length line with thickness 10, i.e. a red dot marking the first vertex (the annotated head point) 32 | cv2.imwrite(os.path.join(images_vis_path, t.replace('txt', 'jpg')), img) 33 | -------------------------------------------------------------------------------- /data/io/SSDD++/divide_data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import sys 4 | sys.path.append('../../') 5 | import shutil 6 | import os 7 | import random 8 | import math 9 | 10 | 11 | def mkdir(path): 12 | if not os.path.exists(path): 13 | os.makedirs(path) 14 | 15 | 16 | divide_rate = 0.8 17 | 18 | root_path = '/data2/yangxue/dataset' 19 | 20 | image_path = root_path + '/SSDD++/JPEGImages' 21 | xml_path = root_path + '/SSDD++/Annotations' 22 | 23 | image_list = os.listdir(image_path) 24 | 25 | image_name = [n.split('.')[0] for n in image_list] 26 | 27 | random.shuffle(image_name) 28 | 29 | train_image = image_name[:int(math.ceil(len(image_name) * divide_rate))] # ceil must wrap the whole product, as in the other split scripts 30 | print('train image number:', len(train_image)) 31 | test_image = image_name[int(math.ceil(len(image_name) * divide_rate)):] 32 | print('test image number:', len(test_image)) 33 | 34 | image_output_train = os.path.join(root_path, 'SSDD++/train/JPEGImages') 35 | mkdir(image_output_train) 36 | image_output_test = os.path.join(root_path, 'SSDD++/test/JPEGImages') 37 | mkdir(image_output_test) 38 | 39 | xml_train = os.path.join(root_path, 'SSDD++/train/Annotations') 40 | mkdir(xml_train) 41 | xml_test = os.path.join(root_path, 'SSDD++/test/Annotations') 42 | mkdir(xml_test) 43 | 44 | 45 | count = 0 46 | for i in train_image: 47 | shutil.copy(os.path.join(image_path, i + '.jpg'), image_output_train) 48 | shutil.copy(os.path.join(xml_path, i + '.xml'), xml_train) 49 | if count % 1000 == 0: 50 | print("process step {}".format(count)) 51 | count += 1 52 | 53 | for i in test_image: 54 | shutil.copy(os.path.join(image_path, i + '.jpg'), image_output_test) 55 | shutil.copy(os.path.join(xml_path, i + '.xml'), xml_test) 56 | if count % 1000 == 0: 57 | print("process step {}".format(count)) 58 | count += 1 59 | -------------------------------------------------------------------------------- /data/io/UCAS-AOD/split_data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import sys 4 | sys.path.append('../../') 5 | import shutil 6 | import os 7 | import random 8 | import math 9 | 10 | 11 | def mkdir(path): 12 | if not os.path.exists(path): 13 | os.makedirs(path) 14 | 15 | 16 | divide_rate = 0.7351 17 | 18 | root_path = '/data/dataset/UCAS-AOD' 19 | 20 | image_path = root_path + '/images' 21 | xml_path = root_path + '/label-xml' 22 | 23 | image_list = os.listdir(image_path) 24 | 25 | image_name = [n.split('.')[0] for n in image_list] 26 | 27 | random.shuffle(image_name) 28 | 29 | train_image = image_name[:int(math.ceil(len(image_name) * divide_rate))] 30 | test_image = image_name[int(math.ceil(len(image_name) * divide_rate)):] 31 | 32 | image_output_train =
os.path.join(root_path, 'VOCdevkit_train/JPEGImages') 33 | mkdir(image_output_train) 34 | image_output_test = os.path.join(root_path, 'VOCdevkit_test/JPEGImages') 35 | mkdir(image_output_test) 36 | 37 | xml_train = os.path.join(root_path, 'VOCdevkit_train/Annotations') 38 | mkdir(xml_train) 39 | xml_test = os.path.join(root_path, 'VOCdevkit_test/Annotations') 40 | mkdir(xml_test) 41 | 42 | 43 | count = 0 44 | for i in train_image: 45 | shutil.copy(os.path.join(image_path, i + '.png'), image_output_train) 46 | shutil.copy(os.path.join(xml_path, i + '.xml'), xml_train) 47 | if count % 100 == 0: 48 | print("process step {}".format(count)) 49 | count += 1 50 | 51 | for i in test_image: 52 | shutil.copy(os.path.join(image_path, i + '.png'), image_output_test) 53 | shutil.copy(os.path.join(xml_path, i + '.xml'), xml_test) 54 | if count % 100 == 0: 55 | print("process step {}".format(count)) 56 | count += 1 57 | -------------------------------------------------------------------------------- /data/io/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/data/io/__init__.py -------------------------------------------------------------------------------- /data/io/image_preprocess.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | from __future__ import division 6 | 7 | import tensorflow as tf 8 | import numpy as np 9 | 10 | from libs.configs import cfgs 11 | 12 | 13 | def max_length_limitation(length, length_limitation): 14 | return tf.cond(tf.less(length, length_limitation), 15 | true_fn=lambda: length, 16 | false_fn=lambda: length_limitation) 17 | 18 | 19 | def short_side_resize(img_tensor, gtboxes_and_label, target_shortside_len, length_limitation=1200): 20 | ''' 21 | 22 | :param img_tensor:[h, w, c], gtboxes_and_label:[-1, 5]. 
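each row holds the 4 box coordinates followed by the class label, i.e.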
gtboxes: [xmin, ymin, xmax, ymax] 23 | :param target_shortside_len: 24 | :param length_limitation: set max length to avoid OUT OF MEMORY 25 | :return: 26 | ''' 27 | img_h, img_w = tf.shape(img_tensor)[0], tf.shape(img_tensor)[1] 28 | new_h, new_w = tf.cond(tf.less(img_h, img_w), 29 | true_fn=lambda: (target_shortside_len, 30 | max_length_limitation(target_shortside_len * img_w // img_h, length_limitation)), 31 | false_fn=lambda: (max_length_limitation(target_shortside_len * img_h // img_w, length_limitation), 32 | target_shortside_len)) 33 | 34 | img_tensor = tf.expand_dims(img_tensor, axis=0) 35 | img_tensor = tf.image.resize_bilinear(img_tensor, [new_h, new_w]) 36 | 37 | xmin, ymin, xmax, ymax, label = tf.unstack(gtboxes_and_label, axis=1) 38 | 39 | new_xmin, new_ymin = xmin * new_w // img_w, ymin * new_h // img_h 40 | new_xmax, new_ymax = xmax * new_w // img_w, ymax * new_h // img_h 41 | img_tensor = tf.squeeze(img_tensor, axis=0) # ensure image tensor rank is 3 42 | 43 | return img_tensor, tf.transpose(tf.stack([new_xmin, new_ymin, new_xmax, new_ymax, label], axis=0)) 44 | 45 | 46 | def short_side_resize_for_inference_data(img_tensor, target_shortside_len, length_limitation=1200, is_resize=True): 47 | if is_resize: 48 | img_h, img_w = tf.shape(img_tensor)[0], tf.shape(img_tensor)[1] 49 | 50 | new_h, new_w = tf.cond(tf.less(img_h, img_w), 51 | true_fn=lambda: (target_shortside_len, 52 | max_length_limitation(target_shortside_len * img_w // img_h, length_limitation)), 53 | false_fn=lambda: (max_length_limitation(target_shortside_len * img_h // img_w, length_limitation), 54 | target_shortside_len)) 55 | 56 | img_tensor = tf.expand_dims(img_tensor, axis=0) 57 | img_tensor = tf.image.resize_bilinear(img_tensor, [new_h, new_w]) 58 | 59 | img_tensor = tf.squeeze(img_tensor, axis=0) # ensure image tensor rank is 3 60 | return img_tensor 61 | 62 | 63 | def flip_left_to_right(img_tensor, gtboxes_and_label): 64 | 65 | h, w = tf.shape(img_tensor)[0], tf.shape(img_tensor)[1] 66 | 67 | img_tensor = tf.image.flip_left_right(img_tensor) 68 | 69 | xmin, ymin, xmax, ymax, label = tf.unstack(gtboxes_and_label, axis=1) 70 | new_xmax = w - xmin 71 | new_xmin = w - xmax 72 | 73 | return img_tensor, tf.transpose(tf.stack([new_xmin, ymin, new_xmax, ymax, label], axis=0)) 74 | 75 | 76 | def random_flip_left_right(img_tensor, gtboxes_and_label): 77 | img_tensor, gtboxes_and_label = tf.cond(tf.less(tf.random_uniform(shape=[], minval=0, maxval=1), 0.5), 78 | lambda: flip_left_to_right(img_tensor, gtboxes_and_label), 79 | lambda: (img_tensor, gtboxes_and_label)) 80 | 81 | return img_tensor, gtboxes_and_label 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /data/pretrained_weights/README.md: -------------------------------------------------------------------------------- 1 | 1. Please download [resnet50_v1](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz), [resnet101_v1](http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz), [resnet152_v1](http://download.tensorflow.org/models/resnet_v1_152_2016_08_28.tar.gz), [efficientnet](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet), [mobilenet_v2](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz) pre-trained models on Imagenet. 2 | 2. **(Recommend in this repo)** Or you can choose to use a better backbone (resnet_v1d), refer to [gluon2TF](https://github.com/yangJirui/gluon2TF). 
3 | * [Baidu Drive](https://pan.baidu.com/s/1GpqKg0dOaaWmwshvv1qWGg), password: 5ht9. 4 | * [Google Drive](https://drive.google.com/drive/folders/1BM8ffn1WnsRRb5RcuAcyJAHX8NS2M1Gz?usp=sharing) 5 | 3. Path tree of pretrained_weight 6 | ``` 7 | ├── pretrained_weight 8 | │   ├── efficientnet 9 | │   ├── efficientnet-b0 10 | │   ├── checkpoint 11 | │   ├── model.ckpt.data-00000-of-00001 12 | │   ├── model.ckpt.index 13 | │   ├── model.ckpt.meta 14 | │ ├── mobilenet 15 | │   ├── mobilenet_v1_0.25_128.ckpt.data-00000-of-00001 16 | │   ├── mobilenet_v1_0.25_128.ckpt.index 17 | │   ├── mobilenet_v1_0.25_128.ckpt.meta 18 | │   ├── mobilenet_v1_0.25_128.tflite 19 | │   ├── mobilenet_v1_0.25_128_eval.pbtxt 20 | │   ├── mobilenet_v1_0.25_128_frozen.pb 21 | │   ├── mobilenet_v1_0.25_128_info.txt 22 | │ ├── resnet_v1_50.ckpt 23 | │ ├── resnet50_v1d.ckpt.index 24 | │ ├── resnet50_v1d.ckpt.data-00000-of-00001 25 | │ ├── resnet50_v1d.ckpt.meta 26 | ``` 27 | -------------------------------------------------------------------------------- /data/pretrained_weights/efficientnet/README.md: -------------------------------------------------------------------------------- 1 | 1. Please download [resnet50_v1](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz), [resnet101_v1](http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz), [resnet152_v1](http://download.tensorflow.org/models/resnet_v1_152_2016_08_28.tar.gz), [efficientnet](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet), [mobilenet_v2](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz) pre-trained models on Imagenet. 2 | 2. **(Recommend in this repo)** Or you can choose to use a better backbone (resnet_v1d), refer to [gluon2TF](https://github.com/yangJirui/gluon2TF). 3 | * [Baidu Drive](https://pan.baidu.com/s/1GpqKg0dOaaWmwshvv1qWGg), password: 5ht9. 4 | * [Google Drive](https://drive.google.com/drive/folders/1BM8ffn1WnsRRb5RcuAcyJAHX8NS2M1Gz?usp=sharing) 5 | 3. 
Path tree of pretrained_weight 6 | ``` 7 | ├── pretrained_weight 8 | │   ├── efficientnet 9 | │   ├── efficientnet-b0 10 | │   ├── checkpoint 11 | │   ├── model.ckpt.data-00000-of-00001 12 | │   ├── model.ckpt.index 13 | │   ├── model.ckpt.meta 14 | │ ├── mobilenet 15 | │   ├── mobilenet_v1_0.25_128.ckpt.data-00000-of-00001 16 | │   ├── mobilenet_v1_0.25_128.ckpt.index 17 | │   ├── mobilenet_v1_0.25_128.ckpt.meta 18 | │   ├── mobilenet_v1_0.25_128.tflite 19 | │   ├── mobilenet_v1_0.25_128_eval.pbtxt 20 | │   ├── mobilenet_v1_0.25_128_frozen.pb 21 | │   ├── mobilenet_v1_0.25_128_info.txt 22 | │ ├── resnet_v1_50.ckpt 23 | │ ├── resnet50_v1d.ckpt.index 24 | │ ├── resnet50_v1d.ckpt.data-00000-of-00001 25 | │ ├── resnet50_v1d.ckpt.meta 26 | ``` 27 | -------------------------------------------------------------------------------- /data/pretrained_weights/efficientnet/noisy-student-efficientnet-b1/results.txt: -------------------------------------------------------------------------------- 1 | {'top_1_accuracy': 0.81528, 'top_5_accuracy': 0.95792} 2 | -------------------------------------------------------------------------------- /data/pretrained_weights/efficientnet/noisy_student_efficientnet-b0/results.txt: -------------------------------------------------------------------------------- 1 | {'top_1_accuracy': 0.78844, 'top_5_accuracy': 0.9451} 2 | -------------------------------------------------------------------------------- /data/pretrained_weights/mobilenet/README.md: -------------------------------------------------------------------------------- 1 | 1. Please download [resnet50_v1](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz), [resnet101_v1](http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz), [resnet152_v1](http://download.tensorflow.org/models/resnet_v1_152_2016_08_28.tar.gz), [efficientnet](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet), [mobilenet_v2](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz) pre-trained models on Imagenet. 2 | 2. **(Recommend in this repo)** Or you can choose to use a better backbone (resnet_v1d), refer to [gluon2TF](https://github.com/yangJirui/gluon2TF). 3 | * [Baidu Drive](https://pan.baidu.com/s/1GpqKg0dOaaWmwshvv1qWGg), password: 5ht9. 4 | * [Google Drive](https://drive.google.com/drive/folders/1BM8ffn1WnsRRb5RcuAcyJAHX8NS2M1Gz?usp=sharing) 5 | 3. 
Path tree of pretrained_weight 6 | ``` 7 | ├── pretrained_weight 8 | │   ├── efficientnet 9 | │   ├── efficientnet-b0 10 | │   ├── checkpoint 11 | │   ├── model.ckpt.data-00000-of-00001 12 | │   ├── model.ckpt.index 13 | │   ├── model.ckpt.meta 14 | │ ├── mobilenet 15 | │   ├── mobilenet_v1_0.25_128.ckpt.data-00000-of-00001 16 | │   ├── mobilenet_v1_0.25_128.ckpt.index 17 | │   ├── mobilenet_v1_0.25_128.ckpt.meta 18 | │   ├── mobilenet_v1_0.25_128.tflite 19 | │   ├── mobilenet_v1_0.25_128_eval.pbtxt 20 | │   ├── mobilenet_v1_0.25_128_frozen.pb 21 | │   ├── mobilenet_v1_0.25_128_info.txt 22 | │ ├── resnet_v1_50.ckpt 23 | │ ├── resnet50_v1d.ckpt.index 24 | │ ├── resnet50_v1d.ckpt.data-00000-of-00001 25 | │ ├── resnet50_v1d.ckpt.meta 26 | ``` 27 | -------------------------------------------------------------------------------- /demo1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/demo1.png -------------------------------------------------------------------------------- /eval_devkit/poly_nms_gpu/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /eval_devkit/poly_nms_gpu/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/eval_devkit/poly_nms_gpu/__init__.py -------------------------------------------------------------------------------- /eval_devkit/poly_nms_gpu/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | # from nms.gpu_nms import gpu_nms 9 | # from nms.cpu_nms import cpu_nms 10 | from .poly_nms import poly_gpu_nms 11 | def poly_nms_gpu(dets, thresh, force_cpu=False): 12 | """Dispatch to either CPU or GPU NMS implementations.""" 13 | 14 | if dets.shape[0] == 0: 15 | return [] 16 | return poly_gpu_nms(dets, thresh, device_id=0) 17 | 18 | -------------------------------------------------------------------------------- /eval_devkit/poly_nms_gpu/poly_nms.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by dingjian on 18-5-24. 
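// GPU polygon NMS: _poly_nms (declared below) fills keep_out with the indices of the
// polygons kept at nms_overlap_thresh, relative to the score-sorted input, and writes
// the number kept to num_out; the poly_nms.pyx wrapper that follows consumes it exactly this way.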
3 | // 4 | 5 | #ifndef DOTA_DEVKIT_POLY_NMS_HPP 6 | #define DOTA_DEVKIT_POLY_NMS_HPP 7 | 8 | 9 | void _poly_nms(int* keep_out, int* num_out, const float* polys_host, int polys_num, 10 | int polys_dim, float nms_overlap_thresh, int device_id); 11 | 12 | #endif //DOTA_DEVKIT_POLY_NMS_HPP 13 | -------------------------------------------------------------------------------- /eval_devkit/poly_nms_gpu/poly_nms.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 4 | assert sizeof(int) == sizeof(np.int32_t) 5 | 6 | cdef extern from "poly_nms.hpp": 7 | void _poly_nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 8 | 9 | def poly_gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 10 | np.int32_t device_id=0): 11 | cdef int boxes_num = dets.shape[0] 12 | cdef int boxes_dim = dets.shape[1] 13 | cdef int num_out 14 | cdef np.ndarray[np.int32_t, ndim=1] \ 15 | keep = np.zeros(boxes_num, dtype=np.int32) 16 | cdef np.ndarray[np.float32_t, ndim=1] \ 17 | scores = dets[:, 8] 18 | cdef np.ndarray[np.int_t, ndim=1] \ 19 | order = scores.argsort()[::-1] 20 | cdef np.ndarray[np.float32_t, ndim=2] \ 21 | sorted_dets = dets[order, :] 22 | _poly_nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 23 | keep = keep[:num_out] 24 | return list(order[keep]) 25 | -------------------------------------------------------------------------------- /eval_devkit/poly_nms_gpu/poly_nms_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/eval_devkit/poly_nms_gpu/poly_nms_test.py -------------------------------------------------------------------------------- /eval_devkit/poly_nms_gpu/poly_overlaps.hpp: -------------------------------------------------------------------------------- 1 | void _overlaps(float* overlaps,const float* boxes,const float* query_boxes, int n, int k, int device_id); 2 | -------------------------------------------------------------------------------- /eval_devkit/poly_nms_gpu/poly_overlaps.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 4 | cdef extern from "poly_overlaps.hpp": 5 | void _overlaps(np.float32_t*, np.float32_t*, np.float32_t*, int, int, int) 6 | 7 | def poly_overlaps (np.ndarray[np.float32_t, ndim=2] boxes, np.ndarray[np.float32_t, ndim=2] query_boxes, np.int32_t device_id=0): 8 | cdef int N = boxes.shape[0] 9 | cdef int K = query_boxes.shape[0] 10 | cdef np.ndarray[np.float32_t, ndim=2] overlaps = np.zeros((N, K), dtype = np.float32) 11 | _overlaps(&overlaps[0, 0], &boxes[0, 0], &query_boxes[0, 0], N, K, device_id) 12 | return overlaps 13 | 14 | 15 | -------------------------------------------------------------------------------- /eval_devkit/polyiou.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by dingjian on 18-2-3. 
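// iou_poly (declared below) returns the IoU of two polygons passed as flat x/y
// coordinate vectors; in this devkit the polygons are quadrilaterals, i.e. eight values each.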
3 | // 4 | 5 | #ifndef POLYIOU_POLYIOU_H 6 | #define POLYIOU_POLYIOU_H 7 | 8 | #include <vector> 9 | double iou_poly(std::vector<double> p, std::vector<double> q); 10 | #endif //POLYIOU_POLYIOU_H 11 | -------------------------------------------------------------------------------- /eval_devkit/polyiou.i: -------------------------------------------------------------------------------- 1 | %module polyiou 2 | %include "std_vector.i" 3 | 4 | namespace std { 5 | %template(VectorDouble) vector<double>; 6 | }; 7 | 8 | %{ 9 | #define SWIG_FILE_WITH_INIT 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "polyiou.h" 16 | %} 17 | 18 | %include "polyiou.h" 19 | 20 | -------------------------------------------------------------------------------- /eval_devkit/readme.md: -------------------------------------------------------------------------------- 1 | ## Installation 2 | 1. install swig 3 | ``` 4 | sudo apt-get install swig 5 | ``` 6 | 2. create the c++ extension for python 7 | ``` 8 | swig -c++ -python polyiou.i 9 | python setup.py build_ext --inplace 10 | ``` 11 | -------------------------------------------------------------------------------- /eval_devkit/setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | setup.py file for SWIG example 3 | """ 4 | from distutils.core import setup, Extension 5 | import numpy 6 | 7 | polyiou_module = Extension('_polyiou', 8 | sources=['polyiou_wrap.cxx', 'polyiou.cpp'], 9 | ) 10 | setup(name = 'polyiou', 11 | version = '0.1', 12 | author = "SWIG Docs", 13 | description = """Simple swig example from docs""", 14 | ext_modules = [polyiou_module], 15 | py_modules = ["polyiou"], 16 | ) 17 | -------------------------------------------------------------------------------- /feature_vis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/feature_vis.png -------------------------------------------------------------------------------- /help_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/help_utils/__init__.py -------------------------------------------------------------------------------- /help_utils/smooth_label.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from __future__ import absolute_import, division, print_function 4 | import numpy as np 5 | import math 6 | 7 | 8 | def gaussian_label(label, num_class, u=0, sig=4.0): 9 | x = np.array(range(math.floor(-num_class/2), math.ceil(num_class/2), 1)) 10 | y_sig = np.exp(-(x - u) ** 2 / (2 * sig ** 2)) 11 | return np.concatenate([y_sig[math.ceil(num_class/2)-label:], 12 | y_sig[:math.ceil(num_class/2)-label]], axis=0) 13 | 14 | 15 | def rectangular_label(label, num_class, raduius=4): 16 | x = np.zeros([num_class]) 17 | x[:raduius+1] = 1 18 | x[-raduius:] = 1 19 | y_sig = np.concatenate([x[-label:], x[:-label]], axis=0) 20 | return y_sig 21 | 22 | 23 | def pulse_label(label, num_class): 24 | x = np.zeros([num_class]) 25 | x[label] = 1 26 | return x 27 | 28 | 29 | def triangle_label(label, num_class, raduius=4): 30 | y_sig = np.zeros([num_class]) 31 | x = np.array(range(raduius+1)) 32 | y = -1/(raduius+1) * x + 1 33 | y_sig[:raduius+1] = y 34 | y_sig[-raduius:] = y[-1:0:-1] 35 | 36 | return
np.concatenate([y_sig[-label:], y_sig[:-label]], axis=0) 37 | 38 | 39 | def get_all_smooth_label(num_label, label_type=0, raduius=4): 40 | all_smooth_label = [] 41 | 42 | if label_type == 0: 43 | for i in range(num_label): 44 | all_smooth_label.append(gaussian_label(i, num_label, sig=raduius)) 45 | elif label_type == 1: 46 | for i in range(num_label): 47 | all_smooth_label.append(rectangular_label(i, num_label, raduius=raduius)) 48 | elif label_type == 2: 49 | for i in range(num_label): 50 | all_smooth_label.append(pulse_label(i, num_label)) 51 | elif label_type == 3: 52 | for i in range(num_label): 53 | all_smooth_label.append(triangle_label(i, num_label, raduius=raduius)) 54 | else: 55 | raise Exception('Only gaussian, rectangular, triangle and pulse labels are supported') 56 | return np.array(all_smooth_label) 57 | 58 | 59 | def angle_smooth_label(angle_label, angle_range=90, label_type=0, raduius=4, omega=1): 60 | """ 61 | :param angle_label: [-90, 0) when angle_range is 90, or [-180, 0) when angle_range is 180 62 | :param angle_range: 90 or 180 63 | :return: per-angle smooth label vectors of length angle_range / omega 64 | """ 65 | 66 | assert angle_range % omega == 0, 'wrong omega' 67 | 68 | angle_range /= omega 69 | angle_label /= omega 70 | 71 | angle_label = np.array(-np.round(angle_label), np.int32) 72 | all_smooth_label = get_all_smooth_label(int(angle_range), label_type, raduius) 73 | inx = angle_label == angle_range 74 | angle_label[inx] = angle_range - 1 75 | smooth_label = all_smooth_label[angle_label] 76 | return np.array(smooth_label, np.float32) 77 | 78 | 79 | if __name__ == '__main__': 80 | import matplotlib.pyplot as plt 81 | 82 | # angle_label = np.array([-89.9, -45.2, -0.3, -1.9]) 83 | # smooth_label = angle_smooth_label(angle_label) 84 | 85 | # y_sig = triangle_label(30, 180, raduius=8) 86 | # y_sig = gaussian_label(30, 180, sig=0.1) 87 | # y_sig = pulse_label(30, 180) 88 | y_sig = triangle_label(0, 90) 89 | x = np.array(range(0, 90, 1)) 90 | plt.plot(x, y_sig, "r-", linewidth=2) 91 | plt.grid(True) 92 | plt.show() 93 | print(y_sig) 94 | -------------------------------------------------------------------------------- /help_utils/tools.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import math 4 | import sys 5 | import os 6 | import cv2 7 | 8 | from libs.configs import cfgs 9 | 10 | 11 | def view_bar(message, num, total): 12 | rate = num / total 13 | rate_num = int(rate * 40) 14 | rate_nums = math.ceil(rate * 100) 15 | r = '\r%s:[%s%s]%d%%\t%d/%d' % (message, ">" * rate_num, " " * (40 - rate_num), rate_nums, num, total,) 16 | sys.stdout.write(r) 17 | sys.stdout.flush() 18 | 19 | 20 | def mkdir(path): 21 | if not os.path.exists(path): 22 | os.makedirs(path) 23 | 24 | 25 | def get_feature_map_size(src_len): 26 | feature_map_size = [] 27 | src_len /= 2 ** (int(cfgs.LEVEL[0][-1])-1) 28 | for _ in range(len(cfgs.LEVEL)): 29 | src_len = math.ceil(src_len / 2) 30 | feature_map_size.append((src_len, src_len)) 31 | 32 | return feature_map_size 33 | 34 | 35 | def get_dota_short_names(label): 36 | DOTA_SHORT_NAMES = { 37 | 'roundabout': 'RA', 38 | 'tennis-court': 'TC', 39 | 'swimming-pool': 'SP', 40 | 'storage-tank': 'ST', 41 | 'soccer-ball-field': 'SBF', 42 | 'small-vehicle': 'SV', 43 | 'ship': 'SH', 44 | 'plane': 'PL', 45 | 'large-vehicle': 'LV', 46 | 'helicopter': 'HC', 47 | 'harbor': 'HA', 48 | 'ground-track-field': 'GTF', 49 | 'bridge': 'BR', 50 | 'basketball-court': 'BC', 51 | 'baseball-diamond': 'BD' 52 | } 53 | 54 | return DOTA_SHORT_NAMES[label] 55
| 56 | 57 | def read_dota_gt_and_vis(img, gt_txt): 58 | txt_data = open(gt_txt, 'r').readlines() 59 | for i in txt_data: 60 | if len(i.split(' ')) < 9: 61 | continue 62 | 63 | gt_box = [int(xy) for xy in i.split(' ')[:8]] 64 | # gt_label = i.split(' ')[8] 65 | cv2.line(img, (gt_box[0], gt_box[1]), (gt_box[2], gt_box[3]), color=(0, 0, 255), thickness=3) 66 | cv2.line(img, (gt_box[2], gt_box[3]), (gt_box[4], gt_box[5]), color=(0, 0, 255), thickness=3) 67 | cv2.line(img, (gt_box[4], gt_box[5]), (gt_box[6], gt_box[7]), color=(0, 0, 255), thickness=3) 68 | cv2.line(img, (gt_box[6], gt_box[7]), (gt_box[0], gt_box[1]), color=(0, 0, 255), thickness=3) 69 | return img 70 | -------------------------------------------------------------------------------- /images.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/images.png -------------------------------------------------------------------------------- /libs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/.DS_Store -------------------------------------------------------------------------------- /libs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/__init__.py -------------------------------------------------------------------------------- /libs/box_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/box_utils/__init__.py -------------------------------------------------------------------------------- /libs/box_utils/anchor_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, print_function, division 3 | 4 | import tensorflow as tf 5 | import sys 6 | sys.path.append('../..') 7 | 8 | from libs.configs import cfgs 9 | 10 | 11 | def make_anchors(base_anchor_size, anchor_scales, anchor_ratios, 12 | featuremap_height, featuremap_width, 13 | stride, name='make_anchors'): 14 | ''' 15 | :param base_anchor_size:256 16 | :param anchor_scales: 17 | :param anchor_ratios: 18 | :param featuremap_height: 19 | :param featuremap_width: 20 | :param stride: 21 | :return: 22 | ''' 23 | with tf.variable_scope(name): 24 | base_anchor = tf.constant([0, 0, base_anchor_size, base_anchor_size], tf.float32) # [x_center, y_center, w, h] 25 | 26 | ws, hs = enum_ratios(enum_scales(base_anchor, anchor_scales), 27 | anchor_ratios) # per locations ws and hs 28 | 29 | x_centers = tf.range(featuremap_width, dtype=tf.float32) * stride 30 | y_centers = tf.range(featuremap_height, dtype=tf.float32) * stride 31 | 32 | if cfgs.USE_CENTER_OFFSET: 33 | x_centers += stride / 2. 34 | y_centers += stride / 2. 
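# the meshgrid pairing below broadcasts the per-location (w, h) shapes across the grid of centers, so the reshaped result enumerates all featuremap_height * featuremap_width * (num scales * num ratios) anchors in [xmin, ymin, xmax, ymax] order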
35 | 36 | x_centers, y_centers = tf.meshgrid(x_centers, y_centers) 37 | 38 | ws, x_centers = tf.meshgrid(ws, x_centers) 39 | hs, y_centers = tf.meshgrid(hs, y_centers) 40 | 41 | anchor_centers = tf.stack([x_centers, y_centers], 2) 42 | anchor_centers = tf.reshape(anchor_centers, [-1, 2]) 43 | 44 | box_sizes = tf.stack([ws, hs], axis=2) 45 | box_sizes = tf.reshape(box_sizes, [-1, 2]) 46 | # anchors = tf.concat([anchor_centers, box_sizes], axis=1) 47 | anchors = tf.concat([anchor_centers - 0.5*box_sizes, 48 | anchor_centers + 0.5*box_sizes], axis=1) 49 | return anchors 50 | 51 | 52 | def enum_scales(base_anchor, anchor_scales): 53 | 54 | anchor_scales = base_anchor * tf.constant(anchor_scales, dtype=tf.float32, shape=(len(anchor_scales), 1)) 55 | 56 | return anchor_scales 57 | 58 | 59 | def enum_ratios(anchors, anchor_ratios): 60 | ''' 61 | ratio = h /w 62 | :param anchors: 63 | :param anchor_ratios: 64 | :return: 65 | ''' 66 | ws = anchors[:, 2] # for base anchor: w == h 67 | hs = anchors[:, 3] 68 | sqrt_ratios = tf.sqrt(tf.constant(anchor_ratios)) 69 | 70 | ws = tf.reshape(ws / sqrt_ratios[:, tf.newaxis], [-1, 1]) 71 | hs = tf.reshape(hs * sqrt_ratios[:, tf.newaxis], [-1, 1]) 72 | 73 | return ws, hs 74 | 75 | 76 | if __name__ == '__main__': 77 | import os 78 | os.environ["CUDA_VISIBLE_DEVICES"] = '0' 79 | base_anchor_size = 32 80 | anchor_scales = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 81 | anchor_ratios = [0.5, 2.0, 1.0] 82 | anchors = make_anchors(base_anchor_size=base_anchor_size, anchor_ratios=anchor_ratios, 83 | anchor_scales=anchor_scales, 84 | featuremap_width=512, 85 | featuremap_height=512, 86 | stride=8) 87 | init = tf.global_variables_initializer() 88 | with tf.Session() as sess: 89 | sess.run(init) 90 | anchor_result = sess.run(anchors) 91 | print(anchor_result[:10]) 92 | 93 | -------------------------------------------------------------------------------- /libs/box_utils/boxes_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | 10 | def ious_calu(boxes_1, boxes_2): 11 | ''' 12 | 13 | :param boxes_1: [N, 4] [xmin, ymin, xmax, ymax] 14 | :param boxes_2: [M, 4] [xmin, ymin. xmax, ymax] 15 | :return: 16 | ''' 17 | boxes_1 = tf.cast(boxes_1, tf.float32) 18 | boxes_2 = tf.cast(boxes_2, tf.float32) 19 | xmin_1, ymin_1, xmax_1, ymax_1 = tf.split(boxes_1, 4, axis=1) # xmin_1 shape is [N, 1].. 20 | xmin_2, ymin_2, xmax_2, ymax_2 = tf.unstack(boxes_2, axis=1) # xmin_2 shape is [M, ].. 
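# keeping boxes_1 coordinates as [N, 1] columns and boxes_2 as [M] rows lets every tf.maximum / tf.minimum below broadcast to a pairwise [N, M] matrix, so ious_calu scores every box in boxes_1 against every box in boxes_2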
21 | 22 | max_xmin = tf.maximum(xmin_1, xmin_2) 23 | min_xmax = tf.minimum(xmax_1, xmax_2) 24 | 25 | max_ymin = tf.maximum(ymin_1, ymin_2) 26 | min_ymax = tf.minimum(ymax_1, ymax_2) 27 | 28 | overlap_h = tf.maximum(0., min_ymax - max_ymin) # avoid h < 0 29 | overlap_w = tf.maximum(0., min_xmax - max_xmin) 30 | 31 | overlaps = overlap_h * overlap_w 32 | 33 | area_1 = (xmax_1 - xmin_1) * (ymax_1 - ymin_1) # [N, 1] 34 | area_2 = (xmax_2 - xmin_2) * (ymax_2 - ymin_2) # [M, ] 35 | 36 | ious = overlaps / (area_1 + area_2 - overlaps) 37 | 38 | return ious 39 | 40 | 41 | def clip_boxes_to_img_boundaries(boxes, img_shape): 42 | ''' 43 | 44 | :param boxes: [N, 4] boxes; img_shape is [batch, h, w, c] 45 | :return: boxes clipped to the image boundaries 46 | ''' 47 | 48 | with tf.name_scope('clip_boxes_to_img_boundaries'): 49 | 50 | # xmin, ymin, xmax, ymax = tf.unstack(boxes, axis=1) 51 | xmin = boxes[:, 0] 52 | ymin = boxes[:, 1] 53 | xmax = boxes[:, 2] 54 | ymax = boxes[:, 3] 55 | img_h, img_w = img_shape[1], img_shape[2] 56 | 57 | img_h, img_w = tf.cast(img_h, tf.float32), tf.cast(img_w, tf.float32) 58 | 59 | xmin = tf.maximum(tf.minimum(xmin, img_w-1.), 0.) 60 | ymin = tf.maximum(tf.minimum(ymin, img_h-1.), 0.) 61 | 62 | xmax = tf.maximum(tf.minimum(xmax, img_w-1.), 0.) 63 | ymax = tf.maximum(tf.minimum(ymax, img_h-1.), 0.) 64 | 65 | return tf.transpose(tf.stack([xmin, ymin, xmax, ymax])) 66 | 67 | 68 | def filter_outside_boxes(boxes, img_h, img_w): 69 | ''' 70 | :param boxes: boxes with format [xmin, ymin, xmax, ymax] 71 | :param img_h: height of image 72 | :param img_w: width of image 73 | :return: indices of boxes that are inside the image boundary 74 | ''' 75 | 76 | with tf.name_scope('filter_outside_boxes'): 77 | xmin, ymin, xmax, ymax = tf.unstack(boxes, axis=1) 78 | 79 | xmin_index = tf.greater_equal(xmin, 0) 80 | ymin_index = tf.greater_equal(ymin, 0) 81 | xmax_index = tf.less_equal(xmax, tf.cast(img_w, tf.float32)) 82 | ymax_index = tf.less_equal(ymax, tf.cast(img_h, tf.float32)) 83 | 84 | indices = tf.transpose(tf.stack([xmin_index, ymin_index, xmax_index, ymax_index])) 85 | indices = tf.cast(indices, dtype=tf.int32) 86 | indices = tf.reduce_sum(indices, axis=1) 87 | indices = tf.where(tf.equal(indices, 4)) 88 | # indices = tf.equal(indices, 4) 89 | return tf.reshape(indices, [-1]) 90 | 91 | 92 | def padd_boxes_with_zeros(boxes, scores, max_num_of_boxes): 93 | 94 | ''' 95 | the num of boxes is less than max_num_of_boxes, so pad both with zeros ([0, 0, 0, 0] boxes, 0. scores) 96 | :param boxes: 97 | :param scores: [-1] 98 | :param max_num_of_boxes: 99 | :return: 100 | ''' 101 | 102 | pad_num = tf.cast(max_num_of_boxes, tf.int32) - tf.shape(boxes)[0] 103 | 104 | zero_boxes = tf.zeros(shape=[pad_num, 4], dtype=boxes.dtype) 105 | zero_scores = tf.zeros(shape=[pad_num], dtype=scores.dtype) 106 | 107 | final_boxes = tf.concat([boxes, zero_boxes], axis=0) 108 | 109 | final_scores = tf.concat([scores, zero_scores], axis=0) 110 | 111 | return final_boxes, final_scores -------------------------------------------------------------------------------- /libs/box_utils/cython_utils/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | clean: 5 | rm -rf */*.pyc 6 | rm -rf */*.so 7 | -------------------------------------------------------------------------------- /libs/box_utils/cython_utils/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/box_utils/cython_utils/__init__.py -------------------------------------------------------------------------------- /libs/box_utils/iou.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | 8 | import tensorflow as tf 9 | import numpy as np 10 | 11 | 12 | def iou_calculate(boxes_1, boxes_2): 13 | 14 | with tf.name_scope('iou_calculate'): 15 | 16 | xmin_1, ymin_1, xmax_1, ymax_1 = tf.unstack(boxes_1, axis=1) # each shape is [N, ] after unstack 17 | 18 | xmin_2, ymin_2, xmax_2, ymax_2 = tf.unstack(boxes_2, axis=1) # each shape is [M, ]; the ops below are element-wise, so M must equal N 19 | 20 | max_xmin = tf.maximum(xmin_1, xmin_2) 21 | min_xmax = tf.minimum(xmax_1, xmax_2) 22 | 23 | max_ymin = tf.maximum(ymin_1, ymin_2) 24 | min_ymax = tf.minimum(ymax_1, ymax_2) 25 | 26 | overlap_h = tf.maximum(0., min_ymax - max_ymin) # avoid h < 0 27 | overlap_w = tf.maximum(0., min_xmax - max_xmin) 28 | 29 | overlaps = overlap_h * overlap_w 30 | 31 | area_1 = (xmax_1 - xmin_1) * (ymax_1 - ymin_1) # [N, ] 32 | area_2 = (xmax_2 - xmin_2) * (ymax_2 - ymin_2) # [M, ] 33 | 34 | iou = overlaps / (area_1 + area_2 - overlaps) 35 | 36 | return iou 37 | 38 | 39 | def iou_calculate_np(boxes_1, boxes_2): 40 | xmin_1, ymin_1, xmax_1, ymax_1 = np.split(boxes_1, 4, axis=1) 41 | # xmin_1, ymin_1, xmax_1, ymax_1 = boxes_1[:, 0], boxes_1[:, 1], boxes_1[:, 2], boxes_1[:, 3] 42 | 43 | xmin_2, ymin_2, xmax_2, ymax_2 = boxes_2[:, 0], boxes_2[:, 1], boxes_2[:, 2], boxes_2[:, 3] 44 | 45 | max_xmin = np.maximum(xmin_1, xmin_2) 46 | min_xmax = np.minimum(xmax_1, xmax_2) 47 | 48 | max_ymin = np.maximum(ymin_1, ymin_2) 49 | min_ymax = np.minimum(ymax_1, ymax_2) 50 | 51 | overlap_h = np.maximum(0., min_ymax - max_ymin) # avoid h < 0 52 | overlap_w = np.maximum(0., min_xmax - max_xmin) 53 | 54 | overlaps = overlap_h * overlap_w 55 | 56 | area_1 = (xmax_1 - xmin_1) * (ymax_1 - ymin_1) # [N, 1] 57 | area_2 = (xmax_2 - xmin_2) * (ymax_2 - ymin_2) # [M, ] 58 | 59 | iou = overlaps / (area_1 + area_2 - overlaps) 60 | 61 | return iou 62 | 63 | 64 | def iou_calculate1(boxes_1, boxes_2): 65 | 66 | xmin_1, ymin_1, xmax_1, ymax_1 = boxes_1[:, 0], boxes_1[:, 1], boxes_1[:, 2], boxes_1[:, 3] 67 | 68 | xmin_2, ymin_2, xmax_2, ymax_2 = boxes_2[:, 0], boxes_2[:, 1], boxes_2[:, 2], boxes_2[:, 3] 69 | 70 | max_xmin = np.maximum(xmin_1, xmin_2) 71 | min_xmax = np.minimum(xmax_1, xmax_2) 72 | 73 | max_ymin = np.maximum(ymin_1, ymin_2) 74 | min_ymax = np.minimum(ymax_1, ymax_2) 75 | 76 | overlap_h = np.maximum(0., min_ymax - max_ymin) # avoid h < 0 77 | overlap_w = np.maximum(0., min_xmax - max_xmin) 78 | 79 | overlaps = overlap_h * overlap_w 80 | 81 | area_1 = (xmax_1 - xmin_1) * (ymax_1 - ymin_1) # [N, ] 82 | area_2 = (xmax_2 - xmin_2) * (ymax_2 - ymin_2) # [M, ] 83 | 84 | iou = overlaps / (area_1 + area_2 - overlaps) 85 | 86 | return iou 87 | 88 | 89 | if __name__ == '__main__': 90 | import os 91 | os.environ["CUDA_VISIBLE_DEVICES"] = '13' 92 | boxes1 = np.array([[50, 50, 100, 300], 93 | [60, 60, 100, 200]], np.float32) 94 | 95 | boxes2 = np.array([[50, 50, 100, 300], 96 | [200, 200, 100, 200]], np.float32) 97 | 98 | print(iou_calculate_np(boxes1, boxes2)) 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /libs/box_utils/iou_cpu.pyx:
-------------------------------------------------------------------------------- 1 | # written by yjr 2 | 3 | cimport cython 4 | import numpy as np 5 | cimport numpy as np 6 | import cv2 7 | import time 8 | 9 | DTYPE = np.float32 10 | ctypedef np.float32_t DTYPE_t 11 | ctypedef bint BOOL 12 | 13 | cdef DTYPE_t two_boxes_iou(np.ndarray[DTYPE_t, ndim=1] rectangle_1, np.ndarray[DTYPE_t, ndim=1] rectangle_2): 14 | 15 | """ 16 | calu rectangle_1 and rectangle_2 iou 17 | :param rectangle_1: [x, y, w, h, theta]. shape: (5, ) 18 | :param rectangle_2: 19 | :return: 20 | """ 21 | cdef DTYPE_t area1 = rectangle_1[2] * rectangle_1[3] 22 | cdef DTYPE_t area2 = rectangle_2[2] * rectangle_2[3] 23 | 24 | rect_1 = ((rectangle_1[0], rectangle_1[1]), (rectangle_1[3], rectangle_1[2]), rectangle_1[-1]) 25 | rect_2 = ((rectangle_2[0], rectangle_2[1]), (rectangle_2[3], rectangle_2[2]), rectangle_2[-1]) 26 | 27 | inter_points = cv2.rotatedRectangleIntersection(rect_1, rect_2)[1] 28 | 29 | cdef np.ndarray[DTYPE_t, ndim=3] order_points 30 | cdef float inter_area, iou 31 | if inter_points is not None: 32 | order_points = cv2.convexHull(inter_points, returnPoints=True) 33 | 34 | inter_area = cv2.contourArea(order_points) 35 | if area1 + area2 == inter_area: 36 | print ("area1-->", area1) 37 | print ("area2-->", area2) 38 | print ("inter_area-->", inter_area) 39 | iou = inter_area *1.0 / (area1 + area2 - inter_area) 40 | return iou 41 | else: 42 | return 0.0 43 | 44 | cpdef np.ndarray[DTYPE_t, ndim=2] get_iou_matrix( 45 | np.ndarray[DTYPE_t, ndim=2] boxes1, # (N, 5) 46 | np.ndarray[DTYPE_t, ndim=2] boxes2): # (M, 5) 47 | 48 | cdef unsigned int num_of_boxes1 = boxes1.shape[0] 49 | cdef unsigned int num_of_boxes2 = boxes2.shape[0] 50 | 51 | cdef np.ndarray[DTYPE_t, ndim=2] iou_matrix = np.zeros((num_of_boxes1, num_of_boxes2), dtype=DTYPE) 52 | # cdef DTYPE_t box_iou 53 | cdef unsigned int n, m 54 | # st = time.time() 55 | for n in range(num_of_boxes1): 56 | for m in range(num_of_boxes2): 57 | 58 | iou_matrix[n, m] = two_boxes_iou(boxes1[n], boxes2[m]) 59 | # print "iou_matrix cost time: ", time.time() - st 60 | return iou_matrix 61 | 62 | -------------------------------------------------------------------------------- /libs/box_utils/mask_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | import numpy as np 5 | import tfplot as tfp 6 | import cv2 7 | 8 | 9 | def make_gt_mask(fet_h, fet_w, img_h, img_w, gtboxes): 10 | ''' 11 | :param fet_h: 12 | :param fet_w: 13 | :param img_h: 14 | :param img_w: 15 | :param gtboxes: [xmin, ymin, xmax, ymax, label]. 
shape is (N, 5) 16 | :return: 17 | ''' 18 | gtboxes = np.reshape(gtboxes, [-1, 5]) 19 | # xmin, ymin, xmax, ymax, label = gtboxes[:, 0], gtboxes[:, 1], gtboxes[:, 2], gtboxes[:, 3], gtboxes[:, 4] 20 | 21 | areas = (gtboxes[:, 2]-gtboxes[:, 0])*(gtboxes[:, 3]-gtboxes[:, 1]) 22 | arg_areas = np.argsort(-1*areas) # sort from large to small 23 | gtboxes = gtboxes[arg_areas] 24 | 25 | fet_h, fet_w = int(fet_h), int(fet_w) 26 | mask = np.zeros(shape=[fet_h, fet_w], dtype=np.int32) 27 | for a_box in gtboxes: 28 | xmin, ymin, xmax, ymax, label = a_box[0], a_box[1], a_box[2], a_box[3], a_box[4] 29 | 30 | new_xmin, new_ymin, new_xmax, new_ymax = int(xmin*fet_w/float(img_w)), int(ymin*fet_h/float(img_h)),\ 31 | int(xmax*fet_w/float(img_w)), int(ymax*fet_h/float(img_h)) 32 | 33 | new_xmin, new_ymin = max(0, new_xmin), max(0, new_ymin) 34 | new_xmax, new_ymax = min(fet_w, new_xmax), min(fet_h, new_ymax) 35 | 36 | mask[new_ymin:new_ymax, new_xmin:new_xmax] = np.int32(label) 37 | return mask 38 | 39 | 40 | def make_r_gt_mask(fet_h, fet_w, img_h, img_w, gtboxes): 41 | gtboxes = np.reshape(gtboxes, [-1, 6]) # [x, y, w, h, theta, label] 42 | 43 | areas = gtboxes[:, 2] * gtboxes[:, 3] 44 | arg_areas = np.argsort(-1 * areas) # sort from large to small 45 | gtboxes = gtboxes[arg_areas] 46 | 47 | fet_h, fet_w = int(fet_h), int(fet_w) 48 | mask = np.zeros(shape=[fet_h, fet_w], dtype=np.int32) 49 | for a_box in gtboxes: 50 | # print(a_box) 51 | box = cv2.boxPoints(((a_box[0], a_box[1]), (a_box[2], a_box[3]), a_box[4])) 52 | box = np.reshape(box, [-1, ]) 53 | label = a_box[-1] 54 | new_box = [] 55 | for i in range(8): 56 | if i % 2 == 0: 57 | x = box[i] 58 | new_x = int(x * fet_w / float(img_w)) 59 | new_box.append(new_x) 60 | else: 61 | y = box[i] 62 | new_y = int(y*fet_h/float(img_h)) 63 | new_box.append(new_y) 64 | 65 | new_box = np.int0(new_box).reshape([4, 2]) 66 | color = int(label) 67 | # print(type(color), color) 68 | cv2.fillConvexPoly(mask, new_box, color=color) 69 | # print (mask.dtype) 70 | return mask 71 | 72 | 73 | def vis_mask_tfsmry(mask, name): 74 | ''' 75 | :param mask:[H, W]. 
It's a tensor, not array 76 | :return: 77 | ''' 78 | 79 | def figure_attention(activation): 80 | fig, ax = tfp.subplots() 81 | im = ax.imshow(activation, cmap='jet') 82 | fig.colorbar(im) 83 | return fig 84 | 85 | heatmap = mask*10 86 | 87 | tfp.summary.plot(name, figure_attention, [heatmap]) -------------------------------------------------------------------------------- /libs/box_utils/rbbox_overlaps.hpp: -------------------------------------------------------------------------------- 1 | void _overlaps(float* overlaps,const float* boxes,const float* query_boxes, int n, int k, int device_id); 2 | 3 | -------------------------------------------------------------------------------- /libs/box_utils/rbbox_overlaps.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 4 | cdef extern from "rbbox_overlaps.hpp": 5 | void _overlaps(np.float32_t*, np.float32_t*, np.float32_t*, int, int, int) 6 | 7 | def rbbx_overlaps (np.ndarray[np.float32_t, ndim=2] boxes, np.ndarray[np.float32_t, ndim=2] query_boxes, np.int32_t device_id=0): 8 | # boxes: [x, y, w, h, theta] 9 | cdef int N = boxes.shape[0] 10 | cdef int K = query_boxes.shape[0] 11 | cdef np.ndarray[np.float32_t, ndim=2] overlaps = np.zeros((N, K), dtype = np.float32) 12 | _overlaps(&overlaps[0, 0], &boxes[0, 0], &query_boxes[0, 0], N, K, device_id) 13 | return overlaps 14 | 15 | 16 | -------------------------------------------------------------------------------- /libs/box_utils/rotate_anchors.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/box_utils/rotate_anchors.jpg -------------------------------------------------------------------------------- /libs/box_utils/rotate_gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _rotate_nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /libs/box_utils/rotate_polygon_nms.hpp: -------------------------------------------------------------------------------- 1 | void _rotate_nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /libs/box_utils/rotate_polygon_nms.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 4 | assert sizeof(int) == sizeof(np.int32_t) 5 | 6 | cdef extern from "rotate_gpu_nms.hpp": 7 | void _rotate_nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 8 | 9 | def rotate_gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float_t thresh, np.int32_t device_id=0): 10 | cdef int boxes_num = dets.shape[0] 11 | cdef int boxes_dim = dets.shape[1] 12 | cdef int num_out 13 | cdef np.ndarray[np.int32_t, ndim=1] \ 14 | keep = np.zeros(boxes_num, dtype=np.int32) 15 | cdef np.ndarray[np.float32_t, ndim=1] \ 16 | scores = dets[:, 5] 17 | cdef np.ndarray[np.int_t, ndim=1] \ 18 | order = scores.argsort()[::-1] 19 | cdef np.ndarray[np.float32_t, ndim=2] \ 20 | sorted_dets = dets[order, :] 21 | thresh = thresh 22 | _rotate_nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, 
device_id) 23 | keep = keep[:num_out] 24 | return order[keep] 25 | -------------------------------------------------------------------------------- /libs/box_utils/show_box_in_tensor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | from libs.box_utils import draw_box_in_img 10 | 11 | 12 | def only_draw_boxes(img_batch, boxes, method, head=None, is_csl=False): 13 | 14 | boxes = tf.stop_gradient(boxes) 15 | img_tensor = tf.squeeze(img_batch, 0) 16 | img_tensor = tf.cast(img_tensor, tf.float32) 17 | labels = tf.ones(shape=(tf.shape(boxes)[0], ), dtype=tf.int32) * draw_box_in_img.ONLY_DRAW_BOXES 18 | scores = tf.zeros_like(labels, dtype=tf.float32) 19 | 20 | if head is None: 21 | head = tf.ones_like(scores) * -1 22 | 23 | img_tensor_with_boxes = tf.py_func(draw_box_in_img.draw_boxes_with_label_and_scores, 24 | inp=[img_tensor, boxes, labels, scores, method, head, is_csl], 25 | Tout=tf.uint8) 26 | img_tensor_with_boxes = tf.reshape(img_tensor_with_boxes, tf.shape(img_batch)) # [batch_size, h, w, c] 27 | 28 | return img_tensor_with_boxes 29 | 30 | 31 | def draw_boxes_with_scores(img_batch, boxes, scores, method, head, is_csl=False): 32 | 33 | if head is None: 34 | head = tf.ones_like(scores) * -1 35 | 36 | boxes = tf.stop_gradient(boxes) 37 | scores = tf.stop_gradient(scores) 38 | 39 | img_tensor = tf.squeeze(img_batch, 0) 40 | img_tensor = tf.cast(img_tensor, tf.float32) 41 | labels = tf.ones(shape=(tf.shape(boxes)[0],), dtype=tf.int32) * draw_box_in_img.ONLY_DRAW_BOXES_WITH_SCORES 42 | img_tensor_with_boxes = tf.py_func(draw_box_in_img.draw_boxes_with_label_and_scores, 43 | inp=[img_tensor, boxes, labels, scores, method, head, is_csl], 44 | Tout=[tf.uint8]) 45 | img_tensor_with_boxes = tf.reshape(img_tensor_with_boxes, tf.shape(img_batch)) 46 | return img_tensor_with_boxes 47 | 48 | 49 | def draw_boxes_with_categories(img_batch, boxes, labels, method, head=None, is_csl=False): 50 | 51 | if head is None: 52 | head = tf.ones_like(labels) * -1 53 | 54 | boxes = tf.stop_gradient(boxes) 55 | 56 | img_tensor = tf.squeeze(img_batch, 0) 57 | img_tensor = tf.cast(img_tensor, tf.float32) 58 | scores = tf.ones(shape=(tf.shape(boxes)[0],), dtype=tf.float32) 59 | 60 | img_tensor_with_boxes = tf.py_func(draw_box_in_img.draw_boxes_with_label_and_scores, 61 | inp=[img_tensor, boxes, labels, scores, method, head, is_csl], 62 | Tout=[tf.uint8]) 63 | img_tensor_with_boxes = tf.reshape(img_tensor_with_boxes, tf.shape(img_batch)) 64 | return img_tensor_with_boxes 65 | 66 | 67 | def draw_boxes_with_categories_and_scores(img_batch, boxes, labels, scores, method, head=None, is_csl=False): 68 | 69 | if head is None: 70 | head = tf.ones_like(labels) * -1 71 | 72 | boxes = tf.stop_gradient(boxes) 73 | scores = tf.stop_gradient(scores) 74 | 75 | img_tensor = tf.squeeze(img_batch, 0) 76 | img_tensor = tf.cast(img_tensor, tf.float32) 77 | img_tensor_with_boxes = tf.py_func(draw_box_in_img.draw_boxes_with_label_and_scores, 78 | inp=[img_tensor, boxes, labels, scores, method, head, is_csl], 79 | Tout=[tf.uint8]) 80 | img_tensor_with_boxes = tf.reshape(img_tensor_with_boxes, tf.shape(img_batch)) 81 | return img_tensor_with_boxes 82 | 83 | 84 | if __name__ == "__main__": 85 | print (1) 86 | 87 | -------------------------------------------------------------------------------- /libs/box_utils/tf_ops.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | import tensorflow as tf 6 | 7 | ''' 8 | all of these ops are derived from the tensorflow Object Detection API 9 | ''' 10 | def indices_to_dense_vector(indices, 11 | size, 12 | indices_value=1., 13 | default_value=0, 14 | dtype=tf.float32): 15 | """Creates a dense vector with the entries at `indices` set to `indices_value` and the rest set to `default_value`. 16 | 17 | This function exists because it is unclear if it is safe to use 18 | tf.sparse_to_dense(indices, [size], 1, validate_indices=False) 19 | with indices which are not ordered. 20 | This function accepts a dynamic size (e.g. tf.shape(tensor)[0]) 21 | 22 | Args: 23 | indices: 1d Tensor with integer indices which are to be set to 24 | indices_values. 25 | size: scalar with size (integer) of output Tensor. 26 | indices_value: values of elements specified by indices in the output vector 27 | default_value: values of other elements in the output vector. 28 | dtype: data type. 29 | 30 | Returns: 31 | dense 1D Tensor of shape [size] with indices set to indices_values and the 32 | rest set to default_value. 33 | """ 34 | size = tf.to_int32(size) 35 | zeros = tf.ones([size], dtype=dtype) * default_value # baseline vector filled with default_value 36 | values = tf.ones_like(indices, dtype=dtype) * indices_value 37 | 38 | return tf.dynamic_stitch([tf.range(size), tf.to_int32(indices)], 39 | [zeros, values]) -------------------------------------------------------------------------------- /libs/configs/DOTA1.0/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/configs/DOTA1.0/__init__.py -------------------------------------------------------------------------------- /libs/configs/DOTA1.0/baseline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/configs/DOTA1.0/baseline/__init__.py -------------------------------------------------------------------------------- /libs/configs/DOTA1.0/baseline/cfgs_res50_dota_win_v19.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | v4 + windows version 9 | 10 | """ 11 | 12 | # ------------------------------------------------ 13 | VERSION = 'RetinaNet_DOTA_1x_20200607' 14 | NET_NAME = 'resnet50_v1d' # 'MobilenetV2' 15 | ADD_BOX_IN_TENSORBOARD = True 16 | 17 | # ---------------------------------------- System_config 18 | ROOT_PATH = os.path.abspath('../') 19 | print(20*"++--") 20 | print(ROOT_PATH) 21 | GPU_GROUP = "0" 22 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 23 | SHOW_TRAIN_INFO_INTE = 20 24 | SMRY_ITER = 200 25 | SAVE_WEIGHTS_INTE = 27000 26 | 27 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 28 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 29 | 30 | if NET_NAME.startswith("resnet"): 31 | weights_name = NET_NAME 32 | elif NET_NAME.startswith("MobilenetV2"): 33 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 34 | else: 35 | raise Exception('net name must be in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 36 | 37 | PRETRAINED_CKPT = ROOT_PATH +
'/data/pretrained_weights/' + weights_name + '.ckpt' 38 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 39 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 40 | 41 | # ------------------------------------------ Train config 42 | RESTORE_FROM_RPN = False 43 | FIXED_BLOCKS = 1 # allow 0~3 44 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 45 | USE_07_METRIC = True 46 | 47 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 48 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 49 | 50 | CLS_WEIGHT = 1.0 51 | REG_WEIGHT = 1.0 / 5.0 52 | REG_LOSS_MODE = None 53 | 54 | BATCH_SIZE = 1 55 | EPSILON = 1e-5 56 | MOMENTUM = 0.9 57 | LR = 5e-4 58 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 59 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 60 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 61 | 62 | # -------------------------------------------- Data_preprocess_config 63 | DATASET_NAME = 'DOTA' # 'pascal', 'coco' 64 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 65 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 66 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 67 | IMG_SHORT_SIDE_LEN = 800 68 | IMG_MAX_LENGTH = 800 69 | CLASS_NUM = 15 70 | 71 | IMG_ROTATE = False 72 | RGB2GRAY = False 73 | VERTICAL_FLIP = False 74 | HORIZONTAL_FLIP = True 75 | IMAGE_PYRAMID = False 76 | 77 | # --------------------------------------------- Network_config 78 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 79 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 80 | PROBABILITY = 0.01 81 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 82 | WEIGHT_DECAY = 1e-4 83 | USE_GN = False 84 | FPN_CHANNEL = 256 85 | 86 | # ---------------------------------------------Anchor config 87 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 88 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 89 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 90 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 91 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 92 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 93 | ANCHOR_SCALE_FACTORS = None 94 | USE_CENTER_OFFSET = True 95 | METHOD = 'H' 96 | USE_ANGLE_COND = False 97 | ANGLE_RANGE = 90 # or 180 98 | 99 | # --------------------------------------------RPN config 100 | SHARE_NET = True 101 | USE_P5 = True 102 | IOU_POSITIVE_THRESHOLD = 0.5 103 | IOU_NEGATIVE_THRESHOLD = 0.4 104 | 105 | NMS = True 106 | NMS_IOU_THRESHOLD = 0.1 107 | MAXIMUM_DETECTIONS = 100 108 | FILTERED_SCORE = 0.05 109 | VIS_SCORE = 0.4 110 | 111 | 112 | -------------------------------------------------------------------------------- /libs/configs/DOTA1.0/dcl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/configs/DOTA1.0/dcl/__init__.py -------------------------------------------------------------------------------- /libs/configs/DOTA1.0/dota_train/cfgs_res50_dotatrain_dcl_v8.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | BCL + OMEGA = 180 / 32. 
+ period loss 9 | 10 | 11 | 12 | """ 13 | 14 | # ------------------------------------------------ 15 | VERSION = 'RetinaNet_DOTA_DCL_B_2x_20200921' 16 | NET_NAME = 'resnet50_v1d' # 'MobilenetV2' 17 | ADD_BOX_IN_TENSORBOARD = True 18 | 19 | # ---------------------------------------- System_config 20 | ROOT_PATH = os.path.abspath('../') 21 | print(20*"++--") 22 | print(ROOT_PATH) 23 | GPU_GROUP = "0,1,2" 24 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 25 | SHOW_TRAIN_INFO_INTE = 20 26 | SMRY_ITER = 2000 27 | SAVE_WEIGHTS_INTE = 20673 * 2 28 | 29 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 30 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 31 | 32 | if NET_NAME.startswith("resnet"): 33 | weights_name = NET_NAME 34 | elif NET_NAME.startswith("MobilenetV2"): 35 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 36 | else: 37 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 38 | 39 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 40 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 41 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 42 | 43 | # ------------------------------------------ Train config 44 | RESTORE_FROM_RPN = False 45 | FIXED_BLOCKS = 1 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | 49 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 50 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 51 | 52 | CLS_WEIGHT = 1.0 53 | REG_WEIGHT = 1.0 54 | ANGLE_WEIGHT = 0.5 55 | REG_LOSS_MODE = None 56 | ALPHA = 1.0 57 | BETA = 1.0 58 | 59 | BATCH_SIZE = 1 60 | EPSILON = 1e-5 61 | MOMENTUM = 0.9 62 | LR = 5e-4 63 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 64 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 65 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 66 | 67 | # -------------------------------------------- Data_preprocess_config 68 | DATASET_NAME = 'DOTATrain' # 'pascal', 'coco' 69 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 70 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 71 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 72 | IMG_SHORT_SIDE_LEN = 800 73 | IMG_MAX_LENGTH = 800 74 | CLASS_NUM = 15 75 | OMEGA = 180 / 32. 76 | ANGLE_MODE = 0 77 | 78 | IMG_ROTATE = False 79 | RGB2GRAY = False 80 | VERTICAL_FLIP = False 81 | HORIZONTAL_FLIP = True 82 | IMAGE_PYRAMID = False 83 | 84 | # --------------------------------------------- Network_config 85 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 86 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 87 | PROBABILITY = 0.01 88 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 89 | WEIGHT_DECAY = 1e-4 90 | USE_GN = False 91 | FPN_CHANNEL = 256 92 | 93 | # ---------------------------------------------Anchor config 94 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 95 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 96 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 97 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 98 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 
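# (added note) With METHOD = 'H' below, anchors are axis-aligned: 3 ANCHOR_SCALES x
# 7 ANCHOR_RATIOS = 21 anchors per location; ANCHOR_ANGLES presumably only takes
# effect on the rotated-anchor path (METHOD = 'R'), which would multiply that by
# its 6 angles.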
99 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 100 | ANCHOR_SCALE_FACTORS = None 101 | USE_CENTER_OFFSET = True 102 | METHOD = 'H' 103 | USE_ANGLE_COND = False 104 | ANGLE_RANGE = 180 # 90 or 180 105 | 106 | # --------------------------------------------RPN config 107 | SHARE_NET = True 108 | USE_P5 = True 109 | IOU_POSITIVE_THRESHOLD = 0.5 110 | IOU_NEGATIVE_THRESHOLD = 0.4 111 | 112 | NMS = True 113 | NMS_IOU_THRESHOLD = 0.1 114 | MAXIMUM_DETECTIONS = 100 115 | FILTERED_SCORE = 0.05 116 | VIS_SCORE = 0.4 117 | 118 | 119 | -------------------------------------------------------------------------------- /libs/configs/DOTA1.0/r3det_dcl/cfgs_res50_dota_refine_dcl_v2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 9 | 10 | 11 | 12 | """ 13 | 14 | # ------------------------------------------------ 15 | VERSION = 'RetinaNet_DOTA_Refine_DCL_G_2x_20201026' 16 | NET_NAME = 'resnet50_v1d' # 'MobilenetV2' 17 | ADD_BOX_IN_TENSORBOARD = True 18 | 19 | # ---------------------------------------- System_config 20 | ROOT_PATH = os.path.abspath('../') 21 | print(20*"++--") 22 | print(ROOT_PATH) 23 | GPU_GROUP = "0,1,2" 24 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 25 | SHOW_TRAIN_INFO_INTE = 20 26 | SMRY_ITER = 200 27 | SAVE_WEIGHTS_INTE = 27000 * 2 28 | 29 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 30 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 31 | 32 | if NET_NAME.startswith("resnet"): 33 | weights_name = NET_NAME 34 | elif NET_NAME.startswith("MobilenetV2"): 35 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 36 | else: 37 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 38 | 39 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 40 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 41 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 42 | 43 | # ------------------------------------------ Train config 44 | RESTORE_FROM_RPN = False 45 | FIXED_BLOCKS = 1 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | 49 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 50 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 51 | 52 | CLS_WEIGHT = 1.0 53 | REG_WEIGHT = 1.0 54 | ANGLE_WEIGHT = 0.5 55 | USE_IOU_FACTOR = True 56 | REG_LOSS_MODE = None 57 | ALPHA = 1.0 58 | BETA = 1.0 59 | 60 | BATCH_SIZE = 1 61 | EPSILON = 1e-5 62 | MOMENTUM = 0.9 63 | LR = 5e-4 64 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 65 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 66 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 67 | 68 | # -------------------------------------------- Data_preprocess_config 69 | DATASET_NAME = 'DOTA' # 'pascal', 'coco' 70 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 71 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 72 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 73 | IMG_SHORT_SIDE_LEN = 800 74 | IMG_MAX_LENGTH = 800 75 | CLASS_NUM = 15 76 | OMEGA = 180 / 256. 
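# (added note) OMEGA is the angle discretization step, so this config uses
# ANGLE_RANGE / OMEGA = 180 / (180/256.) = 256 angle bins, i.e. a log2(256) = 8-bit
# dense code per anchor. Judging by the '_G' / '_B' tags in the VERSION strings,
# ANGLE_MODE = 1 appears to select the gray coded label (GCL) and 0 the binary
# coded label (BCL).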
77 | ANGLE_MODE = 1 78 | 79 | IMG_ROTATE = False 80 | RGB2GRAY = False 81 | VERTICAL_FLIP = False 82 | HORIZONTAL_FLIP = True 83 | IMAGE_PYRAMID = False 84 | 85 | # --------------------------------------------- Network_config 86 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 87 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 88 | PROBABILITY = 0.01 89 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 90 | WEIGHT_DECAY = 1e-4 91 | USE_GN = False 92 | FPN_CHANNEL = 256 93 | 94 | # ---------------------------------------------Anchor config 95 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 96 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 97 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 98 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 99 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 100 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 101 | ANCHOR_SCALE_FACTORS = None 102 | USE_CENTER_OFFSET = True 103 | METHOD = 'H' 104 | USE_ANGLE_COND = False 105 | ANGLE_RANGE = 180 # 90 or 180 106 | 107 | # --------------------------------------------RPN config 108 | SHARE_NET = True 109 | USE_P5 = True 110 | IOU_POSITIVE_THRESHOLD = 0.5 111 | IOU_NEGATIVE_THRESHOLD = 0.4 112 | REFINE_IOU_POSITIVE_THRESHOLD = [0.6, 0.7] 113 | REFINE_IOU_NEGATIVE_THRESHOLD = [0.5, 0.6] 114 | 115 | NMS = True 116 | NMS_IOU_THRESHOLD = 0.1 117 | MAXIMUM_DETECTIONS = 100 118 | FILTERED_SCORE = 0.05 119 | VIS_SCORE = 0.4 120 | 121 | 122 | -------------------------------------------------------------------------------- /libs/configs/HRSC2016/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/configs/HRSC2016/__init__.py -------------------------------------------------------------------------------- /libs/configs/HRSC2016/dcl/cfgs_res101_hrsc2016_dcl_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 9 | 10 | """ 11 | 12 | # ------------------------------------------------ 13 | VERSION = 'RetinaNet_HRSC2016_DCL_B_2x_20200930' 14 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 15 | ADD_BOX_IN_TENSORBOARD = True 16 | 17 | # ---------------------------------------- System_config 18 | ROOT_PATH = os.path.abspath('../') 19 | print(20*"++--") 20 | print(ROOT_PATH) 21 | GPU_GROUP = "0,1,2" 22 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 23 | SHOW_TRAIN_INFO_INTE = 20 24 | SMRY_ITER = 200 25 | SAVE_WEIGHTS_INTE = 10000 26 | 27 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 28 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 29 | 30 | if NET_NAME.startswith("resnet"): 31 | weights_name = NET_NAME 32 | elif NET_NAME.startswith("MobilenetV2"): 33 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 34 | else: 35 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 36 | 37 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 38 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 39 | EVALUATE_R_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 40 | 41 | # ------------------------------------------ Train config 42 | RESTORE_FROM_RPN = False 43 | FIXED_BLOCKS = 1 # 
allow 0~3 44 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 45 | USE_07_METRIC = True 46 | EVAL_THRESHOLD = 0.5 47 | 48 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 49 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 50 | 51 | CLS_WEIGHT = 1.0 52 | REG_WEIGHT = 1.0 53 | ANGLE_WEIGHT = 0.5 54 | REG_LOSS_MODE = None 55 | ALPHA = 1.0 56 | BETA = 1.0 57 | 58 | BATCH_SIZE = 1 59 | EPSILON = 1e-5 60 | MOMENTUM = 0.9 61 | LR = 5e-4 62 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 63 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 64 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 65 | 66 | # -------------------------------------------- Data_preprocess_config 67 | DATASET_NAME = 'HRSC2016' # 'pascal', 'coco' 68 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 69 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 70 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 71 | IMG_SHORT_SIDE_LEN = 600 72 | IMG_MAX_LENGTH = 1000 73 | CLASS_NUM = 1 74 | OMEGA = 180 / 128. 75 | ANGLE_MODE = 0 76 | 77 | IMG_ROTATE = True 78 | RGB2GRAY = True 79 | VERTICAL_FLIP = True 80 | HORIZONTAL_FLIP = True 81 | IMAGE_PYRAMID = False 82 | 83 | # --------------------------------------------- Network_config 84 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 85 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 86 | PROBABILITY = 0.01 87 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 88 | WEIGHT_DECAY = 1e-4 89 | USE_GN = False 90 | FPN_CHANNEL = 256 91 | 92 | # ---------------------------------------------Anchor config 93 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 94 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 95 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 96 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 97 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 
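# (added note) The OMEGA = 180 / 128. set above corresponds to 128 angle bins,
# i.e. a 7-bit binary coded label (ANGLE_MODE = 0); a coarser OMEGA shrinks the
# angle classifier at the cost of angular precision, the trade-off the DCL configs
# in this repo ablate.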
98 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 99 | ANCHOR_SCALE_FACTORS = None 100 | USE_CENTER_OFFSET = True 101 | METHOD = 'H' 102 | USE_ANGLE_COND = False 103 | ANGLE_RANGE = 180 # 90 or 180 104 | 105 | # --------------------------------------------RPN config 106 | SHARE_NET = True 107 | USE_P5 = True 108 | IOU_POSITIVE_THRESHOLD = 0.5 109 | IOU_NEGATIVE_THRESHOLD = 0.4 110 | 111 | NMS = True 112 | NMS_IOU_THRESHOLD = 0.1 113 | MAXIMUM_DETECTIONS = 100 114 | FILTERED_SCORE = 0.05 115 | VIS_SCORE = 0.4 116 | 117 | 118 | -------------------------------------------------------------------------------- /libs/configs/HRSC2016/r3det_dcl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/configs/HRSC2016/r3det_dcl/__init__.py -------------------------------------------------------------------------------- /libs/configs/HRSC2016/r3det_dcl/cfgs_res101_hrsc2016_r3det_dcl_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | FLOPs: 1461458647; Trainable params: 56011145 9 | cls : ship|| Recall: 0.9714983713355049 || Precison: 0.2847255369928401|| AP: 0.8846049339336871 10 | F1:0.9243833400727861 P:0.9180722891566265 R:0.9307817589576547 11 | mAP is : 0.8846049339336871 12 | """ 13 | 14 | # ------------------------------------------------ 15 | VERSION = 'RetinaNet_HRSC2016_R3Det_DCL_B_2x_20201108' 16 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 17 | ADD_BOX_IN_TENSORBOARD = True 18 | 19 | # ---------------------------------------- System_config 20 | ROOT_PATH = os.path.abspath('../') 21 | print(20*"++--") 22 | print(ROOT_PATH) 23 | GPU_GROUP = "1,2,3" 24 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 25 | SHOW_TRAIN_INFO_INTE = 20 26 | SMRY_ITER = 200 27 | SAVE_WEIGHTS_INTE = 7500 * 2 28 | 29 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 30 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 31 | 32 | if NET_NAME.startswith("resnet"): 33 | weights_name = NET_NAME 34 | elif NET_NAME.startswith("MobilenetV2"): 35 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 36 | else: 37 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 38 | 39 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 40 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 41 | EVALUATE_R_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 42 | 43 | # ------------------------------------------ Train config 44 | RESTORE_FROM_RPN = False 45 | FIXED_BLOCKS = 1 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | EVAL_THRESHOLD = 0.5 49 | 50 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 51 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 52 | 53 | CLS_WEIGHT = 1.0 54 | REG_WEIGHT = 1.0 55 | ANGLE_WEIGHT = 0.5 56 | USE_IOU_FACTOR = True 57 | REG_LOSS_MODE = None 58 | ALPHA = 1.0 59 | BETA = 1.0 60 | 61 | BATCH_SIZE = 1 62 | EPSILON = 1e-5 63 | MOMENTUM = 0.9 64 | LR = 5e-4 65 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 66 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 67 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 68 | 69 | # -------------------------------------------- 
Data_preprocess_config 70 | DATASET_NAME = 'HRSC2016' # 'pascal', 'coco' 71 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 72 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 73 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 74 | IMG_SHORT_SIDE_LEN = [800, 400, 600, 1000, 1200] 75 | IMG_MAX_LENGTH = 1200 76 | CLASS_NUM = 1 77 | OMEGA = 180 / 64. 78 | ANGLE_MODE = 0 79 | 80 | IMG_ROTATE = True 81 | RGB2GRAY = True 82 | VERTICAL_FLIP = True 83 | HORIZONTAL_FLIP = True 84 | IMAGE_PYRAMID = True 85 | 86 | # --------------------------------------------- Network_config 87 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 88 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 89 | PROBABILITY = 0.01 90 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 91 | WEIGHT_DECAY = 1e-4 92 | USE_GN = False 93 | FPN_CHANNEL = 256 94 | 95 | # ---------------------------------------------Anchor config 96 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 97 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 98 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 99 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 100 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 101 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 102 | ANCHOR_SCALE_FACTORS = None 103 | USE_CENTER_OFFSET = True 104 | METHOD = 'H' 105 | USE_ANGLE_COND = False 106 | ANGLE_RANGE = 180 # 90 or 180 107 | 108 | # --------------------------------------------RPN config 109 | SHARE_NET = True 110 | USE_P5 = True 111 | IOU_POSITIVE_THRESHOLD = 0.5 112 | IOU_NEGATIVE_THRESHOLD = 0.4 113 | REFINE_IOU_POSITIVE_THRESHOLD = [0.6, 0.7] 114 | REFINE_IOU_NEGATIVE_THRESHOLD = [0.5, 0.6] 115 | 116 | NMS = True 117 | NMS_IOU_THRESHOLD = 0.1 118 | MAXIMUM_DETECTIONS = 100 119 | FILTERED_SCORE = 0.05 120 | VIS_SCORE = 0.4 121 | 122 | 123 | -------------------------------------------------------------------------------- /libs/configs/ICDAR2015/baseline/cfgs_res101_icdar2015_baseline_v2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 2020-10-01 retinanet 81.49% 83.29% 82.38% 9 | 10 | """ 11 | 12 | # ------------------------------------------------ 13 | VERSION = 'RetinaNet_ICDAR2015_Baseline_2x_20200929' 14 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 15 | ADD_BOX_IN_TENSORBOARD = True 16 | 17 | # ---------------------------------------- System_config 18 | ROOT_PATH = os.path.abspath('../') 19 | print(20*"++--") 20 | print(ROOT_PATH) 21 | GPU_GROUP = "0,1,2" 22 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 23 | SHOW_TRAIN_INFO_INTE = 20 24 | SMRY_ITER = 200 25 | SAVE_WEIGHTS_INTE = 10000 * 2 26 | 27 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 28 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 29 | 30 | if NET_NAME.startswith("resnet"): 31 | weights_name = NET_NAME 32 | elif NET_NAME.startswith("MobilenetV2"): 33 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 34 | else: 35 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 36 | 37 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 38 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 39 | EVALUATE_DIR = ROOT_PATH + 
'/output/evaluate_result_pickle/' 40 | 41 | # ------------------------------------------ Train config 42 | RESTORE_FROM_RPN = False 43 | FIXED_BLOCKS = 1 # allow 0~3 44 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 45 | USE_07_METRIC = True 46 | 47 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 48 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 49 | 50 | CLS_WEIGHT = 1.0 51 | REG_WEIGHT = 1.0 52 | ANGLE_WEIGHT = 0.5 53 | REG_LOSS_MODE = None 54 | ALPHA = 1.0 55 | BETA = 1.0 56 | 57 | BATCH_SIZE = 1 58 | EPSILON = 1e-5 59 | MOMENTUM = 0.9 60 | LR = 5e-4 61 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 62 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 63 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 64 | 65 | # -------------------------------------------- Data_preprocess_config 66 | DATASET_NAME = 'ICDAR2015' # 'pascal', 'coco' 67 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 68 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 69 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 70 | IMG_SHORT_SIDE_LEN = [800, 600, 1000, 1200] 71 | IMG_MAX_LENGTH = 1500 72 | CLASS_NUM = 1 73 | 74 | IMG_ROTATE = True 75 | RGB2GRAY = True 76 | VERTICAL_FLIP = True 77 | HORIZONTAL_FLIP = True 78 | IMAGE_PYRAMID = True 79 | 80 | # --------------------------------------------- Network_config 81 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 82 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 83 | PROBABILITY = 0.01 84 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 85 | WEIGHT_DECAY = 1e-4 86 | USE_GN = False 87 | FPN_CHANNEL = 256 88 | 89 | # ---------------------------------------------Anchor config 90 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 91 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 92 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 93 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 94 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 
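# (added note) IMG_SHORT_SIDE_LEN above is a list because IMAGE_PYRAMID = True; the
# preprocessing presumably samples one short-side length per image for multi-scale
# training, and ANGLE_RANGE = 180 below would select the long-edge 180-degree angle
# definition rather than the OpenCV-style 90-degree one.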
95 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 96 | ANCHOR_SCALE_FACTORS = None 97 | USE_CENTER_OFFSET = True 98 | METHOD = 'H' 99 | USE_ANGLE_COND = False 100 | ANGLE_RANGE = 180 # 90 or 180 101 | 102 | # --------------------------------------------RPN config 103 | SHARE_NET = True 104 | USE_P5 = True 105 | IOU_POSITIVE_THRESHOLD = 0.5 106 | IOU_NEGATIVE_THRESHOLD = 0.4 107 | 108 | NMS = True 109 | NMS_IOU_THRESHOLD = 0.1 110 | MAXIMUM_DETECTIONS = 100 111 | FILTERED_SCORE = 0.05 112 | VIS_SCORE = 0.7 113 | 114 | 115 | -------------------------------------------------------------------------------- /libs/configs/ICDAR2015/baseline/cfgs_res50_icdar2015_baseline_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 2020-09-25 retinanet 72.32% 66.23% 69.14% 9 | 10 | """ 11 | 12 | # ------------------------------------------------ 13 | VERSION = 'RetinaNet_ICDAR2015_Baseline_2x_20200925' 14 | NET_NAME = 'resnet50_v1d' # 'MobilenetV2' 15 | ADD_BOX_IN_TENSORBOARD = True 16 | 17 | # ---------------------------------------- System_config 18 | ROOT_PATH = os.path.abspath('../') 19 | print(20*"++--") 20 | print(ROOT_PATH) 21 | GPU_GROUP = "3" 22 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 23 | SHOW_TRAIN_INFO_INTE = 20 24 | SMRY_ITER = 200 25 | SAVE_WEIGHTS_INTE = 10000 26 | 27 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 28 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 29 | 30 | if NET_NAME.startswith("resnet"): 31 | weights_name = NET_NAME 32 | elif NET_NAME.startswith("MobilenetV2"): 33 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 34 | else: 35 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 36 | 37 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 38 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 39 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 40 | 41 | # ------------------------------------------ Train config 42 | RESTORE_FROM_RPN = False 43 | FIXED_BLOCKS = 1 # allow 0~3 44 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 45 | USE_07_METRIC = True 46 | 47 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 48 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 49 | 50 | CLS_WEIGHT = 1.0 51 | REG_WEIGHT = 1.0 52 | ANGLE_WEIGHT = 0.5 53 | REG_LOSS_MODE = None 54 | ALPHA = 1.0 55 | BETA = 1.0 56 | 57 | BATCH_SIZE = 1 58 | EPSILON = 1e-5 59 | MOMENTUM = 0.9 60 | LR = 5e-4 61 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 62 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 63 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 64 | 65 | # -------------------------------------------- Data_preprocess_config 66 | DATASET_NAME = 'ICDAR2015' # 'pascal', 'coco' 67 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 68 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 69 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR 70 | IMG_SHORT_SIDE_LEN = 800 71 | IMG_MAX_LENGTH = 1000 72 | CLASS_NUM = 1 73 | 74 | IMG_ROTATE = False 75 | RGB2GRAY = False 76 | VERTICAL_FLIP = False 77 | HORIZONTAL_FLIP = True 78 | IMAGE_PYRAMID = False 79 | 80 | # --------------------------------------------- Network_config 81 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 82 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 83 | PROBABILITY = 0.01 84 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 85 | WEIGHT_DECAY = 1e-4 86 | USE_GN = False 87 | FPN_CHANNEL = 256 88 | 89 | # ---------------------------------------------Anchor config 90 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 91 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 92 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 93 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 94 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 95 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 96 | ANCHOR_SCALE_FACTORS = None 97 | USE_CENTER_OFFSET = True 98 | METHOD = 'H' 99 | USE_ANGLE_COND = False 100 | ANGLE_RANGE = 180 # 90 or 180 101 | 102 | # --------------------------------------------RPN config 103 | SHARE_NET = True 104 | USE_P5 = True 105 | IOU_POSITIVE_THRESHOLD = 0.5 106 | IOU_NEGATIVE_THRESHOLD = 0.4 107 | 108 | NMS = True 109 | NMS_IOU_THRESHOLD = 0.1 110 | MAXIMUM_DETECTIONS = 100 111 | FILTERED_SCORE = 0.05 112 | VIS_SCORE = 0.8 113 | 114 | 115 | -------------------------------------------------------------------------------- /libs/configs/ICDAR2015/csl/cfgs_res101_icdar2015_csl_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | CSL + gaussian label, omega=1, r=6 9 | 2020-10-04 CSL 80.50% 87.40% 83.81% 10 | 11 | """ 12 | 13 | # ------------------------------------------------ 14 | VERSION = 'RetinaNet_ICDAR2015_CSL_2x_20201001' 15 | NET_NAME = 'resnet50_v1d' # 'MobilenetV2' 16 | ADD_BOX_IN_TENSORBOARD = True 17 | 18 | # ---------------------------------------- System_config 19 | ROOT_PATH = os.path.abspath('../') 20 | print(20*"++--") 21 | print(ROOT_PATH) 22 | GPU_GROUP = "0,1,2" 23 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 24 | SHOW_TRAIN_INFO_INTE = 20 25 | SMRY_ITER = 2000 26 | SAVE_WEIGHTS_INTE = 10000 * 2 27 | 28 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 29 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 30 | 31 | if NET_NAME.startswith("resnet"): 32 | weights_name = NET_NAME 33 | elif NET_NAME.startswith("MobilenetV2"): 34 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 35 | else: 36 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 37 | 38 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 39 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 40 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 41 | 42 | # ------------------------------------------ Train config 43 | RESTORE_FROM_RPN = False 44 | FIXED_BLOCKS = 1 # allow 0~3 45 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 46 | USE_07_METRIC = True 47 | 48 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 49 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 50 | 51 | CLS_WEIGHT = 1.0 52 | REG_WEIGHT = 
1.0 53 | ANGLE_WEIGHT = 0.5 54 | REG_LOSS_MODE = None 55 | 56 | BATCH_SIZE = 1 57 | EPSILON = 1e-5 58 | MOMENTUM = 0.9 59 | LR = 5e-4 60 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 61 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 62 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 63 | 64 | # -------------------------------------------- Data_preprocess_config 65 | DATASET_NAME = 'ICDAR2015' # 'pascal', 'coco' 66 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 67 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 68 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 69 | IMG_SHORT_SIDE_LEN = [800, 600, 1000, 1200] 70 | IMG_MAX_LENGTH = 1500 71 | CLASS_NUM = 1 72 | LABEL_TYPE = 0 73 | RADUIUS = 6 74 | OMEGA = 1 75 | 76 | IMG_ROTATE = True 77 | RGB2GRAY = True 78 | VERTICAL_FLIP = True 79 | HORIZONTAL_FLIP = True 80 | IMAGE_PYRAMID = True 81 | 82 | # --------------------------------------------- Network_config 83 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 84 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 85 | PROBABILITY = 0.01 86 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 87 | WEIGHT_DECAY = 1e-4 88 | USE_GN = False 89 | FPN_CHANNEL = 256 90 | 91 | # ---------------------------------------------Anchor config 92 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 93 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 94 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 95 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 96 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 97 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 98 | ANCHOR_SCALE_FACTORS = None 99 | USE_CENTER_OFFSET = True 100 | METHOD = 'H' 101 | USE_ANGLE_COND = False 102 | ANGLE_RANGE = 180 # 90 or 180 103 | 104 | # --------------------------------------------RPN config 105 | SHARE_NET = True 106 | USE_P5 = True 107 | IOU_POSITIVE_THRESHOLD = 0.5 108 | IOU_NEGATIVE_THRESHOLD = 0.4 109 | 110 | NMS = True 111 | NMS_IOU_THRESHOLD = 0.1 112 | MAXIMUM_DETECTIONS = 100 113 | FILTERED_SCORE = 0.05 114 | VIS_SCORE = 0.7 115 | 116 | -------------------------------------------------------------------------------- /libs/configs/ICDAR2015/dcl/cfgs_res101_icdar2015_dcl_v4.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 180 / 64. 
9 | 10 | 2020-09-29 DCL 81.61% 84.79% 83.17% 11 | 12 | """ 13 | 14 | # ------------------------------------------------ 15 | VERSION = 'RetinaNet_ICDAR2015_DCL_B_2x_20200928' 16 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 17 | ADD_BOX_IN_TENSORBOARD = True 18 | 19 | # ---------------------------------------- System_config 20 | ROOT_PATH = os.path.abspath('../') 21 | print(20*"++--") 22 | print(ROOT_PATH) 23 | GPU_GROUP = "0,1,2" 24 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 25 | SHOW_TRAIN_INFO_INTE = 20 26 | SMRY_ITER = 200 27 | SAVE_WEIGHTS_INTE = 10000 * 2 28 | 29 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 30 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 31 | 32 | if NET_NAME.startswith("resnet"): 33 | weights_name = NET_NAME 34 | elif NET_NAME.startswith("MobilenetV2"): 35 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 36 | else: 37 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 38 | 39 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 40 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 41 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 42 | 43 | # ------------------------------------------ Train config 44 | RESTORE_FROM_RPN = False 45 | FIXED_BLOCKS = 1 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | 49 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 50 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 51 | 52 | CLS_WEIGHT = 1.0 53 | REG_WEIGHT = 1.0 54 | ANGLE_WEIGHT = 0.5 55 | REG_LOSS_MODE = None 56 | ALPHA = 1.0 57 | BETA = 1.0 58 | 59 | BATCH_SIZE = 1 60 | EPSILON = 1e-5 61 | MOMENTUM = 0.9 62 | LR = 5e-4 63 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 64 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 65 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 66 | 67 | # -------------------------------------------- Data_preprocess_config 68 | DATASET_NAME = 'ICDAR2015' # 'pascal', 'coco' 69 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 70 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 71 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 72 | IMG_SHORT_SIDE_LEN = [800, 600, 1000, 1200] 73 | IMG_MAX_LENGTH = 1500 74 | CLASS_NUM = 1 75 | OMEGA = 180 / 64. 76 | ANGLE_MODE = 0 77 | 78 | IMG_ROTATE = True 79 | RGB2GRAY = True 80 | VERTICAL_FLIP = True 81 | HORIZONTAL_FLIP = True 82 | IMAGE_PYRAMID = True 83 | 84 | # --------------------------------------------- Network_config 85 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 86 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 87 | PROBABILITY = 0.01 88 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 89 | WEIGHT_DECAY = 1e-4 90 | USE_GN = False 91 | FPN_CHANNEL = 256 92 | 93 | # ---------------------------------------------Anchor config 94 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 95 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 96 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 97 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 98 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 
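# A minimal sanity check, not part of the original config: with METHOD = 'H'
# the detector lays down len(ANCHOR_SCALES) * len(ANCHOR_RATIOS) horizontal
# anchors per feature-map location; a config with METHOD = 'R' (see the
# OHD-SJTU v2 file further below) additionally enumerates ANCHOR_ANGLES.
# The local names below mirror the values in this config.
_scales = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]
_ratios = [1., 1 / 2., 2., 1 / 3., 3., 5., 1 / 5.]
_angles = [-90, -75, -60, -45, -30, -15]
anchors_per_loc_h = len(_scales) * len(_ratios)          # 3 * 7 = 21
anchors_per_loc_r = anchors_per_loc_h * len(_angles)     # 21 * 6 = 126 under METHOD = 'R'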
99 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 100 | ANCHOR_SCALE_FACTORS = None 101 | USE_CENTER_OFFSET = True 102 | METHOD = 'H' 103 | USE_ANGLE_COND = False 104 | ANGLE_RANGE = 180 # 90 or 180 105 | 106 | # --------------------------------------------RPN config 107 | SHARE_NET = True 108 | USE_P5 = True 109 | IOU_POSITIVE_THRESHOLD = 0.5 110 | IOU_NEGATIVE_THRESHOLD = 0.4 111 | 112 | NMS = True 113 | NMS_IOU_THRESHOLD = 0.1 114 | MAXIMUM_DETECTIONS = 100 115 | FILTERED_SCORE = 0.05 116 | VIS_SCORE = 0.6 117 | 118 | 119 | -------------------------------------------------------------------------------- /libs/configs/ICDAR2015/dcl/cfgs_res50_icdar2015_dcl_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 180 / 256. 9 | FLOPs: 478813602; Trainable params: 32664081 10 | 2020-09-25 DCL 67.21% 71.15% 69.13% 11 | """ 12 | 13 | # ------------------------------------------------ 14 | VERSION = 'RetinaNet_ICDAR2015_DCL_B_2x_20200924' 15 | NET_NAME = 'resnet50_v1d' # 'MobilenetV2' 16 | ADD_BOX_IN_TENSORBOARD = True 17 | 18 | # ---------------------------------------- System_config 19 | ROOT_PATH = os.path.abspath('../') 20 | print(20*"++--") 21 | print(ROOT_PATH) 22 | GPU_GROUP = "3" 23 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 24 | SHOW_TRAIN_INFO_INTE = 20 25 | SMRY_ITER = 200 26 | SAVE_WEIGHTS_INTE = 10000 27 | 28 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 29 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 30 | 31 | if NET_NAME.startswith("resnet"): 32 | weights_name = NET_NAME 33 | elif NET_NAME.startswith("MobilenetV2"): 34 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 35 | else: 36 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 37 | 38 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 39 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 40 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 41 | 42 | # ------------------------------------------ Train config 43 | RESTORE_FROM_RPN = False 44 | FIXED_BLOCKS = 1 # allow 0~3 45 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 46 | USE_07_METRIC = True 47 | 48 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 49 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 50 | 51 | CLS_WEIGHT = 1.0 52 | REG_WEIGHT = 1.0 53 | ANGLE_WEIGHT = 0.5 54 | REG_LOSS_MODE = None 55 | ALPHA = 1.0 56 | BETA = 1.0 57 | 58 | BATCH_SIZE = 4 59 | EPSILON = 1e-5 60 | MOMENTUM = 0.9 61 | LR = 5e-4 62 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 63 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 64 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 65 | 66 | # -------------------------------------------- Data_preprocess_config 67 | DATASET_NAME = 'ICDAR2015' # 'pascal', 'coco' 68 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 69 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 70 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 71 | IMG_SHORT_SIDE_LEN = 800 72 | IMG_MAX_LENGTH = 1000 73 | CLASS_NUM = 1 74 | OMEGA = 180 / 256. 
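# What OMEGA controls, sketched under the DCL reading of this config: the
# angle range is cut into ANGLE_RANGE / OMEGA bins and each bin index is
# emitted as a binary code (the 'B' in the VERSION string). The repo's actual
# encoder is help_utils/densely_coded_label.py; the helper below is an
# illustrative assumption about the encoding, not a copy of it.
import math

_num_bins = int(round(180 / (180 / 256.)))     # 256 angle classes
_code_len = int(math.log2(_num_bins))          # 8-bit code per box

def encode_angle(angle, omega=180 / 256.):
    """Hypothetical helper: map an angle in [-90, 90) to its binary code."""
    bin_idx = int((angle + 90) // omega) % _num_bins
    return [int(b) for b in format(bin_idx, '0%db' % _code_len)]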
75 | ANGLE_MODE = 0 76 | 77 | IMG_ROTATE = False 78 | RGB2GRAY = False 79 | VERTICAL_FLIP = False 80 | HORIZONTAL_FLIP = True 81 | IMAGE_PYRAMID = False 82 | 83 | # --------------------------------------------- Network_config 84 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 85 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 86 | PROBABILITY = 0.01 87 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 88 | WEIGHT_DECAY = 1e-4 89 | USE_GN = False 90 | FPN_CHANNEL = 256 91 | 92 | # ---------------------------------------------Anchor config 93 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 94 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 95 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 96 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 97 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 98 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 99 | ANCHOR_SCALE_FACTORS = None 100 | USE_CENTER_OFFSET = True 101 | METHOD = 'H' 102 | USE_ANGLE_COND = False 103 | ANGLE_RANGE = 180 # 90 or 180 104 | 105 | # --------------------------------------------RPN config 106 | SHARE_NET = True 107 | USE_P5 = True 108 | IOU_POSITIVE_THRESHOLD = 0.5 109 | IOU_NEGATIVE_THRESHOLD = 0.4 110 | 111 | NMS = True 112 | NMS_IOU_THRESHOLD = 0.1 113 | MAXIMUM_DETECTIONS = 100 114 | FILTERED_SCORE = 0.05 115 | VIS_SCORE = 0.75 116 | 117 | 118 | -------------------------------------------------------------------------------- /libs/configs/ICDAR2015/dcl/cfgs_res50_icdar2015_dcl_v2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 180 / 256. 
9 | 10 | 2020-09-26 DCL 71.79% 68.58% 70.15% 11 | 12 | """ 13 | 14 | # ------------------------------------------------ 15 | VERSION = 'RetinaNet_ICDAR2015_DCL_B_2x_20200926' 16 | NET_NAME = 'resnet50_v1d' # 'MobilenetV2' 17 | ADD_BOX_IN_TENSORBOARD = True 18 | 19 | # ---------------------------------------- System_config 20 | ROOT_PATH = os.path.abspath('../') 21 | print(20*"++--") 22 | print(ROOT_PATH) 23 | GPU_GROUP = "3" 24 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 25 | SHOW_TRAIN_INFO_INTE = 20 26 | SMRY_ITER = 200 27 | SAVE_WEIGHTS_INTE = 10000 28 | 29 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 30 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 31 | 32 | if NET_NAME.startswith("resnet"): 33 | weights_name = NET_NAME 34 | elif NET_NAME.startswith("MobilenetV2"): 35 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 36 | else: 37 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 38 | 39 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 40 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 41 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 42 | 43 | # ------------------------------------------ Train config 44 | RESTORE_FROM_RPN = False 45 | FIXED_BLOCKS = 1 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | 49 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 50 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 51 | 52 | CLS_WEIGHT = 1.0 53 | REG_WEIGHT = 1.0 54 | ANGLE_WEIGHT = 0.5 55 | REG_LOSS_MODE = None 56 | ALPHA = 1.0 57 | BETA = 1.0 58 | 59 | BATCH_SIZE = 1 60 | EPSILON = 1e-5 61 | MOMENTUM = 0.9 62 | LR = 5e-4 63 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 64 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 65 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 66 | 67 | # -------------------------------------------- Data_preprocess_config 68 | DATASET_NAME = 'ICDAR2015' # 'pascal', 'coco' 69 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 70 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 71 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 72 | IMG_SHORT_SIDE_LEN = 800 73 | IMG_MAX_LENGTH = 1000 74 | CLASS_NUM = 1 75 | OMEGA = 180 / 256. 76 | ANGLE_MODE = 0 77 | 78 | IMG_ROTATE = False 79 | RGB2GRAY = False 80 | VERTICAL_FLIP = False 81 | HORIZONTAL_FLIP = True 82 | IMAGE_PYRAMID = False 83 | 84 | # --------------------------------------------- Network_config 85 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 86 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 87 | PROBABILITY = 0.01 88 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 89 | WEIGHT_DECAY = 1e-4 90 | USE_GN = False 91 | FPN_CHANNEL = 256 92 | 93 | # ---------------------------------------------Anchor config 94 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 95 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 96 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 97 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 98 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 
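# Why FINAL_CONV_BIAS_INITIALIZER above is -log((1 - PROBABILITY) / PROBABILITY):
# this is the RetinaNet focal-loss prior trick -- the last classification bias
# is set so that every anchor starts with sigmoid output ~= PROBABILITY (0.01),
# which stops training from being swamped by easy negatives early on.
# A quick check that the bias recovers the prior:
import math

_prior = 0.01
_bias = -math.log((1.0 - _prior) / _prior)                    # ~= -4.595
assert abs(1.0 / (1.0 + math.exp(-_bias)) - _prior) < 1e-12   # sigmoid(_bias) == prior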
99 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 100 | ANCHOR_SCALE_FACTORS = None 101 | USE_CENTER_OFFSET = True 102 | METHOD = 'H' 103 | USE_ANGLE_COND = False 104 | ANGLE_RANGE = 180 # 90 or 180 105 | 106 | # --------------------------------------------RPN config 107 | SHARE_NET = True 108 | USE_P5 = True 109 | IOU_POSITIVE_THRESHOLD = 0.5 110 | IOU_NEGATIVE_THRESHOLD = 0.4 111 | 112 | NMS = True 113 | NMS_IOU_THRESHOLD = 0.1 114 | MAXIMUM_DETECTIONS = 100 115 | FILTERED_SCORE = 0.05 116 | VIS_SCORE = 0.78 117 | 118 | 119 | -------------------------------------------------------------------------------- /libs/configs/ICDAR2015/dcl/cfgs_res50_icdar2015_dcl_v3.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 180 / 64. 9 | 10 | 2020-09-27 DCL 71.40% 69.53% 70.45% 11 | 12 | """ 13 | 14 | # ------------------------------------------------ 15 | VERSION = 'RetinaNet_ICDAR2015_DCL_B_2x_20200927' 16 | NET_NAME = 'resnet50_v1d' # 'MobilenetV2' 17 | ADD_BOX_IN_TENSORBOARD = True 18 | 19 | # ---------------------------------------- System_config 20 | ROOT_PATH = os.path.abspath('../') 21 | print(20*"++--") 22 | print(ROOT_PATH) 23 | GPU_GROUP = "3" 24 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 25 | SHOW_TRAIN_INFO_INTE = 20 26 | SMRY_ITER = 200 27 | SAVE_WEIGHTS_INTE = 10000 28 | 29 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 30 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 31 | 32 | if NET_NAME.startswith("resnet"): 33 | weights_name = NET_NAME 34 | elif NET_NAME.startswith("MobilenetV2"): 35 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 36 | else: 37 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 38 | 39 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 40 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 41 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 42 | 43 | # ------------------------------------------ Train config 44 | RESTORE_FROM_RPN = False 45 | FIXED_BLOCKS = 1 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | 49 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 50 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 51 | 52 | CLS_WEIGHT = 1.0 53 | REG_WEIGHT = 1.0 54 | ANGLE_WEIGHT = 0.5 55 | REG_LOSS_MODE = None 56 | ALPHA = 1.0 57 | BETA = 1.0 58 | 59 | BATCH_SIZE = 1 60 | EPSILON = 1e-5 61 | MOMENTUM = 0.9 62 | LR = 5e-4 63 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 64 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 65 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 66 | 67 | # -------------------------------------------- Data_preprocess_config 68 | DATASET_NAME = 'ICDAR2015' # 'pascal', 'coco' 69 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 70 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 71 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 72 | IMG_SHORT_SIDE_LEN = 800 73 | IMG_MAX_LENGTH = 1000 74 | CLASS_NUM = 1 75 | OMEGA = 180 / 64. 
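# With OMEGA = 180 / 64. there are 64 angle bins, i.e. a 6-bit code instead of
# the 8-bit one in the 180 / 256. variants. ANGLE_MODE on the next line appears
# to select the coding: the 'DCL_B' configs in this dump use ANGLE_MODE = 0,
# while the 'DCL_G' OHD-SJTU config further below uses ANGLE_MODE = 1 -- an
# inference from the VERSION strings, not a documented contract. For reference,
# the standard binary-to-Gray conversion such a 'G' variant would rely on:
def binary_to_gray(bin_idx):
    # Adjacent bin indices differ in exactly one bit after Gray coding, so a
    # near-miss angle prediction flips only one output unit.
    return bin_idx ^ (bin_idx >> 1)

assert binary_to_gray(3) == 2 and binary_to_gray(4) == 6   # 011 -> 010, 100 -> 110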
76 | ANGLE_MODE = 0 77 | 78 | IMG_ROTATE = False 79 | RGB2GRAY = False 80 | VERTICAL_FLIP = False 81 | HORIZONTAL_FLIP = True 82 | IMAGE_PYRAMID = False 83 | 84 | # --------------------------------------------- Network_config 85 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 86 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 87 | PROBABILITY = 0.01 88 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 89 | WEIGHT_DECAY = 1e-4 90 | USE_GN = False 91 | FPN_CHANNEL = 256 92 | 93 | # ---------------------------------------------Anchor config 94 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 95 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 96 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 97 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 98 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 99 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 100 | ANCHOR_SCALE_FACTORS = None 101 | USE_CENTER_OFFSET = True 102 | METHOD = 'H' 103 | USE_ANGLE_COND = False 104 | ANGLE_RANGE = 180 # 90 or 180 105 | 106 | # --------------------------------------------RPN config 107 | SHARE_NET = True 108 | USE_P5 = True 109 | IOU_POSITIVE_THRESHOLD = 0.5 110 | IOU_NEGATIVE_THRESHOLD = 0.4 111 | 112 | NMS = True 113 | NMS_IOU_THRESHOLD = 0.1 114 | MAXIMUM_DETECTIONS = 100 115 | FILTERED_SCORE = 0.05 116 | VIS_SCORE = 0.85 117 | 118 | 119 | -------------------------------------------------------------------------------- /libs/configs/MLT/baseline/cfgs_res101_icdar2015_baseline_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 2020-10-11 CSL 64.01% 75.24% 55.70% 51.13% (0.45) 9 | 2020-10-11 CSL 63.81% 71.78% 57.43% 52.40% (0.4) 10 | 2020-10-11 CSL 61.25% 62.30% 60.24% 54.30% (0.3) 11 | 12 | """ 13 | 14 | # ------------------------------------------------ 15 | VERSION = 'RetinaNet_MLT_Baseline_2x_20201002' 16 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 17 | ADD_BOX_IN_TENSORBOARD = True 18 | 19 | # ---------------------------------------- System_config 20 | ROOT_PATH = os.path.abspath('../') 21 | print(20*"++--") 22 | print(ROOT_PATH) 23 | GPU_GROUP = "0,1,2" 24 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 25 | SHOW_TRAIN_INFO_INTE = 20 26 | SMRY_ITER = 200 27 | SAVE_WEIGHTS_INTE = 10000 * 2 28 | 29 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 30 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 31 | 32 | if NET_NAME.startswith("resnet"): 33 | weights_name = NET_NAME 34 | elif NET_NAME.startswith("MobilenetV2"): 35 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 36 | else: 37 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 38 | 39 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 40 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 41 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 42 | 43 | # ------------------------------------------ Train config 44 | RESTORE_FROM_RPN = False 45 | FIXED_BLOCKS = 1 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | 49 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 50 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 51 | 52 | CLS_WEIGHT = 1.0 53 | 
REG_WEIGHT = 1.0 54 | ANGLE_WEIGHT = 0.5 55 | REG_LOSS_MODE = None 56 | ALPHA = 1.0 57 | BETA = 1.0 58 | 59 | BATCH_SIZE = 1 60 | EPSILON = 1e-5 61 | MOMENTUM = 0.9 62 | LR = 5e-4 63 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 64 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 65 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 66 | 67 | # -------------------------------------------- Data_preprocess_config 68 | DATASET_NAME = 'MLT' # 'pascal', 'coco' 69 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 70 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 71 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 72 | IMG_SHORT_SIDE_LEN = [800, 600, 1000, 1200] 73 | IMG_MAX_LENGTH = 1500 74 | CLASS_NUM = 1 75 | 76 | IMG_ROTATE = True 77 | RGB2GRAY = True 78 | VERTICAL_FLIP = True 79 | HORIZONTAL_FLIP = True 80 | IMAGE_PYRAMID = True 81 | 82 | # --------------------------------------------- Network_config 83 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 84 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 85 | PROBABILITY = 0.01 86 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 87 | WEIGHT_DECAY = 1e-4 88 | USE_GN = False 89 | FPN_CHANNEL = 256 90 | 91 | # ---------------------------------------------Anchor config 92 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 93 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 94 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 95 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 96 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 97 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 98 | ANCHOR_SCALE_FACTORS = None 99 | USE_CENTER_OFFSET = True 100 | METHOD = 'H' 101 | USE_ANGLE_COND = False 102 | ANGLE_RANGE = 180 # 90 or 180 103 | 104 | # --------------------------------------------RPN config 105 | SHARE_NET = True 106 | USE_P5 = True 107 | IOU_POSITIVE_THRESHOLD = 0.5 108 | IOU_NEGATIVE_THRESHOLD = 0.4 109 | 110 | NMS = True 111 | NMS_IOU_THRESHOLD = 0.1 112 | MAXIMUM_DETECTIONS = 100 113 | FILTERED_SCORE = 0.05 114 | VIS_SCORE = 0.1 115 | 116 | 117 | -------------------------------------------------------------------------------- /libs/configs/MLT/csl/cfgs_res101_mlt_csl_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 2020-10-07 DCL 62.67% 64.24% 61.18% 55.51% (0.3) 9 | 2020-10-07 DCL 65.08% 73.62% 58.32% 53.52% (0.4) 10 | 2020-10-07 DCL 65.23% 77.12% 56.52% 52.16% (0.45) 11 | 12 | """ 13 | 14 | # ------------------------------------------------ 15 | VERSION = 'RetinaNet_MLT_CSL_2x_20201007' 16 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 17 | ADD_BOX_IN_TENSORBOARD = True 18 | 19 | # ---------------------------------------- System_config 20 | ROOT_PATH = os.path.abspath('../') 21 | print(20*"++--") 22 | print(ROOT_PATH) 23 | GPU_GROUP = "0,1,2" 24 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 25 | SHOW_TRAIN_INFO_INTE = 20 26 | SMRY_ITER = 200 27 | SAVE_WEIGHTS_INTE = 10000 * 2 28 | 29 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 30 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 31 | 32 | if NET_NAME.startswith("resnet"): 33 | weights_name = NET_NAME 34 | elif NET_NAME.startswith("MobilenetV2"): 35 | 
weights_name = "mobilenet/mobilenet_v2_1.0_224" 36 | else: 37 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 38 | 39 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 40 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 41 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 42 | 43 | # ------------------------------------------ Train config 44 | RESTORE_FROM_RPN = False 45 | FIXED_BLOCKS = 1 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | 49 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 50 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 51 | 52 | CLS_WEIGHT = 1.0 53 | REG_WEIGHT = 1.0 54 | ANGLE_WEIGHT = 0.5 55 | REG_LOSS_MODE = None 56 | ALPHA = 1.0 57 | BETA = 1.0 58 | 59 | BATCH_SIZE = 1 60 | EPSILON = 1e-5 61 | MOMENTUM = 0.9 62 | LR = 5e-4 63 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 64 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 65 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 66 | 67 | # -------------------------------------------- Data_preprocess_config 68 | DATASET_NAME = 'MLT' # 'pascal', 'coco' 69 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 70 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 71 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 72 | IMG_SHORT_SIDE_LEN = [800, 600, 1000, 1200] 73 | IMG_MAX_LENGTH = 1500 74 | CLASS_NUM = 1 75 | LABEL_TYPE = 0 76 | RADUIUS = 6 77 | OMEGA = 1 78 | 79 | IMG_ROTATE = True 80 | RGB2GRAY = True 81 | VERTICAL_FLIP = True 82 | HORIZONTAL_FLIP = True 83 | IMAGE_PYRAMID = True 84 | 85 | # --------------------------------------------- Network_config 86 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 87 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 88 | PROBABILITY = 0.01 89 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 90 | WEIGHT_DECAY = 1e-4 91 | USE_GN = False 92 | FPN_CHANNEL = 256 93 | 94 | # ---------------------------------------------Anchor config 95 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 96 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 97 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 98 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 99 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 100 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 101 | ANCHOR_SCALE_FACTORS = None 102 | USE_CENTER_OFFSET = True 103 | METHOD = 'H' 104 | USE_ANGLE_COND = False 105 | ANGLE_RANGE = 180 # 90 or 180 106 | 107 | # --------------------------------------------RPN config 108 | SHARE_NET = True 109 | USE_P5 = True 110 | IOU_POSITIVE_THRESHOLD = 0.5 111 | IOU_NEGATIVE_THRESHOLD = 0.4 112 | 113 | NMS = True 114 | NMS_IOU_THRESHOLD = 0.1 115 | MAXIMUM_DETECTIONS = 100 116 | FILTERED_SCORE = 0.05 117 | VIS_SCORE = 0.1 118 | 119 | 120 | -------------------------------------------------------------------------------- /libs/configs/MLT/dcl/cfgs_res101_mlt_dcl_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 180 / 64. 
9 | 10 | 2020-10-06 retinanet 62.60% 63.34% 61.88% 56.45% (0.3) 11 | 2020-10-06 retinanet 65.26% 73.14% 58.91% 54.41% (0.4) 12 | """ 13 | 14 | # ------------------------------------------------ 15 | VERSION = 'RetinaNet_MLT_DCL_B_2x_20200928' 16 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 17 | ADD_BOX_IN_TENSORBOARD = True 18 | 19 | # ---------------------------------------- System_config 20 | ROOT_PATH = os.path.abspath('../') 21 | print(20*"++--") 22 | print(ROOT_PATH) 23 | GPU_GROUP = "0,1,2" 24 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 25 | SHOW_TRAIN_INFO_INTE = 20 26 | SMRY_ITER = 200 27 | SAVE_WEIGHTS_INTE = 10000 * 2 28 | 29 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 30 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 31 | 32 | if NET_NAME.startswith("resnet"): 33 | weights_name = NET_NAME 34 | elif NET_NAME.startswith("MobilenetV2"): 35 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 36 | else: 37 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 38 | 39 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 40 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 41 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 42 | 43 | # ------------------------------------------ Train config 44 | RESTORE_FROM_RPN = False 45 | FIXED_BLOCKS = 1 # allow 0~3 46 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 47 | USE_07_METRIC = True 48 | 49 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 50 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 51 | 52 | CLS_WEIGHT = 1.0 53 | REG_WEIGHT = 1.0 54 | ANGLE_WEIGHT = 0.5 55 | REG_LOSS_MODE = None 56 | ALPHA = 1.0 57 | BETA = 1.0 58 | 59 | BATCH_SIZE = 1 60 | EPSILON = 1e-5 61 | MOMENTUM = 0.9 62 | LR = 5e-4 63 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 64 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 65 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 66 | 67 | # -------------------------------------------- Data_preprocess_config 68 | DATASET_NAME = 'MLT' # 'pascal', 'coco' 69 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 70 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 71 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 72 | IMG_SHORT_SIDE_LEN = [800, 600, 1000, 1200] 73 | IMG_MAX_LENGTH = 1500 74 | CLASS_NUM = 1 75 | OMEGA = 180 / 64. 76 | ANGLE_MODE = 0 77 | 78 | IMG_ROTATE = True 79 | RGB2GRAY = True 80 | VERTICAL_FLIP = True 81 | HORIZONTAL_FLIP = True 82 | IMAGE_PYRAMID = True 83 | 84 | # --------------------------------------------- Network_config 85 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 86 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 87 | PROBABILITY = 0.01 88 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 89 | WEIGHT_DECAY = 1e-4 90 | USE_GN = False 91 | FPN_CHANNEL = 256 92 | 93 | # ---------------------------------------------Anchor config 94 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 95 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 96 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 97 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 98 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 
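# How a (scale, ratio) pair becomes an anchor shape, assuming the standard
# area-preserving RetinaNet construction (libs/box_utils/generate_anchors.py
# is authoritative; this is an illustrative sketch):
def anchor_shapes(base_size,
                  scales=(2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)),
                  ratios=(1., 1 / 2., 2., 1 / 3., 3., 5., 1 / 5.)):
    """Return (w, h) for every anchor at one pyramid level."""
    shapes = []
    for scale in scales:
        size = base_size * scale
        for ratio in ratios:                      # ratio read as h / w
            shapes.append((size / ratio ** 0.5,   # keeps w * h == size ** 2
                           size * ratio ** 0.5))
    return shapes

# e.g. P3, where BASE_ANCHOR_SIZE_LIST[0] = 32 and ANCHOR_STRIDE[0] = 8:
assert len(anchor_shapes(32)) == 21               # scales x ratios per location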
99 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 100 | ANCHOR_SCALE_FACTORS = None 101 | USE_CENTER_OFFSET = True 102 | METHOD = 'H' 103 | USE_ANGLE_COND = False 104 | ANGLE_RANGE = 180 # 90 or 180 105 | 106 | # --------------------------------------------RPN config 107 | SHARE_NET = True 108 | USE_P5 = True 109 | IOU_POSITIVE_THRESHOLD = 0.5 110 | IOU_NEGATIVE_THRESHOLD = 0.4 111 | 112 | NMS = True 113 | NMS_IOU_THRESHOLD = 0.1 114 | MAXIMUM_DETECTIONS = 100 115 | FILTERED_SCORE = 0.05 116 | VIS_SCORE = 0.1 117 | 118 | 119 | -------------------------------------------------------------------------------- /libs/configs/OHD-SJTU/cfgs_res101_ohd-sjtu_gwd_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 9 | 10 | """ 11 | 12 | # ------------------------------------------------ 13 | VERSION = 'RetinaNet_OHD-SJTU_GWD_2x_20200728' 14 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 15 | ADD_BOX_IN_TENSORBOARD = True 16 | 17 | # ---------------------------------------- System_config 18 | ROOT_PATH = os.path.abspath('../') 19 | print(20*"++--") 20 | print(ROOT_PATH) 21 | GPU_GROUP = "1,2,3" 22 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 23 | SHOW_TRAIN_INFO_INTE = 20 24 | SMRY_ITER = 200 25 | SAVE_WEIGHTS_INTE = 5000 * 2 26 | 27 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 28 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 29 | 30 | if NET_NAME.startswith("resnet"): 31 | weights_name = NET_NAME 32 | elif NET_NAME.startswith("MobilenetV2"): 33 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 34 | else: 35 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 36 | 37 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 38 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 39 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 40 | 41 | # ------------------------------------------ Train config 42 | RESTORE_FROM_RPN = False 43 | FIXED_BLOCKS = 1 # allow 0~3 44 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 45 | USE_07_METRIC = True 46 | 47 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 48 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 49 | 50 | CLS_WEIGHT = 1.0 51 | REG_WEIGHT = 1.0 52 | REG_LOSS_MODE = 4 53 | ALPHA = 1.0 54 | BETA = 1.0 55 | 56 | BATCH_SIZE = 1 57 | EPSILON = 1e-5 58 | MOMENTUM = 0.9 59 | LR = 1e-4 60 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 61 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 62 | WARM_SETP = int(1.0 / 8.0 * SAVE_WEIGHTS_INTE) 63 | 64 | # -------------------------------------------- Data_preprocess_config 65 | DATASET_NAME = 'OHD-SJTU-600' # 'pascal', 'coco' 66 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 67 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 68 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR 69 | IMG_SHORT_SIDE_LEN = 800 70 | IMG_MAX_LENGTH = 800 71 | CLASS_NUM = 2 72 | 73 | IMG_ROTATE = True 74 | RGB2GRAY = True 75 | VERTICAL_FLIP = True 76 | HORIZONTAL_FLIP = True 77 | IMAGE_PYRAMID = False 78 | 79 | # --------------------------------------------- Network_config 80 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 81 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 82 | PROBABILITY = 0.01 83 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 84 | WEIGHT_DECAY = 1e-4 85 | USE_GN = False 86 | FPN_CHANNEL = 256 87 | 88 | # ---------------------------------------------Anchor config 89 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 90 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 91 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 92 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 93 | ANCHOR_RATIOS = [1, 1 / 3., 3., 5., 1 / 5.] 94 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 95 | ANCHOR_SCALE_FACTORS = None 96 | USE_CENTER_OFFSET = True 97 | METHOD = 'H' 98 | USE_ANGLE_COND = False 99 | ANGLE_RANGE = 90 # or 180 100 | 101 | # --------------------------------------------RPN config 102 | SHARE_NET = True 103 | USE_P5 = True 104 | IOU_POSITIVE_THRESHOLD = 0.5 105 | IOU_NEGATIVE_THRESHOLD = 0.4 106 | 107 | NMS = True 108 | NMS_IOU_THRESHOLD = 0.1 109 | MAXIMUM_DETECTIONS = 100 110 | FILTERED_SCORE = 0.05 111 | VIS_SCORE = 0.4 112 | 113 | 114 | -------------------------------------------------------------------------------- /libs/configs/OHD-SJTU/cfgs_res101_ohd-sjtu_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | classname: plane 9 | npos num: 223 10 | ap: 0.9086680761099367 11 | classname: ship 12 | npos num: 1025 13 | ap: 0.7680000964611735 14 | map: 0.8383340862855551 15 | classaps: [90.86680761 76.80000965] 16 | 17 | [0.8383340862855551, 0.8294369391086178, 0.8213729593604102, 0.7846857109313947, 0.7710299710411961, 18 | 0.6591302619122246, 0.5440679304087981, 0.34149128761647274, 0.11706834092408161, 0.006581439393939394] 19 | 0.571319892698269 20 | 21 | """ 22 | 23 | # ------------------------------------------------ 24 | VERSION = 'RetinaNet_OHD-SJTU_2x_20200728' 25 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 26 | ADD_BOX_IN_TENSORBOARD = True 27 | 28 | # ---------------------------------------- System_config 29 | ROOT_PATH = os.path.abspath('../') 30 | print(20*"++--") 31 | print(ROOT_PATH) 32 | GPU_GROUP = "0,1" 33 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 34 | SHOW_TRAIN_INFO_INTE = 20 35 | SMRY_ITER = 200 36 | SAVE_WEIGHTS_INTE = 5000 * 2 37 | 38 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 39 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 40 | 41 | if NET_NAME.startswith("resnet"): 42 | weights_name = NET_NAME 43 | elif NET_NAME.startswith("MobilenetV2"): 44 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 45 | else: 46 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 47 | 48 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 49 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 50 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 51 | 52 | # ------------------------------------------ Train config 53 | 
RESTORE_FROM_RPN = False 54 | FIXED_BLOCKS = 1 # allow 0~3 55 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 56 | USE_07_METRIC = True 57 | 58 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 59 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 60 | 61 | CLS_WEIGHT = 1.0 62 | REG_WEIGHT = 1.0 63 | REG_LOSS_MODE = None 64 | 65 | BATCH_SIZE = 1 66 | EPSILON = 1e-5 67 | MOMENTUM = 0.9 68 | LR = 5e-4 # * NUM_GPU * BATCH_SIZE 69 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 70 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 71 | WARM_SETP = int(1.0 / 8.0 * SAVE_WEIGHTS_INTE) 72 | 73 | # -------------------------------------------- Data_preprocess_config 74 | DATASET_NAME = 'OHD-SJTU-600' # 'pascal', 'coco' 75 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 76 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 77 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 78 | IMG_SHORT_SIDE_LEN = 800 79 | IMG_MAX_LENGTH = 800 80 | CLASS_NUM = 2 81 | 82 | IMG_ROTATE = True 83 | RGB2GRAY = True 84 | VERTICAL_FLIP = True 85 | HORIZONTAL_FLIP = True 86 | IMAGE_PYRAMID = False 87 | 88 | # --------------------------------------------- Network_config 89 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 90 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 91 | PROBABILITY = 0.01 92 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 93 | WEIGHT_DECAY = 1e-4 94 | USE_GN = False 95 | FPN_CHANNEL = 256 96 | 97 | # ---------------------------------------------Anchor config 98 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 99 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 100 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 101 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 102 | ANCHOR_RATIOS = [1, 1 / 3., 3., 5., 1 / 5.] 
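# The bracketed list in this file's header docstring is AP at IoU thresholds
# 0.50:0.05:0.95 (its first entry matches the map quoted at 0.5), and the
# trailing 0.571319892698269 is their mean, i.e. a COCO-style mAP.
# Reproducing the arithmetic from the quoted numbers:
_aps_50_95 = [0.8383340862855551, 0.8294369391086178, 0.8213729593604102,
              0.7846857109313947, 0.7710299710411961, 0.6591302619122246,
              0.5440679304087981, 0.34149128761647274, 0.11706834092408161,
              0.006581439393939394]
assert abs(sum(_aps_50_95) / len(_aps_50_95) - 0.571319892698269) < 1e-12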
103 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 104 | ANCHOR_SCALE_FACTORS = None 105 | USE_CENTER_OFFSET = True 106 | METHOD = 'H' 107 | USE_ANGLE_COND = False 108 | ANGLE_RANGE = 90 # or 180 109 | 110 | # --------------------------------------------RPN config 111 | SHARE_NET = True 112 | USE_P5 = True 113 | IOU_POSITIVE_THRESHOLD = 0.5 114 | IOU_NEGATIVE_THRESHOLD = 0.4 115 | 116 | NMS = True 117 | NMS_IOU_THRESHOLD = 0.1 118 | MAXIMUM_DETECTIONS = 100 119 | FILTERED_SCORE = 0.05 120 | VIS_SCORE = 0.4 121 | 122 | 123 | -------------------------------------------------------------------------------- /libs/configs/OHD-SJTU/cfgs_res101_ohd-sjtu_v2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | classname: plane 9 | npos num: 223 10 | ap: 0.907399577167019 11 | classname: ship 12 | npos num: 1025 13 | ap: 0.8853244888009996 14 | map: 0.8963620329840093 15 | classaps: [90.73995772 88.53244888] 16 | 17 | [0.8963620329840093, 0.896220113688575, 0.894407255037192, 0.8910001332998732, 0.836621416086192, 18 | 0.7461668460602382, 0.532126970665127, 0.29881340556372865, 0.06862263630283386, 0.004134429400386847] 19 | 0.6064475239088156 20 | 21 | """ 22 | 23 | # ------------------------------------------------ 24 | VERSION = 'RetinaNet_OHD-SJTU_2x_20200729' 25 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 26 | ADD_BOX_IN_TENSORBOARD = True 27 | 28 | # ---------------------------------------- System_config 29 | ROOT_PATH = os.path.abspath('../') 30 | print(20*"++--") 31 | print(ROOT_PATH) 32 | GPU_GROUP = "1,2,3" 33 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 34 | SHOW_TRAIN_INFO_INTE = 20 35 | SMRY_ITER = 200 36 | SAVE_WEIGHTS_INTE = 5000 * 2 37 | 38 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 39 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 40 | 41 | if NET_NAME.startswith("resnet"): 42 | weights_name = NET_NAME 43 | elif NET_NAME.startswith("MobilenetV2"): 44 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 45 | else: 46 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 47 | 48 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 49 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 50 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 51 | 52 | # ------------------------------------------ Train config 53 | RESTORE_FROM_RPN = False 54 | FIXED_BLOCKS = 1 # allow 0~3 55 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 56 | USE_07_METRIC = True 57 | 58 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 59 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 60 | 61 | CLS_WEIGHT = 1.0 62 | REG_WEIGHT = 1.0 63 | REG_LOSS_MODE = None 64 | 65 | BATCH_SIZE = 1 66 | EPSILON = 1e-5 67 | MOMENTUM = 0.9 68 | LR = 5e-4 # * NUM_GPU * BATCH_SIZE 69 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 70 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 71 | WARM_SETP = int(1.0 / 8.0 * SAVE_WEIGHTS_INTE) 72 | 73 | # -------------------------------------------- Data_preprocess_config 74 | DATASET_NAME = 'OHD-SJTU-600' # 'pascal', 'coco' 75 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 76 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 77 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. 
In tf, channel is RGB. In openCV, channel is BGR 78 | IMG_SHORT_SIDE_LEN = 800 79 | IMG_MAX_LENGTH = 800 80 | CLASS_NUM = 2 81 | 82 | IMG_ROTATE = True 83 | RGB2GRAY = True 84 | VERTICAL_FLIP = True 85 | HORIZONTAL_FLIP = True 86 | IMAGE_PYRAMID = False 87 | 88 | # --------------------------------------------- Network_config 89 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 90 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 91 | PROBABILITY = 0.01 92 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 93 | WEIGHT_DECAY = 1e-4 94 | USE_GN = False 95 | FPN_CHANNEL = 256 96 | 97 | # ---------------------------------------------Anchor config 98 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 99 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 100 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 101 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 102 | ANCHOR_RATIOS = [1, 1 / 3., 3., 5., 1 / 5.] 103 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 104 | ANCHOR_SCALE_FACTORS = None 105 | USE_CENTER_OFFSET = True 106 | METHOD = 'R' 107 | USE_ANGLE_COND = False 108 | ANGLE_RANGE = 90 # or 180 109 | 110 | # --------------------------------------------RPN config 111 | SHARE_NET = True 112 | USE_P5 = True 113 | IOU_POSITIVE_THRESHOLD = 0.5 114 | IOU_NEGATIVE_THRESHOLD = 0.4 115 | 116 | NMS = True 117 | NMS_IOU_THRESHOLD = 0.1 118 | MAXIMUM_DETECTIONS = 100 119 | FILTERED_SCORE = 0.05 120 | VIS_SCORE = 0.4 121 | 122 | 123 | -------------------------------------------------------------------------------- /libs/configs/OHD-SJTU/dcl/cfgs_res101_ohd-sjtu-all_dcl_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 9 | 10 | """ 11 | 12 | # ------------------------------------------------ 13 | VERSION = 'RetinaNet_OHD-SJTU-ALL_DCL_G_2x_20200910' 14 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 15 | ADD_BOX_IN_TENSORBOARD = True 16 | 17 | # ---------------------------------------- System_config 18 | ROOT_PATH = os.path.abspath('../') 19 | print(20*"++--") 20 | print(ROOT_PATH) 21 | GPU_GROUP = "0,1,2" 22 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 23 | SHOW_TRAIN_INFO_INTE = 20 24 | SMRY_ITER = 200 25 | SAVE_WEIGHTS_INTE = 20000 * 2 26 | 27 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 28 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 29 | 30 | if NET_NAME.startswith("resnet"): 31 | weights_name = NET_NAME 32 | elif NET_NAME.startswith("MobilenetV2"): 33 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 34 | else: 35 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 36 | 37 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 38 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 39 | EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 40 | 41 | # ------------------------------------------ Train config 42 | RESTORE_FROM_RPN = False 43 | FIXED_BLOCKS = 1 # allow 0~3 44 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 45 | USE_07_METRIC = True 46 | 47 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 48 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 49 | 50 | CLS_WEIGHT = 1.0 51 | REG_WEIGHT = 1.0 52 | ANGLE_WEIGHT = 0.5 53 | REG_LOSS_MODE = None 54 | 
ALPHA = 1.0 55 | BETA = 1.0 56 | 57 | BATCH_SIZE = 1 58 | EPSILON = 1e-5 59 | MOMENTUM = 0.9 60 | LR = 5e-4 61 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 62 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 63 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 64 | 65 | # -------------------------------------------- Data_preprocess_config 66 | DATASET_NAME = 'OHD-SJTU-ALL-600' # 'pascal', 'coco' 67 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 68 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 69 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 70 | IMG_SHORT_SIDE_LEN = 800 71 | IMG_MAX_LENGTH = 800 72 | CLASS_NUM = 6 73 | OMEGA = 180 / 256. 74 | ANGLE_MODE = 1 75 | 76 | IMG_ROTATE = False 77 | RGB2GRAY = False 78 | VERTICAL_FLIP = False 79 | HORIZONTAL_FLIP = True 80 | IMAGE_PYRAMID = False 81 | 82 | # --------------------------------------------- Network_config 83 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 84 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 85 | PROBABILITY = 0.01 86 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 87 | WEIGHT_DECAY = 1e-4 88 | USE_GN = False 89 | FPN_CHANNEL = 256 90 | 91 | # ---------------------------------------------Anchor config 92 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 93 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 94 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 95 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 96 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 97 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 98 | ANCHOR_SCALE_FACTORS = None 99 | USE_CENTER_OFFSET = True 100 | METHOD = 'H' 101 | USE_ANGLE_COND = False 102 | ANGLE_RANGE = 180 # 90 or 180 103 | 104 | # --------------------------------------------RPN config 105 | SHARE_NET = True 106 | USE_P5 = True 107 | IOU_POSITIVE_THRESHOLD = 0.5 108 | IOU_NEGATIVE_THRESHOLD = 0.4 109 | 110 | NMS = True 111 | NMS_IOU_THRESHOLD = 0.1 112 | MAXIMUM_DETECTIONS = 100 113 | FILTERED_SCORE = 0.05 114 | VIS_SCORE = 0.4 115 | 116 | 117 | -------------------------------------------------------------------------------- /libs/configs/SSDD++/baseline/cfgs_res101_ssdd++_baseline_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | cls : ship|| Recall: 0.8970873786407767 || Precison: 0.6834319526627219|| AP: 0.7866073337921715 9 | F1:0.9000989119683481 P:0.9173387096774194 R:0.883495145631068 10 | mAP is : 0.7866073337921715 11 | 12 | 13 | """ 14 | 15 | # ------------------------------------------------ 16 | VERSION = 'RetinaNet_SSDD++_Baseline_2x_20201012' 17 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 18 | ADD_BOX_IN_TENSORBOARD = True 19 | 20 | # ---------------------------------------- System_config 21 | ROOT_PATH = os.path.abspath('../') 22 | print(20*"++--") 23 | print(ROOT_PATH) 24 | GPU_GROUP = "0,1,2" 25 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 26 | SHOW_TRAIN_INFO_INTE = 20 27 | SMRY_ITER = 200 28 | SAVE_WEIGHTS_INTE = 2000 * 2 29 | 30 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 31 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 32 | 33 | if NET_NAME.startswith("resnet"): 34 | weights_name = NET_NAME 35 | elif 
NET_NAME.startswith("MobilenetV2"): 36 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 37 | else: 38 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 39 | 40 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 41 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 42 | EVALUATE_R_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 43 | 44 | # ------------------------------------------ Train config 45 | RESTORE_FROM_RPN = False 46 | FIXED_BLOCKS = 1 # allow 0~3 47 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 48 | USE_07_METRIC = True 49 | EVAL_THRESHOLD = 0.5 50 | 51 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 52 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 53 | 54 | CLS_WEIGHT = 1.0 55 | REG_WEIGHT = 1.0 56 | ANGLE_WEIGHT = 0.5 57 | REG_LOSS_MODE = None 58 | ALPHA = 1.0 59 | BETA = 1.0 60 | 61 | BATCH_SIZE = 1 62 | EPSILON = 1e-5 63 | MOMENTUM = 0.9 64 | LR = 5e-4 65 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 66 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 67 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 68 | 69 | # -------------------------------------------- Data_preprocess_config 70 | DATASET_NAME = 'SSDD++' # 'pascal', 'coco' 71 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 72 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 73 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 74 | IMG_SHORT_SIDE_LEN = 600 75 | IMG_MAX_LENGTH = 1200 76 | CLASS_NUM = 1 77 | 78 | IMG_ROTATE = False 79 | RGB2GRAY = False 80 | VERTICAL_FLIP = False 81 | HORIZONTAL_FLIP = True 82 | IMAGE_PYRAMID = False 83 | 84 | # --------------------------------------------- Network_config 85 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 86 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 87 | PROBABILITY = 0.01 88 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 89 | WEIGHT_DECAY = 1e-4 90 | USE_GN = False 91 | FPN_CHANNEL = 256 92 | 93 | # ---------------------------------------------Anchor config 94 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 95 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 96 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 97 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 98 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 99 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 100 | ANCHOR_SCALE_FACTORS = None 101 | USE_CENTER_OFFSET = True 102 | METHOD = 'H' 103 | USE_ANGLE_COND = False 104 | ANGLE_RANGE = 180 # 90 or 180 105 | 106 | # --------------------------------------------RPN config 107 | SHARE_NET = True 108 | USE_P5 = True 109 | IOU_POSITIVE_THRESHOLD = 0.5 110 | IOU_NEGATIVE_THRESHOLD = 0.4 111 | 112 | NMS = True 113 | NMS_IOU_THRESHOLD = 0.1 114 | MAXIMUM_DETECTIONS = 100 115 | FILTERED_SCORE = 0.05 116 | VIS_SCORE = 0.1 117 | 118 | 119 | -------------------------------------------------------------------------------- /libs/configs/SSDD++/dcl/cfgs_res101_ssdd++_dcl_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | 180 / 64. 
9 | FLOPs: 1299327161; Trainable params: 51507175 10 | 11 | """ 12 | 13 | # ------------------------------------------------ 14 | VERSION = 'RetinaNet_SSDD++_DCL_B_2x_20201011' 15 | NET_NAME = 'resnet101_v1d' # 'MobilenetV2' 16 | ADD_BOX_IN_TENSORBOARD = True 17 | 18 | # ---------------------------------------- System_config 19 | ROOT_PATH = os.path.abspath('../') 20 | print(20*"++--") 21 | print(ROOT_PATH) 22 | GPU_GROUP = "0,1,2" 23 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 24 | SHOW_TRAIN_INFO_INTE = 20 25 | SMRY_ITER = 200 26 | SAVE_WEIGHTS_INTE = 2000 * 2 27 | 28 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 29 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 30 | 31 | if NET_NAME.startswith("resnet"): 32 | weights_name = NET_NAME 33 | elif NET_NAME.startswith("MobilenetV2"): 34 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 35 | else: 36 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 37 | 38 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 39 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 40 | EVALUATE_R_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 41 | 42 | # ------------------------------------------ Train config 43 | RESTORE_FROM_RPN = False 44 | FIXED_BLOCKS = 1 # allow 0~3 45 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 46 | USE_07_METRIC = True 47 | EVAL_THRESHOLD = 0.5 48 | 49 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 50 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 51 | 52 | CLS_WEIGHT = 1.0 53 | REG_WEIGHT = 1.0 54 | ANGLE_WEIGHT = 0.5 55 | REG_LOSS_MODE = None 56 | ALPHA = 1.0 57 | BETA = 1.0 58 | 59 | BATCH_SIZE = 1 60 | EPSILON = 1e-5 61 | MOMENTUM = 0.9 62 | LR = 5e-4 63 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 64 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 65 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 66 | 67 | # -------------------------------------------- Data_preprocess_config 68 | DATASET_NAME = 'SSDD++' # 'pascal', 'coco' 69 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 70 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 71 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 72 | IMG_SHORT_SIDE_LEN = 600 73 | IMG_MAX_LENGTH = 1200 74 | CLASS_NUM = 1 75 | OMEGA = 180 / 64. 76 | ANGLE_MODE = 0 77 | 78 | IMG_ROTATE = True 79 | RGB2GRAY = True 80 | VERTICAL_FLIP = True 81 | HORIZONTAL_FLIP = True 82 | IMAGE_PYRAMID = False 83 | 84 | # --------------------------------------------- Network_config 85 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 86 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 87 | PROBABILITY = 0.01 88 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 89 | WEIGHT_DECAY = 1e-4 90 | USE_GN = False 91 | FPN_CHANNEL = 256 92 | 93 | # ---------------------------------------------Anchor config 94 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 95 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 96 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 97 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 98 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 
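# A hedged reading of the score/NMS knobs in the RPN config block that follows,
# sketched in plain NumPy (the repo's real pipeline lives in
# libs/detection_oprations/proposal_opr*.py and may differ in details):
# FILTERED_SCORE drops boxes before NMS, NMS_IOU_THRESHOLD and
# MAXIMUM_DETECTIONS bound what survives, and VIS_SCORE only gates drawing.
import numpy as np

def postprocess(boxes, scores, rotated_nms,
                filtered_score=0.05, nms_iou=0.1, max_dets=100):
    """boxes: (N, 5) rotated boxes; scores: (N,); rotated_nms returns kept indices."""
    keep = np.where(scores >= filtered_score)[0]                  # 1) score filter
    keep = keep[rotated_nms(boxes[keep], scores[keep], nms_iou)]  # 2) rotated NMS
    keep = keep[np.argsort(-scores[keep])][:max_dets]             # 3) top-k cap
    return keep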
99 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 100 | ANCHOR_SCALE_FACTORS = None 101 | USE_CENTER_OFFSET = True 102 | METHOD = 'H' 103 | USE_ANGLE_COND = False 104 | ANGLE_RANGE = 180 # 90 or 180 105 | 106 | # --------------------------------------------RPN config 107 | SHARE_NET = True 108 | USE_P5 = True 109 | IOU_POSITIVE_THRESHOLD = 0.5 110 | IOU_NEGATIVE_THRESHOLD = 0.4 111 | 112 | NMS = True 113 | NMS_IOU_THRESHOLD = 0.1 114 | MAXIMUM_DETECTIONS = 100 115 | FILTERED_SCORE = 0.05 116 | VIS_SCORE = 0.4 117 | 118 | 119 | -------------------------------------------------------------------------------- /libs/configs/UCAS-AOD/r3det_dcl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/configs/UCAS-AOD/r3det_dcl/__init__.py -------------------------------------------------------------------------------- /libs/configs/UCAS-AOD/r3det_dcl/cfgs_res152_ucas-aod_r3det_dcl_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function, absolute_import 3 | import os 4 | import tensorflow as tf 5 | import math 6 | 7 | """ 8 | FLOPs: 1440221956; Trainable params: 71664033 9 | 10 | """ 11 | 12 | # ------------------------------------------------ 13 | VERSION = 'RetinaNet_UCAS-AOD_R3Det_DCL_B_2x_20201026' 14 | NET_NAME = 'resnet152_v1d' # 'MobilenetV2' 15 | ADD_BOX_IN_TENSORBOARD = True 16 | 17 | # ---------------------------------------- System_config 18 | ROOT_PATH = os.path.abspath('../') 19 | print(20*"++--") 20 | print(ROOT_PATH) 21 | GPU_GROUP = "2,3" 22 | NUM_GPU = len(GPU_GROUP.strip().split(',')) 23 | SHOW_TRAIN_INFO_INTE = 20 24 | SMRY_ITER = 200 25 | SAVE_WEIGHTS_INTE = 5000 * 2 26 | 27 | SUMMARY_PATH = ROOT_PATH + '/output/summary' 28 | TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' 29 | 30 | if NET_NAME.startswith("resnet"): 31 | weights_name = NET_NAME 32 | elif NET_NAME.startswith("MobilenetV2"): 33 | weights_name = "mobilenet/mobilenet_v2_1.0_224" 34 | else: 35 | raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') 36 | 37 | PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' 38 | TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') 39 | EVALUATE_R_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' 40 | 41 | # ------------------------------------------ Train config 42 | RESTORE_FROM_RPN = False 43 | FIXED_BLOCKS = 1 # allow 0~3 44 | FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone 45 | USE_07_METRIC = True 46 | EVAL_THRESHOLD = 0.5 47 | 48 | MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 49 | GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip 50 | 51 | CLS_WEIGHT = 1.0 52 | REG_WEIGHT = 1.0 53 | ANGLE_WEIGHT = 0.5 54 | USE_IOU_FACTOR = True 55 | REG_LOSS_MODE = None 56 | ALPHA = 1.0 57 | BETA = 1.0 58 | 59 | BATCH_SIZE = 1 60 | EPSILON = 1e-5 61 | MOMENTUM = 0.9 62 | LR = 5e-4 63 | DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] 64 | MAX_ITERATION = SAVE_WEIGHTS_INTE*20 65 | WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) 66 | 67 | # -------------------------------------------- Data_preprocess_config 68 | DATASET_NAME = 'UCAS-AOD' # 'pascal', 'coco' 69 | PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR 70 | PIXEL_MEAN_ = [0.485, 0.456, 0.406] 71 | PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR 72 | IMG_SHORT_SIDE_LEN = [800, 400, 600, 1000, 1200] 73 | IMG_MAX_LENGTH = 1200 74 | CLASS_NUM = 2 75 | OMEGA = 180 / 256. 76 | ANGLE_MODE = 0 77 | 78 | IMG_ROTATE = True 79 | RGB2GRAY = True 80 | VERTICAL_FLIP = True 81 | HORIZONTAL_FLIP = True 82 | IMAGE_PYRAMID = True 83 | 84 | # --------------------------------------------- Network_config 85 | SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) 86 | SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) 87 | PROBABILITY = 0.01 88 | FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) 89 | WEIGHT_DECAY = 1e-4 90 | USE_GN = False 91 | FPN_CHANNEL = 256 92 | 93 | # ---------------------------------------------Anchor config 94 | LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] 95 | BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 96 | ANCHOR_STRIDE = [8, 16, 32, 64, 128] 97 | ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] 98 | ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3., 5., 1 / 5.] 99 | ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15] 100 | ANCHOR_SCALE_FACTORS = None 101 | USE_CENTER_OFFSET = True 102 | METHOD = 'H' 103 | USE_ANGLE_COND = False 104 | ANGLE_RANGE = 180 # 90 or 180 105 | 106 | # --------------------------------------------RPN config 107 | SHARE_NET = True 108 | USE_P5 = True 109 | IOU_POSITIVE_THRESHOLD = 0.5 110 | IOU_NEGATIVE_THRESHOLD = 0.4 111 | REFINE_IOU_POSITIVE_THRESHOLD = [0.6, 0.7] 112 | REFINE_IOU_NEGATIVE_THRESHOLD = [0.5, 0.6] 113 | 114 | NMS = True 115 | NMS_IOU_THRESHOLD = 0.1 116 | MAXIMUM_DETECTIONS = 100 117 | FILTERED_SCORE = 0.05 118 | VIS_SCORE = 0.4 119 | 120 | 121 | -------------------------------------------------------------------------------- /libs/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/configs/__init__.py -------------------------------------------------------------------------------- /libs/detection_oprations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/detection_oprations/__init__.py -------------------------------------------------------------------------------- /libs/detection_oprations/anchor_target_layer_without_boxweight.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from libs.configs import cfgs 12 | import numpy as np 13 | from libs.box_utils.cython_utils.cython_bbox import bbox_overlaps 14 | from libs.box_utils.rbbox_overlaps import rbbx_overlaps 15 | from libs.box_utils.iou_cpu import get_iou_matrix 16 | from libs.box_utils import bbox_transform 17 | from libs.box_utils.coordinate_convert import coordinate_present_convert 18 | 19 
| 20 | def anchor_target_layer(gt_boxes_h, gt_boxes_r, anchors, gpu_id=0): 21 | 22 | anchor_states = np.zeros((anchors.shape[0],)) 23 | labels = np.zeros((anchors.shape[0], cfgs.CLASS_NUM)) 24 | if gt_boxes_r.shape[0]: 25 | # [N, M] 26 | 27 | if cfgs.METHOD == 'H': 28 | overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float), 29 | np.ascontiguousarray(gt_boxes_h, dtype=np.float)) 30 | else: 31 | overlaps = rbbx_overlaps(np.ascontiguousarray(anchors, dtype=np.float32), 32 | np.ascontiguousarray(gt_boxes_r[:, :-1], dtype=np.float32), gpu_id) 33 | 34 | # overlaps = get_iou_matrix(np.ascontiguousarray(anchors, dtype=np.float32), 35 | # np.ascontiguousarray(gt_boxes_r[:, :-1], dtype=np.float32)) 36 | 37 | argmax_overlaps_inds = np.argmax(overlaps, axis=1) 38 | max_overlaps = overlaps[np.arange(overlaps.shape[0]), argmax_overlaps_inds] 39 | 40 | # compute box regression targets 41 | target_boxes = gt_boxes_r[argmax_overlaps_inds] 42 | 43 | if cfgs.USE_ANGLE_COND: 44 | if cfgs.METHOD == 'R': 45 | delta_theta = np.abs(target_boxes[:, -2] - anchors[:, -1]) 46 | theta_indices = delta_theta < 15 47 | positive_indices = (max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD) & theta_indices 48 | else: 49 | positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD 50 | 51 | ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & (max_overlaps < cfgs.IOU_POSITIVE_THRESHOLD) 52 | 53 | else: 54 | positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD 55 | ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & ~positive_indices 56 | 57 | anchor_states[ignore_indices] = -1 58 | anchor_states[positive_indices] = 1 59 | 60 | # compute target class labels 61 | labels[positive_indices, target_boxes[positive_indices, -1].astype(int) - 1] = 1 62 | else: 63 | # no annotations? 
then everything is background 64 | target_boxes = np.zeros((anchors.shape[0], gt_boxes_r.shape[1])) 65 | 66 | if cfgs.METHOD == 'H': 67 | x_c = (anchors[:, 2] + anchors[:, 0]) / 2 68 | y_c = (anchors[:, 3] + anchors[:, 1]) / 2 69 | h = anchors[:, 2] - anchors[:, 0] + 1 70 | w = anchors[:, 3] - anchors[:, 1] + 1 71 | theta = -90 * np.ones_like(x_c) 72 | anchors = np.vstack([x_c, y_c, w, h, theta]).transpose() 73 | 74 | if cfgs.ANGLE_RANGE == 180: 75 | anchors = coordinate_present_convert(anchors, mode=-1) 76 | target_boxes = coordinate_present_convert(target_boxes, mode=-1) 77 | target_delta = bbox_transform.rbbox_transform(ex_rois=anchors, gt_rois=target_boxes) 78 | 79 | return np.array(labels, np.float32), np.array(target_delta, np.float32), \ 80 | np.array(anchor_states, np.float32), np.array(target_boxes, np.float32) 81 | 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /libs/detection_oprations/anchor_target_layer_without_boxweight_.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from libs.configs import cfgs 12 | import numpy as np 13 | from libs.box_utils.cython_utils.cython_bbox import bbox_overlaps 14 | from libs.box_utils.rbbox_overlaps import rbbx_overlaps 15 | from libs.box_utils.iou_cpu import get_iou_matrix 16 | from libs.box_utils import bbox_transform 17 | from libs.box_utils.coordinate_convert import coordinate_present_convert 18 | 19 | 20 | def anchor_target_layer(gt_boxes_h, gt_boxes_r, anchors, gpu_id=0): 21 | 22 | anchor_states = np.zeros((anchors.shape[0],)) 23 | labels = np.zeros((anchors.shape[0], cfgs.CLASS_NUM)) 24 | if gt_boxes_r.shape[0]: 25 | # [N, M] 26 | 27 | if cfgs.METHOD == 'H': 28 | overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float), 29 | np.ascontiguousarray(gt_boxes_h, dtype=np.float)) 30 | else: 31 | overlaps = rbbx_overlaps(np.ascontiguousarray(anchors, dtype=np.float32), 32 | np.ascontiguousarray(gt_boxes_r[:, :-1], dtype=np.float32), gpu_id) 33 | 34 | # overlaps = get_iou_matrix(np.ascontiguousarray(anchors, dtype=np.float32), 35 | # np.ascontiguousarray(gt_boxes_r[:, :-1], dtype=np.float32)) 36 | 37 | argmax_overlaps_inds = np.argmax(overlaps, axis=1) 38 | max_overlaps = overlaps[np.arange(overlaps.shape[0]), argmax_overlaps_inds] 39 | 40 | # compute box regression targets 41 | target_boxes = gt_boxes_r[argmax_overlaps_inds] 42 | 43 | if cfgs.USE_ANGLE_COND: 44 | if cfgs.METHOD == 'R': 45 | delta_theta = np.abs(target_boxes[:, -2] - anchors[:, -1]) 46 | theta_indices = delta_theta < 15 47 | positive_indices = (max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD) & theta_indices 48 | else: 49 | positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD 50 | 51 | ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & (max_overlaps < cfgs.IOU_POSITIVE_THRESHOLD) 52 | 53 | else: 54 | positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD 55 | ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & ~positive_indices 56 | 57 | anchor_states[ignore_indices] = -1 58 | anchor_states[positive_indices] = 1 59 | 60 | # 
compute target class labels 61 | labels[positive_indices, target_boxes[positive_indices, -1].astype(int) - 1] = 1 62 | else: 63 | # no annotations? then everything is background 64 | target_boxes = np.zeros((anchors.shape[0], gt_boxes_r.shape[1])) 65 | 66 | if cfgs.METHOD == 'H': 67 | x_c = (anchors[:, 2] + anchors[:, 0]) / 2 68 | y_c = (anchors[:, 3] + anchors[:, 1]) / 2 69 | h = anchors[:, 2] - anchors[:, 0] + 1 70 | w = anchors[:, 3] - anchors[:, 1] + 1 71 | theta = -90 * np.ones_like(x_c) 72 | anchors = np.vstack([x_c, y_c, w, h, theta]).transpose() 73 | 74 | target_delta = bbox_transform.rbbox_transform(ex_rois=anchors, gt_rois=target_boxes) 75 | 76 | return np.array(labels, np.float32), np.array(target_delta, np.float32), \ 77 | np.array(anchor_states, np.float32), np.array(target_boxes, np.float32) 78 | 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /libs/detection_oprations/anchor_target_layer_without_boxweight_csl.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from libs.configs import cfgs 12 | import numpy as np 13 | from libs.box_utils.cython_utils.cython_bbox import bbox_overlaps 14 | from libs.box_utils.rbbox_overlaps import rbbx_overlaps 15 | from libs.box_utils.iou_cpu import get_iou_matrix 16 | from libs.box_utils import bbox_transform 17 | from libs.box_utils.coordinate_convert import coordinate_present_convert 18 | 19 | 20 | def anchor_target_layer(gt_boxes_h, gt_boxes_r, gt_smooth_label, anchors, gpu_id=0): 21 | 22 | anchor_states = np.zeros((anchors.shape[0],)) 23 | labels = np.zeros((anchors.shape[0], cfgs.CLASS_NUM)) 24 | if gt_boxes_r.shape[0]: 25 | # [N, M] 26 | 27 | if cfgs.METHOD == 'H': 28 | overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float), 29 | np.ascontiguousarray(gt_boxes_h, dtype=np.float)) 30 | else: 31 | overlaps = rbbx_overlaps(np.ascontiguousarray(anchors, dtype=np.float32), 32 | np.ascontiguousarray(gt_boxes_r[:, :-1], dtype=np.float32), gpu_id) 33 | 34 | # overlaps = get_iou_matrix(np.ascontiguousarray(anchors, dtype=np.float32), 35 | # np.ascontiguousarray(gt_boxes_r[:, :-1], dtype=np.float32)) 36 | 37 | argmax_overlaps_inds = np.argmax(overlaps, axis=1) 38 | max_overlaps = overlaps[np.arange(overlaps.shape[0]), argmax_overlaps_inds] 39 | 40 | # compute box regression targets 41 | target_boxes = gt_boxes_r[argmax_overlaps_inds] 42 | target_smooth_label = gt_smooth_label[argmax_overlaps_inds] 43 | 44 | if cfgs.USE_ANGLE_COND: 45 | if cfgs.METHOD == 'R': 46 | delta_theta = np.abs(target_boxes[:, -2] - anchors[:, -1]) 47 | theta_indices = delta_theta < 15 48 | positive_indices = (max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD) & theta_indices 49 | else: 50 | positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD 51 | 52 | ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & (max_overlaps < cfgs.IOU_POSITIVE_THRESHOLD) 53 | 54 | else: 55 | positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD 56 | ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & ~positive_indices 57 | 58 | 
anchor_states[ignore_indices] = -1 59 | anchor_states[positive_indices] = 1 60 | 61 | # compute target class labels 62 | labels[positive_indices, target_boxes[positive_indices, -1].astype(int) - 1] = 1 63 | else: 64 | # no annotations? then everything is background 65 | target_boxes = np.zeros((anchors.shape[0], gt_boxes_r.shape[1])) 66 | target_smooth_label = np.zeros((anchors.shape[0], gt_smooth_label.shape[1])) 67 | 68 | if cfgs.METHOD == 'H': 69 | x_c = (anchors[:, 2] + anchors[:, 0]) / 2 70 | y_c = (anchors[:, 3] + anchors[:, 1]) / 2 71 | h = anchors[:, 2] - anchors[:, 0] + 1 72 | w = anchors[:, 3] - anchors[:, 1] + 1 73 | theta = -90 * np.ones_like(x_c) 74 | anchors = np.vstack([x_c, y_c, w, h, theta]).transpose() 75 | 76 | if cfgs.ANGLE_RANGE == 180: 77 | anchors = coordinate_present_convert(anchors, mode=-1) 78 | target_boxes = coordinate_present_convert(target_boxes, mode=-1) 79 | target_delta = bbox_transform.rbbox_transform(ex_rois=anchors, gt_rois=target_boxes) 80 | 81 | return np.array(labels, np.float32), np.array(target_delta, np.float32), \ 82 | np.array(anchor_states, np.float32), np.array(target_boxes, np.float32),\ 83 | np.array(target_smooth_label, np.float32) 84 | 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /libs/detection_oprations/anchor_target_layer_without_boxweight_dcl.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from libs.configs import cfgs 12 | import numpy as np 13 | from libs.box_utils.cython_utils.cython_bbox import bbox_overlaps 14 | from libs.box_utils.rbbox_overlaps import rbbx_overlaps 15 | from libs.box_utils.iou_cpu import get_iou_matrix 16 | from libs.box_utils import bbox_transform 17 | from libs.box_utils.coordinate_convert import coordinate_present_convert 18 | 19 | 20 | def anchor_target_layer(gt_boxes_h, gt_boxes_r, gt_encode_label, anchors, gpu_id=0): 21 | 22 | anchor_states = np.zeros((anchors.shape[0],)) 23 | labels = np.zeros((anchors.shape[0], cfgs.CLASS_NUM)) 24 | if gt_boxes_r.shape[0]: 25 | # [N, M] 26 | 27 | if cfgs.METHOD == 'H': 28 | overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float), 29 | np.ascontiguousarray(gt_boxes_h, dtype=np.float)) 30 | else: 31 | overlaps = rbbx_overlaps(np.ascontiguousarray(anchors, dtype=np.float32), 32 | np.ascontiguousarray(gt_boxes_r[:, :-1], dtype=np.float32), gpu_id) 33 | 34 | # overlaps = get_iou_matrix(np.ascontiguousarray(anchors, dtype=np.float32), 35 | # np.ascontiguousarray(gt_boxes_r[:, :-1], dtype=np.float32)) 36 | 37 | argmax_overlaps_inds = np.argmax(overlaps, axis=1) 38 | max_overlaps = overlaps[np.arange(overlaps.shape[0]), argmax_overlaps_inds] 39 | 40 | # compute box regression targets 41 | target_boxes = gt_boxes_r[argmax_overlaps_inds] 42 | target_encode_label = gt_encode_label[argmax_overlaps_inds] 43 | 44 | if cfgs.USE_ANGLE_COND: 45 | if cfgs.METHOD == 'R': 46 | delta_theta = np.abs(target_boxes[:, -2] - anchors[:, -1]) 47 | theta_indices = delta_theta < 15 48 | positive_indices = (max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD) & theta_indices 49 | else: 50 | 
positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD 51 | 52 | ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & (max_overlaps < cfgs.IOU_POSITIVE_THRESHOLD) 53 | 54 | else: 55 | positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD 56 | ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & ~positive_indices 57 | 58 | anchor_states[ignore_indices] = -1 59 | anchor_states[positive_indices] = 1 60 | 61 | # compute target class labels 62 | labels[positive_indices, target_boxes[positive_indices, -1].astype(int) - 1] = 1 63 | else: 64 | # no annotations? then everything is background 65 | target_boxes = np.zeros((anchors.shape[0], gt_boxes_r.shape[1])) 66 | target_encode_label = np.zeros((anchors.shape[0], gt_encode_label.shape[1])) 67 | 68 | if cfgs.METHOD == 'H': 69 | x_c = (anchors[:, 2] + anchors[:, 0]) / 2 70 | y_c = (anchors[:, 3] + anchors[:, 1]) / 2 71 | h = anchors[:, 2] - anchors[:, 0] + 1 72 | w = anchors[:, 3] - anchors[:, 1] + 1 73 | theta = -90 * np.ones_like(x_c) 74 | anchors = np.vstack([x_c, y_c, w, h, theta]).transpose() 75 | 76 | if cfgs.ANGLE_RANGE == 180: 77 | anchors = coordinate_present_convert(anchors, mode=-1) 78 | target_boxes = coordinate_present_convert(target_boxes, mode=-1) 79 | target_delta = bbox_transform.rbbox_transform(ex_rois=anchors, gt_rois=target_boxes) 80 | 81 | return np.array(labels, np.float32), np.array(target_delta[:, :-1], np.float32), \ 82 | np.array(anchor_states, np.float32), np.array(target_boxes, np.float32), \ 83 | np.array(target_encode_label, np.float32) 84 | 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /libs/detection_oprations/anchor_target_layer_without_boxweight_dcl_batch.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from libs.configs import cfgs 12 | import numpy as np 13 | from libs.box_utils.cython_utils.cython_bbox import bbox_overlaps 14 | from libs.box_utils.rbbox_overlaps import rbbx_overlaps 15 | from libs.box_utils import bbox_transform 16 | from libs.box_utils.coordinate_convert import coordinate_present_convert 17 | 18 | 19 | def anchor_target_layer(gt_boxes_h_batch, gt_boxes_r_batch, gt_encode_label_batch, anchor_batch, gpu_id=0): 20 | 21 | all_labels, all_target_delta, all_anchor_states, all_target_boxes, all_target_encode_label = [], [], [], [], [] 22 | for i in range(cfgs.BATCH_SIZE): 23 | anchors = np.array(anchor_batch[i], np.float32) 24 | gt_boxes_h = gt_boxes_h_batch[i, :, :] 25 | gt_boxes_r = gt_boxes_r_batch[i, :, :] 26 | gt_encode_label = gt_encode_label_batch[i, :, :] 27 | anchor_states = np.zeros((anchors.shape[0],)) 28 | labels = np.zeros((anchors.shape[0], cfgs.CLASS_NUM)) 29 | if gt_boxes_r.shape[0]: 30 | # [N, M] 31 | 32 | if cfgs.METHOD == 'H': 33 | overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float), 34 | np.ascontiguousarray(gt_boxes_h, dtype=np.float)) 35 | else: 36 | overlaps = rbbx_overlaps(np.ascontiguousarray(anchors, dtype=np.float32), 37 | np.ascontiguousarray(gt_boxes_r[:, :-1], dtype=np.float32), gpu_id) 38 | 39 | 
argmax_overlaps_inds = np.argmax(overlaps, axis=1) 40 | max_overlaps = overlaps[np.arange(overlaps.shape[0]), argmax_overlaps_inds] 41 | 42 | # compute box regression targets 43 | target_boxes = gt_boxes_r[argmax_overlaps_inds] 44 | target_encode_label = gt_encode_label[argmax_overlaps_inds] 45 | 46 | positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD 47 | ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & ~positive_indices 48 | 49 | anchor_states[ignore_indices] = -1 50 | anchor_states[positive_indices] = 1 51 | 52 | # compute target class labels 53 | labels[positive_indices, target_boxes[positive_indices, -1].astype(int) - 1] = 1 54 | else: 55 | # no annotations? then everything is background 56 | target_boxes = np.zeros((anchors.shape[0], gt_boxes_r.shape[1])) 57 | target_encode_label = np.zeros((anchors.shape[0], gt_encode_label.shape[1])) 58 | 59 | if cfgs.METHOD == 'H': 60 | x_c = (anchors[:, 2] + anchors[:, 0]) / 2 61 | y_c = (anchors[:, 3] + anchors[:, 1]) / 2 62 | h = anchors[:, 2] - anchors[:, 0] + 1 63 | w = anchors[:, 3] - anchors[:, 1] + 1 64 | theta = -90 * np.ones_like(x_c) 65 | anchors = np.vstack([x_c, y_c, w, h, theta]).transpose() 66 | 67 | if cfgs.ANGLE_RANGE == 180: 68 | anchors = coordinate_present_convert(anchors, mode=-1) 69 | target_boxes = coordinate_present_convert(target_boxes, mode=-1) 70 | target_delta = bbox_transform.rbbox_transform(ex_rois=anchors, gt_rois=target_boxes) 71 | 72 | all_labels.append(labels) 73 | all_target_delta.append(target_delta) 74 | all_anchor_states.append(anchor_states) 75 | all_target_boxes.append(target_boxes) 76 | all_target_encode_label.append(target_encode_label) 77 | 78 | return np.array(all_labels, np.float32), np.array(all_target_delta, np.float32)[:, :, :-1], \ 79 | np.array(all_anchor_states, np.float32), np.array(all_target_boxes, np.float32), \ 80 | np.array(all_target_encode_label, np.float32) 81 | 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /libs/detection_oprations/anchor_target_layer_without_boxweight_win.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from libs.configs import cfgs 12 | import numpy as np 13 | from libs.box_utils.iou import iou_calculate_np 14 | from libs.box_utils import bbox_transform 15 | from libs.box_utils.coordinate_convert import coordinate_present_convert 16 | 17 | 18 | def anchor_target_layer(gt_boxes_h, gt_boxes_r, anchors, gpu_id=0): 19 | 20 | anchor_states = np.zeros((anchors.shape[0],)) 21 | labels = np.zeros((anchors.shape[0], cfgs.CLASS_NUM)) 22 | if gt_boxes_r.shape[0]: 23 | # [N, M] 24 | 25 | if cfgs.METHOD == 'H': 26 | overlaps = iou_calculate_np(np.ascontiguousarray(anchors, dtype=np.float), 27 | np.ascontiguousarray(gt_boxes_h, dtype=np.float)) 28 | else: 29 | raise Exception('Do not support mode=R in windows version') 30 | argmax_overlaps_inds = np.argmax(overlaps, axis=1) 31 | max_overlaps = overlaps[np.arange(overlaps.shape[0]), argmax_overlaps_inds] 32 | 33 | # compute box regression targets 34 | target_boxes = gt_boxes_r[argmax_overlaps_inds] 35 | 
36 | if cfgs.USE_ANGLE_COND: 37 | if cfgs.METHOD == 'R': 38 | delta_theta = np.abs(target_boxes[:, -2] - anchors[:, -1]) 39 | theta_indices = delta_theta < 15 40 | positive_indices = (max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD) & theta_indices 41 | else: 42 | positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD 43 | 44 | ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & (max_overlaps < cfgs.IOU_POSITIVE_THRESHOLD) 45 | 46 | else: 47 | positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD 48 | ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & ~positive_indices 49 | 50 | anchor_states[ignore_indices] = -1 51 | anchor_states[positive_indices] = 1 52 | 53 | # compute target class labels 54 | labels[positive_indices, target_boxes[positive_indices, -1].astype(int) - 1] = 1 55 | else: 56 | # no annotations? then everything is background 57 | target_boxes = np.zeros((anchors.shape[0], gt_boxes_r.shape[1])) 58 | 59 | if cfgs.METHOD == 'H': 60 | x_c = (anchors[:, 2] + anchors[:, 0]) / 2 61 | y_c = (anchors[:, 3] + anchors[:, 1]) / 2 62 | h = anchors[:, 2] - anchors[:, 0] + 1 63 | w = anchors[:, 3] - anchors[:, 1] + 1 64 | theta = -90 * np.ones_like(x_c) 65 | anchors = np.vstack([x_c, y_c, w, h, theta]).transpose() 66 | 67 | if cfgs.ANGLE_RANGE == 180: 68 | anchors = coordinate_present_convert(anchors, mode=-1) 69 | target_boxes = coordinate_present_convert(target_boxes, mode=-1) 70 | target_delta = bbox_transform.rbbox_transform(ex_rois=anchors, gt_rois=target_boxes) 71 | 72 | return np.array(labels, np.float32), np.array(target_delta, np.float32), \ 73 | np.array(anchor_states, np.float32), np.array(target_boxes, np.float32) 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /libs/detection_oprations/proposal_opr.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | from libs.configs import cfgs 3 | from libs.box_utils import bbox_transform 4 | from libs.box_utils import nms_rotate 5 | import tensorflow as tf 6 | 7 | from libs.box_utils.coordinate_convert import coordinate_present_convert, coords_regular 8 | 9 | 10 | def filter_detections(boxes, scores, is_training): 11 | """ 12 | :param boxes: [-1, 4] 13 | :param scores: [-1, ] 14 | :param labels: [-1, ] 15 | :return: 16 | """ 17 | if is_training: 18 | indices = tf.reshape(tf.where(tf.greater(scores, cfgs.VIS_SCORE)), [-1, ]) 19 | else: 20 | indices = tf.reshape(tf.where(tf.greater(scores, cfgs.FILTERED_SCORE)), [-1, ]) 21 | 22 | if cfgs.NMS: 23 | filtered_boxes = tf.gather(boxes, indices) 24 | filtered_scores = tf.gather(scores, indices) 25 | 26 | # perform NMS 27 | 28 | nms_indices = nms_rotate.nms_rotate(decode_boxes=filtered_boxes, 29 | scores=filtered_scores, 30 | iou_threshold=cfgs.NMS_IOU_THRESHOLD, 31 | max_output_size=100 if is_training else 1000, 32 | use_angle_condition=False, 33 | angle_threshold=15, 34 | use_gpu=False) 35 | 36 | # filter indices based on NMS 37 | indices = tf.gather(indices, nms_indices) 38 | 39 | # add indices to list of all indices 40 | return indices 41 | 42 | 43 | def postprocess_detctions(rpn_bbox_pred, rpn_cls_prob, anchors, is_training): 44 | 45 | if cfgs.METHOD == 'H': 46 | x_c = (anchors[:, 2] + anchors[:, 0]) / 2 47 | y_c = (anchors[:, 3] + anchors[:, 1]) / 2 48 | h = anchors[:, 2] - anchors[:, 0] + 1 49 | w = anchors[:, 3] - anchors[:, 1] + 1 50 | theta = -90 * tf.ones_like(x_c) 51 | anchors = tf.transpose(tf.stack([x_c, y_c, w, h, theta])) 52 | 53 | 
if cfgs.ANGLE_RANGE == 180: 54 | anchors = tf.py_func(coordinate_present_convert, 55 | inp=[anchors, -1], 56 | Tout=[tf.float32]) 57 | anchors = tf.reshape(anchors, [-1, 5]) 58 | 59 | boxes_pred = bbox_transform.rbbox_transform_inv(boxes=anchors, deltas=rpn_bbox_pred) 60 | 61 | if cfgs.ANGLE_RANGE == 180: 62 | # boxes_pred = tf.py_func(coords_regular, 63 | # inp=[boxes_pred], 64 | # Tout=[tf.float32]) 65 | # boxes_pred = tf.reshape(boxes_pred, [-1, 5]) 66 | 67 | _, _, _, _, theta = tf.unstack(boxes_pred, axis=1) 68 | indx = tf.reshape(tf.where(tf.logical_and(tf.less(theta, 0), tf.greater_equal(theta, -180))), [-1, ]) 69 | boxes_pred = tf.gather(boxes_pred, indx) 70 | rpn_cls_prob = tf.gather(rpn_cls_prob, indx) 71 | 72 | boxes_pred = tf.py_func(coordinate_present_convert, 73 | inp=[boxes_pred, 1], 74 | Tout=[tf.float32]) 75 | boxes_pred = tf.reshape(boxes_pred, [-1, 5]) 76 | 77 | return_boxes_pred = [] 78 | return_scores = [] 79 | return_labels = [] 80 | for j in range(0, cfgs.CLASS_NUM): 81 | indices = filter_detections(boxes_pred, rpn_cls_prob[:, j], is_training) 82 | tmp_boxes_pred = tf.reshape(tf.gather(boxes_pred, indices), [-1, 5]) 83 | tmp_scores = tf.reshape(tf.gather(rpn_cls_prob[:, j], indices), [-1, ]) 84 | 85 | return_boxes_pred.append(tmp_boxes_pred) 86 | return_scores.append(tmp_scores) 87 | return_labels.append(tf.ones_like(tmp_scores)*(j+1)) 88 | 89 | return_boxes_pred = tf.concat(return_boxes_pred, axis=0) 90 | return_scores = tf.concat(return_scores, axis=0) 91 | return_labels = tf.concat(return_labels, axis=0) 92 | 93 | return return_boxes_pred, return_scores, return_labels 94 | -------------------------------------------------------------------------------- /libs/detection_oprations/proposal_opr_.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | from libs.configs import cfgs 3 | from libs.box_utils import bbox_transform 4 | from libs.box_utils import nms_rotate 5 | import tensorflow as tf 6 | 7 | from libs.box_utils.coordinate_convert import coordinate_present_convert, coords_regular 8 | 9 | 10 | def postprocess_detctions(rpn_bbox_pred, rpn_cls_prob, anchors, is_training, gpu_id): 11 | 12 | return_boxes_pred = [] 13 | return_scores = [] 14 | return_labels = [] 15 | for j in range(0, cfgs.CLASS_NUM): 16 | scores = rpn_cls_prob[:, j] 17 | if is_training: 18 | indices = tf.reshape(tf.where(tf.greater(scores, cfgs.VIS_SCORE)), [-1, ]) 19 | else: 20 | indices = tf.reshape(tf.where(tf.greater(scores, cfgs.FILTERED_SCORE)), [-1, ]) 21 | 22 | anchors_ = tf.gather(anchors, indices) 23 | rpn_bbox_pred_ = tf.gather(rpn_bbox_pred, indices) 24 | scores = tf.gather(scores, indices) 25 | 26 | if cfgs.METHOD == 'H': 27 | x_c = (anchors_[:, 2] + anchors_[:, 0]) / 2 28 | y_c = (anchors_[:, 3] + anchors_[:, 1]) / 2 29 | h = anchors_[:, 2] - anchors_[:, 0] + 1 30 | w = anchors_[:, 3] - anchors_[:, 1] + 1 31 | theta = -90 * tf.ones_like(x_c) 32 | anchors_ = tf.transpose(tf.stack([x_c, y_c, w, h, theta])) 33 | 34 | if cfgs.ANGLE_RANGE == 180: 35 | anchors_ = tf.py_func(coordinate_present_convert, 36 | inp=[anchors_, -1], 37 | Tout=[tf.float32]) 38 | anchors_ = tf.reshape(anchors_, [-1, 5]) 39 | 40 | boxes_pred = bbox_transform.rbbox_transform_inv(boxes=anchors_, deltas=rpn_bbox_pred_) 41 | 42 | if cfgs.ANGLE_RANGE == 180: 43 | 44 | _, _, _, _, theta = tf.unstack(boxes_pred, axis=1) 45 | indx = tf.reshape(tf.where(tf.logical_and(tf.less(theta, 0), tf.greater_equal(theta, -180))), [-1, ]) 46 | boxes_pred = tf.gather(boxes_pred, indx) 
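            # Note on the ANGLE_RANGE == 180 branch (DCL convention in this repo family):
            # the boxes were decoded in the long-edge representation produced by
            # coordinate_present_convert(..., mode=-1), so only predictions whose theta
            # falls in [-180, 0) are kept; the scores are gathered with the same indices
            # just below, and coordinate_present_convert(..., 1) then maps the surviving
            # boxes back to the OpenCV-style [-90, 0) definition expected by the rotated NMS.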
47 | scores = tf.gather(scores, indx) 48 | 49 | boxes_pred = tf.py_func(coordinate_present_convert, 50 | inp=[boxes_pred, 1], 51 | Tout=[tf.float32]) 52 | boxes_pred = tf.reshape(boxes_pred, [-1, 5]) 53 | 54 | max_output_size = 4000 if 'DOTA' in cfgs.DATASET_NAME else 200 55 | nms_indices = nms_rotate.nms_rotate(decode_boxes=boxes_pred, 56 | scores=scores, 57 | iou_threshold=cfgs.NMS_IOU_THRESHOLD, 58 | max_output_size=100 if is_training else max_output_size, 59 | use_angle_condition=False, 60 | angle_threshold=15, 61 | use_gpu=True, 62 | gpu_id=gpu_id) 63 | 64 | tmp_boxes_pred = tf.reshape(tf.gather(boxes_pred, nms_indices), [-1, 5]) 65 | tmp_scores = tf.reshape(tf.gather(scores, nms_indices), [-1, ]) 66 | 67 | return_boxes_pred.append(tmp_boxes_pred) 68 | return_scores.append(tmp_scores) 69 | return_labels.append(tf.ones_like(tmp_scores)*(j+1)) 70 | 71 | return_boxes_pred = tf.concat(return_boxes_pred, axis=0) 72 | return_scores = tf.concat(return_scores, axis=0) 73 | return_labels = tf.concat(return_labels, axis=0) 74 | 75 | return return_boxes_pred, return_scores, return_labels 76 | -------------------------------------------------------------------------------- /libs/detection_oprations/refinebox_target_layer_without_boxweight_dcl.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from libs.configs import cfgs 12 | import numpy as np 13 | from libs.box_utils.rbbox_overlaps import rbbx_overlaps 14 | from libs.box_utils import bbox_transform 15 | 16 | from libs.box_utils.coordinate_convert import coordinate_present_convert 17 | 18 | 19 | def refinebox_target_layer(gt_boxes_r, gt_encode_label, anchors, pos_threshold, neg_threshold, gpu_id=0): 20 | 21 | anchor_states = np.zeros((anchors.shape[0],)) 22 | labels = np.zeros((anchors.shape[0], cfgs.CLASS_NUM)) 23 | if gt_boxes_r.shape[0]: 24 | # [N, M] 25 | 26 | # if cfgs.ANGLE_RANGE == 180: 27 | # gt_boxes_r_ = coordinate_present_convert(gt_boxes_r[:, :-1], 1) 28 | # 29 | # overlaps = rbbx_overlaps(np.ascontiguousarray(anchors, dtype=np.float32), 30 | # np.ascontiguousarray(gt_boxes_r_, dtype=np.float32), gpu_id) 31 | # else: 32 | overlaps = rbbx_overlaps(np.ascontiguousarray(anchors, dtype=np.float32), 33 | np.ascontiguousarray(gt_boxes_r[:, :-1], dtype=np.float32), gpu_id) 34 | 35 | # overlaps = np.clip(overlaps, 0.0, 1.0) 36 | 37 | argmax_overlaps_inds = np.argmax(overlaps, axis=1) 38 | max_overlaps = overlaps[np.arange(overlaps.shape[0]), argmax_overlaps_inds] 39 | 40 | # compute box regression targets 41 | target_boxes = gt_boxes_r[argmax_overlaps_inds] 42 | target_encode_label = gt_encode_label[argmax_overlaps_inds] 43 | 44 | positive_indices = max_overlaps >= pos_threshold 45 | ignore_indices = (max_overlaps > neg_threshold) & ~positive_indices 46 | anchor_states[ignore_indices] = -1 47 | anchor_states[positive_indices] = 1 48 | 49 | # compute target class labels 50 | labels[positive_indices, target_boxes[positive_indices, -1].astype(int) - 1] = 1 51 | else: 52 | # no annotations?
then everything is background 53 | target_boxes = np.zeros((anchors.shape[0], gt_boxes_r.shape[1])) 54 | target_encode_label = np.zeros((anchors.shape[0], gt_encode_label.shape[1])) 55 | 56 | if cfgs.ANGLE_RANGE == 180: 57 | anchors = coordinate_present_convert(anchors, mode=-1) 58 | target_boxes = coordinate_present_convert(target_boxes, mode=-1) 59 | 60 | target_delta = bbox_transform.rbbox_transform(ex_rois=anchors, gt_rois=target_boxes, 61 | scale_factors=cfgs.ANCHOR_SCALE_FACTORS) 62 | 63 | return np.array(labels, np.float32), np.array(target_delta[:, :-1], np.float32), \ 64 | np.array(anchor_states, np.float32), np.array(target_boxes, np.float32), \ 65 | np.array(target_encode_label, np.float32) 66 | 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /libs/label_name_dict/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/label_name_dict/__init__.py -------------------------------------------------------------------------------- /libs/label_name_dict/coco_dict.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | class_names = [ 6 | 'back_ground', 'person', 'bicycle', 'car', 'motorcycle', 7 | 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 8 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 9 | 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 10 | 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 11 | 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 12 | 'sports ball', 'kite', 'baseball bat', 'baseball glove', 13 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 14 | 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 15 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 16 | 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 17 | 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 18 | 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 19 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 20 | 'book', 'clock', 'vase', 'scissors', 'teddy bear', 21 | 'hair drier', 'toothbrush'] 22 | 23 | 24 | classes_originID = { 25 | 'person': 1, 'bicycle': 2, 'car': 3, 'motorcycle': 4, 26 | 'airplane': 5, 'bus': 6, 'train': 7, 'truck': 8, 'boat': 9, 27 | 'traffic light': 10, 'fire hydrant': 11, 'stop sign': 13, 28 | 'parking meter': 14, 'bench': 15, 'bird': 16, 'cat': 17, 29 | 'dog': 18, 'horse': 19, 'sheep': 20, 'cow': 21, 'elephant': 22, 30 | 'bear': 23, 'zebra': 24, 'giraffe': 25, 'backpack': 27, 31 | 'umbrella': 28, 'handbag': 31, 'tie': 32, 'suitcase': 33, 32 | 'frisbee': 34, 'skis': 35, 'snowboard': 36, 'sports ball': 37, 33 | 'kite': 38, 'baseball bat': 39, 'baseball glove': 40, 34 | 'skateboard': 41, 'surfboard': 42, 'tennis racket': 43, 35 | 'bottle': 44, 'wine glass': 46, 'cup': 47, 'fork': 48, 36 | 'knife': 49, 'spoon': 50, 'bowl': 51, 'banana': 52, 'apple': 53, 37 | 'sandwich': 54, 'orange': 55, 'broccoli': 56, 'carrot': 57, 38 | 'hot dog': 58, 'pizza': 59, 'donut': 60, 'cake': 61, 39 | 'chair': 62, 'couch': 63, 'potted plant': 64, 'bed': 65, 40 | 'dining table': 67, 'toilet': 70, 'tv': 72, 'laptop': 73, 41 | 'mouse': 74, 'remote': 75, 'keyboard': 76, 'cell phone': 77, 42 | 'microwave': 78, 'oven': 79, 'toaster': 80, 'sink': 81, 43 | 'refrigerator': 82, 
'book': 84, 'clock': 85, 'vase': 86, 44 | 'scissors': 87, 'teddy bear': 88, 'hair drier': 89, 45 | 'toothbrush': 90} 46 | 47 | originID_classes = {item: key for key, item in classes_originID.items()} 48 | NAME_LABEL_MAP = dict(zip(class_names, range(len(class_names)))) 49 | LABEL_NAME_MAP = dict(zip(range(len(class_names)), class_names)) 50 | 51 | # print (originID_classes) 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /libs/losses/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/losses/__init__.py -------------------------------------------------------------------------------- /libs/networks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/networks/__init__.py -------------------------------------------------------------------------------- /libs/networks/efficientnet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/networks/efficientnet/__init__.py -------------------------------------------------------------------------------- /libs/networks/efficientnet/panda.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/networks/efficientnet/panda.jpg -------------------------------------------------------------------------------- /libs/networks/efficientnet/test.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import tensorflow as tf 3 | import os 4 | import sys 5 | 6 | sys.path.append('../../..') 7 | from libs.networks.efficientnet import efficientnet_builder 8 | 9 | os.environ["CUDA_VISIBLE_DEVICES"] = '2' 10 | 11 | def restore_model(sess, ckpt_dir): 12 | """Restore variables from checkpoint dir.""" 13 | checkpoint = tf.train.latest_checkpoint(ckpt_dir) 14 | ema = tf.train.ExponentialMovingAverage(decay=0.9999) 15 | ema_vars = tf.trainable_variables() + tf.get_collection('moving_vars') 16 | for v in tf.global_variables(): 17 | if 'moving_mean' in v.name or 'moving_variance' in v.name: 18 | ema_vars.append(v) 19 | ema_vars = list(set(ema_vars)) 20 | var_dict = ema.variables_to_restore(ema_vars) 21 | saver = tf.train.Saver(var_dict, max_to_keep=1) 22 | saver.restore(sess, checkpoint) 23 | 24 | 25 | images = cv2.imread('/data/yangxue/code/R3Det_Tensorflow/libs/networks/efficientnet/panda.jpg') 26 | images = cv2.resize(images, (112, 112)) 27 | images = tf.expand_dims(tf.constant(images, tf.float32), axis=0) 28 | features, endpoints = efficientnet_builder.build_model_base(images, 'efficientnet-b0', training=True) 29 | print(endpoints.keys()) 30 | 31 | init_op = tf.group( 32 | tf.global_variables_initializer(), 33 | tf.local_variables_initializer() 34 | ) 35 | 36 | tfconfig = tf.ConfigProto( 37 | allow_soft_placement=True, log_device_placement=False) 38 | tfconfig.gpu_options.allow_growth = True 39 | with tf.Session(config=tfconfig) as sess: 40 | sess.run(init_op) 41 | restore_model(sess, '/data/yangxue/code/R3Det_Tensorflow/libs/networks/efficientnet/efficientnet-b0')
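    # Illustrative sanity check (assumes build_model_base names its feature endpoints
    # 'reduction_1' ... 'reduction_5', one per stride level): for the 112x112 input above,
    # 'reduction_5' should come out at stride 32, i.e. a 4x4 spatial map.
    for name in sorted(endpoints.keys()):
        if name.startswith('reduction'):
            print(name, endpoints[name].get_shape().as_list())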
42 | features_, endpoints_ = sess.run([features, endpoints]) 43 | print(endpoints_['reduction_1']) 44 | print(endpoints_['reduction_2']) 45 | print(endpoints_['reduction_3']) 46 | print(endpoints_['reduction_4']) 47 | print(endpoints_['reduction_5']) -------------------------------------------------------------------------------- /libs/networks/mobilenet/README.md: -------------------------------------------------------------------------------- 1 | # Mobilenet V2 2 | This folder contains building code for Mobilenet V2, based on 3 | [Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation] 4 | (https://arxiv.org/abs/1801.04381) 5 | 6 | # Pretrained model 7 | TODO 8 | 9 | # Example 10 | TODO 11 | 12 | 13 | -------------------------------------------------------------------------------- /libs/networks/mobilenet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/networks/mobilenet/__init__.py -------------------------------------------------------------------------------- /libs/networks/opts.py: -------------------------------------------------------------------------------- 1 | # -*-coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, division, print_function 4 | 5 | import tensorflow as tf 6 | 7 | 8 | def norm(x, norm_type, is_train, name, G=32, esp=1e-5): 9 | with tf.variable_scope('{}_norm_{}'.format(norm_type, name)): 10 | if norm_type == 'none': 11 | output = x 12 | elif norm_type == 'batch': 13 | output = tf.contrib.layers.batch_norm( 14 | x, center=True, scale=True, decay=0.999, 15 | is_training=is_train, updates_collections=None 16 | ) 17 | elif norm_type == 'group': 18 | # normalize 19 | # transpose: [bs, h, w, c] to [bs, c, h, w] following the paper 20 | x = tf.transpose(x, [0, 3, 1, 2]) 21 | N, C, H, W = x.get_shape().as_list() 22 | G = min(G, C) 23 | x = tf.reshape(x, [N, G, C // G, H, W]) 24 | mean, var = tf.nn.moments(x, [2, 3, 4], keep_dims=True) 25 | x = (x - mean) / tf.sqrt(var + esp) 26 | # per channel gamma and beta 27 | gamma = tf.get_variable('gamma', [C], 28 | initializer=tf.constant_initializer(1.0)) 29 | beta = tf.get_variable('beta', [C], 30 | initializer=tf.constant_initializer(0.0)) 31 | gamma = tf.reshape(gamma, [1, C, 1, 1]) 32 | beta = tf.reshape(beta, [1, C, 1, 1]) 33 | 34 | output = tf.reshape(x, [N, C, H, W]) * gamma + beta 35 | # transpose: [bs, c, h, w] back to [bs, h, w, c] following the paper 36 | output = tf.transpose(output, [0, 2, 3, 1]) 37 | else: 38 | raise NotImplementedError 39 | return output 40 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/inception.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Brings all inception models under one namespace.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | # pylint: disable=unused-import 22 | from nets.inception_resnet_v2 import inception_resnet_v2 23 | from nets.inception_resnet_v2 import inception_resnet_v2_arg_scope 24 | from nets.inception_resnet_v2 import inception_resnet_v2_base 25 | from nets.inception_v1 import inception_v1 26 | from nets.inception_v1 import inception_v1_arg_scope 27 | from nets.inception_v1 import inception_v1_base 28 | from nets.inception_v2 import inception_v2 29 | from nets.inception_v2 import inception_v2_arg_scope 30 | from nets.inception_v2 import inception_v2_base 31 | from nets.inception_v3 import inception_v3 32 | from nets.inception_v3 import inception_v3_arg_scope 33 | from nets.inception_v3 import inception_v3_base 34 | from nets.inception_v4 import inception_v4 35 | from nets.inception_v4 import inception_v4_arg_scope 36 | from nets.inception_v4 import inception_v4_base 37 | # pylint: enable=unused-import 38 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/inception_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains common code shared by all inception models. 16 | 17 | Usage of arg scope: 18 | with slim.arg_scope(inception_arg_scope()): 19 | logits, end_points = inception.inception_v3(images, num_classes, 20 | is_training=is_training) 21 | 22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | import tensorflow as tf 28 | 29 | slim = tf.contrib.slim 30 | 31 | 32 | def inception_arg_scope(weight_decay=0.00004, 33 | use_batch_norm=True, 34 | batch_norm_decay=0.9997, 35 | batch_norm_epsilon=0.001): 36 | """Defines the default arg scope for inception models. 37 | 38 | Args: 39 | weight_decay: The weight decay to use for regularizing the model. 40 | use_batch_norm: "If `True`, batch_norm is applied after each convolution. 41 | batch_norm_decay: Decay for batch norm moving average. 
42 | batch_norm_epsilon: Small float added to variance to avoid dividing by zero 43 | in batch norm. 44 | 45 | Returns: 46 | An `arg_scope` to use for the inception models. 47 | """ 48 | batch_norm_params = { 49 | # Decay for the moving averages. 50 | 'decay': batch_norm_decay, 51 | # epsilon to prevent 0s in variance. 52 | 'epsilon': batch_norm_epsilon, 53 | # collection containing update_ops. 54 | 'updates_collections': tf.GraphKeys.UPDATE_OPS, 55 | } 56 | if use_batch_norm: 57 | normalizer_fn = slim.batch_norm 58 | normalizer_params = batch_norm_params 59 | else: 60 | normalizer_fn = None 61 | normalizer_params = {} 62 | # Set weight_decay for weights in Conv and FC layers. 63 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 64 | weights_regularizer=slim.l2_regularizer(weight_decay)): 65 | with slim.arg_scope( 66 | [slim.conv2d], 67 | weights_initializer=slim.variance_scaling_initializer(), 68 | activation_fn=tf.nn.relu, 69 | normalizer_fn=normalizer_fn, 70 | normalizer_params=normalizer_params) as sc: 71 | return sc 72 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/lenet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a variant of the LeNet model definition.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | slim = tf.contrib.slim 24 | 25 | 26 | def lenet(images, num_classes=10, is_training=False, 27 | dropout_keep_prob=0.5, 28 | prediction_fn=slim.softmax, 29 | scope='LeNet'): 30 | """Creates a variant of the LeNet model. 31 | 32 | Note that since the output is a set of 'logits', the values fall in the 33 | interval of (-infinity, infinity). Consequently, to convert the outputs to a 34 | probability distribution over the characters, one will need to convert them 35 | using the softmax function: 36 | 37 | logits = lenet.lenet(images, is_training=False) 38 | probabilities = tf.nn.softmax(logits) 39 | predictions = tf.argmax(logits, 1) 40 | 41 | Args: 42 | images: A batch of `Tensors` of size [batch_size, height, width, channels]. 43 | num_classes: the number of classes in the dataset. 44 | is_training: specifies whether or not we're currently training the model. 45 | This variable will determine the behaviour of the dropout layer. 46 | dropout_keep_prob: the percentage of activation values that are retained. 47 | prediction_fn: a function to get predictions out of logits. 48 | scope: Optional variable_scope. 
49 | 50 | Returns: 51 | logits: the pre-softmax activations, a tensor of size 52 | [batch_size, `num_classes`] 53 | end_points: a dictionary from components of the network to the corresponding 54 | activation. 55 | """ 56 | end_points = {} 57 | 58 | with tf.variable_scope(scope, 'LeNet', [images, num_classes]): 59 | net = slim.conv2d(images, 32, [5, 5], scope='conv1') 60 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool1') 61 | net = slim.conv2d(net, 64, [5, 5], scope='conv2') 62 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool2') 63 | net = slim.flatten(net) 64 | end_points['Flatten'] = net 65 | 66 | net = slim.fully_connected(net, 1024, scope='fc3') 67 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 68 | scope='dropout3') 69 | logits = slim.fully_connected(net, num_classes, activation_fn=None, 70 | scope='fc4') 71 | 72 | end_points['Logits'] = logits 73 | end_points['Predictions'] = prediction_fn(logits, scope='Predictions') 74 | 75 | return logits, end_points 76 | lenet.default_image_size = 28 77 | 78 | 79 | def lenet_arg_scope(weight_decay=0.0): 80 | """Defines the default lenet argument scope. 81 | 82 | Args: 83 | weight_decay: The weight decay to use for regularizing the model. 84 | 85 | Returns: 86 | An `arg_scope` to use for the inception v3 model. 87 | """ 88 | with slim.arg_scope( 89 | [slim.conv2d, slim.fully_connected], 90 | weights_regularizer=slim.l2_regularizer(weight_decay), 91 | weights_initializer=tf.truncated_normal_initializer(stddev=0.1), 92 | activation_fn=tf.nn.relu) as sc: 93 | return sc 94 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/mobilenet_v1.md: -------------------------------------------------------------------------------- 1 | # MobileNet_v1 2 | 3 | [MobileNets](https://arxiv.org/abs/1704.04861) are small, low-latency, low-power models parameterized to meet the resource constraints of a variety of use cases. They can be built upon for classification, detection, embeddings and segmentation similar to how other popular large scale models, such as Inception, are used. MobileNets can be run efficiently on mobile devices with [TensorFlow Mobile](https://www.tensorflow.org/mobile/). 4 | 5 | MobileNets trade off between latency, size and accuracy while comparing favorably with popular models from the literature. 6 | 7 | ![alt text](mobilenet_v1.png "MobileNet Graph") 8 | 9 | # Pre-trained Models 10 | 11 | Choose the right MobileNet model to fit your latency and size budget. The size of the network in memory and on disk is proportional to the number of parameters. The latency and power usage of the network scales with the number of Multiply-Accumulates (MACs) which measures the number of fused Multiplication and Addition operations. These MobileNet models have been trained on the 12 | [ILSVRC-2012-CLS](http://www.image-net.org/challenges/LSVRC/2012/) 13 | image classification dataset. Accuracies were computed by evaluating using a single image crop. 
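For a quick orientation, here is a minimal TF-Slim sketch of restoring one of the checkpoints from the table below for inference (a sketch only: the `nets` import path and the checkpoint location under `/tmp/checkpoints` are assumptions, matching the download example further down):

```python
import tensorflow as tf
from nets import mobilenet_v1  # assumes the TF-Slim models repo is on PYTHONPATH

slim = tf.contrib.slim

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
# The released checkpoints were trained with 1001 classes (background + 1000 ILSVRC classes).
with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope(is_training=False)):
    logits, end_points = mobilenet_v1.mobilenet_v1(images, num_classes=1001, is_training=False)

saver = tf.train.Saver(slim.get_model_variables('MobilenetV1'))
with tf.Session() as sess:
    saver.restore(sess, '/tmp/checkpoints/mobilenet_v1_1.0_224.ckpt')
```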
14 | 15 | Model Checkpoint | Million MACs | Million Parameters | Top-1 Accuracy| Top-5 Accuracy | 16 | :----:|:------------:|:----------:|:-------:|:-------:| 17 | [MobileNet_v1_1.0_224](http://download.tensorflow.org/models/mobilenet_v1_1.0_224_2017_06_14.tar.gz)|569|4.24|70.7|89.5| 18 | [MobileNet_v1_1.0_192](http://download.tensorflow.org/models/mobilenet_v1_1.0_192_2017_06_14.tar.gz)|418|4.24|69.3|88.9| 19 | [MobileNet_v1_1.0_160](http://download.tensorflow.org/models/mobilenet_v1_1.0_160_2017_06_14.tar.gz)|291|4.24|67.2|87.5| 20 | [MobileNet_v1_1.0_128](http://download.tensorflow.org/models/mobilenet_v1_1.0_128_2017_06_14.tar.gz)|186|4.24|64.1|85.3| 21 | [MobileNet_v1_0.75_224](http://download.tensorflow.org/models/mobilenet_v1_0.75_224_2017_06_14.tar.gz)|317|2.59|68.4|88.2| 22 | [MobileNet_v1_0.75_192](http://download.tensorflow.org/models/mobilenet_v1_0.75_192_2017_06_14.tar.gz)|233|2.59|67.4|87.3| 23 | [MobileNet_v1_0.75_160](http://download.tensorflow.org/models/mobilenet_v1_0.75_160_2017_06_14.tar.gz)|162|2.59|65.2|86.1| 24 | [MobileNet_v1_0.75_128](http://download.tensorflow.org/models/mobilenet_v1_0.75_128_2017_06_14.tar.gz)|104|2.59|61.8|83.6| 25 | [MobileNet_v1_0.50_224](http://download.tensorflow.org/models/mobilenet_v1_0.50_224_2017_06_14.tar.gz)|150|1.34|64.0|85.4| 26 | [MobileNet_v1_0.50_192](http://download.tensorflow.org/models/mobilenet_v1_0.50_192_2017_06_14.tar.gz)|110|1.34|62.1|84.0| 27 | [MobileNet_v1_0.50_160](http://download.tensorflow.org/models/mobilenet_v1_0.50_160_2017_06_14.tar.gz)|77|1.34|59.9|82.5| 28 | [MobileNet_v1_0.50_128](http://download.tensorflow.org/models/mobilenet_v1_0.50_128_2017_06_14.tar.gz)|49|1.34|56.2|79.6| 29 | [MobileNet_v1_0.25_224](http://download.tensorflow.org/models/mobilenet_v1_0.25_224_2017_06_14.tar.gz)|41|0.47|50.6|75.0| 30 | [MobileNet_v1_0.25_192](http://download.tensorflow.org/models/mobilenet_v1_0.25_192_2017_06_14.tar.gz)|34|0.47|49.0|73.6| 31 | [MobileNet_v1_0.25_160](http://download.tensorflow.org/models/mobilenet_v1_0.25_160_2017_06_14.tar.gz)|21|0.47|46.0|70.7| 32 | [MobileNet_v1_0.25_128](http://download.tensorflow.org/models/mobilenet_v1_0.25_128_2017_06_14.tar.gz)|14|0.47|41.3|66.2| 33 | 34 | 35 | Here is an example of how to download the MobileNet_v1_1.0_224 checkpoint: 36 | 37 | ```shell 38 | $ CHECKPOINT_DIR=/tmp/checkpoints 39 | $ mkdir ${CHECKPOINT_DIR} 40 | $ wget http://download.tensorflow.org/models/mobilenet_v1_1.0_224_2017_06_14.tar.gz 41 | $ tar -xvf mobilenet_v1_1.0_224_2017_06_14.tar.gz 42 | $ mv mobilenet_v1_1.0_224.ckpt.* ${CHECKPOINT_DIR} 43 | $ rm mobilenet_v1_1.0_224_2017_06_14.tar.gz 44 | ``` 45 | More information on integrating MobileNets into your project can be found at the [TF-Slim Image Classification Library](https://github.com/tensorflow/models/blob/master/slim/README.md). 46 | 47 | To get started running models on-device go to [TensorFlow Mobile](https://www.tensorflow.org/mobile/). 48 | -------------------------------------------------------------------------------- /libs/networks/slim_nets/mobilenet_v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/networks/slim_nets/mobilenet_v1.png -------------------------------------------------------------------------------- /libs/networks/slim_nets/nets_factory_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. 
--------------------------------------------------------------------------------
/libs/networks/slim_nets/nets_factory_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 Google Inc. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | """Tests for the slim nets_factory."""
17 | 
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | 
22 | import tensorflow as tf
23 | 
24 | from nets import nets_factory
25 | 
26 | slim = tf.contrib.slim
27 | 
28 | 
29 | class NetworksTest(tf.test.TestCase):
30 | 
31 |   def testGetNetworkFn(self):
32 |     batch_size = 5
33 |     num_classes = 1000
34 |     for net in nets_factory.networks_map:
35 |       with self.test_session():
36 |         net_fn = nets_factory.get_network_fn(net, num_classes)
37 |         # Most networks use 224 as their default_image_size
38 |         image_size = getattr(net_fn, 'default_image_size', 224)
39 |         inputs = tf.random_uniform((batch_size, image_size, image_size, 3))
40 |         logits, end_points = net_fn(inputs)
41 |         self.assertTrue(isinstance(logits, tf.Tensor))
42 |         self.assertTrue(isinstance(end_points, dict))
43 |         self.assertEqual(logits.get_shape().as_list()[0], batch_size)
44 |         self.assertEqual(logits.get_shape().as_list()[-1], num_classes)
45 | 
46 |   def testGetNetworkFnArgScope(self):
47 |     batch_size = 5
48 |     num_classes = 10
49 |     net = 'cifarnet'
50 |     with self.test_session(use_gpu=True):
51 |       net_fn = nets_factory.get_network_fn(net, num_classes)
52 |       image_size = getattr(net_fn, 'default_image_size', 224)
53 |       with slim.arg_scope([slim.model_variable, slim.variable],
54 |                           device='/CPU:0'):
55 |         inputs = tf.random_uniform((batch_size, image_size, image_size, 3))
56 |         net_fn(inputs)
57 |       weights = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, 'CifarNet/conv1')[0]
58 |       self.assertDeviceEqual('/CPU:0', weights.device)
59 | 
60 | if __name__ == '__main__':
61 |   tf.test.main()
62 | 
--------------------------------------------------------------------------------
/libs/val_libs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/libs/val_libs/__init__.py
--------------------------------------------------------------------------------
/output/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/output/.DS_Store
--------------------------------------------------------------------------------
/output/trained_weights/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/output/trained_weights/.DS_Store
--------------------------------------------------------------------------------
/output/trained_weights/README.md:
--------------------------------------------------------------------------------
1 | Please download the models trained by this project and put them here.
--------------------------------------------------------------------------------
/pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/pipeline.png
--------------------------------------------------------------------------------
/scalars.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/scalars.png
--------------------------------------------------------------------------------
/tools/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/tools/.DS_Store
--------------------------------------------------------------------------------
/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Thinklab-SJTU/DCL_RetinaNet_Tensorflow/1d14c9800c3eb1975e8832978f7a263783d171ec/tools/__init__.py
--------------------------------------------------------------------------------
/tsne/tsne.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | import tensorflow as tf
4 | from tensorflow.contrib.tensorboard.plugins import projector
5 | from tensorflow.examples.tutorials.mnist import input_data
6 | import sys
7 | sys.path.append("../")
8 | 
9 | from libs.configs import cfgs
10 | 
11 | 
12 | LOG_DIR = './dcl_log/{}'.format(cfgs.VERSION)
13 | SPRITE_FILE = 'dota_sprite.jpg'
14 | META_FILE = "dcl_meta.tsv"
15 | TENSOR_NAME = "FINAL_LOGITS"
16 | 
17 | 
18 | # Generate the log files needed to visualize the final output-layer vectors.
19 | def visualisation(final_result):
20 |     # Store the final output-layer vectors in a new variable: embeddings are implemented through
21 |     # TensorFlow variables, and PROJECTOR can only visualize TensorFlow variables, so a new variable is defined here to hold the output values.
22 |     y = tf.Variable(final_result, name=TENSOR_NAME)
23 |     summary_writer = tf.summary.FileWriter(LOG_DIR)
24 | 
25 |     # Use the projector.ProjectorConfig class to help generate the log file.
26 |     config = projector.ProjectorConfig()
27 |     # Add an embedding result to be visualized.
28 |     embedding = config.embeddings.add()
29 |     # Specify the name of the TensorFlow variable that holds this embedding.
30 |     embedding.tensor_name = y.name
31 | 
32 |     # Specify where you find the metadata
33 |     # The metadata describes the raw data behind each embedding, e.g. the ground-truth class of each
34 |     # MNIST test image, or the word behind each word ID for word vectors. It is optional; without it the vectors carry no labels.
35 |     embedding.metadata_path = META_FILE
36 | 
37 |     # Specify where you find the sprite (we will create this later)
38 |     # The sprite image is also optional; if none is provided, each point is rendered
39 |     # as a small dot instead of the actual image.
40 |     # embedding.sprite.image_path = SPRITE_FILE
41 |     # When a sprite image is provided, single_image_dim gives the size of a single image,
42 |     # which is used to crop the correct original image out of the sprite.
43 |     # embedding.sprite.single_image_dim.extend([28, 28])
44 | 
45 |     # Say that you want to visualise the embeddings
46 |     # Write the content required by PROJECTOR into the log file.
47 |     projector.visualize_embeddings(summary_writer, config)
48 | 
49 |     # Create a session, initialize the newly declared variable, and write the required log information to disk.
50 |     sess = tf.InteractiveSession()
51 |     sess.run(tf.global_variables_initializer())
52 |     saver = tf.train.Saver()
53 |     saver.save(sess, os.path.join(LOG_DIR, "model"))
54 | 
55 |     summary_writer.close()
56 | 
57 | 
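58 | # Note (added): the projector expects META_FILE to be a plain TSV with one row
59 | # per embedding vector; when it has a single unnamed column, as here and in
60 | # tsne/tsv_radius.py, no header row is used.
61 | 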
62 | # The main function loads the output-layer (logits) matrix saved during evaluation
63 | # and writes it to the log files required by PROJECTOR.
64 | def main(argv=None):
65 | 
66 |     final_result = np.load(os.path.join(LOG_DIR, "final_logits.npy"))  # [:25000, :]
67 |     print(final_result.shape)
68 |     final_result = tf.constant(final_result, tf.float32)
69 |     visualisation(final_result)
70 | 
71 | 
72 | if __name__ == '__main__':
73 |     os.environ["CUDA_VISIBLE_DEVICES"] = '3'
74 |     main()
--------------------------------------------------------------------------------
/tsne/tsv_radius.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | 
4 | sys.path.append("../")
5 | 
6 | from libs.configs import cfgs
7 | 
8 | 
9 | # Quantize the angle labels in the t-SNE metadata file into bins of `omega`
10 | # degrees, producing a coarser metadata file for the TensorBoard projector.
11 | scr_tsv = './dcl_log/{}/dcl_meta.tsv'.format(cfgs.VERSION)
12 | omega = 180 / 4  # bin width in degrees
13 | 
14 | 
15 | fr = open(scr_tsv, 'r')
16 | lines = fr.readlines()
17 | fr.close()
18 | 
19 | fw_tsv = open(os.path.join('dcl_log/{}'.format(cfgs.VERSION), 'dcl_meta_{}.tsv'.format(omega)), 'w')
20 | # fw_tsv.write("Index\tLabel\n")
21 | for ii, l in enumerate(lines):
22 |     # The angle label is the last tab-separated field; with omega = 45, e.g.,
23 |     # an angle of 100 falls into bin 2 and is written out as 90.0.
24 |     index = int(l.split('\t')[-1].strip()) // (omega + 5e-5)
25 |     # index = min(int(l.split('\t')[-1].split('\n')[0]) // radius, 89)
26 |     fw_tsv.write("%.1f\n" % (index * omega))
27 | fw_tsv.close()
28 | 
--------------------------------------------------------------------------------