├── Faster-RCNN_TF
├── LICENSE
├── data
│   ├── __init__.py
│   ├── cache
│   │   └── voc_2007_trainval_gt_roidb.pkl
│   └── demo
│   │   ├── 101.jpg
│   │   ├── 102.jpg
│   │   ├── 103.jpg
│   │   ├── 104.jpg
│   │   └── 105.jpg
├── experiments
│   ├── README.md
│   ├── cfgs
│   │   ├── faster_rcnn_alt_opt.yml
│   │   ├── faster_rcnn_end2end.yml
│   │   └── kitti_rcnn.yml
│   ├── logs
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-24_16-31-08
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-24_16-54-38
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-24_17-05-00
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-24_19-25-31
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-24_19-34-14
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-24_19-54-43
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-25_11-59-45
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-31-18
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-31-55
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-33-04
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-34-26
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-36-25
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-40-36
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-42-51
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-44-28
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-47-17
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-48-30
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-55-42
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-56-58
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-25_21-15-59
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-25_21-17-59
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-25_21-19-36
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-25_21-21-01
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-25_21-22-27
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-25_21-23-51
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-25_21-41-13
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-13-14
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-14-37
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-15-14
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-16-25
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-23-25
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-23-43
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-24-22
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-31-50
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-35-26
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-39-17
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-39-29
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-43-19
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-43-56
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-45-13
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-45-50
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-46-53
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-50-47
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-52-47
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-55-33
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-57-56
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_13-00-36
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_13-05-57
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_13-10-19
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_13-15-38
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_13-19-09
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_13-28-31
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_14-37-05
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_14-37-47
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_14-47-14
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_14-48-02
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_14-59-31
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-26_15-04-16
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-27_20-52-13
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-27_21-03-47
│   │   ├── faster_rcnn_end2end_VGG16_.txt.2017-12-27_21-12-42
│   │   └── faster_rcnn_end2end_VGG16_.txt.2018-01-12_16-06-13
│   └── scripts
│   │   └── faster_rcnn_end2end.sh
├── lib
│   ├── Makefile
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── coco.py
│   │   ├── ds_utils.py
│   │   ├── ds_utils.pyc
│   │   ├── factory.py
│   │   ├── factory.pyc
│   │   ├── imagenet3d.py
│   │   ├── imagenet3d.pyc
│   │   ├── imdb.py
│   │   ├── imdb.pyc
│   │   ├── imdb2.py
│   │   ├── kitti.py
│   │   ├── kitti.pyc
│   │   ├── kitti_tracking.py
│   │   ├── kitti_tracking.pyc
│   │   ├── nissan.py
│   │   ├── nissan.pyc
│   │   ├── nthu.py
│   │   ├── nthu.pyc
│   │   ├── pascal3d.py
│   │   ├── pascal3d.pyc
│   │   ├── pascal_voc.py
│   │   ├── pascal_voc.pyc
│   │   ├── pascal_voc2.py
│   │   ├── voc_eval.py
│   │   └── voc_eval.pyc
│   ├── fast_rcnn
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── bbox_transform.py
│   │   ├── bbox_transform.pyc
│   │   ├── config.py
│   │   ├── config.pyc
│   │   ├── nms_wrapper.py
│   │   ├── nms_wrapper.pyc
│   │   ├── test.py
│   │   ├── test.pyc
│   │   ├── train.py
│   │   └── train.pyc
│   ├── gt_data_layer
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── layer.py
│   │   ├── minibatch.py
│   │   ├── roidb.py
│   │   └── roidb.pyc
│   ├── make.sh
│   ├── networks
│   │   ├── .VGGnet.py.swo
│   │   ├── VGGnet_test.py
│   │   ├── VGGnet_test.pyc
│   │   ├── VGGnet_train.py
│   │   ├── VGGnet_train.pyc
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── factory.py
│   │   ├── factory.pyc
│   │   ├── network.py
│   │   └── network.pyc
│   ├── nms
│   │   ├── .gitignore
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── cpu_nms.pyx
│   │   ├── gpu_nms.hpp
│   │   ├── gpu_nms.pyx
│   │   ├── nms_kernel.cu
│   │   └── py_cpu_nms.py
│   ├── roi_data_layer
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── layer.py
│   │   ├── layer.pyc
│   │   ├── minibatch.py
│   │   ├── minibatch.pyc
│   │   ├── minibatch2.py
│   │   ├── roidb.py
│   │   ├── roidb.pyc
│   │   └── roidb2.py
│   ├── roi_pooling_layer
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── roi_pooling.so
│   │   ├── roi_pooling_op.cc
│   │   ├── roi_pooling_op.cu.o
│   │   ├── roi_pooling_op.py
│   │   ├── roi_pooling_op.pyc
│   │   ├── roi_pooling_op_gpu.cu.cc
│   │   ├── roi_pooling_op_gpu.h
│   │   ├── roi_pooling_op_grad.py
│   │   ├── roi_pooling_op_grad.pyc
│   │   ├── roi_pooling_op_test.py
│   │   └── work_sharder.h
│   ├── rpn_msr
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── anchor_target_layer.py
│   │   ├── anchor_target_layer_tf.py
│   │   ├── anchor_target_layer_tf.pyc
│   │   ├── generate.py
│   │   ├── generate.pyc
│   │   ├── generate_anchors.py
│   │   ├── generate_anchors.pyc
│   │   ├── proposal_layer.py
│   │   ├── proposal_layer_tf.py
│   │   ├── proposal_layer_tf.pyc
│   │   ├── proposal_target_layer_tf.py
│   │   └── proposal_target_layer_tf.pyc
│   ├── setup.py
│   └── utils
│   │   ├── .gitignore
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── bbox.pyx
│   │   ├── blob.py
│   │   ├── blob.pyc
│   │   ├── boxes_grid.py
│   │   ├── boxes_grid.pyc
│   │   ├── nms.py
│   │   ├── nms.pyx
│   │   ├── timer.py
│   │   └── timer.pyc
└── tools
│   ├── _init_paths.py
│   ├── _init_paths.pyc
│   ├── demo.py
│   ├── test_net.py
│   └── train_net.py
├── README.md
├── generate_image.py
├── generate_maintxt.py
├── result
├── result166.jpg
├── result195.jpg
├── result202.jpg
├── test.py
└── thumbnial.jpg
└── xml.py
/Faster-RCNN_TF/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2017 Fu-Hsiang Chan
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/Faster-RCNN_TF/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/data/__init__.py
--------------------------------------------------------------------------------
/Faster-RCNN_TF/data/cache/voc_2007_trainval_gt_roidb.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/data/cache/voc_2007_trainval_gt_roidb.pkl
--------------------------------------------------------------------------------
/Faster-RCNN_TF/data/demo/101.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/data/demo/101.jpg
--------------------------------------------------------------------------------
/Faster-RCNN_TF/data/demo/102.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/data/demo/102.jpg
--------------------------------------------------------------------------------
/Faster-RCNN_TF/data/demo/103.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/data/demo/103.jpg
--------------------------------------------------------------------------------
/Faster-RCNN_TF/data/demo/104.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/data/demo/104.jpg
--------------------------------------------------------------------------------
/Faster-RCNN_TF/data/demo/105.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/data/demo/105.jpg
--------------------------------------------------------------------------------
/Faster-RCNN_TF/experiments/README.md:
--------------------------------------------------------------------------------
1 | Scripts are under `experiments/scripts`.
2 | 
3 | Each script saves a log file under `experiments/logs`.
4 | 
5 | Configuration override files used in the experiments are stored in `experiments/cfgs`.
6 | 
--------------------------------------------------------------------------------
/Faster-RCNN_TF/experiments/cfgs/faster_rcnn_alt_opt.yml:
--------------------------------------------------------------------------------
1 | EXP_DIR: faster_rcnn_alt_opt
2 | TRAIN:
3 |   BG_THRESH_LO: 0.0
4 | TEST:
5 |   HAS_RPN: True
6 | 
--------------------------------------------------------------------------------
/Faster-RCNN_TF/experiments/cfgs/faster_rcnn_end2end.yml:
--------------------------------------------------------------------------------
1 | EXP_DIR: faster_rcnn_end2end
2 | TRAIN:
3 |   HAS_RPN: True
4 |   IMS_PER_BATCH: 1
5 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
6 |   RPN_POSITIVE_OVERLAP: 0.7
7 |   RPN_BATCHSIZE: 256
8 |   PROPOSAL_METHOD: gt
9 |   BG_THRESH_LO: 0.0
10 | TEST:
11 |   HAS_RPN: True
12 | 
--------------------------------------------------------------------------------
/Faster-RCNN_TF/experiments/cfgs/kitti_rcnn.yml:
--------------------------------------------------------------------------------
1 | EXP_DIR: kitti
2 | IS_RPN: False
3 | IS_MULTISCALE: True
4 | IS_EXTRAPOLATING: True
5 | REGION_PROPOSAL: 'RPN'
6 | TRAIN:
7 |   LEARNING_RATE: 0.001
8 |   MOMENTUM: 0.9
9 |   GAMMA: 0.1
10 |   STEPSIZE: 30000
11 |   SCALES_BASE: !!python/tuple [1.0, 2.0, 3.0, 4.0]
12 |   NUM_PER_OCTAVE: 4
13 |   IMS_PER_BATCH: 2
14 |   FG_FRACTION: 0.25
15 |   FG_THRESH: !!python/tuple [0.7, 0.5, 0.5]
16 |   BG_THRESH_HI: !!python/tuple [0.7, 0.5, 0.5]
17 |   BG_THRESH_LO: !!python/tuple [0.1, 0.1, 0.1]
18 |   BBOX_THRESH: !!python/tuple [0.7, 0.5, 0.5]
19 |   ROI_THRESHOLD: 0.01
20 |   SNAPSHOT_ITERS: 10000
21 |   SNAPSHOT_INFIX: kitti
22 |   SNAPSHOT_PREFIX: caffenet_fast_rcnn
23 | TEST:
24 |   SCALES_BASE: !!python/tuple [1.0, 2.0, 3.0, 4.0]
25 |   NUM_PER_OCTAVE: 4
26 |   NMS: 0.5
27 | 
--------------------------------------------------------------------------------
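The three `.yml` files above are not complete configurations: they are per-experiment overrides that `experiments/scripts/faster_rcnn_end2end.sh` passes to `tools/train_net.py` through `--cfg`, as the commands recorded in the logs below show, and the merged result is what each log prints under `Using config:`. The sketch below assumes the `cfg_from_file()` / `_merge_a_into_b()` convention of py-faster-rcnn-style code (`lib/fast_rcnn/config.py` in the tree above); the exact names and behaviour in this repository may differ.

```python
# Minimal sketch (assumption) of how an override file such as
# experiments/cfgs/faster_rcnn_end2end.yml is merged over the default
# training configuration, in the style of py-faster-rcnn's cfg_from_file().
import yaml


def merge_into(override, default):
    """Recursively copy keys from `override` into `default`.

    Only keys that already exist in `default` are accepted, which is how this
    family of codebases catches typos in experiment .yml files.
    """
    for key, value in override.items():
        if key not in default:
            raise KeyError('{} is not a valid config key'.format(key))
        if isinstance(value, dict) and isinstance(default[key], dict):
            merge_into(value, default[key])
        else:
            default[key] = value


def cfg_from_file(filename, default_cfg):
    """Load an experiment YAML and merge it into `default_cfg` in place."""
    # Note: kitti_rcnn.yml uses !!python/tuple tags, which need yaml.load()
    # with an unsafe loader rather than safe_load().
    with open(filename) as f:
        override = yaml.safe_load(f)
    merge_into(override, default_cfg)
    return default_cfg
```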
/Faster-RCNN_TF/experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-31-18:
--------------------------------------------------------------------------------
1 | + echo Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-31-18
2 | Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-31-18
3 | + python ./tools/train_net.py --device gpu --device_id 0 --weights data/pretrain_model/VGG_imagenet.npy --imdb voc_2007_trainval --iters 20000 --cfg experiments/cfgs/faster_rcnn_end2end.yml --network VGGnet_train
4 | voc_2007_train
5 | voc_2007_val
6 | voc_2007_trainval
7 | voc_2007_test
8 | kitti_train
9 | kitti_val
10 | kitti_trainval
11 | kitti_test
12 | nthu_71
13 | nthu_370
14 | Called with args:
15 | Namespace(cfg_file='experiments/cfgs/faster_rcnn_end2end.yml', device='gpu', device_id=0, imdb_name='voc_2007_trainval', max_iters=20000, network_name='VGGnet_train', pretrained_model='data/pretrain_model/VGG_imagenet.npy', randomize=False, set_cfgs=None, solver=None)
16 | Using config:
17 | {'DATA_DIR': '/home/alex/Faster-RCNN_TF/data',
18 |  'DEDUP_BOXES': 0.0625,
19 |  'EPS': 1e-14,
20 |  'EXP_DIR': 'faster_rcnn_end2end',
21 |  'GPU_ID': 0,
22 |  'IS_MULTISCALE': False,
23 |  'MATLAB': 'matlab',
24 |  'MODELS_DIR': '/home/alex/Faster-RCNN_TF/models/pascal_voc',
25 |  'PIXEL_MEANS': array([[[ 102.9801, 115.9465, 122.7717]]]),
26 |  'RNG_SEED': 3,
27 |  'ROOT_DIR': '/home/alex/Faster-RCNN_TF',
28 |  'TEST': {'BBOX_REG': True,
29 |           'DEBUG_TIMELINE': False,
30 |           'HAS_RPN': True,
31 |           'MAX_SIZE': 1000,
32 |           'NMS': 0.3,
33 |           'PROPOSAL_METHOD': 'selective_search',
34 |           'RPN_MIN_SIZE': 16,
35 |           'RPN_NMS_THRESH': 0.7,
36 |           'RPN_POST_NMS_TOP_N': 300,
37 |           'RPN_PRE_NMS_TOP_N': 6000,
38 |           'SCALES': [600],
39 |           'SVM': False},
40 |  'TRAIN': {'ASPECT_GROUPING': True,
41 |            'BATCH_SIZE': 128,
42 |            'BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0],
43 |            'BBOX_NORMALIZE_MEANS': [0.0, 0.0, 0.0, 0.0],
44 |            'BBOX_NORMALIZE_STDS': [0.1, 0.1, 0.2, 0.2],
45 |            'BBOX_NORMALIZE_TARGETS': True,
46 |            'BBOX_NORMALIZE_TARGETS_PRECOMPUTED': True,
47 |            'BBOX_REG': True,
48 |            'BBOX_THRESH': 0.5,
49 |            'BG_THRESH_HI': 0.5,
50 |            'BG_THRESH_LO': 0.0,
51 |            'DEBUG_TIMELINE': False,
52 |            'DISPLAY': 10,
53 |            'FG_FRACTION': 0.25,
54 |            'FG_THRESH': 0.5,
55 |            'GAMMA': 0.1,
56 |            'HAS_RPN': True,
57 |            'IMS_PER_BATCH': 1,
58 |            'LEARNING_RATE': 0.001,
59 |            'MAX_SIZE': 1000,
60 |            'MOMENTUM': 0.9,
61 |            'PROPOSAL_METHOD': 'gt',
62 |            'RPN_BATCHSIZE': 256,
63 |            'RPN_BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0],
64 |            'RPN_CLOBBER_POSITIVES': False,
65 |            'RPN_FG_FRACTION': 0.5,
66 |            'RPN_MIN_SIZE': 16,
67 |            'RPN_NEGATIVE_OVERLAP': 0.3,
68 |            'RPN_NMS_THRESH': 0.7,
69 |            'RPN_POSITIVE_OVERLAP': 0.7,
70 |            'RPN_POSITIVE_WEIGHT': -1.0,
71 |            'RPN_POST_NMS_TOP_N': 2000,
72 |            'RPN_PRE_NMS_TOP_N': 12000,
73 |            'SCALES': [600],
74 |            'SNAPSHOT_INFIX': '',
75 |            'SNAPSHOT_ITERS': 5000,
76 |            'SNAPSHOT_PREFIX': 'VGGnet_fast_rcnn',
77 |            'STEPSIZE': 50000,
78 |            'USE_FLIPPED': True,
79 |            'USE_PREFETCH': False},
80 |  'USE_GPU_NMS': True}
81 | >
82 | Loaded dataset `voc_2007_trainval` for training
83 | Appending horizontally-flipped training examples...
84 | voc_2007_trainval gt roidb loaded from /home/alex/Faster-RCNN_TF/data/cache/voc_2007_trainval_gt_roidb.pkl
85 | Traceback (most recent call last):
86 |   File "./tools/train_net.py", line 83, in <module>
87 |     roidb = get_training_roidb(imdb)
88 |   File "/home/alex/Faster-RCNN_TF/tools/../lib/fast_rcnn/train.py", line 205, in get_training_roidb
89 |     imdb.append_flipped_images()
90 |   File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/imdb.py", line 113, in append_flipped_images
91 |     assert (boxes[:, 2] >= boxes[:, 0]).all()
92 | AssertionError
93 | 
--------------------------------------------------------------------------------
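The run above, and most of the runs that follow, stops at the same check in `lib/datasets/imdb.py`: after horizontal flipping, every box must still satisfy x2 >= x1. The sketch below reproduces the flip arithmetic that py-faster-rcnn-style `append_flipped_images()` implementations use, plus a purely illustrative helper, `find_unflippable`, which is not part of this repository, for locating annotations that trigger the failure. With generated VOC-style XML the usual culprits are x-coordinates outside the recorded image width, or values that wrap around in the uint16 `boxes` array once `pascal_voc.py` subtracts 1 from the 1-based PASCAL coordinates (an xmin or xmax of 0 becomes 65535).

```python
# Sketch of the arithmetic behind the failing assert above; the checker
# below is illustrative only and not part of this repository.
import numpy as np


def flip_boxes(boxes, width):
    """Mirror 0-based [x1, y1, x2, y2] boxes horizontally: x -> width - x - 1."""
    flipped = boxes.copy()
    oldx1 = boxes[:, 0].copy()
    oldx2 = boxes[:, 2].copy()
    flipped[:, 0] = width - oldx2 - 1
    flipped[:, 2] = width - oldx1 - 1
    return flipped


def find_unflippable(boxes, width):
    """Indices of boxes that would trip `assert (boxes[:, 2] >= boxes[:, 0]).all()`."""
    flipped = flip_boxes(boxes, width)
    return np.where(flipped[:, 2] < flipped[:, 0])[0]


# Example: a box whose xmax equals the image width survives loading but
# underflows the uint16 array when flipped, so the assertion fires.
boxes = np.array([[10, 10, 500, 60]], dtype=np.uint16)
print(find_unflippable(boxes, width=500))   # -> [0]
```

Note that this run re-used the cached ground-truth roidb (`gt roidb loaded from .../voc_2007_trainval_gt_roidb.pkl` above), so corrected annotations only take effect once that cache file is deleted; that is a plausible reason the same assertion repeats across so many of the later logs.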
/Faster-RCNN_TF/experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-31-55:
--------------------------------------------------------------------------------
1 | + echo Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-31-55 2 | Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-31-55 3 | + python ./tools/train_net.py --device gpu --device_id 0 --weights data/pretrain_model/VGG_imagenet.npy --imdb voc_2007_trainval --iters 20000 --cfg experiments/cfgs/faster_rcnn_end2end.yml --network VGGnet_train 4 | voc_2007_train 5 | voc_2007_val 6 | voc_2007_trainval 7 | voc_2007_test 8 | kitti_train 9 | kitti_val 10 | kitti_trainval 11 | kitti_test 12 | nthu_71 13 | nthu_370 14 | Called with args: 15 | Namespace(cfg_file='experiments/cfgs/faster_rcnn_end2end.yml', device='gpu', device_id=0, imdb_name='voc_2007_trainval', max_iters=20000, network_name='VGGnet_train', pretrained_model='data/pretrain_model/VGG_imagenet.npy', randomize=False, set_cfgs=None, solver=None) 16 | Using config: 17 | {'DATA_DIR': '/home/alex/Faster-RCNN_TF/data', 18 | 'DEDUP_BOXES': 0.0625, 19 | 'EPS': 1e-14, 20 | 'EXP_DIR': 'faster_rcnn_end2end', 21 | 'GPU_ID': 0, 22 | 'IS_MULTISCALE': False, 23 | 'MATLAB': 'matlab', 24 | 'MODELS_DIR': '/home/alex/Faster-RCNN_TF/models/pascal_voc', 25 | 'PIXEL_MEANS': array([[[ 102.9801, 115.9465, 122.7717]]]), 26 | 'RNG_SEED': 3, 27 | 'ROOT_DIR': '/home/alex/Faster-RCNN_TF', 28 | 'TEST': {'BBOX_REG': True, 29 | 'DEBUG_TIMELINE': False, 30 | 'HAS_RPN': True, 31 | 'MAX_SIZE': 1000, 32 | 'NMS': 0.3, 33 | 'PROPOSAL_METHOD': 'selective_search', 34 | 'RPN_MIN_SIZE': 16, 35 | 'RPN_NMS_THRESH': 0.7, 36 | 'RPN_POST_NMS_TOP_N': 300, 37 | 'RPN_PRE_NMS_TOP_N': 6000, 38 | 'SCALES': [600], 39 | 'SVM': False}, 40 | 'TRAIN': {'ASPECT_GROUPING': True, 41 | 'BATCH_SIZE': 128, 42 | 'BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 43 | 'BBOX_NORMALIZE_MEANS': [0.0, 0.0, 0.0, 0.0], 44 | 'BBOX_NORMALIZE_STDS': [0.1, 0.1, 0.2, 0.2], 45 | 'BBOX_NORMALIZE_TARGETS': True, 46 | 'BBOX_NORMALIZE_TARGETS_PRECOMPUTED': True, 47 | 'BBOX_REG': True, 48 | 'BBOX_THRESH': 0.5, 49 | 'BG_THRESH_HI': 0.5, 50 | 'BG_THRESH_LO': 0.0, 51 | 'DEBUG_TIMELINE': False, 52 | 'DISPLAY': 10, 53 | 'FG_FRACTION': 0.25, 54 | 'FG_THRESH': 0.5, 55 | 'GAMMA': 0.1, 56 | 'HAS_RPN': True, 57 | 'IMS_PER_BATCH': 1, 58 | 'LEARNING_RATE': 0.001, 59 | 'MAX_SIZE': 1000, 60 | 'MOMENTUM': 0.9, 61 | 'PROPOSAL_METHOD': 'gt', 62 | 'RPN_BATCHSIZE': 256, 63 | 'RPN_BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 64 | 'RPN_CLOBBER_POSITIVES': False, 65 | 'RPN_FG_FRACTION': 0.5, 66 | 'RPN_MIN_SIZE': 16, 67 | 'RPN_NEGATIVE_OVERLAP': 0.3, 68 | 'RPN_NMS_THRESH': 0.7, 69 | 'RPN_POSITIVE_OVERLAP': 0.7, 70 | 'RPN_POSITIVE_WEIGHT': -1.0, 71 | 'RPN_POST_NMS_TOP_N': 2000, 72 | 'RPN_PRE_NMS_TOP_N': 12000, 73 | 'SCALES': [600], 74 | 'SNAPSHOT_INFIX': '', 75 | 'SNAPSHOT_ITERS': 5000, 76 | 
'SNAPSHOT_PREFIX': 'VGGnet_fast_rcnn', 77 | 'STEPSIZE': 50000, 78 | 'USE_FLIPPED': True, 79 | 'USE_PREFETCH': False}, 80 | 'USE_GPU_NMS': True} 81 | > 82 | Loaded dataset `voc_2007_trainval` for training 83 | Appending horizontally-flipped training examples... 84 | voc_2007_trainval gt roidb loaded from /home/alex/Faster-RCNN_TF/data/cache/voc_2007_trainval_gt_roidb.pkl 85 | Traceback (most recent call last): 86 | File "./tools/train_net.py", line 83, in 87 | roidb = get_training_roidb(imdb) 88 | File "/home/alex/Faster-RCNN_TF/tools/../lib/fast_rcnn/train.py", line 205, in get_training_roidb 89 | imdb.append_flipped_images() 90 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/imdb.py", line 113, in append_flipped_images 91 | assert (boxes[:, 2] >= boxes[:, 0]).all() 92 | AssertionError 93 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-34-26: -------------------------------------------------------------------------------- 1 | + echo Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-34-26 2 | Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-34-26 3 | + python ./tools/train_net.py --device gpu --device_id 0 --weights data/pretrain_model/VGG_imagenet.npy --imdb voc_2007_trainval --iters 20000 --cfg experiments/cfgs/faster_rcnn_end2end.yml --network VGGnet_train 4 | voc_2007_train 5 | voc_2007_val 6 | voc_2007_trainval 7 | voc_2007_test 8 | kitti_train 9 | kitti_val 10 | kitti_trainval 11 | kitti_test 12 | nthu_71 13 | nthu_370 14 | Called with args: 15 | Namespace(cfg_file='experiments/cfgs/faster_rcnn_end2end.yml', device='gpu', device_id=0, imdb_name='voc_2007_trainval', max_iters=20000, network_name='VGGnet_train', pretrained_model='data/pretrain_model/VGG_imagenet.npy', randomize=False, set_cfgs=None, solver=None) 16 | Using config: 17 | {'DATA_DIR': '/home/alex/Faster-RCNN_TF/data', 18 | 'DEDUP_BOXES': 0.0625, 19 | 'EPS': 1e-14, 20 | 'EXP_DIR': 'faster_rcnn_end2end', 21 | 'GPU_ID': 0, 22 | 'IS_MULTISCALE': False, 23 | 'MATLAB': 'matlab', 24 | 'MODELS_DIR': '/home/alex/Faster-RCNN_TF/models/pascal_voc', 25 | 'PIXEL_MEANS': array([[[ 102.9801, 115.9465, 122.7717]]]), 26 | 'RNG_SEED': 3, 27 | 'ROOT_DIR': '/home/alex/Faster-RCNN_TF', 28 | 'TEST': {'BBOX_REG': True, 29 | 'DEBUG_TIMELINE': False, 30 | 'HAS_RPN': True, 31 | 'MAX_SIZE': 1000, 32 | 'NMS': 0.3, 33 | 'PROPOSAL_METHOD': 'selective_search', 34 | 'RPN_MIN_SIZE': 16, 35 | 'RPN_NMS_THRESH': 0.7, 36 | 'RPN_POST_NMS_TOP_N': 300, 37 | 'RPN_PRE_NMS_TOP_N': 6000, 38 | 'SCALES': [600], 39 | 'SVM': False}, 40 | 'TRAIN': {'ASPECT_GROUPING': True, 41 | 'BATCH_SIZE': 128, 42 | 'BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 43 | 'BBOX_NORMALIZE_MEANS': [0.0, 0.0, 0.0, 0.0], 44 | 'BBOX_NORMALIZE_STDS': [0.1, 0.1, 0.2, 0.2], 45 | 'BBOX_NORMALIZE_TARGETS': True, 46 | 'BBOX_NORMALIZE_TARGETS_PRECOMPUTED': True, 47 | 'BBOX_REG': True, 48 | 'BBOX_THRESH': 0.5, 49 | 'BG_THRESH_HI': 0.5, 50 | 'BG_THRESH_LO': 0.0, 51 | 'DEBUG_TIMELINE': False, 52 | 'DISPLAY': 10, 53 | 'FG_FRACTION': 0.25, 54 | 'FG_THRESH': 0.5, 55 | 'GAMMA': 0.1, 56 | 'HAS_RPN': True, 57 | 'IMS_PER_BATCH': 1, 58 | 'LEARNING_RATE': 0.001, 59 | 'MAX_SIZE': 1000, 60 | 'MOMENTUM': 0.9, 61 | 'PROPOSAL_METHOD': 'gt', 62 | 'RPN_BATCHSIZE': 256, 63 | 'RPN_BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 64 | 'RPN_CLOBBER_POSITIVES': False, 65 | 'RPN_FG_FRACTION': 0.5, 66 | 'RPN_MIN_SIZE': 16, 67 | 'RPN_NEGATIVE_OVERLAP': 0.3, 
68 | 'RPN_NMS_THRESH': 0.7, 69 | 'RPN_POSITIVE_OVERLAP': 0.7, 70 | 'RPN_POSITIVE_WEIGHT': -1.0, 71 | 'RPN_POST_NMS_TOP_N': 2000, 72 | 'RPN_PRE_NMS_TOP_N': 12000, 73 | 'SCALES': [600], 74 | 'SNAPSHOT_INFIX': '', 75 | 'SNAPSHOT_ITERS': 5000, 76 | 'SNAPSHOT_PREFIX': 'VGGnet_fast_rcnn', 77 | 'STEPSIZE': 50000, 78 | 'USE_FLIPPED': True, 79 | 'USE_PREFETCH': False}, 80 | 'USE_GPU_NMS': True} 81 | > 82 | Loaded dataset `voc_2007_trainval` for training 83 | Appending horizontally-flipped training examples... 84 | voc_2007_trainval gt roidb loaded from /home/alex/Faster-RCNN_TF/data/cache/voc_2007_trainval_gt_roidb.pkl 85 | Traceback (most recent call last): 86 | File "./tools/train_net.py", line 83, in 87 | roidb = get_training_roidb(imdb) 88 | File "/home/alex/Faster-RCNN_TF/tools/../lib/fast_rcnn/train.py", line 205, in get_training_roidb 89 | imdb.append_flipped_images() 90 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/imdb.py", line 113, in append_flipped_images 91 | assert (boxes[:, 2] >= boxes[:, 0]).all() 92 | AssertionError 93 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-40-36: -------------------------------------------------------------------------------- 1 | + echo Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-40-36 2 | Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-40-36 3 | + python ./tools/train_net.py --device gpu --device_id 0 --weights data/pretrain_model/VGG_imagenet.npy --imdb voc_2007_trainval --iters 20000 --cfg experiments/cfgs/faster_rcnn_end2end.yml --network VGGnet_train 4 | voc_2007_train 5 | voc_2007_val 6 | voc_2007_trainval 7 | voc_2007_test 8 | kitti_train 9 | kitti_val 10 | kitti_trainval 11 | kitti_test 12 | nthu_71 13 | nthu_370 14 | Called with args: 15 | Namespace(cfg_file='experiments/cfgs/faster_rcnn_end2end.yml', device='gpu', device_id=0, imdb_name='voc_2007_trainval', max_iters=20000, network_name='VGGnet_train', pretrained_model='data/pretrain_model/VGG_imagenet.npy', randomize=False, set_cfgs=None, solver=None) 16 | Using config: 17 | {'DATA_DIR': '/home/alex/Faster-RCNN_TF/data', 18 | 'DEDUP_BOXES': 0.0625, 19 | 'EPS': 1e-14, 20 | 'EXP_DIR': 'faster_rcnn_end2end', 21 | 'GPU_ID': 0, 22 | 'IS_MULTISCALE': False, 23 | 'MATLAB': 'matlab', 24 | 'MODELS_DIR': '/home/alex/Faster-RCNN_TF/models/pascal_voc', 25 | 'PIXEL_MEANS': array([[[ 102.9801, 115.9465, 122.7717]]]), 26 | 'RNG_SEED': 3, 27 | 'ROOT_DIR': '/home/alex/Faster-RCNN_TF', 28 | 'TEST': {'BBOX_REG': True, 29 | 'DEBUG_TIMELINE': False, 30 | 'HAS_RPN': True, 31 | 'MAX_SIZE': 1000, 32 | 'NMS': 0.3, 33 | 'PROPOSAL_METHOD': 'selective_search', 34 | 'RPN_MIN_SIZE': 16, 35 | 'RPN_NMS_THRESH': 0.7, 36 | 'RPN_POST_NMS_TOP_N': 300, 37 | 'RPN_PRE_NMS_TOP_N': 6000, 38 | 'SCALES': [600], 39 | 'SVM': False}, 40 | 'TRAIN': {'ASPECT_GROUPING': True, 41 | 'BATCH_SIZE': 128, 42 | 'BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 43 | 'BBOX_NORMALIZE_MEANS': [0.0, 0.0, 0.0, 0.0], 44 | 'BBOX_NORMALIZE_STDS': [0.1, 0.1, 0.2, 0.2], 45 | 'BBOX_NORMALIZE_TARGETS': True, 46 | 'BBOX_NORMALIZE_TARGETS_PRECOMPUTED': True, 47 | 'BBOX_REG': True, 48 | 'BBOX_THRESH': 0.5, 49 | 'BG_THRESH_HI': 0.5, 50 | 'BG_THRESH_LO': 0.0, 51 | 'DEBUG_TIMELINE': False, 52 | 'DISPLAY': 10, 53 | 'FG_FRACTION': 0.25, 54 | 'FG_THRESH': 0.5, 55 | 'GAMMA': 0.1, 56 | 'HAS_RPN': True, 57 | 'IMS_PER_BATCH': 1, 58 | 'LEARNING_RATE': 0.001, 59 | 'MAX_SIZE': 1000, 60 | 
'MOMENTUM': 0.9, 61 | 'PROPOSAL_METHOD': 'gt', 62 | 'RPN_BATCHSIZE': 256, 63 | 'RPN_BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 64 | 'RPN_CLOBBER_POSITIVES': False, 65 | 'RPN_FG_FRACTION': 0.5, 66 | 'RPN_MIN_SIZE': 16, 67 | 'RPN_NEGATIVE_OVERLAP': 0.3, 68 | 'RPN_NMS_THRESH': 0.7, 69 | 'RPN_POSITIVE_OVERLAP': 0.7, 70 | 'RPN_POSITIVE_WEIGHT': -1.0, 71 | 'RPN_POST_NMS_TOP_N': 2000, 72 | 'RPN_PRE_NMS_TOP_N': 12000, 73 | 'SCALES': [600], 74 | 'SNAPSHOT_INFIX': '', 75 | 'SNAPSHOT_ITERS': 5000, 76 | 'SNAPSHOT_PREFIX': 'VGGnet_fast_rcnn', 77 | 'STEPSIZE': 50000, 78 | 'USE_FLIPPED': True, 79 | 'USE_PREFETCH': False}, 80 | 'USE_GPU_NMS': True} 81 | > 82 | Loaded dataset `voc_2007_trainval` for training 83 | Appending horizontally-flipped training examples... 84 | voc_2007_trainval gt roidb loaded from /home/alex/Faster-RCNN_TF/data/cache/voc_2007_trainval_gt_roidb.pkl 85 | Traceback (most recent call last): 86 | File "./tools/train_net.py", line 83, in 87 | roidb = get_training_roidb(imdb) 88 | File "/home/alex/Faster-RCNN_TF/tools/../lib/fast_rcnn/train.py", line 205, in get_training_roidb 89 | imdb.append_flipped_images() 90 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/imdb.py", line 113, in append_flipped_images 91 | assert (boxes[:, 2] >= boxes[:, 0]).all() 92 | AssertionError 93 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-44-28: -------------------------------------------------------------------------------- 1 | + echo Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-44-28 2 | Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-44-28 3 | + python ./tools/train_net.py --device gpu --device_id 0 --weights data/pretrain_model/VGG_imagenet.npy --imdb voc_2007_trainval --iters 20000 --cfg experiments/cfgs/faster_rcnn_end2end.yml --network VGGnet_train 4 | voc_2007_train 5 | voc_2007_val 6 | voc_2007_trainval 7 | voc_2007_test 8 | kitti_train 9 | kitti_val 10 | kitti_trainval 11 | kitti_test 12 | nthu_71 13 | nthu_370 14 | Called with args: 15 | Namespace(cfg_file='experiments/cfgs/faster_rcnn_end2end.yml', device='gpu', device_id=0, imdb_name='voc_2007_trainval', max_iters=20000, network_name='VGGnet_train', pretrained_model='data/pretrain_model/VGG_imagenet.npy', randomize=False, set_cfgs=None, solver=None) 16 | Using config: 17 | {'DATA_DIR': '/home/alex/Faster-RCNN_TF/data', 18 | 'DEDUP_BOXES': 0.0625, 19 | 'EPS': 1e-14, 20 | 'EXP_DIR': 'faster_rcnn_end2end', 21 | 'GPU_ID': 0, 22 | 'IS_MULTISCALE': False, 23 | 'MATLAB': 'matlab', 24 | 'MODELS_DIR': '/home/alex/Faster-RCNN_TF/models/pascal_voc', 25 | 'PIXEL_MEANS': array([[[ 102.9801, 115.9465, 122.7717]]]), 26 | 'RNG_SEED': 3, 27 | 'ROOT_DIR': '/home/alex/Faster-RCNN_TF', 28 | 'TEST': {'BBOX_REG': True, 29 | 'DEBUG_TIMELINE': False, 30 | 'HAS_RPN': True, 31 | 'MAX_SIZE': 1000, 32 | 'NMS': 0.3, 33 | 'PROPOSAL_METHOD': 'selective_search', 34 | 'RPN_MIN_SIZE': 16, 35 | 'RPN_NMS_THRESH': 0.7, 36 | 'RPN_POST_NMS_TOP_N': 300, 37 | 'RPN_PRE_NMS_TOP_N': 6000, 38 | 'SCALES': [600], 39 | 'SVM': False}, 40 | 'TRAIN': {'ASPECT_GROUPING': True, 41 | 'BATCH_SIZE': 128, 42 | 'BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 43 | 'BBOX_NORMALIZE_MEANS': [0.0, 0.0, 0.0, 0.0], 44 | 'BBOX_NORMALIZE_STDS': [0.1, 0.1, 0.2, 0.2], 45 | 'BBOX_NORMALIZE_TARGETS': True, 46 | 'BBOX_NORMALIZE_TARGETS_PRECOMPUTED': True, 47 | 'BBOX_REG': True, 48 | 'BBOX_THRESH': 0.5, 49 | 'BG_THRESH_HI': 0.5, 50 
| 'BG_THRESH_LO': 0.0, 51 | 'DEBUG_TIMELINE': False, 52 | 'DISPLAY': 10, 53 | 'FG_FRACTION': 0.25, 54 | 'FG_THRESH': 0.5, 55 | 'GAMMA': 0.1, 56 | 'HAS_RPN': True, 57 | 'IMS_PER_BATCH': 1, 58 | 'LEARNING_RATE': 0.001, 59 | 'MAX_SIZE': 1000, 60 | 'MOMENTUM': 0.9, 61 | 'PROPOSAL_METHOD': 'gt', 62 | 'RPN_BATCHSIZE': 256, 63 | 'RPN_BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 64 | 'RPN_CLOBBER_POSITIVES': False, 65 | 'RPN_FG_FRACTION': 0.5, 66 | 'RPN_MIN_SIZE': 16, 67 | 'RPN_NEGATIVE_OVERLAP': 0.3, 68 | 'RPN_NMS_THRESH': 0.7, 69 | 'RPN_POSITIVE_OVERLAP': 0.7, 70 | 'RPN_POSITIVE_WEIGHT': -1.0, 71 | 'RPN_POST_NMS_TOP_N': 2000, 72 | 'RPN_PRE_NMS_TOP_N': 12000, 73 | 'SCALES': [600], 74 | 'SNAPSHOT_INFIX': '', 75 | 'SNAPSHOT_ITERS': 5000, 76 | 'SNAPSHOT_PREFIX': 'VGGnet_fast_rcnn', 77 | 'STEPSIZE': 50000, 78 | 'USE_FLIPPED': True, 79 | 'USE_PREFETCH': False}, 80 | 'USE_GPU_NMS': True} 81 | > 82 | Loaded dataset `voc_2007_trainval` for training 83 | Appending horizontally-flipped training examples... 84 | voc_2007_trainval gt roidb loaded from /home/alex/Faster-RCNN_TF/data/cache/voc_2007_trainval_gt_roidb.pkl 85 | Traceback (most recent call last): 86 | File "./tools/train_net.py", line 83, in 87 | roidb = get_training_roidb(imdb) 88 | File "/home/alex/Faster-RCNN_TF/tools/../lib/fast_rcnn/train.py", line 205, in get_training_roidb 89 | imdb.append_flipped_images() 90 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/imdb.py", line 113, in append_flipped_images 91 | assert (boxes[:, 2] >= boxes[:, 0]).all() 92 | AssertionError 93 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-47-17: -------------------------------------------------------------------------------- 1 | + echo Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-47-17 2 | Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-47-17 3 | + python ./tools/train_net.py --device gpu --device_id 0 --weights data/pretrain_model/VGG_imagenet.npy --imdb voc_2007_trainval --iters 20000 --cfg experiments/cfgs/faster_rcnn_end2end.yml --network VGGnet_train 4 | voc_2007_train 5 | voc_2007_val 6 | voc_2007_trainval 7 | voc_2007_test 8 | kitti_train 9 | kitti_val 10 | kitti_trainval 11 | kitti_test 12 | nthu_71 13 | nthu_370 14 | Called with args: 15 | Namespace(cfg_file='experiments/cfgs/faster_rcnn_end2end.yml', device='gpu', device_id=0, imdb_name='voc_2007_trainval', max_iters=20000, network_name='VGGnet_train', pretrained_model='data/pretrain_model/VGG_imagenet.npy', randomize=False, set_cfgs=None, solver=None) 16 | Using config: 17 | {'DATA_DIR': '/home/alex/Faster-RCNN_TF/data', 18 | 'DEDUP_BOXES': 0.0625, 19 | 'EPS': 1e-14, 20 | 'EXP_DIR': 'faster_rcnn_end2end', 21 | 'GPU_ID': 0, 22 | 'IS_MULTISCALE': False, 23 | 'MATLAB': 'matlab', 24 | 'MODELS_DIR': '/home/alex/Faster-RCNN_TF/models/pascal_voc', 25 | 'PIXEL_MEANS': array([[[ 102.9801, 115.9465, 122.7717]]]), 26 | 'RNG_SEED': 3, 27 | 'ROOT_DIR': '/home/alex/Faster-RCNN_TF', 28 | 'TEST': {'BBOX_REG': True, 29 | 'DEBUG_TIMELINE': False, 30 | 'HAS_RPN': True, 31 | 'MAX_SIZE': 1000, 32 | 'NMS': 0.3, 33 | 'PROPOSAL_METHOD': 'selective_search', 34 | 'RPN_MIN_SIZE': 16, 35 | 'RPN_NMS_THRESH': 0.7, 36 | 'RPN_POST_NMS_TOP_N': 300, 37 | 'RPN_PRE_NMS_TOP_N': 6000, 38 | 'SCALES': [600], 39 | 'SVM': False}, 40 | 'TRAIN': {'ASPECT_GROUPING': True, 41 | 'BATCH_SIZE': 128, 42 | 'BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 43 | 
'BBOX_NORMALIZE_MEANS': [0.0, 0.0, 0.0, 0.0], 44 | 'BBOX_NORMALIZE_STDS': [0.1, 0.1, 0.2, 0.2], 45 | 'BBOX_NORMALIZE_TARGETS': True, 46 | 'BBOX_NORMALIZE_TARGETS_PRECOMPUTED': True, 47 | 'BBOX_REG': True, 48 | 'BBOX_THRESH': 0.5, 49 | 'BG_THRESH_HI': 0.5, 50 | 'BG_THRESH_LO': 0.0, 51 | 'DEBUG_TIMELINE': False, 52 | 'DISPLAY': 10, 53 | 'FG_FRACTION': 0.25, 54 | 'FG_THRESH': 0.5, 55 | 'GAMMA': 0.1, 56 | 'HAS_RPN': True, 57 | 'IMS_PER_BATCH': 1, 58 | 'LEARNING_RATE': 0.001, 59 | 'MAX_SIZE': 1000, 60 | 'MOMENTUM': 0.9, 61 | 'PROPOSAL_METHOD': 'gt', 62 | 'RPN_BATCHSIZE': 256, 63 | 'RPN_BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 64 | 'RPN_CLOBBER_POSITIVES': False, 65 | 'RPN_FG_FRACTION': 0.5, 66 | 'RPN_MIN_SIZE': 16, 67 | 'RPN_NEGATIVE_OVERLAP': 0.3, 68 | 'RPN_NMS_THRESH': 0.7, 69 | 'RPN_POSITIVE_OVERLAP': 0.7, 70 | 'RPN_POSITIVE_WEIGHT': -1.0, 71 | 'RPN_POST_NMS_TOP_N': 2000, 72 | 'RPN_PRE_NMS_TOP_N': 12000, 73 | 'SCALES': [600], 74 | 'SNAPSHOT_INFIX': '', 75 | 'SNAPSHOT_ITERS': 5000, 76 | 'SNAPSHOT_PREFIX': 'VGGnet_fast_rcnn', 77 | 'STEPSIZE': 50000, 78 | 'USE_FLIPPED': True, 79 | 'USE_PREFETCH': False}, 80 | 'USE_GPU_NMS': True} 81 | > 82 | Loaded dataset `voc_2007_trainval` for training 83 | Appending horizontally-flipped training examples... 84 | voc_2007_trainval gt roidb loaded from /home/alex/Faster-RCNN_TF/data/cache/voc_2007_trainval_gt_roidb.pkl 85 | Traceback (most recent call last): 86 | File "./tools/train_net.py", line 83, in 87 | roidb = get_training_roidb(imdb) 88 | File "/home/alex/Faster-RCNN_TF/tools/../lib/fast_rcnn/train.py", line 205, in get_training_roidb 89 | imdb.append_flipped_images() 90 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/imdb.py", line 113, in append_flipped_images 91 | assert (boxes[:, 2] >= boxes[:, 0]).all() 92 | AssertionError 93 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-48-30: -------------------------------------------------------------------------------- 1 | + echo Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-48-30 2 | Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_20-48-30 3 | + python ./tools/train_net.py --device gpu --device_id 0 --weights data/pretrain_model/VGG_imagenet.npy --imdb voc_2007_trainval --iters 20000 --cfg experiments/cfgs/faster_rcnn_end2end.yml --network VGGnet_train 4 | voc_2007_train 5 | voc_2007_val 6 | voc_2007_trainval 7 | voc_2007_test 8 | kitti_train 9 | kitti_val 10 | kitti_trainval 11 | kitti_test 12 | nthu_71 13 | nthu_370 14 | Called with args: 15 | Namespace(cfg_file='experiments/cfgs/faster_rcnn_end2end.yml', device='gpu', device_id=0, imdb_name='voc_2007_trainval', max_iters=20000, network_name='VGGnet_train', pretrained_model='data/pretrain_model/VGG_imagenet.npy', randomize=False, set_cfgs=None, solver=None) 16 | Using config: 17 | {'DATA_DIR': '/home/alex/Faster-RCNN_TF/data', 18 | 'DEDUP_BOXES': 0.0625, 19 | 'EPS': 1e-14, 20 | 'EXP_DIR': 'faster_rcnn_end2end', 21 | 'GPU_ID': 0, 22 | 'IS_MULTISCALE': False, 23 | 'MATLAB': 'matlab', 24 | 'MODELS_DIR': '/home/alex/Faster-RCNN_TF/models/pascal_voc', 25 | 'PIXEL_MEANS': array([[[ 102.9801, 115.9465, 122.7717]]]), 26 | 'RNG_SEED': 3, 27 | 'ROOT_DIR': '/home/alex/Faster-RCNN_TF', 28 | 'TEST': {'BBOX_REG': True, 29 | 'DEBUG_TIMELINE': False, 30 | 'HAS_RPN': True, 31 | 'MAX_SIZE': 1000, 32 | 'NMS': 0.3, 33 | 'PROPOSAL_METHOD': 'selective_search', 34 | 'RPN_MIN_SIZE': 16, 35 | 
'RPN_NMS_THRESH': 0.7, 36 | 'RPN_POST_NMS_TOP_N': 300, 37 | 'RPN_PRE_NMS_TOP_N': 6000, 38 | 'SCALES': [600], 39 | 'SVM': False}, 40 | 'TRAIN': {'ASPECT_GROUPING': True, 41 | 'BATCH_SIZE': 128, 42 | 'BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 43 | 'BBOX_NORMALIZE_MEANS': [0.0, 0.0, 0.0, 0.0], 44 | 'BBOX_NORMALIZE_STDS': [0.1, 0.1, 0.2, 0.2], 45 | 'BBOX_NORMALIZE_TARGETS': True, 46 | 'BBOX_NORMALIZE_TARGETS_PRECOMPUTED': True, 47 | 'BBOX_REG': True, 48 | 'BBOX_THRESH': 0.5, 49 | 'BG_THRESH_HI': 0.5, 50 | 'BG_THRESH_LO': 0.0, 51 | 'DEBUG_TIMELINE': False, 52 | 'DISPLAY': 10, 53 | 'FG_FRACTION': 0.25, 54 | 'FG_THRESH': 0.5, 55 | 'GAMMA': 0.1, 56 | 'HAS_RPN': True, 57 | 'IMS_PER_BATCH': 1, 58 | 'LEARNING_RATE': 0.001, 59 | 'MAX_SIZE': 1000, 60 | 'MOMENTUM': 0.9, 61 | 'PROPOSAL_METHOD': 'gt', 62 | 'RPN_BATCHSIZE': 256, 63 | 'RPN_BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 64 | 'RPN_CLOBBER_POSITIVES': False, 65 | 'RPN_FG_FRACTION': 0.5, 66 | 'RPN_MIN_SIZE': 16, 67 | 'RPN_NEGATIVE_OVERLAP': 0.3, 68 | 'RPN_NMS_THRESH': 0.7, 69 | 'RPN_POSITIVE_OVERLAP': 0.7, 70 | 'RPN_POSITIVE_WEIGHT': -1.0, 71 | 'RPN_POST_NMS_TOP_N': 2000, 72 | 'RPN_PRE_NMS_TOP_N': 12000, 73 | 'SCALES': [600], 74 | 'SNAPSHOT_INFIX': '', 75 | 'SNAPSHOT_ITERS': 5000, 76 | 'SNAPSHOT_PREFIX': 'VGGnet_fast_rcnn', 77 | 'STEPSIZE': 50000, 78 | 'USE_FLIPPED': True, 79 | 'USE_PREFETCH': False}, 80 | 'USE_GPU_NMS': True} 81 | > 82 | Loaded dataset `voc_2007_trainval` for training 83 | Appending horizontally-flipped training examples... 84 | voc_2007_trainval gt roidb loaded from /home/alex/Faster-RCNN_TF/data/cache/voc_2007_trainval_gt_roidb.pkl 85 | Traceback (most recent call last): 86 | File "./tools/train_net.py", line 83, in 87 | roidb = get_training_roidb(imdb) 88 | File "/home/alex/Faster-RCNN_TF/tools/../lib/fast_rcnn/train.py", line 205, in get_training_roidb 89 | imdb.append_flipped_images() 90 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/imdb.py", line 113, in append_flipped_images 91 | assert (boxes[:, 2] >= boxes[:, 0]).all() 92 | AssertionError 93 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_21-15-59: -------------------------------------------------------------------------------- 1 | + echo Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_21-15-59 2 | Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_21-15-59 3 | + python ./tools/train_net.py --device gpu --device_id 0 --weights data/pretrain_model/VGG_imagenet.npy --imdb voc_2007_trainval --iters 70000 --cfg experiments/cfgs/faster_rcnn_end2end.yml --network VGGnet_train 4 | voc_2007_train 5 | voc_2007_val 6 | voc_2007_trainval 7 | voc_2007_test 8 | kitti_train 9 | kitti_val 10 | kitti_trainval 11 | kitti_test 12 | nthu_71 13 | nthu_370 14 | Called with args: 15 | Namespace(cfg_file='experiments/cfgs/faster_rcnn_end2end.yml', device='gpu', device_id=0, imdb_name='voc_2007_trainval', max_iters=70000, network_name='VGGnet_train', pretrained_model='data/pretrain_model/VGG_imagenet.npy', randomize=False, set_cfgs=None, solver=None) 16 | Using config: 17 | {'DATA_DIR': '/home/alex/Faster-RCNN_TF/data', 18 | 'DEDUP_BOXES': 0.0625, 19 | 'EPS': 1e-14, 20 | 'EXP_DIR': 'faster_rcnn_end2end', 21 | 'GPU_ID': 0, 22 | 'IS_MULTISCALE': False, 23 | 'MATLAB': 'matlab', 24 | 'MODELS_DIR': '/home/alex/Faster-RCNN_TF/models/pascal_voc', 25 | 'PIXEL_MEANS': array([[[ 102.9801, 115.9465, 122.7717]]]), 26 | 'RNG_SEED': 
3, 27 | 'ROOT_DIR': '/home/alex/Faster-RCNN_TF', 28 | 'TEST': {'BBOX_REG': True, 29 | 'DEBUG_TIMELINE': False, 30 | 'HAS_RPN': True, 31 | 'MAX_SIZE': 1000, 32 | 'NMS': 0.3, 33 | 'PROPOSAL_METHOD': 'selective_search', 34 | 'RPN_MIN_SIZE': 16, 35 | 'RPN_NMS_THRESH': 0.7, 36 | 'RPN_POST_NMS_TOP_N': 300, 37 | 'RPN_PRE_NMS_TOP_N': 6000, 38 | 'SCALES': [600], 39 | 'SVM': False}, 40 | 'TRAIN': {'ASPECT_GROUPING': True, 41 | 'BATCH_SIZE': 128, 42 | 'BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 43 | 'BBOX_NORMALIZE_MEANS': [0.0, 0.0, 0.0, 0.0], 44 | 'BBOX_NORMALIZE_STDS': [0.1, 0.1, 0.2, 0.2], 45 | 'BBOX_NORMALIZE_TARGETS': True, 46 | 'BBOX_NORMALIZE_TARGETS_PRECOMPUTED': True, 47 | 'BBOX_REG': True, 48 | 'BBOX_THRESH': 0.5, 49 | 'BG_THRESH_HI': 0.5, 50 | 'BG_THRESH_LO': 0.0, 51 | 'DEBUG_TIMELINE': False, 52 | 'DISPLAY': 10, 53 | 'FG_FRACTION': 0.25, 54 | 'FG_THRESH': 0.5, 55 | 'GAMMA': 0.1, 56 | 'HAS_RPN': True, 57 | 'IMS_PER_BATCH': 1, 58 | 'LEARNING_RATE': 0.001, 59 | 'MAX_SIZE': 1000, 60 | 'MOMENTUM': 0.9, 61 | 'PROPOSAL_METHOD': 'gt', 62 | 'RPN_BATCHSIZE': 256, 63 | 'RPN_BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 64 | 'RPN_CLOBBER_POSITIVES': False, 65 | 'RPN_FG_FRACTION': 0.5, 66 | 'RPN_MIN_SIZE': 16, 67 | 'RPN_NEGATIVE_OVERLAP': 0.3, 68 | 'RPN_NMS_THRESH': 0.7, 69 | 'RPN_POSITIVE_OVERLAP': 0.7, 70 | 'RPN_POSITIVE_WEIGHT': -1.0, 71 | 'RPN_POST_NMS_TOP_N': 2000, 72 | 'RPN_PRE_NMS_TOP_N': 12000, 73 | 'SCALES': [600], 74 | 'SNAPSHOT_INFIX': '', 75 | 'SNAPSHOT_ITERS': 5000, 76 | 'SNAPSHOT_PREFIX': 'VGGnet_fast_rcnn', 77 | 'STEPSIZE': 50000, 78 | 'USE_FLIPPED': True, 79 | 'USE_PREFETCH': False}, 80 | 'USE_GPU_NMS': True} 81 | > 82 | Loaded dataset `voc_2007_trainval` for training 83 | Appending horizontally-flipped training examples... 84 | Traceback (most recent call last): 85 | File "./tools/train_net.py", line 83, in 86 | roidb = get_training_roidb(imdb) 87 | File "/home/alex/Faster-RCNN_TF/tools/../lib/fast_rcnn/train.py", line 205, in get_training_roidb 88 | imdb.append_flipped_images() 89 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/imdb.py", line 106, in append_flipped_images 90 | widths = self._get_widths() 91 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/imdb.py", line 102, in _get_widths 92 | for i in xrange(self.num_images)] 93 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/pascal_voc.py", line 60, in image_path_at 94 | return self.image_path_from_index(self._image_index[i]) 95 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/pascal_voc.py", line 69, in image_path_from_index 96 | 'Path does not exist: {}'.format(image_path) 97 | AssertionError: Path does not exist: /home/alex/Faster-RCNN_TF/data/VOCdevkit2007/VOC2007/JPEGImages/169.jpg 98 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_21-17-59: -------------------------------------------------------------------------------- 1 | + echo Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_21-17-59 2 | Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_21-17-59 3 | + python ./tools/train_net.py --device gpu --device_id 0 --weights data/pretrain_model/VGG_imagenet.npy --imdb voc_2007_trainval --iters 70000 --cfg experiments/cfgs/faster_rcnn_end2end.yml --network VGGnet_train 4 | voc_2007_train 5 | voc_2007_val 6 | voc_2007_trainval 7 | voc_2007_test 8 | kitti_train 9 | kitti_val 10 | kitti_trainval 11 | kitti_test 12 | nthu_71 13 | nthu_370 14 | 
Called with args: 15 | Namespace(cfg_file='experiments/cfgs/faster_rcnn_end2end.yml', device='gpu', device_id=0, imdb_name='voc_2007_trainval', max_iters=70000, network_name='VGGnet_train', pretrained_model='data/pretrain_model/VGG_imagenet.npy', randomize=False, set_cfgs=None, solver=None) 16 | Using config: 17 | {'DATA_DIR': '/home/alex/Faster-RCNN_TF/data', 18 | 'DEDUP_BOXES': 0.0625, 19 | 'EPS': 1e-14, 20 | 'EXP_DIR': 'faster_rcnn_end2end', 21 | 'GPU_ID': 0, 22 | 'IS_MULTISCALE': False, 23 | 'MATLAB': 'matlab', 24 | 'MODELS_DIR': '/home/alex/Faster-RCNN_TF/models/pascal_voc', 25 | 'PIXEL_MEANS': array([[[ 102.9801, 115.9465, 122.7717]]]), 26 | 'RNG_SEED': 3, 27 | 'ROOT_DIR': '/home/alex/Faster-RCNN_TF', 28 | 'TEST': {'BBOX_REG': True, 29 | 'DEBUG_TIMELINE': False, 30 | 'HAS_RPN': True, 31 | 'MAX_SIZE': 1000, 32 | 'NMS': 0.3, 33 | 'PROPOSAL_METHOD': 'selective_search', 34 | 'RPN_MIN_SIZE': 16, 35 | 'RPN_NMS_THRESH': 0.7, 36 | 'RPN_POST_NMS_TOP_N': 300, 37 | 'RPN_PRE_NMS_TOP_N': 6000, 38 | 'SCALES': [600], 39 | 'SVM': False}, 40 | 'TRAIN': {'ASPECT_GROUPING': True, 41 | 'BATCH_SIZE': 128, 42 | 'BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 43 | 'BBOX_NORMALIZE_MEANS': [0.0, 0.0, 0.0, 0.0], 44 | 'BBOX_NORMALIZE_STDS': [0.1, 0.1, 0.2, 0.2], 45 | 'BBOX_NORMALIZE_TARGETS': True, 46 | 'BBOX_NORMALIZE_TARGETS_PRECOMPUTED': True, 47 | 'BBOX_REG': True, 48 | 'BBOX_THRESH': 0.5, 49 | 'BG_THRESH_HI': 0.5, 50 | 'BG_THRESH_LO': 0.0, 51 | 'DEBUG_TIMELINE': False, 52 | 'DISPLAY': 10, 53 | 'FG_FRACTION': 0.25, 54 | 'FG_THRESH': 0.5, 55 | 'GAMMA': 0.1, 56 | 'HAS_RPN': True, 57 | 'IMS_PER_BATCH': 1, 58 | 'LEARNING_RATE': 0.001, 59 | 'MAX_SIZE': 1000, 60 | 'MOMENTUM': 0.9, 61 | 'PROPOSAL_METHOD': 'gt', 62 | 'RPN_BATCHSIZE': 256, 63 | 'RPN_BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 64 | 'RPN_CLOBBER_POSITIVES': False, 65 | 'RPN_FG_FRACTION': 0.5, 66 | 'RPN_MIN_SIZE': 16, 67 | 'RPN_NEGATIVE_OVERLAP': 0.3, 68 | 'RPN_NMS_THRESH': 0.7, 69 | 'RPN_POSITIVE_OVERLAP': 0.7, 70 | 'RPN_POSITIVE_WEIGHT': -1.0, 71 | 'RPN_POST_NMS_TOP_N': 2000, 72 | 'RPN_PRE_NMS_TOP_N': 12000, 73 | 'SCALES': [600], 74 | 'SNAPSHOT_INFIX': '', 75 | 'SNAPSHOT_ITERS': 5000, 76 | 'SNAPSHOT_PREFIX': 'VGGnet_fast_rcnn', 77 | 'STEPSIZE': 50000, 78 | 'USE_FLIPPED': True, 79 | 'USE_PREFETCH': False}, 80 | 'USE_GPU_NMS': True} 81 | > 82 | Loaded dataset `voc_2007_trainval` for training 83 | Appending horizontally-flipped training examples... 
84 | voc_2007_trainval gt roidb loaded from /home/alex/Faster-RCNN_TF/data/cache/voc_2007_trainval_gt_roidb.pkl 85 | Traceback (most recent call last): 86 | File "./tools/train_net.py", line 83, in 87 | roidb = get_training_roidb(imdb) 88 | File "/home/alex/Faster-RCNN_TF/tools/../lib/fast_rcnn/train.py", line 205, in get_training_roidb 89 | imdb.append_flipped_images() 90 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/imdb.py", line 113, in append_flipped_images 91 | assert (boxes[:, 2] >= boxes[:, 0]).all() 92 | AssertionError 93 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_21-19-36: -------------------------------------------------------------------------------- 1 | + echo Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_21-19-36 2 | Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_21-19-36 3 | + python ./tools/train_net.py --device gpu --device_id 0 --weights data/pretrain_model/VGG_imagenet.npy --imdb voc_2007_trainval --iters 70000 --cfg experiments/cfgs/faster_rcnn_end2end.yml --network VGGnet_train 4 | voc_2007_train 5 | voc_2007_val 6 | voc_2007_trainval 7 | voc_2007_test 8 | kitti_train 9 | kitti_val 10 | kitti_trainval 11 | kitti_test 12 | nthu_71 13 | nthu_370 14 | Called with args: 15 | Namespace(cfg_file='experiments/cfgs/faster_rcnn_end2end.yml', device='gpu', device_id=0, imdb_name='voc_2007_trainval', max_iters=70000, network_name='VGGnet_train', pretrained_model='data/pretrain_model/VGG_imagenet.npy', randomize=False, set_cfgs=None, solver=None) 16 | Using config: 17 | {'DATA_DIR': '/home/alex/Faster-RCNN_TF/data', 18 | 'DEDUP_BOXES': 0.0625, 19 | 'EPS': 1e-14, 20 | 'EXP_DIR': 'faster_rcnn_end2end', 21 | 'GPU_ID': 0, 22 | 'IS_MULTISCALE': False, 23 | 'MATLAB': 'matlab', 24 | 'MODELS_DIR': '/home/alex/Faster-RCNN_TF/models/pascal_voc', 25 | 'PIXEL_MEANS': array([[[ 102.9801, 115.9465, 122.7717]]]), 26 | 'RNG_SEED': 3, 27 | 'ROOT_DIR': '/home/alex/Faster-RCNN_TF', 28 | 'TEST': {'BBOX_REG': True, 29 | 'DEBUG_TIMELINE': False, 30 | 'HAS_RPN': True, 31 | 'MAX_SIZE': 1000, 32 | 'NMS': 0.3, 33 | 'PROPOSAL_METHOD': 'selective_search', 34 | 'RPN_MIN_SIZE': 16, 35 | 'RPN_NMS_THRESH': 0.7, 36 | 'RPN_POST_NMS_TOP_N': 300, 37 | 'RPN_PRE_NMS_TOP_N': 6000, 38 | 'SCALES': [600], 39 | 'SVM': False}, 40 | 'TRAIN': {'ASPECT_GROUPING': True, 41 | 'BATCH_SIZE': 128, 42 | 'BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 43 | 'BBOX_NORMALIZE_MEANS': [0.0, 0.0, 0.0, 0.0], 44 | 'BBOX_NORMALIZE_STDS': [0.1, 0.1, 0.2, 0.2], 45 | 'BBOX_NORMALIZE_TARGETS': True, 46 | 'BBOX_NORMALIZE_TARGETS_PRECOMPUTED': True, 47 | 'BBOX_REG': True, 48 | 'BBOX_THRESH': 0.5, 49 | 'BG_THRESH_HI': 0.5, 50 | 'BG_THRESH_LO': 0.0, 51 | 'DEBUG_TIMELINE': False, 52 | 'DISPLAY': 10, 53 | 'FG_FRACTION': 0.25, 54 | 'FG_THRESH': 0.5, 55 | 'GAMMA': 0.1, 56 | 'HAS_RPN': True, 57 | 'IMS_PER_BATCH': 1, 58 | 'LEARNING_RATE': 0.001, 59 | 'MAX_SIZE': 1000, 60 | 'MOMENTUM': 0.9, 61 | 'PROPOSAL_METHOD': 'gt', 62 | 'RPN_BATCHSIZE': 256, 63 | 'RPN_BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 64 | 'RPN_CLOBBER_POSITIVES': False, 65 | 'RPN_FG_FRACTION': 0.5, 66 | 'RPN_MIN_SIZE': 16, 67 | 'RPN_NEGATIVE_OVERLAP': 0.3, 68 | 'RPN_NMS_THRESH': 0.7, 69 | 'RPN_POSITIVE_OVERLAP': 0.7, 70 | 'RPN_POSITIVE_WEIGHT': -1.0, 71 | 'RPN_POST_NMS_TOP_N': 2000, 72 | 'RPN_PRE_NMS_TOP_N': 12000, 73 | 'SCALES': [600], 74 | 'SNAPSHOT_INFIX': '', 75 | 'SNAPSHOT_ITERS': 5000, 76 | 
'SNAPSHOT_PREFIX': 'VGGnet_fast_rcnn', 77 | 'STEPSIZE': 50000, 78 | 'USE_FLIPPED': True, 79 | 'USE_PREFETCH': False}, 80 | 'USE_GPU_NMS': True} 81 | > 82 | Loaded dataset `voc_2007_trainval` for training 83 | Appending horizontally-flipped training examples... 84 | voc_2007_trainval gt roidb loaded from /home/alex/Faster-RCNN_TF/data/cache/voc_2007_trainval_gt_roidb.pkl 85 | Traceback (most recent call last): 86 | File "./tools/train_net.py", line 83, in 87 | roidb = get_training_roidb(imdb) 88 | File "/home/alex/Faster-RCNN_TF/tools/../lib/fast_rcnn/train.py", line 205, in get_training_roidb 89 | imdb.append_flipped_images() 90 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/imdb.py", line 113, in append_flipped_images 91 | assert (boxes[:, 2] >= boxes[:, 0]).all() 92 | AssertionError 93 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_21-21-01: -------------------------------------------------------------------------------- 1 | + echo Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_21-21-01 2 | Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_21-21-01 3 | + python ./tools/train_net.py --device gpu --device_id 0 --weights data/pretrain_model/VGG_imagenet.npy --imdb voc_2007_trainval --iters 70000 --cfg experiments/cfgs/faster_rcnn_end2end.yml --network VGGnet_train 4 | voc_2007_train 5 | voc_2007_val 6 | voc_2007_trainval 7 | voc_2007_test 8 | kitti_train 9 | kitti_val 10 | kitti_trainval 11 | kitti_test 12 | nthu_71 13 | nthu_370 14 | Called with args: 15 | Namespace(cfg_file='experiments/cfgs/faster_rcnn_end2end.yml', device='gpu', device_id=0, imdb_name='voc_2007_trainval', max_iters=70000, network_name='VGGnet_train', pretrained_model='data/pretrain_model/VGG_imagenet.npy', randomize=False, set_cfgs=None, solver=None) 16 | Using config: 17 | {'DATA_DIR': '/home/alex/Faster-RCNN_TF/data', 18 | 'DEDUP_BOXES': 0.0625, 19 | 'EPS': 1e-14, 20 | 'EXP_DIR': 'faster_rcnn_end2end', 21 | 'GPU_ID': 0, 22 | 'IS_MULTISCALE': False, 23 | 'MATLAB': 'matlab', 24 | 'MODELS_DIR': '/home/alex/Faster-RCNN_TF/models/pascal_voc', 25 | 'PIXEL_MEANS': array([[[ 102.9801, 115.9465, 122.7717]]]), 26 | 'RNG_SEED': 3, 27 | 'ROOT_DIR': '/home/alex/Faster-RCNN_TF', 28 | 'TEST': {'BBOX_REG': True, 29 | 'DEBUG_TIMELINE': False, 30 | 'HAS_RPN': True, 31 | 'MAX_SIZE': 1000, 32 | 'NMS': 0.3, 33 | 'PROPOSAL_METHOD': 'selective_search', 34 | 'RPN_MIN_SIZE': 16, 35 | 'RPN_NMS_THRESH': 0.7, 36 | 'RPN_POST_NMS_TOP_N': 300, 37 | 'RPN_PRE_NMS_TOP_N': 6000, 38 | 'SCALES': [600], 39 | 'SVM': False}, 40 | 'TRAIN': {'ASPECT_GROUPING': True, 41 | 'BATCH_SIZE': 128, 42 | 'BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 43 | 'BBOX_NORMALIZE_MEANS': [0.0, 0.0, 0.0, 0.0], 44 | 'BBOX_NORMALIZE_STDS': [0.1, 0.1, 0.2, 0.2], 45 | 'BBOX_NORMALIZE_TARGETS': True, 46 | 'BBOX_NORMALIZE_TARGETS_PRECOMPUTED': True, 47 | 'BBOX_REG': True, 48 | 'BBOX_THRESH': 0.5, 49 | 'BG_THRESH_HI': 0.5, 50 | 'BG_THRESH_LO': 0.0, 51 | 'DEBUG_TIMELINE': False, 52 | 'DISPLAY': 10, 53 | 'FG_FRACTION': 0.25, 54 | 'FG_THRESH': 0.5, 55 | 'GAMMA': 0.1, 56 | 'HAS_RPN': True, 57 | 'IMS_PER_BATCH': 1, 58 | 'LEARNING_RATE': 0.001, 59 | 'MAX_SIZE': 1000, 60 | 'MOMENTUM': 0.9, 61 | 'PROPOSAL_METHOD': 'gt', 62 | 'RPN_BATCHSIZE': 256, 63 | 'RPN_BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 64 | 'RPN_CLOBBER_POSITIVES': False, 65 | 'RPN_FG_FRACTION': 0.5, 66 | 'RPN_MIN_SIZE': 16, 67 | 'RPN_NEGATIVE_OVERLAP': 0.3, 
68 | 'RPN_NMS_THRESH': 0.7, 69 | 'RPN_POSITIVE_OVERLAP': 0.7, 70 | 'RPN_POSITIVE_WEIGHT': -1.0, 71 | 'RPN_POST_NMS_TOP_N': 2000, 72 | 'RPN_PRE_NMS_TOP_N': 12000, 73 | 'SCALES': [600], 74 | 'SNAPSHOT_INFIX': '', 75 | 'SNAPSHOT_ITERS': 5000, 76 | 'SNAPSHOT_PREFIX': 'VGGnet_fast_rcnn', 77 | 'STEPSIZE': 50000, 78 | 'USE_FLIPPED': True, 79 | 'USE_PREFETCH': False}, 80 | 'USE_GPU_NMS': True} 81 | > 82 | Loaded dataset `voc_2007_trainval` for training 83 | Appending horizontally-flipped training examples... 84 | voc_2007_trainval gt roidb loaded from /home/alex/Faster-RCNN_TF/data/cache/voc_2007_trainval_gt_roidb.pkl 85 | Traceback (most recent call last): 86 | File "./tools/train_net.py", line 83, in 87 | roidb = get_training_roidb(imdb) 88 | File "/home/alex/Faster-RCNN_TF/tools/../lib/fast_rcnn/train.py", line 205, in get_training_roidb 89 | imdb.append_flipped_images() 90 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/imdb.py", line 113, in append_flipped_images 91 | assert (boxes[:, 2] >= boxes[:, 0]).all() 92 | AssertionError 93 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_21-22-27: -------------------------------------------------------------------------------- 1 | + echo Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_21-22-27 2 | Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-25_21-22-27 3 | + python ./tools/train_net.py --device gpu --device_id 0 --weights data/pretrain_model/VGG_imagenet.npy --imdb voc_2007_trainval --iters 70000 --cfg experiments/cfgs/faster_rcnn_end2end.yml --network VGGnet_train 4 | voc_2007_train 5 | voc_2007_val 6 | voc_2007_trainval 7 | voc_2007_test 8 | kitti_train 9 | kitti_val 10 | kitti_trainval 11 | kitti_test 12 | nthu_71 13 | nthu_370 14 | Called with args: 15 | Namespace(cfg_file='experiments/cfgs/faster_rcnn_end2end.yml', device='gpu', device_id=0, imdb_name='voc_2007_trainval', max_iters=70000, network_name='VGGnet_train', pretrained_model='data/pretrain_model/VGG_imagenet.npy', randomize=False, set_cfgs=None, solver=None) 16 | Using config: 17 | {'DATA_DIR': '/home/alex/Faster-RCNN_TF/data', 18 | 'DEDUP_BOXES': 0.0625, 19 | 'EPS': 1e-14, 20 | 'EXP_DIR': 'faster_rcnn_end2end', 21 | 'GPU_ID': 0, 22 | 'IS_MULTISCALE': False, 23 | 'MATLAB': 'matlab', 24 | 'MODELS_DIR': '/home/alex/Faster-RCNN_TF/models/pascal_voc', 25 | 'PIXEL_MEANS': array([[[ 102.9801, 115.9465, 122.7717]]]), 26 | 'RNG_SEED': 3, 27 | 'ROOT_DIR': '/home/alex/Faster-RCNN_TF', 28 | 'TEST': {'BBOX_REG': True, 29 | 'DEBUG_TIMELINE': False, 30 | 'HAS_RPN': True, 31 | 'MAX_SIZE': 1000, 32 | 'NMS': 0.3, 33 | 'PROPOSAL_METHOD': 'selective_search', 34 | 'RPN_MIN_SIZE': 16, 35 | 'RPN_NMS_THRESH': 0.7, 36 | 'RPN_POST_NMS_TOP_N': 300, 37 | 'RPN_PRE_NMS_TOP_N': 6000, 38 | 'SCALES': [600], 39 | 'SVM': False}, 40 | 'TRAIN': {'ASPECT_GROUPING': True, 41 | 'BATCH_SIZE': 128, 42 | 'BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 43 | 'BBOX_NORMALIZE_MEANS': [0.0, 0.0, 0.0, 0.0], 44 | 'BBOX_NORMALIZE_STDS': [0.1, 0.1, 0.2, 0.2], 45 | 'BBOX_NORMALIZE_TARGETS': True, 46 | 'BBOX_NORMALIZE_TARGETS_PRECOMPUTED': True, 47 | 'BBOX_REG': True, 48 | 'BBOX_THRESH': 0.5, 49 | 'BG_THRESH_HI': 0.5, 50 | 'BG_THRESH_LO': 0.0, 51 | 'DEBUG_TIMELINE': False, 52 | 'DISPLAY': 10, 53 | 'FG_FRACTION': 0.25, 54 | 'FG_THRESH': 0.5, 55 | 'GAMMA': 0.1, 56 | 'HAS_RPN': True, 57 | 'IMS_PER_BATCH': 1, 58 | 'LEARNING_RATE': 0.001, 59 | 'MAX_SIZE': 1000, 60 | 
'MOMENTUM': 0.9, 61 | 'PROPOSAL_METHOD': 'gt', 62 | 'RPN_BATCHSIZE': 256, 63 | 'RPN_BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 64 | 'RPN_CLOBBER_POSITIVES': False, 65 | 'RPN_FG_FRACTION': 0.5, 66 | 'RPN_MIN_SIZE': 16, 67 | 'RPN_NEGATIVE_OVERLAP': 0.3, 68 | 'RPN_NMS_THRESH': 0.7, 69 | 'RPN_POSITIVE_OVERLAP': 0.7, 70 | 'RPN_POSITIVE_WEIGHT': -1.0, 71 | 'RPN_POST_NMS_TOP_N': 2000, 72 | 'RPN_PRE_NMS_TOP_N': 12000, 73 | 'SCALES': [600], 74 | 'SNAPSHOT_INFIX': '', 75 | 'SNAPSHOT_ITERS': 5000, 76 | 'SNAPSHOT_PREFIX': 'VGGnet_fast_rcnn', 77 | 'STEPSIZE': 50000, 78 | 'USE_FLIPPED': True, 79 | 'USE_PREFETCH': False}, 80 | 'USE_GPU_NMS': True} 81 | > 82 | Loaded dataset `voc_2007_trainval` for training 83 | Appending horizontally-flipped training examples... 84 | voc_2007_trainval gt roidb loaded from /home/alex/Faster-RCNN_TF/data/cache/voc_2007_trainval_gt_roidb.pkl 85 | Traceback (most recent call last): 86 | File "./tools/train_net.py", line 83, in 87 | roidb = get_training_roidb(imdb) 88 | File "/home/alex/Faster-RCNN_TF/tools/../lib/fast_rcnn/train.py", line 205, in get_training_roidb 89 | imdb.append_flipped_images() 90 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/imdb.py", line 113, in append_flipped_images 91 | assert (boxes[:, 2] >= boxes[:, 0]).all() 92 | AssertionError 93 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-14-37: -------------------------------------------------------------------------------- 1 | + echo Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-14-37 2 | Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-14-37 3 | + python ./tools/train_net.py --device gpu --device_id 0 --weights data/pretrain_model/VGG_imagenet.npy --imdb voc_2007_trainval --iters 70000 --cfg experiments/cfgs/faster_rcnn_end2end.yml --network VGGnet_train 4 | voc_2007_train 5 | voc_2007_val 6 | voc_2007_trainval 7 | voc_2007_test 8 | kitti_train 9 | kitti_val 10 | kitti_trainval 11 | kitti_test 12 | nthu_71 13 | nthu_370 14 | Called with args: 15 | Namespace(cfg_file='experiments/cfgs/faster_rcnn_end2end.yml', device='gpu', device_id=0, imdb_name='voc_2007_trainval', max_iters=70000, network_name='VGGnet_train', pretrained_model='data/pretrain_model/VGG_imagenet.npy', randomize=False, set_cfgs=None, solver=None) 16 | Using config: 17 | {'DATA_DIR': '/home/alex/Faster-RCNN_TF/data', 18 | 'DEDUP_BOXES': 0.0625, 19 | 'EPS': 1e-14, 20 | 'EXP_DIR': 'faster_rcnn_end2end', 21 | 'GPU_ID': 0, 22 | 'IS_MULTISCALE': False, 23 | 'MATLAB': 'matlab', 24 | 'MODELS_DIR': '/home/alex/Faster-RCNN_TF/models/pascal_voc', 25 | 'PIXEL_MEANS': array([[[ 102.9801, 115.9465, 122.7717]]]), 26 | 'RNG_SEED': 3, 27 | 'ROOT_DIR': '/home/alex/Faster-RCNN_TF', 28 | 'TEST': {'BBOX_REG': True, 29 | 'DEBUG_TIMELINE': False, 30 | 'HAS_RPN': True, 31 | 'MAX_SIZE': 1000, 32 | 'NMS': 0.3, 33 | 'PROPOSAL_METHOD': 'selective_search', 34 | 'RPN_MIN_SIZE': 16, 35 | 'RPN_NMS_THRESH': 0.7, 36 | 'RPN_POST_NMS_TOP_N': 300, 37 | 'RPN_PRE_NMS_TOP_N': 6000, 38 | 'SCALES': [600], 39 | 'SVM': False}, 40 | 'TRAIN': {'ASPECT_GROUPING': True, 41 | 'BATCH_SIZE': 128, 42 | 'BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 43 | 'BBOX_NORMALIZE_MEANS': [0.0, 0.0, 0.0, 0.0], 44 | 'BBOX_NORMALIZE_STDS': [0.1, 0.1, 0.2, 0.2], 45 | 'BBOX_NORMALIZE_TARGETS': True, 46 | 'BBOX_NORMALIZE_TARGETS_PRECOMPUTED': True, 47 | 'BBOX_REG': True, 48 | 'BBOX_THRESH': 0.5, 49 | 'BG_THRESH_HI': 0.5, 50 
| 'BG_THRESH_LO': 0.0, 51 | 'DEBUG_TIMELINE': False, 52 | 'DISPLAY': 10, 53 | 'FG_FRACTION': 0.25, 54 | 'FG_THRESH': 0.5, 55 | 'GAMMA': 0.1, 56 | 'HAS_RPN': True, 57 | 'IMS_PER_BATCH': 1, 58 | 'LEARNING_RATE': 0.001, 59 | 'MAX_SIZE': 1000, 60 | 'MOMENTUM': 0.9, 61 | 'PROPOSAL_METHOD': 'gt', 62 | 'RPN_BATCHSIZE': 256, 63 | 'RPN_BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 64 | 'RPN_CLOBBER_POSITIVES': False, 65 | 'RPN_FG_FRACTION': 0.5, 66 | 'RPN_MIN_SIZE': 16, 67 | 'RPN_NEGATIVE_OVERLAP': 0.3, 68 | 'RPN_NMS_THRESH': 0.7, 69 | 'RPN_POSITIVE_OVERLAP': 0.7, 70 | 'RPN_POSITIVE_WEIGHT': -1.0, 71 | 'RPN_POST_NMS_TOP_N': 2000, 72 | 'RPN_PRE_NMS_TOP_N': 12000, 73 | 'SCALES': [600], 74 | 'SNAPSHOT_INFIX': '', 75 | 'SNAPSHOT_ITERS': 5000, 76 | 'SNAPSHOT_PREFIX': 'VGGnet_fast_rcnn', 77 | 'STEPSIZE': 50000, 78 | 'USE_FLIPPED': True, 79 | 'USE_PREFETCH': False}, 80 | 'USE_GPU_NMS': True} 81 | > 82 | Loaded dataset `voc_2007_trainval` for training 83 | Appending horizontally-flipped training examples... 84 | voc_2007_trainval gt roidb loaded from /home/alex/Faster-RCNN_TF/data/cache/voc_2007_trainval_gt_roidb.pkl 85 | Traceback (most recent call last): 86 | File "./tools/train_net.py", line 83, in 87 | roidb = get_training_roidb(imdb) 88 | File "/home/alex/Faster-RCNN_TF/tools/../lib/fast_rcnn/train.py", line 205, in get_training_roidb 89 | imdb.append_flipped_images() 90 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/imdb.py", line 113, in append_flipped_images 91 | assert (boxes[:, 2] >= boxes[:, 0]).all() 92 | AssertionError 93 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-15-14: -------------------------------------------------------------------------------- 1 | + echo Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-15-14 2 | Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-15-14 3 | + python ./tools/train_net.py --device gpu --device_id 0 --weights data/pretrain_model/VGG_imagenet.npy --imdb voc_2007_trainval --iters 70000 --cfg experiments/cfgs/faster_rcnn_end2end.yml --network VGGnet_train 4 | voc_2007_train 5 | voc_2007_val 6 | voc_2007_trainval 7 | voc_2007_test 8 | kitti_train 9 | kitti_val 10 | kitti_trainval 11 | kitti_test 12 | nthu_71 13 | nthu_370 14 | Called with args: 15 | Namespace(cfg_file='experiments/cfgs/faster_rcnn_end2end.yml', device='gpu', device_id=0, imdb_name='voc_2007_trainval', max_iters=70000, network_name='VGGnet_train', pretrained_model='data/pretrain_model/VGG_imagenet.npy', randomize=False, set_cfgs=None, solver=None) 16 | Using config: 17 | {'DATA_DIR': '/home/alex/Faster-RCNN_TF/data', 18 | 'DEDUP_BOXES': 0.0625, 19 | 'EPS': 1e-14, 20 | 'EXP_DIR': 'faster_rcnn_end2end', 21 | 'GPU_ID': 0, 22 | 'IS_MULTISCALE': False, 23 | 'MATLAB': 'matlab', 24 | 'MODELS_DIR': '/home/alex/Faster-RCNN_TF/models/pascal_voc', 25 | 'PIXEL_MEANS': array([[[ 102.9801, 115.9465, 122.7717]]]), 26 | 'RNG_SEED': 3, 27 | 'ROOT_DIR': '/home/alex/Faster-RCNN_TF', 28 | 'TEST': {'BBOX_REG': True, 29 | 'DEBUG_TIMELINE': False, 30 | 'HAS_RPN': True, 31 | 'MAX_SIZE': 1000, 32 | 'NMS': 0.3, 33 | 'PROPOSAL_METHOD': 'selective_search', 34 | 'RPN_MIN_SIZE': 16, 35 | 'RPN_NMS_THRESH': 0.7, 36 | 'RPN_POST_NMS_TOP_N': 300, 37 | 'RPN_PRE_NMS_TOP_N': 6000, 38 | 'SCALES': [600], 39 | 'SVM': False}, 40 | 'TRAIN': {'ASPECT_GROUPING': True, 41 | 'BATCH_SIZE': 128, 42 | 'BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 43 | 
'BBOX_NORMALIZE_MEANS': [0.0, 0.0, 0.0, 0.0], 44 | 'BBOX_NORMALIZE_STDS': [0.1, 0.1, 0.2, 0.2], 45 | 'BBOX_NORMALIZE_TARGETS': True, 46 | 'BBOX_NORMALIZE_TARGETS_PRECOMPUTED': True, 47 | 'BBOX_REG': True, 48 | 'BBOX_THRESH': 0.5, 49 | 'BG_THRESH_HI': 0.5, 50 | 'BG_THRESH_LO': 0.0, 51 | 'DEBUG_TIMELINE': False, 52 | 'DISPLAY': 10, 53 | 'FG_FRACTION': 0.25, 54 | 'FG_THRESH': 0.5, 55 | 'GAMMA': 0.1, 56 | 'HAS_RPN': True, 57 | 'IMS_PER_BATCH': 1, 58 | 'LEARNING_RATE': 0.001, 59 | 'MAX_SIZE': 1000, 60 | 'MOMENTUM': 0.9, 61 | 'PROPOSAL_METHOD': 'gt', 62 | 'RPN_BATCHSIZE': 256, 63 | 'RPN_BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 64 | 'RPN_CLOBBER_POSITIVES': False, 65 | 'RPN_FG_FRACTION': 0.5, 66 | 'RPN_MIN_SIZE': 16, 67 | 'RPN_NEGATIVE_OVERLAP': 0.3, 68 | 'RPN_NMS_THRESH': 0.7, 69 | 'RPN_POSITIVE_OVERLAP': 0.7, 70 | 'RPN_POSITIVE_WEIGHT': -1.0, 71 | 'RPN_POST_NMS_TOP_N': 2000, 72 | 'RPN_PRE_NMS_TOP_N': 12000, 73 | 'SCALES': [600], 74 | 'SNAPSHOT_INFIX': '', 75 | 'SNAPSHOT_ITERS': 5000, 76 | 'SNAPSHOT_PREFIX': 'VGGnet_fast_rcnn', 77 | 'STEPSIZE': 50000, 78 | 'USE_FLIPPED': True, 79 | 'USE_PREFETCH': False}, 80 | 'USE_GPU_NMS': True} 81 | > 82 | Loaded dataset `voc_2007_trainval` for training 83 | Appending horizontally-flipped training examples... 84 | voc_2007_trainval gt roidb loaded from /home/alex/Faster-RCNN_TF/data/cache/voc_2007_trainval_gt_roidb.pkl 85 | Traceback (most recent call last): 86 | File "./tools/train_net.py", line 83, in 87 | roidb = get_training_roidb(imdb) 88 | File "/home/alex/Faster-RCNN_TF/tools/../lib/fast_rcnn/train.py", line 205, in get_training_roidb 89 | imdb.append_flipped_images() 90 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/imdb.py", line 113, in append_flipped_images 91 | assert (boxes[:, 2] >= boxes[:, 0]).all() 92 | AssertionError 93 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-23-25: -------------------------------------------------------------------------------- 1 | + echo Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-23-25 2 | Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-23-25 3 | + python ./tools/train_net.py --device gpu --device_id 0 --weights data/pretrain_model/VGG_imagenet.npy --imdb voc_2007_trainval --iters 70000 --cfg experiments/cfgs/faster_rcnn_end2end.yml --network VGGnet_train 4 | voc_2007_train 5 | voc_2007_val 6 | voc_2007_trainval 7 | voc_2007_test 8 | kitti_train 9 | kitti_val 10 | kitti_trainval 11 | kitti_test 12 | nthu_71 13 | nthu_370 14 | Called with args: 15 | Namespace(cfg_file='experiments/cfgs/faster_rcnn_end2end.yml', device='gpu', device_id=0, imdb_name='voc_2007_trainval', max_iters=70000, network_name='VGGnet_train', pretrained_model='data/pretrain_model/VGG_imagenet.npy', randomize=False, set_cfgs=None, solver=None) 16 | Using config: 17 | {'DATA_DIR': '/home/alex/Faster-RCNN_TF/data', 18 | 'DEDUP_BOXES': 0.0625, 19 | 'EPS': 1e-14, 20 | 'EXP_DIR': 'faster_rcnn_end2end', 21 | 'GPU_ID': 0, 22 | 'IS_MULTISCALE': False, 23 | 'MATLAB': 'matlab', 24 | 'MODELS_DIR': '/home/alex/Faster-RCNN_TF/models/pascal_voc', 25 | 'PIXEL_MEANS': array([[[ 102.9801, 115.9465, 122.7717]]]), 26 | 'RNG_SEED': 3, 27 | 'ROOT_DIR': '/home/alex/Faster-RCNN_TF', 28 | 'TEST': {'BBOX_REG': True, 29 | 'DEBUG_TIMELINE': False, 30 | 'HAS_RPN': True, 31 | 'MAX_SIZE': 1000, 32 | 'NMS': 0.3, 33 | 'PROPOSAL_METHOD': 'selective_search', 34 | 'RPN_MIN_SIZE': 16, 35 | 
'RPN_NMS_THRESH': 0.7, 36 | 'RPN_POST_NMS_TOP_N': 300, 37 | 'RPN_PRE_NMS_TOP_N': 6000, 38 | 'SCALES': [600], 39 | 'SVM': False}, 40 | 'TRAIN': {'ASPECT_GROUPING': True, 41 | 'BATCH_SIZE': 128, 42 | 'BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 43 | 'BBOX_NORMALIZE_MEANS': [0.0, 0.0, 0.0, 0.0], 44 | 'BBOX_NORMALIZE_STDS': [0.1, 0.1, 0.2, 0.2], 45 | 'BBOX_NORMALIZE_TARGETS': True, 46 | 'BBOX_NORMALIZE_TARGETS_PRECOMPUTED': True, 47 | 'BBOX_REG': True, 48 | 'BBOX_THRESH': 0.5, 49 | 'BG_THRESH_HI': 0.5, 50 | 'BG_THRESH_LO': 0.0, 51 | 'DEBUG_TIMELINE': False, 52 | 'DISPLAY': 10, 53 | 'FG_FRACTION': 0.25, 54 | 'FG_THRESH': 0.5, 55 | 'GAMMA': 0.1, 56 | 'HAS_RPN': True, 57 | 'IMS_PER_BATCH': 1, 58 | 'LEARNING_RATE': 0.001, 59 | 'MAX_SIZE': 1000, 60 | 'MOMENTUM': 0.9, 61 | 'PROPOSAL_METHOD': 'gt', 62 | 'RPN_BATCHSIZE': 256, 63 | 'RPN_BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 64 | 'RPN_CLOBBER_POSITIVES': False, 65 | 'RPN_FG_FRACTION': 0.5, 66 | 'RPN_MIN_SIZE': 16, 67 | 'RPN_NEGATIVE_OVERLAP': 0.3, 68 | 'RPN_NMS_THRESH': 0.7, 69 | 'RPN_POSITIVE_OVERLAP': 0.7, 70 | 'RPN_POSITIVE_WEIGHT': -1.0, 71 | 'RPN_POST_NMS_TOP_N': 2000, 72 | 'RPN_PRE_NMS_TOP_N': 12000, 73 | 'SCALES': [600], 74 | 'SNAPSHOT_INFIX': '', 75 | 'SNAPSHOT_ITERS': 5000, 76 | 'SNAPSHOT_PREFIX': 'VGGnet_fast_rcnn', 77 | 'STEPSIZE': 50000, 78 | 'USE_FLIPPED': True, 79 | 'USE_PREFETCH': False}, 80 | 'USE_GPU_NMS': True} 81 | > 82 | Loaded dataset `voc_2007_trainval` for training 83 | Appending horizontally-flipped training examples... 84 | voc_2007_trainval gt roidb loaded from /home/alex/Faster-RCNN_TF/data/cache/voc_2007_trainval_gt_roidb.pkl 85 | Traceback (most recent call last): 86 | File "./tools/train_net.py", line 83, in 87 | roidb = get_training_roidb(imdb) 88 | File "/home/alex/Faster-RCNN_TF/tools/../lib/fast_rcnn/train.py", line 205, in get_training_roidb 89 | imdb.append_flipped_images() 90 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/imdb.py", line 113, in append_flipped_images 91 | assert (boxes[:, 2] >= boxes[:, 0]).all() 92 | AssertionError 93 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-39-17: -------------------------------------------------------------------------------- 1 | + echo Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-39-17 2 | Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-39-17 3 | + python ./tools/train_net.py --device gpu --device_id 0 --weights data/pretrain_model/VGG_imagenet.npy --imdb voc_2007_trainval --iters 100 --cfg experiments/cfgs/faster_rcnn_end2end.yml --network VGGnet_train 4 | voc_2007_train 5 | voc_2007_val 6 | voc_2007_trainval 7 | voc_2007_test 8 | kitti_train 9 | kitti_val 10 | kitti_trainval 11 | kitti_test 12 | nthu_71 13 | nthu_370 14 | Called with args: 15 | Namespace(cfg_file='experiments/cfgs/faster_rcnn_end2end.yml', device='gpu', device_id=0, imdb_name='voc_2007_trainval', max_iters=100, network_name='VGGnet_train', pretrained_model='data/pretrain_model/VGG_imagenet.npy', randomize=False, set_cfgs=None, solver=None) 16 | Using config: 17 | {'DATA_DIR': '/home/alex/Faster-RCNN_TF/data', 18 | 'DEDUP_BOXES': 0.0625, 19 | 'EPS': 1e-14, 20 | 'EXP_DIR': 'faster_rcnn_end2end', 21 | 'GPU_ID': 0, 22 | 'IS_MULTISCALE': False, 23 | 'MATLAB': 'matlab', 24 | 'MODELS_DIR': '/home/alex/Faster-RCNN_TF/models/pascal_voc', 25 | 'PIXEL_MEANS': array([[[ 102.9801, 115.9465, 122.7717]]]), 26 | 'RNG_SEED': 3, 27 
| 'ROOT_DIR': '/home/alex/Faster-RCNN_TF', 28 | 'TEST': {'BBOX_REG': True, 29 | 'DEBUG_TIMELINE': False, 30 | 'HAS_RPN': True, 31 | 'MAX_SIZE': 1000, 32 | 'NMS': 0.3, 33 | 'PROPOSAL_METHOD': 'selective_search', 34 | 'RPN_MIN_SIZE': 16, 35 | 'RPN_NMS_THRESH': 0.7, 36 | 'RPN_POST_NMS_TOP_N': 300, 37 | 'RPN_PRE_NMS_TOP_N': 6000, 38 | 'SCALES': [600], 39 | 'SVM': False}, 40 | 'TRAIN': {'ASPECT_GROUPING': True, 41 | 'BATCH_SIZE': 128, 42 | 'BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 43 | 'BBOX_NORMALIZE_MEANS': [0.0, 0.0, 0.0, 0.0], 44 | 'BBOX_NORMALIZE_STDS': [0.1, 0.1, 0.2, 0.2], 45 | 'BBOX_NORMALIZE_TARGETS': True, 46 | 'BBOX_NORMALIZE_TARGETS_PRECOMPUTED': True, 47 | 'BBOX_REG': True, 48 | 'BBOX_THRESH': 0.5, 49 | 'BG_THRESH_HI': 0.5, 50 | 'BG_THRESH_LO': 0.0, 51 | 'DEBUG_TIMELINE': False, 52 | 'DISPLAY': 10, 53 | 'FG_FRACTION': 0.25, 54 | 'FG_THRESH': 0.5, 55 | 'GAMMA': 0.1, 56 | 'HAS_RPN': True, 57 | 'IMS_PER_BATCH': 1, 58 | 'LEARNING_RATE': 0.001, 59 | 'MAX_SIZE': 1000, 60 | 'MOMENTUM': 0.9, 61 | 'PROPOSAL_METHOD': 'gt', 62 | 'RPN_BATCHSIZE': 256, 63 | 'RPN_BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 64 | 'RPN_CLOBBER_POSITIVES': False, 65 | 'RPN_FG_FRACTION': 0.5, 66 | 'RPN_MIN_SIZE': 16, 67 | 'RPN_NEGATIVE_OVERLAP': 0.3, 68 | 'RPN_NMS_THRESH': 0.7, 69 | 'RPN_POSITIVE_OVERLAP': 0.7, 70 | 'RPN_POSITIVE_WEIGHT': -1.0, 71 | 'RPN_POST_NMS_TOP_N': 2000, 72 | 'RPN_PRE_NMS_TOP_N': 12000, 73 | 'SCALES': [600], 74 | 'SNAPSHOT_INFIX': '', 75 | 'SNAPSHOT_ITERS': 5000, 76 | 'SNAPSHOT_PREFIX': 'VGGnet_fast_rcnn', 77 | 'STEPSIZE': 50000, 78 | 'USE_FLIPPED': True, 79 | 'USE_PREFETCH': False}, 80 | 'USE_GPU_NMS': True} 81 | > 82 | Loaded dataset `voc_2007_trainval` for training 83 | Appending horizontally-flipped training examples... 84 | voc_2007_trainval gt roidb loaded from /home/alex/Faster-RCNN_TF/data/cache/voc_2007_trainval_gt_roidb.pkl 85 | Traceback (most recent call last): 86 | File "./tools/train_net.py", line 83, in 87 | roidb = get_training_roidb(imdb) 88 | File "/home/alex/Faster-RCNN_TF/tools/../lib/fast_rcnn/train.py", line 205, in get_training_roidb 89 | imdb.append_flipped_images() 90 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/imdb.py", line 113, in append_flipped_images 91 | assert (boxes[:, 2] >= boxes[:, 0]).all() 92 | AssertionError 93 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-43-19: -------------------------------------------------------------------------------- 1 | + echo Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-43-19 2 | Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-43-19 3 | + python ./tools/train_net.py --device gpu --device_id 0 --weights data/pretrain_model/VGG_imagenet.npy --imdb voc_2007_trainval --iters 100 --cfg experiments/cfgs/faster_rcnn_end2end.yml --network VGGnet_train 4 | voc_2007_train 5 | voc_2007_val 6 | voc_2007_trainval 7 | voc_2007_test 8 | kitti_train 9 | kitti_val 10 | kitti_trainval 11 | kitti_test 12 | nthu_71 13 | nthu_370 14 | Called with args: 15 | Namespace(cfg_file='experiments/cfgs/faster_rcnn_end2end.yml', device='gpu', device_id=0, imdb_name='voc_2007_trainval', max_iters=100, network_name='VGGnet_train', pretrained_model='data/pretrain_model/VGG_imagenet.npy', randomize=False, set_cfgs=None, solver=None) 16 | Using config: 17 | {'DATA_DIR': '/home/alex/Faster-RCNN_TF/data', 18 | 'DEDUP_BOXES': 0.0625, 19 | 'EPS': 1e-14, 20 | 'EXP_DIR': 
'faster_rcnn_end2end', 21 | 'GPU_ID': 0, 22 | 'IS_MULTISCALE': False, 23 | 'MATLAB': 'matlab', 24 | 'MODELS_DIR': '/home/alex/Faster-RCNN_TF/models/pascal_voc', 25 | 'PIXEL_MEANS': array([[[ 102.9801, 115.9465, 122.7717]]]), 26 | 'RNG_SEED': 3, 27 | 'ROOT_DIR': '/home/alex/Faster-RCNN_TF', 28 | 'TEST': {'BBOX_REG': True, 29 | 'DEBUG_TIMELINE': False, 30 | 'HAS_RPN': True, 31 | 'MAX_SIZE': 1000, 32 | 'NMS': 0.3, 33 | 'PROPOSAL_METHOD': 'selective_search', 34 | 'RPN_MIN_SIZE': 16, 35 | 'RPN_NMS_THRESH': 0.7, 36 | 'RPN_POST_NMS_TOP_N': 300, 37 | 'RPN_PRE_NMS_TOP_N': 6000, 38 | 'SCALES': [600], 39 | 'SVM': False}, 40 | 'TRAIN': {'ASPECT_GROUPING': True, 41 | 'BATCH_SIZE': 128, 42 | 'BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 43 | 'BBOX_NORMALIZE_MEANS': [0.0, 0.0, 0.0, 0.0], 44 | 'BBOX_NORMALIZE_STDS': [0.1, 0.1, 0.2, 0.2], 45 | 'BBOX_NORMALIZE_TARGETS': True, 46 | 'BBOX_NORMALIZE_TARGETS_PRECOMPUTED': True, 47 | 'BBOX_REG': True, 48 | 'BBOX_THRESH': 0.5, 49 | 'BG_THRESH_HI': 0.5, 50 | 'BG_THRESH_LO': 0.0, 51 | 'DEBUG_TIMELINE': False, 52 | 'DISPLAY': 10, 53 | 'FG_FRACTION': 0.25, 54 | 'FG_THRESH': 0.5, 55 | 'GAMMA': 0.1, 56 | 'HAS_RPN': True, 57 | 'IMS_PER_BATCH': 1, 58 | 'LEARNING_RATE': 0.001, 59 | 'MAX_SIZE': 1000, 60 | 'MOMENTUM': 0.9, 61 | 'PROPOSAL_METHOD': 'gt', 62 | 'RPN_BATCHSIZE': 256, 63 | 'RPN_BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 64 | 'RPN_CLOBBER_POSITIVES': False, 65 | 'RPN_FG_FRACTION': 0.5, 66 | 'RPN_MIN_SIZE': 16, 67 | 'RPN_NEGATIVE_OVERLAP': 0.3, 68 | 'RPN_NMS_THRESH': 0.7, 69 | 'RPN_POSITIVE_OVERLAP': 0.7, 70 | 'RPN_POSITIVE_WEIGHT': -1.0, 71 | 'RPN_POST_NMS_TOP_N': 2000, 72 | 'RPN_PRE_NMS_TOP_N': 12000, 73 | 'SCALES': [600], 74 | 'SNAPSHOT_INFIX': '', 75 | 'SNAPSHOT_ITERS': 5000, 76 | 'SNAPSHOT_PREFIX': 'VGGnet_fast_rcnn', 77 | 'STEPSIZE': 50000, 78 | 'USE_FLIPPED': True, 79 | 'USE_PREFETCH': False}, 80 | 'USE_GPU_NMS': True} 81 | > 82 | Loaded dataset `voc_2007_trainval` for training 83 | Appending horizontally-flipped training examples... 
84 | Traceback (most recent call last): 85 | File "./tools/train_net.py", line 83, in 86 | roidb = get_training_roidb(imdb) 87 | File "/home/alex/Faster-RCNN_TF/tools/../lib/fast_rcnn/train.py", line 205, in get_training_roidb 88 | imdb.append_flipped_images() 89 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/imdb.py", line 106, in append_flipped_images 90 | widths = self._get_widths() 91 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/imdb.py", line 102, in _get_widths 92 | for i in xrange(self.num_images)] 93 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/pascal_voc.py", line 60, in image_path_at 94 | return self.image_path_from_index(self._image_index[i]) 95 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/pascal_voc.py", line 69, in image_path_from_index 96 | 'Path does not exist: {}'.format(image_path) 97 | AssertionError: Path does not exist: /home/alex/Faster-RCNN_TF/data/VOCdevkit2007/VOC2007/JPEGImages/.jpg 98 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-43-56: -------------------------------------------------------------------------------- 1 | + echo Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-43-56 2 | Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-43-56 3 | + python ./tools/train_net.py --device gpu --device_id 0 --weights data/pretrain_model/VGG_imagenet.npy --imdb voc_2007_trainval --iters 100 --cfg experiments/cfgs/faster_rcnn_end2end.yml --network VGGnet_train 4 | voc_2007_train 5 | voc_2007_val 6 | voc_2007_trainval 7 | voc_2007_test 8 | kitti_train 9 | kitti_val 10 | kitti_trainval 11 | kitti_test 12 | nthu_71 13 | nthu_370 14 | Called with args: 15 | Namespace(cfg_file='experiments/cfgs/faster_rcnn_end2end.yml', device='gpu', device_id=0, imdb_name='voc_2007_trainval', max_iters=100, network_name='VGGnet_train', pretrained_model='data/pretrain_model/VGG_imagenet.npy', randomize=False, set_cfgs=None, solver=None) 16 | Using config: 17 | {'DATA_DIR': '/home/alex/Faster-RCNN_TF/data', 18 | 'DEDUP_BOXES': 0.0625, 19 | 'EPS': 1e-14, 20 | 'EXP_DIR': 'faster_rcnn_end2end', 21 | 'GPU_ID': 0, 22 | 'IS_MULTISCALE': False, 23 | 'MATLAB': 'matlab', 24 | 'MODELS_DIR': '/home/alex/Faster-RCNN_TF/models/pascal_voc', 25 | 'PIXEL_MEANS': array([[[ 102.9801, 115.9465, 122.7717]]]), 26 | 'RNG_SEED': 3, 27 | 'ROOT_DIR': '/home/alex/Faster-RCNN_TF', 28 | 'TEST': {'BBOX_REG': True, 29 | 'DEBUG_TIMELINE': False, 30 | 'HAS_RPN': True, 31 | 'MAX_SIZE': 1000, 32 | 'NMS': 0.3, 33 | 'PROPOSAL_METHOD': 'selective_search', 34 | 'RPN_MIN_SIZE': 16, 35 | 'RPN_NMS_THRESH': 0.7, 36 | 'RPN_POST_NMS_TOP_N': 300, 37 | 'RPN_PRE_NMS_TOP_N': 6000, 38 | 'SCALES': [600], 39 | 'SVM': False}, 40 | 'TRAIN': {'ASPECT_GROUPING': True, 41 | 'BATCH_SIZE': 128, 42 | 'BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 43 | 'BBOX_NORMALIZE_MEANS': [0.0, 0.0, 0.0, 0.0], 44 | 'BBOX_NORMALIZE_STDS': [0.1, 0.1, 0.2, 0.2], 45 | 'BBOX_NORMALIZE_TARGETS': True, 46 | 'BBOX_NORMALIZE_TARGETS_PRECOMPUTED': True, 47 | 'BBOX_REG': True, 48 | 'BBOX_THRESH': 0.5, 49 | 'BG_THRESH_HI': 0.5, 50 | 'BG_THRESH_LO': 0.0, 51 | 'DEBUG_TIMELINE': False, 52 | 'DISPLAY': 10, 53 | 'FG_FRACTION': 0.25, 54 | 'FG_THRESH': 0.5, 55 | 'GAMMA': 0.1, 56 | 'HAS_RPN': True, 57 | 'IMS_PER_BATCH': 1, 58 | 'LEARNING_RATE': 0.001, 59 | 'MAX_SIZE': 1000, 60 | 'MOMENTUM': 0.9, 61 | 'PROPOSAL_METHOD': 'gt', 62 | 'RPN_BATCHSIZE': 256, 63 | 
'RPN_BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 64 | 'RPN_CLOBBER_POSITIVES': False, 65 | 'RPN_FG_FRACTION': 0.5, 66 | 'RPN_MIN_SIZE': 16, 67 | 'RPN_NEGATIVE_OVERLAP': 0.3, 68 | 'RPN_NMS_THRESH': 0.7, 69 | 'RPN_POSITIVE_OVERLAP': 0.7, 70 | 'RPN_POSITIVE_WEIGHT': -1.0, 71 | 'RPN_POST_NMS_TOP_N': 2000, 72 | 'RPN_PRE_NMS_TOP_N': 12000, 73 | 'SCALES': [600], 74 | 'SNAPSHOT_INFIX': '', 75 | 'SNAPSHOT_ITERS': 5000, 76 | 'SNAPSHOT_PREFIX': 'VGGnet_fast_rcnn', 77 | 'STEPSIZE': 50000, 78 | 'USE_FLIPPED': True, 79 | 'USE_PREFETCH': False}, 80 | 'USE_GPU_NMS': True} 81 | > 82 | Loaded dataset `voc_2007_trainval` for training 83 | Appending horizontally-flipped training examples... 84 | Traceback (most recent call last): 85 | File "./tools/train_net.py", line 83, in 86 | roidb = get_training_roidb(imdb) 87 | File "/home/alex/Faster-RCNN_TF/tools/../lib/fast_rcnn/train.py", line 205, in get_training_roidb 88 | imdb.append_flipped_images() 89 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/imdb.py", line 106, in append_flipped_images 90 | widths = self._get_widths() 91 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/imdb.py", line 102, in _get_widths 92 | for i in xrange(self.num_images)] 93 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/pascal_voc.py", line 60, in image_path_at 94 | return self.image_path_from_index(self._image_index[i]) 95 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/pascal_voc.py", line 69, in image_path_from_index 96 | 'Path does not exist: {}'.format(image_path) 97 | AssertionError: Path does not exist: /home/alex/Faster-RCNN_TF/data/VOCdevkit2007/VOC2007/JPEGImages/.jpg 98 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-45-50: -------------------------------------------------------------------------------- 1 | + echo Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-45-50 2 | Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-26_12-45-50 3 | + python ./tools/train_net.py --device gpu --device_id 0 --weights data/pretrain_model/VGG_imagenet.npy --imdb voc_2007_trainval --iters 100 --cfg experiments/cfgs/faster_rcnn_end2end.yml --network VGGnet_train 4 | voc_2007_train 5 | voc_2007_val 6 | voc_2007_trainval 7 | voc_2007_test 8 | kitti_train 9 | kitti_val 10 | kitti_trainval 11 | kitti_test 12 | nthu_71 13 | nthu_370 14 | Called with args: 15 | Namespace(cfg_file='experiments/cfgs/faster_rcnn_end2end.yml', device='gpu', device_id=0, imdb_name='voc_2007_trainval', max_iters=100, network_name='VGGnet_train', pretrained_model='data/pretrain_model/VGG_imagenet.npy', randomize=False, set_cfgs=None, solver=None) 16 | Using config: 17 | {'DATA_DIR': '/home/alex/Faster-RCNN_TF/data', 18 | 'DEDUP_BOXES': 0.0625, 19 | 'EPS': 1e-14, 20 | 'EXP_DIR': 'faster_rcnn_end2end', 21 | 'GPU_ID': 0, 22 | 'IS_MULTISCALE': False, 23 | 'MATLAB': 'matlab', 24 | 'MODELS_DIR': '/home/alex/Faster-RCNN_TF/models/pascal_voc', 25 | 'PIXEL_MEANS': array([[[ 102.9801, 115.9465, 122.7717]]]), 26 | 'RNG_SEED': 3, 27 | 'ROOT_DIR': '/home/alex/Faster-RCNN_TF', 28 | 'TEST': {'BBOX_REG': True, 29 | 'DEBUG_TIMELINE': False, 30 | 'HAS_RPN': True, 31 | 'MAX_SIZE': 1000, 32 | 'NMS': 0.3, 33 | 'PROPOSAL_METHOD': 'selective_search', 34 | 'RPN_MIN_SIZE': 16, 35 | 'RPN_NMS_THRESH': 0.7, 36 | 'RPN_POST_NMS_TOP_N': 300, 37 | 'RPN_PRE_NMS_TOP_N': 6000, 38 | 'SCALES': [600], 39 | 'SVM': False}, 40 | 'TRAIN': {'ASPECT_GROUPING': True, 
41 | 'BATCH_SIZE': 128, 42 | 'BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 43 | 'BBOX_NORMALIZE_MEANS': [0.0, 0.0, 0.0, 0.0], 44 | 'BBOX_NORMALIZE_STDS': [0.1, 0.1, 0.2, 0.2], 45 | 'BBOX_NORMALIZE_TARGETS': True, 46 | 'BBOX_NORMALIZE_TARGETS_PRECOMPUTED': True, 47 | 'BBOX_REG': True, 48 | 'BBOX_THRESH': 0.5, 49 | 'BG_THRESH_HI': 0.5, 50 | 'BG_THRESH_LO': 0.0, 51 | 'DEBUG_TIMELINE': False, 52 | 'DISPLAY': 10, 53 | 'FG_FRACTION': 0.25, 54 | 'FG_THRESH': 0.5, 55 | 'GAMMA': 0.1, 56 | 'HAS_RPN': True, 57 | 'IMS_PER_BATCH': 1, 58 | 'LEARNING_RATE': 0.001, 59 | 'MAX_SIZE': 1000, 60 | 'MOMENTUM': 0.9, 61 | 'PROPOSAL_METHOD': 'gt', 62 | 'RPN_BATCHSIZE': 256, 63 | 'RPN_BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 64 | 'RPN_CLOBBER_POSITIVES': False, 65 | 'RPN_FG_FRACTION': 0.5, 66 | 'RPN_MIN_SIZE': 16, 67 | 'RPN_NEGATIVE_OVERLAP': 0.3, 68 | 'RPN_NMS_THRESH': 0.7, 69 | 'RPN_POSITIVE_OVERLAP': 0.7, 70 | 'RPN_POSITIVE_WEIGHT': -1.0, 71 | 'RPN_POST_NMS_TOP_N': 2000, 72 | 'RPN_PRE_NMS_TOP_N': 12000, 73 | 'SCALES': [600], 74 | 'SNAPSHOT_INFIX': '', 75 | 'SNAPSHOT_ITERS': 5000, 76 | 'SNAPSHOT_PREFIX': 'VGGnet_fast_rcnn', 77 | 'STEPSIZE': 50000, 78 | 'USE_FLIPPED': True, 79 | 'USE_PREFETCH': False}, 80 | 'USE_GPU_NMS': True} 81 | > 82 | Loaded dataset `voc_2007_trainval` for training 83 | Appending horizontally-flipped training examples... 84 | Traceback (most recent call last): 85 | File "./tools/train_net.py", line 83, in 86 | roidb = get_training_roidb(imdb) 87 | File "/home/alex/Faster-RCNN_TF/tools/../lib/fast_rcnn/train.py", line 205, in get_training_roidb 88 | imdb.append_flipped_images() 89 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/imdb.py", line 106, in append_flipped_images 90 | widths = self._get_widths() 91 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/imdb.py", line 102, in _get_widths 92 | for i in xrange(self.num_images)] 93 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/pascal_voc.py", line 60, in image_path_at 94 | return self.image_path_from_index(self._image_index[i]) 95 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/pascal_voc.py", line 69, in image_path_from_index 96 | 'Path does not exist: {}'.format(image_path) 97 | AssertionError: Path does not exist: /home/alex/Faster-RCNN_TF/data/VOCdevkit2007/VOC2007/JPEGImages/.jpg 98 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-26_14-47-14: -------------------------------------------------------------------------------- 1 | + echo Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-26_14-47-14 2 | Logging output to experiments/logs/faster_rcnn_end2end_VGG16_.txt.2017-12-26_14-47-14 3 | + python ./tools/train_net.py --device gpu --device_id 0 --weights data/pretrain_model/VGG_imagenet.npy --imdb voc_2007_trainval --iters 1000 --cfg experiments/cfgs/faster_rcnn_end2end.yml --network VGGnet_train 4 | voc_2007_train 5 | voc_2007_val 6 | voc_2007_trainval 7 | voc_2007_test 8 | kitti_train 9 | kitti_val 10 | kitti_trainval 11 | kitti_test 12 | nthu_71 13 | nthu_370 14 | Called with args: 15 | Namespace(cfg_file='experiments/cfgs/faster_rcnn_end2end.yml', device='gpu', device_id=0, imdb_name='voc_2007_trainval', max_iters=1000, network_name='VGGnet_train', pretrained_model='data/pretrain_model/VGG_imagenet.npy', randomize=False, set_cfgs=None, solver=None) 16 | Using config: 17 | {'DATA_DIR': '/home/alex/Faster-RCNN_TF/data', 18 | 'DEDUP_BOXES': 0.0625, 19 | 'EPS': 1e-14, 20 | 
'EXP_DIR': 'faster_rcnn_end2end', 21 | 'GPU_ID': 0, 22 | 'IS_MULTISCALE': False, 23 | 'MATLAB': 'matlab', 24 | 'MODELS_DIR': '/home/alex/Faster-RCNN_TF/models/pascal_voc', 25 | 'PIXEL_MEANS': array([[[ 102.9801, 115.9465, 122.7717]]]), 26 | 'RNG_SEED': 3, 27 | 'ROOT_DIR': '/home/alex/Faster-RCNN_TF', 28 | 'TEST': {'BBOX_REG': True, 29 | 'DEBUG_TIMELINE': False, 30 | 'HAS_RPN': True, 31 | 'MAX_SIZE': 1000, 32 | 'NMS': 0.3, 33 | 'PROPOSAL_METHOD': 'selective_search', 34 | 'RPN_MIN_SIZE': 16, 35 | 'RPN_NMS_THRESH': 0.7, 36 | 'RPN_POST_NMS_TOP_N': 300, 37 | 'RPN_PRE_NMS_TOP_N': 6000, 38 | 'SCALES': [600], 39 | 'SVM': False}, 40 | 'TRAIN': {'ASPECT_GROUPING': True, 41 | 'BATCH_SIZE': 128, 42 | 'BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 43 | 'BBOX_NORMALIZE_MEANS': [0.0, 0.0, 0.0, 0.0], 44 | 'BBOX_NORMALIZE_STDS': [0.1, 0.1, 0.2, 0.2], 45 | 'BBOX_NORMALIZE_TARGETS': True, 46 | 'BBOX_NORMALIZE_TARGETS_PRECOMPUTED': True, 47 | 'BBOX_REG': True, 48 | 'BBOX_THRESH': 0.5, 49 | 'BG_THRESH_HI': 0.5, 50 | 'BG_THRESH_LO': 0.0, 51 | 'DEBUG_TIMELINE': False, 52 | 'DISPLAY': 10, 53 | 'FG_FRACTION': 0.25, 54 | 'FG_THRESH': 0.5, 55 | 'GAMMA': 0.1, 56 | 'HAS_RPN': True, 57 | 'IMS_PER_BATCH': 1, 58 | 'LEARNING_RATE': 0.001, 59 | 'MAX_SIZE': 1000, 60 | 'MOMENTUM': 0.9, 61 | 'PROPOSAL_METHOD': 'gt', 62 | 'RPN_BATCHSIZE': 256, 63 | 'RPN_BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 64 | 'RPN_CLOBBER_POSITIVES': False, 65 | 'RPN_FG_FRACTION': 0.5, 66 | 'RPN_MIN_SIZE': 16, 67 | 'RPN_NEGATIVE_OVERLAP': 0.3, 68 | 'RPN_NMS_THRESH': 0.7, 69 | 'RPN_POSITIVE_OVERLAP': 0.7, 70 | 'RPN_POSITIVE_WEIGHT': -1.0, 71 | 'RPN_POST_NMS_TOP_N': 2000, 72 | 'RPN_PRE_NMS_TOP_N': 12000, 73 | 'SCALES': [600], 74 | 'SNAPSHOT_INFIX': '', 75 | 'SNAPSHOT_ITERS': 5000, 76 | 'SNAPSHOT_PREFIX': 'VGGnet_fast_rcnn', 77 | 'STEPSIZE': 50000, 78 | 'USE_FLIPPED': True, 79 | 'USE_PREFETCH': False}, 80 | 'USE_GPU_NMS': True} 81 | > 82 | Loaded dataset `voc_2007_trainval` for training 83 | Appending horizontally-flipped training examples... 84 | voc_2007_trainval gt roidb loaded from /home/alex/Faster-RCNN_TF/data/cache/voc_2007_trainval_gt_roidb.pkl 85 | Traceback (most recent call last): 86 | File "./tools/train_net.py", line 83, in 87 | roidb = get_training_roidb(imdb) 88 | File "/home/alex/Faster-RCNN_TF/tools/../lib/fast_rcnn/train.py", line 205, in get_training_roidb 89 | imdb.append_flipped_images() 90 | File "/home/alex/Faster-RCNN_TF/tools/../lib/datasets/imdb.py", line 113, in append_flipped_images 91 | assert (boxes[:, 2] >= boxes[:, 0]).all() 92 | AssertionError 93 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/experiments/scripts/faster_rcnn_end2end.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Usage: 3 | # ./experiments/scripts/faster_rcnn_end2end.sh GPU NET DATASET [options args to {train,test}_net.py] 4 | # DATASET is either pascal_voc or coco. 
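# Note: despite the usage line above, this version of the script reads four
# positional arguments (DEV, DEV_ID, NET, DATASET -- see the assignments
# further down). An invocation consistent with the logs in experiments/logs/
# would be, for example:
#   ./experiments/scripts/faster_rcnn_end2end.sh gpu 0 VGG16 pascal_voc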
5 | # 6 | # Example: 7 | # ./experiments/scripts/faster_rcnn_end2end.sh 0 VGG_CNN_M_1024 pascal_voc \ 8 | # --set EXP_DIR foobar RNG_SEED 42 TRAIN.SCALES "[400, 500, 600, 700]" 9 | 10 | set -x 11 | set -e 12 | 13 | export PYTHONUNBUFFERED="True" 14 | 15 | DEV=$1 16 | DEV_ID=$2 17 | NET=$3 18 | DATASET=$4 19 | 20 | array=( $@ ) 21 | len=${#array[@]} 22 | EXTRA_ARGS=${array[@]:4:$len} 23 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 24 | 25 | case $DATASET in 26 | pascal_voc) 27 | TRAIN_IMDB="voc_2007_trainval" 28 | TEST_IMDB="voc_2007_test" 29 | PT_DIR="pascal_voc" 30 | ITERS=1000 31 | ;; 32 | coco) 33 | # This is a very long and slow training schedule 34 | # You can probably use fewer iterations and reduce the 35 | # time to the LR drop (set in the solver to 350,000 iterations). 36 | TRAIN_IMDB="coco_2014_train" 37 | TEST_IMDB="coco_2014_minival" 38 | PT_DIR="coco" 39 | ITERS=490000 40 | ;; 41 | *) 42 | echo "No dataset given" 43 | exit 44 | ;; 45 | esac 46 | 47 | LOG="experiments/logs/faster_rcnn_end2end_${NET}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 48 | exec &> >(tee -a "$LOG") 49 | echo Logging output to "$LOG" 50 | 51 | time python ./tools/train_net.py --device ${DEV} --device_id ${DEV_ID} \ 52 | --weights data/pretrain_model/VGG_imagenet.npy \ 53 | --imdb ${TRAIN_IMDB} \ 54 | --iters ${ITERS} \ 55 | --cfg experiments/cfgs/faster_rcnn_end2end.yml \ 56 | --network VGGnet_train \ 57 | ${EXTRA_ARGS} 58 | 59 | set +x 60 | NET_FINAL=`grep -B 1 "done solving" ${LOG} | grep "Wrote snapshot" | awk '{print $4}'` 61 | set -x 62 | 63 | time python ./tools/test_net.py --device ${DEV} --device_id ${DEV_ID} \ 64 | --weights ${NET_FINAL} \ 65 | --imdb ${TEST_IMDB} \ 66 | --cfg experiments/cfgs/faster_rcnn_end2end.yml \ 67 | --network VGGnet_test \ 68 | ${EXTRA_ARGS} 69 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | bash make.sh 5 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from .imdb import imdb 9 | from .pascal_voc import pascal_voc 10 | from .pascal3d import pascal3d 11 | from .imagenet3d import imagenet3d 12 | from .kitti import kitti 13 | from .kitti_tracking import kitti_tracking 14 | from .nissan import nissan 15 | from .nthu import nthu 16 | from . import factory 17 | 18 | import os.path as osp 19 | ROOT_DIR = osp.join(osp.dirname(__file__), '..', '..') 20 | 21 | # We assume your matlab binary is in your path and called `matlab'. 22 | # If either is not true, just add it to your path and alias it as matlab, or 23 | # you could change this file. 
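# Note: the name assigned below must match an executable on PATH if the
# MATLAB-based evaluation is ever used; the _which() availability check at
# the bottom of this file is wrapped in a string literal, so it never runs
# and a wrong name here only surfaces when MATLAB is actually invoked.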
24 | MATLAB = 'matlab_r2013b' 25 | 26 | # http://stackoverflow.com/questions/377017/test-if-executable-exists-in-python 27 | def _which(program): 28 | import os 29 | def is_exe(fpath): 30 | return os.path.isfile(fpath) and os.access(fpath, os.X_OK) 31 | 32 | fpath, fname = os.path.split(program) 33 | if fpath: 34 | if is_exe(program): 35 | return program 36 | else: 37 | for path in os.environ["PATH"].split(os.pathsep): 38 | path = path.strip('"') 39 | exe_file = os.path.join(path, program) 40 | if is_exe(exe_file): 41 | return exe_file 42 | 43 | return None 44 | """ 45 | if _which(MATLAB) is None: 46 | msg = ("MATLAB command '{}' not found. " 47 | "Please add '{}' to your PATH.").format(MATLAB, MATLAB) 48 | raise EnvironmentError(msg) 49 | """ 50 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/datasets/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/datasets/__init__.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/datasets/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick 5 | # -------------------------------------------------------- 6 | 7 | import numpy as np 8 | 9 | def unique_boxes(boxes, scale=1.0): 10 | """Return indices of unique boxes.""" 11 | v = np.array([1, 1e3, 1e6, 1e9]) 12 | hashes = np.round(boxes * scale).dot(v) 13 | _, index = np.unique(hashes, return_index=True) 14 | return np.sort(index) 15 | 16 | def xywh_to_xyxy(boxes): 17 | """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" 18 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) 19 | 20 | def xyxy_to_xywh(boxes): 21 | """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" 22 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) 23 | 24 | def validate_boxes(boxes, width=0, height=0): 25 | """Check that a set of boxes are valid.""" 26 | x1 = boxes[:, 0] 27 | y1 = boxes[:, 1] 28 | x2 = boxes[:, 2] 29 | y2 = boxes[:, 3] 30 | assert (x1 >= 0).all() 31 | assert (y1 >= 0).all() 32 | assert (x2 >= x1).all() 33 | assert (y2 >= y1).all() 34 | assert (x2 < width).all() 35 | assert (y2 < height).all() 36 | 37 | def filter_small_boxes(boxes, min_size): 38 | w = boxes[:, 2] - boxes[:, 0] 39 | h = boxes[:, 3] - boxes[:, 1] 40 | keep = np.where((w >= min_size) & (h > min_size))[0] 41 | return keep 42 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/datasets/ds_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/datasets/ds_utils.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # 
-------------------------------------------------------- 7 | 8 | """Factory method for easily getting imdbs by name.""" 9 | 10 | __sets = {} 11 | 12 | import datasets.pascal_voc 13 | import datasets.imagenet3d 14 | import datasets.kitti 15 | import datasets.kitti_tracking 16 | import numpy as np 17 | 18 | def _selective_search_IJCV_top_k(split, year, top_k): 19 | """Return an imdb that uses the top k proposals from the selective search 20 | IJCV code. 21 | """ 22 | imdb = datasets.pascal_voc(split, year) 23 | imdb.roidb_handler = imdb.selective_search_IJCV_roidb 24 | imdb.config['top_k'] = top_k 25 | return imdb 26 | 27 | # Set up voc__ using selective search "fast" mode 28 | for year in ['2007', '2012']: 29 | for split in ['train', 'val', 'trainval', 'test']: 30 | name = 'voc_{}_{}'.format(year, split) 31 | __sets[name] = (lambda split=split, year=year: 32 | datasets.pascal_voc(split, year)) 33 | """ 34 | # Set up voc___top_ using selective search "quality" mode 35 | # but only returning the first k boxes 36 | for top_k in np.arange(1000, 11000, 1000): 37 | for year in ['2007', '2012']: 38 | for split in ['train', 'val', 'trainval', 'test']: 39 | name = 'voc_{}_{}_top_{:d}'.format(year, split, top_k) 40 | __sets[name] = (lambda split=split, year=year, top_k=top_k: 41 | _selective_search_IJCV_top_k(split, year, top_k)) 42 | """ 43 | 44 | # Set up voc__ using selective search "fast" mode 45 | for year in ['2007']: 46 | for split in ['train', 'val', 'trainval', 'test']: 47 | name = 'voc_{}_{}'.format(year, split) 48 | print name 49 | __sets[name] = (lambda split=split, year=year: 50 | datasets.pascal_voc(split, year)) 51 | 52 | # KITTI dataset 53 | for split in ['train', 'val', 'trainval', 'test']: 54 | name = 'kitti_{}'.format(split) 55 | print name 56 | __sets[name] = (lambda split=split: 57 | datasets.kitti(split)) 58 | 59 | # Set up coco_2014_ 60 | for year in ['2014']: 61 | for split in ['train', 'val', 'minival', 'valminusminival']: 62 | name = 'coco_{}_{}'.format(year, split) 63 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 64 | 65 | # Set up coco_2015_ 66 | for year in ['2015']: 67 | for split in ['test', 'test-dev']: 68 | name = 'coco_{}_{}'.format(year, split) 69 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 70 | 71 | # NTHU dataset 72 | for split in ['71', '370']: 73 | name = 'nthu_{}'.format(split) 74 | print name 75 | __sets[name] = (lambda split=split: 76 | datasets.nthu(split)) 77 | 78 | 79 | def get_imdb(name): 80 | """Get an imdb (image database) by name.""" 81 | if not __sets.has_key(name): 82 | raise KeyError('Unknown dataset: {}'.format(name)) 83 | return __sets[name]() 84 | 85 | def list_imdbs(): 86 | """List all registered imdbs.""" 87 | return __sets.keys() 88 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/datasets/factory.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/datasets/factory.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/datasets/imagenet3d.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/datasets/imagenet3d.pyc 
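The training logs above record two recurring failures while the roidb is being built: an AssertionError on (boxes[:, 2] >= boxes[:, 0]).all() inside imdb.append_flipped_images(), and AssertionError: Path does not exist: .../JPEGImages/.jpg. The second comes from a blank line in ImageSets/Main/trainval.txt (an empty index entry turns into the bare file name ".jpg"); the first is the symptom commonly reported for this code base when annotations are already 0-based, since the stock pascal_voc.py loader subtracts 1 from every coordinate and keeps boxes in a uint16 array, so an xmin of 0 can wrap around and the horizontally flipped box ends up with x2 < x1. The standalone checker below is a sketch and not part of this repository; the devkit path is an assumption taken from the log output, and after correcting annotations the cached data/cache/voc_2007_trainval_gt_roidb.pkl (which the logs show being re-loaded on every run) must be deleted so the fix actually takes effect.

import os
import xml.etree.ElementTree as ET

# Assumed layout, taken from the paths printed in the logs; adjust as needed.
DEVKIT = '/home/alex/Faster-RCNN_TF/data/VOCdevkit2007/VOC2007'
IMAGESET = os.path.join(DEVKIT, 'ImageSets', 'Main', 'trainval.txt')

def blank_index_lines():
    """Blank lines in the image-set file later become '.jpg' image paths."""
    with open(IMAGESET) as f:
        return [n for n, line in enumerate(f, 1) if not line.strip()]

def suspect_boxes(index):
    """Boxes that are not strictly 1-based and inside the image.

    The stock pascal_voc.py loader subtracts 1 from each coordinate and
    stores boxes as uint16, so a 0-based xmin/ymin of 0 can wrap to 65535
    and the flipped box then fails the x2 >= x1 assertion in
    append_flipped_images.
    """
    bad = []
    for name in index:
        tree = ET.parse(os.path.join(DEVKIT, 'Annotations', name + '.xml'))
        width = int(tree.find('size/width').text)
        height = int(tree.find('size/height').text)
        for obj in tree.findall('object'):
            b = obj.find('bndbox')
            x1, y1, x2, y2 = [int(float(b.find(t).text))
                              for t in ('xmin', 'ymin', 'xmax', 'ymax')]
            if (x1 < 1 or y1 < 1 or x2 > width or y2 > height
                    or x2 < x1 or y2 < y1):
                bad.append((name, (x1, y1, x2, y2)))
    return bad

if __name__ == '__main__':
    print('blank lines in trainval.txt: {}'.format(blank_index_lines()))
    with open(IMAGESET) as f:
        names = [line.strip() for line in f if line.strip()]
    print('suspect boxes: {}'.format(suspect_boxes(names)))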
-------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/datasets/imdb.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/datasets/imdb.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/datasets/kitti.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/datasets/kitti.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/datasets/kitti_tracking.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/datasets/kitti_tracking.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/datasets/nissan.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/datasets/nissan.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/datasets/nthu.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/datasets/nthu.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/datasets/pascal3d.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/datasets/pascal3d.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/datasets/pascal_voc.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/datasets/pascal_voc.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/datasets/voc_eval.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/datasets/voc_eval.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/fast_rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from . import config 9 | from . import train 10 | from . 
import test 11 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/fast_rcnn/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/fast_rcnn/__init__.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/fast_rcnn/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def bbox_transform(ex_rois, gt_rois): 11 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 12 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 13 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 14 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 15 | 16 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 17 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 18 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 19 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 20 | 21 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 22 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 23 | targets_dw = np.log(gt_widths / ex_widths) 24 | targets_dh = np.log(gt_heights / ex_heights) 25 | 26 | targets = np.vstack( 27 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 28 | return targets 29 | 30 | def bbox_transform_inv(boxes, deltas): 31 | if boxes.shape[0] == 0: 32 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) 33 | 34 | boxes = boxes.astype(deltas.dtype, copy=False) 35 | 36 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 37 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 38 | ctr_x = boxes[:, 0] + 0.5 * widths 39 | ctr_y = boxes[:, 1] + 0.5 * heights 40 | 41 | dx = deltas[:, 0::4] 42 | dy = deltas[:, 1::4] 43 | dw = deltas[:, 2::4] 44 | dh = deltas[:, 3::4] 45 | 46 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 47 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 48 | pred_w = np.exp(dw) * widths[:, np.newaxis] 49 | pred_h = np.exp(dh) * heights[:, np.newaxis] 50 | 51 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) 52 | # x1 53 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 54 | # y1 55 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 56 | # x2 57 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w 58 | # y2 59 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h 60 | 61 | return pred_boxes 62 | 63 | def clip_boxes(boxes, im_shape): 64 | """ 65 | Clip boxes to image boundaries. 
66 | """ 67 | 68 | # x1 >= 0 69 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 70 | # y1 >= 0 71 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 72 | # x2 < im_shape[1] 73 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 74 | # y2 < im_shape[0] 75 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 76 | return boxes 77 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/fast_rcnn/bbox_transform.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/fast_rcnn/bbox_transform.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/fast_rcnn/config.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/fast_rcnn/config.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/fast_rcnn/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from fast_rcnn.config import cfg 9 | if cfg.USE_GPU_NMS: 10 | from nms.gpu_nms import gpu_nms 11 | from nms.cpu_nms import cpu_nms 12 | 13 | def nms(dets, thresh, force_cpu=False): 14 | """Dispatch to either CPU or GPU NMS implementations.""" 15 | 16 | if dets.shape[0] == 0: 17 | return [] 18 | if cfg.USE_GPU_NMS and not force_cpu: 19 | return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 20 | else: 21 | return cpu_nms(dets, thresh) 22 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/fast_rcnn/nms_wrapper.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/fast_rcnn/nms_wrapper.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/fast_rcnn/test.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/fast_rcnn/test.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/fast_rcnn/train.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/fast_rcnn/train.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/gt_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see 
LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/gt_data_layer/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/gt_data_layer/__init__.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/gt_data_layer/layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """The data layer used during training to train a Fast R-CNN network. 9 | 10 | GtDataLayer implements a Caffe Python layer. 11 | """ 12 | 13 | import caffe 14 | from fast_rcnn.config import cfg 15 | from gt_data_layer.minibatch import get_minibatch 16 | import numpy as np 17 | import yaml 18 | from multiprocessing import Process, Queue 19 | 20 | class GtDataLayer(caffe.Layer): 21 | """Fast R-CNN data layer used for training.""" 22 | 23 | def _shuffle_roidb_inds(self): 24 | """Randomly permute the training roidb.""" 25 | self._perm = np.random.permutation(np.arange(len(self._roidb))) 26 | self._cur = 0 27 | 28 | def _get_next_minibatch_inds(self): 29 | """Return the roidb indices for the next minibatch.""" 30 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): 31 | self._shuffle_roidb_inds() 32 | 33 | db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] 34 | self._cur += cfg.TRAIN.IMS_PER_BATCH 35 | 36 | """ 37 | # sample images with gt objects 38 | db_inds = np.zeros((cfg.TRAIN.IMS_PER_BATCH), dtype=np.int32) 39 | i = 0 40 | while (i < cfg.TRAIN.IMS_PER_BATCH): 41 | ind = self._perm[self._cur] 42 | num_objs = self._roidb[ind]['boxes'].shape[0] 43 | if num_objs != 0: 44 | db_inds[i] = ind 45 | i += 1 46 | 47 | self._cur += 1 48 | if self._cur >= len(self._roidb): 49 | self._shuffle_roidb_inds() 50 | """ 51 | 52 | return db_inds 53 | 54 | def _get_next_minibatch(self): 55 | """Return the blobs to be used for the next minibatch.""" 56 | db_inds = self._get_next_minibatch_inds() 57 | minibatch_db = [self._roidb[i] for i in db_inds] 58 | return get_minibatch(minibatch_db, self._num_classes) 59 | 60 | # this function is called in training the net 61 | def set_roidb(self, roidb): 62 | """Set the roidb to be used by this layer during training.""" 63 | self._roidb = roidb 64 | self._shuffle_roidb_inds() 65 | 66 | def setup(self, bottom, top): 67 | """Setup the GtDataLayer.""" 68 | 69 | # parse the layer parameter string, which must be valid YAML 70 | layer_params = yaml.load(self.param_str_) 71 | 72 | self._num_classes = layer_params['num_classes'] 73 | 74 | self._name_to_top_map = { 75 | 'data': 0, 76 | 'info_boxes': 1, 77 | 'parameters': 2} 78 | 79 | # data blob: holds a batch of N images, each with 3 channels 80 | # The height and width (100 x 100) are dummy values 81 | num_scale_base = len(cfg.TRAIN.SCALES_BASE) 82 | top[0].reshape(num_scale_base, 3, 100, 100) 83 | 84 | # info boxes blob 85 | top[1].reshape(1, 18) 86 | 87 | # parameters blob 88 | num_scale = len(cfg.TRAIN.SCALES) 89 | num_aspect = 
len(cfg.TRAIN.ASPECTS) 90 | top[2].reshape(2 + 2*num_scale + 2*num_aspect) 91 | 92 | def forward(self, bottom, top): 93 | """Get blobs and copy them into this layer's top blob vector.""" 94 | blobs = self._get_next_minibatch() 95 | 96 | for blob_name, blob in blobs.iteritems(): 97 | top_ind = self._name_to_top_map[blob_name] 98 | # Reshape net's input blobs 99 | top[top_ind].reshape(*(blob.shape)) 100 | # Copy data into net's input blobs 101 | top[top_ind].data[...] = blob.astype(np.float32, copy=False) 102 | 103 | def backward(self, top, propagate_down, bottom): 104 | """This layer does not propagate gradients.""" 105 | pass 106 | 107 | def reshape(self, bottom, top): 108 | """Reshaping happens during the call to forward.""" 109 | pass 110 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/gt_data_layer/minibatch.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Compute minibatch blobs for training a Fast R-CNN network.""" 9 | 10 | import numpy as np 11 | import numpy.random as npr 12 | import cv2 13 | from fast_rcnn.config import cfg 14 | from utils.blob import prep_im_for_blob, im_list_to_blob 15 | 16 | def get_minibatch(roidb, num_classes): 17 | """Given a roidb, construct a minibatch sampled from it.""" 18 | num_images = len(roidb) 19 | assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ 20 | 'num_images ({}) must divide BATCH_SIZE ({})'. \ 21 | format(num_images, cfg.TRAIN.BATCH_SIZE) 22 | 23 | # Get the input image blob, formatted for caffe 24 | im_blob = _get_image_blob(roidb) 25 | 26 | # build the box information blob 27 | info_boxes_blob = np.zeros((0, 18), dtype=np.float32) 28 | num_scale = len(cfg.TRAIN.SCALES) 29 | for i in xrange(num_images): 30 | info_boxes = roidb[i]['info_boxes'] 31 | 32 | # change the batch index 33 | info_boxes[:,2] += i * num_scale 34 | info_boxes[:,7] += i * num_scale 35 | 36 | info_boxes_blob = np.vstack((info_boxes_blob, info_boxes)) 37 | 38 | # build the parameter blob 39 | num_aspect = len(cfg.TRAIN.ASPECTS) 40 | num = 2 + 2 * num_scale + 2 * num_aspect 41 | parameters_blob = np.zeros((num), dtype=np.float32) 42 | parameters_blob[0] = num_scale 43 | parameters_blob[1] = num_aspect 44 | parameters_blob[2:2+num_scale] = cfg.TRAIN.SCALES 45 | parameters_blob[2+num_scale:2+2*num_scale] = cfg.TRAIN.SCALE_MAPPING 46 | parameters_blob[2+2*num_scale:2+2*num_scale+num_aspect] = cfg.TRAIN.ASPECT_HEIGHTS 47 | parameters_blob[2+2*num_scale+num_aspect:2+2*num_scale+2*num_aspect] = cfg.TRAIN.ASPECT_WIDTHS 48 | 49 | # For debug visualizations 50 | # _vis_minibatch(im_blob, rois_blob, labels_blob, sublabels_blob) 51 | 52 | blobs = {'data': im_blob, 53 | 'info_boxes': info_boxes_blob, 54 | 'parameters': parameters_blob} 55 | 56 | return blobs 57 | 58 | def _get_image_blob(roidb): 59 | """Builds an input blob from the images in the roidb at the different scales. 
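    Each image is read with cv2, optionally flipped, mean-subtracted with
    cfg.PIXEL_MEANS, and resized once per scale in cfg.TRAIN.SCALES_BASE, so the
    returned blob stacks len(roidb) * len(cfg.TRAIN.SCALES_BASE) images.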
60 | """ 61 | num_images = len(roidb) 62 | processed_ims = [] 63 | 64 | for i in xrange(num_images): 65 | # read image 66 | im = cv2.imread(roidb[i]['image']) 67 | if roidb[i]['flipped']: 68 | im = im[:, ::-1, :] 69 | 70 | im_orig = im.astype(np.float32, copy=True) 71 | im_orig -= cfg.PIXEL_MEANS 72 | 73 | # build image pyramid 74 | for im_scale in cfg.TRAIN.SCALES_BASE: 75 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, 76 | interpolation=cv2.INTER_LINEAR) 77 | 78 | processed_ims.append(im) 79 | 80 | # Create a blob to hold the input images 81 | blob = im_list_to_blob(processed_ims) 82 | 83 | return blob 84 | 85 | def _project_im_rois(im_rois, im_scale_factor): 86 | """Project image RoIs into the rescaled training image.""" 87 | rois = im_rois * im_scale_factor 88 | return rois 89 | 90 | def _get_bbox_regression_labels(bbox_target_data, num_classes): 91 | """Bounding-box regression targets are stored in a compact form in the 92 | roidb. 93 | 94 | This function expands those targets into the 4-of-4*K representation used 95 | by the network (i.e. only one class has non-zero targets). The loss weights 96 | are similarly expanded. 97 | 98 | Returns: 99 | bbox_target_data (ndarray): N x 4K blob of regression targets 100 | bbox_loss_weights (ndarray): N x 4K blob of loss weights 101 | """ 102 | clss = bbox_target_data[:, 0] 103 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) 104 | bbox_loss_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 105 | inds = np.where(clss > 0)[0] 106 | for ind in inds: 107 | cls = clss[ind] 108 | start = 4 * cls 109 | end = start + 4 110 | bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] 111 | bbox_loss_weights[ind, start:end] = [1., 1., 1., 1.] 112 | return bbox_targets, bbox_loss_weights 113 | 114 | 115 | def _vis_minibatch(im_blob, rois_blob, labels_blob, sublabels_blob): 116 | """Visualize a mini-batch for debugging.""" 117 | import matplotlib.pyplot as plt 118 | for i in xrange(rois_blob.shape[0]): 119 | rois = rois_blob[i, :] 120 | im_ind = rois[0] 121 | roi = rois[2:] 122 | im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy() 123 | im += cfg.PIXEL_MEANS 124 | im = im[:, :, (2, 1, 0)] 125 | im = im.astype(np.uint8) 126 | cls = labels_blob[i] 127 | subcls = sublabels_blob[i] 128 | plt.imshow(im) 129 | print 'class: ', cls, ' subclass: ', subcls 130 | plt.gca().add_patch( 131 | plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0], 132 | roi[3] - roi[1], fill=False, 133 | edgecolor='r', linewidth=3) 134 | ) 135 | plt.show() 136 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/gt_data_layer/roidb.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/gt_data_layer/roidb.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/make.sh: -------------------------------------------------------------------------------- 1 | TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | CXXFLAGS='' 5 | 6 | if [[ "$OSTYPE" =~ ^darwin ]]; then 7 | CXXFLAGS+='-undefined dynamic_lookup' 8 | fi 9 | 10 | cd roi_pooling_layer 11 | 12 | if [ -d "$CUDA_PATH" ]; then 13 | nvcc -std=c++11 -c -o roi_pooling_op.cu.o roi_pooling_op_gpu.cu.cc \ 14 | -I $TF_INC -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CXXFLAGS \ 
15 | -arch=sm_37 16 | 17 | g++ -std=c++11 -shared -o roi_pooling.so roi_pooling_op.cc \ 18 | roi_pooling_op.cu.o -I $TF_INC -D GOOGLE_CUDA=1 -fPIC $CXXFLAGS \ 19 | -lcudart -L $CUDA_PATH/lib64 \ 20 | -D_GLIBCXX_USE_CXX11_ABI=0 21 | else 22 | g++ -std=c++11 -shared -o roi_pooling.so roi_pooling_op.cc \ 23 | -I $TF_INC -fPIC $CXXFLAGS 24 | fi 25 | 26 | cd .. 27 | 28 | #cd feature_extrapolating_layer 29 | 30 | #nvcc -std=c++11 -c -o feature_extrapolating_op.cu.o feature_extrapolating_op_gpu.cu.cc \ 31 | # -I $TF_INC -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -arch=sm_50 32 | 33 | #g++ -std=c++11 -shared -o feature_extrapolating.so feature_extrapolating_op.cc \ 34 | # feature_extrapolating_op.cu.o -I $TF_INC -fPIC -lcudart -L $CUDA_PATH/lib64 35 | #cd .. 36 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/networks/.VGGnet.py.swo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/networks/.VGGnet.py.swo -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/networks/VGGnet_test.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from networks.network import Network 3 | 4 | n_classes = 2 5 | _feat_stride = [16,] 6 | anchor_scales = [8, 16, 32] 7 | 8 | class VGGnet_test(Network): 9 | def __init__(self, trainable=True): 10 | self.inputs = [] 11 | self.data = tf.placeholder(tf.float32, shape=[None, None, None, 3]) 12 | self.im_info = tf.placeholder(tf.float32, shape=[None, 3]) 13 | self.keep_prob = tf.placeholder(tf.float32) 14 | self.layers = dict({'data':self.data, 'im_info':self.im_info}) 15 | self.trainable = trainable 16 | self.setup() 17 | 18 | def setup(self): 19 | (self.feed('data') 20 | .conv(3, 3, 64, 1, 1, name='conv1_1', trainable=False) 21 | .conv(3, 3, 64, 1, 1, name='conv1_2', trainable=False) 22 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool1') 23 | .conv(3, 3, 128, 1, 1, name='conv2_1', trainable=False) 24 | .conv(3, 3, 128, 1, 1, name='conv2_2', trainable=False) 25 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool2') 26 | .conv(3, 3, 256, 1, 1, name='conv3_1') 27 | .conv(3, 3, 256, 1, 1, name='conv3_2') 28 | .conv(3, 3, 256, 1, 1, name='conv3_3') 29 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool3') 30 | .conv(3, 3, 512, 1, 1, name='conv4_1') 31 | .conv(3, 3, 512, 1, 1, name='conv4_2') 32 | .conv(3, 3, 512, 1, 1, name='conv4_3') 33 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool4') 34 | .conv(3, 3, 512, 1, 1, name='conv5_1') 35 | .conv(3, 3, 512, 1, 1, name='conv5_2') 36 | .conv(3, 3, 512, 1, 1, name='conv5_3')) 37 | 38 | (self.feed('conv5_3') 39 | .conv(3,3,512,1,1,name='rpn_conv/3x3') 40 | .conv(1,1,len(anchor_scales)*3*2,1,1,padding='VALID',relu = False,name='rpn_cls_score')) 41 | 42 | (self.feed('rpn_conv/3x3') 43 | .conv(1,1,len(anchor_scales)*3*4,1,1,padding='VALID',relu = False,name='rpn_bbox_pred')) 44 | 45 | (self.feed('rpn_cls_score') 46 | .reshape_layer(2,name = 'rpn_cls_score_reshape') 47 | .softmax(name='rpn_cls_prob')) 48 | 49 | (self.feed('rpn_cls_prob') 50 | .reshape_layer(len(anchor_scales)*3*2,name = 'rpn_cls_prob_reshape')) 51 | 52 | (self.feed('rpn_cls_prob_reshape','rpn_bbox_pred','im_info') 53 | .proposal_layer(_feat_stride, anchor_scales, 'TEST', name = 'rois')) 54 | 55 | (self.feed('conv5_3', 'rois') 56 | .roi_pool(7, 7, 
1.0/16, name='pool_5') 57 | .fc(4096, name='fc6') 58 | .fc(4096, name='fc7') 59 | .fc(n_classes, relu=False, name='cls_score') 60 | .softmax(name='cls_prob')) 61 | 62 | (self.feed('fc7') 63 | .fc(n_classes*4, relu=False, name='bbox_pred')) 64 | 65 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/networks/VGGnet_test.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/networks/VGGnet_test.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/networks/VGGnet_train.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from networks.network import Network 3 | 4 | 5 | #define 6 | 7 | n_classes = 2 8 | _feat_stride = [16,] 9 | anchor_scales = [8, 16, 32] 10 | 11 | class VGGnet_train(Network): 12 | def __init__(self, trainable=True): 13 | self.inputs = [] 14 | self.data = tf.placeholder(tf.float32, shape=[None, None, None, 3]) 15 | self.im_info = tf.placeholder(tf.float32, shape=[None, 3]) 16 | self.gt_boxes = tf.placeholder(tf.float32, shape=[None, 5]) 17 | self.keep_prob = tf.placeholder(tf.float32) 18 | self.layers = dict({'data':self.data, 'im_info':self.im_info, 'gt_boxes':self.gt_boxes}) 19 | self.trainable = trainable 20 | self.setup() 21 | 22 | # create ops and placeholders for bbox normalization process 23 | with tf.variable_scope('bbox_pred', reuse=True): 24 | weights = tf.get_variable("weights") 25 | biases = tf.get_variable("biases") 26 | 27 | self.bbox_weights = tf.placeholder(weights.dtype, shape=weights.get_shape()) 28 | self.bbox_biases = tf.placeholder(biases.dtype, shape=biases.get_shape()) 29 | 30 | self.bbox_weights_assign = weights.assign(self.bbox_weights) 31 | self.bbox_bias_assign = biases.assign(self.bbox_biases) 32 | 33 | def setup(self): 34 | (self.feed('data') 35 | .conv(3, 3, 64, 1, 1, name='conv1_1', trainable=False) 36 | .conv(3, 3, 64, 1, 1, name='conv1_2', trainable=False) 37 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool1') 38 | .conv(3, 3, 128, 1, 1, name='conv2_1', trainable=False) 39 | .conv(3, 3, 128, 1, 1, name='conv2_2', trainable=False) 40 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool2') 41 | .conv(3, 3, 256, 1, 1, name='conv3_1') 42 | .conv(3, 3, 256, 1, 1, name='conv3_2') 43 | .conv(3, 3, 256, 1, 1, name='conv3_3') 44 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool3') 45 | .conv(3, 3, 512, 1, 1, name='conv4_1') 46 | .conv(3, 3, 512, 1, 1, name='conv4_2') 47 | .conv(3, 3, 512, 1, 1, name='conv4_3') 48 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool4') 49 | .conv(3, 3, 512, 1, 1, name='conv5_1') 50 | .conv(3, 3, 512, 1, 1, name='conv5_2') 51 | .conv(3, 3, 512, 1, 1, name='conv5_3')) 52 | #========= RPN ============ 53 | (self.feed('conv5_3') 54 | .conv(3,3,512,1,1,name='rpn_conv/3x3') 55 | .conv(1,1,len(anchor_scales)*3*2 ,1 , 1, padding='VALID', relu = False, name='rpn_cls_score')) 56 | 57 | (self.feed('rpn_cls_score','gt_boxes','im_info','data') 58 | .anchor_target_layer(_feat_stride, anchor_scales, name = 'rpn-data' )) 59 | 60 | # Loss of rpn_cls & rpn_boxes 61 | 62 | (self.feed('rpn_conv/3x3') 63 | .conv(1,1,len(anchor_scales)*3*4, 1, 1, padding='VALID', relu = False, name='rpn_bbox_pred')) 64 | 65 | #========= RoI Proposal ============ 66 | (self.feed('rpn_cls_score') 67 | .reshape_layer(2,name = 
'rpn_cls_score_reshape') 68 | .softmax(name='rpn_cls_prob')) 69 | 70 | (self.feed('rpn_cls_prob') 71 | .reshape_layer(len(anchor_scales)*3*2,name = 'rpn_cls_prob_reshape')) 72 | 73 | (self.feed('rpn_cls_prob_reshape','rpn_bbox_pred','im_info') 74 | .proposal_layer(_feat_stride, anchor_scales, 'TRAIN',name = 'rpn_rois')) 75 | 76 | (self.feed('rpn_rois','gt_boxes') 77 | .proposal_target_layer(n_classes,name = 'roi-data')) 78 | 79 | 80 | #========= RCNN ============ 81 | (self.feed('conv5_3', 'roi-data') 82 | .roi_pool(7, 7, 1.0/16, name='pool_5') 83 | .fc(4096, name='fc6') 84 | .dropout(0.5, name='drop6') 85 | .fc(4096, name='fc7') 86 | .dropout(0.5, name='drop7') 87 | .fc(n_classes, relu=False, name='cls_score') 88 | .softmax(name='cls_prob')) 89 | 90 | (self.feed('drop7') 91 | .fc(n_classes*4, relu=False, name='bbox_pred')) 92 | 93 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/networks/VGGnet_train.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/networks/VGGnet_train.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/networks/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from .VGGnet_train import VGGnet_train 9 | from .VGGnet_test import VGGnet_test 10 | from . 
import factory 11 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/networks/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/networks/__init__.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/networks/factory.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SubCNN_TF 3 | # Copyright (c) 2016 CVGL Stanford 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yu Xiang 6 | # -------------------------------------------------------- 7 | 8 | """Factory method for easily getting imdbs by name.""" 9 | 10 | __sets = {} 11 | 12 | import networks.VGGnet_train 13 | import networks.VGGnet_test 14 | import pdb 15 | import tensorflow as tf 16 | 17 | #__sets['VGGnet_train'] = networks.VGGnet_train() 18 | 19 | #__sets['VGGnet_test'] = networks.VGGnet_test() 20 | 21 | 22 | def get_network(name): 23 | """Get a network by name.""" 24 | #if not __sets.has_key(name): 25 | # raise KeyError('Unknown dataset: {}'.format(name)) 26 | #return __sets[name] 27 | if name.split('_')[1] == 'test': 28 | return networks.VGGnet_test() 29 | elif name.split('_')[1] == 'train': 30 | return networks.VGGnet_train() 31 | else: 32 | raise KeyError('Unknown dataset: {}'.format(name)) 33 | 34 | 35 | def list_networks(): 36 | """List all registered imdbs.""" 37 | return __sets.keys() 38 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/networks/factory.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/networks/factory.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/networks/network.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/networks/network.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/nms/__init__.py -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/nms/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/nms/__init__.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # 
-------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef 
np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include 10 | #include 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 
| void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(¤t_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/nms/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def py_cpu_nms(dets, thresh): 11 | """Pure Python NMS baseline.""" 12 | x1 = dets[:, 0] 13 | y1 = dets[:, 1] 14 | x2 = dets[:, 2] 15 | y2 = dets[:, 3] 16 | scores = dets[:, 4] 17 | 18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 19 | order = scores.argsort()[::-1] 20 | 21 | keep = [] 22 | while order.size > 0: 23 | i = order[0] 24 | keep.append(i) 25 | xx1 = np.maximum(x1[i], x1[order[1:]]) 26 | yy1 = np.maximum(y1[i], y1[order[1:]]) 27 | xx2 = np.minimum(x2[i], x2[order[1:]]) 28 | yy2 = np.minimum(y2[i], y2[order[1:]]) 29 | 30 | w = np.maximum(0.0, xx2 - xx1 + 1) 31 | h = np.maximum(0.0, yy2 - yy1 + 1) 32 | inter = w * h 33 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 34 | 35 | inds = np.where(ovr <= thresh)[0] 36 | order = order[inds + 1] 37 | 38 | return keep 39 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/roi_data_layer/__init__.py: 
-------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/roi_data_layer/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/roi_data_layer/__init__.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/roi_data_layer/layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """The data layer used during training to train a Fast R-CNN network. 9 | 10 | RoIDataLayer implements a Caffe Python layer. 11 | """ 12 | 13 | from fast_rcnn.config import cfg 14 | from roi_data_layer.minibatch import get_minibatch 15 | import numpy as np 16 | 17 | class RoIDataLayer(object): 18 | """Fast R-CNN data layer used for training.""" 19 | 20 | def __init__(self, roidb, num_classes): 21 | """Set the roidb to be used by this layer during training.""" 22 | self._roidb = roidb 23 | self._num_classes = num_classes 24 | self._shuffle_roidb_inds() 25 | 26 | def _shuffle_roidb_inds(self): 27 | """Randomly permute the training roidb.""" 28 | self._perm = np.random.permutation(np.arange(len(self._roidb))) 29 | self._cur = 0 30 | 31 | def _get_next_minibatch_inds(self): 32 | """Return the roidb indices for the next minibatch.""" 33 | 34 | if cfg.TRAIN.HAS_RPN: 35 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): 36 | self._shuffle_roidb_inds() 37 | 38 | db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] 39 | self._cur += cfg.TRAIN.IMS_PER_BATCH 40 | else: 41 | # sample images 42 | db_inds = np.zeros((cfg.TRAIN.IMS_PER_BATCH), dtype=np.int32) 43 | i = 0 44 | while (i < cfg.TRAIN.IMS_PER_BATCH): 45 | ind = self._perm[self._cur] 46 | num_objs = self._roidb[ind]['boxes'].shape[0] 47 | if num_objs != 0: 48 | db_inds[i] = ind 49 | i += 1 50 | 51 | self._cur += 1 52 | if self._cur >= len(self._roidb): 53 | self._shuffle_roidb_inds() 54 | 55 | return db_inds 56 | 57 | def _get_next_minibatch(self): 58 | """Return the blobs to be used for the next minibatch. 59 | 60 | If cfg.TRAIN.USE_PREFETCH is True, then blobs will be computed in a 61 | separate process and made available through self._blob_queue. 
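        In this layer the prefetch path is not wired up; the blobs are always
        computed synchronously from the next minibatch indices below.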
62 | """ 63 | db_inds = self._get_next_minibatch_inds() 64 | minibatch_db = [self._roidb[i] for i in db_inds] 65 | return get_minibatch(minibatch_db, self._num_classes) 66 | 67 | def forward(self): 68 | """Get blobs and copy them into this layer's top blob vector.""" 69 | blobs = self._get_next_minibatch() 70 | return blobs 71 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/roi_data_layer/layer.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/roi_data_layer/layer.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/roi_data_layer/minibatch.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/roi_data_layer/minibatch.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/roi_data_layer/roidb.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Transform a roidb into a trainable roidb by adding a bunch of metadata.""" 9 | 10 | import numpy as np 11 | from fast_rcnn.config import cfg 12 | from fast_rcnn.bbox_transform import bbox_transform 13 | from utils.cython_bbox import bbox_overlaps 14 | import PIL 15 | 16 | def prepare_roidb(imdb): 17 | """Enrich the imdb's roidb by adding some derived quantities that 18 | are useful for training. This function precomputes the maximum 19 | overlap, taken over ground-truth boxes, between each ROI and 20 | each ground-truth box. The class with maximum overlap is also 21 | recorded. 22 | """ 23 | sizes = [PIL.Image.open(imdb.image_path_at(i)).size 24 | for i in xrange(imdb.num_images)] 25 | roidb = imdb.roidb 26 | for i in xrange(len(imdb.image_index)): 27 | roidb[i]['image'] = imdb.image_path_at(i) 28 | roidb[i]['width'] = sizes[i][0] 29 | roidb[i]['height'] = sizes[i][1] 30 | # need gt_overlaps as a dense array for argmax 31 | gt_overlaps = roidb[i]['gt_overlaps'].toarray() 32 | # max overlap with gt over classes (columns) 33 | max_overlaps = gt_overlaps.max(axis=1) 34 | # gt class that had the max overlap 35 | max_classes = gt_overlaps.argmax(axis=1) 36 | roidb[i]['max_classes'] = max_classes 37 | roidb[i]['max_overlaps'] = max_overlaps 38 | # sanity checks 39 | # max overlap of 0 => class should be zero (background) 40 | zero_inds = np.where(max_overlaps == 0)[0] 41 | assert all(max_classes[zero_inds] == 0) 42 | # max overlap > 0 => class should not be zero (must be a fg class) 43 | nonzero_inds = np.where(max_overlaps > 0)[0] 44 | assert all(max_classes[nonzero_inds] != 0) 45 | 46 | def add_bbox_regression_targets(roidb): 47 | """Add information needed to train bounding-box regressors.""" 48 | assert len(roidb) > 0 49 | assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?' 
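    # What follows: (1) expand each image's RoIs into (class, dx, dy, dw, dh)
    # rows via _compute_targets; (2) take target means/stds either from the
    # precomputed cfg.TRAIN.BBOX_NORMALIZE_MEANS/STDS or empirically from the
    # roidb; (3) optionally normalize the targets in place. The flattened
    # (means, stds) are returned so predictions can be un-normalized later,
    # e.g. means, stds = add_bbox_regression_targets(roidb).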
50 | 51 | num_images = len(roidb) 52 | # Infer number of classes from the number of columns in gt_overlaps 53 | num_classes = roidb[0]['gt_overlaps'].shape[1] 54 | for im_i in xrange(num_images): 55 | rois = roidb[im_i]['boxes'] 56 | max_overlaps = roidb[im_i]['max_overlaps'] 57 | max_classes = roidb[im_i]['max_classes'] 58 | roidb[im_i]['bbox_targets'] = \ 59 | _compute_targets(rois, max_overlaps, max_classes) 60 | 61 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: 62 | # Use fixed / precomputed "means" and "stds" instead of empirical values 63 | means = np.tile( 64 | np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes, 1)) 65 | stds = np.tile( 66 | np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes, 1)) 67 | else: 68 | # Compute values needed for means and stds 69 | # var(x) = E(x^2) - E(x)^2 70 | class_counts = np.zeros((num_classes, 1)) + cfg.EPS 71 | sums = np.zeros((num_classes, 4)) 72 | squared_sums = np.zeros((num_classes, 4)) 73 | for im_i in xrange(num_images): 74 | targets = roidb[im_i]['bbox_targets'] 75 | for cls in xrange(1, num_classes): 76 | cls_inds = np.where(targets[:, 0] == cls)[0] 77 | if cls_inds.size > 0: 78 | class_counts[cls] += cls_inds.size 79 | sums[cls, :] += targets[cls_inds, 1:].sum(axis=0) 80 | squared_sums[cls, :] += \ 81 | (targets[cls_inds, 1:] ** 2).sum(axis=0) 82 | 83 | means = sums / class_counts 84 | stds = np.sqrt(squared_sums / class_counts - means ** 2) 85 | 86 | print 'bbox target means:' 87 | print means 88 | print means[1:, :].mean(axis=0) # ignore bg class 89 | print 'bbox target stdevs:' 90 | print stds 91 | print stds[1:, :].mean(axis=0) # ignore bg class 92 | 93 | # Normalize targets 94 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS: 95 | print "Normalizing targets" 96 | for im_i in xrange(num_images): 97 | targets = roidb[im_i]['bbox_targets'] 98 | for cls in xrange(1, num_classes): 99 | cls_inds = np.where(targets[:, 0] == cls)[0] 100 | roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :] 101 | roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :] 102 | else: 103 | print "NOT normalizing targets" 104 | 105 | # These values will be needed for making predictions 106 | # (the predicts will need to be unnormalized and uncentered) 107 | return means.ravel(), stds.ravel() 108 | 109 | def _compute_targets(rois, overlaps, labels): 110 | """Compute bounding-box regression targets for an image.""" 111 | # Indices of ground-truth ROIs 112 | gt_inds = np.where(overlaps == 1)[0] 113 | if len(gt_inds) == 0: 114 | # Bail if the image has no ground-truth ROIs 115 | return np.zeros((rois.shape[0], 5), dtype=np.float32) 116 | # Indices of examples for which we try to make predictions 117 | ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] 118 | 119 | # Get IoU overlap between each ex ROI and gt ROI 120 | ex_gt_overlaps = bbox_overlaps( 121 | np.ascontiguousarray(rois[ex_inds, :], dtype=np.float), 122 | np.ascontiguousarray(rois[gt_inds, :], dtype=np.float)) 123 | 124 | # Find which gt ROI each ex ROI has max overlap with: 125 | # this will be the ex ROI's gt target 126 | gt_assignment = ex_gt_overlaps.argmax(axis=1) 127 | gt_rois = rois[gt_inds[gt_assignment], :] 128 | ex_rois = rois[ex_inds, :] 129 | 130 | targets = np.zeros((rois.shape[0], 5), dtype=np.float32) 131 | targets[ex_inds, 0] = labels[ex_inds] 132 | targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) 133 | return targets 134 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/roi_data_layer/roidb.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/roi_data_layer/roidb.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/roi_data_layer/roidb2.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Transform a roidb into a trainable roidb by adding a bunch of metadata.""" 9 | 10 | import numpy as np 11 | from fast_rcnn.config import cfg 12 | import utils.cython_bbox 13 | 14 | def prepare_roidb(imdb): 15 | """Enrich the imdb's roidb by adding some derived quantities that 16 | are useful for training. This function precomputes the maximum 17 | overlap, taken over ground-truth boxes, between each ROI and 18 | each ground-truth box. The class with maximum overlap is also 19 | recorded. 20 | """ 21 | roidb = imdb.roidb 22 | for i in xrange(len(imdb.image_index)): 23 | roidb[i]['image'] = imdb.image_path_at(i) 24 | # need gt_overlaps as a dense array for argmax 25 | gt_overlaps = roidb[i]['gt_overlaps'].toarray() 26 | # max overlap with gt over classes (columns) 27 | max_overlaps = gt_overlaps.max(axis=1) 28 | # gt class that had the max overlap 29 | max_classes = gt_overlaps.argmax(axis=1) 30 | 31 | roidb[i]['max_classes'] = max_classes 32 | roidb[i]['max_overlaps'] = max_overlaps 33 | 34 | # sanity checks 35 | # max overlap of 0 => class should be zero (background) 36 | zero_inds = np.where(max_overlaps == 0)[0] 37 | assert all(max_classes[zero_inds] == 0) 38 | # max overlap > 0 => class should not be zero (must be a fg class) 39 | nonzero_inds = np.where(max_overlaps > 0)[0] 40 | assert all(max_classes[nonzero_inds] != 0) 41 | 42 | def add_bbox_regression_targets(roidb): 43 | """Add information needed to train bounding-box regressors.""" 44 | assert len(roidb) > 0 45 | assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?' 
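    # Unlike roidb.py, this older variant computes the regression deltas inline
    # in _compute_targets below rather than calling fast_rcnn.bbox_transform:
    # dx = (gt_ctr_x - ex_ctr_x) / ex_w, dy = (gt_ctr_y - ex_ctr_y) / ex_h,
    # dw = log(gt_w / ex_w), dh = log(gt_h / ex_h).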
46 | 47 | num_images = len(roidb) 48 | # Infer number of classes from the number of columns in gt_overlaps 49 | num_classes = roidb[0]['gt_overlaps'].shape[1] 50 | for im_i in xrange(num_images): 51 | rois = roidb[im_i]['boxes'] 52 | max_overlaps = roidb[im_i]['max_overlaps'] 53 | max_classes = roidb[im_i]['max_classes'] 54 | roidb[im_i]['bbox_targets'] = \ 55 | _compute_targets(rois, max_overlaps, max_classes, num_classes) 56 | 57 | # Compute values needed for means and stds 58 | # var(x) = E(x^2) - E(x)^2 59 | class_counts = np.zeros((num_classes, 1)) + cfg.EPS 60 | sums = np.zeros((num_classes, 4)) 61 | squared_sums = np.zeros((num_classes, 4)) 62 | for im_i in xrange(num_images): 63 | targets = roidb[im_i]['bbox_targets'] 64 | for cls in xrange(1, num_classes): 65 | cls_inds = np.where(targets[:, 0] == cls)[0] 66 | if cls_inds.size > 0: 67 | class_counts[cls] += cls_inds.size 68 | sums[cls, :] += targets[cls_inds, 1:].sum(axis=0) 69 | squared_sums[cls, :] += (targets[cls_inds, 1:] ** 2).sum(axis=0) 70 | 71 | means = sums / class_counts 72 | stds = np.sqrt(squared_sums / class_counts - means ** 2) 73 | 74 | # Normalize targets 75 | for im_i in xrange(num_images): 76 | targets = roidb[im_i]['bbox_targets'] 77 | for cls in xrange(1, num_classes): 78 | cls_inds = np.where(targets[:, 0] == cls)[0] 79 | roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :] 80 | if stds[cls, 0] != 0: 81 | roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :] 82 | 83 | # These values will be needed for making predictions 84 | # (the predicts will need to be unnormalized and uncentered) 85 | return means.ravel(), stds.ravel() 86 | 87 | def _compute_targets(rois, overlaps, labels, num_classes): 88 | """Compute bounding-box regression targets for an image.""" 89 | # Ensure ROIs are floats 90 | rois = rois.astype(np.float, copy=False) 91 | 92 | # Indices of ground-truth ROIs 93 | gt_inds = np.where(overlaps == 1)[0] 94 | # Indices of examples for which we try to make predictions 95 | ex_inds = [] 96 | for i in xrange(1, num_classes): 97 | ex_inds.extend( np.where((labels == i) & (overlaps >= cfg.TRAIN.BBOX_THRESH))[0] ) 98 | 99 | # Get IoU overlap between each ex ROI and gt ROI 100 | ex_gt_overlaps = utils.cython_bbox.bbox_overlaps(rois[ex_inds, :], 101 | rois[gt_inds, :]) 102 | 103 | # Find which gt ROI each ex ROI has max overlap with: 104 | # this will be the ex ROI's gt target 105 | if ex_gt_overlaps.shape[0] != 0: 106 | gt_assignment = ex_gt_overlaps.argmax(axis=1) 107 | else: 108 | gt_assignment = [] 109 | gt_rois = rois[gt_inds[gt_assignment], :] 110 | ex_rois = rois[ex_inds, :] 111 | 112 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + cfg.EPS 113 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + cfg.EPS 114 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 115 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 116 | 117 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + cfg.EPS 118 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + cfg.EPS 119 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 120 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 121 | 122 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 123 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 124 | targets_dw = np.log(gt_widths / ex_widths) 125 | targets_dh = np.log(gt_heights / ex_heights) 126 | 127 | targets = np.zeros((rois.shape[0], 5), dtype=np.float32) 128 | targets[ex_inds, 0] = labels[ex_inds] 129 | targets[ex_inds, 1] = targets_dx 130 | targets[ex_inds, 2] = targets_dy 131 | targets[ex_inds, 3] = targets_dw 132 | targets[ex_inds, 4] = targets_dh 133 | 
return targets 134 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/roi_pooling_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/roi_pooling_layer/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/roi_pooling_layer/__init__.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/roi_pooling_layer/roi_pooling.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/roi_pooling_layer/roi_pooling.so -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/roi_pooling_layer/roi_pooling_op.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/roi_pooling_layer/roi_pooling_op.cu.o -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/roi_pooling_layer/roi_pooling_op.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os.path as osp 3 | 4 | filename = osp.join(osp.dirname(__file__), 'roi_pooling.so') 5 | _roi_pooling_module = tf.load_op_library(filename) 6 | roi_pool = _roi_pooling_module.roi_pool 7 | roi_pool_grad = _roi_pooling_module.roi_pool_grad 8 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/roi_pooling_layer/roi_pooling_op.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/roi_pooling_layer/roi_pooling_op.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/roi_pooling_layer/roi_pooling_op_gpu.h: -------------------------------------------------------------------------------- 1 | #if !GOOGLE_CUDA 2 | #error This file must only be included when building with Cuda support 3 | #endif 4 | 5 | #ifndef TENSORFLOW_USER_OPS_ROIPOOLING_OP_GPU_H_ 6 | #define TENSORFLOW_USER_OPS_ROIPOOLING_OP_GPU_H_ 7 | 8 | #define EIGEN_USE_GPU 9 | 10 | #include "tensorflow/core/framework/tensor_types.h" 11 | #include "tensorflow/core/platform/types.h" 12 | 13 | namespace tensorflow { 14 | 15 | // Run the forward pass of max pooling, optionally writing the argmax indices to 16 | // the mask array, if it is not nullptr. If mask is passed in as nullptr, the 17 | // argmax indices are not written. 
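// ROIPoolForwardLaucher records, for every pooled output element, the index of
// the input element that produced it (argmax_data); ROIPoolBackwardLaucher
// uses those indices to route gradients back to the corresponding inputs.
// Both launchers run on the Eigen GPU device `d`.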
18 | bool ROIPoolForwardLaucher( 19 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 20 | const int width, const int channels, const int pooled_height, 21 | const int pooled_width, const float* bottom_rois, 22 | float* top_data, int* argmax_data, const Eigen::GpuDevice& d); 23 | 24 | bool ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 25 | const int height, const int width, const int channels, const int pooled_height, 26 | const int pooled_width, const float* bottom_rois, 27 | float* bottom_diff, const int* argmax_data, const Eigen::GpuDevice& d); 28 | 29 | } // namespace tensorflow 30 | 31 | #endif // TENSORFLOW_CORE_KERNELS_MAXPOOLING_OP_GPU_H_ 32 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/roi_pooling_layer/roi_pooling_op_grad.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.framework import ops 3 | import roi_pooling_op 4 | import pdb 5 | 6 | 7 | @ops.RegisterShape("RoiPool") 8 | def _roi_pool_shape(op): 9 | """Shape function for the RoiPool op. 10 | 11 | """ 12 | dims_data = op.inputs[0].get_shape().as_list() 13 | channels = dims_data[3] 14 | dims_rois = op.inputs[1].get_shape().as_list() 15 | num_rois = dims_rois[0] 16 | 17 | pooled_height = op.get_attr('pooled_height') 18 | pooled_width = op.get_attr('pooled_width') 19 | 20 | output_shape = tf.TensorShape([num_rois, pooled_height, pooled_width, channels]) 21 | return [output_shape, output_shape] 22 | 23 | @ops.RegisterGradient("RoiPool") 24 | def _roi_pool_grad(op, grad, _): 25 | """The gradients for `roi_pool`. 26 | Args: 27 | op: The `roi_pool` `Operation` that we are differentiating, which we can use 28 | to find the inputs and outputs of the original op. 29 | grad: Gradient with respect to the output of the `roi_pool` op. 30 | Returns: 31 | Gradients with respect to the input of `zero_out`. 
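    (Concretely: the single gradient tensor returned below is with respect to
    the `data` input of `roi_pool`; the RoIs input receives no gradient, hence
    the trailing None.)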
32 | """ 33 | data = op.inputs[0] 34 | rois = op.inputs[1] 35 | argmax = op.outputs[1] 36 | pooled_height = op.get_attr('pooled_height') 37 | pooled_width = op.get_attr('pooled_width') 38 | spatial_scale = op.get_attr('spatial_scale') 39 | 40 | # compute gradient 41 | data_grad = roi_pooling_op.roi_pool_grad(data, rois, argmax, grad, pooled_height, pooled_width, spatial_scale) 42 | 43 | return [data_grad, None] # List of one Tensor, since we have one input 44 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/roi_pooling_layer/roi_pooling_op_grad.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/roi_pooling_layer/roi_pooling_op_grad.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/roi_pooling_layer/roi_pooling_op_test.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import roi_pooling_op 4 | import roi_pooling_op_grad 5 | import tensorflow as tf 6 | import pdb 7 | 8 | 9 | def weight_variable(shape): 10 | initial = tf.truncated_normal(shape, stddev=0.1) 11 | return tf.Variable(initial) 12 | 13 | def conv2d(x, W): 14 | return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') 15 | 16 | array = np.random.rand(32, 100, 100, 3) 17 | data = tf.convert_to_tensor(array, dtype=tf.float32) 18 | rois = tf.convert_to_tensor([[0, 10, 10, 20, 20], [31, 30, 30, 40, 40]], dtype=tf.float32) 19 | 20 | W = weight_variable([3, 3, 3, 1]) 21 | h = conv2d(data, W) 22 | 23 | [y, argmax] = roi_pooling_op.roi_pool(h, rois, 6, 6, 1.0/3) 24 | pdb.set_trace() 25 | y_data = tf.convert_to_tensor(np.ones((2, 6, 6, 1)), dtype=tf.float32) 26 | print y_data, y, argmax 27 | 28 | # Minimize the mean squared errors. 29 | loss = tf.reduce_mean(tf.square(y - y_data)) 30 | optimizer = tf.train.GradientDescentOptimizer(0.5) 31 | train = optimizer.minimize(loss) 32 | 33 | init = tf.initialize_all_variables() 34 | 35 | # Launch the graph. 36 | sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) 37 | sess.run(init) 38 | pdb.set_trace() 39 | for step in xrange(10): 40 | sess.run(train) 41 | print(step, sess.run(W)) 42 | print(sess.run(y)) 43 | 44 | #with tf.device('/gpu:0'): 45 | # result = module.roi_pool(data, rois, 1, 1, 1.0/1) 46 | # print result.eval() 47 | #with tf.device('/cpu:0'): 48 | # run(init) 49 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/roi_pooling_layer/work_sharder.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | ==============================================================================*/ 15 | 16 | #ifndef TENSORFLOW_UTIL_WORK_SHARDER_H_ 17 | #define TENSORFLOW_UTIL_WORK_SHARDER_H_ 18 | 19 | #include 20 | 21 | #include "tensorflow/core/lib/core/threadpool.h" 22 | #include "tensorflow/core/platform/types.h" 23 | 24 | namespace tensorflow { 25 | 26 | // Shards the "total" unit of work assuming each unit of work having 27 | // roughly "cost_per_unit". Each unit of work is indexed 0, 1, ..., 28 | // total - 1. Each shard contains 1 or more units of work and the 29 | // total cost of each shard is roughly the same. The calling thread and the 30 | // "workers" are used to compute each shard (calling work(start, 31 | // limit). A common configuration is that "workers" is a thread pool 32 | // with at least "max_parallelism" threads. 33 | // 34 | // "cost_per_unit" is an estimate of the number of CPU cycles (or nanoseconds 35 | // if not CPU-bound) to complete a unit of work. Overestimating creates too 36 | // many shards and CPU time will be dominated by per-shard overhead, such as 37 | // Context creation. Underestimating may not fully make use of the specified 38 | // parallelism. 39 | // 40 | // "work" should be a callable taking (int64, int64) arguments. 41 | // work(start, limit) computes the work units from [start, 42 | // limit), i.e., [start, limit) is a shard. 43 | // 44 | // REQUIRES: max_parallelism >= 0 45 | // REQUIRES: workers != nullptr 46 | // REQUIRES: total >= 0 47 | // REQUIRES: cost_per_unit >= 0 48 | void Shard(int max_parallelism, thread::ThreadPool* workers, int64 total, 49 | int64 cost_per_unit, std::function work); 50 | 51 | } // end namespace tensorflow 52 | 53 | #endif // TENSORFLOW_UTIL_WORK_SHARDER_H_ 54 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/rpn_msr/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/rpn_msr/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/rpn_msr/__init__.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/rpn_msr/anchor_target_layer_tf.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/rpn_msr/anchor_target_layer_tf.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/rpn_msr/generate.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from fast_rcnn.config import cfg 9 | from utils.blob import 
im_list_to_blob 10 | from utils.timer import Timer 11 | import numpy as np 12 | import cv2 13 | 14 | def _vis_proposals(im, dets, thresh=0.5): 15 | """Draw detected bounding boxes.""" 16 | inds = np.where(dets[:, -1] >= thresh)[0] 17 | if len(inds) == 0: 18 | return 19 | 20 | class_name = 'obj' 21 | im = im[:, :, (2, 1, 0)] 22 | fig, ax = plt.subplots(figsize=(12, 12)) 23 | ax.imshow(im, aspect='equal') 24 | for i in inds: 25 | bbox = dets[i, :4] 26 | score = dets[i, -1] 27 | 28 | ax.add_patch( 29 | plt.Rectangle((bbox[0], bbox[1]), 30 | bbox[2] - bbox[0], 31 | bbox[3] - bbox[1], fill=False, 32 | edgecolor='red', linewidth=3.5) 33 | ) 34 | ax.text(bbox[0], bbox[1] - 2, 35 | '{:s} {:.3f}'.format(class_name, score), 36 | bbox=dict(facecolor='blue', alpha=0.5), 37 | fontsize=14, color='white') 38 | 39 | ax.set_title(('{} detections with ' 40 | 'p({} | box) >= {:.1f}').format(class_name, class_name, 41 | thresh), 42 | fontsize=14) 43 | plt.axis('off') 44 | plt.tight_layout() 45 | plt.draw() 46 | 47 | def _get_image_blob(im): 48 | """Converts an image into a network input. 49 | 50 | Arguments: 51 | im (ndarray): a color image in BGR order 52 | 53 | Returns: 54 | blob (ndarray): a data blob holding an image pyramid 55 | im_scale_factors (list): list of image scales (relative to im) used 56 | in the image pyramid 57 | """ 58 | im_orig = im.astype(np.float32, copy=True) 59 | im_orig -= cfg.PIXEL_MEANS 60 | 61 | processed_ims = [] 62 | 63 | assert len(cfg.TEST.SCALES_BASE) == 1 64 | im_scale = cfg.TRAIN.SCALES_BASE[0] 65 | 66 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, 67 | interpolation=cv2.INTER_LINEAR) 68 | im_info = np.hstack((im.shape[:2], im_scale))[np.newaxis, :] 69 | processed_ims.append(im) 70 | 71 | # Create a blob to hold the input images 72 | blob = im_list_to_blob(processed_ims) 73 | 74 | return blob, im_info 75 | 76 | def im_proposals(net, im): 77 | """Generate RPN proposals on a single image.""" 78 | blobs = {} 79 | blobs['data'], blobs['im_info'] = _get_image_blob(im) 80 | net.blobs['data'].reshape(*(blobs['data'].shape)) 81 | net.blobs['im_info'].reshape(*(blobs['im_info'].shape)) 82 | blobs_out = net.forward( 83 | data=blobs['data'].astype(np.float32, copy=False), 84 | im_info=blobs['im_info'].astype(np.float32, copy=False)) 85 | 86 | scale = blobs['im_info'][0, 2] 87 | boxes = blobs_out['rois'][:, 1:].copy() / scale 88 | scores = blobs_out['scores'].copy() 89 | return boxes, scores 90 | 91 | def imdb_proposals(net, imdb): 92 | """Generate RPN proposals on all images in an imdb.""" 93 | 94 | _t = Timer() 95 | imdb_boxes = [[] for _ in xrange(imdb.num_images)] 96 | for i in xrange(imdb.num_images): 97 | im = cv2.imread(imdb.image_path_at(i)) 98 | _t.tic() 99 | imdb_boxes[i], scores = im_proposals(net, im) 100 | _t.toc() 101 | print 'im_proposals: {:d}/{:d} {:.3f}s' \ 102 | .format(i + 1, imdb.num_images, _t.average_time) 103 | if 0: 104 | dets = np.hstack((imdb_boxes[i], scores)) 105 | # from IPython import embed; embed() 106 | _vis_proposals(im, dets[:3, :], thresh=0.9) 107 | plt.show() 108 | 109 | return imdb_boxes 110 | 111 | def imdb_proposals_det(net, imdb): 112 | """Generate RPN proposals on all images in an imdb.""" 113 | 114 | _t = Timer() 115 | imdb_boxes = [[] for _ in xrange(imdb.num_images)] 116 | for i in xrange(imdb.num_images): 117 | im = cv2.imread(imdb.image_path_at(i)) 118 | _t.tic() 119 | boxes, scores = im_proposals(net, im) 120 | _t.toc() 121 | print 'im_proposals: {:d}/{:d} {:.3f}s' \ 122 | .format(i + 1, imdb.num_images, _t.average_time) 
123 | dets = np.hstack((boxes, scores)) 124 | imdb_boxes[i] = dets 125 | 126 | if 0: 127 | # from IPython import embed; embed() 128 | _vis_proposals(im, dets[:3, :], thresh=0.9) 129 | plt.show() 130 | 131 | return imdb_boxes 132 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/rpn_msr/generate.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/rpn_msr/generate.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/rpn_msr/generate_anchors.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 11 | # 12 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 13 | # >> anchors 14 | # 15 | # anchors = 16 | # 17 | # -83 -39 100 56 18 | # -175 -87 192 104 19 | # -359 -183 376 200 20 | # -55 -55 72 72 21 | # -119 -119 136 136 22 | # -247 -247 264 264 23 | # -35 -79 52 96 24 | # -79 -167 96 184 25 | # -167 -343 184 360 26 | 27 | #array([[ -83., -39., 100., 56.], 28 | # [-175., -87., 192., 104.], 29 | # [-359., -183., 376., 200.], 30 | # [ -55., -55., 72., 72.], 31 | # [-119., -119., 136., 136.], 32 | # [-247., -247., 264., 264.], 33 | # [ -35., -79., 52., 96.], 34 | # [ -79., -167., 96., 184.], 35 | # [-167., -343., 184., 360.]]) 36 | 37 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 38 | scales=2**np.arange(3, 6)): 39 | """ 40 | Generate anchor (reference) windows by enumerating aspect ratios X 41 | scales wrt a reference (0, 0, 15, 15) window. 42 | """ 43 | 44 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 45 | ratio_anchors = _ratio_enum(base_anchor, ratios) 46 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 47 | for i in xrange(ratio_anchors.shape[0])]) 48 | return anchors 49 | 50 | def _whctrs(anchor): 51 | """ 52 | Return width, height, x center, and y center for an anchor (window). 53 | """ 54 | 55 | w = anchor[2] - anchor[0] + 1 56 | h = anchor[3] - anchor[1] + 1 57 | x_ctr = anchor[0] + 0.5 * (w - 1) 58 | y_ctr = anchor[1] + 0.5 * (h - 1) 59 | return w, h, x_ctr, y_ctr 60 | 61 | def _mkanchors(ws, hs, x_ctr, y_ctr): 62 | """ 63 | Given a vector of widths (ws) and heights (hs) around a center 64 | (x_ctr, y_ctr), output a set of anchors (windows). 65 | """ 66 | 67 | ws = ws[:, np.newaxis] 68 | hs = hs[:, np.newaxis] 69 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 70 | y_ctr - 0.5 * (hs - 1), 71 | x_ctr + 0.5 * (ws - 1), 72 | y_ctr + 0.5 * (hs - 1))) 73 | return anchors 74 | 75 | def _ratio_enum(anchor, ratios): 76 | """ 77 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 
78 | """ 79 | 80 | w, h, x_ctr, y_ctr = _whctrs(anchor) 81 | size = w * h 82 | size_ratios = size / ratios 83 | ws = np.round(np.sqrt(size_ratios)) 84 | hs = np.round(ws * ratios) 85 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 86 | return anchors 87 | 88 | def _scale_enum(anchor, scales): 89 | """ 90 | Enumerate a set of anchors for each scale wrt an anchor. 91 | """ 92 | 93 | w, h, x_ctr, y_ctr = _whctrs(anchor) 94 | ws = w * scales 95 | hs = h * scales 96 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 97 | return anchors 98 | 99 | if __name__ == '__main__': 100 | import time 101 | t = time.time() 102 | a = generate_anchors() 103 | print time.time() - t 104 | print a 105 | from IPython import embed; embed() 106 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/rpn_msr/generate_anchors.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/rpn_msr/generate_anchors.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/rpn_msr/proposal_layer_tf.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/rpn_msr/proposal_layer_tf.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/rpn_msr/proposal_target_layer_tf.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/rpn_msr/proposal_target_layer_tf.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | import numpy as np 11 | from distutils.core import setup 12 | from distutils.extension import Extension 13 | from Cython.Distutils import build_ext 14 | 15 | def find_in_path(name, path): 16 | "Find a file in a search path" 17 | #adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 18 | for dir in path.split(os.pathsep): 19 | binpath = pjoin(dir, name) 20 | if os.path.exists(binpath): 21 | return os.path.abspath(binpath) 22 | return None 23 | 24 | def locate_cuda(): 25 | """Locate the CUDA environment on the system 26 | 27 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 28 | and values giving the absolute path to each directory. 29 | 30 | Starts by looking for the CUDAHOME env variable. If not found, everything 31 | is based on finding 'nvcc' in the PATH. 
32 | """ 33 | 34 | # first check if the CUDAHOME env variable is in use 35 | if 'CUDAHOME' in os.environ: 36 | home = os.environ['CUDAHOME'] 37 | nvcc = pjoin(home, 'bin', 'nvcc') 38 | else: 39 | # otherwise, search the PATH for NVCC 40 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 41 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 42 | if nvcc is None: 43 | return None; 44 | home = os.path.dirname(os.path.dirname(nvcc)) 45 | 46 | cudaconfig = {'home':home, 'nvcc':nvcc, 47 | 'include': pjoin(home, 'include'), 48 | 'lib64': pjoin(home, 'lib64')} 49 | for k, v in cudaconfig.iteritems(): 50 | if not os.path.exists(v): 51 | return None; 52 | 53 | return cudaconfig 54 | 55 | CUDA = locate_cuda() 56 | 57 | # Obtain the numpy include directory. This logic works across numpy versions. 58 | try: 59 | numpy_include = np.get_include() 60 | except AttributeError: 61 | numpy_include = np.get_numpy_include() 62 | 63 | def customize_compiler_for_nvcc(self): 64 | """inject deep into distutils to customize how the dispatch 65 | to gcc/nvcc works. 66 | 67 | If you subclass UnixCCompiler, it's not trivial to get your subclass 68 | injected in, and still have the right customizations (i.e. 69 | distutils.sysconfig.customize_compiler) run on it. So instead of going 70 | the OO route, I have this. Note, it's kindof like a wierd functional 71 | subclassing going on.""" 72 | 73 | # tell the compiler it can processes .cu 74 | self.src_extensions.append('.cu') 75 | 76 | # save references to the default compiler_so and _comple methods 77 | default_compiler_so = self.compiler_so 78 | super = self._compile 79 | 80 | # now redefine the _compile method. This gets executed for each 81 | # object but distutils doesn't have the ability to change compilers 82 | # based on source extension: we add it. 
83 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 84 | print extra_postargs 85 | if os.path.splitext(src)[1] == '.cu': 86 | # use the cuda for .cu files 87 | self.set_executable('compiler_so', CUDA['nvcc']) 88 | # use only a subset of the extra_postargs, which are 1-1 translated 89 | # from the extra_compile_args in the Extension class 90 | postargs = extra_postargs['nvcc'] 91 | else: 92 | postargs = extra_postargs['gcc'] 93 | 94 | super(obj, src, ext, cc_args, postargs, pp_opts) 95 | # reset the default compiler_so, which we might have changed for cuda 96 | self.compiler_so = default_compiler_so 97 | 98 | # inject our redefined _compile method into the class 99 | self._compile = _compile 100 | 101 | 102 | # run the customize_compiler 103 | class custom_build_ext(build_ext): 104 | def build_extensions(self): 105 | customize_compiler_for_nvcc(self.compiler) 106 | build_ext.build_extensions(self) 107 | 108 | ext_modules = [ 109 | Extension( 110 | "utils.cython_bbox", 111 | ["utils/bbox.pyx"], 112 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 113 | include_dirs = [numpy_include] 114 | ), 115 | Extension( 116 | "utils.cython_nms", 117 | ["utils/nms.pyx"], 118 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 119 | include_dirs = [numpy_include] 120 | ), 121 | Extension( 122 | "nms.cpu_nms", 123 | ["nms/cpu_nms.pyx"], 124 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 125 | include_dirs = [numpy_include] 126 | ) 127 | ] 128 | 129 | if CUDA: 130 | ext_modules.append( 131 | Extension('nms.gpu_nms', 132 | ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'], 133 | library_dirs=[CUDA['lib64']], 134 | libraries=['cudart'], 135 | language='c++', 136 | runtime_library_dirs=[CUDA['lib64']], 137 | # this syntax is specific to this build system 138 | # we're only going to use certain compiler args with nvcc and not with gcc 139 | # the implementation of this trick is in customize_compiler() below 140 | extra_compile_args={'gcc': ["-Wno-unused-function"], 141 | 'nvcc': ['-arch=sm_35', 142 | '--ptxas-options=-v', 143 | '-c', 144 | '--compiler-options', 145 | "'-fPIC'"]}, 146 | include_dirs = [numpy_include, CUDA['include']] 147 | ) 148 | ) 149 | 150 | setup( 151 | name='fast_rcnn', 152 | ext_modules=ext_modules, 153 | # inject our custom trigger 154 | cmdclass={'build_ext': custom_build_ext}, 155 | ) 156 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/utils/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/utils/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/utils/__init__.pyc -------------------------------------------------------------------------------- 
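The setup.py above builds the Cython helpers (utils.cython_bbox, utils.cython_nms, nms.cpu_nms and, when a CUDA toolkit is located, nms.gpu_nms) into importable extension modules; the .gitignore entries (*.c, *.cpp, *.so) cover the files that this build generates. A minimal sketch of calling one of the compiled modules, assuming the extensions have already been built (cd lib && make, as the README describes) and that lib/ is importable as set up by tools/_init_paths.py:

```python
# Sketch only: assumes the Cython extensions are built (cd lib && make)
# and that lib/ is on the Python path (see tools/_init_paths.py).
import numpy as np
from utils.cython_bbox import bbox_overlaps  # compiled from utils/bbox.pyx below

# Boxes are [x1, y1, x2, y2]; bbox.pyx expects 2-D float arrays.
boxes = np.array([[0., 0., 10., 10.], [5., 5., 15., 15.]])
query_boxes = np.array([[0., 0., 10., 10.]])

# (N, K) matrix of IoU overlaps between boxes and query_boxes.
overlaps = bbox_overlaps(boxes, query_boxes)
print(overlaps)  # overlaps[0, 0] == 1.0 for the identical box
```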
/Faster-RCNN_TF/lib/utils/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | 10 | import numpy as np 11 | import cv2 12 | 13 | def im_list_to_blob(ims): 14 | """Convert a list of images into a network input. 15 | 16 | Assumes images are already prepared (means subtracted, BGR order, ...). 
17 | """ 18 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 19 | num_images = len(ims) 20 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 21 | dtype=np.float32) 22 | for i in xrange(num_images): 23 | im = ims[i] 24 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 25 | 26 | return blob 27 | 28 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 29 | """Mean subtract and scale an image for use in a blob.""" 30 | im = im.astype(np.float32, copy=False) 31 | im -= pixel_means 32 | im_shape = im.shape 33 | im_size_min = np.min(im_shape[0:2]) 34 | im_size_max = np.max(im_shape[0:2]) 35 | im_scale = float(target_size) / float(im_size_min) 36 | # Prevent the biggest axis from being more than MAX_SIZE 37 | if np.round(im_scale * im_size_max) > max_size: 38 | im_scale = float(max_size) / float(im_size_max) 39 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 40 | interpolation=cv2.INTER_LINEAR) 41 | 42 | return im, im_scale 43 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/utils/blob.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/utils/blob.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/utils/boxes_grid.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Subcategory CNN 3 | # Copyright (c) 2015 CVGL Stanford 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yu Xiang 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import math 10 | from fast_rcnn.config import cfg 11 | 12 | def get_boxes_grid(image_height, image_width): 13 | """ 14 | Return the boxes on image grid. 15 | """ 16 | 17 | # height and width of the heatmap 18 | if cfg.NET_NAME == 'CaffeNet': 19 | height = np.floor((image_height * max(cfg.TRAIN.SCALES) - 1) / 4.0 + 1) 20 | height = np.floor((height - 1) / 2.0 + 1 + 0.5) 21 | height = np.floor((height - 1) / 2.0 + 1 + 0.5) 22 | 23 | width = np.floor((image_width * max(cfg.TRAIN.SCALES) - 1) / 4.0 + 1) 24 | width = np.floor((width - 1) / 2.0 + 1 + 0.5) 25 | width = np.floor((width - 1) / 2.0 + 1 + 0.5) 26 | elif cfg.NET_NAME == 'VGGnet': 27 | height = np.floor(image_height * max(cfg.TRAIN.SCALES) / 2.0 + 0.5) 28 | height = np.floor(height / 2.0 + 0.5) 29 | height = np.floor(height / 2.0 + 0.5) 30 | height = np.floor(height / 2.0 + 0.5) 31 | 32 | width = np.floor(image_width * max(cfg.TRAIN.SCALES) / 2.0 + 0.5) 33 | width = np.floor(width / 2.0 + 0.5) 34 | width = np.floor(width / 2.0 + 0.5) 35 | width = np.floor(width / 2.0 + 0.5) 36 | else: 37 | assert (1), 'The network architecture is not supported in utils.get_boxes_grid!' 
38 | 39 | # compute the grid box centers 40 | h = np.arange(height) 41 | w = np.arange(width) 42 | y, x = np.meshgrid(h, w, indexing='ij') 43 | centers = np.dstack((x, y)) 44 | centers = np.reshape(centers, (-1, 2)) 45 | num = centers.shape[0] 46 | 47 | # compute width and height of grid box 48 | area = cfg.TRAIN.KERNEL_SIZE * cfg.TRAIN.KERNEL_SIZE 49 | aspect = cfg.TRAIN.ASPECTS # height / width 50 | num_aspect = len(aspect) 51 | widths = np.zeros((1, num_aspect), dtype=np.float32) 52 | heights = np.zeros((1, num_aspect), dtype=np.float32) 53 | for i in xrange(num_aspect): 54 | widths[0,i] = math.sqrt(area / aspect[i]) 55 | heights[0,i] = widths[0,i] * aspect[i] 56 | 57 | # construct grid boxes 58 | centers = np.repeat(centers, num_aspect, axis=0) 59 | widths = np.tile(widths, num).transpose() 60 | heights = np.tile(heights, num).transpose() 61 | 62 | x1 = np.reshape(centers[:,0], (-1, 1)) - widths * 0.5 63 | x2 = np.reshape(centers[:,0], (-1, 1)) + widths * 0.5 64 | y1 = np.reshape(centers[:,1], (-1, 1)) - heights * 0.5 65 | y2 = np.reshape(centers[:,1], (-1, 1)) + heights * 0.5 66 | 67 | boxes_grid = np.hstack((x1, y1, x2, y2)) / cfg.TRAIN.SPATIAL_SCALE 68 | 69 | return boxes_grid, centers[:,0], centers[:,1] 70 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/utils/boxes_grid.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/utils/boxes_grid.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/utils/nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def nms(dets, thresh): 11 | x1 = dets[:, 0] 12 | y1 = dets[:, 1] 13 | x2 = dets[:, 2] 14 | y2 = dets[:, 3] 15 | scores = dets[:, 4] 16 | 17 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 18 | order = scores.argsort()[::-1] 19 | 20 | keep = [] 21 | while order.size > 0: 22 | i = order[0] 23 | keep.append(i) 24 | xx1 = np.maximum(x1[i], x1[order[1:]]) 25 | yy1 = np.maximum(y1[i], y1[order[1:]]) 26 | xx2 = np.minimum(x2[i], x2[order[1:]]) 27 | yy2 = np.minimum(y2[i], y2[order[1:]]) 28 | 29 | w = np.maximum(0.0, xx2 - xx1 + 1) 30 | h = np.maximum(0.0, yy2 - yy1 + 1) 31 | inter = w * h 32 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 33 | 34 | inds = np.where(ovr <= thresh)[0] 35 | order = order[inds + 1] 36 | 37 | return keep 38 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/utils/nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= 
b else b 16 | 17 | def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | 70 | def nms_new(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 71 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 72 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 73 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 74 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 75 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 76 | 77 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 78 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 79 | 80 | cdef int ndets = dets.shape[0] 81 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 82 | np.zeros((ndets), dtype=np.int) 83 | 84 | # nominal indices 85 | cdef int _i, _j 86 | # sorted indices 87 | cdef int i, j 88 | # temp variables for box i's (the box currently under consideration) 89 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 90 | # variables for computing overlap with box j (lower scoring box) 91 | cdef np.float32_t xx1, yy1, xx2, yy2 92 | cdef np.float32_t w, h 93 | cdef np.float32_t inter, ovr 94 | 95 | keep = [] 96 | for _i in range(ndets): 97 | i = order[_i] 98 | if suppressed[i] == 1: 99 | continue 100 | keep.append(i) 101 | ix1 = x1[i] 102 | iy1 = y1[i] 103 | ix2 = x2[i] 104 | iy2 = y2[i] 105 | iarea = areas[i] 106 | for _j in range(_i + 1, ndets): 107 | j = order[_j] 108 | if suppressed[j] == 1: 109 | continue 110 | xx1 = max(ix1, x1[j]) 111 | yy1 = max(iy1, y1[j]) 112 | xx2 = min(ix2, x2[j]) 113 | yy2 = min(iy2, y2[j]) 114 | w = max(0.0, xx2 - xx1 + 1) 115 | h = max(0.0, yy2 - yy1 + 1) 116 | inter = w * h 117 | ovr = inter / (iarea + areas[j] - inter) 118 | ovr1 = inter / iarea 119 | ovr2 = inter / areas[j] 120 | if ovr >= thresh or ovr1 > 0.95 or ovr2 > 0.95: 121 | suppressed[j] = 1 122 | 123 | return keep 124 | 
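Both NMS variants above take an (N, 5) float32 array of detections laid out as [x1, y1, x2, y2, score] plus an IoU threshold, and greedily suppress lower-scoring boxes that overlap an already-kept box by more than the threshold (nms_new additionally suppresses boxes that are almost entirely contained in, or almost entirely cover, a kept box). A small usage sketch with the pure-Python nms from utils/nms.py shown earlier:

```python
# Usage sketch for the pure-Python nms() in utils/nms.py.
import numpy as np
from utils.nms import nms

dets = np.array([
    [10, 10, 50, 50, 0.9],      # highest score, kept
    [12, 12, 48, 48, 0.8],      # IoU ~0.81 with the first box -> suppressed
    [100, 100, 140, 140, 0.7],  # disjoint from the first box -> kept
], dtype=np.float32)

keep = nms(dets, 0.3)
print(keep)  # [0, 2]
```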
-------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 18 | 19 | def tic(self): 20 | # using time.time instead of time.clock because time time.clock 21 | # does not normalize for multithreading 22 | self.start_time = time.time() 23 | 24 | def toc(self, average=True): 25 | self.diff = time.time() - self.start_time 26 | self.total_time += self.diff 27 | self.calls += 1 28 | self.average_time = self.total_time / self.calls 29 | if average: 30 | return self.average_time 31 | else: 32 | return self.diff 33 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/lib/utils/timer.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/lib/utils/timer.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Set up paths for Fast R-CNN.""" 9 | 10 | import os.path as osp 11 | import sys 12 | 13 | def add_path(path): 14 | if path not in sys.path: 15 | sys.path.insert(0, path) 16 | 17 | this_dir = osp.dirname(__file__) 18 | 19 | # Add caffe to PYTHONPATH 20 | caffe_path = osp.join(this_dir, '..', 'caffe-fast-rcnn', 'python') 21 | add_path(caffe_path) 22 | 23 | # Add lib to PYTHONPATH 24 | lib_path = osp.join(this_dir, '..', 'lib') 25 | add_path(lib_path) 26 | 27 | 28 | lib_path = osp.join(this_dir,'mftracker') 29 | add_path(lib_path) 30 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/tools/_init_paths.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/Faster-RCNN_TF/tools/_init_paths.pyc -------------------------------------------------------------------------------- /Faster-RCNN_TF/tools/demo.py: -------------------------------------------------------------------------------- 1 | import _init_paths 2 | import tensorflow as tf 3 | from fast_rcnn.config import cfg 4 | from fast_rcnn.test import im_detect 5 | from fast_rcnn.nms_wrapper import nms 6 | from utils.timer import Timer 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | import os, sys, cv2 10 | import argparse 11 | from networks.factory import get_network 12 | 13 | 14 | CLASSES = ('__background__', 15 | 'text') 16 | 17 | 18 | #CLASSES = 
('__background__','person','bike','motorbike','car','bus') 19 | 20 | def vis_detections(im, class_name, dets,ax, thresh=0.5): 21 | """Draw detected bounding boxes.""" 22 | inds = np.where(dets[:, -1] >= thresh)[0] 23 | if len(inds) == 0: 24 | return 25 | 26 | for i in inds: 27 | bbox = dets[i, :4] 28 | score = dets[i, -1] 29 | 30 | ax.add_patch( 31 | plt.Rectangle((bbox[0], bbox[1]), 32 | bbox[2] - bbox[0], 33 | bbox[3] - bbox[1], fill=False, 34 | edgecolor='red', linewidth=3.5) 35 | ) 36 | ax.text(bbox[0], bbox[1] - 2, 37 | '{:s} {:.3f}'.format(class_name, score), 38 | bbox=dict(facecolor='blue', alpha=0.5), 39 | fontsize=14, color='white') 40 | 41 | ax.set_title(('{} detections with ' 42 | 'p({} | box) >= {:.1f}').format(class_name, class_name, 43 | thresh), 44 | fontsize=14) 45 | plt.axis('off') 46 | plt.tight_layout() 47 | plt.draw() 48 | 49 | 50 | def demo(sess, net, image_name): 51 | """Detect object classes in an image using pre-computed object proposals.""" 52 | 53 | # Load the demo image 54 | im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name) 55 | #im_file = os.path.join('/home/corgi/Lab/label/pos_frame/ACCV/training/000001/',image_name) 56 | im = cv2.imread(im_file) 57 | 58 | # Detect all object classes and regress object bounds 59 | timer = Timer() 60 | timer.tic() 61 | scores, boxes = im_detect(sess, net, im) 62 | timer.toc() 63 | print ('Detection took {:.3f}s for ' 64 | '{:d} object proposals').format(timer.total_time, boxes.shape[0]) 65 | 66 | # Visualize detections for each class 67 | im = im[:, :, (2, 1, 0)] 68 | fig, ax = plt.subplots(figsize=(12, 12)) 69 | ax.imshow(im, aspect='equal') 70 | 71 | CONF_THRESH = 0.8 72 | NMS_THRESH = 0.3 73 | for cls_ind, cls in enumerate(CLASSES[1:]): 74 | cls_ind += 1 # because we skipped background 75 | cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] 76 | cls_scores = scores[:, cls_ind] 77 | dets = np.hstack((cls_boxes, 78 | cls_scores[:, np.newaxis])).astype(np.float32) 79 | keep = nms(dets, NMS_THRESH) 80 | dets = dets[keep, :] 81 | vis_detections(im, cls, dets, ax, thresh=CONF_THRESH) 82 | 83 | def parse_args(): 84 | """Parse input arguments.""" 85 | parser = argparse.ArgumentParser(description='Faster R-CNN demo') 86 | parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]', 87 | default=0, type=int) 88 | parser.add_argument('--cpu', dest='cpu_mode', 89 | help='Use CPU mode (overrides --gpu)', 90 | action='store_true') 91 | parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16]', 92 | default='VGGnet_test') 93 | parser.add_argument('--model', dest='model', help='Model path', 94 | default=' ') 95 | 96 | args = parser.parse_args() 97 | 98 | return args 99 | if __name__ == '__main__': 100 | cfg.TEST.HAS_RPN = True # Use RPN for proposals 101 | 102 | args = parse_args() 103 | 104 | if args.model == ' ': 105 | raise IOError(('Error: Model not found.\n')) 106 | 107 | # init session 108 | sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) 109 | # load network 110 | net = get_network(args.demo_net) 111 | # load model 112 | saver = tf.train.Saver(write_version=tf.train.SaverDef.V1) 113 | saver.restore(sess, args.model) 114 | 115 | #sess.run(tf.initialize_all_variables()) 116 | 117 | print '\n\nLoaded network {:s}'.format(args.model) 118 | 119 | # Warmup on a dummy image 120 | im = 128 * np.ones((300, 300, 3), dtype=np.uint8) 121 | for i in xrange(2): 122 | _, _= im_detect(sess, net, im) 123 | 124 | im_names = ['101.jpg', '102.jpg', '103.jpg', 125 | '104.jpg', '105.jpg'] 126 | 127 
| 128 | for im_name in im_names: 129 | print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' 130 | print 'Demo for data/demo/{}'.format(im_name) 131 | demo(sess, net, im_name) 132 | 133 | plt.show() 134 | 135 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/tools/test_net.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """Test a Fast R-CNN network on an image database.""" 11 | 12 | import _init_paths 13 | from fast_rcnn.test import test_net 14 | from fast_rcnn.config import cfg, cfg_from_file 15 | from datasets.factory import get_imdb 16 | from networks.factory import get_network 17 | import argparse 18 | import pprint 19 | import time, os, sys 20 | import tensorflow as tf 21 | 22 | def parse_args(): 23 | """ 24 | Parse input arguments 25 | """ 26 | parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') 27 | parser.add_argument('--device', dest='device', help='device to use', 28 | default='cpu', type=str) 29 | parser.add_argument('--device_id', dest='device_id', help='device id to use', 30 | default=0, type=int) 31 | parser.add_argument('--def', dest='prototxt', 32 | help='prototxt file defining the network', 33 | default=None, type=str) 34 | parser.add_argument('--weights', dest='model', 35 | help='model to test', 36 | default=None, type=str) 37 | parser.add_argument('--cfg', dest='cfg_file', 38 | help='optional config file', default=None, type=str) 39 | parser.add_argument('--wait', dest='wait', 40 | help='wait until net file exists', 41 | default=True, type=bool) 42 | parser.add_argument('--imdb', dest='imdb_name', 43 | help='dataset to test', 44 | default='voc_2007_test', type=str) 45 | parser.add_argument('--comp', dest='comp_mode', help='competition mode', 46 | action='store_true') 47 | parser.add_argument('--network', dest='network_name', 48 | help='name of the network', 49 | default=None, type=str) 50 | 51 | if len(sys.argv) == 1: 52 | parser.print_help() 53 | sys.exit(1) 54 | 55 | args = parser.parse_args() 56 | return args 57 | 58 | if __name__ == '__main__': 59 | args = parse_args() 60 | 61 | print('Called with args:') 62 | print(args) 63 | 64 | if args.cfg_file is not None: 65 | cfg_from_file(args.cfg_file) 66 | 67 | print('Using config:') 68 | pprint.pprint(cfg) 69 | 70 | # while not os.path.exists(args.model) and args.wait: 71 | # print('Waiting for {} to exist...'.format(args.model)) 72 | # time.sleep(10) 73 | 74 | weights_filename = os.path.splitext(os.path.basename(args.model))[0] 75 | 76 | imdb = get_imdb(args.imdb_name) 77 | imdb.competition_mode(args.comp_mode) 78 | # Find the checkpoint directory, or wait until it exists. 
79 | checkpoint_dir = os.path.dirname(args.model) 80 | while True: 81 | ckpt = tf.train.get_checkpoint_state(checkpoint_dir) 82 | if ckpt and ckpt.model_checkpoint_path: 83 | break 84 | else: 85 | print('waiting for ...checkpoint') 86 | time.sleep(10) 87 | 88 | device_name = '/{}:{:d}'.format(args.device,args.device_id) 89 | print device_name 90 | 91 | network = get_network(args.network_name) 92 | print 'Use network `{:s}` in training'.format(args.network_name) 93 | 94 | if args.device == 'gpu': 95 | cfg.USE_GPU_NMS = True 96 | cfg.GPU_ID = args.device_id 97 | else: 98 | cfg.USE_GPU_NMS = False 99 | 100 | # start a session 101 | saver = tf.train.Saver() 102 | sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) 103 | saver.restore(sess, args.model) 104 | print ('Loading model weights from {:s}').format(args.model) 105 | 106 | test_net(sess, network, imdb, weights_filename) 107 | -------------------------------------------------------------------------------- /Faster-RCNN_TF/tools/train_net.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """Train a Fast R-CNN network on a region of interest database.""" 11 | 12 | import _init_paths 13 | from fast_rcnn.train import get_training_roidb, train_net 14 | from fast_rcnn.config import cfg,cfg_from_file, cfg_from_list, get_output_dir 15 | from datasets.factory import get_imdb 16 | from networks.factory import get_network 17 | import argparse 18 | import pprint 19 | import numpy as np 20 | import sys 21 | import pdb 22 | 23 | def parse_args(): 24 | """ 25 | Parse input arguments 26 | """ 27 | parser = argparse.ArgumentParser(description='Train a Fast R-CNN network') 28 | parser.add_argument('--device', dest='device', help='device to use', 29 | default='cpu', type=str) 30 | parser.add_argument('--device_id', dest='device_id', help='device id to use', 31 | default=0, type=int) 32 | parser.add_argument('--solver', dest='solver', 33 | help='solver prototxt', 34 | default=None, type=str) 35 | parser.add_argument('--iters', dest='max_iters', 36 | help='number of iterations to train', 37 | default=70000, type=int) 38 | parser.add_argument('--weights', dest='pretrained_model', 39 | help='initialize with pretrained model weights', 40 | default=None, type=str) 41 | parser.add_argument('--cfg', dest='cfg_file', 42 | help='optional config file', 43 | default=None, type=str) 44 | parser.add_argument('--imdb', dest='imdb_name', 45 | help='dataset to train on', 46 | default='kitti_train', type=str) 47 | parser.add_argument('--rand', dest='randomize', 48 | help='randomize (do not use a fixed seed)', 49 | action='store_true') 50 | parser.add_argument('--network', dest='network_name', 51 | help='name of the network', 52 | default=None, type=str) 53 | parser.add_argument('--set', dest='set_cfgs', 54 | help='set config keys', default=None, 55 | nargs=argparse.REMAINDER) 56 | 57 | if len(sys.argv) == 1: 58 | parser.print_help() 59 | sys.exit(1) 60 | 61 | args = parser.parse_args() 62 | return args 63 | 64 | if __name__ == '__main__': 65 | args = parse_args() 66 | 67 | print('Called with args:') 68 | print(args) 69 | 70 | if args.cfg_file is not None: 71 | cfg_from_file(args.cfg_file) 72 | if args.set_cfgs is not 
None: 73 | cfg_from_list(args.set_cfgs) 74 | 75 | print('Using config:') 76 | pprint.pprint(cfg) 77 | 78 | if not args.randomize: 79 | # fix the random seeds (numpy and caffe) for reproducibility 80 | np.random.seed(cfg.RNG_SEED) 81 | imdb = get_imdb(args.imdb_name) 82 | print 'Loaded dataset `{:s}` for training'.format(imdb.name) 83 | roidb = get_training_roidb(imdb) 84 | 85 | output_dir = get_output_dir(imdb, None) 86 | print 'Output will be saved to `{:s}`'.format(output_dir) 87 | 88 | device_name = '/{}:{:d}'.format(args.device,args.device_id) 89 | print device_name 90 | 91 | network = get_network(args.network_name) 92 | print 'Use network `{:s}` in training'.format(args.network_name) 93 | 94 | train_net(network, imdb, roidb, output_dir, 95 | pretrained_model=args.pretrained_model, 96 | max_iters=args.max_iters) 97 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [Deprecated] 2 | ## Faster_RCNN for text detection 3 | *** 4 | 5 | #### Text detection with Faster_RCNN 6 | 7 | Software requirements: 8 | 9 | * python2.7 10 | 11 | * tensorflow_gpu, cython, python-opencv, easydict, etc. 12 | 13 | Convert the ICDAR2011 dataset into the VOC2007 format and place it in Faster_RCNN. 14 | 15 | The following link provides the already-processed ICDAR2011 data: 16 | 17 | Link: https://pan.baidu.com/s/15vVdxmLI7uI1IAMOqJrHKQ Password: asqb 18 | 19 | 20 | 21 | Put VGG_imagenet.npy into the Faster-RCNN_TF\data\pretrain_model folder: 22 | 23 | Link: https://pan.baidu.com/s/1z4xSXJk7U81SpPusqr0trA Password: xjph 24 | 25 | *** 26 | #### VOC2007 structure 27 | 28 | * JPEGImages folder: training and test images 29 | 30 | * Annotations folder: label files in xml format 31 | 32 | * ImageSets folder: Action and Layout are not used for now 33 | 34 | * Main holds the image object detection splits; it contains test.txt, train.txt, val.txt and trainval.txt. 35 | 36 | *** 37 | #### Data preparation 38 | * Put the ICDAR2011 training set images directly into JPEGImages 39 | 40 | * xml.py: used to create the .xml label files 41 | 42 | * generate_maintxt.py: used to generate the .txt files under Main 43 | *** 44 | #### Build the Cython modules 45 | ```bash 46 | cd $FRCN_ROOT/lib 47 | make 48 | ``` 49 | *** 50 | #### Test the model 51 | ```bash 52 | cd $FRCN_ROOT 53 | python ./tools/demo.py --model model_path 54 | ``` 55 | ![thumbnial.jpg](./result/thumbnial.jpg) 56 | 57 | *** 58 | #### Train the model 59 | ```bash 60 | cd $FRCN_ROOT 61 | ./experiments/scripts/faster_rcnn_end2end.sh DEVICE DEVICE_ID VGG16 pascal_voc 62 | ``` 63 | 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /generate_maintxt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | trainval_percent = 0.66 5 | train_percent = 0.5 6 | xmlfilepath = 'H:\\2Unsally\\MyVOC\\MyVOC2007\\Annotations' 7 | txtsavepath = 'H:\\2Unsally\\MyVOC\\MyVOC2007\\ImageSets\\Main' 8 | total_xml = os.listdir(xmlfilepath) 9 | 10 | num=len(total_xml) 11 | list=range(num) 12 | tv=int(num*trainval_percent) 13 | tr=int(tv*train_percent) 14 | trainval= random.sample(list,tv) 15 | train=random.sample(trainval,tr) 16 | 17 | ftrainval = open('MyVOC2007\\ImageSets\\Main\\trainval.txt', 'w') 18 | ftest = open('MyVOC2007\\ImageSets\\Main\\test.txt', 'w') 19 | ftrain = open('MyVOC2007\\ImageSets\\Main\\train.txt', 'w') 20 | fval = open('MyVOC2007\\ImageSets\\Main\\val.txt', 'w') 21 | 22 | for i in list: 23 | name=total_xml[i][:-4]+'\n' 24 | if i in trainval: 25 | ftrainval.write(name) 26 | if i in train: 27 | ftrain.write(name) 28 | else: 29 | fval.write(name) 30 | else: 31 | ftest.write(name) 32 | 33 | ftrainval.close() 34 | ftrain.close() 35 | fval.close() 36 | ftest.close()
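In generate_maintxt.py above, trainval_percent controls the trainval/test split over all annotation files and train_percent controls the train/val split inside trainval, with the remaining files written to test.txt. A worked example of the resulting set sizes, assuming a hypothetical 100 .xml files in Annotations:

```python
# Worked example of the split sizes used by generate_maintxt.py
# (100 is a hypothetical number of .xml annotation files).
num = 100
tv = int(num * 0.66)   # trainval: 66 files
tr = int(tv * 0.5)     # train:    33 files
val = tv - tr          # val:      33 files
test = num - tv        # test:     34 files
print(tv, tr, val, test)  # 66 33 33 34
```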
-------------------------------------------------------------------------------- /result/result166.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/result/result166.jpg -------------------------------------------------------------------------------- /result/result195.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/result/result195.jpg -------------------------------------------------------------------------------- /result/result202.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/result/result202.jpg -------------------------------------------------------------------------------- /result/test.py: -------------------------------------------------------------------------------- 1 | # Image manipulation 2 | from PIL import Image 3 | 4 | # Open a jpg image 5 | im = Image.open('thumbnial.jpg') 6 | # Get the image size 7 | w, h = im.size 8 | print('Original image size: %sx%s' % (w, h)) 9 | # Scale down to 50% 10 | im.thumbnail((w//2, h//2)) 11 | print('Resize image to: %sx%s' % (w//2, h//2)) 12 | # Save the resized image in jpeg format: 13 | im.save('thumbnial.jpg', 'jpeg') -------------------------------------------------------------------------------- /result/thumbnial.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QAlexBall/Faster_RCNN_for_TextDetection/6be4889920ca7bcc1662d6b2b478f76649c3e23a/result/thumbnial.jpg -------------------------------------------------------------------------------- /xml.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import glob 3 | from PIL import Image 4 | 5 | # Directory where the ICDAR images are stored 6 | src_img_dir = "train-textloc" 7 | # Directory where the ground truth txt files for the ICDAR images are stored 8 | src_txt_dir = "train-textloc" 9 | 10 | img_Lists = glob.glob(src_img_dir + '/*.jpg') 11 | 12 | img_basenames = [] # e.g. 100.jpg 13 | for item in img_Lists: 14 | img_basenames.append(os.path.basename(item)) 15 | 16 | img_names = [] # e.g.
100 17 | for item in img_basenames: 18 | temp1, temp2 = os.path.splitext(item) 19 | img_names.append(temp1) 20 | 21 | for img in img_names: 22 | im = Image.open((src_img_dir + '/' + img + '.jpg')) 23 | width, height = im.size 24 | 25 | # open the corresponding txt file 26 | gt = open(src_txt_dir + '/gt_' + img + '.txt').read().splitlines() 27 | 28 | # write in xml file 29 | #os.mknod(src_txt_dir + '/' + img + '.xml') 30 | xml_file = open((src_txt_dir + '/' + img + '.xml'), 'w') 31 | xml_file.write('<annotation>\n') 32 | xml_file.write('    <folder>VOC2007</folder>\n') 33 | xml_file.write('    <filename>' + str(img) + '.jpg' + '</filename>\n') 34 | xml_file.write('    <size>\n') 35 | xml_file.write('        <width>' + str(width) + '</width>\n') 36 | xml_file.write('        <height>' + str(height) + '</height>\n') 37 | xml_file.write('        <depth>3</depth>\n') 38 | xml_file.write('    </size>\n') 39 | 40 | # write the region of text on xml file 41 | for img_each_label in gt: 42 | spt = img_each_label.split(',') 43 | xml_file.write('    <object>\n') 44 | xml_file.write('        <name>text</name>\n') 45 | xml_file.write('        <pose>Unspecified</pose>\n') 46 | xml_file.write('        <truncated>0</truncated>\n') 47 | xml_file.write('        <difficult>0</difficult>\n') 48 | xml_file.write('        <bndbox>\n') 49 | xml_file.write('            <xmin>' + str(spt[0]) + '</xmin>\n') 50 | xml_file.write('            <ymin>' + str(spt[1]) + '</ymin>\n') 51 | xml_file.write('            <xmax>' + str(spt[2]) + '</xmax>\n') 52 | xml_file.write('            <ymax>' + str(spt[3]) + '</ymax>\n') 53 | xml_file.write('        </bndbox>\n') 54 | xml_file.write('    </object>\n') 55 | 56 | xml_file.write('</annotation>') --------------------------------------------------------------------------------
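xml.py only uses the first four comma-separated fields of each ground-truth line as xmin, ymin, xmax, ymax; any remaining fields are ignored. A short sketch of that parsing step, using a hypothetical ICDAR-style line:

```python
# Hypothetical ICDAR-style ground-truth line; xml.py keeps only the
# first four comma-separated values as the bounding box.
line = '158,128,411,181,"Footpath"'
spt = line.split(',')
xmin, ymin, xmax, ymax = spt[0], spt[1], spt[2], spt[3]
print(xmin, ymin, xmax, ymax)  # 158 128 411 181
```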