├── .idea ├── InceptText-Tensorflow.iml ├── modules.xml ├── vcs.xml └── workspace.xml ├── README.md ├── data_util.py ├── error_pic ├── .DS_Store ├── error1.jpg ├── error2.jpg └── error3.jpg ├── icdar.py ├── lib ├── Makefile ├── __init__.py ├── cnn_tools │ ├── __init__.py │ ├── __init__.pyc │ ├── tools.py │ └── tools.pyc ├── cuda_config.h ├── datasets │ ├── __init__.py │ ├── coco.py │ ├── ds_utils.py │ ├── factory.py │ ├── imagenet3d.py │ ├── imdb.py │ ├── imdb2.py │ ├── kitti.py │ ├── kitti_tracking.py │ ├── kittivoc.py │ ├── nissan.py │ ├── nthu.py │ ├── pascal3d.py │ ├── pascal_voc.py │ ├── pascal_voc2.py │ └── voc_eval.py ├── deform_conv_layer │ ├── __init__.py │ ├── deform_conv.cc │ ├── deform_conv.cu.cc │ ├── deform_conv.h │ ├── deform_conv_grad.py │ ├── deform_conv_op.py │ ├── deform_conv_test_mx.py │ ├── deform_conv_util.h │ └── test_deform_conv.py ├── deform_psroi_pooling_layer │ ├── __init__.py │ ├── deform_psroi_pooling_op.cc │ ├── deform_psroi_pooling_op.py │ ├── deform_psroi_pooling_op_gpu.cu.cc │ ├── deform_psroi_pooling_op_gpu.h │ ├── deform_psroi_pooling_op_grad.py │ ├── deform_psroi_pooling_op_test.py │ └── deform_psroi_pooling_op_test_mx.py ├── fast_rcnn │ ├── __init__.py │ ├── bbox_transform.py │ ├── config.py │ ├── config2.py │ ├── nms_wrapper.py │ ├── test.py │ └── train.py ├── gt_data_layer │ ├── __init__.py │ ├── layer.py │ ├── minibatch.py │ └── roidb.py ├── make.sh ├── networks │ ├── .VGGnet.py.swo │ ├── Resnet101_test.py │ ├── Resnet101_train.py │ ├── Resnet50_test.py │ ├── Resnet50_train.py │ ├── VGGnet_test.py │ ├── VGGnet_train.py │ ├── __init__.py │ ├── caffenet.py │ ├── factory.py │ └── network.py ├── nms │ ├── .gitignore │ ├── __init__.py │ ├── cpu_nms.pyx │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms_kernel.cu │ └── py_cpu_nms.py ├── psroi_pooling_layer │ ├── __init__.py │ ├── cuda_kernel_helper.h │ ├── psroi_pooling_op.cc │ ├── psroi_pooling_op.py │ ├── psroi_pooling_op_gpu.cu.cc │ ├── psroi_pooling_op_gpu.h │ ├── psroi_pooling_op_grad.py │ └── psroi_pooling_op_test.py ├── pycocotools │ ├── UPSTREAM_REV │ ├── __init__.py │ ├── _mask.c │ ├── _mask.pyx │ ├── coco.py │ ├── cocoeval.py │ ├── license.txt │ ├── mask.py │ ├── maskApi.c │ └── maskApi.h ├── roi_data_layer │ ├── __init__.py │ ├── layer.py │ ├── minibatch.py │ ├── minibatch2.py │ ├── roidb.py │ └── roidb2.py ├── roi_pooling_layer │ ├── __init__.py │ ├── roi_pooling_op.cc │ ├── roi_pooling_op.py │ ├── roi_pooling_op_gpu.cu.cc │ ├── roi_pooling_op_gpu.h │ ├── roi_pooling_op_grad.py │ └── roi_pooling_op_test.py ├── rpn_tools │ ├── .DS_Store │ ├── __init__.py │ ├── __init__.pyc │ ├── anchor_target_layer_modified.py │ ├── anchor_target_layer_modified.pyc │ ├── fast_rcnn │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── bbox_transform.py │ │ ├── bbox_transform.pyc │ │ ├── config.py │ │ ├── config.pyc │ │ ├── nms │ │ │ ├── __init__.py │ │ │ ├── __init__.pyc │ │ │ ├── cpu_nms.c │ │ │ ├── cpu_nms.pyx │ │ │ ├── cpu_nms.so │ │ │ ├── gpu_mv.cpp │ │ │ ├── gpu_mv.hpp │ │ │ ├── gpu_mv.pyx │ │ │ ├── gpu_nms.cpp │ │ │ ├── gpu_nms.hpp │ │ │ ├── gpu_nms.pyx │ │ │ ├── gpu_nms.so │ │ │ ├── mnc_config.py │ │ │ ├── mnc_config.pyc │ │ │ ├── mv.so │ │ │ ├── mv_kernel.cu │ │ │ ├── nms_kernel.cu │ │ │ ├── nms_wrapper.py │ │ │ ├── nms_wrapper.pyc │ │ │ └── py_cpu_nms.py │ │ ├── nms_wrapper.py │ │ ├── nms_wrapper.pyc │ │ ├── nms_wrapper.py~ │ │ ├── test.py │ │ ├── test.pyc │ │ ├── train.py │ │ └── train.pyc │ ├── generate_anchors.py │ ├── generate_anchors.pyc │ ├── my_anchor_target_layer_modified.py │ ├── 
my_anchor_target_layer_modified.pyc │ ├── nms │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── cpu_nms.c │ │ ├── cpu_nms.pyx │ │ ├── cpu_nms.so │ │ ├── gpu_mv.cpp │ │ ├── gpu_mv.hpp │ │ ├── gpu_mv.pyx │ │ ├── gpu_nms.cpp │ │ ├── gpu_nms.hpp │ │ ├── gpu_nms.pyx │ │ ├── gpu_nms.so │ │ ├── mnc_config.py │ │ ├── mnc_config.pyc │ │ ├── mv.so │ │ ├── mv_kernel.cu │ │ ├── nms_kernel.cu │ │ ├── nms_wrapper.py │ │ ├── nms_wrapper.pyc │ │ └── py_cpu_nms.py │ ├── proposal_layer_modified.py │ ├── proposal_layer_modified.pyc │ ├── proposal_target_layer_modified.py │ ├── proposal_target_layer_modified.pyc │ └── utils │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── bbox.c │ │ ├── bbox.pyx │ │ ├── blob.py │ │ ├── blob.pyc │ │ ├── cython_bbox.so │ │ ├── mnc_config.py │ │ ├── mnc_config.pyc │ │ ├── timer.py │ │ ├── timer.pyc │ │ ├── unmap.py │ │ ├── unmap.pyc │ │ ├── utils │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── bbox.pyx │ │ ├── blob.py │ │ ├── blob.pyc │ │ ├── timer.py │ │ └── timer.pyc │ │ ├── vis_seg.py │ │ └── voc_eval.py ├── setup.py └── utils │ ├── .gitignore │ ├── __init__.py │ ├── bbox.pyx │ ├── blob.py │ ├── boxes_grid.py │ ├── nms.py │ ├── nms.pyx │ └── timer.py ├── model.py ├── nets ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── resnet_utils.cpython-36.pyc │ └── resnet_v1.cpython-36.pyc ├── googlenet.py ├── resnet_utils.py └── resnet_v1.py ├── test.py └── train_main.py /.idea/InceptText-Tensorflow.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | --------------------------------------------------------------------------------
/README.md: -------------------------------------------------------------------------------- 1 | # InceptText-Tensorflow 2 | An implementation of the algorithm from the paper IncepText: A New Inception-Text Module with Deformable PSROI Pooling for Multi-Oriented Scene Text Detection 3 | 4 | ## Introduction 5 | ### TensorFlow 1.4.0 6 | 7 | ### Preparation 8 | 9 | 1. gcc 4.9 10 | 11 | 2. CUDA 8.0 12 | 13 | 3. cd lib && make 14 | 15 | - Errors you may encounter: 16 | 17 | 1.![error1](error_pic/error1.jpg) 18 | 19 | Solution: add the CUDA path to the system environment variables, then change it to #include 20 | 21 | 2.![error2](error_pic/error2.jpg) 22 | 23 | Solution: find the absolute path of nsync_cv.h and include it 24 | 25 | 3.![error3](error_pic/error3.jpg) 26 | 27 | Solution: find the absolute path of nsync_mu.h and include it 28 | 29 | ## Download 30 | ### 1. Models trained on ICDAR 2017 31 | ### 2. ResNet V1 50 provided by tensorflow slim [ResNet-v1](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz) 32 | 33 | ## Train 34 | ### python train_main.py 35 | 36 | ## Test 37 | ### python test.py 38 | --------------------------------------------------------------------------------
/data_util.py: -------------------------------------------------------------------------------- 1 | ''' 2 | this file is modified from the keras implementation of multi-threaded data processing, 3 | see https://github.com/fchollet/keras/blob/master/keras/utils/data_utils.py 4 | ''' 5 | import time 6 | import numpy as np 7 | import threading 8 | import multiprocessing 9 | try: 10 | import queue 11 | except ImportError:
12 | import Queue as queue 13 | 14 | 15 | class GeneratorEnqueuer(): 16 | """Builds a queue out of a data generator. 17 | 18 | Used in `fit_generator`, `evaluate_generator`, `predict_generator`. 19 | 20 | # Arguments 21 | generator: a generator function which endlessly yields data 22 | use_multiprocessing: use multiprocessing if True, otherwise threading 23 | wait_time: time to sleep in-between calls to `put()` 24 | random_seed: Initial seed for workers, 25 | will be incremented by one for each workers. 26 | """ 27 | 28 | def __init__(self, generator, 29 | use_multiprocessing=False, 30 | wait_time=0.05, 31 | random_seed=None): 32 | self.wait_time = wait_time 33 | self._generator = generator 34 | self._use_multiprocessing = use_multiprocessing 35 | self._threads = [] 36 | self._stop_event = None 37 | self.queue = None 38 | self.random_seed = random_seed 39 | 40 | def start(self, workers=1, max_queue_size=10): 41 | """Kicks off threads which add data from the generator into the queue. 42 | 43 | # Arguments 44 | workers: number of worker threads 45 | max_queue_size: queue size 46 | (when full, threads could block on `put()`) 47 | """ 48 | 49 | def data_generator_task(): 50 | while not self._stop_event.is_set(): 51 | try: 52 | if self._use_multiprocessing or self.queue.qsize() < max_queue_size: 53 | generator_output = next(self._generator) 54 | self.queue.put(generator_output) 55 | else: 56 | time.sleep(self.wait_time) 57 | except Exception: 58 | self._stop_event.set() 59 | raise 60 | 61 | try: 62 | if self._use_multiprocessing: 63 | self.queue = multiprocessing.Queue(maxsize=max_queue_size) 64 | self._stop_event = multiprocessing.Event() 65 | else: 66 | self.queue = queue.Queue() 67 | self._stop_event = threading.Event() 68 | 69 | for _ in range(workers): 70 | if self._use_multiprocessing: 71 | # Reset random seed else all children processes 72 | # share the same seed 73 | np.random.seed(self.random_seed) 74 | thread = multiprocessing.Process(target=data_generator_task) 75 | thread.daemon = True 76 | if self.random_seed is not None: 77 | self.random_seed += 1 78 | else: 79 | thread = threading.Thread(target=data_generator_task) 80 | self._threads.append(thread) 81 | thread.start() 82 | except: 83 | self.stop() 84 | raise 85 | 86 | def is_running(self): 87 | return self._stop_event is not None and not self._stop_event.is_set() 88 | 89 | def stop(self, timeout=None): 90 | """Stops running threads and wait for them to exit, if necessary. 91 | 92 | Should be called by the same thread which called `start()`. 93 | 94 | # Arguments 95 | timeout: maximum time to wait on `thread.join()`. 96 | """ 97 | if self.is_running(): 98 | self._stop_event.set() 99 | 100 | for thread in self._threads: 101 | if thread.is_alive(): 102 | if self._use_multiprocessing: 103 | thread.terminate() 104 | else: 105 | thread.join(timeout) 106 | 107 | if self._use_multiprocessing: 108 | if self.queue is not None: 109 | self.queue.close() 110 | 111 | self._threads = [] 112 | self._stop_event = None 113 | self.queue = None 114 | 115 | def get(self): 116 | """Creates a generator to extract data from the queue. 117 | 118 | Skip the data if it is `None`. 
119 | 120 | # Returns 121 | A generator 122 | """ 123 | while self.is_running(): 124 | if not self.queue.empty(): 125 | inputs = self.queue.get() 126 | if inputs is not None: 127 | yield inputs 128 | else: 129 | time.sleep(self.wait_time) -------------------------------------------------------------------------------- /error_pic/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/error_pic/.DS_Store -------------------------------------------------------------------------------- /error_pic/error1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/error_pic/error1.jpg -------------------------------------------------------------------------------- /error_pic/error2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/error_pic/error2.jpg -------------------------------------------------------------------------------- /error_pic/error3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/error_pic/error3.jpg -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | sh make.sh 5 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- 1 | #import fast_rcnn 2 | -------------------------------------------------------------------------------- /lib/cnn_tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/cnn_tools/__init__.py -------------------------------------------------------------------------------- /lib/cnn_tools/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/cnn_tools/__init__.pyc -------------------------------------------------------------------------------- /lib/cnn_tools/tools.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/cnn_tools/tools.pyc -------------------------------------------------------------------------------- /lib/cuda_config.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | ==============================================================================*/ 15 | 16 | /* 17 | # If coming across: cudaCheckError() failed : invalid device function. change -arch=sm_xx accordingly. 18 | 19 | # Which CUDA capabilities do we want to pre-build for? 20 | # https://developer.nvidia.com/cuda-gpus 21 | # Compute/shader model Cards 22 | # 6.1 P4, P40, Titan X so CUDA_MODEL = 61 23 | # 6.0 P100 so CUDA_MODEL = 60 24 | # 5.2 M40 25 | # 3.7 K80 26 | # 3.5 K40, K20 27 | # 3.0 K10, Grid K520 (AWS G2) 28 | # Other Nvidia shader models should work, but they will require extra startup 29 | # time as the code is pre-optimized for them. 30 | # CUDA_MODELS=30 35 37 52 60 61 31 | */ 32 | // DO NOT EDIT: automatically generated file 33 | #ifndef CUDA_CUDA_CONFIG_H_ 34 | #define CUDA_CUDA_CONFIG_H_ 35 | // please modify the TF_CUDA_CAPABILITIES according to the above list and 36 | // your gpu model. 37 | 38 | #define TF_CUDA_CAPABILITIES CudaVersion("8.0") 39 | 40 | #define TF_CUDA_VERSION "8.0" 41 | #define TF_CUDNN_VERSION "6" 42 | 43 | #define TF_CUDA_TOOLKIT_PATH "/usr/local/cuda-8.0" 44 | 45 | #endif // CUDA_CUDA_CONFIG_H_ 46 | -------------------------------------------------------------------------------- /lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | # TODO: make this fold self-contained, only depends on utils package 9 | 10 | from .imdb import imdb 11 | from .pascal_voc import pascal_voc 12 | from .pascal3d import pascal3d 13 | from .imagenet3d import imagenet3d 14 | from .kitti import kitti 15 | from .kitti_tracking import kitti_tracking 16 | from .nissan import nissan 17 | from .nthu import nthu 18 | from . import factory 19 | 20 | ## NOTE: obsolete 21 | import os.path as osp 22 | from .imdb import ROOT_DIR 23 | from .imdb import MATLAB 24 | 25 | # http://stackoverflow.com/questions/377017/test-if-executable-exists-in-python 26 | def _which(program): 27 | import os 28 | def is_exe(fpath): 29 | return os.path.isfile(fpath) and os.access(fpath, os.X_OK) 30 | 31 | fpath, fname = os.path.split(program) 32 | if fpath: 33 | if is_exe(program): 34 | return program 35 | else: 36 | for path in os.environ["PATH"].split(os.pathsep): 37 | path = path.strip('"') 38 | exe_file = os.path.join(path, program) 39 | if is_exe(exe_file): 40 | return exe_file 41 | 42 | return None 43 | """ 44 | if _which(MATLAB) is None: 45 | msg = ("MATLAB command '{}' not found. 
" 46 | "Please add '{}' to your PATH.").format(MATLAB, MATLAB) 47 | raise EnvironmentError(msg) 48 | """ 49 | -------------------------------------------------------------------------------- /lib/datasets/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick 5 | # -------------------------------------------------------- 6 | 7 | import numpy as np 8 | 9 | def unique_boxes(boxes, scale=1.0): 10 | """Return indices of unique boxes.""" 11 | v = np.array([1, 1e3, 1e6, 1e9]) 12 | hashes = np.round(boxes * scale).dot(v) 13 | _, index = np.unique(hashes, return_index=True) 14 | return np.sort(index) 15 | 16 | def xywh_to_xyxy(boxes): 17 | """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" 18 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) 19 | 20 | def xyxy_to_xywh(boxes): 21 | """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" 22 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) 23 | 24 | def validate_boxes(boxes, width=0, height=0): 25 | """Check that a set of boxes are valid.""" 26 | x1 = boxes[:, 0] 27 | y1 = boxes[:, 1] 28 | x2 = boxes[:, 2] 29 | y2 = boxes[:, 3] 30 | assert (x1 >= 0).all() 31 | assert (y1 >= 0).all() 32 | assert (x2 >= x1).all() 33 | assert (y2 >= y1).all() 34 | assert (x2 < width).all() 35 | assert (y2 < height).all() 36 | 37 | def filter_small_boxes(boxes, min_size): 38 | w = boxes[:, 2] - boxes[:, 0] 39 | h = boxes[:, 3] - boxes[:, 1] 40 | keep = np.where((w >= min_size) & (h > min_size))[0] 41 | return keep 42 | -------------------------------------------------------------------------------- /lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Factory method for easily getting imdbs by name.""" 9 | 10 | __sets = {} 11 | 12 | import numpy as np 13 | 14 | from .pascal_voc import pascal_voc 15 | from .imagenet3d import imagenet3d 16 | from .kitti import kitti 17 | from .kitti_tracking import kitti_tracking 18 | from .nthu import nthu 19 | from .coco import coco 20 | from .kittivoc import kittivoc 21 | 22 | def _selective_search_IJCV_top_k(split, year, top_k): 23 | """Return an imdb that uses the top k proposals from the selective search 24 | IJCV code. 
25 | """ 26 | imdb = pascal_voc(split, year) 27 | imdb.roidb_handler = imdb.selective_search_IJCV_roidb 28 | imdb.config['top_k'] = top_k 29 | return imdb 30 | 31 | # Set up voc__ using selective search "fast" mode 32 | for year in ['2007', '2012', '0712']: 33 | for split in ['train', 'val', 'trainval', 'test']: 34 | name = 'voc_{}_{}'.format(year, split) 35 | __sets[name] = (lambda split=split, year=year: 36 | pascal_voc(split, year)) 37 | 38 | 39 | # Set up kittivoc 40 | for split in ['train', 'val', 'trainval', 'test']: 41 | name = 'kittivoc_{}'.format(split) 42 | print(name) 43 | __sets[name] = (lambda split=split: kittivoc(split)) 44 | 45 | # # KITTI dataset 46 | # for split in ['train', 'val', 'trainval', 'test']: 47 | # name = 'kitti_{}'.format(split) 48 | # print name 49 | # __sets[name] = (lambda split=split: kitti(split)) 50 | 51 | # Set up coco_2014_ 52 | for year in ['2014']: 53 | for split in ['train', 'val', 'minival', 'valminusminival']: 54 | name = 'coco_{}_{}'.format(year, split) 55 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 56 | 57 | # Set up coco_2015_ 58 | for year in ['2015']: 59 | for split in ['test', 'test-dev']: 60 | name = 'coco_{}_{}'.format(year, split) 61 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 62 | 63 | # NTHU dataset 64 | for split in ['71', '370']: 65 | name = 'nthu_{}'.format(split) 66 | print(name) 67 | __sets[name] = (lambda split=split: nthu(split)) 68 | 69 | 70 | def get_imdb(name): 71 | """Get an imdb (image database) by name.""" 72 | if name not in __sets: 73 | print((list_imdbs())) 74 | raise KeyError('Unknown dataset: {}'.format(name)) 75 | return __sets[name]() 76 | 77 | def list_imdbs(): 78 | """List all registered imdbs.""" 79 | return list(__sets.keys()) 80 | -------------------------------------------------------------------------------- /lib/deform_conv_layer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/deform_conv_layer/__init__.py -------------------------------------------------------------------------------- /lib/deform_conv_layer/deform_conv_grad.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import tensorflow as tf 3 | from tensorflow.python.framework import ops 4 | from . import deform_conv_op 5 | 6 | @ops.RegisterGradient("DeformConvOp") 7 | def _deform_conv_grad(op, grad): 8 | """The gradients for `deform_conv`. 9 | Args: 10 | op: The `deform_conv` `Operation` that we are differentiating, which we can use 11 | to find the inputs and outputs of the original op. 12 | grad: Gradient with respect to the output of the `roi_pool` op. 13 | Returns: 14 | Gradients with respect to the input of `zero_out`. 
15 | """ 16 | data = op.inputs[0] 17 | filter = op.inputs[1] 18 | offset = op.inputs[2] 19 | 20 | strides = op.get_attr('strides') 21 | rates = op.get_attr('rates') 22 | num_groups = op.get_attr('num_groups') 23 | padding = op.get_attr('padding') 24 | data_format = op.get_attr('data_format') 25 | 26 | # compute gradient 27 | data_grad = deform_conv_op.deform_conv_grad_op(data, filter, offset, grad, strides, rates, num_groups, padding, data_format) 28 | 29 | return data_grad -------------------------------------------------------------------------------- /lib/deform_conv_layer/deform_conv_op.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import tensorflow as tf 3 | import os.path as osp 4 | from tensorflow.python.framework import ops 5 | 6 | 7 | filename = osp.join(osp.dirname(__file__), 'deform_conv.so') 8 | _deform_conv_module = tf.load_op_library(filename) 9 | deform_conv_op = _deform_conv_module.deform_conv_op 10 | deform_conv_grad_op = _deform_conv_module.deform_conv_backprop_op 11 | 12 | 13 | @ops.RegisterGradient("DeformConvOp") 14 | def _deform_conv_grad(op, grad): 15 | """The gradients for `deform_conv`. 16 | Args: 17 | op: The `deform_conv` `Operation` that we are differentiating, which we can use 18 | to find the inputs and outputs of the original op. 19 | grad: Gradient with respect to the output of the `roi_pool` op. 20 | Returns: 21 | Gradients with respect to the input of `zero_out`. 22 | """ 23 | data = op.inputs[0] 24 | filter = op.inputs[1] 25 | offset = op.inputs[2] 26 | 27 | strides = op.get_attr('strides') 28 | rates = op.get_attr('rates') 29 | num_groups = op.get_attr('num_groups') 30 | padding = op.get_attr('padding') 31 | data_format = op.get_attr('data_format') 32 | deformable_group = op.get_attr('deformable_group') 33 | 34 | # compute gradient 35 | data_grad = deform_conv_grad_op(data, filter, offset, grad, strides, rates, num_groups, deformable_group, padding, data_format) 36 | 37 | return data_grad # List of one Tensor, since we have one input -------------------------------------------------------------------------------- /lib/deform_conv_layer/deform_conv_test_mx.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import os 3 | import mxnet as mx 4 | import numpy as np 5 | 6 | gpu_device=mx.gpu() 7 | cpu_device=mx.cpu() 8 | 9 | # trans = np.random.rand(1,2,2,2) 10 | 11 | if not os.path.isfile('test.npz'): 12 | with open("test.npz", 'wb') as f: 13 | arr=np.random.random((8, 6, 4, 5)) 14 | np.save(f, arr) 15 | else: 16 | with open("test.npz", 'rb') as f: 17 | arr = np.load(f) 18 | kernel = mx.nd.array(np.ones((21,2,2,2)), ctx=gpu_device) 19 | trans = mx.nd.array(np.ones((8,8,2,2)), ctx=gpu_device) 20 | arr = mx.nd.array(arr, ctx=gpu_device) 21 | data_grad = mx.nd.zeros_like(arr) 22 | kernel_grad = mx.nd.zeros_like(kernel) 23 | trans_grad = mx.nd.zeros_like(trans) 24 | 25 | def main(): 26 | data_var = mx.symbol.Variable('data') 27 | ker_var = mx.symbol.Variable('kernel') 28 | trans_var = mx.symbol.Variable('trans') 29 | res = mx.contrib.sym.DeformableConvolution(data=data_var, offset=trans_var, weight=ker_var, 30 | num_group=3, no_bias=True, kernel=[2,2], num_filter=21, stride=[2, 2]) 31 | rua = res.bind(ctx=gpu_device, args={'data':arr, 'kernel':kernel, 'trans':trans}, args_grad={'data':data_grad, 'kernel':kernel_grad, 'trans':trans_grad}) 32 | rua.forward(is_train=True) 33 | 
rua.backward(out_grads=mx.nd.ones((8,21,2,2))) 34 | # print(trans.asnumpy()) 35 | # res_arr = rua.outputs[0].asnumpy() 36 | # print(res_arr) 37 | # print([a.asnumpy() for a in rua.grad_arrays]) 38 | print(data_grad.asnumpy()) 39 | 40 | 41 | if __name__ == '__main__': 42 | main() -------------------------------------------------------------------------------- /lib/deform_conv_layer/deform_conv_util.h: -------------------------------------------------------------------------------- 1 | #include "tensorflow/core/util/tensor_format.h" 2 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" 3 | #include "tensorflow/core/framework/tensor.h" 4 | 5 | namespace tensorflow { 6 | typedef std::vector TShape; 7 | 8 | inline int ProdShape(const TensorShape &shape, int start) { 9 | int64 res = 1; 10 | for(int i=start; i ToVector(const TensorShape &shape) { 17 | // int64 res = 1; 18 | std::vector res; 19 | for(int i=0; i 20 | struct DeformPSROIPoolForwardLauncher { 21 | bool operator()( 22 | const DType* bottom_data, const float spatial_scale, const int num_rois, const int channels, const int height, 23 | const int width, const int pooled_height, const int pooled_width, const DType* bottom_rois, const DType* bottom_trans, 24 | const bool no_trans, const float trans_std, const int sample_per_part, const int output_dim, const int num_classes, 25 | const int group_size, const int part_size, DType* top_data, DType* mapping_channel, const Eigen::GpuDevice& d); 26 | }; 27 | 28 | template 29 | struct DeformPSROIPoolBackwardLauncher { 30 | bool operator() (const DType* top_diff, const DType* mapping_channel, const int num_rois, const float spatial_scale, 31 | const int channels, const int height, const int width, const int pooled_height, const int pooled_width, 32 | const int output_dim, DType* bottom_data_diff, DType* bottom_trans_diff, const DType* bottom_data, 33 | const DType* bottom_rois, const DType* bottom_trans, const bool no_trans, const float trans_std, 34 | const int sample_per_part, const int group_size, const int part_size, 35 | const int num_classes, const int channels_each_class, const Eigen::GpuDevice& d); 36 | }; 37 | 38 | template 39 | struct setZero { 40 | void operator() (const Device& d, const int n, DType* result_data); 41 | }; 42 | 43 | } // namespace tensorflow 44 | 45 | #endif // TENSORFLOW_USER_OPS_DEFORMPSROIPOOLING_OP_GPU_H_ -------------------------------------------------------------------------------- /lib/deform_psroi_pooling_layer/deform_psroi_pooling_op_grad.py: -------------------------------------------------------------------------------- 1 | # from __future__ import absolute_import 2 | # import tensorflow as tf 3 | # from tensorflow.python.framework import ops 4 | # import deform_psroi_pooling_op 5 | # import pdb 6 | 7 | 8 | # # @tf.RegisterShape("DeformPSROIPool") 9 | # # def _deform_psroi_pool_shape(op): 10 | # # """Shape function for the DeformPSROIPool op. 
11 | 12 | # # """ 13 | # # dims_data = op.inputs[0].get_shape().as_list() 14 | # # channels = dims_data[3] 15 | # # dims_rois = op.inputs[1].get_shape().as_list() 16 | # # num_rois = dims_rois[0] 17 | # # output_dim = op.get_attr('output_dim') 18 | # # group_size = op.get_attr('group_size') 19 | # # pooled_height = group_size 20 | # # pooled_width = group_size 21 | 22 | # # output_shape = tf.TensorShape([num_rois, pooled_height, pooled_width, output_dim]) 23 | # # return [output_shape, output_shape] 24 | 25 | # @ops.RegisterGradient("DeformPSROIPool") 26 | # def _deform_psroi_pool_grad(op, grad, _): 27 | # """The gradients for `Deform_PSROI_pool`. 28 | # Args: 29 | # op: The `roi_pool` `Operation` that we are differentiating, which we can use 30 | # to find the inputs and outputs of the original op. 31 | # grad: Gradient with respect to the output of the `roi_pool` op. 32 | # Returns: 33 | # Gradients with respect to the input of `zero_out`. 34 | # """ 35 | 36 | 37 | # data = op.inputs[0] 38 | # rois = op.inputs[1] 39 | # trans = op.inputs[2] 40 | # mapping_channel = op.outputs[1] 41 | # spatial_scale = op.get_attr('spatial_scale') 42 | # output_dim = op.get_attr('output_dim') 43 | # group_size = op.get_attr('group_size') 44 | # pooled_size = op.get_attr('pooled_size') 45 | # part_size = op.get_attr('part_size') 46 | # sample_per_part = op.get_attr('sample_per_part') 47 | # trans_std = op.get_attr('trans_std') 48 | # no_trans = op.get_attr('no_trans') 49 | 50 | 51 | 52 | # # compute gradient 53 | # #data_grad = psroi_pooling_op.psroi_pool_grad(data, rois, argmax, grad, pooled_height, pooled_width, spatial_scale) 54 | # data_grad, trans_grad = deform_psroi_pooling_op.deform_psroi_pool_grad(data, rois, trans, mapping_channel, grad, spatial_scale, 55 | # output_dim, group_size, pooled_size, part_size, sample_per_part, 56 | # trans_std, no_trans) 57 | # # rois_grad = tf.zeros(rois.shape) 58 | # return [data_grad, None, trans_grad] # List of one Tensor, since we have one input 59 | 60 | -------------------------------------------------------------------------------- /lib/deform_psroi_pooling_layer/deform_psroi_pooling_op_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import tensorflow as tf 3 | import numpy as np 4 | import deform_psroi_pooling_op 5 | import deform_psroi_pooling_op_grad 6 | import pdb 7 | 8 | # pdb.set_trace() 9 | data_arr = np.random.rand(1,25,5,5) 10 | # roi = np.array([[0, 0, 0, 4, 4]],dtype=np.float32) 11 | trans_arr = np.random.rand(1,2,2,2) 12 | 13 | # with open("data.npz", 'rb') as f: 14 | # data_arr = np.load(f) 15 | # with open("trans.npz", 'rb') as f: 16 | # trans_arr = np.load(f) 17 | 18 | 19 | rois = tf.convert_to_tensor([ [0, 0, 0, 4, 4]], dtype=tf.float32) 20 | trans = tf.convert_to_tensor(trans_arr, dtype=tf.float32) 21 | hh=tf.convert_to_tensor(data_arr,dtype=tf.float32) 22 | [y2, channels] = deform_psroi_pooling_op.deform_psroi_pool(hh, rois, trans=trans, pooled_size=2, output_dim=1, group_size=1, spatial_scale=1.0, 23 | trans_std=1e-1, sample_per_part=1, part_size=2, no_trans=False) 24 | s = tf.gradients(y2, [hh, trans]) 25 | sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) 26 | # sess.run(s[0]) 27 | # print( sess.run(trans)) 28 | # print( sess.run(y2)) 29 | print( sess.run(s[1])) 30 | # print( sess.run(s[1])) 31 | # pdb.set_trace() 32 | -------------------------------------------------------------------------------- 
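Note that the gradient registration in deform_psroi_pooling_op_grad.py above ships entirely commented out, even though deform_psroi_pooling_op_test.py imports it before calling tf.gradients; presumably the active registration lives in the compiled op wrapper instead. For reference, a de-commented sketch of that registration, using the same attributes and the same deform_psroi_pool_grad call as the commented block (illustrative only, not necessarily the wiring this repository actually uses):

```python
from __future__ import absolute_import
import tensorflow as tf
from tensorflow.python.framework import ops
import deform_psroi_pooling_op


@ops.RegisterGradient("DeformPSROIPool")
def _deform_psroi_pool_grad(op, grad, _):
    """Route the incoming gradient to the feature map and the offset (trans)
    inputs of DeformPSROIPool; the ROI coordinates receive no gradient."""
    data = op.inputs[0]
    rois = op.inputs[1]
    trans = op.inputs[2]
    mapping_channel = op.outputs[1]

    # All pooling attributes are read back from the forward op.
    spatial_scale = op.get_attr('spatial_scale')
    output_dim = op.get_attr('output_dim')
    group_size = op.get_attr('group_size')
    pooled_size = op.get_attr('pooled_size')
    part_size = op.get_attr('part_size')
    sample_per_part = op.get_attr('sample_per_part')
    trans_std = op.get_attr('trans_std')
    no_trans = op.get_attr('no_trans')

    data_grad, trans_grad = deform_psroi_pooling_op.deform_psroi_pool_grad(
        data, rois, trans, mapping_channel, grad, spatial_scale,
        output_dim, group_size, pooled_size, part_size, sample_per_part,
        trans_std, no_trans)
    # One entry per op input: data, rois (no gradient), trans.
    return [data_grad, None, trans_grad]
```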
/lib/deform_psroi_pooling_layer/deform_psroi_pooling_op_test_mx.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import mxnet as mx 3 | import numpy as np 4 | 5 | gpu_device=mx.gpu() 6 | # data = np.random.rand(1,25,5,5) 7 | roi = mx.nd.array(np.array([[0, 0, 0, 4, 4]],dtype=np.float32), ctx=gpu_device) 8 | # trans = np.random.rand(1,2,2,2) 9 | 10 | with open("data.npz", 'rb') as f: 11 | data = mx.nd.array(np.load(f), ctx=gpu_device) 12 | with open("trans.npz", 'rb') as f: 13 | trans = mx.nd.array(np.load(f), ctx=gpu_device) 14 | 15 | data_grad = mx.nd.zeros_like(data) 16 | roi_grad = mx.nd.zeros_like(roi) 17 | trans_grad = mx.nd.zeros_like(trans) 18 | 19 | def main(): 20 | data_var = mx.symbol.Variable('data') 21 | roi_var = mx.symbol.Variable('roi') 22 | trans_var = mx.symbol.Variable('trans') 23 | res = mx.contrib.sym.DeformablePSROIPooling(data=data_var, rois=roi_var, trans=trans_var, group_size=1, pooled_size=2, 24 | output_dim=1, no_trans=False, part_size=2, sample_per_part=1, spatial_scale=1., trans_std=0.1) 25 | rua = res.bind(ctx=gpu_device, args={'data':data, 'roi':roi, 'trans':trans}, args_grad={'data':data_grad, 'roi':roi_grad, 'trans':trans_grad}) 26 | rua.forward(is_train=True) 27 | rua.backward(out_grads=mx.nd.ones((1, 1, 2, 2))) 28 | # print(trans.asnumpy()) 29 | # res_arr = rua.outputs[0].asnumpy() 30 | # print(res_arr) 31 | # print([a.asnumpy() for a in rua.grad_arrays]) 32 | print(trans_grad.asnumpy()) 33 | 34 | 35 | if __name__ == '__main__': 36 | main() -------------------------------------------------------------------------------- /lib/fast_rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from . import config 9 | from . import train 10 | from . import test 11 | from . import nms_wrapper 12 | # from nms_wrapper import nms -------------------------------------------------------------------------------- /lib/fast_rcnn/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import warnings 10 | 11 | def bbox_transform(ex_rois, gt_rois): 12 | """ 13 | computes the distance from ground-truth boxes to the given boxes, normed by their size 14 | :param ex_rois: n * 4 numpy array, given boxes 15 | :param gt_rois: n * 4 numpy array, ground-truth boxes 16 | :return: deltas: n * 4 numpy array, ground-truth boxes 17 | """ 18 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 19 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 20 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 21 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 22 | 23 | assert np.min(ex_widths) > 0.1 and np.min(ex_heights) > 0.1, \ 24 | 'Invalid boxes found: {} {}'. 
\ 25 | format(ex_rois[np.argmin(ex_widths), :], ex_rois[np.argmin(ex_heights), :]) 26 | 27 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 28 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 29 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 30 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 31 | 32 | # warnings.catch_warnings() 33 | # warnings.filterwarnings('error') 34 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 35 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 36 | targets_dw = np.log(gt_widths / ex_widths) 37 | targets_dh = np.log(gt_heights / ex_heights) 38 | 39 | targets = np.vstack( 40 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 41 | return targets 42 | 43 | def bbox_transform_inv(boxes, deltas): 44 | if boxes.shape[0] == 0: 45 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) 46 | 47 | boxes = boxes.astype(deltas.dtype, copy=False) 48 | 49 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 50 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 51 | ctr_x = boxes[:, 0] + 0.5 * widths 52 | ctr_y = boxes[:, 1] + 0.5 * heights 53 | 54 | dx = deltas[:, 0::4] 55 | dy = deltas[:, 1::4] 56 | dw = deltas[:, 2::4] 57 | dh = deltas[:, 3::4] 58 | 59 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 60 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 61 | pred_w = np.exp(dw) * widths[:, np.newaxis] 62 | pred_h = np.exp(dh) * heights[:, np.newaxis] 63 | 64 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) 65 | # x1 66 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 67 | # y1 68 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 69 | # x2 70 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w 71 | # y2 72 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h 73 | 74 | return pred_boxes 75 | 76 | def clip_boxes(boxes, im_shape): 77 | """ 78 | Clip boxes to image boundaries. 79 | """ 80 | 81 | # x1 >= 0 82 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 83 | # y1 >= 0 84 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 85 | # x2 < im_shape[1] 86 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 87 | # y2 < im_shape[0] 88 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 89 | return boxes 90 | -------------------------------------------------------------------------------- /lib/fast_rcnn/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | from .config import cfg 10 | from ..nms.gpu_nms import gpu_nms 11 | from ..nms.cpu_nms import cpu_nms 12 | 13 | def nms(dets, thresh, force_cpu=False): 14 | """Dispatch to either CPU or GPU NMS implementations.""" 15 | 16 | if dets.shape[0] == 0: 17 | return [] 18 | if cfg.USE_GPU_NMS and not force_cpu: 19 | return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 20 | else: 21 | return cpu_nms(dets, thresh) 22 | 23 | def nms_wrapper(scores, boxes, threshold = 0.7, class_sets = None): 24 | """ 25 | post-process the results of im_detect 26 | :param scores: N * (K * 4) numpy 27 | :param boxes: N * K numpy 28 | :param class_sets: e.g. 
CLASSES = ('__background__','person','bike','motorbike','car','bus') 29 | :return: a list of K-1 dicts, no background, each is {'class': classname, 'dets': None | [[x1,y1,x2,y2,score],...]} 30 | """ 31 | num_class = scores.shape[1] if class_sets is None else len(class_sets) 32 | assert num_class * 4 == boxes.shape[1],\ 33 | 'Detection scores and boxes dont match' 34 | class_sets = ['class_' + str(i) for i in range(0, num_class)] if class_sets is None else class_sets 35 | 36 | res = [] 37 | for ind, cls in enumerate(class_sets[1:]): 38 | ind += 1 # skip background 39 | cls_boxes = boxes[:, 4*ind : 4*(ind+1)] 40 | cls_scores = scores[:, ind] 41 | # if ind == 1: 42 | print(np.max(cls_scores)) 43 | dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) 44 | keep = nms(dets, thresh=0.3) 45 | dets = dets[keep, :] 46 | # if ind == 1: 47 | print(np.max(dets[:, 4])) 48 | dets = dets[np.where(dets[:, 4] > threshold)] 49 | r = {} 50 | if dets.shape[0] > 0: 51 | r['class'], r['dets'] = cls, dets 52 | else: 53 | r['class'], r['dets'] = cls, None 54 | res.append(r) 55 | return res -------------------------------------------------------------------------------- /lib/gt_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from . import roidb 8 | # from layer import GtDataLayer -------------------------------------------------------------------------------- /lib/gt_data_layer/layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """The data layer used during training to train a Fast R-CNN network. 9 | 10 | GtDataLayer implements a Caffe Python layer. 
11 | """ 12 | # TODO: make caffe irrelevant, or remove caffe backend from this projcet 13 | import caffe 14 | 15 | import numpy as np 16 | import yaml 17 | from multiprocessing import Process, Queue 18 | 19 | from .minibatch import get_minibatch 20 | 21 | # TODO: make fast_rcnn irrelevant 22 | # >>>> obsolete, because it depends on sth outside of this project 23 | from ..fast_rcnn.config import cfg 24 | # <<<< obsolete 25 | 26 | class GtDataLayer(caffe.Layer): 27 | """Fast R-CNN data layer used for training.""" 28 | 29 | def _shuffle_roidb_inds(self): 30 | """Randomly permute the training roidb.""" 31 | self._perm = np.random.permutation(np.arange(len(self._roidb))) 32 | self._cur = 0 33 | 34 | def _get_next_minibatch_inds(self): 35 | """Return the roidb indices for the next minibatch.""" 36 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): 37 | self._shuffle_roidb_inds() 38 | 39 | db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] 40 | self._cur += cfg.TRAIN.IMS_PER_BATCH 41 | 42 | """ 43 | # sample images with gt objects 44 | db_inds = np.zeros((cfg.TRAIN.IMS_PER_BATCH), dtype=np.int32) 45 | i = 0 46 | while (i < cfg.TRAIN.IMS_PER_BATCH): 47 | ind = self._perm[self._cur] 48 | num_objs = self._roidb[ind]['boxes'].shape[0] 49 | if num_objs != 0: 50 | db_inds[i] = ind 51 | i += 1 52 | 53 | self._cur += 1 54 | if self._cur >= len(self._roidb): 55 | self._shuffle_roidb_inds() 56 | """ 57 | 58 | return db_inds 59 | 60 | def _get_next_minibatch(self): 61 | """Return the blobs to be used for the next minibatch.""" 62 | db_inds = self._get_next_minibatch_inds() 63 | minibatch_db = [self._roidb[i] for i in db_inds] 64 | return get_minibatch(minibatch_db, self._num_classes) 65 | 66 | # this function is called in training the net 67 | def set_roidb(self, roidb): 68 | """Set the roidb to be used by this layer during training.""" 69 | self._roidb = roidb 70 | self._shuffle_roidb_inds() 71 | 72 | def setup(self, bottom, top): 73 | """Setup the GtDataLayer.""" 74 | 75 | # parse the layer parameter string, which must be valid YAML 76 | layer_params = yaml.load(self.param_str_) 77 | 78 | self._num_classes = layer_params['num_classes'] 79 | 80 | self._name_to_top_map = { 81 | 'data': 0, 82 | 'info_boxes': 1, 83 | 'parameters': 2} 84 | 85 | # data blob: holds a batch of N images, each with 3 channels 86 | # The height and width (100 x 100) are dummy values 87 | num_scale_base = len(cfg.TRAIN.SCALES_BASE) 88 | top[0].reshape(num_scale_base, 3, 100, 100) 89 | 90 | # info boxes blob 91 | top[1].reshape(1, 18) 92 | 93 | # parameters blob 94 | num_scale = len(cfg.TRAIN.SCALES) 95 | num_aspect = len(cfg.TRAIN.ASPECTS) 96 | top[2].reshape(2 + 2*num_scale + 2*num_aspect) 97 | 98 | def forward(self, bottom, top): 99 | """Get blobs and copy them into this layer's top blob vector.""" 100 | blobs = self._get_next_minibatch() 101 | 102 | for blob_name, blob in blobs.items(): 103 | top_ind = self._name_to_top_map[blob_name] 104 | # Reshape net's input blobs 105 | top[top_ind].reshape(*(blob.shape)) 106 | # Copy data into net's input blobs 107 | top[top_ind].data[...] 
= blob.astype(np.float32, copy=False) 108 | 109 | def backward(self, top, propagate_down, bottom): 110 | """This layer does not propagate gradients.""" 111 | pass 112 | 113 | def reshape(self, bottom, top): 114 | """Reshaping happens during the call to forward.""" 115 | pass 116 | -------------------------------------------------------------------------------- /lib/gt_data_layer/minibatch.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Compute minibatch blobs for training a Fast R-CNN network.""" 9 | 10 | import numpy as np 11 | import numpy.random as npr 12 | import cv2 13 | 14 | from ..utils.blob import prep_im_for_blob, im_list_to_blob 15 | 16 | # TODO: make fast_rcnn irrelevant 17 | # >>>> obsolete, because it depends on sth outside of this project 18 | from ..fast_rcnn.config import cfg 19 | # <<<< obsolete 20 | 21 | def get_minibatch(roidb, num_classes): 22 | """Given a roidb, construct a minibatch sampled from it.""" 23 | num_images = len(roidb) 24 | assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ 25 | 'num_images ({}) must divide BATCH_SIZE ({})'. \ 26 | format(num_images, cfg.TRAIN.BATCH_SIZE) 27 | 28 | # Get the input image blob, formatted for caffe 29 | im_blob = _get_image_blob(roidb) 30 | 31 | # build the box information blob 32 | info_boxes_blob = np.zeros((0, 18), dtype=np.float32) 33 | num_scale = len(cfg.TRAIN.SCALES) 34 | for i in range(num_images): 35 | info_boxes = roidb[i]['info_boxes'] 36 | 37 | # change the batch index 38 | info_boxes[:,2] += i * num_scale 39 | info_boxes[:,7] += i * num_scale 40 | 41 | info_boxes_blob = np.vstack((info_boxes_blob, info_boxes)) 42 | 43 | # build the parameter blob 44 | num_aspect = len(cfg.TRAIN.ASPECTS) 45 | num = 2 + 2 * num_scale + 2 * num_aspect 46 | parameters_blob = np.zeros((num), dtype=np.float32) 47 | parameters_blob[0] = num_scale 48 | parameters_blob[1] = num_aspect 49 | parameters_blob[2:2+num_scale] = cfg.TRAIN.SCALES 50 | parameters_blob[2+num_scale:2+2*num_scale] = cfg.TRAIN.SCALE_MAPPING 51 | parameters_blob[2+2*num_scale:2+2*num_scale+num_aspect] = cfg.TRAIN.ASPECT_HEIGHTS 52 | parameters_blob[2+2*num_scale+num_aspect:2+2*num_scale+2*num_aspect] = cfg.TRAIN.ASPECT_WIDTHS 53 | 54 | # For debug visualizations 55 | # _vis_minibatch(im_blob, rois_blob, labels_blob, sublabels_blob) 56 | 57 | blobs = {'data': im_blob, 58 | 'info_boxes': info_boxes_blob, 59 | 'parameters': parameters_blob} 60 | 61 | return blobs 62 | 63 | def _get_image_blob(roidb): 64 | """Builds an input blob from the images in the roidb at the different scales. 
65 | """ 66 | num_images = len(roidb) 67 | processed_ims = [] 68 | 69 | for i in range(num_images): 70 | # read image 71 | im = cv2.imread(roidb[i]['image']) 72 | if roidb[i]['flipped']: 73 | im = im[:, ::-1, :] 74 | 75 | im_orig = im.astype(np.float32, copy=True) 76 | im_orig -= cfg.PIXEL_MEANS 77 | 78 | # build image pyramid 79 | for im_scale in cfg.TRAIN.SCALES_BASE: 80 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, 81 | interpolation=cv2.INTER_LINEAR) 82 | 83 | processed_ims.append(im) 84 | 85 | # Create a blob to hold the input images 86 | blob = im_list_to_blob(processed_ims) 87 | 88 | return blob 89 | 90 | def _project_im_rois(im_rois, im_scale_factor): 91 | """Project image RoIs into the rescaled training image.""" 92 | rois = im_rois * im_scale_factor 93 | return rois 94 | 95 | def _get_bbox_regression_labels(bbox_target_data, num_classes): 96 | """Bounding-box regression targets are stored in a compact form in the 97 | roidb. 98 | 99 | This function expands those targets into the 4-of-4*K representation used 100 | by the network (i.e. only one class has non-zero targets). The loss weights 101 | are similarly expanded. 102 | 103 | Returns: 104 | bbox_target_data (ndarray): N x 4K blob of regression targets 105 | bbox_loss_weights (ndarray): N x 4K blob of loss weights 106 | """ 107 | clss = bbox_target_data[:, 0] 108 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) 109 | bbox_loss_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 110 | inds = np.where(clss > 0)[0] 111 | for ind in inds: 112 | cls = clss[ind] 113 | start = 4 * cls 114 | end = start + 4 115 | bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] 116 | bbox_loss_weights[ind, start:end] = [1., 1., 1., 1.] 117 | return bbox_targets, bbox_loss_weights 118 | 119 | 120 | def _vis_minibatch(im_blob, rois_blob, labels_blob, sublabels_blob): 121 | """Visualize a mini-batch for debugging.""" 122 | import matplotlib.pyplot as plt 123 | for i in range(rois_blob.shape[0]): 124 | rois = rois_blob[i, :] 125 | im_ind = rois[0] 126 | roi = rois[2:] 127 | im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy() 128 | im += cfg.PIXEL_MEANS 129 | im = im[:, :, (2, 1, 0)] 130 | im = im.astype(np.uint8) 131 | cls = labels_blob[i] 132 | subcls = sublabels_blob[i] 133 | plt.imshow(im) 134 | print('class: ', cls, ' subclass: ', subcls) 135 | plt.gca().add_patch( 136 | plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0], 137 | roi[3] - roi[1], fill=False, 138 | edgecolor='r', linewidth=3) 139 | ) 140 | plt.show() 141 | -------------------------------------------------------------------------------- /lib/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') 3 | TF_LIB=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())') 4 | NSYNC_INC=$TF_INC"/external/nsync/public" 5 | # please modify $ARCH according to the following list and your gpu model. 6 | ARCH=sm_60 7 | echo $TF_INC 8 | 9 | 10 | # If coming across: cudaCheckError() failed : invalid device function. change -arch=sm_xx accordingly. 11 | 12 | # Which CUDA capabilities do we want to pre-build for? 
13 | # https://developer.nvidia.com/cuda-gpus 14 | # Compute/shader model Cards 15 | # 6.1 P4, P40, Titan X so CUDA_MODEL = 61 16 | # 6.0 P100 so CUDA_MODEL = 60 17 | # 5.2 M40 18 | # 3.7 K80 19 | # 3.5 K40, K20 20 | # 3.0 K10, Grid K520 (AWS G2) 21 | # Other Nvidia shader models should work, but they will require extra startup 22 | # time as the code is pre-optimized for them. 23 | # CUDA_MODELS=30 35 37 52 60 61 24 | 25 | 26 | 27 | CUDA_HOME=/usr/local/cuda/ 28 | 29 | if [ ! -f $TF_INC/tensorflow/stream_executor/cuda/cuda_config.h ]; then 30 | cp ./cuda_config.h $TF_INC/tensorflow/stream_executor/cuda/ 31 | fi 32 | 33 | cd roi_pooling_layer 34 | 35 | #nvcc -std=c++11 -ccbin=/usr/bin/g++-4.9 -c -o roi_pooling_op.cu.o roi_pooling_op_gpu.cu.cc \ 36 | # -I $TF_INC -D GOOGLE_CUDA=1 -L $CUDA_HOME/lib64 -x cu -Xcompiler -fPIC -arch=$ARCH --expt-relaxed-constexpr 37 | 38 | ## if you install tf using already-built binary, or gcc version 4.x, uncomment the two lines below 39 | #g++-4.9 -std=c++11 -shared -o roi_pooling.so roi_pooling_op.cc \ 40 | # roi_pooling_op.cu.o -I TF_INC -fPIC -lcudart -L CUDA_HOME/lib64 -D GOOGLE_CUDA=1 -Wfatal-errors -I $CUDA_HOME/include -D_GLIBCXX_USE_CXX11_ABI=0 41 | 42 | # for gcc5-built tf 43 | # g++ -std=c++11 -shared -o roi_pooling.so roi_pooling_op.cc \ 44 | # roi_pooling_op.cu.o -I $TF_INC -I $NSYNC_INC -fPIC -D GOOGLE_CUDA -lcudart -L $CUDA_HOME/lib64 -L $TF_LIB -ltensorflow_framework -D_GLIBCXX_USE_CXX11_ABI=0 45 | cd .. 46 | 47 | 48 | # add building psroi_pooling layer 49 | cd psroi_pooling_layer 50 | nvcc -std=c++11 -ccbin=/usr/bin/g++-4.9 -c -o psroi_pooling_op.cu.o psroi_pooling_op_gpu.cu.cc \ 51 | -I $TF_INC -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -L /usr/local/cuda-8.0/lib64/ -arch=$ARCH --expt-relaxed-constexpr 52 | 53 | 54 | ## if you install tf using already-built binary, or gcc version 4.x, uncomment the two lines below 55 | g++-4.9 -std=c++11 -shared -o psroi_pooling.so psroi_pooling_op.cc psroi_pooling_op.cu.o -I $TF_INC -fPIC -lcudart \ 56 | -L $CUDA_HOME/lib64 -L $TF_LIB -ltensorflow_framework -D GOOGLE_CUDA=1 -Wfatal-errors -L $TF_LIB -I $CUDA_HOME/include -D_GLIBCXX_USE_CXX11_ABI=0 57 | # for gcc5-built tf 58 | #g++ -std=c++11 -shared -o psroi_pooling.so psroi_pooling_op.cc \ 59 | # psroi_pooling_op.cu.o -I $TF_INC -I $NSYNC_INC -fPIC -D GOOGLE_CUDA -lcudart -L $CUDA_HOME/lib64 -D_GLIBCXX_USE_CXX11_ABI=0 60 | 61 | cd .. 62 | 63 | cd deform_psroi_pooling_layer 64 | nvcc -std=c++11 -ccbin=/usr/bin/g++-4.9 -c -o deform_psroi_pooling_op.cu.o deform_psroi_pooling_op_gpu.cu.cc \ 65 | -I $TF_INC -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -L /usr/local/cuda-8.0/lib64/ -arch=$ARCH --expt-relaxed-constexpr -L $TF_LIB -ltensorflow_framework 66 | 67 | ## if you install tf using already-built binary, or gcc version 4.x, uncomment the three lines below 68 | g++-4.9 -std=c++11 -shared -o deform_psroi_pooling.so deform_psroi_pooling_op.cc deform_psroi_pooling_op.cu.o -I $TF_INC -fPIC -lcudart \ 69 | -L $CUDA_HOME/lib64 -L $TF_LIB -ltensorflow_framework -D GOOGLE_CUDA=1 -Wfatal-errors -L $TF_LIB -I $CUDA_HOME/include -D_GLIBCXX_USE_CXX11_ABI=0 70 | # for gcc5-built tf 71 | #g++ -std=c++11 -shared -o deform_psroi_pooling.so deform_psroi_pooling_op.cc deform_psroi_pooling_op.cu.o \ 72 | # -I $TF_INC -I $NSYNC_INC -fPIC -D GOOGLE_CUDA -lcudart -L $CUDA_HOME/lib64 -D_GLIBCXX_USE_CXX11_ABI=0 -L $TF_LIB -ltensorflow_framework 73 | cd .. 
74 | 75 | cd deform_conv_layer 76 | nvcc -std=c++11 -ccbin=/usr/bin/g++-4.9 -c -o deform_conv.cu.o deform_conv.cu.cc \ 77 | -I $TF_INC -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -L /usr/local/cuda-8.0/lib64/ -arch=$ARCH --expt-relaxed-constexpr 78 | ## if you install tf using already-built binary, or gcc version 4.x, uncomment the three lines below 79 | g++-4.9 -std=c++11 -shared -o deform_conv.so deform_conv.cc deform_conv.cu.o -I $TF_INC -fPIC -lcudart \ 80 | -L $CUDA_HOME/lib64 -L $TF_LIB -ltensorflow_framework -D GOOGLE_CUDA=1 -Wfatal-errors -L $TF_LIB -I $CUDA_HOME/include -D_GLIBCXX_USE_CXX11_ABI=0 81 | # for gcc5-built tf 82 | #g++ -std=c++11 -shared -o deform_conv.so deform_conv.cc deform_conv.cu.o \ 83 | # -I $TF_INC -I $NSYNC_INC -fPIC -D GOOGLE_CUDA -lcudart -L $CUDA_HOME/lib64 -L $TF_LIB -ltensorflow_framework -D_GLIBCXX_USE_CXX11_ABI=0 84 | 85 | cd .. 86 | 87 | -------------------------------------------------------------------------------- /lib/networks/.VGGnet.py.swo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/networks/.VGGnet.py.swo -------------------------------------------------------------------------------- /lib/networks/VGGnet_test.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .network import Network 3 | from ..fast_rcnn.config import cfg 4 | 5 | 6 | class VGGnet_test(Network): 7 | def __init__(self, trainable=True): 8 | self.inputs = [] 9 | self.data = tf.placeholder(tf.float32, shape=[None, None, None, 3]) 10 | self.im_info = tf.placeholder(tf.float32, shape=[None, 3]) 11 | self.keep_prob = tf.placeholder(tf.float32) 12 | self.layers = dict({'data': self.data, 'im_info': self.im_info}) 13 | self.trainable = trainable 14 | self.setup() 15 | 16 | def setup(self): 17 | # n_classes = 21 18 | n_classes = cfg.NCLASSES 19 | # anchor_scales = [8, 16, 32] 20 | anchor_scales = cfg.ANCHOR_SCALES 21 | _feat_stride = [16, ] 22 | 23 | (self.feed('data') 24 | .conv(3, 3, 64, 1, 1, name='conv1_1', trainable=False) 25 | .conv(3, 3, 64, 1, 1, name='conv1_2', trainable=False) 26 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool1') 27 | .conv(3, 3, 128, 1, 1, name='conv2_1', trainable=False) 28 | .conv(3, 3, 128, 1, 1, name='conv2_2', trainable=False) 29 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool2') 30 | .conv(3, 3, 256, 1, 1, name='conv3_1') 31 | .conv(3, 3, 256, 1, 1, name='conv3_2') 32 | .conv(3, 3, 256, 1, 1, name='conv3_3') 33 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool3') 34 | .conv(3, 3, 512, 1, 1, name='conv4_1') 35 | .conv(3, 3, 512, 1, 1, name='conv4_2') 36 | .conv(3, 3, 512, 1, 1, name='conv4_3') 37 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool4') 38 | .conv(3, 3, 512, 1, 1, name='conv5_1') 39 | .conv(3, 3, 512, 1, 1, name='conv5_2') 40 | .conv(3, 3, 512, 1, 1, name='conv5_3')) 41 | 42 | (self.feed('conv5_3') 43 | .conv(3, 3, 512, 1, 1, name='rpn_conv/3x3') 44 | .conv(1, 1, len(anchor_scales) * 3 * 2, 1, 1, padding='VALID', relu=False, name='rpn_cls_score')) 45 | 46 | (self.feed('rpn_conv/3x3') 47 | .conv(1, 1, len(anchor_scales) * 3 * 4, 1, 1, padding='VALID', relu=False, name='rpn_bbox_pred')) 48 | 49 | # shape is (1, H, W, Ax2) -> (1, H, WxA, 2) 50 | (self.feed('rpn_cls_score') 51 | .spatial_reshape_layer(2, name='rpn_cls_score_reshape') 52 | .spatial_softmax(name='rpn_cls_prob')) 53 | 54 | # shape is (1, H, WxA, 2) -> (1, H, W, 
Ax2) 55 | (self.feed('rpn_cls_prob') 56 | .spatial_reshape_layer(len(anchor_scales) * 3 * 2, name='rpn_cls_prob_reshape')) 57 | 58 | (self.feed('rpn_cls_prob_reshape', 'rpn_bbox_pred', 'im_info') 59 | .proposal_layer(_feat_stride, anchor_scales, 'TEST', name='rois')) 60 | 61 | (self.feed('conv5_3') 62 | .conv(3, 3, 72, 1, 1, biased=True, rate=2, relu=False, name='conv6_1_offset', padding='SAME', initializer='zeros')) 63 | (self.feed('conv5_3', 'conv6_1_offset') 64 | .deform_conv(3, 3, 512, 1, 1, biased=False, rate=2, relu=True, num_deform_group=4, name='conv6_1')) 65 | (self.feed('conv6_1') 66 | .conv(3, 3, 72, 1, 1, biased=True, rate=2, relu=False, name='conv6_2_offset', padding='SAME', initializer='zeros')) 67 | (self.feed('conv6_1', 'conv6_2_offset') 68 | .deform_conv(3, 3, 512, 1, 1, biased=False, rate=2, relu=True, num_deform_group=4, name='conv6_2')) 69 | (self.feed('conv6_2', 'rois') 70 | .deform_psroi_pool(group_size=1, pooled_size=7, sample_per_part=4, no_trans=True, part_size=7, output_dim=256, trans_std=1e-1, spatial_scale=0.0625, name='offset_t') 71 | .fc(num_out=7 * 7 * 2, name='offset', relu=False) 72 | .reshape(shape=(-1,2,7,7), name='offset_reshape')) 73 | (self.feed('conv6_2', 'rois', 'offset_reshape') 74 | .deform_psroi_pool(group_size=1, pooled_size=7, sample_per_part=4, no_trans=False, part_size=7, output_dim=256, trans_std=1e-1, spatial_scale=0.0625, name='pool_6') 75 | .fc(4096, name='fc6') 76 | .dropout(0.5, name='drop6') 77 | .fc(4096, name='fc7') 78 | .dropout(0.5, name='drop7') 79 | .fc(n_classes, relu=False, name='cls_score') 80 | .softmax(name='cls_prob')) 81 | 82 | (self.feed('drop7') 83 | .fc(n_classes*4, relu=False, name='bbox_pred')) 84 | 85 | -------------------------------------------------------------------------------- /lib/networks/VGGnet_train.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .network import Network 3 | from ..fast_rcnn.config import cfg 4 | 5 | class VGGnet_train(Network): 6 | def __init__(self, trainable=True): 7 | self.inputs = [] 8 | self.data = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='data') 9 | self.im_info = tf.placeholder(tf.float32, shape=[None, 3], name='im_info') 10 | self.gt_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='gt_boxes') 11 | self.gt_ishard = tf.placeholder(tf.int32, shape=[None], name='gt_ishard') 12 | self.dontcare_areas = tf.placeholder(tf.float32, shape=[None, 4], name='dontcare_areas') 13 | self.keep_prob = tf.placeholder(tf.float32) 14 | self.layers = dict({'data':self.data, 'im_info':self.im_info, 'gt_boxes':self.gt_boxes,\ 15 | 'gt_ishard': self.gt_ishard, 'dontcare_areas': self.dontcare_areas}) 16 | self.trainable = trainable 17 | self.setup() 18 | 19 | def setup(self): 20 | 21 | # n_classes = 21 22 | n_classes = cfg.NCLASSES 23 | # anchor_scales = [8, 16, 32] 24 | anchor_scales = cfg.ANCHOR_SCALES 25 | _feat_stride = [16, ] 26 | 27 | (self.feed('data') 28 | .conv(3, 3, 64, 1, 1, name='conv1_1', trainable=False) 29 | .conv(3, 3, 64, 1, 1, name='conv1_2', trainable=False) 30 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool1') 31 | .conv(3, 3, 128, 1, 1, name='conv2_1', trainable=False) 32 | .conv(3, 3, 128, 1, 1, name='conv2_2', trainable=False) 33 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool2') 34 | .conv(3, 3, 256, 1, 1, name='conv3_1') 35 | .conv(3, 3, 256, 1, 1, name='conv3_2') 36 | .conv(3, 3, 256, 1, 1, name='conv3_3') 37 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool3') 38 | 
.conv(3, 3, 512, 1, 1, name='conv4_1') 39 | .conv(3, 3, 512, 1, 1, name='conv4_2') 40 | .conv(3, 3, 512, 1, 1, name='conv4_3') 41 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool4') 42 | .conv(3, 3, 512, 1, 1, name='conv5_1') 43 | .conv(3, 3, 512, 1, 1, name='conv5_2') 44 | .conv(3, 3, 512, 1, 1, name='conv5_3')) 45 | #========= RPN ============ 46 | (self.feed('conv5_3') 47 | .conv(3,3,512,1,1,name='rpn_conv/3x3')) 48 | 49 | # Loss of rpn_cls & rpn_boxes 50 | # shape is (1, H, W, A x 4) and (1, H, W, A x 2) 51 | (self.feed('rpn_conv/3x3') 52 | .conv(1,1,len(anchor_scales) * 3 * 4, 1, 1, padding='VALID', relu = False, name='rpn_bbox_pred')) 53 | (self.feed('rpn_conv/3x3') 54 | .conv(1, 1, len(anchor_scales) * 3 * 2, 1, 1, padding='VALID', relu=False, name='rpn_cls_score')) 55 | 56 | # generating training labels on the fly 57 | # output: rpn_labels(HxWxA, 2) rpn_bbox_targets(HxWxA, 4) rpn_bbox_inside_weights rpn_bbox_outside_weights 58 | (self.feed('rpn_cls_score', 'gt_boxes', 'gt_ishard', 'dontcare_areas', 'im_info') 59 | .anchor_target_layer(_feat_stride, anchor_scales, name = 'rpn-data' )) 60 | 61 | # shape is (1, H, W, Ax2) -> (1, H, WxA, 2) 62 | (self.feed('rpn_cls_score') 63 | .spatial_reshape_layer(2, name = 'rpn_cls_score_reshape') 64 | .spatial_softmax(name='rpn_cls_prob')) 65 | 66 | # shape is (1, H, WxA, 2) -> (1, H, W, Ax2) 67 | (self.feed('rpn_cls_prob') 68 | .spatial_reshape_layer(len(anchor_scales)*3*2, name = 'rpn_cls_prob_reshape')) 69 | 70 | # ========= RoI Proposal ============ 71 | # add the delta(output) to anchors then 72 | # choose some reasonabel boxes, considering scores, ratios, size and iou 73 | # rpn_rois <- (1 x H x W x A, 5) e.g. [0, x1, y1, x2, y2] 74 | (self.feed('rpn_cls_prob_reshape','rpn_bbox_pred','im_info') 75 | .proposal_layer(_feat_stride, anchor_scales, 'TRAIN', name = 'rpn_rois')) 76 | 77 | # matching boxes and groundtruth, 78 | # and randomly sample some rois and labels for RCNN 79 | (self.feed('rpn_rois','gt_boxes', 'gt_ishard', 'dontcare_areas') 80 | .proposal_target_layer(n_classes,name = 'roi-data')) 81 | 82 | #========= RCNN ============ 83 | (self.feed('conv5_3') 84 | .conv(3, 3, 72, 1, 1, biased=True, rate=2, relu=False, name='conv6_1_offset', padding='SAME', initializer='zeros')) 85 | (self.feed('conv5_3', 'conv6_1_offset') 86 | .deform_conv(3, 3, 512, 1, 1, biased=False, rate=2, relu=True, num_deform_group=4, name='conv6_1')) 87 | (self.feed('conv6_1') 88 | .conv(3, 3, 72, 1, 1, biased=True, rate=2, relu=False, name='conv6_2_offset', padding='SAME', initializer='zeros')) 89 | (self.feed('conv6_1', 'conv6_2_offset') 90 | .deform_conv(3, 3, 512, 1, 1, biased=False, rate=2, relu=True, num_deform_group=4, name='conv6_2')) 91 | (self.feed('conv6_2', 'rois') 92 | .deform_psroi_pool(group_size=1, pooled_size=7, sample_per_part=4, no_trans=True, part_size=7, output_dim=256, trans_std=1e-1, spatial_scale=0.0625, name='offset_t') 93 | .fc(num_out=7 * 7 * 2, name='offset', relu=False) 94 | .reshape(shape=(-1,2,7,7), name='offset_reshape')) 95 | (self.feed('conv6_2', 'rois', 'offset_reshape') 96 | .deform_psroi_pool(group_size=1, pooled_size=7, sample_per_part=4, no_trans=False, part_size=7, output_dim=256, trans_std=1e-1, spatial_scale=0.0625, name='pool_6') 97 | .fc(4096, name='fc6') 98 | .dropout(0.5, name='drop6') 99 | .fc(4096, name='fc7') 100 | .dropout(0.5, name='drop7') 101 | .fc(n_classes, relu=False, name='cls_score') 102 | .softmax(name='cls_prob')) 103 | 104 | (self.feed('drop7') 105 | .fc(n_classes*4, relu=False, name='bbox_pred')) 106 
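# (added note) The channel counts in the layers above follow from the settings used
# here: the RPN heads emit len(anchor_scales) * 3 anchors per location (the factor 3
# presumably being the number of anchor aspect ratios), hence "* 2" channels for the
# fg/bg scores and "* 4" for the box deltas, while the 72 channels of the
# 'conv6_*_offset' layers are consistent with 3*3 (kernel) * 2 (x/y offsets) * 4
# (num_deform_group) for the deformable convolutions they feed.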
| -------------------------------------------------------------------------------- /lib/networks/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from .VGGnet_train import VGGnet_train 9 | from .VGGnet_test import VGGnet_test 10 | from .Resnet50_train import Resnet50_train 11 | from .Resnet50_test import Resnet50_test 12 | from .Resnet101_train import Resnet101_train 13 | from .Resnet101_test import Resnet101_test 14 | from . import factory 15 | -------------------------------------------------------------------------------- /lib/networks/caffenet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .network import Network 3 | 4 | class caffenet(Network): 5 | def __init__(self, trainable=True): 6 | self.inputs = [] 7 | self.data = tf.placeholder(tf.float32, shape=[None, None, None, 3]) 8 | self.rois = tf.placeholder(tf.float32, shape=[None, 5]) 9 | self.keep_prob = tf.placeholder(tf.float32) 10 | self.layers = dict({'data':self.data, 'rois':self.rois}) 11 | self.trainable = trainable 12 | self.setup() 13 | 14 | def setup(self): 15 | (self.feed('data') 16 | .conv(11, 11, 96, 4, 4, padding='VALID', name='conv1', trainable=False) 17 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool1') 18 | .lrn(2, 2e-05, 0.75, name='norm1') 19 | .conv(5, 5, 256, 1, 1, group=2, name='conv2') 20 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') 21 | .lrn(2, 2e-05, 0.75, name='norm2') 22 | .conv(3, 3, 384, 1, 1, name='conv3') 23 | .conv(3, 3, 384, 1, 1, group=2, name='conv4') 24 | .conv(3, 3, 256, 1, 1, group=2, name='conv5') 25 | .feature_extrapolating([1.0, 2.0, 3.0, 4.0], 4, 4, name='conv5_feature')) 26 | 27 | (self.feed('conv5_feature','im_info') 28 | .conv(3,3,) 29 | 30 | (self.feed('conv5_feature', 'rois') 31 | .roi_pool(6, 6, 1.0/16, name='pool5') 32 | .fc(4096, name='fc6') 33 | .dropout(self.keep_prob, name='drop6') 34 | .fc(4096, name='fc7') 35 | .dropout(self.keep_prob, name='drop7') 36 | .fc(174, relu=False, name='subcls_score') 37 | .softmax(name='subcls_prob')) 38 | 39 | (self.feed('subcls_score') 40 | .fc(4, relu=False, name='cls_score') 41 | .softmax(name='cls_prob')) 42 | 43 | (self.feed('subcls_score') 44 | .fc(16, relu=False, name='bbox_pred'))) 45 | -------------------------------------------------------------------------------- /lib/networks/factory.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SubCNN_TF 3 | # Copyright (c) 2016 CVGL Stanford 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yu Xiang 6 | # -------------------------------------------------------- 7 | 8 | """Factory method for easily getting imdbs by name.""" 9 | 10 | __sets = {} 11 | 12 | from .VGGnet_test import VGGnet_test 13 | from .VGGnet_testold import VGGnet_testold 14 | from .VGGnet_train import VGGnet_train 15 | from .Resnet50_test import Resnet50_test 16 | from .Resnet50_train import Resnet50_train 17 | from .Resnet101_test import Resnet101_test 18 | from .Resnet101_train import Resnet101_train 19 | from .PVAnet_train import PVAnet_train 20 | from .PVAnet_test import PVAnet_test 21 | 22 | 
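# (added note) VGGnet_testold, PVAnet_train and PVAnet_test are imported above, but
# no matching modules appear under lib/networks in this repository, so importing this
# factory will fail with ImportError unless those files are added or the unused
# imports (and the corresponding branches of get_network below) are removed. Note
# also that __sets is never populated, so list_networks() always returns an empty
# list. get_network expects names of the form '<backbone>_<train|test>', e.g.
# get_network('VGGnet_train') or get_network('Resnet50_test').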
23 | def get_network(name): 24 | """Get a network by name.""" 25 | if name.split('_')[0] == 'VGGnet': 26 | if name.split('_')[1] == 'test': 27 | return VGGnet_test() 28 | elif name.split('_')[1] == 'train': 29 | return VGGnet_train() 30 | elif name.split('_')[1] == 'testold': 31 | return VGGnet_testold() 32 | else: 33 | raise KeyError('Unknown dataset: {}'.format(name)) 34 | elif name.split('_')[0] == 'Resnet50': 35 | if name.split('_')[1] == 'test': 36 | return Resnet50_test() 37 | elif name.split('_')[1] == 'train': 38 | return Resnet50_train() 39 | else: 40 | raise KeyError('Unknown dataset: {}'.format(name)) 41 | elif name.split('_')[0] == 'Resnet101': 42 | if name.split('_')[1] == 'test': 43 | return Resnet101_test() 44 | elif name.split('_')[1] == 'train': 45 | return Resnet101_train() 46 | else: 47 | raise KeyError('Unknown dataset: {}'.format(name)) 48 | elif name.split('_')[0] == 'PVAnet': 49 | if name.split('_')[1] == 'test': 50 | return PVAnet_test() 51 | elif name.split('_')[1] == 'train': 52 | return PVAnet_train() 53 | else: 54 | raise KeyError('Unknown dataset: {}'.format(name)) 55 | else: 56 | raise KeyError('Unknown dataset: {}'.format(name)) 57 | 58 | def list_networks(): 59 | """List all registered imdbs.""" 60 | return list(__sets.keys()) 61 | -------------------------------------------------------------------------------- /lib/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef 
np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /lib/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include 10 | #include 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), 
height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(¤t_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 
88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | -------------------------------------------------------------------------------- /lib/nms/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def py_cpu_nms(dets, thresh): 11 | """Pure Python NMS baseline.""" 12 | x1 = dets[:, 0] 13 | y1 = dets[:, 1] 14 | x2 = dets[:, 2] 15 | y2 = dets[:, 3] 16 | scores = dets[:, 4] 17 | 18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 19 | order = scores.argsort()[::-1] 20 | 21 | keep = [] 22 | while order.size > 0: 23 | i = order[0] 24 | keep.append(i) 25 | xx1 = np.maximum(x1[i], x1[order[1:]]) 26 | yy1 = np.maximum(y1[i], y1[order[1:]]) 27 | xx2 = np.minimum(x2[i], x2[order[1:]]) 28 | yy2 = np.minimum(y2[i], y2[order[1:]]) 29 | 30 | w = np.maximum(0.0, xx2 - xx1 + 1) 31 | h = np.maximum(0.0, yy2 - yy1 + 1) 32 | inter = w * h 33 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 34 | 35 | inds = np.where(ovr <= thresh)[0] 36 | order = order[inds + 1] 37 | 38 | return keep 39 | -------------------------------------------------------------------------------- /lib/psroi_pooling_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # R-FCN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Revised by Minyue Jiang 6 | # -------------------------------------------------------- 7 | 
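The pure-Python NMS in lib/nms/py_cpu_nms.py above takes an (N, 5) float array of [x1, y1, x2, y2, score] rows plus an IoU threshold and returns the indices of the boxes to keep; gpu_nms in gpu_nms.pyx exposes the same dets/thresh interface with an extra device_id. A minimal usage sketch for the CPU version follows; the boxes and the 0.5 threshold are made-up illustrative values, and the import path assumes the repository root is on sys.path.

import numpy as np
from lib.nms.py_cpu_nms import py_cpu_nms

# Each row is [x1, y1, x2, y2, score]; box 1 overlaps box 0 heavily, box 2 is disjoint.
dets = np.array([[10., 10., 50., 50., 0.9],
                 [12., 12., 52., 52., 0.8],
                 [100., 100., 140., 140., 0.7]], dtype=np.float32)

keep = py_cpu_nms(dets, 0.5)
print(keep)  # [0, 2]: the lower-scoring overlapping box (index 1) is suppressed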
-------------------------------------------------------------------------------- /lib/psroi_pooling_layer/psroi_pooling_op.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os.path as osp 3 | 4 | filename = osp.join(osp.dirname(__file__), 'psroi_pooling.so') 5 | _psroi_pooling_module = tf.load_op_library(filename) 6 | psroi_pool = _psroi_pooling_module.psroi_pool 7 | psroi_pool_grad = _psroi_pooling_module.psroi_pool_grad -------------------------------------------------------------------------------- /lib/psroi_pooling_layer/psroi_pooling_op_gpu.h: -------------------------------------------------------------------------------- 1 | #if !GOOGLE_CUDA 2 | #error This file must only be included when building with Cuda support 3 | #endif 4 | 5 | #ifndef TENSORFLOW_USER_OPS_PSROIPOOLING_OP_GPU_H_ 6 | #define TENSORFLOW_USER_OPS_PSROIPOOLING_OP_GPU_H_ 7 | 8 | #define EIGEN_USE_GPU 9 | 10 | #include "tensorflow/core/framework/tensor_types.h" 11 | #include "tensorflow/core/platform/types.h" 12 | 13 | namespace tensorflow { 14 | 15 | // Run the forward pass of max pooling, optionally writing the argmax indices to 16 | // the mask array, if it is not nullptr. If mask is passed in as nullptr, the 17 | // argmax indices are not written. 18 | bool PSROIPoolForwardLauncher( 19 | const float* bottom_data, const float spatial_scale, const int num_rois, const int channels, const int height, 20 | const int width, const int pooled_height, const int pooled_width, const float* bottom_rois, 21 | const int output_dim, const int group_size, float* top_data, int* mapping_channel, const Eigen::GpuDevice& d); 22 | 23 | bool PSROIPoolBackwardLauncher(const float* top_diff, const int* mapping_channel, const int num_rois, const float spatial_scale, 24 | const int channels, const int height, const int width, const int pooled_height, const int pooled_width, 25 | const int output_dim, float* bottom_diff, const float* bottom_rois, const Eigen::GpuDevice& d); 26 | 27 | } // namespace tensorflow 28 | 29 | #endif // TENSORFLOW_CORE_KERNELS_MAXPOOLING_OP_GPU_H_ -------------------------------------------------------------------------------- /lib/psroi_pooling_layer/psroi_pooling_op_grad.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.framework import ops 3 | from . import psroi_pooling_op 4 | import pdb 5 | 6 | 7 | @tf.RegisterShape("PSROIPool") 8 | def _psroi_pool_shape(op): 9 | """Shape function for the PSROIPool op. 10 | 11 | """ 12 | dims_data = op.inputs[0].get_shape().as_list() 13 | channels = dims_data[3] 14 | dims_rois = op.inputs[1].get_shape().as_list() 15 | num_rois = dims_rois[0] 16 | output_dim = op.get_attr('output_dim') 17 | group_size = op.get_attr('group_size') 18 | pooled_height = group_size 19 | pooled_width = group_size 20 | 21 | output_shape = tf.TensorShape([num_rois, pooled_height, pooled_width, output_dim]) 22 | return [output_shape, output_shape] 23 | 24 | @ops.RegisterGradient("PSROIPool") 25 | def _psroi_pool_grad(op, grad, _): 26 | """The gradients for `PSROI_pool`. 27 | Args: 28 | op: The `roi_pool` `Operation` that we are differentiating, which we can use 29 | to find the inputs and outputs of the original op. 30 | grad: Gradient with respect to the output of the `roi_pool` op. 31 | Returns: 32 | Gradients with respect to the input of `zero_out`. 
33 | """ 34 | 35 | data = op.inputs[0] 36 | rois = op.inputs[1] 37 | mapping_channel = op.outputs[1] 38 | spatial_scale = op.get_attr('spatial_scale') 39 | 40 | # compute gradient 41 | #data_grad = psroi_pooling_op.psroi_pool_grad(data, rois, argmax, grad, pooled_height, pooled_width, spatial_scale) 42 | data_grad = psroi_pooling_op.psroi_pool_grad(data, rois, mapping_channel, grad, spatial_scale) 43 | 44 | return [data_grad, None] # List of one Tensor, since we have one input 45 | 46 | -------------------------------------------------------------------------------- /lib/psroi_pooling_layer/psroi_pooling_op_test.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from . import psroi_pooling_op 4 | from . import psroi_pooling_op_grad 5 | import pdb 6 | 7 | pdb.set_trace() 8 | 9 | rois = tf.convert_to_tensor([ [0, 0, 0, 4, 4]], dtype=tf.float32) 10 | hh=tf.convert_to_tensor(np.random.rand(1,5,5,25),dtype=tf.float32) 11 | [y2, channels] = psroi_pooling_op.psroi_pool(hh, rois, output_dim=1, group_size=5, spatial_scale=1.0) 12 | 13 | sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) 14 | print(( sess.run(hh))) 15 | print(( sess.run(y2))) 16 | pdb.set_trace() 17 | -------------------------------------------------------------------------------- /lib/pycocotools/UPSTREAM_REV: -------------------------------------------------------------------------------- 1 | https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574 2 | -------------------------------------------------------------------------------- /lib/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /lib/pycocotools/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 27 | -------------------------------------------------------------------------------- /lib/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | from . import _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 
59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | encode = _mask.encode 77 | decode = _mask.decode 78 | iou = _mask.iou 79 | merge = _mask.merge 80 | area = _mask.area 81 | toBbox = _mask.toBbox 82 | frPyObjects = _mask.frPyObjects -------------------------------------------------------------------------------- /lib/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | #include 9 | 10 | typedef unsigned int uint; 11 | typedef unsigned long siz; 12 | typedef unsigned char byte; 13 | typedef double* BB; 14 | typedef struct { siz h, w, m; uint *cnts; } RLE; 15 | 16 | // Initialize/destroy RLE. 17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 18 | void rleFree( RLE *R ); 19 | 20 | // Initialize/destroy RLE array. 21 | void rlesInit( RLE **R, siz n ); 22 | void rlesFree( RLE **R, siz n ); 23 | 24 | // Encode binary masks using RLE. 25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 26 | 27 | // Decode binary masks encoded via RLE. 28 | void rleDecode( const RLE *R, byte *mask, siz n ); 29 | 30 | // Compute union or intersection of encoded masks. 31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ); 32 | 33 | // Compute area of encoded masks. 34 | void rleArea( const RLE *R, siz n, uint *a ); 35 | 36 | // Compute intersection over union between masks. 37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 38 | 39 | // Compute intersection over union between bounding boxes. 40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 41 | 42 | // Get bounding boxes surrounding encoded masks. 43 | void rleToBbox( const RLE *R, BB bb, siz n ); 44 | 45 | // Convert bounding boxes to encoded masks. 46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 47 | 48 | // Convert polygon to encoded mask. 49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 50 | 51 | // Get compressed string representation of encoded mask. 52 | char* rleToString( const RLE *R ); 53 | 54 | // Convert from compressed string representation of encoded mask. 
55 | void rleFrString( RLE *R, char *s, siz h, siz w ); 56 | -------------------------------------------------------------------------------- /lib/roi_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from . import roidb -------------------------------------------------------------------------------- /lib/roi_data_layer/layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """The data layer used during training to train a Fast R-CNN network. 9 | 10 | RoIDataLayer implements a Caffe Python layer. 11 | """ 12 | 13 | import numpy as np 14 | 15 | # TODO: make fast_rcnn irrelevant 16 | # >>>> obsolete, because it depends on sth outside of this project 17 | from ..fast_rcnn.config import cfg 18 | # <<<< obsolete 19 | from ..roi_data_layer.minibatch import get_minibatch 20 | 21 | class RoIDataLayer(object): 22 | """Fast R-CNN data layer used for training.""" 23 | 24 | def __init__(self, roidb, num_classes): 25 | """Set the roidb to be used by this layer during training.""" 26 | self._roidb = roidb 27 | self._num_classes = num_classes 28 | self._shuffle_roidb_inds() 29 | 30 | def _shuffle_roidb_inds(self): 31 | """Randomly permute the training roidb.""" 32 | self._perm = np.random.permutation(np.arange(len(self._roidb))) 33 | self._cur = 0 34 | 35 | def _get_next_minibatch_inds(self): 36 | """Return the roidb indices for the next minibatch.""" 37 | 38 | if cfg.TRAIN.HAS_RPN: 39 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): 40 | self._shuffle_roidb_inds() 41 | 42 | db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] 43 | self._cur += cfg.TRAIN.IMS_PER_BATCH 44 | else: 45 | # sample images 46 | db_inds = np.zeros((cfg.TRAIN.IMS_PER_BATCH), dtype=np.int32) 47 | i = 0 48 | while (i < cfg.TRAIN.IMS_PER_BATCH): 49 | ind = self._perm[self._cur] 50 | num_objs = self._roidb[ind]['boxes'].shape[0] 51 | if num_objs != 0: 52 | db_inds[i] = ind 53 | i += 1 54 | 55 | self._cur += 1 56 | if self._cur >= len(self._roidb): 57 | self._shuffle_roidb_inds() 58 | 59 | return db_inds 60 | 61 | def _get_next_minibatch(self): 62 | """Return the blobs to be used for the next minibatch. 63 | 64 | If cfg.TRAIN.USE_PREFETCH is True, then blobs will be computed in a 65 | separate process and made available through self._blob_queue. 
66 | """ 67 | db_inds = self._get_next_minibatch_inds() 68 | minibatch_db = [self._roidb[i] for i in db_inds] 69 | return get_minibatch(minibatch_db, self._num_classes) 70 | 71 | def forward(self): 72 | """Get blobs and copy them into this layer's top blob vector.""" 73 | blobs = self._get_next_minibatch() 74 | return blobs 75 | -------------------------------------------------------------------------------- /lib/roi_data_layer/roidb.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Transform a roidb into a trainable roidb by adding a bunch of metadata.""" 9 | 10 | import numpy as np 11 | 12 | import PIL 13 | 14 | # TODO: make fast_rcnn irrelevant 15 | # >>>> obsolete, because it depends on sth outside of this project 16 | from ..fast_rcnn.config import cfg 17 | from ..fast_rcnn.bbox_transform import bbox_transform 18 | # <<<< obsolete 19 | from ..utils.cython_bbox import bbox_overlaps 20 | 21 | def prepare_roidb(imdb): 22 | """Enrich the imdb's roidb by adding some derived quantities that 23 | are useful for training. This function precomputes the maximum 24 | overlap, taken over ground-truth boxes, between each ROI and 25 | each ground-truth box. The class with maximum overlap is also 26 | recorded. 27 | """ 28 | sizes = [PIL.Image.open(imdb.image_path_at(i)).size 29 | for i in range(imdb.num_images)] 30 | roidb = imdb.roidb 31 | for i in range(len(imdb.image_index)): 32 | roidb[i]['image'] = imdb.image_path_at(i) 33 | roidb[i]['width'] = sizes[i][0] 34 | roidb[i]['height'] = sizes[i][1] 35 | # need gt_overlaps as a dense array for argmax 36 | gt_overlaps = roidb[i]['gt_overlaps'].toarray() 37 | # max overlap with gt over classes (columns) 38 | max_overlaps = gt_overlaps.max(axis=1) 39 | # gt class that had the max overlap 40 | max_classes = gt_overlaps.argmax(axis=1) 41 | roidb[i]['max_classes'] = max_classes 42 | roidb[i]['max_overlaps'] = max_overlaps 43 | # sanity checks 44 | # max overlap of 0 => class should be zero (background) 45 | zero_inds = np.where(max_overlaps == 0)[0] 46 | assert all(max_classes[zero_inds] == 0) 47 | # max overlap > 0 => class should not be zero (must be a fg class) 48 | nonzero_inds = np.where(max_overlaps > 0)[0] 49 | assert all(max_classes[nonzero_inds] != 0) 50 | 51 | def add_bbox_regression_targets(roidb): 52 | """ 53 | Add information needed to train bounding-box regressors. 54 | For each roi find the corresponding gt box, and compute the distance. 55 | then normalize the distance into Gaussian by minus mean and divided by std 56 | """ 57 | assert len(roidb) > 0 58 | assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?' 
59 | 60 | num_images = len(roidb) 61 | # Infer number of classes from the number of columns in gt_overlaps 62 | num_classes = roidb[0]['gt_overlaps'].shape[1] 63 | for im_i in range(num_images): 64 | rois = roidb[im_i]['boxes'] 65 | max_overlaps = roidb[im_i]['max_overlaps'] 66 | max_classes = roidb[im_i]['max_classes'] 67 | roidb[im_i]['bbox_targets'] = \ 68 | _compute_targets(rois, max_overlaps, max_classes) 69 | 70 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: 71 | # Use fixed / precomputed "means" and "stds" instead of empirical values 72 | means = np.tile( 73 | np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes, 1)) 74 | stds = np.tile( 75 | np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes, 1)) 76 | else: 77 | # Compute values needed for means and stds 78 | # var(x) = E(x^2) - E(x)^2 79 | class_counts = np.zeros((num_classes, 1)) + cfg.EPS 80 | sums = np.zeros((num_classes, 4)) 81 | squared_sums = np.zeros((num_classes, 4)) 82 | for im_i in range(num_images): 83 | targets = roidb[im_i]['bbox_targets'] 84 | for cls in range(1, num_classes): 85 | cls_inds = np.where(targets[:, 0] == cls)[0] 86 | if cls_inds.size > 0: 87 | class_counts[cls] += cls_inds.size 88 | sums[cls, :] += targets[cls_inds, 1:].sum(axis=0) 89 | squared_sums[cls, :] += \ 90 | (targets[cls_inds, 1:] ** 2).sum(axis=0) 91 | 92 | means = sums / class_counts 93 | stds = np.sqrt(squared_sums / class_counts - means ** 2) 94 | # too small number will cause nan error 95 | assert np.min(stds) < 0.01, \ 96 | 'Boxes std is too small, std:{}'.format(stds) 97 | 98 | print('bbox target means:') 99 | print(means) 100 | print(means[1:, :].mean(axis=0)) # ignore bg class 101 | print('bbox target stdevs:') 102 | print(stds) 103 | print(stds[1:, :].mean(axis=0)) # ignore bg class 104 | 105 | # Normalize targets 106 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS: 107 | print("Normalizing targets") 108 | for im_i in range(num_images): 109 | targets = roidb[im_i]['bbox_targets'] 110 | for cls in range(1, num_classes): 111 | cls_inds = np.where(targets[:, 0] == cls)[0] 112 | roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :] 113 | roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :] 114 | else: 115 | print("NOT normalizing targets") 116 | 117 | # These values will be needed for making predictions 118 | # (the predicts will need to be unnormalized and uncentered) 119 | return means.ravel(), stds.ravel() 120 | 121 | def _compute_targets(rois, overlaps, labels): 122 | """ 123 | Compute bounding-box regression targets for an image. 124 | for each roi find the corresponding gt_box, then compute the distance. 
125 | """ 126 | # Indices of ground-truth ROIs 127 | gt_inds = np.where(overlaps == 1)[0] 128 | if len(gt_inds) == 0: 129 | # Bail if the image has no ground-truth ROIs 130 | return np.zeros((rois.shape[0], 5), dtype=np.float32) 131 | # Indices of examples for which we try to make predictions 132 | ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] 133 | 134 | # Get IoU overlap between each ex ROI and gt ROI 135 | ex_gt_overlaps = bbox_overlaps( 136 | np.ascontiguousarray(rois[ex_inds, :], dtype=np.float), 137 | np.ascontiguousarray(rois[gt_inds, :], dtype=np.float)) 138 | 139 | # Find which gt ROI each ex ROI has max overlap with: 140 | # this will be the ex ROI's gt target 141 | gt_assignment = ex_gt_overlaps.argmax(axis=1) 142 | gt_rois = rois[gt_inds[gt_assignment], :] 143 | ex_rois = rois[ex_inds, :] 144 | 145 | targets = np.zeros((rois.shape[0], 5), dtype=np.float32) 146 | targets[ex_inds, 0] = labels[ex_inds] 147 | targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) 148 | return targets 149 | -------------------------------------------------------------------------------- /lib/roi_data_layer/roidb2.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Transform a roidb into a trainable roidb by adding a bunch of metadata.""" 9 | 10 | import numpy as np 11 | 12 | # TODO: make fast_rcnn irrelevant 13 | # >>>> obsolete, because it depends on sth outside of this project 14 | from ..fast_rcnn.config import cfg 15 | from ..fast_rcnn.bbox_transform import bbox_transform 16 | # <<<< obsolete 17 | from ..utils.cython_bbox import bbox_overlaps 18 | 19 | def prepare_roidb(imdb): 20 | """Enrich the imdb's roidb by adding some derived quantities that 21 | are useful for training. This function precomputes the maximum 22 | overlap, taken over ground-truth boxes, between each ROI and 23 | each ground-truth box. The class with maximum overlap is also 24 | recorded. 25 | """ 26 | roidb = imdb.roidb 27 | for i in range(len(imdb.image_index)): 28 | roidb[i]['image'] = imdb.image_path_at(i) 29 | # need gt_overlaps as a dense array for argmax 30 | gt_overlaps = roidb[i]['gt_overlaps'].toarray() 31 | # max overlap with gt over classes (columns) 32 | max_overlaps = gt_overlaps.max(axis=1) 33 | # gt class that had the max overlap 34 | max_classes = gt_overlaps.argmax(axis=1) 35 | 36 | roidb[i]['max_classes'] = max_classes 37 | roidb[i]['max_overlaps'] = max_overlaps 38 | 39 | # sanity checks 40 | # max overlap of 0 => class should be zero (background) 41 | zero_inds = np.where(max_overlaps == 0)[0] 42 | assert all(max_classes[zero_inds] == 0) 43 | # max overlap > 0 => class should not be zero (must be a fg class) 44 | nonzero_inds = np.where(max_overlaps > 0)[0] 45 | assert all(max_classes[nonzero_inds] != 0) 46 | 47 | def add_bbox_regression_targets(roidb): 48 | """Add information needed to train bounding-box regressors.""" 49 | assert len(roidb) > 0 50 | assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?' 
51 | 52 | num_images = len(roidb) 53 | # Infer number of classes from the number of columns in gt_overlaps 54 | num_classes = roidb[0]['gt_overlaps'].shape[1] 55 | for im_i in range(num_images): 56 | rois = roidb[im_i]['boxes'] 57 | max_overlaps = roidb[im_i]['max_overlaps'] 58 | max_classes = roidb[im_i]['max_classes'] 59 | roidb[im_i]['bbox_targets'] = \ 60 | _compute_targets(rois, max_overlaps, max_classes, num_classes) 61 | 62 | # Compute values needed for means and stds 63 | # var(x) = E(x^2) - E(x)^2 64 | class_counts = np.zeros((num_classes, 1)) + cfg.EPS 65 | sums = np.zeros((num_classes, 4)) 66 | squared_sums = np.zeros((num_classes, 4)) 67 | for im_i in range(num_images): 68 | targets = roidb[im_i]['bbox_targets'] 69 | for cls in range(1, num_classes): 70 | cls_inds = np.where(targets[:, 0] == cls)[0] 71 | if cls_inds.size > 0: 72 | class_counts[cls] += cls_inds.size 73 | sums[cls, :] += targets[cls_inds, 1:].sum(axis=0) 74 | squared_sums[cls, :] += (targets[cls_inds, 1:] ** 2).sum(axis=0) 75 | 76 | means = sums / class_counts 77 | stds = np.sqrt(squared_sums / class_counts - means ** 2) 78 | 79 | # Normalize targets 80 | for im_i in range(num_images): 81 | targets = roidb[im_i]['bbox_targets'] 82 | for cls in range(1, num_classes): 83 | cls_inds = np.where(targets[:, 0] == cls)[0] 84 | roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :] 85 | if stds[cls, 0] != 0: 86 | roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :] 87 | 88 | # These values will be needed for making predictions 89 | # (the predicts will need to be unnormalized and uncentered) 90 | return means.ravel(), stds.ravel() 91 | 92 | def _compute_targets(rois, overlaps, labels, num_classes): 93 | """Compute bounding-box regression targets for an image.""" 94 | # Ensure ROIs are floats 95 | rois = rois.astype(np.float, copy=False) 96 | 97 | # Indices of ground-truth ROIs 98 | gt_inds = np.where(overlaps == 1)[0] 99 | # Indices of examples for which we try to make predictions 100 | ex_inds = [] 101 | for i in range(1, num_classes): 102 | ex_inds.extend( np.where((labels == i) & (overlaps >= cfg.TRAIN.BBOX_THRESH))[0] ) 103 | 104 | # Get IoU overlap between each ex ROI and gt ROI 105 | ex_gt_overlaps = utils.cython_bbox.bbox_overlaps(rois[ex_inds, :], 106 | rois[gt_inds, :]) 107 | 108 | # Find which gt ROI each ex ROI has max overlap with: 109 | # this will be the ex ROI's gt target 110 | if ex_gt_overlaps.shape[0] != 0: 111 | gt_assignment = ex_gt_overlaps.argmax(axis=1) 112 | else: 113 | gt_assignment = [] 114 | gt_rois = rois[gt_inds[gt_assignment], :] 115 | ex_rois = rois[ex_inds, :] 116 | 117 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + cfg.EPS 118 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + cfg.EPS 119 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 120 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 121 | 122 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + cfg.EPS 123 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + cfg.EPS 124 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 125 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 126 | 127 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 128 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 129 | targets_dw = np.log(gt_widths / ex_widths) 130 | targets_dh = np.log(gt_heights / ex_heights) 131 | 132 | targets = np.zeros((rois.shape[0], 5), dtype=np.float32) 133 | targets[ex_inds, 0] = labels[ex_inds] 134 | targets[ex_inds, 1] = targets_dx 135 | targets[ex_inds, 2] = targets_dy 136 | targets[ex_inds, 3] = targets_dw 137 | targets[ex_inds, 4] = targets_dh 138 | return 
targets 139 | -------------------------------------------------------------------------------- /lib/roi_pooling_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from . import roi_pooling_op 8 | from . import roi_pooling_op_grad -------------------------------------------------------------------------------- /lib/roi_pooling_layer/roi_pooling_op.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os.path as osp 3 | 4 | filename = osp.join(osp.dirname(__file__), 'roi_pooling.so') 5 | _roi_pooling_module = tf.load_op_library(filename) 6 | roi_pool = _roi_pooling_module.roi_pool 7 | roi_pool_grad = _roi_pooling_module.roi_pool_grad 8 | -------------------------------------------------------------------------------- /lib/roi_pooling_layer/roi_pooling_op_gpu.h: -------------------------------------------------------------------------------- 1 | #if !GOOGLE_CUDA 2 | #error This file must only be included when building with Cuda support 3 | #endif 4 | 5 | #ifndef TENSORFLOW_USER_OPS_ROIPOOLING_OP_GPU_H_ 6 | #define TENSORFLOW_USER_OPS_ROIPOOLING_OP_GPU_H_ 7 | 8 | #define EIGEN_USE_GPU 9 | 10 | #include "tensorflow/core/framework/tensor_types.h" 11 | #include "tensorflow/core/platform/types.h" 12 | 13 | namespace tensorflow { 14 | 15 | // Run the forward pass of max pooling, optionally writing the argmax indices to 16 | // the mask array, if it is not nullptr. If mask is passed in as nullptr, the 17 | // argmax indices are not written. 18 | bool ROIPoolForwardLaucher( 19 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 20 | const int width, const int channels, const int pooled_height, 21 | const int pooled_width, const float* bottom_rois, 22 | float* top_data, int* argmax_data, const Eigen::GpuDevice& d); 23 | 24 | bool ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 25 | const int height, const int width, const int channels, const int pooled_height, 26 | const int pooled_width, const float* bottom_rois, 27 | float* bottom_diff, const int* argmax_data, const Eigen::GpuDevice& d); 28 | 29 | } // namespace tensorflow 30 | 31 | #endif // TENSORFLOW_CORE_KERNELS_MAXPOOLING_OP_GPU_H_ 32 | -------------------------------------------------------------------------------- /lib/roi_pooling_layer/roi_pooling_op_grad.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.framework import ops 3 | from . import roi_pooling_op 4 | 5 | @ops.RegisterGradient("RoiPool") 6 | def _roi_pool_grad(op, grad, _): 7 | """The gradients for `roi_pool`. 8 | Args: 9 | op: The `roi_pool` `Operation` that we are differentiating, which we can use 10 | to find the inputs and outputs of the original op. 11 | grad: Gradient with respect to the output of the `roi_pool` op. 12 | Returns: 13 | Gradients with respect to the input of `zero_out`. 
14 | """ 15 | data = op.inputs[0] 16 | rois = op.inputs[1] 17 | argmax = op.outputs[1] 18 | pooled_height = op.get_attr('pooled_height') 19 | pooled_width = op.get_attr('pooled_width') 20 | spatial_scale = op.get_attr('spatial_scale') 21 | 22 | # compute gradient 23 | data_grad = roi_pooling_op.roi_pool_grad(data, rois, argmax, grad, pooled_height, pooled_width, spatial_scale) 24 | 25 | return [data_grad, None] # List of one Tensor, since we have one input 26 | -------------------------------------------------------------------------------- /lib/roi_pooling_layer/roi_pooling_op_test.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from . import roi_pooling_op 4 | from . import roi_pooling_op_grad 5 | import tensorflow as tf 6 | import pdb 7 | 8 | 9 | def weight_variable(shape): 10 | initial = tf.truncated_normal(shape, stddev=0.1) 11 | return tf.Variable(initial) 12 | 13 | def conv2d(x, W): 14 | return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') 15 | 16 | array = np.random.rand(32, 100, 100, 3) 17 | data = tf.convert_to_tensor(array, dtype=tf.float32) 18 | rois = tf.convert_to_tensor([[0, 10, 10, 20, 20], [31, 30, 30, 40, 40]], dtype=tf.float32) 19 | 20 | W = weight_variable([3, 3, 3, 1]) 21 | h = conv2d(data, W) 22 | 23 | [y, argmax] = roi_pooling_op.roi_pool(h, rois, 6, 6, 1.0/3) 24 | pdb.set_trace() 25 | y_data = tf.convert_to_tensor(np.ones((2, 6, 6, 1)), dtype=tf.float32) 26 | print(y_data, y, argmax) 27 | 28 | # Minimize the mean squared errors. 29 | loss = tf.reduce_mean(tf.square(y - y_data)) 30 | optimizer = tf.train.GradientDescentOptimizer(0.5) 31 | train = optimizer.minimize(loss) 32 | 33 | init = tf.global_variables_initializer() 34 | 35 | # Launch the graph. 
36 | sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) 37 | sess.run(init) 38 | pdb.set_trace() 39 | for step in range(10): 40 | sess.run(train) 41 | print((step, sess.run(W))) 42 | print((sess.run(y))) 43 | 44 | #with tf.device('/gpu:0'): 45 | # result = module.roi_pool(data, rois, 1, 1, 1.0/1) 46 | # print result.eval() 47 | #with tf.device('/cpu:0'): 48 | # run(init) 49 | -------------------------------------------------------------------------------- /lib/rpn_tools/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/.DS_Store -------------------------------------------------------------------------------- /lib/rpn_tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/__init__.py -------------------------------------------------------------------------------- /lib/rpn_tools/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/__init__.pyc -------------------------------------------------------------------------------- /lib/rpn_tools/anchor_target_layer_modified.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/anchor_target_layer_modified.pyc -------------------------------------------------------------------------------- /lib/rpn_tools/fast_rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/rpn_tools/fast_rcnn/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/__init__.pyc -------------------------------------------------------------------------------- /lib/rpn_tools/fast_rcnn/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def bbox_transform(ex_rois, gt_rois): 11 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 12 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 13 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 14 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 15 | 16 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 17 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 18 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 19 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 20 | 21 | targets_dx = (gt_ctr_x - ex_ctr_x) 
/ ex_widths 22 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 23 | targets_dw = np.log(gt_widths / ex_widths) 24 | targets_dh = np.log(gt_heights / ex_heights) 25 | 26 | targets = np.vstack( 27 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 28 | return targets 29 | 30 | def bbox_transform_inv(boxes, deltas): 31 | if boxes.shape[0] == 0: 32 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) 33 | 34 | boxes = boxes.astype(deltas.dtype, copy=False) 35 | 36 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 37 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 38 | ctr_x = boxes[:, 0] + 0.5 * widths 39 | ctr_y = boxes[:, 1] + 0.5 * heights 40 | 41 | dx = deltas[:, 0::4] 42 | dy = deltas[:, 1::4] 43 | dw = deltas[:, 2::4] 44 | dh = deltas[:, 3::4] 45 | 46 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 47 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 48 | pred_w = np.exp(dw) * widths[:, np.newaxis] 49 | pred_h = np.exp(dh) * heights[:, np.newaxis] 50 | 51 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) 52 | # x1 53 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 54 | # y1 55 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 56 | # x2 57 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w 58 | # y2 59 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h 60 | 61 | return pred_boxes 62 | 63 | def clip_boxes(boxes, im_shape): 64 | """ 65 | Clip boxes to image boundaries. 66 | """ 67 | 68 | # x1 >= 0 69 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 70 | # y1 >= 0 71 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 72 | # x2 < im_shape[1] 73 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 74 | # y2 < im_shape[0] 75 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 76 | return boxes 77 | -------------------------------------------------------------------------------- /lib/rpn_tools/fast_rcnn/bbox_transform.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/bbox_transform.pyc -------------------------------------------------------------------------------- /lib/rpn_tools/fast_rcnn/config.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/config.pyc -------------------------------------------------------------------------------- /lib/rpn_tools/fast_rcnn/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/nms/__init__.py -------------------------------------------------------------------------------- /lib/rpn_tools/fast_rcnn/nms/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/nms/__init__.pyc -------------------------------------------------------------------------------- /lib/rpn_tools/fast_rcnn/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # 
Multitask Network Cascade 3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 4 | # Copyright (c) 2016, Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /lib/rpn_tools/fast_rcnn/nms/cpu_nms.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/nms/cpu_nms.so -------------------------------------------------------------------------------- /lib/rpn_tools/fast_rcnn/nms/gpu_mv.hpp: -------------------------------------------------------------------------------- 1 | void _mv(const float* all_boxes, const float* all_masks, const int all_boxes_num, 2 | const int* candidate_inds, const int* candidate_start, const float* candidate_weights, const int candidate_num, 3 | const int image_height, const int image_width, const int box_dim, const int mask_size, const int result_num, 4 | float* finalize_output_mask, int* finalize_output_box, const int device_id); 5 | -------------------------------------------------------------------------------- /lib/rpn_tools/fast_rcnn/nms/gpu_mv.pyx: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | cimport numpy as np 4 | 5 | assert sizeof(int) == sizeof(np.int32_t) 6 | 7 | cdef extern from "gpu_mv.hpp": 8 | void _mv(np.float32_t* all_boxes, np.float32_t* 
all_masks, np.int32_t all_boxes_num, np.int32_t* candidate_inds, np.int32_t* candidate_start, np.float32_t* candidate_weights, np.int32_t candidate_num, np.int32_t image_height, np.int32_t image_width, np.int32_t box_dim, np.int32_t mask_size, np.int32_t result_num, np.float32_t* result_mask, np.int32_t* result_box, np.int32_t device_id); 9 | 10 | # boxes: n * 4 11 | # masks: n * 1 * 21 * 21 12 | # scores: n * 21 13 | def mv(np.ndarray[np.float32_t, ndim=2] all_boxes, 14 | np.ndarray[np.float32_t, ndim=4] all_masks, 15 | np.ndarray[np.int32_t, ndim=1] candidate_inds, 16 | np.ndarray[np.int32_t, ndim=1] candidate_start, 17 | np.ndarray[np.float32_t, ndim=1] candidate_weights, 18 | np.int32_t image_height, 19 | np.int32_t image_width, 20 | np.int32_t device_id = 0): 21 | cdef int all_box_num = all_boxes.shape[0] 22 | cdef int boxes_dim = all_boxes.shape[1] 23 | cdef int mask_size = all_masks.shape[3] 24 | cdef int candidate_num = candidate_inds.shape[0] 25 | cdef int result_num = candidate_start.shape[0] 26 | cdef np.ndarray[np.float32_t, ndim=4] \ 27 | result_mask = np.zeros((result_num, 1, all_masks.shape[2], all_masks.shape[3]), dtype=np.float32) 28 | cdef np.ndarray[np.int32_t, ndim=2] \ 29 | result_box = np.zeros((result_num, boxes_dim), dtype=np.int32) 30 | _mv(&all_boxes[0, 0], &all_masks[0, 0, 0, 0], all_box_num, &candidate_inds[0], &candidate_start[0], &candidate_weights[0], candidate_num, image_height, image_width, boxes_dim, mask_size, candidate_start.shape[0], &result_mask[0,0,0,0], &result_box[0,0], device_id) 31 | return result_mask, result_box 32 | -------------------------------------------------------------------------------- /lib/rpn_tools/fast_rcnn/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /lib/rpn_tools/fast_rcnn/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Multitask Network Cascade 3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 4 | # Copyright (c) 2016, Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /lib/rpn_tools/fast_rcnn/nms/gpu_nms.so: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/nms/gpu_nms.so -------------------------------------------------------------------------------- /lib/rpn_tools/fast_rcnn/nms/mnc_config.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/nms/mnc_config.pyc -------------------------------------------------------------------------------- /lib/rpn_tools/fast_rcnn/nms/mv.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/nms/mv.so -------------------------------------------------------------------------------- /lib/rpn_tools/fast_rcnn/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------- 2 | // Multitask Network Cascade 3 | // Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 4 | // Copyright (c) 2016, Haozhi Qi 5 | // Licensed under The MIT License [see LICENSE for details] 6 | // -------------------------------------------------------- 7 | 8 | #include "gpu_nms.hpp" 9 | #include 10 | #include 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 
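// How the suppression mask is built: threadsPerBlock = sizeof(unsigned long long) * 8 = 64,
// so each (row_start, col_start) block compares a tile of up to 64 "row" boxes against a
// tile of up to 64 "column" boxes cached in shared memory above. In the loop below, every
// thread owns one row box and packs its decisions into the 64-bit word t, setting bit i
// when column box i overlaps it by more than nms_overlap_thresh; the word is stored at
// dev_mask[cur_box_idx * col_blocks + col_start]. The host-side loop in _nms() then walks
// the boxes in descending score order and keeps a box only if no previously kept box has
// flagged it. (Angle brackets were stripped when this file was rendered to text: the two
// bare #include lines above are presumably <vector> and <iostream>, mask_host and remv
// further down are presumably std::vector<unsigned long long>, and the kernel launch
// presumably reads nms_kernel<<<blocks, threads>>>(...).)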
60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(¤t_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | -------------------------------------------------------------------------------- /lib/rpn_tools/fast_rcnn/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Multitask Network Cascade 3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 4 | # Copyright (c) 2016, Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | from mnc_config import cfg 9 | from gpu_nms import gpu_nms 10 | from cpu_nms import cpu_nms 11 | 12 | 13 | def nms(dets, thresh): 14 | """Dispatch to either CPU or GPU NMS implementations.""" 15 | 16 | if dets.shape[0] == 0: 17 | return [] 18 | if cfg.USE_GPU_NMS: 19 | return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 20 | 
else: 21 | return cpu_nms(dets, thresh) 22 | 23 | 24 | def apply_nms(all_boxes, thresh): 25 | """Apply non-maximum suppression to all predicted boxes output by the 26 | test_net method. 27 | """ 28 | num_classes = len(all_boxes) 29 | num_images = len(all_boxes[0]) 30 | nms_boxes = [[[] for _ in xrange(num_images)] 31 | for _ in xrange(num_classes)] 32 | for cls_ind in xrange(num_classes): 33 | for im_ind in xrange(num_images): 34 | dets = all_boxes[cls_ind][im_ind] 35 | if dets == []: 36 | continue 37 | keep = nms(dets, thresh) 38 | if len(keep) == 0: 39 | continue 40 | nms_boxes[cls_ind][im_ind] = dets[keep, :].copy() 41 | return nms_boxes 42 | 43 | 44 | def apply_nms_mask(all_boxes, all_masks, thresh): 45 | num_classes = len(all_boxes) 46 | num_images = len(all_boxes[0]) 47 | nms_boxes = [[[] for _ in xrange(num_images)] 48 | for _ in xrange(num_classes)] 49 | nms_masks = [[[] for _ in xrange(num_images)] 50 | for _ in xrange(num_classes)] 51 | for cls_ind in xrange(num_classes): 52 | for im_ind in xrange(num_images): 53 | dets = all_boxes[cls_ind][im_ind] 54 | masks = all_masks[cls_ind][im_ind] 55 | if dets == []: 56 | continue 57 | keep = nms(dets, thresh) 58 | if len(keep) == 0: 59 | continue 60 | nms_boxes[cls_ind][im_ind] = dets[keep, :].copy() 61 | nms_masks[cls_ind][im_ind] = masks[keep, :].copy() 62 | return nms_boxes, nms_masks 63 | 64 | 65 | def apply_nms_mask_single(box, mask, thresh): 66 | if box == []: 67 | return box, mask 68 | keep = nms(box, thresh) 69 | if len(keep) == 0: 70 | return box, mask 71 | return box[keep, :].copy(), mask[keep, :].copy() 72 | -------------------------------------------------------------------------------- /lib/rpn_tools/fast_rcnn/nms/nms_wrapper.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/nms/nms_wrapper.pyc -------------------------------------------------------------------------------- /lib/rpn_tools/fast_rcnn/nms/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Multitask Network Cascade 3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 4 | # Copyright (c) 2016, Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def py_cpu_nms(dets, thresh): 11 | """Pure Python NMS baseline.""" 12 | x1 = dets[:, 0] 13 | y1 = dets[:, 1] 14 | x2 = dets[:, 2] 15 | y2 = dets[:, 3] 16 | scores = dets[:, 4] 17 | 18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 19 | order = scores.argsort()[::-1] 20 | 21 | keep = [] 22 | while order.size > 0: 23 | i = order[0] 24 | keep.append(i) 25 | xx1 = np.maximum(x1[i], x1[order[1:]]) 26 | yy1 = np.maximum(y1[i], y1[order[1:]]) 27 | xx2 = np.minimum(x2[i], x2[order[1:]]) 28 | yy2 = np.minimum(y2[i], y2[order[1:]]) 29 | 30 | w = np.maximum(0.0, xx2 - xx1 + 1) 31 | h = np.maximum(0.0, yy2 - yy1 + 1) 32 | inter = w * h 33 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 34 | 35 | inds = np.where(ovr <= thresh)[0] 36 | order = order[inds + 1] 37 | 38 | return keep 39 | -------------------------------------------------------------------------------- /lib/rpn_tools/fast_rcnn/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # 
-------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from config import cfg 9 | from nms.gpu_nms import gpu_nms 10 | from nms.cpu_nms import cpu_nms 11 | 12 | def nms(dets, thresh, force_cpu=False): 13 | """Dispatch to either CPU or GPU NMS implementations.""" 14 | 15 | if dets.shape[0] == 0: 16 | return [] 17 | if cfg.USE_GPU_NMS and not force_cpu: 18 | return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 19 | else: 20 | return cpu_nms(dets, thresh) 21 | -------------------------------------------------------------------------------- /lib/rpn_tools/fast_rcnn/nms_wrapper.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/nms_wrapper.pyc -------------------------------------------------------------------------------- /lib/rpn_tools/fast_rcnn/nms_wrapper.py~: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import cfg 9 | from nms.gpu_nms import gpu_nms 10 | from nms.cpu_nms import cpu_nms 11 | 12 | def nms(dets, thresh, force_cpu=False): 13 | """Dispatch to either CPU or GPU NMS implementations.""" 14 | 15 | if dets.shape[0] == 0: 16 | return [] 17 | if cfg.USE_GPU_NMS and not force_cpu: 18 | return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 19 | else: 20 | return cpu_nms(dets, thresh) 21 | -------------------------------------------------------------------------------- /lib/rpn_tools/fast_rcnn/test.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/test.pyc -------------------------------------------------------------------------------- /lib/rpn_tools/fast_rcnn/train.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/train.pyc -------------------------------------------------------------------------------- /lib/rpn_tools/generate_anchors.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 11 | # 12 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 13 | # >> anchors 14 | # 15 | # anchors = 16 | # 17 | # -83 -39 100 56 18 | # -175 -87 192 104 19 | # -359 -183 376 200 20 | # -55 -55 72 72 21 | # -119 -119 136 136 22 | # -247 -247 264 264 23 | # -35 -79 52 96 24 | # -79 -167 96 184 25 | # -167 -343 184 360 26 | 27 | #array([[ -83., 
-39., 100., 56.], 28 | # [-175., -87., 192., 104.], 29 | # [-359., -183., 376., 200.], 30 | # [ -55., -55., 72., 72.], 31 | # [-119., -119., 136., 136.], 32 | # [-247., -247., 264., 264.], 33 | # [ -35., -79., 52., 96.], 34 | # [ -79., -167., 96., 184.], 35 | # [-167., -343., 184., 360.]]) 36 | 37 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 38 | scales=2**np.arange(3, 6)): 39 | """ 40 | Generate anchor (reference) windows by enumerating aspect ratios X 41 | scales wrt a reference (0, 0, 15, 15) window. 42 | """ 43 | 44 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 45 | ratio_anchors = _ratio_enum(base_anchor, ratios) 46 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 47 | for i in xrange(ratio_anchors.shape[0])]) 48 | return anchors 49 | 50 | def _whctrs(anchor): 51 | """ 52 | Return width, height, x center, and y center for an anchor (window). 53 | """ 54 | 55 | w = anchor[2] - anchor[0] + 1 56 | h = anchor[3] - anchor[1] + 1 57 | x_ctr = anchor[0] + 0.5 * (w - 1) 58 | y_ctr = anchor[1] + 0.5 * (h - 1) 59 | return w, h, x_ctr, y_ctr 60 | 61 | def _mkanchors(ws, hs, x_ctr, y_ctr): 62 | """ 63 | Given a vector of widths (ws) and heights (hs) around a center 64 | (x_ctr, y_ctr), output a set of anchors (windows). 65 | """ 66 | 67 | ws = ws[:, np.newaxis] 68 | hs = hs[:, np.newaxis] 69 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 70 | y_ctr - 0.5 * (hs - 1), 71 | x_ctr + 0.5 * (ws - 1), 72 | y_ctr + 0.5 * (hs - 1))) 73 | return anchors 74 | 75 | def _ratio_enum(anchor, ratios): 76 | """ 77 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 78 | """ 79 | 80 | w, h, x_ctr, y_ctr = _whctrs(anchor) 81 | size = w * h 82 | size_ratios = size / ratios 83 | ws = np.round(np.sqrt(size_ratios)) 84 | hs = np.round(ws * ratios) 85 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 86 | return anchors 87 | 88 | def _scale_enum(anchor, scales): 89 | """ 90 | Enumerate a set of anchors for each scale wrt an anchor. 
91 | """ 92 | 93 | w, h, x_ctr, y_ctr = _whctrs(anchor) 94 | ws = w * scales 95 | hs = h * scales 96 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 97 | return anchors 98 | 99 | if __name__ == '__main__': 100 | import time 101 | t = time.time() 102 | a = generate_anchors() 103 | print time.time() - t 104 | print a 105 | from IPython import embed; embed() 106 | -------------------------------------------------------------------------------- /lib/rpn_tools/generate_anchors.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/generate_anchors.pyc -------------------------------------------------------------------------------- /lib/rpn_tools/my_anchor_target_layer_modified.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/my_anchor_target_layer_modified.pyc -------------------------------------------------------------------------------- /lib/rpn_tools/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/nms/__init__.py -------------------------------------------------------------------------------- /lib/rpn_tools/nms/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/nms/__init__.pyc -------------------------------------------------------------------------------- /lib/rpn_tools/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Multitask Network Cascade 3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 4 | # Copyright (c) 2016, Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef 
np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /lib/rpn_tools/nms/cpu_nms.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/nms/cpu_nms.so -------------------------------------------------------------------------------- /lib/rpn_tools/nms/gpu_mv.hpp: -------------------------------------------------------------------------------- 1 | void _mv(const float* all_boxes, const float* all_masks, const int all_boxes_num, 2 | const int* candidate_inds, const int* candidate_start, const float* candidate_weights, const int candidate_num, 3 | const int image_height, const int image_width, const int box_dim, const int mask_size, const int result_num, 4 | float* finalize_output_mask, int* finalize_output_box, const int device_id); 5 | -------------------------------------------------------------------------------- /lib/rpn_tools/nms/gpu_mv.pyx: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | cimport numpy as np 4 | 5 | assert sizeof(int) == sizeof(np.int32_t) 6 | 7 | cdef extern from "gpu_mv.hpp": 8 | void _mv(np.float32_t* all_boxes, np.float32_t* all_masks, np.int32_t all_boxes_num, np.int32_t* candidate_inds, np.int32_t* candidate_start, np.float32_t* candidate_weights, np.int32_t candidate_num, np.int32_t image_height, np.int32_t image_width, np.int32_t box_dim, np.int32_t mask_size, np.int32_t result_num, np.float32_t* result_mask, np.int32_t* result_box, np.int32_t device_id); 9 | 10 | # boxes: n * 4 11 | # masks: n * 1 * 21 * 21 12 | # scores: n * 21 13 | def mv(np.ndarray[np.float32_t, ndim=2] all_boxes, 14 | np.ndarray[np.float32_t, ndim=4] all_masks, 15 | np.ndarray[np.int32_t, ndim=1] candidate_inds, 16 | np.ndarray[np.int32_t, ndim=1] candidate_start, 17 | np.ndarray[np.float32_t, ndim=1] candidate_weights, 18 | np.int32_t image_height, 19 | np.int32_t image_width, 20 | np.int32_t device_id = 0): 21 | cdef int all_box_num = all_boxes.shape[0] 22 | cdef int boxes_dim = all_boxes.shape[1] 23 | cdef int mask_size = all_masks.shape[3] 24 | cdef int candidate_num = candidate_inds.shape[0] 25 | cdef int result_num = candidate_start.shape[0] 26 | cdef np.ndarray[np.float32_t, ndim=4] \ 27 | result_mask = np.zeros((result_num, 1, all_masks.shape[2], all_masks.shape[3]), dtype=np.float32) 28 | cdef np.ndarray[np.int32_t, ndim=2] \ 29 | result_box = np.zeros((result_num, boxes_dim), dtype=np.int32) 30 | _mv(&all_boxes[0, 0], &all_masks[0, 0, 0, 0], all_box_num, &candidate_inds[0], &candidate_start[0], &candidate_weights[0], candidate_num, image_height, image_width, boxes_dim, mask_size, candidate_start.shape[0], &result_mask[0,0,0,0], &result_box[0,0], device_id) 31 | return result_mask, result_box 32 | 
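# A minimal shape sketch for the mask-voting wrapper defined above, assuming only what the
# cdef signature and the "boxes: n * 4, masks: n * 1 * 21 * 21" comments state. The grouping
# arrays and their values are illustrative guesses, and the import path for the compiled
# extension depends on how lib/make.sh builds mv.so, so the actual call is left commented out.
import numpy as np

n, mask_size = 5, 21
all_boxes = (np.random.rand(n, 4) * 100).astype(np.float32)                  # n x 4 candidate boxes
all_masks = np.random.rand(n, 1, mask_size, mask_size).astype(np.float32)    # n x 1 x 21 x 21 soft masks

candidate_inds    = np.array([0, 1, 2, 3, 4], dtype=np.int32)  # candidate ids, concatenated per output
candidate_start   = np.array([0, 3], dtype=np.int32)           # one entry per output slot (result_num = 2)
candidate_weights = np.full(n, 0.2, dtype=np.float32)          # per-candidate voting weights

# result_mask, result_box = mv(all_boxes, all_masks, candidate_inds, candidate_start,
#                              candidate_weights, image_height=480, image_width=640)
# result_mask: float32 of shape (2, 1, 21, 21); result_box: int32 of shape (2, 4), per the .pyx above.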
-------------------------------------------------------------------------------- /lib/rpn_tools/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /lib/rpn_tools/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Multitask Network Cascade 3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 4 | # Copyright (c) 2016, Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /lib/rpn_tools/nms/gpu_nms.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/nms/gpu_nms.so -------------------------------------------------------------------------------- /lib/rpn_tools/nms/mnc_config.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/nms/mnc_config.pyc -------------------------------------------------------------------------------- /lib/rpn_tools/nms/mv.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/nms/mv.so -------------------------------------------------------------------------------- /lib/rpn_tools/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------- 2 | // Multitask Network Cascade 3 | // Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 4 | // Copyright (c) 2016, Haozhi Qi 5 | // Licensed under The MIT License [see LICENSE for details] 6 | // -------------------------------------------------------- 7 | 8 | #include "gpu_nms.hpp" 9 | #include 10 | #include 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | 
std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(¤t_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 
88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | -------------------------------------------------------------------------------- /lib/rpn_tools/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Multitask Network Cascade 3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 4 | # Copyright (c) 2016, Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | from mnc_config import cfg 9 | from gpu_nms import gpu_nms 10 | from cpu_nms import cpu_nms 11 | 12 | 13 | def nms(dets, thresh): 14 | """Dispatch to either CPU or GPU NMS implementations.""" 15 | 16 | if dets.shape[0] == 0: 17 | return [] 18 | if cfg.USE_GPU_NMS: 19 | return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 20 | else: 21 | return cpu_nms(dets, thresh) 22 | 23 | 24 | def apply_nms(all_boxes, thresh): 25 | """Apply non-maximum suppression to all predicted boxes output by the 26 | test_net method. 
27 | """ 28 | num_classes = len(all_boxes) 29 | num_images = len(all_boxes[0]) 30 | nms_boxes = [[[] for _ in xrange(num_images)] 31 | for _ in xrange(num_classes)] 32 | for cls_ind in xrange(num_classes): 33 | for im_ind in xrange(num_images): 34 | dets = all_boxes[cls_ind][im_ind] 35 | if dets == []: 36 | continue 37 | keep = nms(dets, thresh) 38 | if len(keep) == 0: 39 | continue 40 | nms_boxes[cls_ind][im_ind] = dets[keep, :].copy() 41 | return nms_boxes 42 | 43 | 44 | def apply_nms_mask(all_boxes, all_masks, thresh): 45 | num_classes = len(all_boxes) 46 | num_images = len(all_boxes[0]) 47 | nms_boxes = [[[] for _ in xrange(num_images)] 48 | for _ in xrange(num_classes)] 49 | nms_masks = [[[] for _ in xrange(num_images)] 50 | for _ in xrange(num_classes)] 51 | for cls_ind in xrange(num_classes): 52 | for im_ind in xrange(num_images): 53 | dets = all_boxes[cls_ind][im_ind] 54 | masks = all_masks[cls_ind][im_ind] 55 | if dets == []: 56 | continue 57 | keep = nms(dets, thresh) 58 | if len(keep) == 0: 59 | continue 60 | nms_boxes[cls_ind][im_ind] = dets[keep, :].copy() 61 | nms_masks[cls_ind][im_ind] = masks[keep, :].copy() 62 | return nms_boxes, nms_masks 63 | 64 | 65 | def apply_nms_mask_single(box, mask, thresh): 66 | if box == []: 67 | return box, mask 68 | keep = nms(box, thresh) 69 | if len(keep) == 0: 70 | return box, mask 71 | return box[keep, :].copy(), mask[keep, :].copy() 72 | -------------------------------------------------------------------------------- /lib/rpn_tools/nms/nms_wrapper.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/nms/nms_wrapper.pyc -------------------------------------------------------------------------------- /lib/rpn_tools/nms/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Multitask Network Cascade 3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 4 | # Copyright (c) 2016, Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def py_cpu_nms(dets, thresh): 11 | """Pure Python NMS baseline.""" 12 | x1 = dets[:, 0] 13 | y1 = dets[:, 1] 14 | x2 = dets[:, 2] 15 | y2 = dets[:, 3] 16 | scores = dets[:, 4] 17 | 18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 19 | order = scores.argsort()[::-1] 20 | 21 | keep = [] 22 | while order.size > 0: 23 | i = order[0] 24 | keep.append(i) 25 | xx1 = np.maximum(x1[i], x1[order[1:]]) 26 | yy1 = np.maximum(y1[i], y1[order[1:]]) 27 | xx2 = np.minimum(x2[i], x2[order[1:]]) 28 | yy2 = np.minimum(y2[i], y2[order[1:]]) 29 | 30 | w = np.maximum(0.0, xx2 - xx1 + 1) 31 | h = np.maximum(0.0, yy2 - yy1 + 1) 32 | inter = w * h 33 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 34 | 35 | inds = np.where(ovr <= thresh)[0] 36 | order = order[inds + 1] 37 | 38 | return keep 39 | -------------------------------------------------------------------------------- /lib/rpn_tools/proposal_layer_modified.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/proposal_layer_modified.pyc -------------------------------------------------------------------------------- 
/lib/rpn_tools/proposal_target_layer_modified.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/proposal_target_layer_modified.pyc -------------------------------------------------------------------------------- /lib/rpn_tools/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/utils/__init__.py -------------------------------------------------------------------------------- /lib/rpn_tools/utils/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/utils/__init__.pyc -------------------------------------------------------------------------------- /lib/rpn_tools/utils/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Multitask Network Cascade 3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 4 | # Copyright (c) 2016, Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | -------------------------------------------------------------------------------- /lib/rpn_tools/utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Multitask Network Cascade 3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 4 | # Copyright (c) 2016, Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | 10 | import numpy as np 11 | import random 12 | import cv2 13 | from utils.cython_bbox import bbox_overlaps 14 | from mnc_config import cfg 15 | 16 | 17 | def 
im_list_to_blob(ims): 18 | """ 19 | Convert a list of images into a network input. 20 | Assumes images are already prepared (means subtracted, BGR order, ...). 21 | """ 22 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 23 | num_images = len(ims) 24 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 25 | dtype=np.float32) 26 | for i in xrange(num_images): 27 | im = ims[i] 28 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 29 | # Move channels (axis 3) to axis 1 30 | # Axis order will become: (batch elem, channel, height, width) 31 | channel_swap = (0, 3, 1, 2) 32 | blob = blob.transpose(channel_swap) 33 | return blob 34 | 35 | 36 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 37 | """Mean subtract and scale an image for use in a blob.""" 38 | im = im.astype(np.float32, copy=False) 39 | im -= pixel_means 40 | im_shape = im.shape 41 | im_size_min = np.min(im_shape[0:2]) 42 | im_size_max = np.max(im_shape[0:2]) 43 | im_scale = float(target_size) / float(im_size_min) 44 | # Prevent the biggest axis from being more than MAX_SIZE 45 | if np.round(im_scale * im_size_max) > max_size: 46 | im_scale = float(max_size) / float(im_size_max) 47 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 48 | interpolation=cv2.INTER_LINEAR) 49 | 50 | return im, im_scale 51 | 52 | 53 | def prep_im_for_blob_cfm(im, input_scales): 54 | """Converts an image into a network input. 55 | Arguments: 56 | im (ndarray): a color image in BGR order 57 | Returns: 58 | blob (ndarray): a data blob holding an image pyramid 59 | im_scale_factors (list): list of image scales (relative to im) used 60 | in the image pyramid 61 | """ 62 | im_orig = im.astype(np.float32, copy=True) 63 | im_orig -= cfg.PIXEL_MEANS 64 | 65 | im_shape = im_orig.shape 66 | im_size_min = np.min(im_shape[0:2]) 67 | im_size_max = np.max(im_shape[0:2]) 68 | 69 | processed_ims = [] 70 | im_scale_factors = [] 71 | 72 | for target_size in input_scales: 73 | im_scale = float(target_size) / float(im_size_min) 74 | # Prevent the biggest axis from being more than MAX_SIZE 75 | if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE: 76 | im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max) 77 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, 78 | interpolation=cv2.INTER_LINEAR) 79 | im_scale_factors.append(im_scale) 80 | processed_ims.append(im) 81 | 82 | # Create a blob to hold the input images 83 | blob = im_list_to_blob(processed_ims) 84 | 85 | return blob, np.array(im_scale_factors) 86 | 87 | 88 | def pred_rois_for_blob(im_rois, im_scales): 89 | """ 90 | Convert rois to network input 91 | support multi-scale testing 92 | """ 93 | im_rois = im_rois.astype(np.float, copy=False) 94 | if len(im_scales) > 1: 95 | widths = im_rois[:, 2] - im_rois[:, 0] + 1 96 | heights = im_rois[:, 3] - im_rois[:, 1] + 1 97 | 98 | areas = widths * heights 99 | scaled_areas = areas[:, np.newaxis] * (im_scales[np.newaxis, :] ** 2) 100 | diff_areas = np.abs(scaled_areas - 224 * 224) 101 | levels = diff_areas.argmin(axis=1)[:, np.newaxis] 102 | else: 103 | levels = np.zeros((im_rois.shape[0], 1), dtype=np.int) 104 | im_rois = im_rois * im_scales[levels] 105 | rois_blob = np.hstack((levels.astype(np.float), im_rois)) 106 | return rois_blob 107 | 108 | -------------------------------------------------------------------------------- /lib/rpn_tools/utils/blob.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/utils/blob.pyc -------------------------------------------------------------------------------- /lib/rpn_tools/utils/cython_bbox.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/utils/cython_bbox.so -------------------------------------------------------------------------------- /lib/rpn_tools/utils/mnc_config.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/utils/mnc_config.pyc -------------------------------------------------------------------------------- /lib/rpn_tools/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Multitask Network Cascade 3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 4 | # Copyright (c) 2016, Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 19 | 20 | def tic(self): 21 | # using time.time instead of time.clock because time time.clock 22 | # does not normalize for multithreading 23 | self.start_time = time.time() 24 | 25 | def toc(self, average=True): 26 | self.diff = time.time() - self.start_time 27 | self.total_time += self.diff 28 | self.calls += 1 29 | self.average_time = self.total_time / self.calls 30 | if average: 31 | return self.average_time 32 | else: 33 | return self.diff 34 | -------------------------------------------------------------------------------- /lib/rpn_tools/utils/timer.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/utils/timer.pyc -------------------------------------------------------------------------------- /lib/rpn_tools/utils/unmap.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Multitask Network Cascade 3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 4 | # Copyright (c) 2016, Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | 11 | def unmap(data, count, inds, fill=0): 12 | """ Unmap a subset of item (data) back to the original set of items (of 13 | size count) """ 14 | if len(data.shape) == 1: 15 | ret = np.empty((count, ), dtype=np.float32) 16 | ret.fill(fill) 17 | ret[inds] = data 18 | else: 19 | ret = np.empty((count, ) + data.shape[1:], dtype=np.float32) 20 | ret.fill(fill) 21 | ret[inds, :] = data 22 | return ret 23 | -------------------------------------------------------------------------------- /lib/rpn_tools/utils/unmap.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/utils/unmap.pyc -------------------------------------------------------------------------------- /lib/rpn_tools/utils/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/rpn_tools/utils/utils/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/utils/utils/__init__.pyc -------------------------------------------------------------------------------- /lib/rpn_tools/utils/utils/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | -------------------------------------------------------------------------------- /lib/rpn_tools/utils/utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | 10 | import numpy as np 11 | import cv2 12 | 13 | def im_list_to_blob(ims): 14 | """Convert a list of images into a network input. 15 | 16 | Assumes images are already prepared (means subtracted, BGR order, ...). 
17 | """ 18 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 19 | num_images = len(ims) 20 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 21 | dtype=np.float32) 22 | for i in xrange(num_images): 23 | im = ims[i] 24 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 25 | # Move channels (axis 3) to axis 1 26 | # Axis order will become: (batch elem, channel, height, width) 27 | channel_swap = (0, 3, 1, 2) 28 | blob = blob.transpose(channel_swap) 29 | return blob 30 | 31 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 32 | """Mean subtract and scale an image for use in a blob.""" 33 | im = im.astype(np.float32, copy=False) 34 | im -= pixel_means 35 | im_shape = im.shape 36 | im_size_min = np.min(im_shape[0:2]) 37 | im_size_max = np.max(im_shape[0:2]) 38 | im_scale = float(target_size) / float(im_size_min) 39 | # Prevent the biggest axis from being more than MAX_SIZE 40 | if np.round(im_scale * im_size_max) > max_size: 41 | im_scale = float(max_size) / float(im_size_max) 42 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 43 | interpolation=cv2.INTER_LINEAR) 44 | 45 | return im, im_scale 46 | -------------------------------------------------------------------------------- /lib/rpn_tools/utils/utils/blob.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/utils/utils/blob.pyc -------------------------------------------------------------------------------- /lib/rpn_tools/utils/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 
18 | 19 | def tic(self): 20 | # using time.time instead of time.clock because time time.clock 21 | # does not normalize for multithreading 22 | self.start_time = time.time() 23 | 24 | def toc(self, average=True): 25 | self.diff = time.time() - self.start_time 26 | self.total_time += self.diff 27 | self.calls += 1 28 | self.average_time = self.total_time / self.calls 29 | if average: 30 | return self.average_time 31 | else: 32 | return self.diff 33 | -------------------------------------------------------------------------------- /lib/rpn_tools/utils/utils/timer.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/utils/utils/timer.pyc -------------------------------------------------------------------------------- /lib/rpn_tools/utils/vis_seg.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Multitask Network Cascade 3 | # Written by Haozhi Qi 4 | # Copyright (c) 2016, Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import cPickle 10 | import os 11 | import cv2 12 | import Image 13 | from mnc_config import cfg 14 | 15 | 16 | def vis_seg(img_names, cls_names, output_dir, gt_dir): 17 | """ 18 | This function plot segmentation results to specific directory 19 | Args: 20 | img_names: list 21 | """ 22 | assert os.path.exists(output_dir) 23 | # a list of dictionary 24 | inst_dir = os.path.join(output_dir, 'SegInst') 25 | cls_dir = os.path.join(output_dir, 'SegCls') 26 | res_dir = os.path.join(output_dir, 'SegRes') 27 | if not os.path.isdir(inst_dir): 28 | os.mkdir(inst_dir) 29 | if not os.path.isdir(cls_dir): 30 | os.mkdir(cls_dir) 31 | if not os.path.isdir(res_dir): 32 | os.mkdir(res_dir) 33 | 34 | res_list = _prepare_dict(img_names, cls_names, output_dir) 35 | for img_ind, image_name in enumerate(img_names): 36 | target_inst_file = os.path.join(inst_dir, image_name + '.jpg') 37 | target_cls_file = os.path.join(cls_dir, image_name + '.jpg') 38 | print image_name 39 | gt_image = gt_dir + '/img/' + image_name + '.jpg' 40 | img_data = cv2.imread(gt_image) 41 | img_width = img_data.shape[1] 42 | img_height = img_data.shape[0] 43 | pred_dict = res_list[img_ind] 44 | inst_img, cls_img = _convert_pred_to_image(img_width, img_height, pred_dict) 45 | color_map = _get_voc_color_map() 46 | inst_out_img = np.zeros((img_height, img_width, 3)) 47 | cls_out_img = np.zeros((img_height, img_width, 3)) 48 | for i in xrange(img_height): 49 | for j in xrange(img_width): 50 | inst_out_img[i][j] = color_map[inst_img[i][j]][::-1] 51 | cls_out_img[i][j] = color_map[cls_img[i][j]][::-1] 52 | 53 | cv2.imwrite(target_inst_file, inst_out_img) 54 | cv2.imwrite(target_cls_file, cls_out_img) 55 | background = Image.open(gt_image) 56 | mask = Image.open(target_cls_file) 57 | background = background.convert('RGBA') 58 | mask = mask.convert('RGBA') 59 | superimpose_image = Image.blend(background, mask, 0.8) 60 | name = os.path.join(res_dir, image_name + '.png') 61 | superimpose_image.save(name, 'PNG') 62 | 63 | 64 | def _prepare_dict(img_names, cls_names, cache_dir, vis_thresh=0.5): 65 | """ 66 | Returns: 67 | list, each list is a dictionary contains mask list, box list 68 | """ 69 | res_list = [] 70 | det_file = os.path.join(cache_dir, 'res_boxes.pkl') 71 | 
with open(det_file, 'rb') as f: 72 | det_pkl = cPickle.load(f) 73 | seg_file = os.path.join(cache_dir, 'res_masks.pkl') 74 | with open(seg_file, 'rb') as f: 75 | seg_pkl = cPickle.load(f) 76 | 77 | for img_ind, image_name in enumerate(img_names): 78 | box_for_img = [] 79 | mask_for_img = [] 80 | cls_for_img = [] 81 | for cls_ind, cls_name in enumerate(cls_names): 82 | if cls_name == '__background__' or len(det_pkl[cls_ind][img_ind]) == 0: 83 | continue 84 | det_for_img = det_pkl[cls_ind][img_ind] 85 | seg_for_img = seg_pkl[cls_ind][img_ind] 86 | keep_inds = np.where(det_for_img[:, -1] >= vis_thresh)[0] 87 | for keep in keep_inds: 88 | box_for_img.append(det_for_img[keep]) 89 | # TODO: remove this annoying 0 90 | mask_for_img.append(seg_for_img[keep][0]) 91 | cls_for_img.append(cls_ind) 92 | res_dict = {'image_name': image_name, 93 | 'cls_name': cls_for_img, 94 | 'boxes': box_for_img, 95 | 'masks': mask_for_img} 96 | res_list.append(res_dict) 97 | 98 | return res_list 99 | 100 | 101 | def _convert_pred_to_image(img_width, img_height, pred_dict): 102 | num_inst = len(pred_dict['boxes']) 103 | inst_img = np.zeros((img_height, img_width)) 104 | cls_img = np.zeros((img_height, img_width)) 105 | for i in xrange(num_inst): 106 | box = np.round(pred_dict['boxes'][i]).astype(int) 107 | mask = pred_dict['masks'][i] 108 | cls_num = pred_dict['cls_name'][i] 109 | # clip box into image space 110 | box[0] = min(max(box[0], 0), img_width - 1) 111 | box[1] = min(max(box[1], 0), img_height - 1) 112 | box[2] = min(max(box[2], 0), img_width - 1) 113 | box[3] = min(max(box[3], 0), img_height - 1) 114 | mask = cv2.resize(mask.astype(np.float32), (box[2]-box[0]+1, box[3]-box[1]+1)) 115 | mask = mask >= cfg.BINARIZE_THRESH 116 | 117 | part1 = (i+1) * mask.astype(np.float32) 118 | part2 = np.multiply(np.logical_not(mask), inst_img[box[1]:box[3]+1, box[0]:box[2]+1]) 119 | part3 = np.multiply(np.logical_not(mask), cls_img[box[1]:box[3]+1, box[0]:box[2]+1]) 120 | inst_img[box[1]:box[3]+1, box[0]:box[2]+1] = part1 + part2 121 | cls_img[box[1]:box[3]+1, box[0]:box[2]+1] = cls_num * mask.astype(np.float32) + part3 122 | # Plot bounding boxes simultaneously 123 | cls_img[box[1]:box[3]+1, box[0]-1:box[0]+1] = 150 124 | cls_img[box[1]:box[3]+1, box[2]-1:box[2]+1] = 150 125 | cls_img[box[1]-1:box[1]+1, box[0]:box[2]+1] = 150 126 | cls_img[box[3]-1:box[3]+1, box[0]:box[2]+1] = 150 127 | 128 | inst_img = inst_img.astype(int) 129 | cls_img = cls_img.astype(int) 130 | return inst_img, cls_img 131 | 132 | 133 | def _get_voc_color_map(n=256): 134 | color_map = np.zeros((n, 3)) 135 | for i in xrange(n): 136 | r = b = g = 0 137 | cid = i 138 | for j in xrange(0, 8): 139 | r = np.bitwise_or(r, np.left_shift(np.unpackbits(np.array([cid], dtype=np.uint8))[-1], 7-j)) 140 | g = np.bitwise_or(g, np.left_shift(np.unpackbits(np.array([cid], dtype=np.uint8))[-2], 7-j)) 141 | b = np.bitwise_or(b, np.left_shift(np.unpackbits(np.array([cid], dtype=np.uint8))[-3], 7-j)) 142 | cid = np.right_shift(cid, 3) 143 | 144 | color_map[i][0] = r 145 | color_map[i][1] = g 146 | color_map[i][2] = b 147 | return color_map 148 | -------------------------------------------------------------------------------- /lib/setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # 
-------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | import numpy as np 11 | from distutils.core import setup 12 | from distutils.extension import Extension 13 | from Cython.Distutils import build_ext 14 | 15 | def find_in_path(name, path): 16 | "Find a file in a search path" 17 | #adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 18 | for dir in path.split(os.pathsep): 19 | binpath = pjoin(dir, name) 20 | if os.path.exists(binpath): 21 | return os.path.abspath(binpath) 22 | return None 23 | 24 | def locate_cuda(): 25 | """Locate the CUDA environment on the system 26 | 27 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 28 | and values giving the absolute path to each directory. 29 | 30 | Starts by looking for the CUDAHOME env variable. If not found, everything 31 | is based on finding 'nvcc' in the PATH. 32 | """ 33 | 34 | # first check if the CUDAHOME env variable is in use 35 | if 'CUDAHOME' in os.environ: 36 | home = os.environ['CUDAHOME'] 37 | nvcc = pjoin(home, 'bin', 'nvcc') 38 | else: 39 | # otherwise, search the PATH for NVCC 40 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 41 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 42 | if nvcc is None: 43 | raise EnvironmentError('The nvcc binary could not be ' 44 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 45 | home = os.path.dirname(os.path.dirname(nvcc)) 46 | 47 | cudaconfig = {'home':home, 'nvcc':nvcc, 48 | 'include': pjoin(home, 'include'), 49 | 'lib64': pjoin(home, 'lib64')} 50 | for k, v in list(cudaconfig.items()): 51 | if not os.path.exists(v): 52 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 53 | 54 | return cudaconfig 55 | CUDA = locate_cuda() 56 | 57 | # Obtain the numpy include directory. This logic works across numpy versions. 58 | try: 59 | numpy_include = np.get_include() 60 | except AttributeError: 61 | numpy_include = np.get_numpy_include() 62 | 63 | def customize_compiler_for_nvcc(self): 64 | """inject deep into distutils to customize how the dispatch 65 | to gcc/nvcc works. 66 | 67 | If you subclass UnixCCompiler, it's not trivial to get your subclass 68 | injected in, and still have the right customizations (i.e. 69 | distutils.sysconfig.customize_compiler) run on it. So instead of going 70 | the OO route, I have this. Note, it's kindof like a wierd functional 71 | subclassing going on.""" 72 | 73 | # tell the compiler it can processes .cu 74 | self.src_extensions.append('.cu') 75 | 76 | # save references to the default compiler_so and _comple methods 77 | default_compiler_so = self.compiler_so 78 | super = self._compile 79 | 80 | # now redefine the _compile method. This gets executed for each 81 | # object but distutils doesn't have the ability to change compilers 82 | # based on source extension: we add it. 
83 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 84 | print(extra_postargs) 85 | if os.path.splitext(src)[1] == '.cu': 86 | # use the cuda for .cu files 87 | self.set_executable('compiler_so', CUDA['nvcc']) 88 | # use only a subset of the extra_postargs, which are 1-1 translated 89 | # from the extra_compile_args in the Extension class 90 | postargs = extra_postargs['nvcc'] 91 | else: 92 | postargs = extra_postargs['gcc'] 93 | 94 | super(obj, src, ext, cc_args, postargs, pp_opts) 95 | # reset the default compiler_so, which we might have changed for cuda 96 | self.compiler_so = default_compiler_so 97 | 98 | # inject our redefined _compile method into the class 99 | self._compile = _compile 100 | 101 | 102 | # run the customize_compiler 103 | class custom_build_ext(build_ext): 104 | def build_extensions(self): 105 | customize_compiler_for_nvcc(self.compiler) 106 | build_ext.build_extensions(self) 107 | 108 | ext_modules = [ 109 | Extension( 110 | "utils.cython_bbox", 111 | ["utils/bbox.pyx"], 112 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 113 | include_dirs = [numpy_include] 114 | ), 115 | Extension( 116 | "utils.cython_nms", 117 | ["utils/nms.pyx"], 118 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 119 | include_dirs = [numpy_include] 120 | ), 121 | Extension( 122 | "nms.cpu_nms", 123 | ["nms/cpu_nms.pyx"], 124 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 125 | include_dirs = [numpy_include] 126 | ), 127 | Extension('nms.gpu_nms', 128 | ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'], 129 | library_dirs=[CUDA['lib64']], 130 | libraries=['cudart'], 131 | language='c++', 132 | runtime_library_dirs=[CUDA['lib64']], 133 | # this syntax is specific to this build system 134 | # we're only going to use certain compiler args with nvcc and not with gcc 135 | # the implementation of this trick is in customize_compiler() below 136 | extra_compile_args={'gcc': ["-Wno-unused-function"], 137 | 'nvcc': ['-arch=sm_35', 138 | '--ptxas-options=-v', 139 | '-c', 140 | '--compiler-options', 141 | "'-fPIC'"]}, 142 | include_dirs = [numpy_include, CUDA['include']] 143 | ), 144 | Extension( 145 | 'pycocotools._mask', 146 | sources=['pycocotools/maskApi.c', 'pycocotools/_mask.pyx'], 147 | include_dirs = [numpy_include, 'pycocotools'], 148 | extra_compile_args={ 149 | 'gcc': ['-Wno-cpp', '-Wno-unused-function', '-std=c99']}, 150 | ), 151 | ] 152 | 153 | setup( 154 | name='fast_rcnn', 155 | ext_modules=ext_modules, 156 | # inject our custom trigger 157 | cmdclass={'build_ext': custom_build_ext}, 158 | ) 159 | -------------------------------------------------------------------------------- /lib/utils/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from . import cython_nms 8 | from . import cython_bbox 9 | from . import boxes_grid 10 | from . import blob 11 | from . import nms 12 | from . 
import timer -------------------------------------------------------------------------------- /lib/utils/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | 57 | def bbox_intersections( 58 | np.ndarray[DTYPE_t, ndim=2] boxes, 59 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 60 | """ 61 | For each query box compute the intersection ratio covered by boxes 62 | ---------- 63 | Parameters 64 | ---------- 65 | boxes: (N, 4) ndarray of float 66 | query_boxes: (K, 4) ndarray of float 67 | Returns 68 | ------- 69 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes 70 | """ 71 | cdef unsigned int N = boxes.shape[0] 72 | cdef unsigned int K = query_boxes.shape[0] 73 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) 74 | cdef DTYPE_t iw, ih, box_area 75 | cdef DTYPE_t ua 76 | cdef unsigned int k, n 77 | for k in range(K): 78 | box_area = ( 79 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 80 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 81 | ) 82 | for n in range(N): 83 | iw = ( 84 | min(boxes[n, 2], query_boxes[k, 2]) - 85 | max(boxes[n, 0], query_boxes[k, 0]) + 1 86 | ) 87 | if iw > 0: 88 | ih = ( 89 | min(boxes[n, 3], query_boxes[k, 3]) - 90 | max(boxes[n, 1], query_boxes[k, 1]) + 1 91 | ) 92 | if ih > 0: 93 | intersec[n, k] = iw * ih / box_area 94 | return intersec -------------------------------------------------------------------------------- /lib/utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | 10 | import numpy as np 11 | import cv2 12 | from ..fast_rcnn.config import cfg 13 | 14 | def 
im_list_to_blob(ims): 15 | """Convert a list of images into a network input. 16 | 17 | Assumes images are already prepared (means subtracted, BGR order, ...). 18 | """ 19 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 20 | num_images = len(ims) 21 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 22 | dtype=np.float32) 23 | for i in range(num_images): 24 | im = ims[i] 25 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 26 | 27 | return blob 28 | 29 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 30 | """Mean subtract and scale an image for use in a blob.""" 31 | im = im.astype(np.float32, copy=False) 32 | im -= pixel_means 33 | im_shape = im.shape 34 | im_size_min = np.min(im_shape[0:2]) 35 | im_size_max = np.max(im_shape[0:2]) 36 | im_scale = float(target_size) / float(im_size_min) 37 | # Prevent the biggest axis from being more than MAX_SIZE 38 | if np.round(im_scale * im_size_max) > max_size: 39 | im_scale = float(max_size) / float(im_size_max) 40 | if cfg.TRAIN.RANDOM_DOWNSAMPLE: 41 | r = 0.6 + np.random.rand() * 0.4 42 | im_scale *= r 43 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 44 | interpolation=cv2.INTER_LINEAR) 45 | 46 | return im, im_scale 47 | -------------------------------------------------------------------------------- /lib/utils/boxes_grid.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Subcategory CNN 3 | # Copyright (c) 2015 CVGL Stanford 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yu Xiang 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import math 10 | # TODO: make fast_rcnn irrelevant 11 | # >>>> obsolete, because it depends on sth outside of this project 12 | from ..fast_rcnn.config import cfg 13 | # <<<< obsolete 14 | 15 | def get_boxes_grid(image_height, image_width): 16 | """ 17 | Return the boxes on image grid. 18 | calling this function when cfg.IS_MULTISCALE is True, otherwise, calling rdl_roidb.prepare_roidb(imdb) instead. 19 | """ 20 | 21 | # fixed a bug, change cfg.TRAIN.SCALES to cfg.TRAIN.SCALES_BASE 22 | # coz, here needs a ratio around 1.0, not the accutual size. 23 | # height and width of the feature map 24 | if cfg.NET_NAME == 'CaffeNet': 25 | height = np.floor((image_height * max(cfg.TRAIN.SCALES_BASE) - 1) / 4.0 + 1) 26 | height = np.floor((height - 1) / 2.0 + 1 + 0.5) 27 | height = np.floor((height - 1) / 2.0 + 1 + 0.5) 28 | 29 | width = np.floor((image_width * max(cfg.TRAIN.SCALES_BASE) - 1) / 4.0 + 1) 30 | width = np.floor((width - 1) / 2.0 + 1 + 0.5) 31 | width = np.floor((width - 1) / 2.0 + 1 + 0.5) 32 | elif cfg.NET_NAME == 'VGGnet': 33 | height = np.floor(image_height * max(cfg.TRAIN.SCALES_BASE) / 2.0 + 0.5) 34 | height = np.floor(height / 2.0 + 0.5) 35 | height = np.floor(height / 2.0 + 0.5) 36 | height = np.floor(height / 2.0 + 0.5) 37 | 38 | width = np.floor(image_width * max(cfg.TRAIN.SCALES_BASE) / 2.0 + 0.5) 39 | width = np.floor(width / 2.0 + 0.5) 40 | width = np.floor(width / 2.0 + 0.5) 41 | width = np.floor(width / 2.0 + 0.5) 42 | else: 43 | assert (1), 'The network architecture is not supported in utils.get_boxes_grid!' 
44 | 45 | # compute the grid box centers 46 | h = np.arange(height) 47 | w = np.arange(width) 48 | y, x = np.meshgrid(h, w, indexing='ij') 49 | centers = np.dstack((x, y)) 50 | centers = np.reshape(centers, (-1, 2)) 51 | num = centers.shape[0] 52 | 53 | # compute width and height of grid box 54 | area = cfg.TRAIN.KERNEL_SIZE * cfg.TRAIN.KERNEL_SIZE 55 | aspect = cfg.TRAIN.ASPECTS # height / width 56 | num_aspect = len(aspect) 57 | widths = np.zeros((1, num_aspect), dtype=np.float32) 58 | heights = np.zeros((1, num_aspect), dtype=np.float32) 59 | for i in range(num_aspect): 60 | widths[0,i] = math.sqrt(area / aspect[i]) 61 | heights[0,i] = widths[0,i] * aspect[i] 62 | 63 | # construct grid boxes 64 | centers = np.repeat(centers, num_aspect, axis=0) 65 | widths = np.tile(widths, num).transpose() 66 | heights = np.tile(heights, num).transpose() 67 | 68 | x1 = np.reshape(centers[:,0], (-1, 1)) - widths * 0.5 69 | x2 = np.reshape(centers[:,0], (-1, 1)) + widths * 0.5 70 | y1 = np.reshape(centers[:,1], (-1, 1)) - heights * 0.5 71 | y2 = np.reshape(centers[:,1], (-1, 1)) + heights * 0.5 72 | 73 | boxes_grid = np.hstack((x1, y1, x2, y2)) / cfg.TRAIN.SPATIAL_SCALE 74 | 75 | return boxes_grid, centers[:,0], centers[:,1] 76 | -------------------------------------------------------------------------------- /lib/utils/nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def nms(dets, thresh): 11 | x1 = dets[:, 0] 12 | y1 = dets[:, 1] 13 | x2 = dets[:, 2] 14 | y2 = dets[:, 3] 15 | scores = dets[:, 4] 16 | 17 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 18 | order = scores.argsort()[::-1] 19 | 20 | keep = [] 21 | while order.size > 0: 22 | i = order[0] 23 | keep.append(i) 24 | xx1 = np.maximum(x1[i], x1[order[1:]]) 25 | yy1 = np.maximum(y1[i], y1[order[1:]]) 26 | xx2 = np.minimum(x2[i], x2[order[1:]]) 27 | yy2 = np.minimum(y2[i], y2[order[1:]]) 28 | 29 | w = np.maximum(0.0, xx2 - xx1 + 1) 30 | h = np.maximum(0.0, yy2 - yy1 + 1) 31 | inter = w * h 32 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 33 | 34 | inds = np.where(ovr <= thresh)[0] 35 | order = order[inds + 1] 36 | 37 | return keep 38 | -------------------------------------------------------------------------------- /lib/utils/nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 
| cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | 70 | def nms_new(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 71 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 72 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 73 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 74 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 75 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 76 | 77 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 78 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 79 | 80 | cdef int ndets = dets.shape[0] 81 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 82 | np.zeros((ndets), dtype=np.int) 83 | 84 | # nominal indices 85 | cdef int _i, _j 86 | # sorted indices 87 | cdef int i, j 88 | # temp variables for box i's (the box currently under consideration) 89 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 90 | # variables for computing overlap with box j (lower scoring box) 91 | cdef np.float32_t xx1, yy1, xx2, yy2 92 | cdef np.float32_t w, h 93 | cdef np.float32_t inter, ovr 94 | 95 | keep = [] 96 | for _i in range(ndets): 97 | i = order[_i] 98 | if suppressed[i] == 1: 99 | continue 100 | keep.append(i) 101 | ix1 = x1[i] 102 | iy1 = y1[i] 103 | ix2 = x2[i] 104 | iy2 = y2[i] 105 | iarea = areas[i] 106 | for _j in range(_i + 1, ndets): 107 | j = order[_j] 108 | if suppressed[j] == 1: 109 | continue 110 | xx1 = max(ix1, x1[j]) 111 | yy1 = max(iy1, y1[j]) 112 | xx2 = min(ix2, x2[j]) 113 | yy2 = min(iy2, y2[j]) 114 | w = max(0.0, xx2 - xx1 + 1) 115 | h = max(0.0, yy2 - yy1 + 1) 116 | inter = w * h 117 | ovr = inter / (iarea + areas[j] - inter) 118 | ovr1 = inter / iarea 119 | ovr2 = inter / areas[j] 120 | if ovr >= thresh or ovr1 > 0.95 or ovr2 > 0.95: 121 | suppressed[j] = 1 122 | 123 | return keep 124 | -------------------------------------------------------------------------------- /lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import 
time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 18 | 19 | def tic(self): 20 | # using time.time instead of time.clock because time time.clock 21 | # does not normalize for multithreading 22 | self.start_time = time.time() 23 | 24 | def toc(self, average=True): 25 | self.diff = time.time() - self.start_time 26 | self.total_time += self.diff 27 | self.calls += 1 28 | self.average_time = self.total_time / self.calls 29 | if average: 30 | return self.average_time 31 | else: 32 | return self.diff 33 | -------------------------------------------------------------------------------- /nets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/nets/__init__.py -------------------------------------------------------------------------------- /nets/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/nets/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /nets/__pycache__/resnet_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/nets/__pycache__/resnet_utils.cpython-36.pyc -------------------------------------------------------------------------------- /nets/__pycache__/resnet_v1.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/nets/__pycache__/resnet_v1.cpython-36.pyc -------------------------------------------------------------------------------- /nets/googlenet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.layers as layers 3 | import tensorflow.contrib.framework as ops 4 | 5 | 6 | def get_inception_layer(inputs, conv11_size, conv33_11_size, conv33_size, 7 | conv55_11_size, conv55_size, pool11_size): 8 | with tf.variable_scope("conv_1x1"): 9 | conv11 = layers.conv2d(inputs, conv11_size, [1, 1]) 10 | with tf.variable_scope("conv_3x3"): 11 | conv33_11 = layers.conv2d(inputs, conv33_11_size, [1, 1]) 12 | conv33 = layers.conv2d(conv33_11, conv33_size, [3, 3]) 13 | with tf.variable_scope("conv_5x5"): 14 | conv55_11 = layers.conv2d(inputs, conv55_11_size, [1, 1]) 15 | conv55 = layers.conv2d(conv55_11, conv55_size, [5, 5]) 16 | with tf.variable_scope("pool_proj"): 17 | pool_proj = layers.max_pool2d(inputs, [3, 3], stride=1) 18 | pool11 = layers.conv2d(pool_proj, pool11_size, [1, 1]) 19 | if tf.__version__ == '0.11.0rc0': 20 | return tf.concat(3, [conv11, conv33, conv55, pool11]) 21 | return tf.concat([conv11, conv33, conv55, pool11], 3) 22 | 23 | 24 | def aux_logit_layer(inputs, num_classes, is_training): 25 | with tf.variable_scope("pool2d"): 26 | pooled = layers.avg_pool2d(inputs, [5, 5], stride=3) 27 | with tf.variable_scope("conv11"): 28 | conv11 = layers.conv2d(pooled, 128, [1, 1]) 29 | with tf.variable_scope("flatten"): 30 | flat = tf.reshape(conv11, [-1, 2048]) 31 | 
with tf.variable_scope("fc"): 32 | fc = layers.fully_connected(flat, 1024, activation_fn=None) 33 | with tf.variable_scope("drop"): 34 | drop = layers.dropout(fc, 0.3, is_training=is_training) 35 | with tf.variable_scope("linear"): 36 | linear = layers.fully_connected(drop, num_classes, activation_fn=None) 37 | with tf.variable_scope("soft"): 38 | soft = tf.nn.softmax(linear) 39 | return soft 40 | 41 | 42 | def googlenet(inputs, 43 | dropout_keep_prob=0.4, 44 | num_classes=1000, 45 | is_training=True, 46 | restore_logits=None, 47 | scope=''): 48 | ''' 49 | Implementation of https://arxiv.org/pdf/1409.4842.pdf 50 | ''' 51 | 52 | end_points = {} 53 | with tf.name_scope(scope, "googlenet", [inputs]): 54 | with ops.arg_scope([layers.max_pool2d], padding='SAME'): 55 | end_points['conv0'] = layers.conv2d(inputs, 64, [7, 7], stride=2, scope='conv0') 56 | end_points['pool0'] = layers.max_pool2d(end_points['conv0'], [3, 3], scope='pool0') 57 | end_points['conv1_a'] = layers.conv2d(end_points['pool0'], 64, [1, 1], scope='conv1_a') 58 | end_points['conv1_b'] = layers.conv2d(end_points['conv1_a'], 192, [3, 3], scope='conv1_b') 59 | end_points['pool1'] = layers.max_pool2d(end_points['conv1_b'], [3, 3], scope='pool1') 60 | 61 | with tf.variable_scope("inception_3a"): 62 | end_points['inception_3a'] = get_inception_layer(end_points['pool1'], 64, 96, 128, 16, 32, 32) 63 | 64 | with tf.variable_scope("inception_3b"): 65 | end_points['inception_3b'] = get_inception_layer(end_points['inception_3a'], 128, 128, 192, 32, 96, 64) 66 | 67 | end_points['pool2'] = layers.max_pool2d(end_points['inception_3b'], [3, 3], scope='pool2') 68 | 69 | with tf.variable_scope("inception_4a"): 70 | end_points['inception_4a'] = get_inception_layer(end_points['pool2'], 192, 96, 208, 16, 48, 64) 71 | 72 | with tf.variable_scope("aux_logits_1"): 73 | end_points['aux_logits_1'] = aux_logit_layer(end_points['inception_4a'], num_classes, is_training) 74 | 75 | with tf.variable_scope("inception_4b"): 76 | end_points['inception_4b'] = get_inception_layer(end_points['inception_4a'], 160, 112, 224, 24, 64, 64) 77 | 78 | with tf.variable_scope("inception_4c"): 79 | end_points['inception_4c'] = get_inception_layer(end_points['inception_4b'], 128, 128, 256, 24, 64, 64) 80 | 81 | with tf.variable_scope("inception_4d"): 82 | end_points['inception_4d'] = get_inception_layer(end_points['inception_4c'], 112, 144, 288, 32, 64, 64) 83 | 84 | with tf.variable_scope("aux_logits_2"): 85 | end_points['aux_logits_2'] = aux_logit_layer(end_points['inception_4d'], num_classes, is_training) 86 | 87 | with tf.variable_scope("inception_4e"): 88 | end_points['inception_4e'] = get_inception_layer(end_points['inception_4d'], 256, 160, 320, 32, 128, 89 | 128) 90 | 91 | end_points['pool3'] = layers.max_pool2d(end_points['inception_4e'], [3, 3], scope='pool3') 92 | 93 | with tf.variable_scope("inception_5a"): 94 | end_points['inception_5a'] = get_inception_layer(end_points['pool3'], 256, 160, 320, 32, 128, 128) 95 | 96 | with tf.variable_scope("inception_5b"): 97 | end_points['inception_5b'] = get_inception_layer(end_points['inception_5a'], 384, 192, 384, 48, 128, 98 | 128) 99 | 100 | end_points['pool4'] = layers.avg_pool2d(end_points['inception_5b'], [7, 7], stride=1, scope='pool4') 101 | 102 | end_points['reshape'] = tf.reshape(end_points['pool4'], [-1, 1024]) 103 | 104 | end_points['dropout'] = layers.dropout(end_points['reshape'], dropout_keep_prob, is_training=is_training) 105 | 106 | end_points['logits'] = layers.fully_connected(end_points['dropout'], 
num_classes, activation_fn=None, 107 | scope='logits') 108 | 109 | end_points['predictions'] = tf.nn.softmax(end_points['logits'], name='predictions') 110 | 111 | return end_points['logits'], end_points --------------------------------------------------------------------------------
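The `bbox_overlaps` kernel above (duplicated in lib/utils/bbox.pyx, lib/rpn_tools/utils/bbox.pyx and lib/rpn_tools/utils/utils/bbox.pyx) computes pairwise IoU with the Pascal-VOC-style `+1` pixel convention. Below is a minimal NumPy sketch of the same computation, handy as a sanity check against the compiled `cython_bbox` extension; the name `bbox_overlaps_np` and the sample boxes are illustrative, not part of the repository.

```python
import numpy as np

def bbox_overlaps_np(boxes, query_boxes):
    """Vectorised IoU between (N, 4) boxes and (K, 4) query_boxes -> (N, K)."""
    b = boxes.astype(np.float64)
    q = query_boxes.astype(np.float64)
    area_b = (b[:, 2] - b[:, 0] + 1) * (b[:, 3] - b[:, 1] + 1)          # (N,)
    area_q = (q[:, 2] - q[:, 0] + 1) * (q[:, 3] - q[:, 1] + 1)          # (K,)
    iw = (np.minimum(b[:, None, 2], q[None, :, 2])
          - np.maximum(b[:, None, 0], q[None, :, 0]) + 1).clip(min=0)   # (N, K)
    ih = (np.minimum(b[:, None, 3], q[None, :, 3])
          - np.maximum(b[:, None, 1], q[None, :, 1]) + 1).clip(min=0)
    inter = iw * ih
    return inter / (area_b[:, None] + area_q[None, :] - inter)

# A box compared against itself and against a disjoint box.
boxes = np.array([[0, 0, 9, 9]], dtype=np.float64)
query = np.array([[0, 0, 9, 9], [20, 20, 29, 29]], dtype=np.float64)
print(bbox_overlaps_np(boxes, query))   # [[1. 0.]]
```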
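`prep_im_for_blob` and `im_list_to_blob` appear in several variants above; all of them mean-subtract, scale the shortest image side to a target size while capping the longest side, and zero-pad a list of images into one batch blob (the lib/rpn_tools/utils copy additionally transposes to Caffe-style NCHW). The following self-contained sketch shows that flow inline; the pixel means, size limits and image path are placeholder values, not the project configuration.

```python
import numpy as np
import cv2

PIXEL_MEANS = np.array([[[102.98, 115.95, 122.77]]], dtype=np.float32)  # BGR means (example values)
TARGET_SIZE, MAX_SIZE = 600, 1000                                       # example scale limits

im = cv2.imread('demo.jpg').astype(np.float32)       # BGR, HxWx3; 'demo.jpg' is a placeholder
im -= PIXEL_MEANS                                    # mean subtraction
im_scale = TARGET_SIZE / min(im.shape[:2])           # scale the shortest side to TARGET_SIZE ...
if round(im_scale * max(im.shape[:2])) > MAX_SIZE:   # ... but cap the longest side at MAX_SIZE
    im_scale = MAX_SIZE / max(im.shape[:2])
im = cv2.resize(im, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)

# im_list_to_blob: zero-pad a list of images to a common size -> (N, Hmax, Wmax, 3) float32
ims = [im]
max_h = max(i.shape[0] for i in ims)
max_w = max(i.shape[1] for i in ims)
blob = np.zeros((len(ims), max_h, max_w, 3), dtype=np.float32)
for n, i in enumerate(ims):
    blob[n, :i.shape[0], :i.shape[1], :] = i
# The rpn_tools/utils variant then transposes to NCHW: blob = blob.transpose(0, 3, 1, 2)
print(blob.shape, im_scale)
```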
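lib/utils/nms.py, nms.pyx and the GPU kernels all implement the same greedy suppression over `[x1, y1, x2, y2, score]` rows: boxes are visited in descending score order and any remaining box whose IoU with a kept box exceeds the threshold is dropped. A minimal call to the pure-NumPy version, assuming the extensions have been built with `cd lib && make` and that lib/ is importable from the repository root (the exact import path depends on how the package is exposed):

```python
import numpy as np
from lib.utils.nms import nms   # import path is an assumption; adjust to your environment

dets = np.array([
    [10, 10, 50, 50, 0.9],       # highest score, kept
    [12, 12, 52, 52, 0.8],       # IoU ~0.83 with the box above -> suppressed at thresh 0.3
    [100, 100, 150, 150, 0.7],   # disjoint, kept
], dtype=np.float32)

keep = nms(dets, 0.3)
print(keep)                      # [0, 2]
```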
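The `Timer` class (identical in lib/utils/timer.py and lib/rpn_tools/utils/timer.py) accumulates wall-clock time across `tic()`/`toc()` pairs, with `toc()` returning the running average by default. A small usage sketch, with the same import-path caveat as above:

```python
import time
from lib.utils.timer import Timer   # import path is an assumption; adjust to your environment

timer = Timer()
for _ in range(3):
    timer.tic()
    time.sleep(0.01)                 # stand-in for a forward pass
    timer.toc()
print('%d calls, average %.4fs' % (timer.calls, timer.average_time))
```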
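nets/googlenet.py builds an Inception-v1 endpoint dictionary with two auxiliary heads and returns `(logits, end_points)`. A minimal usage sketch under the TF 1.x graph API this repository targets, run from the repository root; a 224x224 input is assumed, since the aux-head reshape to `[-1, 2048]` and the final `[7, 7]` average pool only line up for that spatial size.

```python
import numpy as np
import tensorflow as tf                 # TF 1.x graph API, matching the repo's dependency
from nets.googlenet import googlenet

images = tf.placeholder(tf.float32, [None, 224, 224, 3], name='images')
logits, end_points = googlenet(images, num_classes=1000, is_training=False)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    probs = sess.run(end_points['predictions'],
                     feed_dict={images: np.zeros((1, 224, 224, 3), np.float32)})
    print(probs.shape)                  # (1, 1000)
```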