├── .idea
│   ├── InceptText-Tensorflow.iml
│   ├── modules.xml
│   ├── vcs.xml
│   └── workspace.xml
├── README.md
├── data_util.py
├── error_pic
│   ├── .DS_Store
│   ├── error1.jpg
│   ├── error2.jpg
│   └── error3.jpg
├── icdar.py
├── lib
│   ├── Makefile
│   ├── __init__.py
│   ├── cnn_tools
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── tools.py
│   │   └── tools.pyc
│   ├── cuda_config.h
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── coco.py
│   │   ├── ds_utils.py
│   │   ├── factory.py
│   │   ├── imagenet3d.py
│   │   ├── imdb.py
│   │   ├── imdb2.py
│   │   ├── kitti.py
│   │   ├── kitti_tracking.py
│   │   ├── kittivoc.py
│   │   ├── nissan.py
│   │   ├── nthu.py
│   │   ├── pascal3d.py
│   │   ├── pascal_voc.py
│   │   ├── pascal_voc2.py
│   │   └── voc_eval.py
│   ├── deform_conv_layer
│   │   ├── __init__.py
│   │   ├── deform_conv.cc
│   │   ├── deform_conv.cu.cc
│   │   ├── deform_conv.h
│   │   ├── deform_conv_grad.py
│   │   ├── deform_conv_op.py
│   │   ├── deform_conv_test_mx.py
│   │   ├── deform_conv_util.h
│   │   └── test_deform_conv.py
│   ├── deform_psroi_pooling_layer
│   │   ├── __init__.py
│   │   ├── deform_psroi_pooling_op.cc
│   │   ├── deform_psroi_pooling_op.py
│   │   ├── deform_psroi_pooling_op_gpu.cu.cc
│   │   ├── deform_psroi_pooling_op_gpu.h
│   │   ├── deform_psroi_pooling_op_grad.py
│   │   ├── deform_psroi_pooling_op_test.py
│   │   └── deform_psroi_pooling_op_test_mx.py
│   ├── fast_rcnn
│   │   ├── __init__.py
│   │   ├── bbox_transform.py
│   │   ├── config.py
│   │   ├── config2.py
│   │   ├── nms_wrapper.py
│   │   ├── test.py
│   │   └── train.py
│   ├── gt_data_layer
│   │   ├── __init__.py
│   │   ├── layer.py
│   │   ├── minibatch.py
│   │   └── roidb.py
│   ├── make.sh
│   ├── networks
│   │   ├── .VGGnet.py.swo
│   │   ├── Resnet101_test.py
│   │   ├── Resnet101_train.py
│   │   ├── Resnet50_test.py
│   │   ├── Resnet50_train.py
│   │   ├── VGGnet_test.py
│   │   ├── VGGnet_train.py
│   │   ├── __init__.py
│   │   ├── caffenet.py
│   │   ├── factory.py
│   │   └── network.py
│   ├── nms
│   │   ├── .gitignore
│   │   ├── __init__.py
│   │   ├── cpu_nms.pyx
│   │   ├── gpu_nms.hpp
│   │   ├── gpu_nms.pyx
│   │   ├── nms_kernel.cu
│   │   └── py_cpu_nms.py
│   ├── psroi_pooling_layer
│   │   ├── __init__.py
│   │   ├── cuda_kernel_helper.h
│   │   ├── psroi_pooling_op.cc
│   │   ├── psroi_pooling_op.py
│   │   ├── psroi_pooling_op_gpu.cu.cc
│   │   ├── psroi_pooling_op_gpu.h
│   │   ├── psroi_pooling_op_grad.py
│   │   └── psroi_pooling_op_test.py
│   ├── pycocotools
│   │   ├── UPSTREAM_REV
│   │   ├── __init__.py
│   │   ├── _mask.c
│   │   ├── _mask.pyx
│   │   ├── coco.py
│   │   ├── cocoeval.py
│   │   ├── license.txt
│   │   ├── mask.py
│   │   ├── maskApi.c
│   │   └── maskApi.h
│   ├── roi_data_layer
│   │   ├── __init__.py
│   │   ├── layer.py
│   │   ├── minibatch.py
│   │   ├── minibatch2.py
│   │   ├── roidb.py
│   │   └── roidb2.py
│   ├── roi_pooling_layer
│   │   ├── __init__.py
│   │   ├── roi_pooling_op.cc
│   │   ├── roi_pooling_op.py
│   │   ├── roi_pooling_op_gpu.cu.cc
│   │   ├── roi_pooling_op_gpu.h
│   │   ├── roi_pooling_op_grad.py
│   │   └── roi_pooling_op_test.py
│   ├── rpn_tools
│   │   ├── .DS_Store
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── anchor_target_layer_modified.py
│   │   ├── anchor_target_layer_modified.pyc
│   │   ├── fast_rcnn
│   │   │   ├── __init__.py
│   │   │   ├── __init__.pyc
│   │   │   ├── bbox_transform.py
│   │   │   ├── bbox_transform.pyc
│   │   │   ├── config.py
│   │   │   ├── config.pyc
│   │   │   ├── nms
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __init__.pyc
│   │   │   │   ├── cpu_nms.c
│   │   │   │   ├── cpu_nms.pyx
│   │   │   │   ├── cpu_nms.so
│   │   │   │   ├── gpu_mv.cpp
│   │   │   │   ├── gpu_mv.hpp
│   │   │   │   ├── gpu_mv.pyx
│   │   │   │   ├── gpu_nms.cpp
│   │   │   │   ├── gpu_nms.hpp
│   │   │   │   ├── gpu_nms.pyx
│   │   │   │   ├── gpu_nms.so
│   │   │   │   ├── mnc_config.py
│   │   │   │   ├── mnc_config.pyc
│   │   │   │   ├── mv.so
│   │   │   │   ├── mv_kernel.cu
│   │   │   │   ├── nms_kernel.cu
│   │   │   │   ├── nms_wrapper.py
│   │   │   │   ├── nms_wrapper.pyc
│   │   │   │   └── py_cpu_nms.py
│   │   │   ├── nms_wrapper.py
│   │   │   ├── nms_wrapper.pyc
│   │   │   ├── nms_wrapper.py~
│   │   │   ├── test.py
│   │   │   ├── test.pyc
│   │   │   ├── train.py
│   │   │   └── train.pyc
│   │   ├── generate_anchors.py
│   │   ├── generate_anchors.pyc
│   │   ├── my_anchor_target_layer_modified.py
│   │   ├── my_anchor_target_layer_modified.pyc
│   │   ├── nms
│   │   │   ├── __init__.py
│   │   │   ├── __init__.pyc
│   │   │   ├── cpu_nms.c
│   │   │   ├── cpu_nms.pyx
│   │   │   ├── cpu_nms.so
│   │   │   ├── gpu_mv.cpp
│   │   │   ├── gpu_mv.hpp
│   │   │   ├── gpu_mv.pyx
│   │   │   ├── gpu_nms.cpp
│   │   │   ├── gpu_nms.hpp
│   │   │   ├── gpu_nms.pyx
│   │   │   ├── gpu_nms.so
│   │   │   ├── mnc_config.py
│   │   │   ├── mnc_config.pyc
│   │   │   ├── mv.so
│   │   │   ├── mv_kernel.cu
│   │   │   ├── nms_kernel.cu
│   │   │   ├── nms_wrapper.py
│   │   │   ├── nms_wrapper.pyc
│   │   │   └── py_cpu_nms.py
│   │   ├── proposal_layer_modified.py
│   │   ├── proposal_layer_modified.pyc
│   │   ├── proposal_target_layer_modified.py
│   │   ├── proposal_target_layer_modified.pyc
│   │   └── utils
│   │       ├── __init__.py
│   │       ├── __init__.pyc
│   │       ├── bbox.c
│   │       ├── bbox.pyx
│   │       ├── blob.py
│   │       ├── blob.pyc
│   │       ├── cython_bbox.so
│   │       ├── mnc_config.py
│   │       ├── mnc_config.pyc
│   │       ├── timer.py
│   │       ├── timer.pyc
│   │       ├── unmap.py
│   │       ├── unmap.pyc
│   │       ├── utils
│   │       │   ├── __init__.py
│   │       │   ├── __init__.pyc
│   │       │   ├── bbox.pyx
│   │       │   ├── blob.py
│   │       │   ├── blob.pyc
│   │       │   ├── timer.py
│   │       │   └── timer.pyc
│   │       ├── vis_seg.py
│   │       └── voc_eval.py
│   ├── setup.py
│   └── utils
│       ├── .gitignore
│       ├── __init__.py
│       ├── bbox.pyx
│       ├── blob.py
│       ├── boxes_grid.py
│       ├── nms.py
│       ├── nms.pyx
│       └── timer.py
├── model.py
├── nets
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-36.pyc
│   │   ├── resnet_utils.cpython-36.pyc
│   │   └── resnet_v1.cpython-36.pyc
│   ├── googlenet.py
│   ├── resnet_utils.py
│   └── resnet_v1.py
├── test.py
└── train_main.py
/.idea/InceptText-Tensorflow.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # InceptText-Tensorflow
2 | An implementation of the algorithm from the paper "IncepText: A New Inception-Text Module with Deformable PSROI Pooling for Multi-Oriented Scene Text Detection".
3 |
4 | ## Introduction
5 | ### TensorFlow 1.4.0
6 |
7 | ### Preparation
8 |
9 | 1. gcc 4.9
10 |
11 | 2. CUDA 8.0
12 |
13 | 3. cd lib && make
14 |
15 | - Possible errors you may run into:
16 |
17 | 1. (see error_pic/error1.jpg)
18 |
19 | Fix: add the CUDA path to the system environment variables, then adjust the corresponding #include.
20 |
21 | 2. (see error_pic/error2.jpg)
22 |
23 | Fix: find the absolute path of nsync_cv.h and include it by that path.
24 |
25 | 3. (see error_pic/error3.jpg)
26 |
27 | Fix: find the absolute path of nsync_mu.h and include it by that path.
28 |
29 | ## Download
30 | ### 1. Models trained on ICDAR 2017
31 | ### 2. ResNet V1 50 provided by TensorFlow Slim: [ResNet-v1](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz)
32 |
33 | ## Train
34 | ### python train_main.py
35 |
36 | ## Test
37 | ### python test.py
38 |
--------------------------------------------------------------------------------
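
A small optional check, not part of the repository: the `cd lib && make` step assumes the TensorFlow 1.4 headers and libraries are discoverable, and the snippet below prints the same include/library paths that `lib/make.sh` queries via `tf.sysconfig`, plus the nsync header directory that errors 2 and 3 above refer to.

```python
# Optional sanity check before building the custom ops (not part of the repo).
import os
import tensorflow as tf

print('TensorFlow version:', tf.__version__)   # the README assumes 1.4.0
tf_inc = tf.sysconfig.get_include()             # same value as TF_INC in lib/make.sh
tf_lib = tf.sysconfig.get_lib()                 # same value as TF_LIB in lib/make.sh
print('TF_INC =', tf_inc)
print('TF_LIB =', tf_lib)
# nsync public headers live under the include dir for TF 1.4; if this prints
# False, the nsync_cv.h / nsync_mu.h errors above are likely to appear.
print('nsync headers found:',
      os.path.isdir(os.path.join(tf_inc, 'external', 'nsync', 'public')))
```
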
/data_util.py:
--------------------------------------------------------------------------------
1 | '''
2 | this file is modified from the Keras implementation of multi-threaded data processing,
3 | see https://github.com/fchollet/keras/blob/master/keras/utils/data_utils.py
4 | '''
5 | import time
6 | import numpy as np
7 | import threading
8 | import multiprocessing
9 | try:
10 | import queue
11 | except ImportError:
12 | import Queue as queue
13 |
14 |
15 | class GeneratorEnqueuer():
16 | """Builds a queue out of a data generator.
17 |
18 | Used in `fit_generator`, `evaluate_generator`, `predict_generator`.
19 |
20 | # Arguments
21 | generator: a generator function which endlessly yields data
22 | use_multiprocessing: use multiprocessing if True, otherwise threading
23 | wait_time: time to sleep in-between calls to `put()`
24 | random_seed: Initial seed for workers,
25 | will be incremented by one for each worker.
26 | """
27 |
28 | def __init__(self, generator,
29 | use_multiprocessing=False,
30 | wait_time=0.05,
31 | random_seed=None):
32 | self.wait_time = wait_time
33 | self._generator = generator
34 | self._use_multiprocessing = use_multiprocessing
35 | self._threads = []
36 | self._stop_event = None
37 | self.queue = None
38 | self.random_seed = random_seed
39 |
40 | def start(self, workers=1, max_queue_size=10):
41 | """Kicks off threads which add data from the generator into the queue.
42 |
43 | # Arguments
44 | workers: number of worker threads
45 | max_queue_size: queue size
46 | (when full, threads could block on `put()`)
47 | """
48 |
49 | def data_generator_task():
50 | while not self._stop_event.is_set():
51 | try:
52 | if self._use_multiprocessing or self.queue.qsize() < max_queue_size:
53 | generator_output = next(self._generator)
54 | self.queue.put(generator_output)
55 | else:
56 | time.sleep(self.wait_time)
57 | except Exception:
58 | self._stop_event.set()
59 | raise
60 |
61 | try:
62 | if self._use_multiprocessing:
63 | self.queue = multiprocessing.Queue(maxsize=max_queue_size)
64 | self._stop_event = multiprocessing.Event()
65 | else:
66 | self.queue = queue.Queue()
67 | self._stop_event = threading.Event()
68 |
69 | for _ in range(workers):
70 | if self._use_multiprocessing:
71 | # Reset random seed else all children processes
72 | # share the same seed
73 | np.random.seed(self.random_seed)
74 | thread = multiprocessing.Process(target=data_generator_task)
75 | thread.daemon = True
76 | if self.random_seed is not None:
77 | self.random_seed += 1
78 | else:
79 | thread = threading.Thread(target=data_generator_task)
80 | self._threads.append(thread)
81 | thread.start()
82 | except:
83 | self.stop()
84 | raise
85 |
86 | def is_running(self):
87 | return self._stop_event is not None and not self._stop_event.is_set()
88 |
89 | def stop(self, timeout=None):
90 | """Stops running threads and wait for them to exit, if necessary.
91 |
92 | Should be called by the same thread which called `start()`.
93 |
94 | # Arguments
95 | timeout: maximum time to wait on `thread.join()`.
96 | """
97 | if self.is_running():
98 | self._stop_event.set()
99 |
100 | for thread in self._threads:
101 | if thread.is_alive():
102 | if self._use_multiprocessing:
103 | thread.terminate()
104 | else:
105 | thread.join(timeout)
106 |
107 | if self._use_multiprocessing:
108 | if self.queue is not None:
109 | self.queue.close()
110 |
111 | self._threads = []
112 | self._stop_event = None
113 | self.queue = None
114 |
115 | def get(self):
116 | """Creates a generator to extract data from the queue.
117 |
118 | Skip the data if it is `None`.
119 |
120 | # Returns
121 | A generator
122 | """
123 | while self.is_running():
124 | if not self.queue.empty():
125 | inputs = self.queue.get()
126 | if inputs is not None:
127 | yield inputs
128 | else:
129 | time.sleep(self.wait_time)
--------------------------------------------------------------------------------
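
A minimal usage sketch for the `GeneratorEnqueuer` defined above; the toy generator, worker count, and queue size are illustrative values, not ones taken from this repository.

```python
# Illustrative only: drives GeneratorEnqueuer (data_util.py) with a toy generator.
# Assumes it is run from the repository root so data_util is importable.
import numpy as np
from data_util import GeneratorEnqueuer

def toy_batches(batch_size=4):
    """Endless generator standing in for the real training-data generator."""
    while True:
        yield np.random.rand(batch_size, 32, 32, 3), np.random.rand(batch_size, 1)

enqueuer = GeneratorEnqueuer(toy_batches(), use_multiprocessing=False)
enqueuer.start(workers=2, max_queue_size=10)
try:
    batches = enqueuer.get()          # generator that pulls batches off the queue
    images, labels = next(batches)
    print(images.shape, labels.shape)
finally:
    enqueuer.stop()
```
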
/error_pic/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/error_pic/.DS_Store
--------------------------------------------------------------------------------
/error_pic/error1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/error_pic/error1.jpg
--------------------------------------------------------------------------------
/error_pic/error2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/error_pic/error2.jpg
--------------------------------------------------------------------------------
/error_pic/error3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/error_pic/error3.jpg
--------------------------------------------------------------------------------
/lib/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | python setup.py build_ext --inplace
3 | rm -rf build
4 | sh make.sh
5 |
--------------------------------------------------------------------------------
/lib/__init__.py:
--------------------------------------------------------------------------------
1 | #import fast_rcnn
2 |
--------------------------------------------------------------------------------
/lib/cnn_tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/cnn_tools/__init__.py
--------------------------------------------------------------------------------
/lib/cnn_tools/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/cnn_tools/__init__.pyc
--------------------------------------------------------------------------------
/lib/cnn_tools/tools.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/cnn_tools/tools.pyc
--------------------------------------------------------------------------------
/lib/cuda_config.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
16 | /*
17 | # If coming across: cudaCheckError() failed : invalid device function. change -arch=sm_xx accordingly.
18 |
19 | # Which CUDA capabilities do we want to pre-build for?
20 | # https://developer.nvidia.com/cuda-gpus
21 | # Compute/shader model Cards
22 | # 6.1 P4, P40, Titan X so CUDA_MODEL = 61
23 | # 6.0 P100 so CUDA_MODEL = 60
24 | # 5.2 M40
25 | # 3.7 K80
26 | # 3.5 K40, K20
27 | # 3.0 K10, Grid K520 (AWS G2)
28 | # Other Nvidia shader models should work, but they will require extra startup
29 | # time as the code is not pre-optimized for them.
30 | # CUDA_MODELS=30 35 37 52 60 61
31 | */
32 | // DO NOT EDIT: automatically generated file
33 | #ifndef CUDA_CUDA_CONFIG_H_
34 | #define CUDA_CUDA_CONFIG_H_
35 | // please modify the TF_CUDA_CAPABILITIES according to the above list and
36 | // your gpu model.
37 |
38 | #define TF_CUDA_CAPABILITIES CudaVersion("8.0")
39 |
40 | #define TF_CUDA_VERSION "8.0"
41 | #define TF_CUDNN_VERSION "6"
42 |
43 | #define TF_CUDA_TOOLKIT_PATH "/usr/local/cuda-8.0"
44 |
45 | #endif // CUDA_CUDA_CONFIG_H_
46 |
--------------------------------------------------------------------------------
/lib/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | # TODO: make this fold self-contained, only depends on utils package
9 |
10 | from .imdb import imdb
11 | from .pascal_voc import pascal_voc
12 | from .pascal3d import pascal3d
13 | from .imagenet3d import imagenet3d
14 | from .kitti import kitti
15 | from .kitti_tracking import kitti_tracking
16 | from .nissan import nissan
17 | from .nthu import nthu
18 | from . import factory
19 |
20 | ## NOTE: obsolete
21 | import os.path as osp
22 | from .imdb import ROOT_DIR
23 | from .imdb import MATLAB
24 |
25 | # http://stackoverflow.com/questions/377017/test-if-executable-exists-in-python
26 | def _which(program):
27 | import os
28 | def is_exe(fpath):
29 | return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
30 |
31 | fpath, fname = os.path.split(program)
32 | if fpath:
33 | if is_exe(program):
34 | return program
35 | else:
36 | for path in os.environ["PATH"].split(os.pathsep):
37 | path = path.strip('"')
38 | exe_file = os.path.join(path, program)
39 | if is_exe(exe_file):
40 | return exe_file
41 |
42 | return None
43 | """
44 | if _which(MATLAB) is None:
45 | msg = ("MATLAB command '{}' not found. "
46 | "Please add '{}' to your PATH.").format(MATLAB, MATLAB)
47 | raise EnvironmentError(msg)
48 | """
49 |
--------------------------------------------------------------------------------
/lib/datasets/ds_utils.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast/er R-CNN
3 | # Licensed under The MIT License [see LICENSE for details]
4 | # Written by Ross Girshick
5 | # --------------------------------------------------------
6 |
7 | import numpy as np
8 |
9 | def unique_boxes(boxes, scale=1.0):
10 | """Return indices of unique boxes."""
11 | v = np.array([1, 1e3, 1e6, 1e9])
12 | hashes = np.round(boxes * scale).dot(v)
13 | _, index = np.unique(hashes, return_index=True)
14 | return np.sort(index)
15 |
16 | def xywh_to_xyxy(boxes):
17 | """Convert [x y w h] box format to [x1 y1 x2 y2] format."""
18 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1))
19 |
20 | def xyxy_to_xywh(boxes):
21 | """Convert [x1 y1 x2 y2] box format to [x y w h] format."""
22 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1))
23 |
24 | def validate_boxes(boxes, width=0, height=0):
25 | """Check that a set of boxes are valid."""
26 | x1 = boxes[:, 0]
27 | y1 = boxes[:, 1]
28 | x2 = boxes[:, 2]
29 | y2 = boxes[:, 3]
30 | assert (x1 >= 0).all()
31 | assert (y1 >= 0).all()
32 | assert (x2 >= x1).all()
33 | assert (y2 >= y1).all()
34 | assert (x2 < width).all()
35 | assert (y2 < height).all()
36 |
37 | def filter_small_boxes(boxes, min_size):
38 | w = boxes[:, 2] - boxes[:, 0]
39 | h = boxes[:, 3] - boxes[:, 1]
40 | keep = np.where((w >= min_size) & (h >= min_size))[0]
41 | return keep
42 |
--------------------------------------------------------------------------------
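
A small, self-contained example (made-up boxes) of the format helpers above; it imports `ds_utils.py` directly so the rest of the `datasets` package is not pulled in.

```python
# Quick check of the box-format helpers in ds_utils.py (toy values).
# ds_utils.py only needs numpy, so it is imported directly here;
# assumes the repository root is the working directory.
import sys
sys.path.append('lib/datasets')
import numpy as np
import ds_utils

boxes_xywh = np.array([[10, 20, 5, 8],      # x, y, w, h
                       [0, 0, 100, 50]], dtype=np.float32)
boxes_xyxy = ds_utils.xywh_to_xyxy(boxes_xywh)
print(boxes_xyxy)                            # [[10. 20. 14. 27.], [0. 0. 99. 49.]]
assert np.allclose(ds_utils.xyxy_to_xywh(boxes_xyxy), boxes_xywh)
print(ds_utils.filter_small_boxes(boxes_xyxy, min_size=10))   # -> [1]
```
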
/lib/datasets/factory.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | """Factory method for easily getting imdbs by name."""
9 |
10 | __sets = {}
11 |
12 | import numpy as np
13 |
14 | from .pascal_voc import pascal_voc
15 | from .imagenet3d import imagenet3d
16 | from .kitti import kitti
17 | from .kitti_tracking import kitti_tracking
18 | from .nthu import nthu
19 | from .coco import coco
20 | from .kittivoc import kittivoc
21 |
22 | def _selective_search_IJCV_top_k(split, year, top_k):
23 | """Return an imdb that uses the top k proposals from the selective search
24 | IJCV code.
25 | """
26 | imdb = pascal_voc(split, year)
27 | imdb.roidb_handler = imdb.selective_search_IJCV_roidb
28 | imdb.config['top_k'] = top_k
29 | return imdb
30 |
31 | # Set up voc_<year>_<split> using selective search "fast" mode
32 | for year in ['2007', '2012', '0712']:
33 | for split in ['train', 'val', 'trainval', 'test']:
34 | name = 'voc_{}_{}'.format(year, split)
35 | __sets[name] = (lambda split=split, year=year:
36 | pascal_voc(split, year))
37 |
38 |
39 | # Set up kittivoc
40 | for split in ['train', 'val', 'trainval', 'test']:
41 | name = 'kittivoc_{}'.format(split)
42 | print(name)
43 | __sets[name] = (lambda split=split: kittivoc(split))
44 |
45 | # # KITTI dataset
46 | # for split in ['train', 'val', 'trainval', 'test']:
47 | # name = 'kitti_{}'.format(split)
48 | # print name
49 | # __sets[name] = (lambda split=split: kitti(split))
50 |
51 | # Set up coco_2014_<split>
52 | for year in ['2014']:
53 | for split in ['train', 'val', 'minival', 'valminusminival']:
54 | name = 'coco_{}_{}'.format(year, split)
55 | __sets[name] = (lambda split=split, year=year: coco(split, year))
56 |
57 | # Set up coco_2015_<split>
58 | for year in ['2015']:
59 | for split in ['test', 'test-dev']:
60 | name = 'coco_{}_{}'.format(year, split)
61 | __sets[name] = (lambda split=split, year=year: coco(split, year))
62 |
63 | # NTHU dataset
64 | for split in ['71', '370']:
65 | name = 'nthu_{}'.format(split)
66 | print(name)
67 | __sets[name] = (lambda split=split: nthu(split))
68 |
69 |
70 | def get_imdb(name):
71 | """Get an imdb (image database) by name."""
72 | if name not in __sets:
73 | print((list_imdbs()))
74 | raise KeyError('Unknown dataset: {}'.format(name))
75 | return __sets[name]()
76 |
77 | def list_imdbs():
78 | """List all registered imdbs."""
79 | return list(__sets.keys())
80 |
--------------------------------------------------------------------------------
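
The registry above relies on binding the loop variables through the lambdas' default arguments (`split=split, year=year`); without them every entry would capture only the final loop values. A tiny self-contained sketch (not from the repository) of the same pattern:

```python
# Minimal illustration of the registry pattern used in factory.py.
_registry = {}

for year in ['2007', '2012']:
    for split in ['train', 'val']:
        name = 'voc_{}_{}'.format(year, split)
        # Default arguments freeze the current loop values for this entry.
        _registry[name] = (lambda split=split, year=year: ('pascal_voc', split, year))

def get(name):
    if name not in _registry:
        raise KeyError('Unknown dataset: {}'.format(name))
    return _registry[name]()

print(sorted(_registry.keys()))
print(get('voc_2007_train'))   # -> ('pascal_voc', 'train', '2007')
```
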
/lib/deform_conv_layer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/deform_conv_layer/__init__.py
--------------------------------------------------------------------------------
/lib/deform_conv_layer/deform_conv_grad.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import tensorflow as tf
3 | from tensorflow.python.framework import ops
4 | from . import deform_conv_op
5 |
6 | @ops.RegisterGradient("DeformConvOp")
7 | def _deform_conv_grad(op, grad):
8 | """The gradients for `deform_conv`.
9 | Args:
10 | op: The `deform_conv` `Operation` that we are differentiating, which we can use
11 | to find the inputs and outputs of the original op.
12 | grad: Gradient with respect to the output of the `roi_pool` op.
13 | Returns:
14 | Gradients with respect to the input of `zero_out`.
15 | """
16 | data = op.inputs[0]
17 | filter = op.inputs[1]
18 | offset = op.inputs[2]
19 |
20 | strides = op.get_attr('strides')
21 | rates = op.get_attr('rates')
22 | num_groups = op.get_attr('num_groups')
23 | padding = op.get_attr('padding')
24 | data_format = op.get_attr('data_format')
25 |
26 | # compute gradient
27 | data_grad = deform_conv_op.deform_conv_grad_op(data, filter, offset, grad, strides, rates, num_groups, padding, data_format)
28 |
29 | return data_grad
--------------------------------------------------------------------------------
/lib/deform_conv_layer/deform_conv_op.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import tensorflow as tf
3 | import os.path as osp
4 | from tensorflow.python.framework import ops
5 |
6 |
7 | filename = osp.join(osp.dirname(__file__), 'deform_conv.so')
8 | _deform_conv_module = tf.load_op_library(filename)
9 | deform_conv_op = _deform_conv_module.deform_conv_op
10 | deform_conv_grad_op = _deform_conv_module.deform_conv_backprop_op
11 |
12 |
13 | @ops.RegisterGradient("DeformConvOp")
14 | def _deform_conv_grad(op, grad):
15 | """The gradients for `deform_conv`.
16 | Args:
17 | op: The `deform_conv` `Operation` that we are differentiating, which we can use
18 | to find the inputs and outputs of the original op.
19 | grad: Gradient with respect to the output of the `roi_pool` op.
20 | Returns:
21 | Gradients with respect to the input of `zero_out`.
22 | """
23 | data = op.inputs[0]
24 | filter = op.inputs[1]
25 | offset = op.inputs[2]
26 |
27 | strides = op.get_attr('strides')
28 | rates = op.get_attr('rates')
29 | num_groups = op.get_attr('num_groups')
30 | padding = op.get_attr('padding')
31 | data_format = op.get_attr('data_format')
32 | deformable_group = op.get_attr('deformable_group')
33 |
34 | # compute gradient
35 | data_grad = deform_conv_grad_op(data, filter, offset, grad, strides, rates, num_groups, deformable_group, padding, data_format)
36 |
37 | return data_grad # List of one Tensor, since we have one input
--------------------------------------------------------------------------------
/lib/deform_conv_layer/deform_conv_test_mx.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import os
3 | import mxnet as mx
4 | import numpy as np
5 |
6 | gpu_device=mx.gpu()
7 | cpu_device=mx.cpu()
8 |
9 | # trans = np.random.rand(1,2,2,2)
10 |
11 | if not os.path.isfile('test.npz'):
12 | with open("test.npz", 'wb') as f:
13 | arr=np.random.random((8, 6, 4, 5))
14 | np.save(f, arr)
15 | else:
16 | with open("test.npz", 'rb') as f:
17 | arr = np.load(f)
18 | kernel = mx.nd.array(np.ones((21,2,2,2)), ctx=gpu_device)
19 | trans = mx.nd.array(np.ones((8,8,2,2)), ctx=gpu_device)
20 | arr = mx.nd.array(arr, ctx=gpu_device)
21 | data_grad = mx.nd.zeros_like(arr)
22 | kernel_grad = mx.nd.zeros_like(kernel)
23 | trans_grad = mx.nd.zeros_like(trans)
24 |
25 | def main():
26 | data_var = mx.symbol.Variable('data')
27 | ker_var = mx.symbol.Variable('kernel')
28 | trans_var = mx.symbol.Variable('trans')
29 | res = mx.contrib.sym.DeformableConvolution(data=data_var, offset=trans_var, weight=ker_var,
30 | num_group=3, no_bias=True, kernel=[2,2], num_filter=21, stride=[2, 2])
31 | rua = res.bind(ctx=gpu_device, args={'data':arr, 'kernel':kernel, 'trans':trans}, args_grad={'data':data_grad, 'kernel':kernel_grad, 'trans':trans_grad})
32 | rua.forward(is_train=True)
33 | rua.backward(out_grads=mx.nd.ones((8,21,2,2)))
34 | # print(trans.asnumpy())
35 | # res_arr = rua.outputs[0].asnumpy()
36 | # print(res_arr)
37 | # print([a.asnumpy() for a in rua.grad_arrays])
38 | print(data_grad.asnumpy())
39 |
40 |
41 | if __name__ == '__main__':
42 | main()
--------------------------------------------------------------------------------
/lib/deform_conv_layer/deform_conv_util.h:
--------------------------------------------------------------------------------
1 | #include "tensorflow/core/util/tensor_format.h"
2 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
3 | #include "tensorflow/core/framework/tensor.h"
4 |
5 | namespace tensorflow {
6 | typedef std::vector<int64> TShape;
7 |
8 | inline int ProdShape(const TensorShape &shape, int start) {
9 | int64 res = 1;
10 | for(int i=start; i<shape.dims(); i++) {
11 | res *= shape.dim_size(i);
12 | }
13 | return res;
14 | }
15 | 
16 | inline std::vector<int64> ToVector(const TensorShape &shape) {
17 | // int64 res = 1;
18 | std::vector<int64> res;
19 | for(int i=0; i<shape.dims(); i++) {
20 | res.push_back(shape.dim_size(i));
21 | }
22 | return res;
23 | }
24 | } // namespace tensorflow
--------------------------------------------------------------------------------
/lib/deform_psroi_pooling_layer/deform_psroi_pooling_op_gpu.h:
--------------------------------------------------------------------------------
19 | template <typename DType>
20 | struct DeformPSROIPoolForwardLauncher {
21 | bool operator()(
22 | const DType* bottom_data, const float spatial_scale, const int num_rois, const int channels, const int height,
23 | const int width, const int pooled_height, const int pooled_width, const DType* bottom_rois, const DType* bottom_trans,
24 | const bool no_trans, const float trans_std, const int sample_per_part, const int output_dim, const int num_classes,
25 | const int group_size, const int part_size, DType* top_data, DType* mapping_channel, const Eigen::GpuDevice& d);
26 | };
27 |
28 | template <typename DType>
29 | struct DeformPSROIPoolBackwardLauncher {
30 | bool operator() (const DType* top_diff, const DType* mapping_channel, const int num_rois, const float spatial_scale,
31 | const int channels, const int height, const int width, const int pooled_height, const int pooled_width,
32 | const int output_dim, DType* bottom_data_diff, DType* bottom_trans_diff, const DType* bottom_data,
33 | const DType* bottom_rois, const DType* bottom_trans, const bool no_trans, const float trans_std,
34 | const int sample_per_part, const int group_size, const int part_size,
35 | const int num_classes, const int channels_each_class, const Eigen::GpuDevice& d);
36 | };
37 |
38 | template <typename Device, typename DType>
39 | struct setZero {
40 | void operator() (const Device& d, const int n, DType* result_data);
41 | };
42 |
43 | } // namespace tensorflow
44 |
45 | #endif // TENSORFLOW_USER_OPS_DEFORMPSROIPOOLING_OP_GPU_H_
--------------------------------------------------------------------------------
/lib/deform_psroi_pooling_layer/deform_psroi_pooling_op_grad.py:
--------------------------------------------------------------------------------
1 | # from __future__ import absolute_import
2 | # import tensorflow as tf
3 | # from tensorflow.python.framework import ops
4 | # import deform_psroi_pooling_op
5 | # import pdb
6 |
7 |
8 | # # @tf.RegisterShape("DeformPSROIPool")
9 | # # def _deform_psroi_pool_shape(op):
10 | # # """Shape function for the DeformPSROIPool op.
11 |
12 | # # """
13 | # # dims_data = op.inputs[0].get_shape().as_list()
14 | # # channels = dims_data[3]
15 | # # dims_rois = op.inputs[1].get_shape().as_list()
16 | # # num_rois = dims_rois[0]
17 | # # output_dim = op.get_attr('output_dim')
18 | # # group_size = op.get_attr('group_size')
19 | # # pooled_height = group_size
20 | # # pooled_width = group_size
21 |
22 | # # output_shape = tf.TensorShape([num_rois, pooled_height, pooled_width, output_dim])
23 | # # return [output_shape, output_shape]
24 |
25 | # @ops.RegisterGradient("DeformPSROIPool")
26 | # def _deform_psroi_pool_grad(op, grad, _):
27 | # """The gradients for `Deform_PSROI_pool`.
28 | # Args:
29 | # op: The `roi_pool` `Operation` that we are differentiating, which we can use
30 | # to find the inputs and outputs of the original op.
31 | # grad: Gradient with respect to the output of the `roi_pool` op.
32 | # Returns:
33 | # Gradients with respect to the input of `zero_out`.
34 | # """
35 |
36 |
37 | # data = op.inputs[0]
38 | # rois = op.inputs[1]
39 | # trans = op.inputs[2]
40 | # mapping_channel = op.outputs[1]
41 | # spatial_scale = op.get_attr('spatial_scale')
42 | # output_dim = op.get_attr('output_dim')
43 | # group_size = op.get_attr('group_size')
44 | # pooled_size = op.get_attr('pooled_size')
45 | # part_size = op.get_attr('part_size')
46 | # sample_per_part = op.get_attr('sample_per_part')
47 | # trans_std = op.get_attr('trans_std')
48 | # no_trans = op.get_attr('no_trans')
49 |
50 |
51 |
52 | # # compute gradient
53 | # #data_grad = psroi_pooling_op.psroi_pool_grad(data, rois, argmax, grad, pooled_height, pooled_width, spatial_scale)
54 | # data_grad, trans_grad = deform_psroi_pooling_op.deform_psroi_pool_grad(data, rois, trans, mapping_channel, grad, spatial_scale,
55 | # output_dim, group_size, pooled_size, part_size, sample_per_part,
56 | # trans_std, no_trans)
57 | # # rois_grad = tf.zeros(rois.shape)
58 | # return [data_grad, None, trans_grad] # List of one Tensor, since we have one input
59 |
60 |
--------------------------------------------------------------------------------
/lib/deform_psroi_pooling_layer/deform_psroi_pooling_op_test.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import tensorflow as tf
3 | import numpy as np
4 | import deform_psroi_pooling_op
5 | import deform_psroi_pooling_op_grad
6 | import pdb
7 |
8 | # pdb.set_trace()
9 | data_arr = np.random.rand(1,25,5,5)
10 | # roi = np.array([[0, 0, 0, 4, 4]],dtype=np.float32)
11 | trans_arr = np.random.rand(1,2,2,2)
12 |
13 | # with open("data.npz", 'rb') as f:
14 | # data_arr = np.load(f)
15 | # with open("trans.npz", 'rb') as f:
16 | # trans_arr = np.load(f)
17 |
18 |
19 | rois = tf.convert_to_tensor([ [0, 0, 0, 4, 4]], dtype=tf.float32)
20 | trans = tf.convert_to_tensor(trans_arr, dtype=tf.float32)
21 | hh=tf.convert_to_tensor(data_arr,dtype=tf.float32)
22 | [y2, channels] = deform_psroi_pooling_op.deform_psroi_pool(hh, rois, trans=trans, pooled_size=2, output_dim=1, group_size=1, spatial_scale=1.0,
23 | trans_std=1e-1, sample_per_part=1, part_size=2, no_trans=False)
24 | s = tf.gradients(y2, [hh, trans])
25 | sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
26 | # sess.run(s[0])
27 | # print( sess.run(trans))
28 | # print( sess.run(y2))
29 | print( sess.run(s[1]))
30 | # print( sess.run(s[1]))
31 | # pdb.set_trace()
32 |
--------------------------------------------------------------------------------
/lib/deform_psroi_pooling_layer/deform_psroi_pooling_op_test_mx.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import mxnet as mx
3 | import numpy as np
4 |
5 | gpu_device=mx.gpu()
6 | # data = np.random.rand(1,25,5,5)
7 | roi = mx.nd.array(np.array([[0, 0, 0, 4, 4]],dtype=np.float32), ctx=gpu_device)
8 | # trans = np.random.rand(1,2,2,2)
9 |
10 | with open("data.npz", 'rb') as f:
11 | data = mx.nd.array(np.load(f), ctx=gpu_device)
12 | with open("trans.npz", 'rb') as f:
13 | trans = mx.nd.array(np.load(f), ctx=gpu_device)
14 |
15 | data_grad = mx.nd.zeros_like(data)
16 | roi_grad = mx.nd.zeros_like(roi)
17 | trans_grad = mx.nd.zeros_like(trans)
18 |
19 | def main():
20 | data_var = mx.symbol.Variable('data')
21 | roi_var = mx.symbol.Variable('roi')
22 | trans_var = mx.symbol.Variable('trans')
23 | res = mx.contrib.sym.DeformablePSROIPooling(data=data_var, rois=roi_var, trans=trans_var, group_size=1, pooled_size=2,
24 | output_dim=1, no_trans=False, part_size=2, sample_per_part=1, spatial_scale=1., trans_std=0.1)
25 | rua = res.bind(ctx=gpu_device, args={'data':data, 'roi':roi, 'trans':trans}, args_grad={'data':data_grad, 'roi':roi_grad, 'trans':trans_grad})
26 | rua.forward(is_train=True)
27 | rua.backward(out_grads=mx.nd.ones((1, 1, 2, 2)))
28 | # print(trans.asnumpy())
29 | # res_arr = rua.outputs[0].asnumpy()
30 | # print(res_arr)
31 | # print([a.asnumpy() for a in rua.grad_arrays])
32 | print(trans_grad.asnumpy())
33 |
34 |
35 | if __name__ == '__main__':
36 | main()
--------------------------------------------------------------------------------
/lib/fast_rcnn/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | from . import config
9 | from . import train
10 | from . import test
11 | from . import nms_wrapper
12 | # from nms_wrapper import nms
--------------------------------------------------------------------------------
/lib/fast_rcnn/bbox_transform.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | import warnings
10 |
11 | def bbox_transform(ex_rois, gt_rois):
12 | """
13 | computes the distance from ground-truth boxes to the given boxes, normed by their size
14 | :param ex_rois: n * 4 numpy array, given boxes
15 | :param gt_rois: n * 4 numpy array, ground-truth boxes
16 | :return: deltas: n * 4 numpy array, regression targets (dx, dy, dw, dh)
17 | """
18 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
19 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
20 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
21 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
22 |
23 | assert np.min(ex_widths) > 0.1 and np.min(ex_heights) > 0.1, \
24 | 'Invalid boxes found: {} {}'. \
25 | format(ex_rois[np.argmin(ex_widths), :], ex_rois[np.argmin(ex_heights), :])
26 |
27 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
28 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
29 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
30 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
31 |
32 | # warnings.catch_warnings()
33 | # warnings.filterwarnings('error')
34 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
35 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
36 | targets_dw = np.log(gt_widths / ex_widths)
37 | targets_dh = np.log(gt_heights / ex_heights)
38 |
39 | targets = np.vstack(
40 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose()
41 | return targets
42 |
43 | def bbox_transform_inv(boxes, deltas):
44 | if boxes.shape[0] == 0:
45 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)
46 |
47 | boxes = boxes.astype(deltas.dtype, copy=False)
48 |
49 | widths = boxes[:, 2] - boxes[:, 0] + 1.0
50 | heights = boxes[:, 3] - boxes[:, 1] + 1.0
51 | ctr_x = boxes[:, 0] + 0.5 * widths
52 | ctr_y = boxes[:, 1] + 0.5 * heights
53 |
54 | dx = deltas[:, 0::4]
55 | dy = deltas[:, 1::4]
56 | dw = deltas[:, 2::4]
57 | dh = deltas[:, 3::4]
58 |
59 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
60 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
61 | pred_w = np.exp(dw) * widths[:, np.newaxis]
62 | pred_h = np.exp(dh) * heights[:, np.newaxis]
63 |
64 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
65 | # x1
66 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
67 | # y1
68 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
69 | # x2
70 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
71 | # y2
72 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h
73 |
74 | return pred_boxes
75 |
76 | def clip_boxes(boxes, im_shape):
77 | """
78 | Clip boxes to image boundaries.
79 | """
80 |
81 | # x1 >= 0
82 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
83 | # y1 >= 0
84 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
85 | # x2 < im_shape[1]
86 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
87 | # y2 < im_shape[0]
88 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
89 | return boxes
90 |
--------------------------------------------------------------------------------
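
A quick numeric check (toy boxes) of the encoding above: `bbox_transform_inv` undoes `bbox_transform` for the box centers and sizes, with the far corner shifted by the +1 width/height convention used in the encoding. The file only depends on numpy, so it is imported directly rather than through the `fast_rcnn` package; assumes the repository root is the working directory.

```python
import sys
sys.path.append('lib/fast_rcnn')
import numpy as np
from bbox_transform import bbox_transform, bbox_transform_inv

ex_rois = np.array([[ 0.,  0., 10., 10.],
                    [ 5.,  5., 20., 15.]])   # proposals / anchors
gt_rois = np.array([[ 2.,  1., 12.,  9.],
                    [ 4.,  6., 22., 18.]])   # matched ground-truth boxes

deltas = bbox_transform(ex_rois, gt_rois)     # (dx, dy, dw, dh) regression targets
recon = bbox_transform_inv(ex_rois, deltas)   # decode the deltas back into boxes
assert np.allclose(recon[:, :2], gt_rois[:, :2])         # x1, y1 recovered exactly
assert np.allclose(recon[:, 2:], gt_rois[:, 2:] + 1.0)   # x2, y2 shifted by the +1 convention
print(deltas)
```
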
/lib/fast_rcnn/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | from .config import cfg
10 | from ..nms.gpu_nms import gpu_nms
11 | from ..nms.cpu_nms import cpu_nms
12 |
13 | def nms(dets, thresh, force_cpu=False):
14 | """Dispatch to either CPU or GPU NMS implementations."""
15 |
16 | if dets.shape[0] == 0:
17 | return []
18 | if cfg.USE_GPU_NMS and not force_cpu:
19 | return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
20 | else:
21 | return cpu_nms(dets, thresh)
22 |
23 | def nms_wrapper(scores, boxes, threshold = 0.7, class_sets = None):
24 | """
25 | post-process the results of im_detect
26 | :param scores: N * K numpy array of class scores
27 | :param boxes: N * (K * 4) numpy array of class-specific boxes
28 | :param class_sets: e.g. CLASSES = ('__background__','person','bike','motorbike','car','bus')
29 | :return: a list of K-1 dicts, no background, each is {'class': classname, 'dets': None | [[x1,y1,x2,y2,score],...]}
30 | """
31 | num_class = scores.shape[1] if class_sets is None else len(class_sets)
32 | assert num_class * 4 == boxes.shape[1],\
33 | 'Detection scores and boxes dont match'
34 | class_sets = ['class_' + str(i) for i in range(0, num_class)] if class_sets is None else class_sets
35 |
36 | res = []
37 | for ind, cls in enumerate(class_sets[1:]):
38 | ind += 1 # skip background
39 | cls_boxes = boxes[:, 4*ind : 4*(ind+1)]
40 | cls_scores = scores[:, ind]
41 | # if ind == 1:
42 | print(np.max(cls_scores))
43 | dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
44 | keep = nms(dets, thresh=0.3)
45 | dets = dets[keep, :]
46 | # if ind == 1:
47 | print(np.max(dets[:, 4]))
48 | dets = dets[np.where(dets[:, 4] > threshold)]
49 | r = {}
50 | if dets.shape[0] > 0:
51 | r['class'], r['dets'] = cls, dets
52 | else:
53 | r['class'], r['dets'] = cls, None
54 | res.append(r)
55 | return res
--------------------------------------------------------------------------------
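
For reference, a standalone numpy sketch of the greedy IoU-based suppression that `nms()` dispatches to; it mirrors `lib/nms/py_cpu_nms.py` in spirit and is not the repository's compiled CPU/GPU implementation.

```python
import numpy as np

def greedy_nms(dets, thresh):
    """dets: (N, 5) array of [x1, y1, x2, y2, score]; returns kept indices."""
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]          # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the top-scoring box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= thresh]    # drop boxes that overlap too much
    return keep

dets = np.array([[10, 10, 50, 50, 0.9],
                 [12, 12, 52, 52, 0.8],     # heavy overlap with the first box
                 [100, 100, 150, 150, 0.7]], dtype=np.float32)
print(greedy_nms(dets, thresh=0.3))         # -> [0, 2]
```
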
/lib/gt_data_layer/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | from . import roidb
8 | # from layer import GtDataLayer
--------------------------------------------------------------------------------
/lib/gt_data_layer/layer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | """The data layer used during training to train a Fast R-CNN network.
9 |
10 | GtDataLayer implements a Caffe Python layer.
11 | """
12 | # TODO: make caffe irrelevant, or remove caffe backend from this project
13 | import caffe
14 |
15 | import numpy as np
16 | import yaml
17 | from multiprocessing import Process, Queue
18 |
19 | from .minibatch import get_minibatch
20 |
21 | # TODO: make fast_rcnn irrelevant
22 | # >>>> obsolete, because it depends on sth outside of this project
23 | from ..fast_rcnn.config import cfg
24 | # <<<< obsolete
25 |
26 | class GtDataLayer(caffe.Layer):
27 | """Fast R-CNN data layer used for training."""
28 |
29 | def _shuffle_roidb_inds(self):
30 | """Randomly permute the training roidb."""
31 | self._perm = np.random.permutation(np.arange(len(self._roidb)))
32 | self._cur = 0
33 |
34 | def _get_next_minibatch_inds(self):
35 | """Return the roidb indices for the next minibatch."""
36 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb):
37 | self._shuffle_roidb_inds()
38 |
39 | db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH]
40 | self._cur += cfg.TRAIN.IMS_PER_BATCH
41 |
42 | """
43 | # sample images with gt objects
44 | db_inds = np.zeros((cfg.TRAIN.IMS_PER_BATCH), dtype=np.int32)
45 | i = 0
46 | while (i < cfg.TRAIN.IMS_PER_BATCH):
47 | ind = self._perm[self._cur]
48 | num_objs = self._roidb[ind]['boxes'].shape[0]
49 | if num_objs != 0:
50 | db_inds[i] = ind
51 | i += 1
52 |
53 | self._cur += 1
54 | if self._cur >= len(self._roidb):
55 | self._shuffle_roidb_inds()
56 | """
57 |
58 | return db_inds
59 |
60 | def _get_next_minibatch(self):
61 | """Return the blobs to be used for the next minibatch."""
62 | db_inds = self._get_next_minibatch_inds()
63 | minibatch_db = [self._roidb[i] for i in db_inds]
64 | return get_minibatch(minibatch_db, self._num_classes)
65 |
66 | # this function is called in training the net
67 | def set_roidb(self, roidb):
68 | """Set the roidb to be used by this layer during training."""
69 | self._roidb = roidb
70 | self._shuffle_roidb_inds()
71 |
72 | def setup(self, bottom, top):
73 | """Setup the GtDataLayer."""
74 |
75 | # parse the layer parameter string, which must be valid YAML
76 | layer_params = yaml.load(self.param_str_)
77 |
78 | self._num_classes = layer_params['num_classes']
79 |
80 | self._name_to_top_map = {
81 | 'data': 0,
82 | 'info_boxes': 1,
83 | 'parameters': 2}
84 |
85 | # data blob: holds a batch of N images, each with 3 channels
86 | # The height and width (100 x 100) are dummy values
87 | num_scale_base = len(cfg.TRAIN.SCALES_BASE)
88 | top[0].reshape(num_scale_base, 3, 100, 100)
89 |
90 | # info boxes blob
91 | top[1].reshape(1, 18)
92 |
93 | # parameters blob
94 | num_scale = len(cfg.TRAIN.SCALES)
95 | num_aspect = len(cfg.TRAIN.ASPECTS)
96 | top[2].reshape(2 + 2*num_scale + 2*num_aspect)
97 |
98 | def forward(self, bottom, top):
99 | """Get blobs and copy them into this layer's top blob vector."""
100 | blobs = self._get_next_minibatch()
101 |
102 | for blob_name, blob in blobs.items():
103 | top_ind = self._name_to_top_map[blob_name]
104 | # Reshape net's input blobs
105 | top[top_ind].reshape(*(blob.shape))
106 | # Copy data into net's input blobs
107 | top[top_ind].data[...] = blob.astype(np.float32, copy=False)
108 |
109 | def backward(self, top, propagate_down, bottom):
110 | """This layer does not propagate gradients."""
111 | pass
112 |
113 | def reshape(self, bottom, top):
114 | """Reshaping happens during the call to forward."""
115 | pass
116 |
--------------------------------------------------------------------------------
/lib/gt_data_layer/minibatch.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | """Compute minibatch blobs for training a Fast R-CNN network."""
9 |
10 | import numpy as np
11 | import numpy.random as npr
12 | import cv2
13 |
14 | from ..utils.blob import prep_im_for_blob, im_list_to_blob
15 |
16 | # TODO: make fast_rcnn irrelevant
17 | # >>>> obsolete, because it depends on sth outside of this project
18 | from ..fast_rcnn.config import cfg
19 | # <<<< obsolete
20 |
21 | def get_minibatch(roidb, num_classes):
22 | """Given a roidb, construct a minibatch sampled from it."""
23 | num_images = len(roidb)
24 | assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \
25 | 'num_images ({}) must divide BATCH_SIZE ({})'. \
26 | format(num_images, cfg.TRAIN.BATCH_SIZE)
27 |
28 | # Get the input image blob, formatted for caffe
29 | im_blob = _get_image_blob(roidb)
30 |
31 | # build the box information blob
32 | info_boxes_blob = np.zeros((0, 18), dtype=np.float32)
33 | num_scale = len(cfg.TRAIN.SCALES)
34 | for i in range(num_images):
35 | info_boxes = roidb[i]['info_boxes']
36 |
37 | # change the batch index
38 | info_boxes[:,2] += i * num_scale
39 | info_boxes[:,7] += i * num_scale
40 |
41 | info_boxes_blob = np.vstack((info_boxes_blob, info_boxes))
42 |
43 | # build the parameter blob
44 | num_aspect = len(cfg.TRAIN.ASPECTS)
45 | num = 2 + 2 * num_scale + 2 * num_aspect
46 | parameters_blob = np.zeros((num), dtype=np.float32)
47 | parameters_blob[0] = num_scale
48 | parameters_blob[1] = num_aspect
49 | parameters_blob[2:2+num_scale] = cfg.TRAIN.SCALES
50 | parameters_blob[2+num_scale:2+2*num_scale] = cfg.TRAIN.SCALE_MAPPING
51 | parameters_blob[2+2*num_scale:2+2*num_scale+num_aspect] = cfg.TRAIN.ASPECT_HEIGHTS
52 | parameters_blob[2+2*num_scale+num_aspect:2+2*num_scale+2*num_aspect] = cfg.TRAIN.ASPECT_WIDTHS
53 |
54 | # For debug visualizations
55 | # _vis_minibatch(im_blob, rois_blob, labels_blob, sublabels_blob)
56 |
57 | blobs = {'data': im_blob,
58 | 'info_boxes': info_boxes_blob,
59 | 'parameters': parameters_blob}
60 |
61 | return blobs
62 |
63 | def _get_image_blob(roidb):
64 | """Builds an input blob from the images in the roidb at the different scales.
65 | """
66 | num_images = len(roidb)
67 | processed_ims = []
68 |
69 | for i in range(num_images):
70 | # read image
71 | im = cv2.imread(roidb[i]['image'])
72 | if roidb[i]['flipped']:
73 | im = im[:, ::-1, :]
74 |
75 | im_orig = im.astype(np.float32, copy=True)
76 | im_orig -= cfg.PIXEL_MEANS
77 |
78 | # build image pyramid
79 | for im_scale in cfg.TRAIN.SCALES_BASE:
80 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
81 | interpolation=cv2.INTER_LINEAR)
82 |
83 | processed_ims.append(im)
84 |
85 | # Create a blob to hold the input images
86 | blob = im_list_to_blob(processed_ims)
87 |
88 | return blob
89 |
90 | def _project_im_rois(im_rois, im_scale_factor):
91 | """Project image RoIs into the rescaled training image."""
92 | rois = im_rois * im_scale_factor
93 | return rois
94 |
95 | def _get_bbox_regression_labels(bbox_target_data, num_classes):
96 | """Bounding-box regression targets are stored in a compact form in the
97 | roidb.
98 |
99 | This function expands those targets into the 4-of-4*K representation used
100 | by the network (i.e. only one class has non-zero targets). The loss weights
101 | are similarly expanded.
102 |
103 | Returns:
104 | bbox_target_data (ndarray): N x 4K blob of regression targets
105 | bbox_loss_weights (ndarray): N x 4K blob of loss weights
106 | """
107 | clss = bbox_target_data[:, 0]
108 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
109 | bbox_loss_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
110 | inds = np.where(clss > 0)[0]
111 | for ind in inds:
112 | cls = clss[ind]
113 | start = 4 * cls
114 | end = start + 4
115 | bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
116 | bbox_loss_weights[ind, start:end] = [1., 1., 1., 1.]
117 | return bbox_targets, bbox_loss_weights
118 |
119 |
120 | def _vis_minibatch(im_blob, rois_blob, labels_blob, sublabels_blob):
121 | """Visualize a mini-batch for debugging."""
122 | import matplotlib.pyplot as plt
123 | for i in range(rois_blob.shape[0]):
124 | rois = rois_blob[i, :]
125 | im_ind = rois[0]
126 | roi = rois[2:]
127 | im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy()
128 | im += cfg.PIXEL_MEANS
129 | im = im[:, :, (2, 1, 0)]
130 | im = im.astype(np.uint8)
131 | cls = labels_blob[i]
132 | subcls = sublabels_blob[i]
133 | plt.imshow(im)
134 | print('class: ', cls, ' subclass: ', subcls)
135 | plt.gca().add_patch(
136 | plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0],
137 | roi[3] - roi[1], fill=False,
138 | edgecolor='r', linewidth=3)
139 | )
140 | plt.show()
141 |
--------------------------------------------------------------------------------
/lib/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
3 | TF_LIB=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())')
4 | NSYNC_INC=$TF_INC"/external/nsync/public"
5 | # please modify $ARCH according to the following list and your gpu model.
6 | ARCH=sm_60
7 | echo $TF_INC
8 |
9 |
10 | # If coming across: cudaCheckError() failed : invalid device function. change -arch=sm_xx accordingly.
11 |
12 | # Which CUDA capabilities do we want to pre-build for?
13 | # https://developer.nvidia.com/cuda-gpus
14 | # Compute/shader model Cards
15 | # 6.1 P4, P40, Titan X so CUDA_MODEL = 61
16 | # 6.0 P100 so CUDA_MODEL = 60
17 | # 5.2 M40
18 | # 3.7 K80
19 | # 3.5 K40, K20
20 | # 3.0 K10, Grid K520 (AWS G2)
21 | # Other Nvidia shader models should work, but they will require extra startup
22 | # time as the code is not pre-optimized for them.
23 | # CUDA_MODELS=30 35 37 52 60 61
24 |
25 |
26 |
27 | CUDA_HOME=/usr/local/cuda/
28 |
29 | if [ ! -f $TF_INC/tensorflow/stream_executor/cuda/cuda_config.h ]; then
30 | cp ./cuda_config.h $TF_INC/tensorflow/stream_executor/cuda/
31 | fi
32 |
33 | cd roi_pooling_layer
34 |
35 | #nvcc -std=c++11 -ccbin=/usr/bin/g++-4.9 -c -o roi_pooling_op.cu.o roi_pooling_op_gpu.cu.cc \
36 | # -I $TF_INC -D GOOGLE_CUDA=1 -L $CUDA_HOME/lib64 -x cu -Xcompiler -fPIC -arch=$ARCH --expt-relaxed-constexpr
37 |
38 | ## if you install tf using already-built binary, or gcc version 4.x, uncomment the two lines below
39 | #g++-4.9 -std=c++11 -shared -o roi_pooling.so roi_pooling_op.cc \
40 | # roi_pooling_op.cu.o -I TF_INC -fPIC -lcudart -L CUDA_HOME/lib64 -D GOOGLE_CUDA=1 -Wfatal-errors -I $CUDA_HOME/include -D_GLIBCXX_USE_CXX11_ABI=0
41 |
42 | # for gcc5-built tf
43 | # g++ -std=c++11 -shared -o roi_pooling.so roi_pooling_op.cc \
44 | # roi_pooling_op.cu.o -I $TF_INC -I $NSYNC_INC -fPIC -D GOOGLE_CUDA -lcudart -L $CUDA_HOME/lib64 -L $TF_LIB -ltensorflow_framework -D_GLIBCXX_USE_CXX11_ABI=0
45 | cd ..
46 |
47 |
48 | # add building psroi_pooling layer
49 | cd psroi_pooling_layer
50 | nvcc -std=c++11 -ccbin=/usr/bin/g++-4.9 -c -o psroi_pooling_op.cu.o psroi_pooling_op_gpu.cu.cc \
51 | -I $TF_INC -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -L /usr/local/cuda-8.0/lib64/ -arch=$ARCH --expt-relaxed-constexpr
52 |
53 |
54 | ## if you install tf using already-built binary, or gcc version 4.x, uncomment the two lines below
55 | g++-4.9 -std=c++11 -shared -o psroi_pooling.so psroi_pooling_op.cc psroi_pooling_op.cu.o -I $TF_INC -fPIC -lcudart \
56 | -L $CUDA_HOME/lib64 -L $TF_LIB -ltensorflow_framework -D GOOGLE_CUDA=1 -Wfatal-errors -L $TF_LIB -I $CUDA_HOME/include -D_GLIBCXX_USE_CXX11_ABI=0
57 | # for gcc5-built tf
58 | #g++ -std=c++11 -shared -o psroi_pooling.so psroi_pooling_op.cc \
59 | # psroi_pooling_op.cu.o -I $TF_INC -I $NSYNC_INC -fPIC -D GOOGLE_CUDA -lcudart -L $CUDA_HOME/lib64 -D_GLIBCXX_USE_CXX11_ABI=0
60 |
61 | cd ..
62 |
63 | cd deform_psroi_pooling_layer
64 | nvcc -std=c++11 -ccbin=/usr/bin/g++-4.9 -c -o deform_psroi_pooling_op.cu.o deform_psroi_pooling_op_gpu.cu.cc \
65 | -I $TF_INC -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -L /usr/local/cuda-8.0/lib64/ -arch=$ARCH --expt-relaxed-constexpr -L $TF_LIB -ltensorflow_framework
66 |
67 | ## if you install tf using already-built binary, or gcc version 4.x, uncomment the three lines below
68 | g++-4.9 -std=c++11 -shared -o deform_psroi_pooling.so deform_psroi_pooling_op.cc deform_psroi_pooling_op.cu.o -I $TF_INC -fPIC -lcudart \
69 | -L $CUDA_HOME/lib64 -L $TF_LIB -ltensorflow_framework -D GOOGLE_CUDA=1 -Wfatal-errors -L $TF_LIB -I $CUDA_HOME/include -D_GLIBCXX_USE_CXX11_ABI=0
70 | # for gcc5-built tf
71 | #g++ -std=c++11 -shared -o deform_psroi_pooling.so deform_psroi_pooling_op.cc deform_psroi_pooling_op.cu.o \
72 | # -I $TF_INC -I $NSYNC_INC -fPIC -D GOOGLE_CUDA -lcudart -L $CUDA_HOME/lib64 -D_GLIBCXX_USE_CXX11_ABI=0 -L $TF_LIB -ltensorflow_framework
73 | cd ..
74 |
75 | cd deform_conv_layer
76 | nvcc -std=c++11 -ccbin=/usr/bin/g++-4.9 -c -o deform_conv.cu.o deform_conv.cu.cc \
77 | -I $TF_INC -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -L /usr/local/cuda-8.0/lib64/ -arch=$ARCH --expt-relaxed-constexpr
78 | ## if you install tf using already-built binary, or gcc version 4.x, uncomment the three lines below
79 | g++-4.9 -std=c++11 -shared -o deform_conv.so deform_conv.cc deform_conv.cu.o -I $TF_INC -fPIC -lcudart \
80 | -L $CUDA_HOME/lib64 -L $TF_LIB -ltensorflow_framework -D GOOGLE_CUDA=1 -Wfatal-errors -L $TF_LIB -I $CUDA_HOME/include -D_GLIBCXX_USE_CXX11_ABI=0
81 | # for gcc5-built tf
82 | #g++ -std=c++11 -shared -o deform_conv.so deform_conv.cc deform_conv.cu.o \
83 | # -I $TF_INC -I $NSYNC_INC -fPIC -D GOOGLE_CUDA -lcudart -L $CUDA_HOME/lib64 -L $TF_LIB -ltensorflow_framework -D_GLIBCXX_USE_CXX11_ABI=0
84 |
85 | cd ..
86 |
87 |
--------------------------------------------------------------------------------
/lib/networks/.VGGnet.py.swo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/networks/.VGGnet.py.swo
--------------------------------------------------------------------------------
/lib/networks/VGGnet_test.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from .network import Network
3 | from ..fast_rcnn.config import cfg
4 |
5 |
6 | class VGGnet_test(Network):
7 | def __init__(self, trainable=True):
8 | self.inputs = []
9 | self.data = tf.placeholder(tf.float32, shape=[None, None, None, 3])
10 | self.im_info = tf.placeholder(tf.float32, shape=[None, 3])
11 | self.keep_prob = tf.placeholder(tf.float32)
12 | self.layers = dict({'data': self.data, 'im_info': self.im_info})
13 | self.trainable = trainable
14 | self.setup()
15 |
16 | def setup(self):
17 | # n_classes = 21
18 | n_classes = cfg.NCLASSES
19 | # anchor_scales = [8, 16, 32]
20 | anchor_scales = cfg.ANCHOR_SCALES
21 | _feat_stride = [16, ]
22 |
23 | (self.feed('data')
24 | .conv(3, 3, 64, 1, 1, name='conv1_1', trainable=False)
25 | .conv(3, 3, 64, 1, 1, name='conv1_2', trainable=False)
26 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool1')
27 | .conv(3, 3, 128, 1, 1, name='conv2_1', trainable=False)
28 | .conv(3, 3, 128, 1, 1, name='conv2_2', trainable=False)
29 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool2')
30 | .conv(3, 3, 256, 1, 1, name='conv3_1')
31 | .conv(3, 3, 256, 1, 1, name='conv3_2')
32 | .conv(3, 3, 256, 1, 1, name='conv3_3')
33 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool3')
34 | .conv(3, 3, 512, 1, 1, name='conv4_1')
35 | .conv(3, 3, 512, 1, 1, name='conv4_2')
36 | .conv(3, 3, 512, 1, 1, name='conv4_3')
37 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool4')
38 | .conv(3, 3, 512, 1, 1, name='conv5_1')
39 | .conv(3, 3, 512, 1, 1, name='conv5_2')
40 | .conv(3, 3, 512, 1, 1, name='conv5_3'))
41 |
42 | (self.feed('conv5_3')
43 | .conv(3, 3, 512, 1, 1, name='rpn_conv/3x3')
44 | .conv(1, 1, len(anchor_scales) * 3 * 2, 1, 1, padding='VALID', relu=False, name='rpn_cls_score'))
45 |
46 | (self.feed('rpn_conv/3x3')
47 | .conv(1, 1, len(anchor_scales) * 3 * 4, 1, 1, padding='VALID', relu=False, name='rpn_bbox_pred'))
48 |
49 | # shape is (1, H, W, Ax2) -> (1, H, WxA, 2)
50 | (self.feed('rpn_cls_score')
51 | .spatial_reshape_layer(2, name='rpn_cls_score_reshape')
52 | .spatial_softmax(name='rpn_cls_prob'))
53 |
54 | # shape is (1, H, WxA, 2) -> (1, H, W, Ax2)
55 | (self.feed('rpn_cls_prob')
56 | .spatial_reshape_layer(len(anchor_scales) * 3 * 2, name='rpn_cls_prob_reshape'))
57 |
58 | (self.feed('rpn_cls_prob_reshape', 'rpn_bbox_pred', 'im_info')
59 | .proposal_layer(_feat_stride, anchor_scales, 'TEST', name='rois'))
60 |
61 | (self.feed('conv5_3')
62 | .conv(3, 3, 72, 1, 1, biased=True, rate=2, relu=False, name='conv6_1_offset', padding='SAME', initializer='zeros'))
63 | (self.feed('conv5_3', 'conv6_1_offset')
64 | .deform_conv(3, 3, 512, 1, 1, biased=False, rate=2, relu=True, num_deform_group=4, name='conv6_1'))
65 | (self.feed('conv6_1')
66 | .conv(3, 3, 72, 1, 1, biased=True, rate=2, relu=False, name='conv6_2_offset', padding='SAME', initializer='zeros'))
67 | (self.feed('conv6_1', 'conv6_2_offset')
68 | .deform_conv(3, 3, 512, 1, 1, biased=False, rate=2, relu=True, num_deform_group=4, name='conv6_2'))
69 | (self.feed('conv6_2', 'rois')
70 | .deform_psroi_pool(group_size=1, pooled_size=7, sample_per_part=4, no_trans=True, part_size=7, output_dim=256, trans_std=1e-1, spatial_scale=0.0625, name='offset_t')
71 | .fc(num_out=7 * 7 * 2, name='offset', relu=False)
72 | .reshape(shape=(-1,2,7,7), name='offset_reshape'))
73 | (self.feed('conv6_2', 'rois', 'offset_reshape')
74 | .deform_psroi_pool(group_size=1, pooled_size=7, sample_per_part=4, no_trans=False, part_size=7, output_dim=256, trans_std=1e-1, spatial_scale=0.0625, name='pool_6')
75 | .fc(4096, name='fc6')
76 | .dropout(0.5, name='drop6')
77 | .fc(4096, name='fc7')
78 | .dropout(0.5, name='drop7')
79 | .fc(n_classes, relu=False, name='cls_score')
80 | .softmax(name='cls_prob'))
81 |
82 | (self.feed('drop7')
83 | .fc(n_classes*4, relu=False, name='bbox_pred'))
84 |
85 |
--------------------------------------------------------------------------------
/lib/networks/VGGnet_train.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from .network import Network
3 | from ..fast_rcnn.config import cfg
4 |
5 | class VGGnet_train(Network):
6 | def __init__(self, trainable=True):
7 | self.inputs = []
8 | self.data = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='data')
9 | self.im_info = tf.placeholder(tf.float32, shape=[None, 3], name='im_info')
10 | self.gt_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='gt_boxes')
11 | self.gt_ishard = tf.placeholder(tf.int32, shape=[None], name='gt_ishard')
12 | self.dontcare_areas = tf.placeholder(tf.float32, shape=[None, 4], name='dontcare_areas')
13 | self.keep_prob = tf.placeholder(tf.float32)
14 | self.layers = dict({'data':self.data, 'im_info':self.im_info, 'gt_boxes':self.gt_boxes,\
15 | 'gt_ishard': self.gt_ishard, 'dontcare_areas': self.dontcare_areas})
16 | self.trainable = trainable
17 | self.setup()
18 |
19 | def setup(self):
20 |
21 | # n_classes = 21
22 | n_classes = cfg.NCLASSES
23 | # anchor_scales = [8, 16, 32]
24 | anchor_scales = cfg.ANCHOR_SCALES
25 | _feat_stride = [16, ]
26 |
27 | (self.feed('data')
28 | .conv(3, 3, 64, 1, 1, name='conv1_1', trainable=False)
29 | .conv(3, 3, 64, 1, 1, name='conv1_2', trainable=False)
30 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool1')
31 | .conv(3, 3, 128, 1, 1, name='conv2_1', trainable=False)
32 | .conv(3, 3, 128, 1, 1, name='conv2_2', trainable=False)
33 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool2')
34 | .conv(3, 3, 256, 1, 1, name='conv3_1')
35 | .conv(3, 3, 256, 1, 1, name='conv3_2')
36 | .conv(3, 3, 256, 1, 1, name='conv3_3')
37 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool3')
38 | .conv(3, 3, 512, 1, 1, name='conv4_1')
39 | .conv(3, 3, 512, 1, 1, name='conv4_2')
40 | .conv(3, 3, 512, 1, 1, name='conv4_3')
41 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool4')
42 | .conv(3, 3, 512, 1, 1, name='conv5_1')
43 | .conv(3, 3, 512, 1, 1, name='conv5_2')
44 | .conv(3, 3, 512, 1, 1, name='conv5_3'))
45 | #========= RPN ============
46 | (self.feed('conv5_3')
47 | .conv(3,3,512,1,1,name='rpn_conv/3x3'))
48 |
49 | # Loss of rpn_cls & rpn_boxes
50 | # shape is (1, H, W, A x 4) and (1, H, W, A x 2)
51 | (self.feed('rpn_conv/3x3')
52 | .conv(1,1,len(anchor_scales) * 3 * 4, 1, 1, padding='VALID', relu = False, name='rpn_bbox_pred'))
53 | (self.feed('rpn_conv/3x3')
54 | .conv(1, 1, len(anchor_scales) * 3 * 2, 1, 1, padding='VALID', relu=False, name='rpn_cls_score'))
55 |
56 | # generating training labels on the fly
57 | # output: rpn_labels(HxWxA, 2) rpn_bbox_targets(HxWxA, 4) rpn_bbox_inside_weights rpn_bbox_outside_weights
58 | (self.feed('rpn_cls_score', 'gt_boxes', 'gt_ishard', 'dontcare_areas', 'im_info')
59 | .anchor_target_layer(_feat_stride, anchor_scales, name = 'rpn-data' ))
60 |
61 | # shape is (1, H, W, Ax2) -> (1, H, WxA, 2)
62 | (self.feed('rpn_cls_score')
63 | .spatial_reshape_layer(2, name = 'rpn_cls_score_reshape')
64 | .spatial_softmax(name='rpn_cls_prob'))
65 |
66 | # shape is (1, H, WxA, 2) -> (1, H, W, Ax2)
67 | (self.feed('rpn_cls_prob')
68 | .spatial_reshape_layer(len(anchor_scales)*3*2, name = 'rpn_cls_prob_reshape'))
69 |
70 | # ========= RoI Proposal ============
71 | # apply the predicted deltas (the RPN output) to the anchors, then
72 | # keep only reasonable boxes, considering scores, ratios, size and IoU
73 | # rpn_rois <- (1 x H x W x A, 5) e.g. [0, x1, y1, x2, y2]
74 | (self.feed('rpn_cls_prob_reshape','rpn_bbox_pred','im_info')
75 | .proposal_layer(_feat_stride, anchor_scales, 'TRAIN', name = 'rpn_rois'))
76 |
77 | # match proposals against the ground-truth boxes,
78 | # and randomly sample some RoIs and labels for the RCNN head
79 | (self.feed('rpn_rois','gt_boxes', 'gt_ishard', 'dontcare_areas')
80 | .proposal_target_layer(n_classes,name = 'roi-data'))
81 |
82 | #========= RCNN ============
83 | (self.feed('conv5_3')
84 | .conv(3, 3, 72, 1, 1, biased=True, rate=2, relu=False, name='conv6_1_offset', padding='SAME', initializer='zeros'))
85 | (self.feed('conv5_3', 'conv6_1_offset')
86 | .deform_conv(3, 3, 512, 1, 1, biased=False, rate=2, relu=True, num_deform_group=4, name='conv6_1'))
87 | (self.feed('conv6_1')
88 | .conv(3, 3, 72, 1, 1, biased=True, rate=2, relu=False, name='conv6_2_offset', padding='SAME', initializer='zeros'))
89 | (self.feed('conv6_1', 'conv6_2_offset')
90 | .deform_conv(3, 3, 512, 1, 1, biased=False, rate=2, relu=True, num_deform_group=4, name='conv6_2'))
91 | (self.feed('conv6_2', 'rois')
92 | .deform_psroi_pool(group_size=1, pooled_size=7, sample_per_part=4, no_trans=True, part_size=7, output_dim=256, trans_std=1e-1, spatial_scale=0.0625, name='offset_t')
93 | .fc(num_out=7 * 7 * 2, name='offset', relu=False)
94 | .reshape(shape=(-1,2,7,7), name='offset_reshape'))
95 | (self.feed('conv6_2', 'rois', 'offset_reshape')
96 | .deform_psroi_pool(group_size=1, pooled_size=7, sample_per_part=4, no_trans=False, part_size=7, output_dim=256, trans_std=1e-1, spatial_scale=0.0625, name='pool_6')
97 | .fc(4096, name='fc6')
98 | .dropout(0.5, name='drop6')
99 | .fc(4096, name='fc7')
100 | .dropout(0.5, name='drop7')
101 | .fc(n_classes, relu=False, name='cls_score')
102 | .softmax(name='cls_prob'))
103 |
104 | (self.feed('drop7')
105 | .fc(n_classes*4, relu=False, name='bbox_pred'))
106 |
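The shape comments in the RPN section above, e.g. (1, H, W, Ax2) -> (1, H, WxA, 2), can be checked with a small numpy sketch (illustrative numbers only; the real transform happens inside spatial_reshape_layer):

    import numpy as np
    H, W, A = 14, 14, len([8, 16, 32]) * 3            # A anchors per spatial location
    rpn_cls_score = np.zeros((1, H, W, A * 2))        # (1, H, W, Ax2)
    reshaped = rpn_cls_score.reshape(1, H, W * A, 2)  # (1, H, WxA, 2): one 2-way score pair per anchor
    print(reshaped.shape)                             # (1, 14, 126, 2)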
--------------------------------------------------------------------------------
/lib/networks/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | from .VGGnet_train import VGGnet_train
9 | from .VGGnet_test import VGGnet_test
10 | from .Resnet50_train import Resnet50_train
11 | from .Resnet50_test import Resnet50_test
12 | from .Resnet101_train import Resnet101_train
13 | from .Resnet101_test import Resnet101_test
14 | from . import factory
15 |
--------------------------------------------------------------------------------
/lib/networks/caffenet.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from .network import Network
3 |
4 | class caffenet(Network):
5 | def __init__(self, trainable=True):
6 | self.inputs = []
7 | self.data = tf.placeholder(tf.float32, shape=[None, None, None, 3])
8 | self.rois = tf.placeholder(tf.float32, shape=[None, 5])
9 | self.keep_prob = tf.placeholder(tf.float32)
10 | self.layers = dict({'data':self.data, 'rois':self.rois})
11 | self.trainable = trainable
12 | self.setup()
13 |
14 | def setup(self):
15 | (self.feed('data')
16 | .conv(11, 11, 96, 4, 4, padding='VALID', name='conv1', trainable=False)
17 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool1')
18 | .lrn(2, 2e-05, 0.75, name='norm1')
19 | .conv(5, 5, 256, 1, 1, group=2, name='conv2')
20 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
21 | .lrn(2, 2e-05, 0.75, name='norm2')
22 | .conv(3, 3, 384, 1, 1, name='conv3')
23 | .conv(3, 3, 384, 1, 1, group=2, name='conv4')
24 | .conv(3, 3, 256, 1, 1, group=2, name='conv5')
25 | .feature_extrapolating([1.0, 2.0, 3.0, 4.0], 4, 4, name='conv5_feature'))
26 |
27 | # NOTE: the chain below was left unfinished in the original source and fed an 'im_info'
28 | # input that this network never defines; it is commented out so the module parses cleanly
29 | # (self.feed('conv5_feature','im_info').conv(3,3,))
30 | (self.feed('conv5_feature', 'rois')
31 | .roi_pool(6, 6, 1.0/16, name='pool5')
32 | .fc(4096, name='fc6')
33 | .dropout(self.keep_prob, name='drop6')
34 | .fc(4096, name='fc7')
35 | .dropout(self.keep_prob, name='drop7')
36 | .fc(174, relu=False, name='subcls_score')
37 | .softmax(name='subcls_prob'))
38 |
39 | (self.feed('subcls_score')
40 | .fc(4, relu=False, name='cls_score')
41 | .softmax(name='cls_prob'))
42 |
43 | (self.feed('subcls_score')
44 | .fc(16, relu=False, name='bbox_pred'))
45 |
--------------------------------------------------------------------------------
/lib/networks/factory.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # SubCNN_TF
3 | # Copyright (c) 2016 CVGL Stanford
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Yu Xiang
6 | # --------------------------------------------------------
7 |
8 | """Factory method for easily getting imdbs by name."""
9 |
10 | __sets = {}
11 |
12 | from .VGGnet_test import VGGnet_test
13 | # from .VGGnet_testold import VGGnet_testold  # module not present in lib/networks; kept for reference
14 | from .VGGnet_train import VGGnet_train
15 | from .Resnet50_test import Resnet50_test
16 | from .Resnet50_train import Resnet50_train
17 | from .Resnet101_test import Resnet101_test
18 | from .Resnet101_train import Resnet101_train
19 | # from .PVAnet_train import PVAnet_train  # module not present in lib/networks; kept for reference
20 | # from .PVAnet_test import PVAnet_test    # module not present in lib/networks; kept for reference
21 |
22 |
23 | def get_network(name):
24 | """Get a network by name."""
25 | if name.split('_')[0] == 'VGGnet':
26 | if name.split('_')[1] == 'test':
27 | return VGGnet_test()
28 | elif name.split('_')[1] == 'train':
29 | return VGGnet_train()
30 | elif name.split('_')[1] == 'testold':
31 | return VGGnet_testold()
32 | else:
33 | raise KeyError('Unknown network: {}'.format(name))
34 | elif name.split('_')[0] == 'Resnet50':
35 | if name.split('_')[1] == 'test':
36 | return Resnet50_test()
37 | elif name.split('_')[1] == 'train':
38 | return Resnet50_train()
39 | else:
40 | raise KeyError('Unknown network: {}'.format(name))
41 | elif name.split('_')[0] == 'Resnet101':
42 | if name.split('_')[1] == 'test':
43 | return Resnet101_test()
44 | elif name.split('_')[1] == 'train':
45 | return Resnet101_train()
46 | else:
47 | raise KeyError('Unknown network: {}'.format(name))
48 | elif name.split('_')[0] == 'PVAnet':
49 | if name.split('_')[1] == 'test':
50 | return PVAnet_test()
51 | elif name.split('_')[1] == 'train':
52 | return PVAnet_train()
53 | else:
54 | raise KeyError('Unknown network: {}'.format(name))
55 | else:
56 | raise KeyError('Unknown network: {}'.format(name))
57 |
58 | def list_networks():
59 | """List all registered imdbs."""
60 | return list(__sets.keys())
61 |
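A minimal usage sketch (assumes the repository root is on the Python path and that the imported network modules build their graphs cleanly, including the compiled custom ops):

    from lib.networks.factory import get_network
    net = get_network('VGGnet_test')   # name format is '<architecture>_<phase>'
    print(type(net).__name__)          # VGGnet_test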
--------------------------------------------------------------------------------
/lib/nms/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
4 |
--------------------------------------------------------------------------------
/lib/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/nms/__init__.py
--------------------------------------------------------------------------------
/lib/nms/cpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
12 | return a if a >= b else b
13 |
14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
15 | return a if a <= b else b
16 |
17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
23 |
24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
26 |
27 | cdef int ndets = dets.shape[0]
28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \
29 | np.zeros((ndets), dtype=np.int)
30 |
31 | # nominal indices
32 | cdef int _i, _j
33 | # sorted indices
34 | cdef int i, j
35 | # temp variables for box i's (the box currently under consideration)
36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea
37 | # variables for computing overlap with box j (lower scoring box)
38 | cdef np.float32_t xx1, yy1, xx2, yy2
39 | cdef np.float32_t w, h
40 | cdef np.float32_t inter, ovr
41 |
42 | keep = []
43 | for _i in range(ndets):
44 | i = order[_i]
45 | if suppressed[i] == 1:
46 | continue
47 | keep.append(i)
48 | ix1 = x1[i]
49 | iy1 = y1[i]
50 | ix2 = x2[i]
51 | iy2 = y2[i]
52 | iarea = areas[i]
53 | for _j in range(_i + 1, ndets):
54 | j = order[_j]
55 | if suppressed[j] == 1:
56 | continue
57 | xx1 = max(ix1, x1[j])
58 | yy1 = max(iy1, y1[j])
59 | xx2 = min(ix2, x2[j])
60 | yy2 = min(iy2, y2[j])
61 | w = max(0.0, xx2 - xx1 + 1)
62 | h = max(0.0, yy2 - yy1 + 1)
63 | inter = w * h
64 | ovr = inter / (iarea + areas[j] - inter)
65 | if ovr >= thresh:
66 | suppressed[j] = 1
67 |
68 | return keep
69 |
--------------------------------------------------------------------------------
/lib/nms/gpu_nms.hpp:
--------------------------------------------------------------------------------
1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
2 | int boxes_dim, float nms_overlap_thresh, int device_id);
3 |
--------------------------------------------------------------------------------
/lib/nms/gpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | assert sizeof(int) == sizeof(np.int32_t)
12 |
13 | cdef extern from "gpu_nms.hpp":
14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)
15 |
16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
17 | np.int32_t device_id=0):
18 | cdef int boxes_num = dets.shape[0]
19 | cdef int boxes_dim = dets.shape[1]
20 | cdef int num_out
21 | cdef np.ndarray[np.int32_t, ndim=1] \
22 | keep = np.zeros(boxes_num, dtype=np.int32)
23 | cdef np.ndarray[np.float32_t, ndim=1] \
24 | scores = dets[:, 4]
25 | cdef np.ndarray[np.int_t, ndim=1] \
26 | order = scores.argsort()[::-1]
27 | cdef np.ndarray[np.float32_t, ndim=2] \
28 | sorted_dets = dets[order, :]
29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
30 | keep = keep[:num_out]
31 | return list(order[keep])
32 |
--------------------------------------------------------------------------------
/lib/nms/nms_kernel.cu:
--------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Faster R-CNN
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5 | // Written by Shaoqing Ren
6 | // ------------------------------------------------------------------
7 |
8 | #include "gpu_nms.hpp"
9 | #include <vector>
10 | #include <iostream>
11 |
12 | #define CUDA_CHECK(condition) \
13 | /* Code block avoids redefinition of cudaError_t error */ \
14 | do { \
15 | cudaError_t error = condition; \
16 | if (error != cudaSuccess) { \
17 | std::cout << cudaGetErrorString(error) << std::endl; \
18 | } \
19 | } while (0)
20 |
21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
22 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
23 |
24 | __device__ inline float devIoU(float const * const a, float const * const b) {
25 | float left = max(a[0], b[0]), right = min(a[2], b[2]);
26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
28 | float interS = width * height;
29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
31 | return interS / (Sa + Sb - interS);
32 | }
33 |
34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
35 | const float *dev_boxes, unsigned long long *dev_mask) {
36 | const int row_start = blockIdx.y;
37 | const int col_start = blockIdx.x;
38 |
39 | // if (row_start > col_start) return;
40 |
41 | const int row_size =
42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
43 | const int col_size =
44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
45 |
46 | __shared__ float block_boxes[threadsPerBlock * 5];
47 | if (threadIdx.x < col_size) {
48 | block_boxes[threadIdx.x * 5 + 0] =
49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
50 | block_boxes[threadIdx.x * 5 + 1] =
51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
52 | block_boxes[threadIdx.x * 5 + 2] =
53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
54 | block_boxes[threadIdx.x * 5 + 3] =
55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
56 | block_boxes[threadIdx.x * 5 + 4] =
57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
58 | }
59 | __syncthreads();
60 |
61 | if (threadIdx.x < row_size) {
62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
63 | const float *cur_box = dev_boxes + cur_box_idx * 5;
64 | int i = 0;
65 | unsigned long long t = 0;
66 | int start = 0;
67 | if (row_start == col_start) {
68 | start = threadIdx.x + 1;
69 | }
70 | for (i = start; i < col_size; i++) {
71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
72 | t |= 1ULL << i;
73 | }
74 | }
75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
76 | dev_mask[cur_box_idx * col_blocks + col_start] = t;
77 | }
78 | }
79 |
80 | void _set_device(int device_id) {
81 | int current_device;
82 | CUDA_CHECK(cudaGetDevice(&current_device));
83 | if (current_device == device_id) {
84 | return;
85 | }
86 | // The call to cudaSetDevice must come before any calls to Get, which
87 | // may perform initialization using the GPU.
88 | CUDA_CHECK(cudaSetDevice(device_id));
89 | }
90 |
91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
92 | int boxes_dim, float nms_overlap_thresh, int device_id) {
93 | _set_device(device_id);
94 |
95 | float* boxes_dev = NULL;
96 | unsigned long long* mask_dev = NULL;
97 |
98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
99 |
100 | CUDA_CHECK(cudaMalloc(&boxes_dev,
101 | boxes_num * boxes_dim * sizeof(float)));
102 | CUDA_CHECK(cudaMemcpy(boxes_dev,
103 | boxes_host,
104 | boxes_num * boxes_dim * sizeof(float),
105 | cudaMemcpyHostToDevice));
106 |
107 | CUDA_CHECK(cudaMalloc(&mask_dev,
108 | boxes_num * col_blocks * sizeof(unsigned long long)));
109 |
110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 | DIVUP(boxes_num, threadsPerBlock));
112 | dim3 threads(threadsPerBlock);
113 | nms_kernel<<<blocks, threads>>>(boxes_num,
114 | nms_overlap_thresh,
115 | boxes_dev,
116 | mask_dev);
117 |
118 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119 | CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 | mask_dev,
121 | sizeof(unsigned long long) * boxes_num * col_blocks,
122 | cudaMemcpyDeviceToHost));
123 |
124 | std::vector<unsigned long long> remv(col_blocks);
125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 |
127 | int num_to_keep = 0;
128 | for (int i = 0; i < boxes_num; i++) {
129 | int nblock = i / threadsPerBlock;
130 | int inblock = i % threadsPerBlock;
131 |
132 | if (!(remv[nblock] & (1ULL << inblock))) {
133 | keep_out[num_to_keep++] = i;
134 | unsigned long long *p = &mask_host[0] + i * col_blocks;
135 | for (int j = nblock; j < col_blocks; j++) {
136 | remv[j] |= p[j];
137 | }
138 | }
139 | }
140 | *num_out = num_to_keep;
141 |
142 | CUDA_CHECK(cudaFree(boxes_dev));
143 | CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 |
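The host-side loop above performs the final greedy selection by walking the per-box bitmasks produced by the kernel. A small Python sketch of the same reduction, simplified to a single 64-bit column block and made-up mask values:

    # bit j of mask_host[i] == 1 means box i suppresses box j (boxes already sorted by score)
    mask_host = [0b110, 0b000, 0b000]   # box 0 suppresses boxes 1 and 2
    remv, keep = 0, []
    for i, row in enumerate(mask_host):
        if not (remv >> i) & 1:         # box i was not suppressed by an earlier kept box
            keep.append(i)
            remv |= row                 # mark everything that box i suppresses
    print(keep)                         # [0]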
--------------------------------------------------------------------------------
/lib/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 | def py_cpu_nms(dets, thresh):
11 | """Pure Python NMS baseline."""
12 | x1 = dets[:, 0]
13 | y1 = dets[:, 1]
14 | x2 = dets[:, 2]
15 | y2 = dets[:, 3]
16 | scores = dets[:, 4]
17 |
18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
19 | order = scores.argsort()[::-1]
20 |
21 | keep = []
22 | while order.size > 0:
23 | i = order[0]
24 | keep.append(i)
25 | xx1 = np.maximum(x1[i], x1[order[1:]])
26 | yy1 = np.maximum(y1[i], y1[order[1:]])
27 | xx2 = np.minimum(x2[i], x2[order[1:]])
28 | yy2 = np.minimum(y2[i], y2[order[1:]])
29 |
30 | w = np.maximum(0.0, xx2 - xx1 + 1)
31 | h = np.maximum(0.0, yy2 - yy1 + 1)
32 | inter = w * h
33 | ovr = inter / (areas[i] + areas[order[1:]] - inter)
34 |
35 | inds = np.where(ovr <= thresh)[0]
36 | order = order[inds + 1]
37 |
38 | return keep
39 |
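A minimal usage sketch of the function above (the detections are made up; each row is [x1, y1, x2, y2, score]):

    import numpy as np
    dets = np.array([[10, 10, 50, 50, 0.9],
                     [12, 12, 48, 48, 0.8],     # overlaps the first box with IoU ~ 0.81
                     [100, 100, 150, 150, 0.7]], dtype=np.float32)
    print(py_cpu_nms(dets, thresh=0.5))         # [0, 2]: the second box is suppressed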
--------------------------------------------------------------------------------
/lib/psroi_pooling_layer/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # R-FCN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Revised by Minyue Jiang
6 | # --------------------------------------------------------
7 |
--------------------------------------------------------------------------------
/lib/psroi_pooling_layer/psroi_pooling_op.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import os.path as osp
3 |
4 | filename = osp.join(osp.dirname(__file__), 'psroi_pooling.so')
5 | _psroi_pooling_module = tf.load_op_library(filename)
6 | psroi_pool = _psroi_pooling_module.psroi_pool
7 | psroi_pool_grad = _psroi_pooling_module.psroi_pool_grad
--------------------------------------------------------------------------------
/lib/psroi_pooling_layer/psroi_pooling_op_gpu.h:
--------------------------------------------------------------------------------
1 | #if !GOOGLE_CUDA
2 | #error This file must only be included when building with Cuda support
3 | #endif
4 |
5 | #ifndef TENSORFLOW_USER_OPS_PSROIPOOLING_OP_GPU_H_
6 | #define TENSORFLOW_USER_OPS_PSROIPOOLING_OP_GPU_H_
7 |
8 | #define EIGEN_USE_GPU
9 |
10 | #include "tensorflow/core/framework/tensor_types.h"
11 | #include "tensorflow/core/platform/types.h"
12 |
13 | namespace tensorflow {
14 |
15 | // Launch the forward pass of position-sensitive RoI pooling on the GPU: pooled features are
16 | // written to top_data, and the channel selected for each output element is recorded in
17 | // mapping_channel so that the backward pass can route gradients.
18 | bool PSROIPoolForwardLauncher(
19 | const float* bottom_data, const float spatial_scale, const int num_rois, const int channels, const int height,
20 | const int width, const int pooled_height, const int pooled_width, const float* bottom_rois,
21 | const int output_dim, const int group_size, float* top_data, int* mapping_channel, const Eigen::GpuDevice& d);
22 |
23 | bool PSROIPoolBackwardLauncher(const float* top_diff, const int* mapping_channel, const int num_rois, const float spatial_scale,
24 | const int channels, const int height, const int width, const int pooled_height, const int pooled_width,
25 | const int output_dim, float* bottom_diff, const float* bottom_rois, const Eigen::GpuDevice& d);
26 |
27 | } // namespace tensorflow
28 |
29 | #endif  // TENSORFLOW_USER_OPS_PSROIPOOLING_OP_GPU_H_
--------------------------------------------------------------------------------
/lib/psroi_pooling_layer/psroi_pooling_op_grad.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.framework import ops
3 | from . import psroi_pooling_op
4 | import pdb
5 |
6 |
7 | @tf.RegisterShape("PSROIPool")
8 | def _psroi_pool_shape(op):
9 | """Shape function for the PSROIPool op.
10 |
11 | """
12 | dims_data = op.inputs[0].get_shape().as_list()
13 | channels = dims_data[3]
14 | dims_rois = op.inputs[1].get_shape().as_list()
15 | num_rois = dims_rois[0]
16 | output_dim = op.get_attr('output_dim')
17 | group_size = op.get_attr('group_size')
18 | pooled_height = group_size
19 | pooled_width = group_size
20 |
21 | output_shape = tf.TensorShape([num_rois, pooled_height, pooled_width, output_dim])
22 | return [output_shape, output_shape]
23 |
24 | @ops.RegisterGradient("PSROIPool")
25 | def _psroi_pool_grad(op, grad, _):
26 | """The gradients for `PSROI_pool`.
27 | Args:
28 | op: The `roi_pool` `Operation` that we are differentiating, which we can use
29 | to find the inputs and outputs of the original op.
30 | grad: Gradient with respect to the output of the `roi_pool` op.
31 | Returns:
32 | Gradients with respect to the inputs of the `PSROIPool` op (None for the rois input).
33 | """
34 |
35 | data = op.inputs[0]
36 | rois = op.inputs[1]
37 | mapping_channel = op.outputs[1]
38 | spatial_scale = op.get_attr('spatial_scale')
39 |
40 | # compute gradient
41 | #data_grad = psroi_pooling_op.psroi_pool_grad(data, rois, argmax, grad, pooled_height, pooled_width, spatial_scale)
42 | data_grad = psroi_pooling_op.psroi_pool_grad(data, rois, mapping_channel, grad, spatial_scale)
43 |
44 | return [data_grad, None]  # gradient only for the data input; the rois input gets None
45 |
46 |
--------------------------------------------------------------------------------
/lib/psroi_pooling_layer/psroi_pooling_op_test.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from . import psroi_pooling_op
4 | from . import psroi_pooling_op_grad
5 | import pdb
6 |
7 | pdb.set_trace()
8 |
9 | rois = tf.convert_to_tensor([ [0, 0, 0, 4, 4]], dtype=tf.float32)
10 | hh=tf.convert_to_tensor(np.random.rand(1,5,5,25),dtype=tf.float32)
11 | [y2, channels] = psroi_pooling_op.psroi_pool(hh, rois, output_dim=1, group_size=5, spatial_scale=1.0)
12 |
13 | sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
14 | print(( sess.run(hh)))
15 | print(( sess.run(y2)))
16 | pdb.set_trace()
17 |
--------------------------------------------------------------------------------
/lib/pycocotools/UPSTREAM_REV:
--------------------------------------------------------------------------------
1 | https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574
2 |
--------------------------------------------------------------------------------
/lib/pycocotools/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 |
--------------------------------------------------------------------------------
/lib/pycocotools/license.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 |
7 | 1. Redistributions of source code must retain the above copyright notice, this
8 | list of conditions and the following disclaimer.
9 | 2. Redistributions in binary form must reproduce the above copyright notice,
10 | this list of conditions and the following disclaimer in the documentation
11 | and/or other materials provided with the distribution.
12 |
13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 |
24 | The views and conclusions contained in the software and documentation are those
25 | of the authors and should not be interpreted as representing official policies,
26 | either expressed or implied, of the FreeBSD Project.
27 |
--------------------------------------------------------------------------------
/lib/pycocotools/mask.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tsungyi'
2 |
3 | from . import _mask
4 |
5 | # Interface for manipulating masks stored in RLE format.
6 | #
7 | # RLE is a simple yet efficient format for storing binary masks. RLE
8 | # first divides a vector (or vectorized image) into a series of piecewise
9 | # constant regions and then for each piece simply stores the length of
10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
12 | # (note that the odd counts are always the numbers of zeros). Instead of
13 | # storing the counts directly, additional compression is achieved with a
14 | # variable bitrate representation based on a common scheme called LEB128.
15 | #
16 | # Compression is greatest given large piecewise constant regions.
17 | # Specifically, the size of the RLE is proportional to the number of
18 | # *boundaries* in M (or for an image the number of boundaries in the y
19 | # direction). Assuming fairly simple shapes, the RLE representation is
20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage
21 | # is substantially lower, especially for large simple objects (large n).
22 | #
23 | # Many common operations on masks can be computed directly using the RLE
24 | # (without need for decoding). This includes computations such as area,
25 | # union, intersection, etc. All of these operations are linear in the
26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area
27 | # of the object. Computing these operations on the original mask is O(n).
28 | # Thus, using the RLE can result in substantial computational savings.
29 | #
30 | # The following API functions are defined:
31 | # encode - Encode binary masks using RLE.
32 | # decode - Decode binary masks encoded via RLE.
33 | # merge - Compute union or intersection of encoded masks.
34 | # iou - Compute intersection over union between masks.
35 | # area - Compute area of encoded masks.
36 | # toBbox - Get bounding boxes surrounding encoded masks.
37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask.
38 | #
39 | # Usage:
40 | # Rs = encode( masks )
41 | # masks = decode( Rs )
42 | # R = merge( Rs, intersect=false )
43 | # o = iou( dt, gt, iscrowd )
44 | # a = area( Rs )
45 | # bbs = toBbox( Rs )
46 | # Rs = frPyObjects( [pyObjects], h, w )
47 | #
48 | # In the API the following formats are used:
49 | # Rs - [dict] Run-length encoding of binary masks
50 | # R - dict Run-length encoding of binary mask
51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
53 | # bbs - [nx4] Bounding box(es) stored as [x y w h]
54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
55 | # dt,gt - May be either bounding boxes or encoded masks
56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
57 | #
58 | # Finally, a note about the intersection over union (iou) computation.
59 | # The standard iou of a ground truth (gt) and detected (dt) object is
60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
61 | # For "crowd" regions, we use a modified criteria. If a gt object is
62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt.
63 | # Choosing gt' in the crowd gt that best matches the dt can be done using
64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
66 | # For crowd gt regions we use this modified criteria above for the iou.
67 | #
68 | # To compile run "python setup.py build_ext --inplace"
69 | # Please do not contact us for help with compiling.
70 | #
71 | # Microsoft COCO Toolbox. version 2.0
72 | # Data, paper, and tutorials available at: http://mscoco.org/
73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
74 | # Licensed under the Simplified BSD License [see coco/license.txt]
75 |
76 | encode = _mask.encode
77 | decode = _mask.decode
78 | iou = _mask.iou
79 | merge = _mask.merge
80 | area = _mask.area
81 | toBbox = _mask.toBbox
82 | frPyObjects = _mask.frPyObjects
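The run-length encoding described in the comment block can be illustrated in a few lines of plain Python, independent of the compiled _mask module (a sketch covering only the 1-D examples given above):

    def rle_counts(m):
        """Run lengths of a binary vector, starting with the count of leading zeros."""
        counts, prev, run = [], 0, 0
        for v in m:
            if v == prev:
                run += 1
            else:
                counts.append(run)
                prev, run = v, 1
        counts.append(run)
        return counts

    assert rle_counts([0, 0, 1, 1, 1, 0, 1]) == [2, 3, 1, 1]
    assert rle_counts([1, 1, 1, 1, 1, 1, 0]) == [0, 6, 1]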
--------------------------------------------------------------------------------
/lib/pycocotools/maskApi.h:
--------------------------------------------------------------------------------
1 | /**************************************************************************
2 | * Microsoft COCO Toolbox. version 2.0
3 | * Data, paper, and tutorials available at: http://mscoco.org/
4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
5 | * Licensed under the Simplified BSD License [see coco/license.txt]
6 | **************************************************************************/
7 | #pragma once
8 | #include <stdbool.h>  /* bool is used in the rleMerge/rleIou declarations below */
9 |
10 | typedef unsigned int uint;
11 | typedef unsigned long siz;
12 | typedef unsigned char byte;
13 | typedef double* BB;
14 | typedef struct { siz h, w, m; uint *cnts; } RLE;
15 |
16 | // Initialize/destroy RLE.
17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts );
18 | void rleFree( RLE *R );
19 |
20 | // Initialize/destroy RLE array.
21 | void rlesInit( RLE **R, siz n );
22 | void rlesFree( RLE **R, siz n );
23 |
24 | // Encode binary masks using RLE.
25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n );
26 |
27 | // Decode binary masks encoded via RLE.
28 | void rleDecode( const RLE *R, byte *mask, siz n );
29 |
30 | // Compute union or intersection of encoded masks.
31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect );
32 |
33 | // Compute area of encoded masks.
34 | void rleArea( const RLE *R, siz n, uint *a );
35 |
36 | // Compute intersection over union between masks.
37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o );
38 |
39 | // Compute intersection over union between bounding boxes.
40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o );
41 |
42 | // Get bounding boxes surrounding encoded masks.
43 | void rleToBbox( const RLE *R, BB bb, siz n );
44 |
45 | // Convert bounding boxes to encoded masks.
46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n );
47 |
48 | // Convert polygon to encoded mask.
49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w );
50 |
51 | // Get compressed string representation of encoded mask.
52 | char* rleToString( const RLE *R );
53 |
54 | // Convert from compressed string representation of encoded mask.
55 | void rleFrString( RLE *R, char *s, siz h, siz w );
56 |
--------------------------------------------------------------------------------
/lib/roi_data_layer/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | from . import roidb
--------------------------------------------------------------------------------
/lib/roi_data_layer/layer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | """The data layer used during training to train a Fast R-CNN network.
9 |
10 | RoIDataLayer implements a Caffe Python layer.
11 | """
12 |
13 | import numpy as np
14 |
15 | # TODO: make fast_rcnn irrelevant
16 | # >>>> obsolete, because it depends on something outside of this project
17 | from ..fast_rcnn.config import cfg
18 | # <<<< obsolete
19 | from ..roi_data_layer.minibatch import get_minibatch
20 |
21 | class RoIDataLayer(object):
22 | """Fast R-CNN data layer used for training."""
23 |
24 | def __init__(self, roidb, num_classes):
25 | """Set the roidb to be used by this layer during training."""
26 | self._roidb = roidb
27 | self._num_classes = num_classes
28 | self._shuffle_roidb_inds()
29 |
30 | def _shuffle_roidb_inds(self):
31 | """Randomly permute the training roidb."""
32 | self._perm = np.random.permutation(np.arange(len(self._roidb)))
33 | self._cur = 0
34 |
35 | def _get_next_minibatch_inds(self):
36 | """Return the roidb indices for the next minibatch."""
37 |
38 | if cfg.TRAIN.HAS_RPN:
39 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb):
40 | self._shuffle_roidb_inds()
41 |
42 | db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH]
43 | self._cur += cfg.TRAIN.IMS_PER_BATCH
44 | else:
45 | # sample images
46 | db_inds = np.zeros((cfg.TRAIN.IMS_PER_BATCH), dtype=np.int32)
47 | i = 0
48 | while (i < cfg.TRAIN.IMS_PER_BATCH):
49 | ind = self._perm[self._cur]
50 | num_objs = self._roidb[ind]['boxes'].shape[0]
51 | if num_objs != 0:
52 | db_inds[i] = ind
53 | i += 1
54 |
55 | self._cur += 1
56 | if self._cur >= len(self._roidb):
57 | self._shuffle_roidb_inds()
58 |
59 | return db_inds
60 |
61 | def _get_next_minibatch(self):
62 | """Return the blobs to be used for the next minibatch.
63 |
64 | If cfg.TRAIN.USE_PREFETCH is True, then blobs will be computed in a
65 | separate process and made available through self._blob_queue.
66 | """
67 | db_inds = self._get_next_minibatch_inds()
68 | minibatch_db = [self._roidb[i] for i in db_inds]
69 | return get_minibatch(minibatch_db, self._num_classes)
70 |
71 | def forward(self):
72 | """Get blobs and copy them into this layer's top blob vector."""
73 | blobs = self._get_next_minibatch()
74 | return blobs
75 |
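The HAS_RPN branch of _get_next_minibatch_inds simply walks a shuffled permutation in fixed-size chunks and reshuffles when it runs out; a standalone numpy sketch of that cycling, without the roidb or cfg machinery:

    import numpy as np
    num_images, ims_per_batch = 5, 2
    perm, cur = np.random.permutation(num_images), 0
    for _ in range(4):
        if cur + ims_per_batch >= num_images:            # permutation exhausted: reshuffle, restart
            perm, cur = np.random.permutation(num_images), 0
        db_inds = perm[cur:cur + ims_per_batch]
        cur += ims_per_batch
        print(db_inds)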
--------------------------------------------------------------------------------
/lib/roi_data_layer/roidb.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | """Transform a roidb into a trainable roidb by adding a bunch of metadata."""
9 |
10 | import numpy as np
11 |
12 | import PIL
13 |
14 | # TODO: make fast_rcnn irrelevant
15 | # >>>> obsolete, because it depends on something outside of this project
16 | from ..fast_rcnn.config import cfg
17 | from ..fast_rcnn.bbox_transform import bbox_transform
18 | # <<<< obsolete
19 | from ..utils.cython_bbox import bbox_overlaps
20 |
21 | def prepare_roidb(imdb):
22 | """Enrich the imdb's roidb by adding some derived quantities that
23 | are useful for training. This function precomputes the maximum
24 | overlap, taken over ground-truth boxes, between each ROI and
25 | each ground-truth box. The class with maximum overlap is also
26 | recorded.
27 | """
28 | sizes = [PIL.Image.open(imdb.image_path_at(i)).size
29 | for i in range(imdb.num_images)]
30 | roidb = imdb.roidb
31 | for i in range(len(imdb.image_index)):
32 | roidb[i]['image'] = imdb.image_path_at(i)
33 | roidb[i]['width'] = sizes[i][0]
34 | roidb[i]['height'] = sizes[i][1]
35 | # need gt_overlaps as a dense array for argmax
36 | gt_overlaps = roidb[i]['gt_overlaps'].toarray()
37 | # max overlap with gt over classes (columns)
38 | max_overlaps = gt_overlaps.max(axis=1)
39 | # gt class that had the max overlap
40 | max_classes = gt_overlaps.argmax(axis=1)
41 | roidb[i]['max_classes'] = max_classes
42 | roidb[i]['max_overlaps'] = max_overlaps
43 | # sanity checks
44 | # max overlap of 0 => class should be zero (background)
45 | zero_inds = np.where(max_overlaps == 0)[0]
46 | assert all(max_classes[zero_inds] == 0)
47 | # max overlap > 0 => class should not be zero (must be a fg class)
48 | nonzero_inds = np.where(max_overlaps > 0)[0]
49 | assert all(max_classes[nonzero_inds] != 0)
50 |
51 | def add_bbox_regression_targets(roidb):
52 | """
53 | Add information needed to train bounding-box regressors.
54 | For each RoI, find the corresponding ground-truth box and compute the regression targets,
55 | then normalize the targets by subtracting the mean and dividing by the standard deviation.
56 | """
57 | assert len(roidb) > 0
58 | assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?'
59 |
60 | num_images = len(roidb)
61 | # Infer number of classes from the number of columns in gt_overlaps
62 | num_classes = roidb[0]['gt_overlaps'].shape[1]
63 | for im_i in range(num_images):
64 | rois = roidb[im_i]['boxes']
65 | max_overlaps = roidb[im_i]['max_overlaps']
66 | max_classes = roidb[im_i]['max_classes']
67 | roidb[im_i]['bbox_targets'] = \
68 | _compute_targets(rois, max_overlaps, max_classes)
69 |
70 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
71 | # Use fixed / precomputed "means" and "stds" instead of empirical values
72 | means = np.tile(
73 | np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes, 1))
74 | stds = np.tile(
75 | np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes, 1))
76 | else:
77 | # Compute values needed for means and stds
78 | # var(x) = E(x^2) - E(x)^2
79 | class_counts = np.zeros((num_classes, 1)) + cfg.EPS
80 | sums = np.zeros((num_classes, 4))
81 | squared_sums = np.zeros((num_classes, 4))
82 | for im_i in range(num_images):
83 | targets = roidb[im_i]['bbox_targets']
84 | for cls in range(1, num_classes):
85 | cls_inds = np.where(targets[:, 0] == cls)[0]
86 | if cls_inds.size > 0:
87 | class_counts[cls] += cls_inds.size
88 | sums[cls, :] += targets[cls_inds, 1:].sum(axis=0)
89 | squared_sums[cls, :] += \
90 | (targets[cls_inds, 1:] ** 2).sum(axis=0)
91 |
92 | means = sums / class_counts
93 | stds = np.sqrt(squared_sums / class_counts - means ** 2)
94 | # too small number will cause nan error
95 | assert np.min(stds) >= 0.01, \
96 | 'Boxes std is too small, std:{}'.format(stds)
97 |
98 | print('bbox target means:')
99 | print(means)
100 | print(means[1:, :].mean(axis=0)) # ignore bg class
101 | print('bbox target stdevs:')
102 | print(stds)
103 | print(stds[1:, :].mean(axis=0)) # ignore bg class
104 |
105 | # Normalize targets
106 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS:
107 | print("Normalizing targets")
108 | for im_i in range(num_images):
109 | targets = roidb[im_i]['bbox_targets']
110 | for cls in range(1, num_classes):
111 | cls_inds = np.where(targets[:, 0] == cls)[0]
112 | roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :]
113 | roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :]
114 | else:
115 | print("NOT normalizing targets")
116 |
117 | # These values will be needed for making predictions
118 | # (the predicts will need to be unnormalized and uncentered)
119 | return means.ravel(), stds.ravel()
120 |
121 | def _compute_targets(rois, overlaps, labels):
122 | """
123 | Compute bounding-box regression targets for an image.
124 | For each RoI, find the corresponding ground-truth box and compute the regression targets.
125 | """
126 | # Indices of ground-truth ROIs
127 | gt_inds = np.where(overlaps == 1)[0]
128 | if len(gt_inds) == 0:
129 | # Bail if the image has no ground-truth ROIs
130 | return np.zeros((rois.shape[0], 5), dtype=np.float32)
131 | # Indices of examples for which we try to make predictions
132 | ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]
133 |
134 | # Get IoU overlap between each ex ROI and gt ROI
135 | ex_gt_overlaps = bbox_overlaps(
136 | np.ascontiguousarray(rois[ex_inds, :], dtype=np.float),
137 | np.ascontiguousarray(rois[gt_inds, :], dtype=np.float))
138 |
139 | # Find which gt ROI each ex ROI has max overlap with:
140 | # this will be the ex ROI's gt target
141 | gt_assignment = ex_gt_overlaps.argmax(axis=1)
142 | gt_rois = rois[gt_inds[gt_assignment], :]
143 | ex_rois = rois[ex_inds, :]
144 |
145 | targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
146 | targets[ex_inds, 0] = labels[ex_inds]
147 | targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
148 | return targets
149 |
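The running-statistics identity used above (var(x) = E(x^2) - E(x)^2) can be sanity-checked on a toy array (values are illustrative, not real regression targets):

    import numpy as np
    targets = np.array([[1., 2., 3., 4.],
                        [3., 4., 5., 6.]])               # two fake 4-d targets for one class
    count = targets.shape[0]
    means = targets.sum(axis=0) / count
    stds = np.sqrt((targets ** 2).sum(axis=0) / count - means ** 2)
    assert np.allclose(stds, targets.std(axis=0))        # matches the population standard deviation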
--------------------------------------------------------------------------------
/lib/roi_data_layer/roidb2.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | """Transform a roidb into a trainable roidb by adding a bunch of metadata."""
9 |
10 | import numpy as np
11 |
12 | # TODO: make fast_rcnn irrelevant
13 | # >>>> obsolete, because it depends on something outside of this project
14 | from ..fast_rcnn.config import cfg
15 | from ..fast_rcnn.bbox_transform import bbox_transform
16 | # <<<< obsolete
17 | from ..utils.cython_bbox import bbox_overlaps
18 |
19 | def prepare_roidb(imdb):
20 | """Enrich the imdb's roidb by adding some derived quantities that
21 | are useful for training. This function precomputes the maximum
22 | overlap, taken over ground-truth boxes, between each ROI and
23 | each ground-truth box. The class with maximum overlap is also
24 | recorded.
25 | """
26 | roidb = imdb.roidb
27 | for i in range(len(imdb.image_index)):
28 | roidb[i]['image'] = imdb.image_path_at(i)
29 | # need gt_overlaps as a dense array for argmax
30 | gt_overlaps = roidb[i]['gt_overlaps'].toarray()
31 | # max overlap with gt over classes (columns)
32 | max_overlaps = gt_overlaps.max(axis=1)
33 | # gt class that had the max overlap
34 | max_classes = gt_overlaps.argmax(axis=1)
35 |
36 | roidb[i]['max_classes'] = max_classes
37 | roidb[i]['max_overlaps'] = max_overlaps
38 |
39 | # sanity checks
40 | # max overlap of 0 => class should be zero (background)
41 | zero_inds = np.where(max_overlaps == 0)[0]
42 | assert all(max_classes[zero_inds] == 0)
43 | # max overlap > 0 => class should not be zero (must be a fg class)
44 | nonzero_inds = np.where(max_overlaps > 0)[0]
45 | assert all(max_classes[nonzero_inds] != 0)
46 |
47 | def add_bbox_regression_targets(roidb):
48 | """Add information needed to train bounding-box regressors."""
49 | assert len(roidb) > 0
50 | assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?'
51 |
52 | num_images = len(roidb)
53 | # Infer number of classes from the number of columns in gt_overlaps
54 | num_classes = roidb[0]['gt_overlaps'].shape[1]
55 | for im_i in range(num_images):
56 | rois = roidb[im_i]['boxes']
57 | max_overlaps = roidb[im_i]['max_overlaps']
58 | max_classes = roidb[im_i]['max_classes']
59 | roidb[im_i]['bbox_targets'] = \
60 | _compute_targets(rois, max_overlaps, max_classes, num_classes)
61 |
62 | # Compute values needed for means and stds
63 | # var(x) = E(x^2) - E(x)^2
64 | class_counts = np.zeros((num_classes, 1)) + cfg.EPS
65 | sums = np.zeros((num_classes, 4))
66 | squared_sums = np.zeros((num_classes, 4))
67 | for im_i in range(num_images):
68 | targets = roidb[im_i]['bbox_targets']
69 | for cls in range(1, num_classes):
70 | cls_inds = np.where(targets[:, 0] == cls)[0]
71 | if cls_inds.size > 0:
72 | class_counts[cls] += cls_inds.size
73 | sums[cls, :] += targets[cls_inds, 1:].sum(axis=0)
74 | squared_sums[cls, :] += (targets[cls_inds, 1:] ** 2).sum(axis=0)
75 |
76 | means = sums / class_counts
77 | stds = np.sqrt(squared_sums / class_counts - means ** 2)
78 |
79 | # Normalize targets
80 | for im_i in range(num_images):
81 | targets = roidb[im_i]['bbox_targets']
82 | for cls in range(1, num_classes):
83 | cls_inds = np.where(targets[:, 0] == cls)[0]
84 | roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :]
85 | if stds[cls, 0] != 0:
86 | roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :]
87 |
88 | # These values will be needed for making predictions
89 | # (the predicts will need to be unnormalized and uncentered)
90 | return means.ravel(), stds.ravel()
91 |
92 | def _compute_targets(rois, overlaps, labels, num_classes):
93 | """Compute bounding-box regression targets for an image."""
94 | # Ensure ROIs are floats
95 | rois = rois.astype(np.float, copy=False)
96 |
97 | # Indices of ground-truth ROIs
98 | gt_inds = np.where(overlaps == 1)[0]
99 | # Indices of examples for which we try to make predictions
100 | ex_inds = []
101 | for i in range(1, num_classes):
102 | ex_inds.extend( np.where((labels == i) & (overlaps >= cfg.TRAIN.BBOX_THRESH))[0] )
103 |
104 | # Get IoU overlap between each ex ROI and gt ROI
105 | ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :],
106 | rois[gt_inds, :])
107 |
108 | # Find which gt ROI each ex ROI has max overlap with:
109 | # this will be the ex ROI's gt target
110 | if ex_gt_overlaps.shape[0] != 0:
111 | gt_assignment = ex_gt_overlaps.argmax(axis=1)
112 | else:
113 | gt_assignment = []
114 | gt_rois = rois[gt_inds[gt_assignment], :]
115 | ex_rois = rois[ex_inds, :]
116 |
117 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + cfg.EPS
118 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + cfg.EPS
119 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
120 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
121 |
122 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + cfg.EPS
123 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + cfg.EPS
124 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
125 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
126 |
127 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
128 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
129 | targets_dw = np.log(gt_widths / ex_widths)
130 | targets_dh = np.log(gt_heights / ex_heights)
131 |
132 | targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
133 | targets[ex_inds, 0] = labels[ex_inds]
134 | targets[ex_inds, 1] = targets_dx
135 | targets[ex_inds, 2] = targets_dy
136 | targets[ex_inds, 3] = targets_dw
137 | targets[ex_inds, 4] = targets_dh
138 | return targets
139 |
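A quick numeric check of the (dx, dy, dw, dh) parameterization computed above, on toy boxes and ignoring cfg.EPS:

    import numpy as np
    ex = np.array([0., 0., 9., 9.])      # example RoI, 10x10
    gt = np.array([5., 5., 24., 24.])    # matched ground-truth box, 20x20
    ex_w, ex_h = ex[2] - ex[0] + 1, ex[3] - ex[1] + 1
    gt_w, gt_h = gt[2] - gt[0] + 1, gt[3] - gt[1] + 1
    dx = ((gt[0] + 0.5 * gt_w) - (ex[0] + 0.5 * ex_w)) / ex_w
    dy = ((gt[1] + 0.5 * gt_h) - (ex[1] + 0.5 * ex_h)) / ex_h
    dw, dh = np.log(gt_w / ex_w), np.log(gt_h / ex_h)
    print(dx, dy, dw, dh)                # 1.0 1.0 0.693... 0.693...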
--------------------------------------------------------------------------------
/lib/roi_pooling_layer/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | from . import roi_pooling_op
8 | from . import roi_pooling_op_grad
--------------------------------------------------------------------------------
/lib/roi_pooling_layer/roi_pooling_op.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import os.path as osp
3 |
4 | filename = osp.join(osp.dirname(__file__), 'roi_pooling.so')
5 | _roi_pooling_module = tf.load_op_library(filename)
6 | roi_pool = _roi_pooling_module.roi_pool
7 | roi_pool_grad = _roi_pooling_module.roi_pool_grad
8 |
--------------------------------------------------------------------------------
/lib/roi_pooling_layer/roi_pooling_op_gpu.h:
--------------------------------------------------------------------------------
1 | #if !GOOGLE_CUDA
2 | #error This file must only be included when building with Cuda support
3 | #endif
4 |
5 | #ifndef TENSORFLOW_USER_OPS_ROIPOOLING_OP_GPU_H_
6 | #define TENSORFLOW_USER_OPS_ROIPOOLING_OP_GPU_H_
7 |
8 | #define EIGEN_USE_GPU
9 |
10 | #include "tensorflow/core/framework/tensor_types.h"
11 | #include "tensorflow/core/platform/types.h"
12 |
13 | namespace tensorflow {
14 |
15 | // Run the forward pass of max pooling, optionally writing the argmax indices to
16 | // the mask array, if it is not nullptr. If mask is passed in as nullptr, the
17 | // argmax indices are not written.
18 | bool ROIPoolForwardLaucher(
19 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
20 | const int width, const int channels, const int pooled_height,
21 | const int pooled_width, const float* bottom_rois,
22 | float* top_data, int* argmax_data, const Eigen::GpuDevice& d);
23 |
24 | bool ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
25 | const int height, const int width, const int channels, const int pooled_height,
26 | const int pooled_width, const float* bottom_rois,
27 | float* bottom_diff, const int* argmax_data, const Eigen::GpuDevice& d);
28 |
29 | } // namespace tensorflow
30 |
31 | #endif  // TENSORFLOW_USER_OPS_ROIPOOLING_OP_GPU_H_
32 |
--------------------------------------------------------------------------------
/lib/roi_pooling_layer/roi_pooling_op_grad.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.framework import ops
3 | from . import roi_pooling_op
4 |
5 | @ops.RegisterGradient("RoiPool")
6 | def _roi_pool_grad(op, grad, _):
7 | """The gradients for `roi_pool`.
8 | Args:
9 | op: The `roi_pool` `Operation` that we are differentiating, which we can use
10 | to find the inputs and outputs of the original op.
11 | grad: Gradient with respect to the output of the `roi_pool` op.
12 | Returns:
13 | Gradients with respect to the inputs of the `RoiPool` op (None for the rois input).
14 | """
15 | data = op.inputs[0]
16 | rois = op.inputs[1]
17 | argmax = op.outputs[1]
18 | pooled_height = op.get_attr('pooled_height')
19 | pooled_width = op.get_attr('pooled_width')
20 | spatial_scale = op.get_attr('spatial_scale')
21 |
22 | # compute gradient
23 | data_grad = roi_pooling_op.roi_pool_grad(data, rois, argmax, grad, pooled_height, pooled_width, spatial_scale)
24 |
25 |   return [data_grad, None]  # gradient for the feature-map input; the ROIs receive no gradient
26 |
--------------------------------------------------------------------------------
/lib/roi_pooling_layer/roi_pooling_op_test.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from . import roi_pooling_op
4 | from . import roi_pooling_op_grad
5 | import tensorflow as tf
6 | import pdb
7 |
8 |
9 | def weight_variable(shape):
10 | initial = tf.truncated_normal(shape, stddev=0.1)
11 | return tf.Variable(initial)
12 |
13 | def conv2d(x, W):
14 | return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
15 |
16 | array = np.random.rand(32, 100, 100, 3)
17 | data = tf.convert_to_tensor(array, dtype=tf.float32)
18 | rois = tf.convert_to_tensor([[0, 10, 10, 20, 20], [31, 30, 30, 40, 40]], dtype=tf.float32)
19 |
20 | W = weight_variable([3, 3, 3, 1])
21 | h = conv2d(data, W)
22 |
23 | [y, argmax] = roi_pooling_op.roi_pool(h, rois, 6, 6, 1.0/3)
24 | pdb.set_trace()
25 | y_data = tf.convert_to_tensor(np.ones((2, 6, 6, 1)), dtype=tf.float32)
26 | print(y_data, y, argmax)
27 |
28 | # Minimize the mean squared errors.
29 | loss = tf.reduce_mean(tf.square(y - y_data))
30 | optimizer = tf.train.GradientDescentOptimizer(0.5)
31 | train = optimizer.minimize(loss)
32 |
33 | init = tf.global_variables_initializer()
34 |
35 | # Launch the graph.
36 | sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
37 | sess.run(init)
38 | pdb.set_trace()
39 | for step in range(10):
40 | sess.run(train)
41 | print((step, sess.run(W)))
42 | print((sess.run(y)))
43 |
44 | #with tf.device('/gpu:0'):
45 | # result = module.roi_pool(data, rois, 1, 1, 1.0/1)
46 | # print result.eval()
47 | #with tf.device('/cpu:0'):
48 | # run(init)
49 |
--------------------------------------------------------------------------------
/lib/rpn_tools/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/.DS_Store
--------------------------------------------------------------------------------
/lib/rpn_tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/__init__.py
--------------------------------------------------------------------------------
/lib/rpn_tools/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/__init__.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/anchor_target_layer_modified.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/anchor_target_layer_modified.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/fast_rcnn/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
--------------------------------------------------------------------------------
/lib/rpn_tools/fast_rcnn/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/__init__.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/fast_rcnn/bbox_transform.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 | def bbox_transform(ex_rois, gt_rois):
11 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
12 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
13 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
14 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
15 |
16 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
17 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
18 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
19 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
20 |
21 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
22 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
23 | targets_dw = np.log(gt_widths / ex_widths)
24 | targets_dh = np.log(gt_heights / ex_heights)
25 |
26 | targets = np.vstack(
27 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose()
28 | return targets
29 |
30 | def bbox_transform_inv(boxes, deltas):
31 | if boxes.shape[0] == 0:
32 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)
33 |
34 | boxes = boxes.astype(deltas.dtype, copy=False)
35 |
36 | widths = boxes[:, 2] - boxes[:, 0] + 1.0
37 | heights = boxes[:, 3] - boxes[:, 1] + 1.0
38 | ctr_x = boxes[:, 0] + 0.5 * widths
39 | ctr_y = boxes[:, 1] + 0.5 * heights
40 |
41 | dx = deltas[:, 0::4]
42 | dy = deltas[:, 1::4]
43 | dw = deltas[:, 2::4]
44 | dh = deltas[:, 3::4]
45 |
46 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
47 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
48 | pred_w = np.exp(dw) * widths[:, np.newaxis]
49 | pred_h = np.exp(dh) * heights[:, np.newaxis]
50 |
51 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
52 | # x1
53 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
54 | # y1
55 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
56 | # x2
57 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
58 | # y2
59 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h
60 |
61 | return pred_boxes
62 |
63 | def clip_boxes(boxes, im_shape):
64 | """
65 | Clip boxes to image boundaries.
66 | """
67 |
68 | # x1 >= 0
69 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
70 | # y1 >= 0
71 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
72 | # x2 < im_shape[1]
73 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
74 | # y2 < im_shape[0]
75 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
76 | return boxes
77 |
--------------------------------------------------------------------------------
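A small numpy sketch of the encode/decode pair above (the coordinates are arbitrary and the import path is an assumption that depends on how lib/rpn_tools/fast_rcnn is exposed on sys.path). Because widths are measured as x2 - x1 + 1 while the decode uses symmetric +/- 0.5 * w offsets, the round trip recovers x1/y1 exactly and puts x2/y2 one pixel over:

import numpy as np
from bbox_transform import bbox_transform, bbox_transform_inv, clip_boxes  # path is an assumption

anchors = np.array([[10., 10., 50., 60.],
                    [20., 30., 80., 90.]])
gt      = np.array([[12., 14., 48., 66.],
                    [25., 28., 85., 95.]])

deltas  = bbox_transform(anchors, gt)          # (N, 4) targets (dx, dy, dw, dh) the regressor learns
decoded = bbox_transform_inv(anchors, deltas)  # apply the deltas back onto the anchors
decoded = clip_boxes(decoded, (100, 100))      # clip to a hypothetical 100x100 (H, W) image

print(np.abs(decoded - gt).max())  # ~1.0: x1/y1 match exactly, x2/y2 come back one pixel larger
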
/lib/rpn_tools/fast_rcnn/bbox_transform.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/bbox_transform.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/fast_rcnn/config.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/config.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/fast_rcnn/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/nms/__init__.py
--------------------------------------------------------------------------------
/lib/rpn_tools/fast_rcnn/nms/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/nms/__init__.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/fast_rcnn/nms/cpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
12 | return a if a >= b else b
13 |
14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
15 | return a if a <= b else b
16 |
17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
23 |
24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
26 |
27 | cdef int ndets = dets.shape[0]
28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \
29 | np.zeros((ndets), dtype=np.int)
30 |
31 | # nominal indices
32 | cdef int _i, _j
33 | # sorted indices
34 | cdef int i, j
35 | # temp variables for box i's (the box currently under consideration)
36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea
37 | # variables for computing overlap with box j (lower scoring box)
38 | cdef np.float32_t xx1, yy1, xx2, yy2
39 | cdef np.float32_t w, h
40 | cdef np.float32_t inter, ovr
41 |
42 | keep = []
43 | for _i in range(ndets):
44 | i = order[_i]
45 | if suppressed[i] == 1:
46 | continue
47 | keep.append(i)
48 | ix1 = x1[i]
49 | iy1 = y1[i]
50 | ix2 = x2[i]
51 | iy2 = y2[i]
52 | iarea = areas[i]
53 | for _j in range(_i + 1, ndets):
54 | j = order[_j]
55 | if suppressed[j] == 1:
56 | continue
57 | xx1 = max(ix1, x1[j])
58 | yy1 = max(iy1, y1[j])
59 | xx2 = min(ix2, x2[j])
60 | yy2 = min(iy2, y2[j])
61 | w = max(0.0, xx2 - xx1 + 1)
62 | h = max(0.0, yy2 - yy1 + 1)
63 | inter = w * h
64 | ovr = inter / (iarea + areas[j] - inter)
65 | if ovr >= thresh:
66 | suppressed[j] = 1
67 |
68 | return keep
69 |
--------------------------------------------------------------------------------
/lib/rpn_tools/fast_rcnn/nms/cpu_nms.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/nms/cpu_nms.so
--------------------------------------------------------------------------------
/lib/rpn_tools/fast_rcnn/nms/gpu_mv.hpp:
--------------------------------------------------------------------------------
1 | void _mv(const float* all_boxes, const float* all_masks, const int all_boxes_num,
2 | const int* candidate_inds, const int* candidate_start, const float* candidate_weights, const int candidate_num,
3 | const int image_height, const int image_width, const int box_dim, const int mask_size, const int result_num,
4 | float* finalize_output_mask, int* finalize_output_box, const int device_id);
5 |
--------------------------------------------------------------------------------
/lib/rpn_tools/fast_rcnn/nms/gpu_mv.pyx:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 | cimport numpy as np
4 |
5 | assert sizeof(int) == sizeof(np.int32_t)
6 |
7 | cdef extern from "gpu_mv.hpp":
8 | void _mv(np.float32_t* all_boxes, np.float32_t* all_masks, np.int32_t all_boxes_num, np.int32_t* candidate_inds, np.int32_t* candidate_start, np.float32_t* candidate_weights, np.int32_t candidate_num, np.int32_t image_height, np.int32_t image_width, np.int32_t box_dim, np.int32_t mask_size, np.int32_t result_num, np.float32_t* result_mask, np.int32_t* result_box, np.int32_t device_id);
9 |
10 | # boxes: n * 4
11 | # masks: n * 1 * 21 * 21
12 | # scores: n * 21
13 | def mv(np.ndarray[np.float32_t, ndim=2] all_boxes,
14 | np.ndarray[np.float32_t, ndim=4] all_masks,
15 | np.ndarray[np.int32_t, ndim=1] candidate_inds,
16 | np.ndarray[np.int32_t, ndim=1] candidate_start,
17 | np.ndarray[np.float32_t, ndim=1] candidate_weights,
18 | np.int32_t image_height,
19 | np.int32_t image_width,
20 | np.int32_t device_id = 0):
21 | cdef int all_box_num = all_boxes.shape[0]
22 | cdef int boxes_dim = all_boxes.shape[1]
23 | cdef int mask_size = all_masks.shape[3]
24 | cdef int candidate_num = candidate_inds.shape[0]
25 | cdef int result_num = candidate_start.shape[0]
26 | cdef np.ndarray[np.float32_t, ndim=4] \
27 | result_mask = np.zeros((result_num, 1, all_masks.shape[2], all_masks.shape[3]), dtype=np.float32)
28 | cdef np.ndarray[np.int32_t, ndim=2] \
29 | result_box = np.zeros((result_num, boxes_dim), dtype=np.int32)
30 | _mv(&all_boxes[0, 0], &all_masks[0, 0, 0, 0], all_box_num, &candidate_inds[0], &candidate_start[0], &candidate_weights[0], candidate_num, image_height, image_width, boxes_dim, mask_size, candidate_start.shape[0], &result_mask[0,0,0,0], &result_box[0,0], device_id)
31 | return result_mask, result_box
32 |
--------------------------------------------------------------------------------
/lib/rpn_tools/fast_rcnn/nms/gpu_nms.hpp:
--------------------------------------------------------------------------------
1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
2 | int boxes_dim, float nms_overlap_thresh, int device_id);
3 |
--------------------------------------------------------------------------------
/lib/rpn_tools/fast_rcnn/nms/gpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | assert sizeof(int) == sizeof(np.int32_t)
12 |
13 | cdef extern from "gpu_nms.hpp":
14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)
15 |
16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
17 | np.int32_t device_id=0):
18 | cdef int boxes_num = dets.shape[0]
19 | cdef int boxes_dim = dets.shape[1]
20 | cdef int num_out
21 | cdef np.ndarray[np.int32_t, ndim=1] \
22 | keep = np.zeros(boxes_num, dtype=np.int32)
23 | cdef np.ndarray[np.float32_t, ndim=1] \
24 | scores = dets[:, 4]
25 | cdef np.ndarray[np.int_t, ndim=1] \
26 | order = scores.argsort()[::-1]
27 | cdef np.ndarray[np.float32_t, ndim=2] \
28 | sorted_dets = dets[order, :]
29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
30 | keep = keep[:num_out]
31 | return list(order[keep])
32 |
--------------------------------------------------------------------------------
/lib/rpn_tools/fast_rcnn/nms/gpu_nms.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/nms/gpu_nms.so
--------------------------------------------------------------------------------
/lib/rpn_tools/fast_rcnn/nms/mnc_config.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/nms/mnc_config.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/fast_rcnn/nms/mv.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/nms/mv.so
--------------------------------------------------------------------------------
/lib/rpn_tools/fast_rcnn/nms/nms_kernel.cu:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------
2 | // Multitask Network Cascade
3 | // Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
4 | // Copyright (c) 2016, Haozhi Qi
5 | // Licensed under The MIT License [see LICENSE for details]
6 | // --------------------------------------------------------
7 |
8 | #include "gpu_nms.hpp"
9 | #include <vector>
10 | #include <iostream>
11 |
12 | #define CUDA_CHECK(condition) \
13 | /* Code block avoids redefinition of cudaError_t error */ \
14 | do { \
15 | cudaError_t error = condition; \
16 | if (error != cudaSuccess) { \
17 | std::cout << cudaGetErrorString(error) << std::endl; \
18 | } \
19 | } while (0)
20 |
21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
22 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
23 |
24 | __device__ inline float devIoU(float const * const a, float const * const b) {
25 | float left = max(a[0], b[0]), right = min(a[2], b[2]);
26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
28 | float interS = width * height;
29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
31 | return interS / (Sa + Sb - interS);
32 | }
33 |
34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
35 | const float *dev_boxes, unsigned long long *dev_mask) {
36 | const int row_start = blockIdx.y;
37 | const int col_start = blockIdx.x;
38 |
39 | // if (row_start > col_start) return;
40 |
41 | const int row_size =
42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
43 | const int col_size =
44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
45 |
46 | __shared__ float block_boxes[threadsPerBlock * 5];
47 | if (threadIdx.x < col_size) {
48 | block_boxes[threadIdx.x * 5 + 0] =
49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
50 | block_boxes[threadIdx.x * 5 + 1] =
51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
52 | block_boxes[threadIdx.x * 5 + 2] =
53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
54 | block_boxes[threadIdx.x * 5 + 3] =
55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
56 | block_boxes[threadIdx.x * 5 + 4] =
57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
58 | }
59 | __syncthreads();
60 |
61 | if (threadIdx.x < row_size) {
62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
63 | const float *cur_box = dev_boxes + cur_box_idx * 5;
64 | int i = 0;
65 | unsigned long long t = 0;
66 | int start = 0;
67 | if (row_start == col_start) {
68 | start = threadIdx.x + 1;
69 | }
70 | for (i = start; i < col_size; i++) {
71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
72 | t |= 1ULL << i;
73 | }
74 | }
75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
76 | dev_mask[cur_box_idx * col_blocks + col_start] = t;
77 | }
78 | }
79 |
80 | void _set_device(int device_id) {
81 | int current_device;
82 |   CUDA_CHECK(cudaGetDevice(&current_device));
83 | if (current_device == device_id) {
84 | return;
85 | }
86 | // The call to cudaSetDevice must come before any calls to Get, which
87 | // may perform initialization using the GPU.
88 | CUDA_CHECK(cudaSetDevice(device_id));
89 | }
90 |
91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
92 | int boxes_dim, float nms_overlap_thresh, int device_id) {
93 | _set_device(device_id);
94 |
95 | float* boxes_dev = NULL;
96 | unsigned long long* mask_dev = NULL;
97 |
98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
99 |
100 | CUDA_CHECK(cudaMalloc(&boxes_dev,
101 | boxes_num * boxes_dim * sizeof(float)));
102 | CUDA_CHECK(cudaMemcpy(boxes_dev,
103 | boxes_host,
104 | boxes_num * boxes_dim * sizeof(float),
105 | cudaMemcpyHostToDevice));
106 |
107 | CUDA_CHECK(cudaMalloc(&mask_dev,
108 | boxes_num * col_blocks * sizeof(unsigned long long)));
109 |
110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 | DIVUP(boxes_num, threadsPerBlock));
112 | dim3 threads(threadsPerBlock);
113 |   nms_kernel<<<blocks, threads>>>(boxes_num,
114 | nms_overlap_thresh,
115 | boxes_dev,
116 | mask_dev);
117 |
118 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119 | CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 | mask_dev,
121 | sizeof(unsigned long long) * boxes_num * col_blocks,
122 | cudaMemcpyDeviceToHost));
123 |
124 |   std::vector<unsigned long long> remv(col_blocks);
125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 |
127 | int num_to_keep = 0;
128 | for (int i = 0; i < boxes_num; i++) {
129 | int nblock = i / threadsPerBlock;
130 | int inblock = i % threadsPerBlock;
131 |
132 | if (!(remv[nblock] & (1ULL << inblock))) {
133 | keep_out[num_to_keep++] = i;
134 | unsigned long long *p = &mask_host[0] + i * col_blocks;
135 | for (int j = nblock; j < col_blocks; j++) {
136 | remv[j] |= p[j];
137 | }
138 | }
139 | }
140 | *num_out = num_to_keep;
141 |
142 | CUDA_CHECK(cudaFree(boxes_dev));
143 | CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 |
--------------------------------------------------------------------------------
/lib/rpn_tools/fast_rcnn/nms/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | from mnc_config import cfg
9 | from gpu_nms import gpu_nms
10 | from cpu_nms import cpu_nms
11 |
12 |
13 | def nms(dets, thresh):
14 | """Dispatch to either CPU or GPU NMS implementations."""
15 |
16 | if dets.shape[0] == 0:
17 | return []
18 | if cfg.USE_GPU_NMS:
19 | return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
20 | else:
21 | return cpu_nms(dets, thresh)
22 |
23 |
24 | def apply_nms(all_boxes, thresh):
25 | """Apply non-maximum suppression to all predicted boxes output by the
26 | test_net method.
27 | """
28 | num_classes = len(all_boxes)
29 | num_images = len(all_boxes[0])
30 | nms_boxes = [[[] for _ in xrange(num_images)]
31 | for _ in xrange(num_classes)]
32 | for cls_ind in xrange(num_classes):
33 | for im_ind in xrange(num_images):
34 | dets = all_boxes[cls_ind][im_ind]
35 | if dets == []:
36 | continue
37 | keep = nms(dets, thresh)
38 | if len(keep) == 0:
39 | continue
40 | nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
41 | return nms_boxes
42 |
43 |
44 | def apply_nms_mask(all_boxes, all_masks, thresh):
45 | num_classes = len(all_boxes)
46 | num_images = len(all_boxes[0])
47 | nms_boxes = [[[] for _ in xrange(num_images)]
48 | for _ in xrange(num_classes)]
49 | nms_masks = [[[] for _ in xrange(num_images)]
50 | for _ in xrange(num_classes)]
51 | for cls_ind in xrange(num_classes):
52 | for im_ind in xrange(num_images):
53 | dets = all_boxes[cls_ind][im_ind]
54 | masks = all_masks[cls_ind][im_ind]
55 | if dets == []:
56 | continue
57 | keep = nms(dets, thresh)
58 | if len(keep) == 0:
59 | continue
60 | nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
61 | nms_masks[cls_ind][im_ind] = masks[keep, :].copy()
62 | return nms_boxes, nms_masks
63 |
64 |
65 | def apply_nms_mask_single(box, mask, thresh):
66 | if box == []:
67 | return box, mask
68 | keep = nms(box, thresh)
69 | if len(keep) == 0:
70 | return box, mask
71 | return box[keep, :].copy(), mask[keep, :].copy()
72 |
--------------------------------------------------------------------------------
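For reference, the all_boxes structure that apply_nms / apply_nms_mask above iterate over is a nested list indexed [class][image], each cell holding an (N, 5) array of (x1, y1, x2, y2, score) or an empty list. A hedged sketch of that layout (the actual call is commented out because it needs the compiled Cython NMS modules and the Python 2 xrange used above):

import numpy as np

num_classes, num_images = 2, 1
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]

# Detections for class 1 in image 0; class 0 (background) stays empty.
all_boxes[1][0] = np.array([[10, 10, 60, 60, 0.95],
                            [12, 12, 62, 62, 0.90]], dtype=np.float32)

# With the module importable, this would suppress the lower-scoring overlapping box:
# nms_boxes = apply_nms(all_boxes, thresh=0.3)
# nms_boxes[1][0] -> the single surviving (1, 5) detection
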
/lib/rpn_tools/fast_rcnn/nms/nms_wrapper.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/nms/nms_wrapper.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/fast_rcnn/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 | def py_cpu_nms(dets, thresh):
11 | """Pure Python NMS baseline."""
12 | x1 = dets[:, 0]
13 | y1 = dets[:, 1]
14 | x2 = dets[:, 2]
15 | y2 = dets[:, 3]
16 | scores = dets[:, 4]
17 |
18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
19 | order = scores.argsort()[::-1]
20 |
21 | keep = []
22 | while order.size > 0:
23 | i = order[0]
24 | keep.append(i)
25 | xx1 = np.maximum(x1[i], x1[order[1:]])
26 | yy1 = np.maximum(y1[i], y1[order[1:]])
27 | xx2 = np.minimum(x2[i], x2[order[1:]])
28 | yy2 = np.minimum(y2[i], y2[order[1:]])
29 |
30 | w = np.maximum(0.0, xx2 - xx1 + 1)
31 | h = np.maximum(0.0, yy2 - yy1 + 1)
32 | inter = w * h
33 | ovr = inter / (areas[i] + areas[order[1:]] - inter)
34 |
35 | inds = np.where(ovr <= thresh)[0]
36 | order = order[inds + 1]
37 |
38 | return keep
39 |
--------------------------------------------------------------------------------
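The pure-Python baseline above is convenient for sanity-checking the compiled kernels; a minimal sketch with made-up detections, each row being (x1, y1, x2, y2, score):

import numpy as np
from py_cpu_nms import py_cpu_nms  # path is an assumption; or paste the function above into the session

dets = np.array([
    [10, 10, 60, 60, 0.95],      # kept: highest score
    [12, 12, 62, 62, 0.90],      # suppressed: IoU with the first box is ~0.86 > 0.5
    [100, 100, 150, 150, 0.80],  # kept: disjoint from the first box
], dtype=np.float32)

print(py_cpu_nms(dets, thresh=0.5))  # indices of the kept boxes: [0, 2]
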
/lib/rpn_tools/fast_rcnn/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | from config import cfg
9 | from nms.gpu_nms import gpu_nms
10 | from nms.cpu_nms import cpu_nms
11 |
12 | def nms(dets, thresh, force_cpu=False):
13 | """Dispatch to either CPU or GPU NMS implementations."""
14 |
15 | if dets.shape[0] == 0:
16 | return []
17 | if cfg.USE_GPU_NMS and not force_cpu:
18 | return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
19 | else:
20 | return cpu_nms(dets, thresh)
21 |
--------------------------------------------------------------------------------
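A hedged sketch of driving the dispatcher above. The wrapper uses Python 2 style implicit relative imports (config, nms.gpu_nms, nms.cpu_nms), so it assumes lib/rpn_tools/fast_rcnn is on sys.path and the Cython extensions are built; the detection values below are illustrative:

import numpy as np
from nms_wrapper import nms   # assumes Python 2, lib/rpn_tools/fast_rcnn on sys.path, built .pyx modules
from config import cfg

dets = np.array([[20, 20, 70, 70, 0.90],
                 [22, 18, 72, 68, 0.80],
                 [200, 200, 240, 240, 0.60]], dtype=np.float32)

cfg.GPU_ID = 0                          # only used when the GPU path is taken
keep = nms(dets, 0.3, force_cpu=True)   # force the Cython CPU kernel
print(dets[keep, :])                    # the two non-overlapping survivors (rows 0 and 2)
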
/lib/rpn_tools/fast_rcnn/nms_wrapper.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/nms_wrapper.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/fast_rcnn/nms_wrapper.py~:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import cfg
9 | from nms.gpu_nms import gpu_nms
10 | from nms.cpu_nms import cpu_nms
11 |
12 | def nms(dets, thresh, force_cpu=False):
13 | """Dispatch to either CPU or GPU NMS implementations."""
14 |
15 | if dets.shape[0] == 0:
16 | return []
17 | if cfg.USE_GPU_NMS and not force_cpu:
18 | return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
19 | else:
20 | return cpu_nms(dets, thresh)
21 |
--------------------------------------------------------------------------------
/lib/rpn_tools/fast_rcnn/test.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/test.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/fast_rcnn/train.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/fast_rcnn/train.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/generate_anchors.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick and Sean Bell
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 | # Verify that we compute the same anchors as Shaoqing's matlab implementation:
11 | #
12 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
13 | # >> anchors
14 | #
15 | # anchors =
16 | #
17 | # -83 -39 100 56
18 | # -175 -87 192 104
19 | # -359 -183 376 200
20 | # -55 -55 72 72
21 | # -119 -119 136 136
22 | # -247 -247 264 264
23 | # -35 -79 52 96
24 | # -79 -167 96 184
25 | # -167 -343 184 360
26 |
27 | #array([[ -83., -39., 100., 56.],
28 | # [-175., -87., 192., 104.],
29 | # [-359., -183., 376., 200.],
30 | # [ -55., -55., 72., 72.],
31 | # [-119., -119., 136., 136.],
32 | # [-247., -247., 264., 264.],
33 | # [ -35., -79., 52., 96.],
34 | # [ -79., -167., 96., 184.],
35 | # [-167., -343., 184., 360.]])
36 |
37 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
38 | scales=2**np.arange(3, 6)):
39 | """
40 | Generate anchor (reference) windows by enumerating aspect ratios X
41 | scales wrt a reference (0, 0, 15, 15) window.
42 | """
43 |
44 | base_anchor = np.array([1, 1, base_size, base_size]) - 1
45 | ratio_anchors = _ratio_enum(base_anchor, ratios)
46 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
47 | for i in xrange(ratio_anchors.shape[0])])
48 | return anchors
49 |
50 | def _whctrs(anchor):
51 | """
52 | Return width, height, x center, and y center for an anchor (window).
53 | """
54 |
55 | w = anchor[2] - anchor[0] + 1
56 | h = anchor[3] - anchor[1] + 1
57 | x_ctr = anchor[0] + 0.5 * (w - 1)
58 | y_ctr = anchor[1] + 0.5 * (h - 1)
59 | return w, h, x_ctr, y_ctr
60 |
61 | def _mkanchors(ws, hs, x_ctr, y_ctr):
62 | """
63 | Given a vector of widths (ws) and heights (hs) around a center
64 | (x_ctr, y_ctr), output a set of anchors (windows).
65 | """
66 |
67 | ws = ws[:, np.newaxis]
68 | hs = hs[:, np.newaxis]
69 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
70 | y_ctr - 0.5 * (hs - 1),
71 | x_ctr + 0.5 * (ws - 1),
72 | y_ctr + 0.5 * (hs - 1)))
73 | return anchors
74 |
75 | def _ratio_enum(anchor, ratios):
76 | """
77 | Enumerate a set of anchors for each aspect ratio wrt an anchor.
78 | """
79 |
80 | w, h, x_ctr, y_ctr = _whctrs(anchor)
81 | size = w * h
82 | size_ratios = size / ratios
83 | ws = np.round(np.sqrt(size_ratios))
84 | hs = np.round(ws * ratios)
85 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
86 | return anchors
87 |
88 | def _scale_enum(anchor, scales):
89 | """
90 | Enumerate a set of anchors for each scale wrt an anchor.
91 | """
92 |
93 | w, h, x_ctr, y_ctr = _whctrs(anchor)
94 | ws = w * scales
95 | hs = h * scales
96 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
97 | return anchors
98 |
99 | if __name__ == '__main__':
100 | import time
101 | t = time.time()
102 | a = generate_anchors()
103 |     print(time.time() - t)
104 |     print(a)
105 | from IPython import embed; embed()
106 |
--------------------------------------------------------------------------------
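A quick sketch of calling the generator above with its defaults (Python 2, since the module uses xrange; the import path assumes lib/rpn_tools is on sys.path). Three ratios times three scales give the nine reference anchors listed in the comment block:

import numpy as np
from generate_anchors import generate_anchors

anchors = generate_anchors(base_size=16, ratios=[0.5, 1, 2], scales=2 ** np.arange(3, 6))
print(anchors.shape)  # (9, 4): rows are (x1, y1, x2, y2) centred on the 16x16 reference cell
print(anchors[3])     # [-55. -55.  72.  72.], the 128x128 square anchor (ratio 1, scale 8)
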
/lib/rpn_tools/generate_anchors.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/generate_anchors.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/my_anchor_target_layer_modified.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/my_anchor_target_layer_modified.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/nms/__init__.py
--------------------------------------------------------------------------------
/lib/rpn_tools/nms/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/nms/__init__.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/nms/cpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
12 | return a if a >= b else b
13 |
14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
15 | return a if a <= b else b
16 |
17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
23 |
24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
26 |
27 | cdef int ndets = dets.shape[0]
28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \
29 | np.zeros((ndets), dtype=np.int)
30 |
31 | # nominal indices
32 | cdef int _i, _j
33 | # sorted indices
34 | cdef int i, j
35 | # temp variables for box i's (the box currently under consideration)
36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea
37 | # variables for computing overlap with box j (lower scoring box)
38 | cdef np.float32_t xx1, yy1, xx2, yy2
39 | cdef np.float32_t w, h
40 | cdef np.float32_t inter, ovr
41 |
42 | keep = []
43 | for _i in range(ndets):
44 | i = order[_i]
45 | if suppressed[i] == 1:
46 | continue
47 | keep.append(i)
48 | ix1 = x1[i]
49 | iy1 = y1[i]
50 | ix2 = x2[i]
51 | iy2 = y2[i]
52 | iarea = areas[i]
53 | for _j in range(_i + 1, ndets):
54 | j = order[_j]
55 | if suppressed[j] == 1:
56 | continue
57 | xx1 = max(ix1, x1[j])
58 | yy1 = max(iy1, y1[j])
59 | xx2 = min(ix2, x2[j])
60 | yy2 = min(iy2, y2[j])
61 | w = max(0.0, xx2 - xx1 + 1)
62 | h = max(0.0, yy2 - yy1 + 1)
63 | inter = w * h
64 | ovr = inter / (iarea + areas[j] - inter)
65 | if ovr >= thresh:
66 | suppressed[j] = 1
67 |
68 | return keep
69 |
--------------------------------------------------------------------------------
/lib/rpn_tools/nms/cpu_nms.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/nms/cpu_nms.so
--------------------------------------------------------------------------------
/lib/rpn_tools/nms/gpu_mv.hpp:
--------------------------------------------------------------------------------
1 | void _mv(const float* all_boxes, const float* all_masks, const int all_boxes_num,
2 | const int* candidate_inds, const int* candidate_start, const float* candidate_weights, const int candidate_num,
3 | const int image_height, const int image_width, const int box_dim, const int mask_size, const int result_num,
4 | float* finalize_output_mask, int* finalize_output_box, const int device_id);
5 |
--------------------------------------------------------------------------------
/lib/rpn_tools/nms/gpu_mv.pyx:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 | cimport numpy as np
4 |
5 | assert sizeof(int) == sizeof(np.int32_t)
6 |
7 | cdef extern from "gpu_mv.hpp":
8 | void _mv(np.float32_t* all_boxes, np.float32_t* all_masks, np.int32_t all_boxes_num, np.int32_t* candidate_inds, np.int32_t* candidate_start, np.float32_t* candidate_weights, np.int32_t candidate_num, np.int32_t image_height, np.int32_t image_width, np.int32_t box_dim, np.int32_t mask_size, np.int32_t result_num, np.float32_t* result_mask, np.int32_t* result_box, np.int32_t device_id);
9 |
10 | # boxes: n * 4
11 | # masks: n * 1 * 21 * 21
12 | # scores: n * 21
13 | def mv(np.ndarray[np.float32_t, ndim=2] all_boxes,
14 | np.ndarray[np.float32_t, ndim=4] all_masks,
15 | np.ndarray[np.int32_t, ndim=1] candidate_inds,
16 | np.ndarray[np.int32_t, ndim=1] candidate_start,
17 | np.ndarray[np.float32_t, ndim=1] candidate_weights,
18 | np.int32_t image_height,
19 | np.int32_t image_width,
20 | np.int32_t device_id = 0):
21 | cdef int all_box_num = all_boxes.shape[0]
22 | cdef int boxes_dim = all_boxes.shape[1]
23 | cdef int mask_size = all_masks.shape[3]
24 | cdef int candidate_num = candidate_inds.shape[0]
25 | cdef int result_num = candidate_start.shape[0]
26 | cdef np.ndarray[np.float32_t, ndim=4] \
27 | result_mask = np.zeros((result_num, 1, all_masks.shape[2], all_masks.shape[3]), dtype=np.float32)
28 | cdef np.ndarray[np.int32_t, ndim=2] \
29 | result_box = np.zeros((result_num, boxes_dim), dtype=np.int32)
30 | _mv(&all_boxes[0, 0], &all_masks[0, 0, 0, 0], all_box_num, &candidate_inds[0], &candidate_start[0], &candidate_weights[0], candidate_num, image_height, image_width, boxes_dim, mask_size, candidate_start.shape[0], &result_mask[0,0,0,0], &result_box[0,0], device_id)
31 | return result_mask, result_box
32 |
--------------------------------------------------------------------------------
/lib/rpn_tools/nms/gpu_nms.hpp:
--------------------------------------------------------------------------------
1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
2 | int boxes_dim, float nms_overlap_thresh, int device_id);
3 |
--------------------------------------------------------------------------------
/lib/rpn_tools/nms/gpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | assert sizeof(int) == sizeof(np.int32_t)
12 |
13 | cdef extern from "gpu_nms.hpp":
14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)
15 |
16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
17 | np.int32_t device_id=0):
18 | cdef int boxes_num = dets.shape[0]
19 | cdef int boxes_dim = dets.shape[1]
20 | cdef int num_out
21 | cdef np.ndarray[np.int32_t, ndim=1] \
22 | keep = np.zeros(boxes_num, dtype=np.int32)
23 | cdef np.ndarray[np.float32_t, ndim=1] \
24 | scores = dets[:, 4]
25 | cdef np.ndarray[np.int_t, ndim=1] \
26 | order = scores.argsort()[::-1]
27 | cdef np.ndarray[np.float32_t, ndim=2] \
28 | sorted_dets = dets[order, :]
29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
30 | keep = keep[:num_out]
31 | return list(order[keep])
32 |
--------------------------------------------------------------------------------
/lib/rpn_tools/nms/gpu_nms.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/nms/gpu_nms.so
--------------------------------------------------------------------------------
/lib/rpn_tools/nms/mnc_config.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/nms/mnc_config.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/nms/mv.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/nms/mv.so
--------------------------------------------------------------------------------
/lib/rpn_tools/nms/nms_kernel.cu:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------
2 | // Multitask Network Cascade
3 | // Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
4 | // Copyright (c) 2016, Haozhi Qi
5 | // Licensed under The MIT License [see LICENSE for details]
6 | // --------------------------------------------------------
7 |
8 | #include "gpu_nms.hpp"
9 | #include <vector>
10 | #include <iostream>
11 |
12 | #define CUDA_CHECK(condition) \
13 | /* Code block avoids redefinition of cudaError_t error */ \
14 | do { \
15 | cudaError_t error = condition; \
16 | if (error != cudaSuccess) { \
17 | std::cout << cudaGetErrorString(error) << std::endl; \
18 | } \
19 | } while (0)
20 |
21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
22 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
23 |
24 | __device__ inline float devIoU(float const * const a, float const * const b) {
25 | float left = max(a[0], b[0]), right = min(a[2], b[2]);
26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
28 | float interS = width * height;
29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
31 | return interS / (Sa + Sb - interS);
32 | }
33 |
34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
35 | const float *dev_boxes, unsigned long long *dev_mask) {
36 | const int row_start = blockIdx.y;
37 | const int col_start = blockIdx.x;
38 |
39 | // if (row_start > col_start) return;
40 |
41 | const int row_size =
42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
43 | const int col_size =
44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
45 |
46 | __shared__ float block_boxes[threadsPerBlock * 5];
47 | if (threadIdx.x < col_size) {
48 | block_boxes[threadIdx.x * 5 + 0] =
49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
50 | block_boxes[threadIdx.x * 5 + 1] =
51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
52 | block_boxes[threadIdx.x * 5 + 2] =
53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
54 | block_boxes[threadIdx.x * 5 + 3] =
55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
56 | block_boxes[threadIdx.x * 5 + 4] =
57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
58 | }
59 | __syncthreads();
60 |
61 | if (threadIdx.x < row_size) {
62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
63 | const float *cur_box = dev_boxes + cur_box_idx * 5;
64 | int i = 0;
65 | unsigned long long t = 0;
66 | int start = 0;
67 | if (row_start == col_start) {
68 | start = threadIdx.x + 1;
69 | }
70 | for (i = start; i < col_size; i++) {
71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
72 | t |= 1ULL << i;
73 | }
74 | }
75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
76 | dev_mask[cur_box_idx * col_blocks + col_start] = t;
77 | }
78 | }
79 |
80 | void _set_device(int device_id) {
81 | int current_device;
82 |   CUDA_CHECK(cudaGetDevice(&current_device));
83 | if (current_device == device_id) {
84 | return;
85 | }
86 | // The call to cudaSetDevice must come before any calls to Get, which
87 | // may perform initialization using the GPU.
88 | CUDA_CHECK(cudaSetDevice(device_id));
89 | }
90 |
91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
92 | int boxes_dim, float nms_overlap_thresh, int device_id) {
93 | _set_device(device_id);
94 |
95 | float* boxes_dev = NULL;
96 | unsigned long long* mask_dev = NULL;
97 |
98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
99 |
100 | CUDA_CHECK(cudaMalloc(&boxes_dev,
101 | boxes_num * boxes_dim * sizeof(float)));
102 | CUDA_CHECK(cudaMemcpy(boxes_dev,
103 | boxes_host,
104 | boxes_num * boxes_dim * sizeof(float),
105 | cudaMemcpyHostToDevice));
106 |
107 | CUDA_CHECK(cudaMalloc(&mask_dev,
108 | boxes_num * col_blocks * sizeof(unsigned long long)));
109 |
110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 | DIVUP(boxes_num, threadsPerBlock));
112 | dim3 threads(threadsPerBlock);
113 |   nms_kernel<<<blocks, threads>>>(boxes_num,
114 | nms_overlap_thresh,
115 | boxes_dev,
116 | mask_dev);
117 |
118 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119 | CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 | mask_dev,
121 | sizeof(unsigned long long) * boxes_num * col_blocks,
122 | cudaMemcpyDeviceToHost));
123 |
124 |   std::vector<unsigned long long> remv(col_blocks);
125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 |
127 | int num_to_keep = 0;
128 | for (int i = 0; i < boxes_num; i++) {
129 | int nblock = i / threadsPerBlock;
130 | int inblock = i % threadsPerBlock;
131 |
132 | if (!(remv[nblock] & (1ULL << inblock))) {
133 | keep_out[num_to_keep++] = i;
134 | unsigned long long *p = &mask_host[0] + i * col_blocks;
135 | for (int j = nblock; j < col_blocks; j++) {
136 | remv[j] |= p[j];
137 | }
138 | }
139 | }
140 | *num_out = num_to_keep;
141 |
142 | CUDA_CHECK(cudaFree(boxes_dev));
143 | CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 |
--------------------------------------------------------------------------------
/lib/rpn_tools/nms/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | from mnc_config import cfg
9 | from gpu_nms import gpu_nms
10 | from cpu_nms import cpu_nms
11 |
12 |
13 | def nms(dets, thresh):
14 | """Dispatch to either CPU or GPU NMS implementations."""
15 |
16 | if dets.shape[0] == 0:
17 | return []
18 | if cfg.USE_GPU_NMS:
19 | return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
20 | else:
21 | return cpu_nms(dets, thresh)
22 |
23 |
24 | def apply_nms(all_boxes, thresh):
25 | """Apply non-maximum suppression to all predicted boxes output by the
26 | test_net method.
27 | """
28 | num_classes = len(all_boxes)
29 | num_images = len(all_boxes[0])
30 | nms_boxes = [[[] for _ in xrange(num_images)]
31 | for _ in xrange(num_classes)]
32 | for cls_ind in xrange(num_classes):
33 | for im_ind in xrange(num_images):
34 | dets = all_boxes[cls_ind][im_ind]
35 | if dets == []:
36 | continue
37 | keep = nms(dets, thresh)
38 | if len(keep) == 0:
39 | continue
40 | nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
41 | return nms_boxes
42 |
43 |
44 | def apply_nms_mask(all_boxes, all_masks, thresh):
45 | num_classes = len(all_boxes)
46 | num_images = len(all_boxes[0])
47 | nms_boxes = [[[] for _ in xrange(num_images)]
48 | for _ in xrange(num_classes)]
49 | nms_masks = [[[] for _ in xrange(num_images)]
50 | for _ in xrange(num_classes)]
51 | for cls_ind in xrange(num_classes):
52 | for im_ind in xrange(num_images):
53 | dets = all_boxes[cls_ind][im_ind]
54 | masks = all_masks[cls_ind][im_ind]
55 | if dets == []:
56 | continue
57 | keep = nms(dets, thresh)
58 | if len(keep) == 0:
59 | continue
60 | nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
61 | nms_masks[cls_ind][im_ind] = masks[keep, :].copy()
62 | return nms_boxes, nms_masks
63 |
64 |
65 | def apply_nms_mask_single(box, mask, thresh):
66 | if box == []:
67 | return box, mask
68 | keep = nms(box, thresh)
69 | if len(keep) == 0:
70 | return box, mask
71 | return box[keep, :].copy(), mask[keep, :].copy()
72 |
--------------------------------------------------------------------------------
/lib/rpn_tools/nms/nms_wrapper.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/nms/nms_wrapper.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 | def py_cpu_nms(dets, thresh):
11 | """Pure Python NMS baseline."""
12 | x1 = dets[:, 0]
13 | y1 = dets[:, 1]
14 | x2 = dets[:, 2]
15 | y2 = dets[:, 3]
16 | scores = dets[:, 4]
17 |
18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
19 | order = scores.argsort()[::-1]
20 |
21 | keep = []
22 | while order.size > 0:
23 | i = order[0]
24 | keep.append(i)
25 | xx1 = np.maximum(x1[i], x1[order[1:]])
26 | yy1 = np.maximum(y1[i], y1[order[1:]])
27 | xx2 = np.minimum(x2[i], x2[order[1:]])
28 | yy2 = np.minimum(y2[i], y2[order[1:]])
29 |
30 | w = np.maximum(0.0, xx2 - xx1 + 1)
31 | h = np.maximum(0.0, yy2 - yy1 + 1)
32 | inter = w * h
33 | ovr = inter / (areas[i] + areas[order[1:]] - inter)
34 |
35 | inds = np.where(ovr <= thresh)[0]
36 | order = order[inds + 1]
37 |
38 | return keep
39 |
--------------------------------------------------------------------------------
/lib/rpn_tools/proposal_layer_modified.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/proposal_layer_modified.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/proposal_target_layer_modified.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/proposal_target_layer_modified.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/utils/__init__.py
--------------------------------------------------------------------------------
/lib/rpn_tools/utils/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/utils/__init__.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/utils/bbox.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | cimport cython
9 | import numpy as np
10 | cimport numpy as np
11 |
12 | DTYPE = np.float
13 | ctypedef np.float_t DTYPE_t
14 |
15 | def bbox_overlaps(
16 | np.ndarray[DTYPE_t, ndim=2] boxes,
17 | np.ndarray[DTYPE_t, ndim=2] query_boxes):
18 | """
19 | Parameters
20 | ----------
21 | boxes: (N, 4) ndarray of float
22 | query_boxes: (K, 4) ndarray of float
23 | Returns
24 | -------
25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes
26 | """
27 | cdef unsigned int N = boxes.shape[0]
28 | cdef unsigned int K = query_boxes.shape[0]
29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
30 | cdef DTYPE_t iw, ih, box_area
31 | cdef DTYPE_t ua
32 | cdef unsigned int k, n
33 | for k in range(K):
34 | box_area = (
35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1)
37 | )
38 | for n in range(N):
39 | iw = (
40 | min(boxes[n, 2], query_boxes[k, 2]) -
41 | max(boxes[n, 0], query_boxes[k, 0]) + 1
42 | )
43 | if iw > 0:
44 | ih = (
45 | min(boxes[n, 3], query_boxes[k, 3]) -
46 | max(boxes[n, 1], query_boxes[k, 1]) + 1
47 | )
48 | if ih > 0:
49 | ua = float(
50 | (boxes[n, 2] - boxes[n, 0] + 1) *
51 | (boxes[n, 3] - boxes[n, 1] + 1) +
52 | box_area - iw * ih
53 | )
54 | overlaps[n, k] = iw * ih / ua
55 | return overlaps
56 |
--------------------------------------------------------------------------------
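For reference, a vectorized numpy restatement of the loop above with the same +1 pixel-area convention; handy for checking a build of the cython_bbox extension (the function name and the toy boxes are illustrative):

import numpy as np

def bbox_overlaps_np(boxes, query_boxes):
    """IoU between each of the N `boxes` and each of the K `query_boxes`, as an (N, K) array."""
    area_b = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
    area_q = (query_boxes[:, 2] - query_boxes[:, 0] + 1) * (query_boxes[:, 3] - query_boxes[:, 1] + 1)
    iw = (np.minimum(boxes[:, None, 2], query_boxes[None, :, 2]) -
          np.maximum(boxes[:, None, 0], query_boxes[None, :, 0]) + 1).clip(min=0)
    ih = (np.minimum(boxes[:, None, 3], query_boxes[None, :, 3]) -
          np.maximum(boxes[:, None, 1], query_boxes[None, :, 1]) + 1).clip(min=0)
    inter = iw * ih
    return inter / (area_b[:, None] + area_q[None, :] - inter)

boxes = np.array([[0., 0., 9., 9.]])
query = np.array([[0., 0., 9., 9.], [5., 5., 14., 14.]])
print(bbox_overlaps_np(boxes, query))  # [[1.0, ~0.143]]
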
/lib/rpn_tools/utils/blob.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | """Blob helper functions."""
9 |
10 | import numpy as np
11 | import random
12 | import cv2
13 | from utils.cython_bbox import bbox_overlaps
14 | from mnc_config import cfg
15 |
16 |
17 | def im_list_to_blob(ims):
18 | """
19 | Convert a list of images into a network input.
20 | Assumes images are already prepared (means subtracted, BGR order, ...).
21 | """
22 | max_shape = np.array([im.shape for im in ims]).max(axis=0)
23 | num_images = len(ims)
24 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3),
25 | dtype=np.float32)
26 | for i in xrange(num_images):
27 | im = ims[i]
28 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
29 | # Move channels (axis 3) to axis 1
30 | # Axis order will become: (batch elem, channel, height, width)
31 | channel_swap = (0, 3, 1, 2)
32 | blob = blob.transpose(channel_swap)
33 | return blob
34 |
35 |
36 | def prep_im_for_blob(im, pixel_means, target_size, max_size):
37 | """Mean subtract and scale an image for use in a blob."""
38 | im = im.astype(np.float32, copy=False)
39 | im -= pixel_means
40 | im_shape = im.shape
41 | im_size_min = np.min(im_shape[0:2])
42 | im_size_max = np.max(im_shape[0:2])
43 | im_scale = float(target_size) / float(im_size_min)
44 | # Prevent the biggest axis from being more than MAX_SIZE
45 | if np.round(im_scale * im_size_max) > max_size:
46 | im_scale = float(max_size) / float(im_size_max)
47 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
48 | interpolation=cv2.INTER_LINEAR)
49 |
50 | return im, im_scale
51 |
52 |
53 | def prep_im_for_blob_cfm(im, input_scales):
54 | """Converts an image into a network input.
55 | Arguments:
56 | im (ndarray): a color image in BGR order
57 | Returns:
58 | blob (ndarray): a data blob holding an image pyramid
59 | im_scale_factors (list): list of image scales (relative to im) used
60 | in the image pyramid
61 | """
62 | im_orig = im.astype(np.float32, copy=True)
63 | im_orig -= cfg.PIXEL_MEANS
64 |
65 | im_shape = im_orig.shape
66 | im_size_min = np.min(im_shape[0:2])
67 | im_size_max = np.max(im_shape[0:2])
68 |
69 | processed_ims = []
70 | im_scale_factors = []
71 |
72 | for target_size in input_scales:
73 | im_scale = float(target_size) / float(im_size_min)
74 | # Prevent the biggest axis from being more than MAX_SIZE
75 | if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
76 | im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
77 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
78 | interpolation=cv2.INTER_LINEAR)
79 | im_scale_factors.append(im_scale)
80 | processed_ims.append(im)
81 |
82 | # Create a blob to hold the input images
83 | blob = im_list_to_blob(processed_ims)
84 |
85 | return blob, np.array(im_scale_factors)
86 |
87 |
88 | def pred_rois_for_blob(im_rois, im_scales):
89 | """
90 |     Convert RoIs to the network input format;
91 |     supports multi-scale testing.
92 | """
93 | im_rois = im_rois.astype(np.float, copy=False)
94 | if len(im_scales) > 1:
95 | widths = im_rois[:, 2] - im_rois[:, 0] + 1
96 | heights = im_rois[:, 3] - im_rois[:, 1] + 1
97 |
98 | areas = widths * heights
99 | scaled_areas = areas[:, np.newaxis] * (im_scales[np.newaxis, :] ** 2)
100 | diff_areas = np.abs(scaled_areas - 224 * 224)
101 | levels = diff_areas.argmin(axis=1)[:, np.newaxis]
102 | else:
103 | levels = np.zeros((im_rois.shape[0], 1), dtype=np.int)
104 | im_rois = im_rois * im_scales[levels]
105 | rois_blob = np.hstack((levels.astype(np.float), im_rois))
106 | return rois_blob
107 |
108 |
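
Note: a short sketch of how the helpers above chain together, using a synthetic image so it stays self-contained; in the repo the image would come from cv2.imread and the means from mnc_config.cfg.PIXEL_MEANS (the mean values below are only an assumption for illustration).

    import numpy as np

    im = np.random.randint(0, 255, (375, 500, 3)).astype(np.uint8)  # stand-in BGR image
    pixel_means = np.array([[[102.9801, 115.9465, 122.7717]]])      # assumed per-channel means

    # scale the short side to 600, capping the long side at 1000
    im_prepped, im_scale = prep_im_for_blob(im, pixel_means, target_size=600, max_size=1000)

    # pack the prepared image(s) into an NCHW blob
    blob = im_list_to_blob([im_prepped])
    print(blob.shape, im_scale)  # (1, 3, 600, 800) and 1.6 for a 375x500 input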
--------------------------------------------------------------------------------
/lib/rpn_tools/utils/blob.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/utils/blob.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/utils/cython_bbox.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/utils/cython_bbox.so
--------------------------------------------------------------------------------
/lib/rpn_tools/utils/mnc_config.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/utils/mnc_config.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/utils/timer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import time
9 |
10 |
11 | class Timer(object):
12 | """A simple timer."""
13 | def __init__(self):
14 | self.total_time = 0.
15 | self.calls = 0
16 | self.start_time = 0.
17 | self.diff = 0.
18 | self.average_time = 0.
19 |
20 | def tic(self):
21 |         # using time.time instead of time.clock because time.clock
22 | # does not normalize for multithreading
23 | self.start_time = time.time()
24 |
25 | def toc(self, average=True):
26 | self.diff = time.time() - self.start_time
27 | self.total_time += self.diff
28 | self.calls += 1
29 | self.average_time = self.total_time / self.calls
30 | if average:
31 | return self.average_time
32 | else:
33 | return self.diff
34 |
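
Note: a tiny usage sketch of the Timer class above: wrap the measured section in tic()/toc(); toc(average=True) returns the running mean over all calls so far.

    import time

    timer = Timer()
    for _ in range(3):
        timer.tic()
        time.sleep(0.01)                     # stand-in for the work being timed
        print(timer.toc(average=False))      # duration of this call only
    print(timer.average_time, timer.calls)   # running mean and call count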
--------------------------------------------------------------------------------
/lib/rpn_tools/utils/timer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/utils/timer.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/utils/unmap.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 |
11 | def unmap(data, count, inds, fill=0):
12 |     """ Unmap a subset of items (data) back to the original set of items (of
13 | size count) """
14 | if len(data.shape) == 1:
15 | ret = np.empty((count, ), dtype=np.float32)
16 | ret.fill(fill)
17 | ret[inds] = data
18 | else:
19 | ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
20 | ret.fill(fill)
21 | ret[inds, :] = data
22 | return ret
23 |
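
Note: a quick illustration of unmap with toy data; values computed for a kept subset of anchors are scattered back into the full-length array, with the remaining slots filled.

    import numpy as np

    labels_subset = np.array([1., 0., 1.])   # values for the kept items
    inds = np.array([0, 2, 5])               # their positions in the full set
    full = unmap(labels_subset, count=7, inds=inds, fill=-1)
    print(full)                              # [ 1. -1.  0. -1. -1.  1. -1.]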
--------------------------------------------------------------------------------
/lib/rpn_tools/utils/unmap.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/utils/unmap.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/utils/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
--------------------------------------------------------------------------------
/lib/rpn_tools/utils/utils/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/utils/utils/__init__.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/utils/utils/bbox.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Sergey Karayev
6 | # --------------------------------------------------------
7 |
8 | cimport cython
9 | import numpy as np
10 | cimport numpy as np
11 |
12 | DTYPE = np.float
13 | ctypedef np.float_t DTYPE_t
14 |
15 | def bbox_overlaps(
16 | np.ndarray[DTYPE_t, ndim=2] boxes,
17 | np.ndarray[DTYPE_t, ndim=2] query_boxes):
18 | """
19 | Parameters
20 | ----------
21 | boxes: (N, 4) ndarray of float
22 | query_boxes: (K, 4) ndarray of float
23 | Returns
24 | -------
25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes
26 | """
27 | cdef unsigned int N = boxes.shape[0]
28 | cdef unsigned int K = query_boxes.shape[0]
29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
30 | cdef DTYPE_t iw, ih, box_area
31 | cdef DTYPE_t ua
32 | cdef unsigned int k, n
33 | for k in range(K):
34 | box_area = (
35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1)
37 | )
38 | for n in range(N):
39 | iw = (
40 | min(boxes[n, 2], query_boxes[k, 2]) -
41 | max(boxes[n, 0], query_boxes[k, 0]) + 1
42 | )
43 | if iw > 0:
44 | ih = (
45 | min(boxes[n, 3], query_boxes[k, 3]) -
46 | max(boxes[n, 1], query_boxes[k, 1]) + 1
47 | )
48 | if ih > 0:
49 | ua = float(
50 | (boxes[n, 2] - boxes[n, 0] + 1) *
51 | (boxes[n, 3] - boxes[n, 1] + 1) +
52 | box_area - iw * ih
53 | )
54 | overlaps[n, k] = iw * ih / ua
55 | return overlaps
56 |
--------------------------------------------------------------------------------
/lib/rpn_tools/utils/utils/blob.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | """Blob helper functions."""
9 |
10 | import numpy as np
11 | import cv2
12 |
13 | def im_list_to_blob(ims):
14 | """Convert a list of images into a network input.
15 |
16 | Assumes images are already prepared (means subtracted, BGR order, ...).
17 | """
18 | max_shape = np.array([im.shape for im in ims]).max(axis=0)
19 | num_images = len(ims)
20 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3),
21 | dtype=np.float32)
22 | for i in xrange(num_images):
23 | im = ims[i]
24 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
25 | # Move channels (axis 3) to axis 1
26 | # Axis order will become: (batch elem, channel, height, width)
27 | channel_swap = (0, 3, 1, 2)
28 | blob = blob.transpose(channel_swap)
29 | return blob
30 |
31 | def prep_im_for_blob(im, pixel_means, target_size, max_size):
32 | """Mean subtract and scale an image for use in a blob."""
33 | im = im.astype(np.float32, copy=False)
34 | im -= pixel_means
35 | im_shape = im.shape
36 | im_size_min = np.min(im_shape[0:2])
37 | im_size_max = np.max(im_shape[0:2])
38 | im_scale = float(target_size) / float(im_size_min)
39 | # Prevent the biggest axis from being more than MAX_SIZE
40 | if np.round(im_scale * im_size_max) > max_size:
41 | im_scale = float(max_size) / float(im_size_max)
42 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
43 | interpolation=cv2.INTER_LINEAR)
44 |
45 | return im, im_scale
46 |
--------------------------------------------------------------------------------
/lib/rpn_tools/utils/utils/blob.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/utils/utils/blob.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/utils/utils/timer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import time
9 |
10 | class Timer(object):
11 | """A simple timer."""
12 | def __init__(self):
13 | self.total_time = 0.
14 | self.calls = 0
15 | self.start_time = 0.
16 | self.diff = 0.
17 | self.average_time = 0.
18 |
19 | def tic(self):
20 |         # using time.time instead of time.clock because time.clock
21 | # does not normalize for multithreading
22 | self.start_time = time.time()
23 |
24 | def toc(self, average=True):
25 | self.diff = time.time() - self.start_time
26 | self.total_time += self.diff
27 | self.calls += 1
28 | self.average_time = self.total_time / self.calls
29 | if average:
30 | return self.average_time
31 | else:
32 | return self.diff
33 |
--------------------------------------------------------------------------------
/lib/rpn_tools/utils/utils/timer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/lib/rpn_tools/utils/utils/timer.pyc
--------------------------------------------------------------------------------
/lib/rpn_tools/utils/vis_seg.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Written by Haozhi Qi
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | import cPickle
10 | import os
11 | import cv2
12 | import Image
13 | from mnc_config import cfg
14 |
15 |
16 | def vis_seg(img_names, cls_names, output_dir, gt_dir):
17 | """
18 |     This function plots segmentation results to a specific directory.
19 | Args:
20 | img_names: list
21 | """
22 | assert os.path.exists(output_dir)
23 | # a list of dictionary
24 | inst_dir = os.path.join(output_dir, 'SegInst')
25 | cls_dir = os.path.join(output_dir, 'SegCls')
26 | res_dir = os.path.join(output_dir, 'SegRes')
27 | if not os.path.isdir(inst_dir):
28 | os.mkdir(inst_dir)
29 | if not os.path.isdir(cls_dir):
30 | os.mkdir(cls_dir)
31 | if not os.path.isdir(res_dir):
32 | os.mkdir(res_dir)
33 |
34 | res_list = _prepare_dict(img_names, cls_names, output_dir)
35 | for img_ind, image_name in enumerate(img_names):
36 | target_inst_file = os.path.join(inst_dir, image_name + '.jpg')
37 | target_cls_file = os.path.join(cls_dir, image_name + '.jpg')
38 | print image_name
39 | gt_image = gt_dir + '/img/' + image_name + '.jpg'
40 | img_data = cv2.imread(gt_image)
41 | img_width = img_data.shape[1]
42 | img_height = img_data.shape[0]
43 | pred_dict = res_list[img_ind]
44 | inst_img, cls_img = _convert_pred_to_image(img_width, img_height, pred_dict)
45 | color_map = _get_voc_color_map()
46 | inst_out_img = np.zeros((img_height, img_width, 3))
47 | cls_out_img = np.zeros((img_height, img_width, 3))
48 | for i in xrange(img_height):
49 | for j in xrange(img_width):
50 | inst_out_img[i][j] = color_map[inst_img[i][j]][::-1]
51 | cls_out_img[i][j] = color_map[cls_img[i][j]][::-1]
52 |
53 | cv2.imwrite(target_inst_file, inst_out_img)
54 | cv2.imwrite(target_cls_file, cls_out_img)
55 | background = Image.open(gt_image)
56 | mask = Image.open(target_cls_file)
57 | background = background.convert('RGBA')
58 | mask = mask.convert('RGBA')
59 | superimpose_image = Image.blend(background, mask, 0.8)
60 | name = os.path.join(res_dir, image_name + '.png')
61 | superimpose_image.save(name, 'PNG')
62 |
63 |
64 | def _prepare_dict(img_names, cls_names, cache_dir, vis_thresh=0.5):
65 | """
66 | Returns:
67 | list, each list is a dictionary contains mask list, box list
68 | """
69 | res_list = []
70 | det_file = os.path.join(cache_dir, 'res_boxes.pkl')
71 | with open(det_file, 'rb') as f:
72 | det_pkl = cPickle.load(f)
73 | seg_file = os.path.join(cache_dir, 'res_masks.pkl')
74 | with open(seg_file, 'rb') as f:
75 | seg_pkl = cPickle.load(f)
76 |
77 | for img_ind, image_name in enumerate(img_names):
78 | box_for_img = []
79 | mask_for_img = []
80 | cls_for_img = []
81 | for cls_ind, cls_name in enumerate(cls_names):
82 | if cls_name == '__background__' or len(det_pkl[cls_ind][img_ind]) == 0:
83 | continue
84 | det_for_img = det_pkl[cls_ind][img_ind]
85 | seg_for_img = seg_pkl[cls_ind][img_ind]
86 | keep_inds = np.where(det_for_img[:, -1] >= vis_thresh)[0]
87 | for keep in keep_inds:
88 | box_for_img.append(det_for_img[keep])
89 | # TODO: remove this annoying 0
90 | mask_for_img.append(seg_for_img[keep][0])
91 | cls_for_img.append(cls_ind)
92 | res_dict = {'image_name': image_name,
93 | 'cls_name': cls_for_img,
94 | 'boxes': box_for_img,
95 | 'masks': mask_for_img}
96 | res_list.append(res_dict)
97 |
98 | return res_list
99 |
100 |
101 | def _convert_pred_to_image(img_width, img_height, pred_dict):
102 | num_inst = len(pred_dict['boxes'])
103 | inst_img = np.zeros((img_height, img_width))
104 | cls_img = np.zeros((img_height, img_width))
105 | for i in xrange(num_inst):
106 | box = np.round(pred_dict['boxes'][i]).astype(int)
107 | mask = pred_dict['masks'][i]
108 | cls_num = pred_dict['cls_name'][i]
109 | # clip box into image space
110 | box[0] = min(max(box[0], 0), img_width - 1)
111 | box[1] = min(max(box[1], 0), img_height - 1)
112 | box[2] = min(max(box[2], 0), img_width - 1)
113 | box[3] = min(max(box[3], 0), img_height - 1)
114 | mask = cv2.resize(mask.astype(np.float32), (box[2]-box[0]+1, box[3]-box[1]+1))
115 | mask = mask >= cfg.BINARIZE_THRESH
116 |
117 | part1 = (i+1) * mask.astype(np.float32)
118 | part2 = np.multiply(np.logical_not(mask), inst_img[box[1]:box[3]+1, box[0]:box[2]+1])
119 | part3 = np.multiply(np.logical_not(mask), cls_img[box[1]:box[3]+1, box[0]:box[2]+1])
120 | inst_img[box[1]:box[3]+1, box[0]:box[2]+1] = part1 + part2
121 | cls_img[box[1]:box[3]+1, box[0]:box[2]+1] = cls_num * mask.astype(np.float32) + part3
122 | # Plot bounding boxes simultaneously
123 | cls_img[box[1]:box[3]+1, box[0]-1:box[0]+1] = 150
124 | cls_img[box[1]:box[3]+1, box[2]-1:box[2]+1] = 150
125 | cls_img[box[1]-1:box[1]+1, box[0]:box[2]+1] = 150
126 | cls_img[box[3]-1:box[3]+1, box[0]:box[2]+1] = 150
127 |
128 | inst_img = inst_img.astype(int)
129 | cls_img = cls_img.astype(int)
130 | return inst_img, cls_img
131 |
132 |
133 | def _get_voc_color_map(n=256):
134 | color_map = np.zeros((n, 3))
135 | for i in xrange(n):
136 | r = b = g = 0
137 | cid = i
138 | for j in xrange(0, 8):
139 | r = np.bitwise_or(r, np.left_shift(np.unpackbits(np.array([cid], dtype=np.uint8))[-1], 7-j))
140 | g = np.bitwise_or(g, np.left_shift(np.unpackbits(np.array([cid], dtype=np.uint8))[-2], 7-j))
141 | b = np.bitwise_or(b, np.left_shift(np.unpackbits(np.array([cid], dtype=np.uint8))[-3], 7-j))
142 | cid = np.right_shift(cid, 3)
143 |
144 | color_map[i][0] = r
145 | color_map[i][1] = g
146 | color_map[i][2] = b
147 | return color_map
148 |
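
Note: a small check of _get_voc_color_map, which builds the standard PASCAL VOC palette by de-interleaving the bits of each index into R/G/B; the first rows are easy to verify by hand. The function is module-private above, so this assumes it is in scope (e.g. run inside this module).

    color_map = _get_voc_color_map()
    print(color_map[0])   # [0. 0. 0.]     background -> black
    print(color_map[1])   # [128. 0. 0.]   class 1 -> dark red
    print(color_map[2])   # [0. 128. 0.]   class 2 -> dark green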
--------------------------------------------------------------------------------
/lib/setup.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import os
9 | from os.path import join as pjoin
10 | import numpy as np
11 | from distutils.core import setup
12 | from distutils.extension import Extension
13 | from Cython.Distutils import build_ext
14 |
15 | def find_in_path(name, path):
16 | "Find a file in a search path"
17 |     # adapted from http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
18 | for dir in path.split(os.pathsep):
19 | binpath = pjoin(dir, name)
20 | if os.path.exists(binpath):
21 | return os.path.abspath(binpath)
22 | return None
23 |
24 | def locate_cuda():
25 | """Locate the CUDA environment on the system
26 |
27 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
28 | and values giving the absolute path to each directory.
29 |
30 | Starts by looking for the CUDAHOME env variable. If not found, everything
31 | is based on finding 'nvcc' in the PATH.
32 | """
33 |
34 | # first check if the CUDAHOME env variable is in use
35 | if 'CUDAHOME' in os.environ:
36 | home = os.environ['CUDAHOME']
37 | nvcc = pjoin(home, 'bin', 'nvcc')
38 | else:
39 | # otherwise, search the PATH for NVCC
40 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
41 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path)
42 | if nvcc is None:
43 | raise EnvironmentError('The nvcc binary could not be '
44 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME')
45 | home = os.path.dirname(os.path.dirname(nvcc))
46 |
47 | cudaconfig = {'home':home, 'nvcc':nvcc,
48 | 'include': pjoin(home, 'include'),
49 | 'lib64': pjoin(home, 'lib64')}
50 | for k, v in list(cudaconfig.items()):
51 | if not os.path.exists(v):
52 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))
53 |
54 | return cudaconfig
55 | CUDA = locate_cuda()
56 |
57 | # Obtain the numpy include directory. This logic works across numpy versions.
58 | try:
59 | numpy_include = np.get_include()
60 | except AttributeError:
61 | numpy_include = np.get_numpy_include()
62 |
63 | def customize_compiler_for_nvcc(self):
64 | """inject deep into distutils to customize how the dispatch
65 | to gcc/nvcc works.
66 |
67 | If you subclass UnixCCompiler, it's not trivial to get your subclass
68 | injected in, and still have the right customizations (i.e.
69 | distutils.sysconfig.customize_compiler) run on it. So instead of going
70 |     the OO route, I have this. Note, it's kind of like a weird functional
71 | subclassing going on."""
72 |
73 |     # tell the compiler it can process .cu
74 | self.src_extensions.append('.cu')
75 |
76 |     # save references to the default compiler_so and _compile methods
77 | default_compiler_so = self.compiler_so
78 | super = self._compile
79 |
80 | # now redefine the _compile method. This gets executed for each
81 | # object but distutils doesn't have the ability to change compilers
82 | # based on source extension: we add it.
83 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
84 | print(extra_postargs)
85 | if os.path.splitext(src)[1] == '.cu':
86 | # use the cuda for .cu files
87 | self.set_executable('compiler_so', CUDA['nvcc'])
88 | # use only a subset of the extra_postargs, which are 1-1 translated
89 | # from the extra_compile_args in the Extension class
90 | postargs = extra_postargs['nvcc']
91 | else:
92 | postargs = extra_postargs['gcc']
93 |
94 | super(obj, src, ext, cc_args, postargs, pp_opts)
95 | # reset the default compiler_so, which we might have changed for cuda
96 | self.compiler_so = default_compiler_so
97 |
98 | # inject our redefined _compile method into the class
99 | self._compile = _compile
100 |
101 |
102 | # run the customize_compiler
103 | class custom_build_ext(build_ext):
104 | def build_extensions(self):
105 | customize_compiler_for_nvcc(self.compiler)
106 | build_ext.build_extensions(self)
107 |
108 | ext_modules = [
109 | Extension(
110 | "utils.cython_bbox",
111 | ["utils/bbox.pyx"],
112 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
113 | include_dirs = [numpy_include]
114 | ),
115 | Extension(
116 | "utils.cython_nms",
117 | ["utils/nms.pyx"],
118 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
119 | include_dirs = [numpy_include]
120 | ),
121 | Extension(
122 | "nms.cpu_nms",
123 | ["nms/cpu_nms.pyx"],
124 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
125 | include_dirs = [numpy_include]
126 | ),
127 | Extension('nms.gpu_nms',
128 | ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'],
129 | library_dirs=[CUDA['lib64']],
130 | libraries=['cudart'],
131 | language='c++',
132 | runtime_library_dirs=[CUDA['lib64']],
133 | # this syntax is specific to this build system
134 | # we're only going to use certain compiler args with nvcc and not with gcc
135 |                   # the implementation of this trick is in customize_compiler_for_nvcc() above
136 | extra_compile_args={'gcc': ["-Wno-unused-function"],
137 | 'nvcc': ['-arch=sm_35',
138 | '--ptxas-options=-v',
139 | '-c',
140 | '--compiler-options',
141 | "'-fPIC'"]},
142 | include_dirs = [numpy_include, CUDA['include']]
143 | ),
144 | Extension(
145 | 'pycocotools._mask',
146 | sources=['pycocotools/maskApi.c', 'pycocotools/_mask.pyx'],
147 | include_dirs = [numpy_include, 'pycocotools'],
148 | extra_compile_args={
149 | 'gcc': ['-Wno-cpp', '-Wno-unused-function', '-std=c99']},
150 | ),
151 | ]
152 |
153 | setup(
154 | name='fast_rcnn',
155 | ext_modules=ext_modules,
156 | # inject our custom trigger
157 | cmdclass={'build_ext': custom_build_ext},
158 | )
159 |
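
Note: the dict-valued extra_compile_args above is not standard distutils; it only works because custom_build_ext injects the _compile wrapper that picks the 'gcc' or 'nvcc' entry per source file. The extensions are typically built in place from the lib directory with `python setup.py build_ext --inplace`, assuming nvcc is reachable via PATH or CUDAHOME (see locate_cuda above).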
--------------------------------------------------------------------------------
/lib/utils/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
4 |
--------------------------------------------------------------------------------
/lib/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | from . import cython_nms
8 | from . import cython_bbox
9 | from . import boxes_grid
10 | from . import blob
11 | from . import nms
12 | from . import timer
--------------------------------------------------------------------------------
/lib/utils/bbox.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Sergey Karayev
6 | # --------------------------------------------------------
7 |
8 | cimport cython
9 | import numpy as np
10 | cimport numpy as np
11 |
12 | DTYPE = np.float
13 | ctypedef np.float_t DTYPE_t
14 |
15 | def bbox_overlaps(
16 | np.ndarray[DTYPE_t, ndim=2] boxes,
17 | np.ndarray[DTYPE_t, ndim=2] query_boxes):
18 | """
19 | Parameters
20 | ----------
21 | boxes: (N, 4) ndarray of float
22 | query_boxes: (K, 4) ndarray of float
23 | Returns
24 | -------
25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes
26 | """
27 | cdef unsigned int N = boxes.shape[0]
28 | cdef unsigned int K = query_boxes.shape[0]
29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
30 | cdef DTYPE_t iw, ih, box_area
31 | cdef DTYPE_t ua
32 | cdef unsigned int k, n
33 | for k in range(K):
34 | box_area = (
35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1)
37 | )
38 | for n in range(N):
39 | iw = (
40 | min(boxes[n, 2], query_boxes[k, 2]) -
41 | max(boxes[n, 0], query_boxes[k, 0]) + 1
42 | )
43 | if iw > 0:
44 | ih = (
45 | min(boxes[n, 3], query_boxes[k, 3]) -
46 | max(boxes[n, 1], query_boxes[k, 1]) + 1
47 | )
48 | if ih > 0:
49 | ua = float(
50 | (boxes[n, 2] - boxes[n, 0] + 1) *
51 | (boxes[n, 3] - boxes[n, 1] + 1) +
52 | box_area - iw * ih
53 | )
54 | overlaps[n, k] = iw * ih / ua
55 | return overlaps
56 |
57 | def bbox_intersections(
58 | np.ndarray[DTYPE_t, ndim=2] boxes,
59 | np.ndarray[DTYPE_t, ndim=2] query_boxes):
60 | """
61 | For each query box compute the intersection ratio covered by boxes
62 | ----------
63 | Parameters
64 | ----------
65 | boxes: (N, 4) ndarray of float
66 | query_boxes: (K, 4) ndarray of float
67 | Returns
68 | -------
69 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes
70 | """
71 | cdef unsigned int N = boxes.shape[0]
72 | cdef unsigned int K = query_boxes.shape[0]
73 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE)
74 | cdef DTYPE_t iw, ih, box_area
75 | cdef DTYPE_t ua
76 | cdef unsigned int k, n
77 | for k in range(K):
78 | box_area = (
79 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
80 | (query_boxes[k, 3] - query_boxes[k, 1] + 1)
81 | )
82 | for n in range(N):
83 | iw = (
84 | min(boxes[n, 2], query_boxes[k, 2]) -
85 | max(boxes[n, 0], query_boxes[k, 0]) + 1
86 | )
87 | if iw > 0:
88 | ih = (
89 | min(boxes[n, 3], query_boxes[k, 3]) -
90 | max(boxes[n, 1], query_boxes[k, 1]) + 1
91 | )
92 | if ih > 0:
93 | intersec[n, k] = iw * ih / box_area
94 | return intersec
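
Note: a toy check of bbox_intersections which, unlike bbox_overlaps, divides the intersection by the query-box area rather than by the union; it assumes the compiled extension (utils.cython_bbox, built by lib/setup.py) is importable.

    import numpy as np
    from utils.cython_bbox import bbox_intersections  # assumes the built extension

    boxes = np.array([[0., 0., 9., 9.]])    # 10x10 box
    query = np.array([[5., 5., 14., 14.]])  # 10x10 query box
    # intersection 5x5 = 25, query area 100 -> ratio 0.25
    print(bbox_intersections(boxes, query))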
--------------------------------------------------------------------------------
/lib/utils/blob.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | """Blob helper functions."""
9 |
10 | import numpy as np
11 | import cv2
12 | from ..fast_rcnn.config import cfg
13 |
14 | def im_list_to_blob(ims):
15 | """Convert a list of images into a network input.
16 |
17 | Assumes images are already prepared (means subtracted, BGR order, ...).
18 | """
19 | max_shape = np.array([im.shape for im in ims]).max(axis=0)
20 | num_images = len(ims)
21 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3),
22 | dtype=np.float32)
23 | for i in range(num_images):
24 | im = ims[i]
25 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
26 |
27 | return blob
28 |
29 | def prep_im_for_blob(im, pixel_means, target_size, max_size):
30 | """Mean subtract and scale an image for use in a blob."""
31 | im = im.astype(np.float32, copy=False)
32 | im -= pixel_means
33 | im_shape = im.shape
34 | im_size_min = np.min(im_shape[0:2])
35 | im_size_max = np.max(im_shape[0:2])
36 | im_scale = float(target_size) / float(im_size_min)
37 | # Prevent the biggest axis from being more than MAX_SIZE
38 | if np.round(im_scale * im_size_max) > max_size:
39 | im_scale = float(max_size) / float(im_size_max)
40 | if cfg.TRAIN.RANDOM_DOWNSAMPLE:
41 | r = 0.6 + np.random.rand() * 0.4
42 | im_scale *= r
43 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
44 | interpolation=cv2.INTER_LINEAR)
45 |
46 | return im, im_scale
47 |
--------------------------------------------------------------------------------
/lib/utils/boxes_grid.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Subcategory CNN
3 | # Copyright (c) 2015 CVGL Stanford
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Yu Xiang
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | import math
10 | # TODO: make fast_rcnn irrelevant
11 | # >>>> obsolete, because it depends on sth outside of this project
12 | from ..fast_rcnn.config import cfg
13 | # <<<< obsolete
14 |
15 | def get_boxes_grid(image_height, image_width):
16 | """
17 | Return the boxes on image grid.
18 |     Call this function when cfg.IS_MULTISCALE is True; otherwise, call rdl_roidb.prepare_roidb(imdb) instead.
19 | """
20 |
21 |     # fixed a bug: changed cfg.TRAIN.SCALES to cfg.TRAIN.SCALES_BASE,
22 |     # because a ratio around 1.0 is needed here, not the actual size.
23 | # height and width of the feature map
24 | if cfg.NET_NAME == 'CaffeNet':
25 | height = np.floor((image_height * max(cfg.TRAIN.SCALES_BASE) - 1) / 4.0 + 1)
26 | height = np.floor((height - 1) / 2.0 + 1 + 0.5)
27 | height = np.floor((height - 1) / 2.0 + 1 + 0.5)
28 |
29 | width = np.floor((image_width * max(cfg.TRAIN.SCALES_BASE) - 1) / 4.0 + 1)
30 | width = np.floor((width - 1) / 2.0 + 1 + 0.5)
31 | width = np.floor((width - 1) / 2.0 + 1 + 0.5)
32 | elif cfg.NET_NAME == 'VGGnet':
33 | height = np.floor(image_height * max(cfg.TRAIN.SCALES_BASE) / 2.0 + 0.5)
34 | height = np.floor(height / 2.0 + 0.5)
35 | height = np.floor(height / 2.0 + 0.5)
36 | height = np.floor(height / 2.0 + 0.5)
37 |
38 | width = np.floor(image_width * max(cfg.TRAIN.SCALES_BASE) / 2.0 + 0.5)
39 | width = np.floor(width / 2.0 + 0.5)
40 | width = np.floor(width / 2.0 + 0.5)
41 | width = np.floor(width / 2.0 + 0.5)
42 | else:
43 |         assert False, 'The network architecture is not supported in utils.get_boxes_grid!'
44 |
45 | # compute the grid box centers
46 | h = np.arange(height)
47 | w = np.arange(width)
48 | y, x = np.meshgrid(h, w, indexing='ij')
49 | centers = np.dstack((x, y))
50 | centers = np.reshape(centers, (-1, 2))
51 | num = centers.shape[0]
52 |
53 | # compute width and height of grid box
54 | area = cfg.TRAIN.KERNEL_SIZE * cfg.TRAIN.KERNEL_SIZE
55 | aspect = cfg.TRAIN.ASPECTS # height / width
56 | num_aspect = len(aspect)
57 | widths = np.zeros((1, num_aspect), dtype=np.float32)
58 | heights = np.zeros((1, num_aspect), dtype=np.float32)
59 | for i in range(num_aspect):
60 | widths[0,i] = math.sqrt(area / aspect[i])
61 | heights[0,i] = widths[0,i] * aspect[i]
62 |
63 | # construct grid boxes
64 | centers = np.repeat(centers, num_aspect, axis=0)
65 | widths = np.tile(widths, num).transpose()
66 | heights = np.tile(heights, num).transpose()
67 |
68 | x1 = np.reshape(centers[:,0], (-1, 1)) - widths * 0.5
69 | x2 = np.reshape(centers[:,0], (-1, 1)) + widths * 0.5
70 | y1 = np.reshape(centers[:,1], (-1, 1)) - heights * 0.5
71 | y2 = np.reshape(centers[:,1], (-1, 1)) + heights * 0.5
72 |
73 | boxes_grid = np.hstack((x1, y1, x2, y2)) / cfg.TRAIN.SPATIAL_SCALE
74 |
75 | return boxes_grid, centers[:,0], centers[:,1]
76 |
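
Note: get_boxes_grid depends on several config keys (NET_NAME, TRAIN.SCALES_BASE, TRAIN.KERNEL_SIZE, TRAIN.ASPECTS, TRAIN.SPATIAL_SCALE), so below is a standalone NumPy illustration of the same box construction on an assumed toy 2x3 feature grid with a 3x3 kernel and two aspect ratios; it mirrors the math above rather than calling into the module.

    import math
    import numpy as np

    height, width = 2, 3
    area = 3 * 3                      # KERNEL_SIZE ** 2
    aspects = [0.5, 1.0]              # height / width

    y, x = np.meshgrid(np.arange(height), np.arange(width), indexing='ij')
    centers = np.reshape(np.dstack((x, y)), (-1, 2))
    num = centers.shape[0]

    widths = np.array([[math.sqrt(area / a) for a in aspects]], dtype=np.float32)
    heights = widths * np.array(aspects, dtype=np.float32)

    # one box per (center, aspect) pair
    centers = np.repeat(centers, len(aspects), axis=0)
    widths = np.tile(widths, num).transpose()
    heights = np.tile(heights, num).transpose()

    x1 = centers[:, [0]] - widths * 0.5
    y1 = centers[:, [1]] - heights * 0.5
    x2 = centers[:, [0]] + widths * 0.5
    y2 = centers[:, [1]] + heights * 0.5
    boxes = np.hstack((x1, y1, x2, y2))
    print(boxes.shape)                # (12, 4)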
--------------------------------------------------------------------------------
/lib/utils/nms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 | def nms(dets, thresh):
11 | x1 = dets[:, 0]
12 | y1 = dets[:, 1]
13 | x2 = dets[:, 2]
14 | y2 = dets[:, 3]
15 | scores = dets[:, 4]
16 |
17 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
18 | order = scores.argsort()[::-1]
19 |
20 | keep = []
21 | while order.size > 0:
22 | i = order[0]
23 | keep.append(i)
24 | xx1 = np.maximum(x1[i], x1[order[1:]])
25 | yy1 = np.maximum(y1[i], y1[order[1:]])
26 | xx2 = np.minimum(x2[i], x2[order[1:]])
27 | yy2 = np.minimum(y2[i], y2[order[1:]])
28 |
29 | w = np.maximum(0.0, xx2 - xx1 + 1)
30 | h = np.maximum(0.0, yy2 - yy1 + 1)
31 | inter = w * h
32 | ovr = inter / (areas[i] + areas[order[1:]] - inter)
33 |
34 | inds = np.where(ovr <= thresh)[0]
35 | order = order[inds + 1]
36 |
37 | return keep
38 |
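
Note: a toy run of the pure-Python nms above: two heavily overlapping detections and one separate detection, with a 0.5 IoU threshold.

    import numpy as np

    dets = np.array([
        [0.,  0., 10., 10., 0.9],    # kept (highest score)
        [1.,  1., 10., 10., 0.8],    # suppressed: IoU with the first box is ~0.83
        [20., 20., 30., 30., 0.7],   # kept: no overlap with the first box
    ], dtype=np.float32)

    print(nms(dets, thresh=0.5))     # indices kept, i.e. [0, 2]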
--------------------------------------------------------------------------------
/lib/utils/nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
12 | return a if a >= b else b
13 |
14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
15 | return a if a <= b else b
16 |
17 | def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
23 |
24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
26 |
27 | cdef int ndets = dets.shape[0]
28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \
29 | np.zeros((ndets), dtype=np.int)
30 |
31 | # nominal indices
32 | cdef int _i, _j
33 | # sorted indices
34 | cdef int i, j
35 | # temp variables for box i's (the box currently under consideration)
36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea
37 | # variables for computing overlap with box j (lower scoring box)
38 | cdef np.float32_t xx1, yy1, xx2, yy2
39 | cdef np.float32_t w, h
40 | cdef np.float32_t inter, ovr
41 |
42 | keep = []
43 | for _i in range(ndets):
44 | i = order[_i]
45 | if suppressed[i] == 1:
46 | continue
47 | keep.append(i)
48 | ix1 = x1[i]
49 | iy1 = y1[i]
50 | ix2 = x2[i]
51 | iy2 = y2[i]
52 | iarea = areas[i]
53 | for _j in range(_i + 1, ndets):
54 | j = order[_j]
55 | if suppressed[j] == 1:
56 | continue
57 | xx1 = max(ix1, x1[j])
58 | yy1 = max(iy1, y1[j])
59 | xx2 = min(ix2, x2[j])
60 | yy2 = min(iy2, y2[j])
61 | w = max(0.0, xx2 - xx1 + 1)
62 | h = max(0.0, yy2 - yy1 + 1)
63 | inter = w * h
64 | ovr = inter / (iarea + areas[j] - inter)
65 | if ovr >= thresh:
66 | suppressed[j] = 1
67 |
68 | return keep
69 |
70 | def nms_new(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
71 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
72 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
73 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
74 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
75 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
76 |
77 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
78 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
79 |
80 | cdef int ndets = dets.shape[0]
81 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \
82 | np.zeros((ndets), dtype=np.int)
83 |
84 | # nominal indices
85 | cdef int _i, _j
86 | # sorted indices
87 | cdef int i, j
88 | # temp variables for box i's (the box currently under consideration)
89 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea
90 | # variables for computing overlap with box j (lower scoring box)
91 | cdef np.float32_t xx1, yy1, xx2, yy2
92 | cdef np.float32_t w, h
93 | cdef np.float32_t inter, ovr
94 |
95 | keep = []
96 | for _i in range(ndets):
97 | i = order[_i]
98 | if suppressed[i] == 1:
99 | continue
100 | keep.append(i)
101 | ix1 = x1[i]
102 | iy1 = y1[i]
103 | ix2 = x2[i]
104 | iy2 = y2[i]
105 | iarea = areas[i]
106 | for _j in range(_i + 1, ndets):
107 | j = order[_j]
108 | if suppressed[j] == 1:
109 | continue
110 | xx1 = max(ix1, x1[j])
111 | yy1 = max(iy1, y1[j])
112 | xx2 = min(ix2, x2[j])
113 | yy2 = min(iy2, y2[j])
114 | w = max(0.0, xx2 - xx1 + 1)
115 | h = max(0.0, yy2 - yy1 + 1)
116 | inter = w * h
117 | ovr = inter / (iarea + areas[j] - inter)
118 | ovr1 = inter / iarea
119 | ovr2 = inter / areas[j]
120 | if ovr >= thresh or ovr1 > 0.95 or ovr2 > 0.95:
121 | suppressed[j] = 1
122 |
123 | return keep
124 |
--------------------------------------------------------------------------------
/lib/utils/timer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import time
9 |
10 | class Timer(object):
11 | """A simple timer."""
12 | def __init__(self):
13 | self.total_time = 0.
14 | self.calls = 0
15 | self.start_time = 0.
16 | self.diff = 0.
17 | self.average_time = 0.
18 |
19 | def tic(self):
20 |         # using time.time instead of time.clock because time.clock
21 | # does not normalize for multithreading
22 | self.start_time = time.time()
23 |
24 | def toc(self, average=True):
25 | self.diff = time.time() - self.start_time
26 | self.total_time += self.diff
27 | self.calls += 1
28 | self.average_time = self.total_time / self.calls
29 | if average:
30 | return self.average_time
31 | else:
32 | return self.diff
33 |
--------------------------------------------------------------------------------
/nets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/nets/__init__.py
--------------------------------------------------------------------------------
/nets/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/nets/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/resnet_utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/nets/__pycache__/resnet_utils.cpython-36.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/resnet_v1.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FakerYFX/InceptText-Tensorflow/bdb5c1bd4a7db277ddf9550e40c5a1fad0230ac4/nets/__pycache__/resnet_v1.cpython-36.pyc
--------------------------------------------------------------------------------
/nets/googlenet.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tensorflow.contrib.layers as layers
3 | import tensorflow.contrib.framework as ops
4 |
5 |
6 | def get_inception_layer(inputs, conv11_size, conv33_11_size, conv33_size,
7 | conv55_11_size, conv55_size, pool11_size):
8 | with tf.variable_scope("conv_1x1"):
9 | conv11 = layers.conv2d(inputs, conv11_size, [1, 1])
10 | with tf.variable_scope("conv_3x3"):
11 | conv33_11 = layers.conv2d(inputs, conv33_11_size, [1, 1])
12 | conv33 = layers.conv2d(conv33_11, conv33_size, [3, 3])
13 | with tf.variable_scope("conv_5x5"):
14 | conv55_11 = layers.conv2d(inputs, conv55_11_size, [1, 1])
15 | conv55 = layers.conv2d(conv55_11, conv55_size, [5, 5])
16 | with tf.variable_scope("pool_proj"):
17 | pool_proj = layers.max_pool2d(inputs, [3, 3], stride=1)
18 | pool11 = layers.conv2d(pool_proj, pool11_size, [1, 1])
19 | if tf.__version__ == '0.11.0rc0':
20 | return tf.concat(3, [conv11, conv33, conv55, pool11])
21 | return tf.concat([conv11, conv33, conv55, pool11], 3)
22 |
23 |
24 | def aux_logit_layer(inputs, num_classes, is_training):
25 | with tf.variable_scope("pool2d"):
26 | pooled = layers.avg_pool2d(inputs, [5, 5], stride=3)
27 | with tf.variable_scope("conv11"):
28 | conv11 = layers.conv2d(pooled, 128, [1, 1])
29 | with tf.variable_scope("flatten"):
30 | flat = tf.reshape(conv11, [-1, 2048])
31 | with tf.variable_scope("fc"):
32 | fc = layers.fully_connected(flat, 1024, activation_fn=None)
33 | with tf.variable_scope("drop"):
34 | drop = layers.dropout(fc, 0.3, is_training=is_training)
35 | with tf.variable_scope("linear"):
36 | linear = layers.fully_connected(drop, num_classes, activation_fn=None)
37 | with tf.variable_scope("soft"):
38 | soft = tf.nn.softmax(linear)
39 | return soft
40 |
41 |
42 | def googlenet(inputs,
43 | dropout_keep_prob=0.4,
44 | num_classes=1000,
45 | is_training=True,
46 | restore_logits=None,
47 | scope=''):
48 | '''
49 | Implementation of https://arxiv.org/pdf/1409.4842.pdf
50 | '''
51 |
52 | end_points = {}
53 | with tf.name_scope(scope, "googlenet", [inputs]):
54 | with ops.arg_scope([layers.max_pool2d], padding='SAME'):
55 | end_points['conv0'] = layers.conv2d(inputs, 64, [7, 7], stride=2, scope='conv0')
56 | end_points['pool0'] = layers.max_pool2d(end_points['conv0'], [3, 3], scope='pool0')
57 | end_points['conv1_a'] = layers.conv2d(end_points['pool0'], 64, [1, 1], scope='conv1_a')
58 | end_points['conv1_b'] = layers.conv2d(end_points['conv1_a'], 192, [3, 3], scope='conv1_b')
59 | end_points['pool1'] = layers.max_pool2d(end_points['conv1_b'], [3, 3], scope='pool1')
60 |
61 | with tf.variable_scope("inception_3a"):
62 | end_points['inception_3a'] = get_inception_layer(end_points['pool1'], 64, 96, 128, 16, 32, 32)
63 |
64 | with tf.variable_scope("inception_3b"):
65 | end_points['inception_3b'] = get_inception_layer(end_points['inception_3a'], 128, 128, 192, 32, 96, 64)
66 |
67 | end_points['pool2'] = layers.max_pool2d(end_points['inception_3b'], [3, 3], scope='pool2')
68 |
69 | with tf.variable_scope("inception_4a"):
70 | end_points['inception_4a'] = get_inception_layer(end_points['pool2'], 192, 96, 208, 16, 48, 64)
71 |
72 | with tf.variable_scope("aux_logits_1"):
73 | end_points['aux_logits_1'] = aux_logit_layer(end_points['inception_4a'], num_classes, is_training)
74 |
75 | with tf.variable_scope("inception_4b"):
76 | end_points['inception_4b'] = get_inception_layer(end_points['inception_4a'], 160, 112, 224, 24, 64, 64)
77 |
78 | with tf.variable_scope("inception_4c"):
79 | end_points['inception_4c'] = get_inception_layer(end_points['inception_4b'], 128, 128, 256, 24, 64, 64)
80 |
81 | with tf.variable_scope("inception_4d"):
82 | end_points['inception_4d'] = get_inception_layer(end_points['inception_4c'], 112, 144, 288, 32, 64, 64)
83 |
84 | with tf.variable_scope("aux_logits_2"):
85 | end_points['aux_logits_2'] = aux_logit_layer(end_points['inception_4d'], num_classes, is_training)
86 |
87 | with tf.variable_scope("inception_4e"):
88 | end_points['inception_4e'] = get_inception_layer(end_points['inception_4d'], 256, 160, 320, 32, 128,
89 | 128)
90 |
91 | end_points['pool3'] = layers.max_pool2d(end_points['inception_4e'], [3, 3], scope='pool3')
92 |
93 | with tf.variable_scope("inception_5a"):
94 | end_points['inception_5a'] = get_inception_layer(end_points['pool3'], 256, 160, 320, 32, 128, 128)
95 |
96 | with tf.variable_scope("inception_5b"):
97 | end_points['inception_5b'] = get_inception_layer(end_points['inception_5a'], 384, 192, 384, 48, 128,
98 | 128)
99 |
100 | end_points['pool4'] = layers.avg_pool2d(end_points['inception_5b'], [7, 7], stride=1, scope='pool4')
101 |
102 | end_points['reshape'] = tf.reshape(end_points['pool4'], [-1, 1024])
103 |
104 | end_points['dropout'] = layers.dropout(end_points['reshape'], dropout_keep_prob, is_training=is_training)
105 |
106 | end_points['logits'] = layers.fully_connected(end_points['dropout'], num_classes, activation_fn=None,
107 | scope='logits')
108 |
109 | end_points['predictions'] = tf.nn.softmax(end_points['logits'], name='predictions')
110 |
111 | return end_points['logits'], end_points
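
Note: a minimal graph-construction sketch for the googlenet definition above. It assumes a TensorFlow 1.x environment (the code relies on tf.contrib.layers and placeholders, so it will not run on TF 2.x as written).

    import tensorflow as tf

    inputs = tf.placeholder(tf.float32, [None, 224, 224, 3], name='inputs')
    logits, end_points = googlenet(inputs, num_classes=1000, is_training=False)

    print(logits.shape)                    # (?, 1000)
    print(sorted(end_points.keys())[:3])   # e.g. ['aux_logits_1', 'aux_logits_2', 'conv0']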
--------------------------------------------------------------------------------