├── src
│   ├── utils
│   │   ├── __init__.py
│   │   ├── timer.py
│   │   └── cpu_nms.pyx
│   ├── layers
│   │   ├── __init__.py
│   │   └── text_proposal_layer.py
│   ├── text_proposal_connector.py
│   ├── detectors.py
│   ├── anchor.py
│   ├── other.py
│   └── text_proposal_graph_builder.py
├── .gitignore
├── demo_images
│   ├── img_1.jpg
│   ├── img_2.jpg
│   └── img_3.jpg
├── .gitmodules
├── Makefile
├── tools
│   ├── cfg.py
│   ├── demo.py
│   └── demo_vid.py
├── LICENSE
├── README.md
└── models
    └── deploy.prototxt

/src/utils/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'zhitian'
2 | 
--------------------------------------------------------------------------------
/src/layers/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tianzhi'
2 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.caffemodel
3 | .idea
4 | *.so
5 | results
6 | *.xml
7 | 
8 | 
--------------------------------------------------------------------------------
/demo_images/img_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qingswu/CTPN/HEAD/demo_images/img_1.jpg
--------------------------------------------------------------------------------
/demo_images/img_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qingswu/CTPN/HEAD/demo_images/img_2.jpg
--------------------------------------------------------------------------------
/demo_images/img_3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qingswu/CTPN/HEAD/demo_images/img_3.jpg
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "caffe"]
2 | 	path = caffe
3 | 	url = https://github.com/qingswu/caffe
4 | 	branch = CTPN
5 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | 	cython src/utils/cpu_nms.pyx
3 | 	gcc -shared -pthread -fPIC -fwrapv -O2 -Wall -fno-strict-aliasing \
4 | 	-I/usr/include/python2.7 -o src/utils/cpu_nms.so src/utils/cpu_nms.c
5 | 	rm -rf src/utils/cpu_nms.c
6 | 
--------------------------------------------------------------------------------
/tools/cfg.py:
--------------------------------------------------------------------------------
 1 | # MUST be imported first
 2 | import sys
 3 | import numpy as np
 4 | 
 5 | class Config:
 6 |     MEAN=np.float32([102.9801, 115.9465, 122.7717])
 7 |     TEST_GPU_ID=0
 8 |     SCALE=600
 9 |     MAX_SCALE=1000
10 | 
11 |     LINE_MIN_SCORE=0.7
12 |     TEXT_PROPOSALS_MIN_SCORE=0.7
13 |     TEXT_PROPOSALS_NMS_THRESH=0.3
14 |     MAX_HORIZONTAL_GAP=50
15 |     TEXT_LINE_NMS_THRESH=0.3
16 |     MIN_NUM_PROPOSALS=2
17 |     MIN_RATIO=1.2
18 |     MIN_V_OVERLAPS=0.7
19 |     MIN_SIZE_SIM=0.7
20 |     TEXT_PROPOSALS_WIDTH=16
21 | 
22 | def init():
23 |     sys.path.insert(0, "./tools")
24 |     sys.path.insert(0, "./caffe/python")
25 |     sys.path.insert(0, "./src")
26 | init()
27 | 
--------------------------------------------------------------------------------
/src/utils/timer.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | import time
 9 | 
10 | class Timer(object):
11 |     """A simple timer."""
12 |     def __init__(self):
13 |         self.total_time = 0.
14 |         self.calls = 0
15 |         self.start_time = 0.
16 |         self.diff = 0.
17 |         self.average_time = 0.
18 | 
19 |     def tic(self):
20 |         # using time.time instead of time.clock because time.clock
21 |         # does not normalize for multithreading
22 |         self.start_time = time.time()
23 | 
24 |     def toc(self, average=True):
25 |         self.diff = time.time() - self.start_time
26 |         self.total_time += self.diff
27 |         self.calls += 1
28 |         self.average_time = self.total_time / self.calls
29 |         if average:
30 |             return self.average_time
31 |         else:
32 |             return self.diff
33 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2016 Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | ALL THIRD PARTY CODES ARE LICENSED TO YOU UNDER THEIR ORIGINAL LICENSE TERMS.
24 | 
--------------------------------------------------------------------------------
/src/layers/text_proposal_layer.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import yaml, caffe
 3 | from other import clip_boxes
 4 | from anchor import AnchorText
 5 | 
 6 | 
 7 | class ProposalLayer(caffe.Layer):
 8 |     def setup(self, bottom, top):
 9 |         # parse the layer parameter string, which must be valid YAML
10 |         layer_params = yaml.load(self.param_str)
11 | 
12 |         self._feat_stride = layer_params['feat_stride']
13 |         self.anchor_generator=AnchorText()
14 |         self._num_anchors = self.anchor_generator.anchor_num
15 | 
16 |         top[0].reshape(1, 4)
17 |         top[1].reshape(1, 1, 1, 1)
18 | 
19 |     def forward(self, bottom, top):
20 |         assert bottom[0].data.shape[0]==1, \
21 |             'Only single item batches are supported'
22 | 
23 |         scores = bottom[0].data[:, self._num_anchors:, :, :]
24 | 
25 |         bbox_deltas = bottom[1].data
26 |         im_info = bottom[2].data[0, :]
27 |         height, width = scores.shape[-2:]
28 | 
29 |         anchors=self.anchor_generator.locate_anchors((height, width), self._feat_stride)
30 | 
31 |         scores=scores.transpose((0, 2, 3, 1)).reshape(-1, 1)
32 |         bbox_deltas=bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 2))
33 | 
34 |         proposals=self.anchor_generator.apply_deltas_to_anchors(bbox_deltas, anchors)
35 | 
36 |         # clip proposals that exceed the image boundaries
37 |         proposals=clip_boxes(proposals, im_info[:2])
38 | 
39 |         blob=proposals.astype(np.float32, copy=False)
40 |         top[0].reshape(*(blob.shape))
41 |         top[0].data[...]=blob
42 | 
43 |         top[1].reshape(*(scores.shape))
44 |         top[1].data[...]=scores
45 | 
46 |     def backward(self, top, propagate_down, bottom):
47 |         pass
48 | 
49 |     def reshape(self, bottom, top):
50 |         pass
51 | 
--------------------------------------------------------------------------------
/src/text_proposal_connector.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from other import clip_boxes
 3 | from text_proposal_graph_builder import TextProposalGraphBuilder
 4 | 
 5 | class TextProposalConnector:
 6 |     """
 7 |     Connect text proposals into text lines
 8 |     """
 9 |     def __init__(self):
10 |         self.graph_builder=TextProposalGraphBuilder()
11 | 
12 |     def group_text_proposals(self, text_proposals, scores, im_size):
13 |         graph=self.graph_builder.build_graph(text_proposals, scores, im_size)
14 |         return graph.sub_graphs_connected()
15 | 
16 |     def fit_y(self, X, Y, x1, x2):
17 |         assert len(X)!=0
18 |         # if X only includes one point, the function returns the line y=Y[0]
19 |         if np.sum(X==X[0])==len(X):
20 |             return Y[0], Y[0]
21 |         p=np.poly1d(np.polyfit(X, Y, 1))
22 |         return p(x1), p(x2)
23 | 
24 |     def get_text_lines(self, text_proposals, scores, im_size):
25 |         # tp=text proposal
26 |         tp_groups=self.group_text_proposals(text_proposals, scores, im_size)
27 |         text_lines=np.zeros((len(tp_groups), 5), np.float32)
28 | 
29 |         for index, tp_indices in enumerate(tp_groups):
30 |             text_line_boxes=text_proposals[list(tp_indices)]
31 | 
32 |             x0=np.min(text_line_boxes[:, 0])
33 |             x1=np.max(text_line_boxes[:, 2])
34 | 
35 |             offset=(text_line_boxes[0, 2]-text_line_boxes[0, 0])*0.5
36 | 
37 |             lt_y, rt_y=self.fit_y(text_line_boxes[:, 0], text_line_boxes[:, 1], x0+offset, x1-offset)
38 |             lb_y, rb_y=self.fit_y(text_line_boxes[:, 0], text_line_boxes[:, 3], x0+offset, x1-offset)
39 | 
40 |             # the score of a text line is the average score of the scores
41 |             # of all text proposals contained in the text line
42 |             score=scores[list(tp_indices)].sum()/float(len(tp_indices))
43 | 
44 |             text_lines[index, 0]=x0
45 |             text_lines[index, 1]=min(lt_y, rt_y)
46 |             text_lines[index, 2]=x1
47 |             text_lines[index, 3]=max(lb_y, rb_y)
48 |             text_lines[index, 4]=score
49 | 
50 |         text_lines=clip_boxes(text_lines, im_size)
51 | 
52 |         return text_lines
53 | 
--------------------------------------------------------------------------------
/tools/demo.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | #
 3 | # The codes are used for implementing CTPN for scene text detection, described in:
 4 | #
 5 | # Z. Tian, W. Huang, T. He, P. He and Y. Qiao: Detecting Text in Natural Image with
 6 | # Connectionist Text Proposal Network, ECCV, 2016.
 7 | #
 8 | # Online demo is available at: textdet.com
 9 | #
10 | # These demo codes (with our trained model) are for text-line detection (without
11 | # side-refinement part).
12 | #
13 | #
14 | # ====== Copyright by Zhi Tian, Weilin Huang, Tong He, Pan He and Yu Qiao==========
15 | 
16 | # Email: zhi.tian@siat.ac.cn; wl.huang@siat.ac.cn
17 | #
18 | # Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences
19 | #
20 | #
21 | 
22 | from cfg import Config as cfg
23 | from other import draw_boxes, resize_im, CaffeModel
24 | import cv2, os, caffe, sys
25 | from detectors import TextProposalDetector, TextDetector
26 | import os.path as osp
27 | from utils.timer import Timer
28 | 
29 | DEMO_IMAGE_DIR="demo_images/"
30 | NET_DEF_FILE="models/deploy.prototxt"
31 | MODEL_FILE="models/ctpn_trained_model.caffemodel"
32 | 
33 | if len(sys.argv)>1 and sys.argv[1]=="--no-gpu":
34 |     caffe.set_mode_cpu()
35 | else:
36 |     caffe.set_mode_gpu()
37 |     caffe.set_device(cfg.TEST_GPU_ID)
38 | 
39 | # initialize the detectors
40 | text_proposals_detector=TextProposalDetector(CaffeModel(NET_DEF_FILE, MODEL_FILE))
41 | text_detector=TextDetector(text_proposals_detector)
42 | 
43 | demo_imnames=os.listdir(DEMO_IMAGE_DIR)
44 | timer=Timer()
45 | 
46 | for im_name in demo_imnames:
47 |     print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
48 |     print "Image: %s"%im_name
49 | 
50 |     im_file=osp.join(DEMO_IMAGE_DIR, im_name)
51 |     im=cv2.imread(im_file)
52 | 
53 |     timer.tic()
54 | 
55 |     im, f=resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
56 |     text_lines=text_detector.detect(im)
57 | 
58 |     print "Number of the detected text lines: %s"%len(text_lines)
59 |     print "Time: %f"%timer.toc()
60 | 
61 |     im_with_text_lines=draw_boxes(im, text_lines, caption=im_name, wait=False)
62 | 
63 | print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
64 | print "Thank you for trying our demo. Press any key to exit..."
65 | cv2.waitKey(0)
66 | 
--------------------------------------------------------------------------------
/tools/demo_vid.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | #
 3 | # The codes are used for implementing CTPN for scene text detection, described in:
 4 | #
 5 | # Z. Tian, W. Huang, T. He, P. He and Y. Qiao: Detecting Text in Natural Image with
 6 | # Connectionist Text Proposal Network, ECCV, 2016.
 7 | #
 8 | # Online demo is available at: textdet.com
 9 | #
10 | # These demo codes (with our trained model) are for text-line detection (without
11 | # side-refinement part).
12 | # 13 | # 14 | # ====== Copyright by Zhi Tian, Weilin Huang, Tong He, Pan He and Yu Qiao========== 15 | 16 | # Email: zhi.tian@siat.ac.cn; wl.huang@siat.ac.cn 17 | # 18 | # Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences 19 | # 20 | # 21 | 22 | from cfg import Config as cfg 23 | from other import draw_boxes, resize_im, CaffeModel 24 | import cv2, os, caffe, sys 25 | from detectors import TextProposalDetector, TextDetector 26 | import os.path as osp 27 | from utils.timer import Timer 28 | 29 | NET_DEF_FILE="models/deploy.prototxt" 30 | MODEL_FILE="models/ctpn_trained_model.caffemodel" 31 | 32 | if len(sys.argv)>1 and sys.argv[1]=="--no-gpu": 33 | caffe.set_mode_cpu() 34 | else: 35 | caffe.set_mode_gpu() 36 | caffe.set_device(cfg.TEST_GPU_ID) 37 | 38 | vid = 0 39 | if len(sys.argv) == 2 and sys.argv[1] != "--no-gpu": 40 | if os.path.exists(sys.argv[1]): 41 | vid = sys.argv[1] 42 | elif len(sys.argv) == 3: 43 | if os.path.exists(sys.argv[2]): 44 | vid = sys.argv[2] 45 | elif len(sys.argv) > 3: 46 | print "Wrong parameter." 47 | exit() 48 | 49 | # initialize the detectors 50 | text_proposals_detector=TextProposalDetector(CaffeModel(NET_DEF_FILE, MODEL_FILE)) 51 | text_detector=TextDetector(text_proposals_detector) 52 | 53 | timer=Timer() 54 | cam = cv2.VideoCapture(vid) 55 | 56 | while True: 57 | ret, im = cam.read() 58 | if not ret: 59 | break 60 | 61 | timer.tic() 62 | 63 | im, f=resize_im(im, cfg.SCALE, cfg.MAX_SCALE) 64 | text_lines=text_detector.detect(im) 65 | 66 | print "Number of the detected text lines: %s"%len(text_lines) 67 | print "Time: %f"%timer.toc() 68 | 69 | im_with_text_lines=draw_boxes(im, text_lines, caption="Text Detection", wait=False) 70 | key = cv2.waitKey(1) 71 | if key == 27: 72 | break 73 | -------------------------------------------------------------------------------- /src/utils/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep 
= [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /src/detectors.py: -------------------------------------------------------------------------------- 1 | from cfg import Config as cfg 2 | from other import prepare_img, normalize 3 | import numpy as np 4 | from utils.cpu_nms import cpu_nms as nms 5 | from text_proposal_connector import TextProposalConnector 6 | 7 | 8 | class TextProposalDetector: 9 | """ 10 | Detect text proposals in an image 11 | """ 12 | def __init__(self, caffe_model): 13 | self.caffe_model=caffe_model 14 | 15 | def detect(self, im, mean): 16 | im_data=prepare_img(im, mean) 17 | _=self.caffe_model.forward2({ 18 | "data": im_data[np.newaxis, :], 19 | "im_info": np.array([[im_data.shape[1], im_data.shape[2]]], np.float32) 20 | }) 21 | rois=self.caffe_model.blob("rois") 22 | scores=self.caffe_model.blob("scores") 23 | return rois, scores 24 | 25 | 26 | class TextDetector: 27 | """ 28 | Detect text from an image 29 | """ 30 | def __init__(self, text_proposal_detector): 31 | self.text_proposal_detector=text_proposal_detector 32 | self.text_proposal_connector=TextProposalConnector() 33 | 34 | def detect(self, im): 35 | """ 36 | Detecting texts from an image 37 | :return: the bounding boxes of the detected texts 38 | """ 39 | text_proposals, scores=self.text_proposal_detector.detect(im, cfg.MEAN) 40 | keep_inds=np.where(scores>cfg.TEXT_PROPOSALS_MIN_SCORE)[0] 41 | text_proposals, scores=text_proposals[keep_inds], scores[keep_inds] 42 | 43 | sorted_indices=np.argsort(scores.ravel())[::-1] 44 | text_proposals, scores=text_proposals[sorted_indices], scores[sorted_indices] 45 | 46 | # nms for text proposals 47 | keep_inds=nms(np.hstack((text_proposals, scores)), cfg.TEXT_PROPOSALS_NMS_THRESH) 48 | text_proposals, scores=text_proposals[keep_inds], scores[keep_inds] 49 | 50 | scores=normalize(scores) 51 | 52 | text_lines=self.text_proposal_connector.get_text_lines(text_proposals, scores, im.shape[:2]) 53 | 54 | keep_inds=self.filter_boxes(text_lines) 55 | text_lines=text_lines[keep_inds] 56 | 57 | # nms for text lines 58 | if text_lines.shape[0]!=0: 59 | keep_inds=nms(text_lines, cfg.TEXT_LINE_NMS_THRESH) 60 | text_lines=text_lines[keep_inds] 61 | 62 | return text_lines 63 | 64 | def filter_boxes(self, boxes): 65 | heights=boxes[:, 3]-boxes[:, 1]+1 66 | widths=boxes[:, 2]-boxes[:, 0]+1 67 | scores=boxes[:, -1] 68 | return np.where((widths/heights>cfg.MIN_RATIO) & (scores>cfg.LINE_MIN_SCORE) & 69 | (widths>(cfg.TEXT_PROPOSALS_WIDTH*cfg.MIN_NUM_PROPOSALS)))[0] 70 | -------------------------------------------------------------------------------- /src/anchor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class AnchorText: 5 | def __init__(self): 6 | self.anchor_num=10 7 | 8 | def generate_basic_anchors(self, sizes, base_size=16): 9 | """ 10 | :param sizes: [(h1, w1), (h2, w2)...] 
11 |         :param base_size:
12 |         :return:
13 |         """
14 |         assert(self.anchor_num==len(sizes))
15 |         base_anchor=np.array([0, 0, base_size-1, base_size-1], np.int32)
16 |         anchors=np.zeros((len(sizes), 4), np.int32)
17 |         index=0
18 |         for h, w in sizes:
19 |             anchors[index]=self.scale_anchor(base_anchor, h, w)
20 |             index+=1
21 |         return anchors
22 | 
23 |     def scale_anchor(self, anchor, h, w):
24 |         x_ctr=(anchor[0]+anchor[2])*0.5
25 |         y_ctr=(anchor[1]+anchor[3])*0.5
26 |         scaled_anchor=anchor.copy()
27 |         scaled_anchor[0]=x_ctr-w/2
28 |         scaled_anchor[2]=x_ctr+w/2
29 |         scaled_anchor[1]=y_ctr-h/2
30 |         scaled_anchor[3]=y_ctr+h/2
31 |         return scaled_anchor
32 | 
33 |     def apply_deltas_to_anchors(self, boxes_delta, anchors):
34 |         """
35 |         :return [l t r b]
36 |         """
37 |         anchor_y_ctr=(anchors[:, 1]+anchors[:, 3])/2.
38 |         anchor_h=anchors[:, 3]-anchors[:, 1]+1.
39 |         global_coords=np.zeros_like(boxes_delta, np.float32)
40 |         global_coords[:, 1]=np.exp(boxes_delta[:, 1])*anchor_h
41 |         global_coords[:, 0]=boxes_delta[:, 0]*anchor_h+anchor_y_ctr-global_coords[:, 1]/2.
42 |         return np.hstack((anchors[:, [0]], global_coords[:, [0]], anchors[:, [2]],
43 |                           global_coords[:, [0]]+global_coords[:, [1]])).astype(np.float32)
44 | 
45 |     def basic_anchors(self):
46 |         """
47 |         anchor [l t r b]
48 |         """
49 |         heights=[11, 16, 23, 33, 48, 68, 97, 139, 198, 283]
50 |         widths=[16]
51 |         sizes=[]
52 |         for h in heights:
53 |             for w in widths:
54 |                 sizes.append((h, w))
55 |         return self.generate_basic_anchors(sizes)
56 | 
57 |     def locate_anchors(self, feat_map_size, feat_stride):
58 |         """
59 |         return all anchors on the feature map
60 |         """
61 |         basic_anchors_=self.basic_anchors()
62 |         anchors=np.zeros((basic_anchors_.shape[0]*feat_map_size[0]*feat_map_size[1], 4), np.int32)
63 |         index=0
64 |         for y_ in range(feat_map_size[0]):
65 |             for x_ in range(feat_map_size[1]):
66 |                 shift=np.array([x_, y_, x_, y_])*feat_stride
67 |                 anchors[index:index+basic_anchors_.shape[0], :]=basic_anchors_+shift
68 |                 index+=basic_anchors_.shape[0]
69 |         return anchors
70 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # CUDA 8.0 compatible version
 2 | 1. Updated Caffe to the current version, keeping the files that the official version doesn't have.
 3 | 
 4 | 2. Small fix in the CTPN code to adapt to the new Caffe.
 5 | ```{bash}
 6 | git clone --recursive https://github.com/qingswu/CTPN.git
 7 | # ...compile caffe following the official steps
 8 | # then go to the root folder and compile the cython code
 9 | make
10 | # download model
11 | wget http://textdet.com/downloads/ctpn_trained_model.caffemodel -P models/
12 | # run the demo
13 | ./tools/demo.py
14 | ```
15 | 
16 | # Detecting Text in Natural Image with Connectionist Text Proposal Network
17 | This code implements CTPN for scene text detection, described in:
18 | 
19 | Z. Tian, W. Huang, T. He, P. He and Y. Qiao: Detecting Text in Natural Image with
20 | Connectionist Text Proposal Network, ECCV, 2016.
21 | 
22 | Online demo is available at: [textdet.com](http://textdet.com)
23 | 
24 | These demo codes (with our trained model) are for text-line detection (without
25 | the side-refinement part).
26 | 
27 | # Required hardware
28 | You need a GPU. If you use CUDNN, about 1.5GB of free memory is required. If you don't use CUDNN, you will need about 5GB of free memory, and the testing time will increase slightly. Therefore, we strongly recommend using CUDNN.
29 | 
30 | It's also possible to run the program on CPU only, but it's extremely slow due to the non-optimal CPU implementation.
31 | # Required software
32 | Python 2.7, Cython, and everything Caffe depends on.
33 | 
34 | # How to run this code
35 | 
36 | 1. Clone this repository with `git clone https://github.com/tianzhi0549/CTPN.git`. It will check out the code of CTPN and the Caffe we ship.
37 | 
38 | 2. Install the Caffe we ship with the steps below.
39 |     * Install Caffe's dependencies. You can follow [this tutorial](http://caffe.berkeleyvision.org/installation.html). *Note: we need Python support. The CUDA version we need is 7.0.*
40 |     * Enter the directory `caffe`.
41 |     * Run `cp Makefile.config.example Makefile.config`.
42 |     * Open Makefile.config and set `WITH_PYTHON_LAYER := 1`. If you want to use CUDNN, please also set `CUDNN := 1`. Uncomment `CPU_ONLY := 1` if you want to compile it without GPU.
43 | 
44 |     *Note: To use CUDNN, you need to download CUDNN from NVIDIA's official website and install it in advance. The CUDNN version we use is 3.0.*
45 |     * Run `make -j && make pycaffe`.
46 | 
47 | 3. After Caffe is set up, you need to download a trained model (about 78M) from [Google Drive](https://drive.google.com/open?id=0B7c5Ix-XO7hqQWtKQ0lxTko4ZGs) or [our website](http://textdet.com/downloads/ctpn_trained_model.caffemodel), and then put it into the directory `models`. The model's name should be `ctpn_trained_model.caffemodel`.
48 | 
49 | 4. Now, be sure you are in the root directory of the code. Run `make` to compile some Cython files.
50 | 
51 | 5. Run `python tools/demo.py` for a demo. Or `python tools/demo.py --no-gpu` to run it in CPU mode.
52 | 
53 | # License
54 | The code is released under the MIT License.
55 | 
--------------------------------------------------------------------------------
/src/other.py:
--------------------------------------------------------------------------------
 1 | import cv2, caffe
 2 | import numpy as np
 3 | from matplotlib import cm
 4 | 
 5 | 
 6 | def prepare_img(im, mean):
 7 |     """
 8 |     transform img into caffe's input img.
 9 |     """
10 |     im_data=np.transpose(im-mean, (2, 0, 1))
11 |     return im_data
12 | 
13 | 
14 | def draw_boxes(im, bboxes, is_display=True, color=None, caption="Image", wait=True):
15 |     """
16 |     boxes: bounding boxes
17 |     """
18 |     im=im.copy()
19 |     for box in bboxes:
20 |         if color is None:
21 |             if len(box)==5 or len(box)==9:
22 |                 c=tuple(cm.jet([box[-1]])[0, 2::-1]*255)
23 |             else:
24 |                 c=tuple(np.random.randint(0, 256, 3))
25 |         else:
26 |             c=color
27 |         cv2.rectangle(im, tuple(box[:2]), tuple(box[2:4]), c)
28 |     if is_display:
29 |         cv2.imshow(caption, im)
30 |         if wait:
31 |             cv2.waitKey(0)
32 |     return im
33 | 
34 | 
35 | def threshold(coords, min_, max_):
36 |     return np.maximum(np.minimum(coords, max_), min_)
37 | 
38 | 
39 | def clip_boxes(boxes, im_shape):
40 |     """
41 |     Clip boxes to image boundaries.
42 |     """
43 |     boxes[:, 0::2]=threshold(boxes[:, 0::2], 0, im_shape[1]-1)
44 |     boxes[:, 1::2]=threshold(boxes[:, 1::2], 0, im_shape[0]-1)
45 |     return boxes
46 | 
47 | 
48 | def normalize(data):
49 |     if data.shape[0]==0:
50 |         return data
51 |     max_=data.max()
52 |     min_=data.min()
53 |     return (data-min_)/(max_-min_) if max_-min_!=0 else data-min_
54 | 
55 | 
56 | def resize_im(im, scale, max_scale=None):
57 |     f=float(scale)/min(im.shape[0], im.shape[1])
58 |     if max_scale is not None and f*max(im.shape[0], im.shape[1])>max_scale:
59 |         f=float(max_scale)/max(im.shape[0], im.shape[1])
60 |     return cv2.resize(im, (0, 0), fx=f, fy=f), f
61 | 
62 | 
63 | class Graph:
64 |     def __init__(self, graph):
65 |         self.graph=graph
66 | 
67 |     def sub_graphs_connected(self):
68 |         sub_graphs=[]
69 |         for index in xrange(self.graph.shape[0]):
70 |             if not self.graph[:, index].any() and self.graph[index, :].any():
71 |                 v=index
72 |                 sub_graphs.append([v])
73 |                 while self.graph[v, :].any():
74 |                     v=np.where(self.graph[v, :])[0][0]
75 |                     sub_graphs[-1].append(v)
76 |         return sub_graphs
77 | 
78 | 
79 | class CaffeModel:
80 |     def __init__(self, net_def_file, model_file):
81 |         self.net_def_file=net_def_file
82 |         self.net=caffe.Net(net_def_file, model_file, caffe.TEST)
83 | 
84 |     def blob(self, key):
85 |         return self.net.blobs[key].data.copy()
86 | 
87 |     def forward(self, input_data):
88 |         return self.forward2({"data": input_data[np.newaxis, :]})
89 | 
90 |     def forward2(self, input_data):
91 |         for k, v in input_data.items():
92 |             self.net.blobs[k].reshape(*v.shape)
93 |             self.net.blobs[k].data[...]=v
94 |         return self.net.forward()
95 | 
96 |     def net_def_file(self):
97 |         return self.net_def_file
98 | 
--------------------------------------------------------------------------------
/src/text_proposal_graph_builder.py:
--------------------------------------------------------------------------------
 1 | from cfg import Config as cfg
 2 | import numpy as np
 3 | from other import Graph
 4 | 
 5 | 
 6 | class TextProposalGraphBuilder:
 7 |     """
 8 |     Build text proposals into a graph.
 9 |     """
10 |     def get_successions(self, index):
11 |         box=self.text_proposals[index]
12 |         results=[]
13 |         for left in range(int(box[0])+1, min(int(box[0])+cfg.MAX_HORIZONTAL_GAP+1, self.im_size[1])):
14 |             adj_box_indices=self.boxes_table[left]
15 |             for adj_box_index in adj_box_indices:
16 |                 if self.meet_v_iou(adj_box_index, index):
17 |                     results.append(adj_box_index)
18 |             if len(results)!=0:
19 |                 return results
20 |         return results
21 | 
22 |     def get_precursors(self, index):
23 |         box=self.text_proposals[index]
24 |         results=[]
25 |         for left in range(int(box[0])-1, max(int(box[0]-cfg.MAX_HORIZONTAL_GAP), 0)-1, -1):
26 |             adj_box_indices=self.boxes_table[left]
27 |             for adj_box_index in adj_box_indices:
28 |                 if self.meet_v_iou(adj_box_index, index):
29 |                     results.append(adj_box_index)
30 |             if len(results)!=0:
31 |                 return results
32 |         return results
33 | 
34 |     def is_succession_node(self, index, succession_index):
35 |         precursors=self.get_precursors(succession_index)
36 |         if self.scores[index]>=np.max(self.scores[precursors]):
37 |             return True
38 |         return False
39 | 
40 |     def meet_v_iou(self, index1, index2):
41 |         def overlaps_v(index1, index2):
42 |             h1=self.heights[index1]
43 |             h2=self.heights[index2]
44 |             y0=max(self.text_proposals[index2][1], self.text_proposals[index1][1])
45 |             y1=min(self.text_proposals[index2][3], self.text_proposals[index1][3])
46 |             return max(0, y1-y0+1)/min(h1, h2)
47 | 
48 |         def size_similarity(index1, index2):
49 |             h1=self.heights[index1]
50 |             h2=self.heights[index2]
51 |             return min(h1, h2)/max(h1, h2)
52 | 
53 |         return overlaps_v(index1, index2)>=cfg.MIN_V_OVERLAPS and \
54 |                size_similarity(index1, index2)>=cfg.MIN_SIZE_SIM
55 | 
56 |     def build_graph(self, text_proposals, scores, im_size):
57 |         self.text_proposals=text_proposals
58 |         self.scores=scores
59 |         self.im_size=im_size
60 |         self.heights=text_proposals[:, 3]-text_proposals[:, 1]+1
61 | 
62 |         boxes_table=[[] for _ in range(self.im_size[1])]
63 |         for index, box in enumerate(text_proposals):
64 |             boxes_table[int(box[0])].append(index)
65 |         self.boxes_table=boxes_table
66 | 
67 |         graph=np.zeros((text_proposals.shape[0], text_proposals.shape[0]), np.bool)
68 | 
69 |         for index, box in enumerate(text_proposals):
70 |             successions=self.get_successions(index)
71 |             if len(successions)==0:
72 |                 continue
73 |             succession_index=successions[np.argmax(scores[successions])]
74 |             if self.is_succession_node(index, succession_index):
75 |                 # NOTE: a box can have multiple successions (or precursors) if several
76 |                 # of them have equal scores.
77 | graph[index, succession_index]=True 78 | return Graph(graph) 79 | -------------------------------------------------------------------------------- /models/deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_ILSVRC_16_layers" 2 | 3 | input: 'data' 4 | input_shape { 5 | dim: 1 6 | dim: 3 7 | dim: 600 8 | dim: 900 9 | } 10 | 11 | input: 'im_info' 12 | input_shape { 13 | dim: 1 14 | dim: 3 15 | } 16 | 17 | layer { 18 | name: "conv1_1" 19 | type: "Convolution" 20 | bottom: "data" 21 | top: "conv1_1" 22 | param { 23 | lr_mult: 0 24 | decay_mult: 0 25 | } 26 | param { 27 | lr_mult: 0 28 | decay_mult: 0 29 | } 30 | convolution_param { 31 | num_output: 64 32 | pad: 1 33 | kernel_size: 3 34 | } 35 | } 36 | layer { 37 | name: "relu1_1" 38 | type: "ReLU" 39 | bottom: "conv1_1" 40 | top: "conv1_1" 41 | } 42 | layer { 43 | name: "conv1_2" 44 | type: "Convolution" 45 | bottom: "conv1_1" 46 | top: "conv1_2" 47 | param { 48 | lr_mult: 0 49 | decay_mult: 0 50 | } 51 | param { 52 | lr_mult: 0 53 | decay_mult: 0 54 | } 55 | convolution_param { 56 | num_output: 64 57 | pad: 1 58 | kernel_size: 3 59 | } 60 | } 61 | layer { 62 | name: "relu1_2" 63 | type: "ReLU" 64 | bottom: "conv1_2" 65 | top: "conv1_2" 66 | } 67 | layer { 68 | name: "pool1" 69 | type: "Pooling" 70 | bottom: "conv1_2" 71 | top: "pool1" 72 | pooling_param { 73 | pool: MAX 74 | kernel_size: 2 75 | stride: 2 76 | } 77 | } 78 | layer { 79 | name: "conv2_1" 80 | type: "Convolution" 81 | bottom: "pool1" 82 | top: "conv2_1" 83 | param { 84 | lr_mult: 0 85 | decay_mult: 0 86 | } 87 | param { 88 | lr_mult: 0 89 | decay_mult: 0 90 | } 91 | convolution_param { 92 | num_output: 128 93 | pad: 1 94 | kernel_size: 3 95 | } 96 | } 97 | layer { 98 | name: "relu2_1" 99 | type: "ReLU" 100 | bottom: "conv2_1" 101 | top: "conv2_1" 102 | } 103 | layer { 104 | name: "conv2_2" 105 | type: "Convolution" 106 | bottom: "conv2_1" 107 | top: "conv2_2" 108 | param { 109 | lr_mult: 0 110 | decay_mult: 0 111 | } 112 | param { 113 | lr_mult: 0 114 | decay_mult: 0 115 | } 116 | convolution_param { 117 | num_output: 128 118 | pad: 1 119 | kernel_size: 3 120 | } 121 | } 122 | layer { 123 | name: "relu2_2" 124 | type: "ReLU" 125 | bottom: "conv2_2" 126 | top: "conv2_2" 127 | } 128 | layer { 129 | name: "pool2" 130 | type: "Pooling" 131 | bottom: "conv2_2" 132 | top: "pool2" 133 | pooling_param { 134 | pool: MAX 135 | kernel_size: 2 136 | stride: 2 137 | } 138 | } 139 | layer { 140 | name: "conv3_1" 141 | type: "Convolution" 142 | bottom: "pool2" 143 | top: "conv3_1" 144 | param { 145 | lr_mult: 1 146 | } 147 | param { 148 | lr_mult: 2 149 | } 150 | convolution_param { 151 | num_output: 256 152 | pad: 1 153 | kernel_size: 3 154 | } 155 | } 156 | layer { 157 | name: "relu3_1" 158 | type: "ReLU" 159 | bottom: "conv3_1" 160 | top: "conv3_1" 161 | } 162 | layer { 163 | name: "conv3_2" 164 | type: "Convolution" 165 | bottom: "conv3_1" 166 | top: "conv3_2" 167 | param { 168 | lr_mult: 1 169 | } 170 | param { 171 | lr_mult: 2 172 | } 173 | convolution_param { 174 | num_output: 256 175 | pad: 1 176 | kernel_size: 3 177 | } 178 | } 179 | layer { 180 | name: "relu3_2" 181 | type: "ReLU" 182 | bottom: "conv3_2" 183 | top: "conv3_2" 184 | } 185 | layer { 186 | name: "conv3_3" 187 | type: "Convolution" 188 | bottom: "conv3_2" 189 | top: "conv3_3" 190 | param { 191 | lr_mult: 1 192 | } 193 | param { 194 | lr_mult: 2 195 | } 196 | convolution_param { 197 | num_output: 256 198 | pad: 1 199 | kernel_size: 3 200 | } 201 | } 202 | 
layer { 203 | name: "relu3_3" 204 | type: "ReLU" 205 | bottom: "conv3_3" 206 | top: "conv3_3" 207 | } 208 | layer { 209 | name: "pool3" 210 | type: "Pooling" 211 | bottom: "conv3_3" 212 | top: "pool3" 213 | pooling_param { 214 | pool: MAX 215 | kernel_size: 2 216 | stride: 2 217 | } 218 | } 219 | layer { 220 | name: "conv4_1" 221 | type: "Convolution" 222 | bottom: "pool3" 223 | top: "conv4_1" 224 | param { 225 | lr_mult: 1 226 | } 227 | param { 228 | lr_mult: 2 229 | } 230 | convolution_param { 231 | num_output: 512 232 | pad: 1 233 | kernel_size: 3 234 | } 235 | } 236 | layer { 237 | name: "relu4_1" 238 | type: "ReLU" 239 | bottom: "conv4_1" 240 | top: "conv4_1" 241 | } 242 | layer { 243 | name: "conv4_2" 244 | type: "Convolution" 245 | bottom: "conv4_1" 246 | top: "conv4_2" 247 | param { 248 | lr_mult: 1 249 | } 250 | param { 251 | lr_mult: 2 252 | } 253 | convolution_param { 254 | num_output: 512 255 | pad: 1 256 | kernel_size: 3 257 | } 258 | } 259 | layer { 260 | name: "relu4_2" 261 | type: "ReLU" 262 | bottom: "conv4_2" 263 | top: "conv4_2" 264 | } 265 | layer { 266 | name: "conv4_3" 267 | type: "Convolution" 268 | bottom: "conv4_2" 269 | top: "conv4_3" 270 | param { 271 | lr_mult: 1 272 | } 273 | param { 274 | lr_mult: 2 275 | } 276 | convolution_param { 277 | num_output: 512 278 | pad: 1 279 | kernel_size: 3 280 | } 281 | } 282 | layer { 283 | name: "relu4_3" 284 | type: "ReLU" 285 | bottom: "conv4_3" 286 | top: "conv4_3" 287 | } 288 | layer { 289 | name: "pool4" 290 | type: "Pooling" 291 | bottom: "conv4_3" 292 | top: "pool4" 293 | pooling_param { 294 | pool: MAX 295 | kernel_size: 2 296 | stride: 2 297 | } 298 | } 299 | layer { 300 | name: "conv5_1" 301 | type: "Convolution" 302 | bottom: "pool4" 303 | top: "conv5_1" 304 | param { 305 | lr_mult: 1 306 | } 307 | param { 308 | lr_mult: 2 309 | } 310 | convolution_param { 311 | num_output: 512 312 | pad: 1 313 | kernel_size: 3 314 | } 315 | } 316 | layer { 317 | name: "relu5_1" 318 | type: "ReLU" 319 | bottom: "conv5_1" 320 | top: "conv5_1" 321 | } 322 | layer { 323 | name: "conv5_2" 324 | type: "Convolution" 325 | bottom: "conv5_1" 326 | top: "conv5_2" 327 | param { 328 | lr_mult: 1 329 | } 330 | param { 331 | lr_mult: 2 332 | } 333 | convolution_param { 334 | num_output: 512 335 | pad: 1 336 | kernel_size: 3 337 | } 338 | } 339 | layer { 340 | name: "relu5_2" 341 | type: "ReLU" 342 | bottom: "conv5_2" 343 | top: "conv5_2" 344 | } 345 | layer { 346 | name: "conv5_3" 347 | type: "Convolution" 348 | bottom: "conv5_2" 349 | top: "conv5_3" 350 | param { 351 | lr_mult: 1 352 | } 353 | param { 354 | lr_mult: 2 355 | } 356 | convolution_param { 357 | num_output: 512 358 | pad: 1 359 | kernel_size: 3 360 | } 361 | } 362 | layer { 363 | name: "relu5_3" 364 | type: "ReLU" 365 | bottom: "conv5_3" 366 | top: "conv5_3" 367 | } 368 | 369 | #========= RPN ============ 370 | 371 | # prepare lstm inputs 372 | layer { 373 | name: "im2col" 374 | bottom: "conv5_3" 375 | top: "im2col" 376 | type: "Im2col" 377 | convolution_param { 378 | pad: 1 379 | kernel_size: 3 380 | } 381 | } 382 | layer { 383 | name: "im2col_transpose" 384 | top: "im2col_transpose" 385 | bottom: "im2col" 386 | type: "Transpose" 387 | transpose_param { 388 | dim: 3 389 | dim: 2 390 | dim: 0 391 | dim: 1 392 | } 393 | } 394 | layer { 395 | name: "lstm_input" 396 | type: "Reshape" 397 | bottom: "im2col_transpose" 398 | top: "lstm_input" 399 | reshape_param { 400 | shape { dim: -1 } 401 | axis: 1 402 | num_axes: 2 403 | } 404 | } 405 | 406 | layer { 407 | name: "lstm" 408 | type: 
"Lstm" 409 | bottom: "lstm_input" 410 | top: "lstm" 411 | lstm_param { 412 | num_output: 128 413 | weight_filler { 414 | type: "gaussian" 415 | std: 0.01 416 | } 417 | bias_filler { 418 | type: "constant" 419 | } 420 | clipping_threshold: 1 421 | } 422 | } 423 | 424 | 425 | # ===================== rlstm =================== 426 | layer { 427 | name: "lstm-reverse1" 428 | type: "Reverse" 429 | bottom: "lstm_input" 430 | top: "rlstm_input" 431 | reverse_param { 432 | axis: 0 433 | } 434 | } 435 | layer { 436 | name: "rlstm" 437 | type: "Lstm" 438 | bottom: "rlstm_input" 439 | top: "rlstm-output" 440 | lstm_param { 441 | num_output: 128 442 | } 443 | } 444 | layer { 445 | name: "lstm-reverse2" 446 | type: "Reverse" 447 | bottom: "rlstm-output" 448 | top: "rlstm" 449 | reverse_param { 450 | axis: 0 451 | } 452 | } 453 | 454 | 455 | # merge lstm and rlstm 456 | layer { 457 | name: "merge_lstm_rlstm" 458 | type: "Concat" 459 | bottom: "lstm" 460 | bottom: "rlstm" 461 | top: "merge_lstm_rlstm" 462 | concat_param { 463 | axis: 2 464 | } 465 | } 466 | layer { 467 | name: "lstm_output_reshape" 468 | type: "Reshape" 469 | bottom: "merge_lstm_rlstm" 470 | top: "lstm_output_reshape" 471 | reshape_param { 472 | shape { dim: -1 dim: 1 } 473 | axis: 1 474 | num_axes: 1 475 | } 476 | } 477 | # transpose size of output as (N, C, H, W) 478 | layer { 479 | name: "lstm_output" 480 | type: "Transpose" 481 | bottom: "lstm_output_reshape" 482 | top: "lstm_output" 483 | transpose_param { 484 | dim: 2 485 | dim: 3 486 | dim: 1 487 | dim: 0 488 | } 489 | } 490 | layer { 491 | name: "fc" 492 | bottom: "lstm_output" 493 | top: "fc" 494 | type: "Convolution" 495 | convolution_param { 496 | num_output: 512 497 | kernel_size: 1 498 | } 499 | } 500 | layer { 501 | name: "relu_fc" 502 | type: "ReLU" 503 | bottom: "fc" 504 | top: "fc" 505 | } 506 | layer { 507 | name: "rpn_cls_score" 508 | type: "Convolution" 509 | bottom: "fc" 510 | top: "rpn_cls_score" 511 | param { lr_mult: 1.0 } 512 | param { lr_mult: 2.0 } 513 | convolution_param { 514 | num_output: 20 515 | kernel_size: 1 pad: 0 stride: 1 516 | } 517 | } 518 | layer { 519 | bottom: "rpn_cls_score" 520 | top: "rpn_cls_score_reshape" 521 | name: "rpn_cls_score_reshape" 522 | type: "Reshape" 523 | reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } 524 | } 525 | layer { 526 | name: "rpn_bbox_pred" 527 | type: "Convolution" 528 | bottom: "fc" 529 | top: "rpn_bbox_pred" 530 | param { lr_mult: 1.0 } 531 | param { lr_mult: 2.0 } 532 | convolution_param { 533 | num_output: 20 534 | kernel_size: 1 pad: 0 stride: 1 535 | } 536 | } 537 | layer { 538 | name: "rpn_cls_prob" 539 | type: "Softmax" 540 | bottom: "rpn_cls_score_reshape" 541 | top: "rpn_cls_prob" 542 | } 543 | 544 | layer { 545 | name: 'rpn_cls_prob_reshape' 546 | type: 'Reshape' 547 | bottom: 'rpn_cls_prob' 548 | top: 'rpn_cls_prob_reshape' 549 | reshape_param { shape { dim: 0 dim: 20 dim: -1 dim: 0 } } 550 | } 551 | 552 | layer { 553 | name: 'proposal' 554 | type: 'Python' 555 | bottom: 'rpn_cls_prob_reshape' 556 | bottom: 'rpn_bbox_pred' 557 | bottom: 'im_info' 558 | top: 'rois' 559 | top: 'scores' 560 | python_param { 561 | module: 'layers.text_proposal_layer' 562 | layer: 'ProposalLayer' 563 | param_str: "'feat_stride': 16" 564 | } 565 | } 566 | --------------------------------------------------------------------------------