├── crnn.pytorch ├── models │ ├── __init__.py │ └── crnn.py ├── data │ └── demo.png ├── LICENSE.md ├── demo.py ├── README.md ├── tool │ ├── convert_t7.lua │ └── convert_t7.py ├── test │ └── test_utils.py ├── dataset.py ├── crnn_utils.py ├── keys.py └── crnn_main.py ├── CTPN ├── src │ ├── utils │ │ ├── __init__.py │ │ ├── timer.py │ │ └── cpu_nms.pyx │ ├── layers │ │ ├── __init__.py │ │ └── text_proposal_layer.py │ ├── text_proposal_connector.py │ ├── detectors.py │ ├── anchor.py │ ├── other.py │ └── text_proposal_graph_builder.py ├── demo_images │ ├── img_1.jpg │ ├── img_2.jpg │ └── img_3.jpg ├── Makefile ├── tools │ ├── cfg.py │ └── demo.py ├── LICENSE ├── README.md └── Dockerfile ├── IMG_1556.png ├── .gitignore ├── README.md ├── demo.py ├── crnnport.py ├── ctpnport.py └── models └── CTPN └── deploy.prototxt /crnn.pytorch/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /CTPN/src/utils/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'zhitian' 2 | -------------------------------------------------------------------------------- /CTPN/src/layers/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tianzhi' 2 | -------------------------------------------------------------------------------- /IMG_1556.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/makefile/scene-text-recog/HEAD/IMG_1556.png -------------------------------------------------------------------------------- /CTPN/demo_images/img_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/makefile/scene-text-recog/HEAD/CTPN/demo_images/img_1.jpg -------------------------------------------------------------------------------- /CTPN/demo_images/img_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/makefile/scene-text-recog/HEAD/CTPN/demo_images/img_2.jpg -------------------------------------------------------------------------------- /CTPN/demo_images/img_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/makefile/scene-text-recog/HEAD/CTPN/demo_images/img_3.jpg -------------------------------------------------------------------------------- /crnn.pytorch/data/demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/makefile/scene-text-recog/HEAD/crnn.pytorch/data/demo.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.pth 3 | *.pyc 4 | *.pyo 5 | *.log 6 | *.tmp 7 | *.so 8 | 9 | # specific file 10 | out.jpg 11 | *.caffemodel 12 | 13 | -------------------------------------------------------------------------------- /CTPN/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | cython src/utils/cpu_nms.pyx 3 | gcc -shared -pthread -fPIC -fwrapv -O2 -Wall -fno-strict-aliasing \ 4 | -I/usr/include/python2.7 -o src/utils/cpu_nms.so src/utils/cpu_nms.c 5 | rm -rf src/utils/cpu_nms.c 6 | 
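If the hard-coded Python 2.7 include path in this Makefile does not match your environment, the same extension can be built from Python instead. A minimal, hypothetical sketch (file name `setup_nms.py` is an assumption, not part of the repo; assumes Cython and NumPy are installed; run `python setup_nms.py build_ext --inplace` from `CTPN/`):

    # setup_nms.py -- hypothetical alternative to the Makefile above
    from distutils.core import setup
    import numpy as np
    from Cython.Build import cythonize

    setup(
        ext_modules=cythonize("src/utils/cpu_nms.pyx"),
        # cpu_nms.pyx does `cimport numpy`, so the NumPy headers must be on the include path
        include_dirs=[np.get_include()],
    )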
-------------------------------------------------------------------------------- /CTPN/tools/cfg.py: -------------------------------------------------------------------------------- 1 | # MUST be imported first 2 | import sys 3 | import numpy as np 4 | 5 | class Config: 6 | MEAN=np.float32([102.9801, 115.9465, 122.7717]) 7 | TEST_GPU_ID=0 8 | SCALE=600 9 | MAX_SCALE=1000 10 | 11 | LINE_MIN_SCORE=0.7 12 | TEXT_PROPOSALS_MIN_SCORE=0.7 13 | TEXT_PROPOSALS_NMS_THRESH=0.3 14 | MAX_HORIZONTAL_GAP=50 15 | TEXT_LINE_NMS_THRESH=0.3 16 | MIN_NUM_PROPOSALS=2 17 | MIN_RATIO=1.2 18 | MIN_V_OVERLAPS=0.7 19 | MIN_SIZE_SIM=0.7 20 | TEXT_PROPOSALS_WIDTH=16 21 | 22 | def init(): 23 | sys.path.insert(0, "./tools") 24 | sys.path.insert(0, "./caffe/python") 25 | sys.path.insert(0, "./src") 26 | init() -------------------------------------------------------------------------------- /CTPN/src/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 18 | 19 | def tic(self): 20 | # using time.time instead of time.clock because time.clock 21 | # does not normalize for multithreading 22 | self.start_time = time.time() 23 | 24 | def toc(self, average=True): 25 | self.diff = time.time() - self.start_time 26 | self.total_time += self.diff 27 | self.calls += 1 28 | self.average_time = self.total_time / self.calls 29 | if average: 30 | return self.average_time 31 | else: 32 | return self.diff 33 | -------------------------------------------------------------------------------- /crnn.pytorch/LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017 Jieru Mei 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /crnn.pytorch/demo.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | import crnn_utils as utils 4 | import dataset 5 | from PIL import Image 6 | 7 | import models.crnn as crnn 8 | 9 | 10 | model_path = './data/crnn.pth' 11 | img_path = './data/demo.png' 12 | alphabet = '0123456789abcdefghijklmnopqrstuvwxyz' 13 | 14 | model = crnn.CRNN(32, 1, 37, 256) 15 | if torch.cuda.is_available(): 16 | model = model.cuda() 17 | print('loading pretrained model from %s' % model_path) 18 | model.load_state_dict(torch.load(model_path)) 19 | 20 | converter = utils.strLabelConverter(alphabet) 21 | 22 | transformer = dataset.resizeNormalize((100, 32)) 23 | image = Image.open(img_path).convert('L') 24 | image = transformer(image) 25 | if torch.cuda.is_available(): 26 | image = image.cuda() 27 | image = image.view(1, *image.size()) 28 | image = Variable(image) 29 | 30 | model.eval() 31 | preds = model(image) 32 | 33 | _, preds = preds.max(2) 34 | preds = preds.transpose(1, 0).contiguous().view(-1) 35 | 36 | preds_size = Variable(torch.IntTensor([preds.size(0)])) 37 | raw_pred = converter.decode(preds.data, preds_size.data, raw=True) 38 | sim_pred = converter.decode(preds.data, preds_size.data, raw=False) 39 | print('%-20s => %-20s' % (raw_pred, sim_pred)) 40 | -------------------------------------------------------------------------------- /CTPN/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | ALL THIRD PARTY CODES ARE LICENSED TO YOU UNDER THEIR ORIGINAL LICENSE TERMS. -------------------------------------------------------------------------------- /crnn.pytorch/README.md: -------------------------------------------------------------------------------- 1 | Convolutional Recurrent Neural Network 2 | ====================================== 3 | 4 | This software implements the Convolutional Recurrent Neural Network (CRNN) in pytorch. 5 | The original software can be found in [crnn](https://github.com/bgshih/crnn). 6 | 7 | Run demo 8 | -------- 9 | A demo program can be found in ``demo.py``. 
Before running the demo, download a pretrained model 10 | from [Baidu Netdisk](https://pan.baidu.com/s/1pLbeCND) or [Dropbox](https://www.dropbox.com/s/dboqjk20qjkpta3/crnn.pth?dl=0). 11 | This pretrained model was converted from the one offered by the author, using the scripts in ``tool``. 12 | Put the downloaded model file ``crnn.pth`` into the ``data/`` directory. Then launch the demo by: 13 | 14 | python demo.py 15 | 16 | The demo reads an example image and recognizes its text content. 17 | 18 | Example image: 19 | ![Example Image](./data/demo.png) 20 | 21 | Expected output: 22 | loading pretrained model from ./data/crnn.pth 23 | a-----v--a-i-l-a-bb-l-ee-- => available 24 | 25 | Dependencies 26 | ---------- 27 | * [warp_ctc_pytorch](https://github.com/SeanNaren/warp-ctc/tree/pytorch_bindings/pytorch_binding) 28 | * lmdb 29 | 30 | Train a new model 31 | ----------------- 32 | 1. Construct the dataset following the original guide. For training with variable length, please sort the images by text length. 33 | 2. ``python crnn_main.py [--param val]``. Explore ``crnn_main.py`` for details. 34 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | scene-text-recog 2 | =================================== 3 | Scene text recognition combining CTPN + CRNN. 4 | 5 | Code based on [bear63/sceneReco commit-ae1731e](https://github.com/bear63/sceneReco/commit/ae1731e4a344359e1ff5a147ec8d3834afb16ba1) 6 | 7 | # CTPN: 8 | 9 | Caffe model in [frcnn](https://github.com/makefile/frcnn/tree/fpn/src/caffe/CTPN), adapted from [tianzhi0549/CTPN](https://github.com/tianzhi0549/CTPN), which targets an old Caffe. 10 | 11 | # CRNN: 12 | 13 | This repo uses the convolutional recurrent network in PyTorch: [meijieru/crnn.pytorch](https://github.com/meijieru/crnn.pytorch), while the original implementation [bgshih/crnn](https://github.com/bgshih/crnn) uses Torch. 14 | 15 | # Trained models: 16 | 17 | 1. Pure English: set `alphabet` to the English charset in `crnn.pytorch/keys.py` (see the sketch below). 18 | CPU model: [crnn.pth](https://www.dropbox.com/s/dboqjk20qjkpta3/crnn.pth?dl=0) 19 | 2. Chinese + English recognition: [netCRNN63.pth](https://drive.google.com/open?id=1R1tvM_HVo5eJLqnTDpxFgMANPRk4_QHB) for GPU, [netCRNNcpu.pth](https://drive.google.com/open?id=1p8yWQ3j3hHiRA9pBYmge542Y1xetcg1x) for CPU. 20 | 21 | Copy [ctpn_trained_model.caffemodel](https://drive.google.com/open?id=0B7c5Ix-XO7hqQWtKQ0lxTko4ZGs) to `./models/CTPN`. 22 | 23 | Copy the CRNN model to `./models`; the CPU model can be used for both CPU and GPU. 24 | 25 | The CTPN model was tested on Caffe 1.0 with CUDA 8.0; the CRNN models were tested on PyTorch 0.4.0 and 0.3.1. 
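For the pure-English case, a minimal sketch of what `crnn.pytorch/keys.py` reduces to (this is the same charset hard-coded in `crnn.pytorch/demo.py`; the shipped `keys.py` instead holds the long Chinese alphabet):

    #coding:UTF-8
    alphabet = u'0123456789abcdefghijklmnopqrstuvwxyz'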
26 | 27 | # Run demo 28 | 29 | python demo.py 30 | 31 | ![Example Image](./IMG_1556.png) -------------------------------------------------------------------------------- /CTPN/src/layers/text_proposal_layer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import yaml, caffe 3 | from other import clip_boxes 4 | from anchor import AnchorText 5 | 6 | 7 | class ProposalLayer(caffe.Layer): 8 | def setup(self, bottom, top): 9 | # parse the layer parameter string, which must be valid YAML 10 | layer_params = yaml.load(self.param_str) 11 | 12 | self._feat_stride = layer_params['feat_stride'] 13 | self.anchor_generator=AnchorText() 14 | self._num_anchors = self.anchor_generator.anchor_num 15 | 16 | top[0].reshape(1, 4) 17 | top[1].reshape(1, 1, 1, 1) 18 | 19 | def forward(self, bottom, top): 20 | assert bottom[0].data.shape[0]==1, \ 21 | 'Only single item batches are supported' 22 | 23 | scores = bottom[0].data[:, self._num_anchors:, :, :] 24 | 25 | bbox_deltas = bottom[1].data 26 | im_info = bottom[2].data[0, :] 27 | height, width = scores.shape[-2:] 28 | 29 | anchors=self.anchor_generator.locate_anchors((height, width), self._feat_stride) 30 | 31 | scores=scores.transpose((0, 2, 3, 1)).reshape(-1, 1) 32 | bbox_deltas=bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 2)) 33 | 34 | proposals=self.anchor_generator.apply_deltas_to_anchors(bbox_deltas, anchors) 35 | 36 | # clip the proposals in excess of the boundaries of the image 37 | proposals=clip_boxes(proposals, im_info[:2]) 38 | 39 | blob=proposals.astype(np.float32, copy=False) 40 | top[0].reshape(*(blob.shape)) 41 | top[0].data[...]=blob 42 | 43 | top[1].reshape(*(scores.shape)) 44 | top[1].data[...]=scores 45 | 46 | def backward(self, top, propagate_down, bottom): 47 | pass 48 | 49 | def reshape(self, bottom, top): 50 | pass 51 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | import sys 3 | caffe_path = '/home/s02/fyk/frcnn' 4 | sys.path.insert(0, "%s/python"%caffe_path) 5 | import caffe # import first to avoid problems like "CUDNN_STATUS_BAD_PARAM" 6 | sys.path.insert(0, "./CTPN/tools") 7 | sys.path.insert(1, "./CTPN/src") 8 | sys.path.append("./crnn.pytorch") 9 | 10 | from ctpnport import CTPNDetector 11 | from crnnport import CRNNRecognizer 12 | import time 13 | import cv2 14 | 15 | use_gpu = False 16 | #use_gpu = True 17 | base_dir = './models/' 18 | demo_dir = '/home/s02/hgf/text-recog/sceneReco/test/' 19 | gpu_id = -1 20 | if use_gpu: gpu_id = 0 21 | # GPU-only model: model_path = base_dir + 'netCRNN63.pth' 22 | # otherwise use the CPU model, which 23 | # can be used for both CPU/GPU: 24 | model_path = base_dir + 'netCRNNcpu.pth' 25 | #model_path = base_dir + 'crnn.pth' 26 | # another option is the English-only crnn.pth 27 | 28 | NET_DEF_FILE = base_dir + "CTPN/deploy.prototxt" 29 | MODEL_FILE = base_dir + "CTPN/ctpn_trained_model.caffemodel" 30 | 31 | #ctpn 32 | ctpn_detector = CTPNDetector(NET_DEF_FILE, MODEL_FILE, caffe_path) 33 | #crnn 34 | crnn_recog = CRNNRecognizer(model_path) 35 | 36 | #timer=Timer() 37 | print "\ninput exit to quit\n" 38 | while 1 : 39 | #im_name = raw_input("\nplease input file name:") 40 | im_name = 'image_8.jpg' 41 | if im_name == "exit": 42 | break 43 | im_path = demo_dir + im_name 44 | im = cv2.imread(im_path) 45 | if im is None: 46 | continue 47 | #timer.tic() 48 | start = time.time() 49 | text_lines, resize_im, resize_ratio = 
ctpn_detector.getCharBlock(im, gpu_id) 50 | print 'boxes:',len(text_lines) 51 | text_recs = ctpn_detector.convert_bbox(text_lines) 52 | print text_recs 53 | texts = crnn_recog.crnnRec(resize_im,text_recs, use_gpu) 54 | print texts 55 | end = time.time() 56 | #print "Time: %f"%timer.toc() 57 | print "Time ms: %f"%(end - start) 58 | box_im, text_recs = ctpn_detector.draw_boxes8(resize_im,text_lines, is_display=False) 59 | cv2.imwrite("out.jpg", box_im) 60 | break 61 | #cv2.waitKey(0) 62 | 63 | 64 | -------------------------------------------------------------------------------- /CTPN/tools/demo.py: -------------------------------------------------------------------------------- 1 | # 2 | # The codes are used for implementing CTPN for scene text detection, described in: 3 | # 4 | # Z. Tian, W. Huang, T. He, P. He and Y. Qiao: Detecting Text in Natural Image with 5 | # Connectionist Text Proposal Network, ECCV, 2016. 6 | # 7 | # Online demo is available at: textdet.com 8 | # 9 | # These demo codes (with our trained model) are for text-line detection (without 10 | # the side-refinement part). 11 | # 12 | # 13 | # ====== Copyright by Zhi Tian, Weilin Huang, Tong He, Pan He and Yu Qiao========== 14 | 15 | # Email: zhi.tian@siat.ac.cn; wl.huang@siat.ac.cn 16 | # 17 | # Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences 18 | # 19 | # 20 | import cv2, os, caffe, sys 21 | from cfg import Config as cfg 22 | from other import draw_boxes, resize_im, CaffeModel 23 | 24 | from detectors import TextProposalDetector, TextDetector 25 | import os.path as osp 26 | from utils.timer import Timer 27 | 28 | DEMO_IMAGE_DIR="demo_images/" 29 | NET_DEF_FILE="models/deploy.prototxt" 30 | MODEL_FILE="models/ctpn_trained_model.caffemodel" 31 | 32 | if len(sys.argv)>1 and sys.argv[1]=="--no-gpu": 33 | caffe.set_mode_cpu() 34 | else: 35 | caffe.set_mode_gpu() 36 | caffe.set_device(cfg.TEST_GPU_ID) 37 | 38 | # initialize the detectors 39 | text_proposals_detector=TextProposalDetector(CaffeModel(NET_DEF_FILE, MODEL_FILE)) 40 | text_detector=TextDetector(text_proposals_detector) 41 | 42 | demo_imnames=os.listdir(DEMO_IMAGE_DIR) 43 | timer=Timer() 44 | 45 | for im_name in demo_imnames: 46 | print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" 47 | print "Image: %s"%im_name 48 | 49 | im_file=osp.join(DEMO_IMAGE_DIR, im_name) 50 | im=cv2.imread(im_file) 51 | 52 | timer.tic() 53 | 54 | im, f=resize_im(im, cfg.SCALE, cfg.MAX_SCALE) 55 | text_lines=text_detector.detect(im) 56 | 57 | print "Number of the detected text lines: %s"%len(text_lines) 58 | print "Time: %f"%timer.toc() 59 | 60 | im_with_text_lines=draw_boxes(im, text_lines, caption=im_name, wait=False) 61 | 62 | print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" 63 | print "Thank you for trying our demo. Press any key to exit..." 
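# draw_boxes was called with wait=False above, so block here for a key press before moving on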
64 | cv2.waitKey(0) 65 | 66 | -------------------------------------------------------------------------------- /CTPN/src/utils/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /CTPN/src/text_proposal_connector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from other import clip_boxes 3 | from text_proposal_graph_builder import TextProposalGraphBuilder 4 | 5 | class TextProposalConnector: 6 | """ 7 | Connect text proposals into text lines 8 | """ 9 | def __init__(self): 10 | self.graph_builder=TextProposalGraphBuilder() 11 | 12 | def group_text_proposals(self, text_proposals, scores, im_size): 13 | graph=self.graph_builder.build_graph(text_proposals, scores, im_size) 14 | return graph.sub_graphs_connected() 15 | 16 | def fit_y(self, X, Y, x1, x2): 17 | assert len(X)!=0 18 | # if X only includes one point, the function returns the horizontal line y=Y[0] 19 | if np.sum(X==X[0])==len(X): 20 | return Y[0], Y[0] 21 | p=np.poly1d(np.polyfit(X, Y, 1)) 22 | return p(x1), p(x2) 23 | 24 | def get_text_lines(self, text_proposals, scores, im_size): 25 | # tp=text proposal 26 | tp_groups=self.group_text_proposals(text_proposals, scores, im_size) 27 | 
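# each row of text_lines: [x_min, y_min, x_max, y_max, score, line slope k, intercept b, mean proposal height + 2.5]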
text_lines=np.zeros((len(tp_groups), 8), np.float32) 28 | 29 | for index, tp_indices in enumerate(tp_groups): 30 | text_line_boxes=text_proposals[list(tp_indices)] 31 | num = np.size(text_line_boxes) 32 | X = (text_line_boxes[:,0] + text_line_boxes[:,2]) / 2 33 | Y = (text_line_boxes[:,1] + text_line_boxes[:,3]) / 2 34 | z1 = np.polyfit(X,Y,1) 35 | p1 = np.poly1d(z1) 36 | 37 | 38 | x0=np.min(text_line_boxes[:, 0]) 39 | x1=np.max(text_line_boxes[:, 2]) 40 | 41 | offset=(text_line_boxes[0, 2]-text_line_boxes[0, 0])*0.5 42 | 43 | lt_y, rt_y=self.fit_y(text_line_boxes[:, 0], text_line_boxes[:, 1], x0+offset, x1-offset) 44 | lb_y, rb_y=self.fit_y(text_line_boxes[:, 0], text_line_boxes[:, 3], x0+offset, x1-offset) 45 | 46 | # the score of a text line is the average score of the scores 47 | # of all text proposals contained in the text line 48 | score=scores[list(tp_indices)].sum()/float(len(tp_indices)) 49 | 50 | text_lines[index, 0]=x0 51 | text_lines[index, 1]=min(lt_y, rt_y) 52 | text_lines[index, 2]=x1 53 | text_lines[index, 3]=max(lb_y, rb_y) 54 | text_lines[index, 4]=score 55 | text_lines[index, 5]=z1[0] 56 | text_lines[index, 6]=z1[1] 57 | height = np.mean( (text_line_boxes[:,3]-text_line_boxes[:,1]) ) 58 | text_lines[index, 7]= height + 2.5 59 | #text_lines=clip_boxes(text_lines, im_size) 60 | 61 | 62 | return text_lines 63 | -------------------------------------------------------------------------------- /CTPN/src/detectors.py: -------------------------------------------------------------------------------- 1 | from cfg import Config as cfg 2 | from other import prepare_img, normalize 3 | import numpy as np 4 | from utils.cpu_nms import cpu_nms as nms 5 | from text_proposal_connector import TextProposalConnector 6 | 7 | 8 | class TextProposalDetector: 9 | """ 10 | Detect text proposals in an image 11 | """ 12 | def __init__(self, caffe_model): 13 | self.caffe_model=caffe_model 14 | 15 | def detect(self, im, mean): 16 | im_data=prepare_img(im, mean) 17 | _=self.caffe_model.forward2({ 18 | "data": im_data[np.newaxis, :], 19 | "im_info": np.array([[im_data.shape[1], im_data.shape[2]]], np.float32) 20 | }) 21 | rois=self.caffe_model.blob("rois") 22 | scores=self.caffe_model.blob("scores") 23 | return rois, scores 24 | 25 | 26 | class TextDetector: 27 | """ 28 | Detect text from an image 29 | """ 30 | def __init__(self, text_proposal_detector): 31 | self.text_proposal_detector=text_proposal_detector 32 | self.text_proposal_connector=TextProposalConnector() 33 | 34 | def detect(self, im): 35 | """ 36 | Detecting texts from an image 37 | :return: the bounding boxes of the detected texts 38 | """ 39 | text_proposals, scores=self.text_proposal_detector.detect(im, cfg.MEAN) 40 | keep_inds=np.where(scores>cfg.TEXT_PROPOSALS_MIN_SCORE)[0] 41 | text_proposals, scores=text_proposals[keep_inds], scores[keep_inds] 42 | 43 | sorted_indices=np.argsort(scores.ravel())[::-1] 44 | text_proposals, scores=text_proposals[sorted_indices], scores[sorted_indices] 45 | 46 | # nms for text proposals 47 | keep_inds=nms(np.hstack((text_proposals, scores)), cfg.TEXT_PROPOSALS_NMS_THRESH) 48 | text_proposals, scores=text_proposals[keep_inds], scores[keep_inds] 49 | 50 | scores=normalize(scores) 51 | 52 | text_lines=self.text_proposal_connector.get_text_lines(text_proposals, scores, im.shape[:2]) 53 | 54 | keep_inds=self.filter_boxes(text_lines) 55 | text_lines=text_lines[keep_inds] 56 | 57 | # nms for text lines 58 | if text_lines.shape[0]!=0: 59 | keep_inds=nms(text_lines, cfg.TEXT_LINE_NMS_THRESH) 60 | 
text_lines=text_lines[keep_inds] 61 | 62 | return text_lines 63 | 64 | def filter_boxes(self, boxes): 65 | heights=boxes[:, 3]-boxes[:, 1]+1 66 | widths=boxes[:, 2]-boxes[:, 0]+1 67 | scores=boxes[:, -1] 68 | return np.where((widths/heights>cfg.MIN_RATIO) & (scores>cfg.LINE_MIN_SCORE) & 69 | (widths>(cfg.TEXT_PROPOSALS_WIDTH*cfg.MIN_NUM_PROPOSALS)))[0] 70 | -------------------------------------------------------------------------------- /CTPN/README.md: -------------------------------------------------------------------------------- 1 | # Detecting Text in Natural Image with Connectionist Text Proposal Network 2 | The codes are used for implementing CTPN for scene text detection, described in: 3 | 4 | Z. Tian, W. Huang, T. He, P. He and Y. Qiao: Detecting Text in Natural Image with 5 | Connectionist Text Proposal Network, ECCV, 2016. 6 | 7 | Online demo is available at: [textdet.com](http://textdet.com) 8 | 9 | These demo codes (with our trained model) are for text-line detection (without 10 | the side-refinement part). 11 | 12 | # Required hardware 13 | You need a GPU. If you use CUDNN, about 1.5GB of free memory is required. If you don't use CUDNN, you will need about 5GB of free memory, and the testing time will increase slightly. Therefore, we strongly recommend using CUDNN. 14 | 15 | It's also possible to run the program on CPU only, but it's extremely slow due to the non-optimal CPU implementation. 16 | # Required software 17 | Python 2.7, Cython, and everything Caffe depends on. 18 | 19 | # How to run this code 20 | 21 | 1. Clone this repository with `git clone https://github.com/tianzhi0549/CTPN.git`. It will check out the CTPN code and the Caffe we ship. 22 | 23 | 2. Install the Caffe we ship with the steps below. 24 | * Install Caffe's dependencies. You can follow [this tutorial](http://caffe.berkeleyvision.org/installation.html). *Note: we need Python support. The CUDA version we need is 7.0.* 25 | * Enter the directory `caffe`. 26 | * Run `cp Makefile.config.example Makefile.config`. 27 | * Open Makefile.config and set `WITH_PYTHON_LAYER := 1`. If you want to use CUDNN, please also set `CUDNN := 1`. Uncomment `CPU_ONLY := 1` if you want to compile it without GPU support. 28 | 29 | *Note: To use CUDNN, you need to download CUDNN from NVIDIA's official website and install it in advance. The CUDNN version we use is 3.0.* 30 | * Run `make -j && make pycaffe`. 31 | 32 | 3. After Caffe is set up, you need to download a trained model (about 78M) from [Google Drive](https://drive.google.com/open?id=0B7c5Ix-XO7hqQWtKQ0lxTko4ZGs) or [our website](http://textdet.com/downloads/ctpn_trained_model.caffemodel), and then place it in the `models` directory. The model's name should be `ctpn_trained_model.caffemodel`. 33 | 34 | 4. Now, be sure you are in the root directory of the codes. Run `make` to compile some cython files. 35 | 36 | 5. Run `python tools/demo.py` for a demo. Or `python tools/demo.py --no-gpu` to run it under CPU mode. (A minimal API sketch is given at the end of this README.) 37 | 38 | # How to use other Caffe 39 | If you want to use another Caffe instead of the one we ship for some reason, you need to migrate the following layers into that Caffe. 40 | * Reverse 41 | * Transpose 42 | * Lstm 43 | 44 | # License 45 | The codes are released under the MIT License. 
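# API usage (sketch)

For reference, the detection flow driven by `tools/demo.py` reduces to a few calls. A minimal sketch, run from the repo root with the model placed as in step 3 above (the path setup mirrors `tools/cfg.py`):

    import sys
    sys.path.insert(0, "./tools"); sys.path.insert(0, "./src"); sys.path.insert(0, "./caffe/python")
    import cv2, caffe
    from cfg import Config as cfg
    from other import resize_im, CaffeModel
    from detectors import TextProposalDetector, TextDetector

    caffe.set_mode_gpu()
    caffe.set_device(cfg.TEST_GPU_ID)
    detector = TextDetector(TextProposalDetector(
        CaffeModel("models/deploy.prototxt", "models/ctpn_trained_model.caffemodel")))
    im, f = resize_im(cv2.imread("demo_images/img_1.jpg"), cfg.SCALE, cfg.MAX_SCALE)
    text_lines = detector.detect(im)  # one row per line: x1, y1, x2, y2, score, ...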
46 | -------------------------------------------------------------------------------- /CTPN/src/anchor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class AnchorText: 5 | def __init__(self): 6 | self.anchor_num=10 7 | 8 | def generate_basic_anchors(self, sizes, base_size=16): 9 | """ 10 | :param sizes: [(h1, w1), (h2, w2)...] 11 | :param base_size 12 | :return: 13 | """ 14 | assert(self.anchor_num==len(sizes)) 15 | base_anchor=np.array([0, 0, base_size-1, base_size-1], np.int32) 16 | anchors=np.zeros((len(sizes), 4), np.int32) 17 | index=0 18 | for h, w in sizes: 19 | anchors[index]=self.scale_anchor(base_anchor, h, w) 20 | index+=1 21 | return anchors 22 | 23 | def scale_anchor(self, anchor, h, w): 24 | x_ctr=(anchor[0]+anchor[2])*0.5 25 | y_ctr=(anchor[1]+anchor[3])*0.5 26 | scaled_anchor=anchor.copy() 27 | scaled_anchor[0]=x_ctr-w/2 28 | scaled_anchor[2]=x_ctr+w/2 29 | scaled_anchor[1]=y_ctr-h/2 30 | scaled_anchor[3]=y_ctr+h/2 31 | return scaled_anchor 32 | 33 | def apply_deltas_to_anchors(self, boxes_delta, anchors): 34 | """ 35 | :return [l t r b] 36 | """ 37 | anchor_y_ctr=(anchors[:, 1]+anchors[:, 3])/2. 38 | anchor_h=anchors[:, 3]-anchors[:, 1]+1. 39 | global_coords=np.zeros_like(boxes_delta, np.float32) 40 | global_coords[:, 1]=np.exp(boxes_delta[:, 1])*anchor_h 41 | global_coords[:, 0]=boxes_delta[:, 0]*anchor_h+anchor_y_ctr-global_coords[:, 1]/2. 42 | return np.hstack((anchors[:, [0]], global_coords[:, [0]], anchors[:, [2]], 43 | global_coords[:, [0]]+global_coords[:, [1]])).astype(np.float32) 44 | 45 | def basic_anchors(self): 46 | """ 47 | anchor [l t r b] 48 | """ 49 | heights=[11, 16, 23, 33, 48, 68, 97, 139, 198, 283] 50 | widths=[16] 51 | sizes=[] 52 | for h in heights: 53 | for w in widths: 54 | sizes.append((h, w)) 55 | return self.generate_basic_anchors(sizes) 56 | 57 | def locate_anchors(self, feat_map_size, feat_stride): 58 | """ 59 | return all anchors on the feature map 60 | """ 61 | basic_anchors_=self.basic_anchors() 62 | anchors=np.zeros((basic_anchors_.shape[0]*feat_map_size[0]*feat_map_size[1], 4), np.int32) 63 | index=0 64 | for y_ in range(feat_map_size[0]): 65 | for x_ in range(feat_map_size[1]): 66 | shift=np.array([x_, y_, x_, y_])*feat_stride 67 | anchors[index:index+basic_anchors_.shape[0], :]=basic_anchors_+shift 68 | index+=basic_anchors_.shape[0] 69 | return anchors 70 | -------------------------------------------------------------------------------- /crnn.pytorch/models/crnn.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class BidirectionalLSTM(nn.Module): 5 | 6 | def __init__(self, nIn, nHidden, nOut): 7 | super(BidirectionalLSTM, self).__init__() 8 | 9 | self.rnn = nn.LSTM(nIn, nHidden, bidirectional=True) 10 | self.embedding = nn.Linear(nHidden * 2, nOut) 11 | 12 | def forward(self, input): 13 | recurrent, _ = self.rnn(input) 14 | T, b, h = recurrent.size() 15 | t_rec = recurrent.view(T * b, h) 16 | 17 | output = self.embedding(t_rec) # [T * b, nOut] 18 | output = output.view(T, b, -1) 19 | 20 | return output 21 | 22 | 23 | class CRNN(nn.Module): 24 | 25 | def __init__(self, imgH, nc, nclass, nh, n_rnn=2, leakyRelu=False): 26 | super(CRNN, self).__init__() 27 | assert imgH % 16 == 0, 'imgH has to be a multiple of 16' 28 | 29 | ks = [3, 3, 3, 3, 3, 3, 2] 30 | ps = [1, 1, 1, 1, 1, 1, 0] 31 | ss = [1, 1, 1, 1, 1, 1, 1] 32 | nm = [64, 128, 256, 256, 512, 512, 512] 33 | 34 | cnn = nn.Sequential() 35 | 36 
| def convRelu(i, batchNormalization=False): 37 | nIn = nc if i == 0 else nm[i - 1] 38 | nOut = nm[i] 39 | cnn.add_module('conv{0}'.format(i), 40 | nn.Conv2d(nIn, nOut, ks[i], ss[i], ps[i])) 41 | if batchNormalization: 42 | cnn.add_module('batchnorm{0}'.format(i), nn.BatchNorm2d(nOut)) 43 | if leakyRelu: 44 | cnn.add_module('relu{0}'.format(i), 45 | nn.LeakyReLU(0.2, inplace=True)) 46 | else: 47 | cnn.add_module('relu{0}'.format(i), nn.ReLU(True)) 48 | 49 | convRelu(0) 50 | cnn.add_module('pooling{0}'.format(0), nn.MaxPool2d(2, 2)) # 64x16x64 51 | convRelu(1) 52 | cnn.add_module('pooling{0}'.format(1), nn.MaxPool2d(2, 2)) # 128x8x32 53 | convRelu(2, True) 54 | convRelu(3) 55 | cnn.add_module('pooling{0}'.format(2), 56 | nn.MaxPool2d((2, 2), (2, 1), (0, 1))) # 256x4x16 57 | convRelu(4, True) 58 | convRelu(5) 59 | cnn.add_module('pooling{0}'.format(3), 60 | nn.MaxPool2d((2, 2), (2, 1), (0, 1))) # 512x2x16 61 | convRelu(6, True) # 512x1x16 62 | 63 | self.cnn = cnn 64 | self.rnn = nn.Sequential( 65 | BidirectionalLSTM(512, nh, nh), 66 | BidirectionalLSTM(nh, nh, nclass)) 67 | 68 | def forward(self, input): 69 | # conv features 70 | conv = self.cnn(input) 71 | b, c, h, w = conv.size() 72 | assert h == 1, "the height of conv must be 1" 73 | conv = conv.squeeze(2) 74 | conv = conv.permute(2, 0, 1) # [w, b, c] 75 | 76 | # rnn features 77 | output = self.rnn(conv) 78 | 79 | return output 80 | -------------------------------------------------------------------------------- /crnn.pytorch/tool/convert_t7.lua: -------------------------------------------------------------------------------- 1 | require('table') 2 | require('torch') 3 | require('os') 4 | 5 | function clone(t) 6 | -- deep-copy a table 7 | if type(t) ~= "table" then return t end 8 | local meta = getmetatable(t) 9 | local target = {} 10 | for k, v in pairs(t) do 11 | if type(v) == "table" then 12 | target[k] = clone(v) 13 | else 14 | target[k] = v 15 | end 16 | end 17 | setmetatable(target, meta) 18 | return target 19 | end 20 | 21 | 22 | function tableMerge(lhs, rhs) 23 | output = clone(lhs) 24 | for _, v in pairs(rhs) do 25 | table.insert(output, v) 26 | end 27 | return output 28 | end 29 | 30 | 31 | function isInTable(val, val_list) 32 | for _, item in pairs(val_list) do 33 | if val == item then 34 | return true 35 | end 36 | end 37 | return false 38 | end 39 | 40 | 41 | function modelToList(model) 42 | local ignoreList = { 43 | 'nn.Copy', 44 | 'nn.AddConstant', 45 | 'nn.MulConstant', 46 | 'nn.View', 47 | 'nn.Transpose', 48 | 'nn.SplitTable', 49 | 'nn.SharedParallelTable', 50 | 'nn.JoinTable', 51 | } 52 | local state = {} 53 | local param 54 | for i, layer in pairs(model.modules) do 55 | local typeName = torch.type(layer) 56 | if not isInTable(typeName, ignoreList) then 57 | if typeName == 'nn.Sequential' or typeName == 'nn.ConcatTable' then 58 | param = modelToList(layer) 59 | elseif typeName == 'cudnn.SpatialConvolution' or typeName == 'nn.SpatialConvolution' then 60 | param = layer:parameters() 61 | elseif typeName == 'cudnn.SpatialBatchNormalization' or typeName == 'nn.SpatialBatchNormalization' then 62 | param = layer:parameters() 63 | bn_vars = {layer.running_mean, layer.running_var} 64 | param = tableMerge(param, bn_vars) 65 | elseif typeName == 'nn.LstmLayer' then 66 | param = layer:parameters() 67 | elseif typeName == 'nn.BiRnnJoin' then 68 | param = layer:parameters() 69 | elseif typeName == 'cudnn.SpatialMaxPooling' or typeName == 'nn.SpatialMaxPooling' then 70 | param = {} 71 | elseif typeName == 'cudnn.ReLU' or typeName == 
'nn.ReLU' then 72 | param = {} 73 | else 74 | print(string.format('Unknown class %s', typeName)) 75 | os.exit(0) 76 | end 77 | table.insert(state, {typeName, param}) 78 | else 79 | print(string.format('pass %s', typeName)) 80 | end 81 | end 82 | return state 83 | end 84 | 85 | 86 | function saveModel(model, output_path) 87 | local state = modelToList(model) 88 | torch.save(output_path, state) 89 | end 90 | -------------------------------------------------------------------------------- /CTPN/src/other.py: -------------------------------------------------------------------------------- 1 | import cv2, caffe 2 | import numpy as np 3 | from matplotlib import cm 4 | 5 | 6 | def prepare_img(im, mean): 7 | """ 8 | transform img into caffe's input img. 9 | """ 10 | im_data=np.transpose(im-mean, (2, 0, 1)) 11 | return im_data 12 | 13 | 14 | def draw_boxes(im, bboxes, is_display=True, color=None, caption="Image", wait=True): 15 | """ 16 | boxes: bounding boxes 17 | """ 18 | im=im.copy() 19 | for box in bboxes: 20 | if color==None: 21 | if len(box)==5 or len(box)==9: 22 | c=tuple(cm.jet([box[-1]])[0, 2::-1]*255) 23 | else: 24 | c=tuple(np.random.randint(0, 256, 3)) 25 | else: 26 | c=color 27 | cv2.rectangle(im, tuple(box[:2]), tuple(box[2:4]), c) 28 | if is_display: 29 | cv2.imshow(caption, im) 30 | if wait: 31 | cv2.waitKey(0) 32 | return im 33 | 34 | 35 | def threshold(coords, min_, max_): 36 | return np.maximum(np.minimum(coords, max_), min_) 37 | 38 | 39 | def clip_boxes(boxes, im_shape): 40 | """ 41 | Clip boxes to image boundaries. 42 | """ 43 | boxes[:, 0::2]=threshold(boxes[:, 0::2], 0, im_shape[1]-1) 44 | boxes[:, 1::2]=threshold(boxes[:, 1::2], 0, im_shape[0]-1) 45 | return boxes 46 | 47 | 48 | def normalize(data): 49 | if data.shape[0]==0: 50 | return data 51 | max_=data.max() 52 | min_=data.min() 53 | return (data-min_)/(max_-min_) if max_-min_!=0 else data-min_ 54 | 55 | 56 | def resize_im(im, scale, max_scale=None): 57 | f=float(scale)/min(im.shape[0], im.shape[1]) 58 | if max_scale!=None and f*max(im.shape[0], im.shape[1])>max_scale: 59 | f=float(max_scale)/max(im.shape[0], im.shape[1]) 60 | return cv2.resize(im, (0, 0), fx=f, fy=f), f 61 | 62 | 63 | class Graph: 64 | def __init__(self, graph): 65 | self.graph=graph 66 | 67 | def sub_graphs_connected(self): 68 | sub_graphs=[] 69 | for index in xrange(self.graph.shape[0]): 70 | if not self.graph[:, index].any() and self.graph[index, :].any(): 71 | v=index 72 | sub_graphs.append([v]) 73 | while self.graph[v, :].any(): 74 | v=np.where(self.graph[v, :])[0][0] 75 | sub_graphs[-1].append(v) 76 | return sub_graphs 77 | 78 | 79 | class CaffeModel: 80 | def __init__(self, net_def_file, model_file): 81 | self.net_def_file=net_def_file 82 | self.net=caffe.Net(net_def_file, model_file, caffe.TEST) 83 | 84 | def blob(self, key): 85 | return self.net.blobs[key].data.copy() 86 | 87 | def forward(self, input_data): 88 | return self.forward2({"data": input_data[np.newaxis, :]}) 89 | 90 | def forward2(self, input_data): 91 | for k, v in input_data.items(): 92 | self.net.blobs[k].reshape(*v.shape) 93 | self.net.blobs[k].data[...]=v 94 | return self.net.forward() 95 | 96 | def net_def_file(self): 97 | return self.net_def_file 98 | -------------------------------------------------------------------------------- /CTPN/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:7.0-runtime-ubuntu14.04 2 | MAINTAINER Varun Suresh 3 | 4 | RUN apt-get update && apt-get install -y --no-install-recommends \ 5 
| build-essential \ 6 | cmake \ 7 | git \ 8 | wget \ 9 | zip \ 10 | unzip \ 11 | libatlas-base-dev \ 12 | libboost-all-dev \ 13 | libgflags-dev \ 14 | libgoogle-glog-dev \ 15 | libhdf5-serial-dev \ 16 | libleveldb-dev \ 17 | liblmdb-dev \ 18 | libopencv-dev \ 19 | libprotobuf-dev \ 20 | libsnappy-dev \ 21 | protobuf-compiler \ 22 | python-dev \ 23 | python-numpy \ 24 | python-pip \ 25 | python-setuptools \ 26 | python-scipy && \ 27 | rm -rf /var/lib/apt/lists/* 28 | 29 | ENV CTPN_ROOT=/opt/ctpn 30 | WORKDIR $CTPN_ROOT 31 | 32 | RUN git clone --depth 1 https://github.com/tianzhi0549/CTPN.git 33 | WORKDIR $CTPN_ROOT/CTPN/caffe 34 | 35 | # Missing "packaging" package 36 | RUN pip install --upgrade pip 37 | RUN pip install packaging 38 | 39 | RUN cd python && for req in $(cat requirements.txt) pydot; do pip install $req; done && cd .. 40 | RUN git clone https://github.com/NVIDIA/nccl.git 41 | RUN apt-get update && apt-get install -y --no-install-recommends \ 42 | cuda=7.0-28 43 | WORKDIR / 44 | 45 | # Download the CUDA drivers from https://developer.nvidia.com/rdp/cudnn-archive and place it here : 46 | ADD cudnn-7.0-linux-x64-v3.0.8-prod.tgz / 47 | WORKDIR /cuda 48 | RUN cp -P include/cudnn.h /usr/include 49 | RUN cp -P lib64/libcudnn* /usr/lib/x86_64-linux-gnu/ 50 | 51 | WORKDIR $CTPN_ROOT/CTPN/caffe 52 | RUN cp Makefile.config.example Makefile.config 53 | RUN apt-get update && apt-get install -y --no-install-recommends \ 54 | vim 55 | RUN cd nccl && make -j install && cd .. && rm -rf nccl && \ 56 | mkdir build && cd build && \ 57 | cmake -DUSE_CUDNN=1 .. && \ 58 | WITH_PYTHON_LAYER=1 make -j"$(nproc)" && make pycaffe 59 | 60 | # Set the environment variables so that the paths are correctly configured 61 | ENV PYCAFFE_ROOT $CTPN_ROOT/CTPN/caffe/python 62 | ENV PYTHONPATH $PYCAFFE_ROOT:$PYTHONPATH 63 | ENV PATH $CTPN_ROOT/CTPN/caffe/build/tools:$PYCAFFE_ROOT:$PATH 64 | RUN echo "$CTPN_ROOT/CTPN/caffe/build/lib" >> /etc/ld.so.conf.d/caffe.conf && ldconfig 65 | 66 | # To make sure the python layer builds - Need to figure out a cleaner way to do this. 67 | RUN cp $CTPN_ROOT/CTPN/src/layers/* $CTPN_ROOT/CTPN/caffe/src/caffe/layers/ 68 | RUN cp $CTPN_ROOT/CTPN/src/*.py $CTPN_ROOT/CTPN/caffe/src/caffe/ 69 | RUN cp -r $CTPN_ROOT/CTPN/src/utils $CTPN_ROOT/CTPN/caffe/src/caffe/ 70 | 71 | # Install Opencv - 2.4.12 : 72 | 73 | RUN cd ~ && \ 74 | mkdir -p ocv-tmp && \ 75 | cd ocv-tmp && \ 76 | wget https://github.com/Itseez/opencv/archive/2.4.12.zip && \ 77 | unzip 2.4.12.zip && \ 78 | cd opencv-2.4.12 && \ 79 | mkdir release && \ 80 | cd release && \ 81 | cmake -D CMAKE_BUILD_TYPE=RELEASE \ 82 | -D CMAKE_INSTALL_PREFIX=/usr/local \ 83 | -D BUILD_PYTHON_SUPPORT=ON \ 84 | .. && \ 85 | make -j8 && \ 86 | make install && \ 87 | rm -rf ~/ocv-tmp 88 | 89 | WORKDIR $CTPN_ROOT/CTPN 90 | RUN make 91 | -------------------------------------------------------------------------------- /CTPN/src/text_proposal_graph_builder.py: -------------------------------------------------------------------------------- 1 | from cfg import Config as cfg 2 | import numpy as np 3 | from other import Graph 4 | 5 | 6 | class TextProposalGraphBuilder: 7 | """ 8 | Build Text proposals into a graph. 
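Each node is a text proposal; a directed edge i -> j means proposal j is the chosen horizontal succession of proposal i (see build_graph below).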
9 | """ 10 | def get_successions(self, index): 11 | box=self.text_proposals[index] 12 | results=[] 13 | for left in range(int(box[0])+1, min(int(box[0])+cfg.MAX_HORIZONTAL_GAP+1, self.im_size[1])): 14 | adj_box_indices=self.boxes_table[left] 15 | for adj_box_index in adj_box_indices: 16 | if self.meet_v_iou(adj_box_index, index): 17 | results.append(adj_box_index) 18 | if len(results)!=0: 19 | return results 20 | return results 21 | 22 | def get_precursors(self, index): 23 | box=self.text_proposals[index] 24 | results=[] 25 | for left in range(int(box[0])-1, max(int(box[0]-cfg.MAX_HORIZONTAL_GAP), 0)-1, -1): 26 | adj_box_indices=self.boxes_table[left] 27 | for adj_box_index in adj_box_indices: 28 | if self.meet_v_iou(adj_box_index, index): 29 | results.append(adj_box_index) 30 | if len(results)!=0: 31 | return results 32 | return results 33 | 34 | def is_succession_node(self, index, succession_index): 35 | precursors=self.get_precursors(succession_index) 36 | if self.scores[index]>=np.max(self.scores[precursors]): 37 | return True 38 | return False 39 | 40 | def meet_v_iou(self, index1, index2): 41 | def overlaps_v(index1, index2): 42 | h1=self.heights[index1] 43 | h2=self.heights[index2] 44 | y0=max(self.text_proposals[index2][1], self.text_proposals[index1][1]) 45 | y1=min(self.text_proposals[index2][3], self.text_proposals[index1][3]) 46 | return max(0, y1-y0+1)/min(h1, h2) 47 | 48 | def size_similarity(index1, index2): 49 | h1=self.heights[index1] 50 | h2=self.heights[index2] 51 | return min(h1, h2)/max(h1, h2) 52 | 53 | return overlaps_v(index1, index2)>=cfg.MIN_V_OVERLAPS and \ 54 | size_similarity(index1, index2)>=cfg.MIN_SIZE_SIM 55 | 56 | def build_graph(self, text_proposals, scores, im_size): 57 | self.text_proposals=text_proposals 58 | self.scores=scores 59 | self.im_size=im_size 60 | self.heights=text_proposals[:, 3]-text_proposals[:, 1]+1 61 | 62 | boxes_table=[[] for _ in range(self.im_size[1])] 63 | for index, box in enumerate(text_proposals): 64 | boxes_table[int(box[0])].append(index) 65 | self.boxes_table=boxes_table 66 | 67 | graph=np.zeros((text_proposals.shape[0], text_proposals.shape[0]), np.bool) 68 | 69 | for index, box in enumerate(text_proposals): 70 | successions=self.get_successions(index) 71 | if len(successions)==0: 72 | continue 73 | succession_index=successions[np.argmax(scores[successions])] 74 | if self.is_succession_node(index, succession_index): 75 | # NOTE: a box can have multiple successions(precursors) if multiple successions(precursors) 76 | # have equal scores. 
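# is_succession_node keeps the edge only when this box scores at least as high as
# every precursor of its chosen succession, so such ties are resolved consistently.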
77 | graph[index, succession_index]=True 78 | return Graph(graph) 79 | -------------------------------------------------------------------------------- /crnn.pytorch/test/test_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # encoding: utf-8 3 | 4 | import sys 5 | import unittest 6 | import torch 7 | from torch.autograd import Variable 8 | import collections 9 | origin_path = sys.path 10 | sys.path.append("..") 11 | import crnn_utils as utils 12 | sys.path = origin_path 13 | 14 | 15 | def equal(a, b): 16 | if isinstance(a, torch.Tensor): 17 | return a.equal(b) 18 | elif isinstance(a, str): 19 | return a == b 20 | elif isinstance(a, collections.Iterable): 21 | res = True 22 | for (x, y) in zip(a, b): 23 | res = res & equal(x, y) 24 | return res 25 | else: 26 | return a == b 27 | 28 | 29 | class utilsTestCase(unittest.TestCase): 30 | 31 | def checkConverter(self): 32 | encoder = utils.strLabelConverter('abcdefghijklmnopqrstuvwxyz') 33 | 34 | # Encode 35 | # trivial mode 36 | result = encoder.encode('efa') 37 | target = (torch.IntTensor([5, 6, 1]), torch.IntTensor([3])) 38 | self.assertTrue(equal(result, target)) 39 | 40 | # batch mode 41 | result = encoder.encode(['efa', 'ab']) 42 | target = (torch.IntTensor([5, 6, 1, 1, 2]), torch.IntTensor([3, 2])) 43 | self.assertTrue(equal(result, target)) 44 | 45 | # Decode 46 | # trivial mode 47 | result = encoder.decode( 48 | torch.IntTensor([5, 6, 1]), torch.IntTensor([3])) 49 | target = 'efa' 50 | self.assertTrue(equal(result, target)) 51 | 52 | # replicate mode 53 | result = encoder.decode( 54 | torch.IntTensor([5, 5, 0, 1]), torch.IntTensor([4])) 55 | target = 'ea' 56 | self.assertTrue(equal(result, target)) 57 | 58 | # raise AssertionError 59 | def f(): 60 | result = encoder.decode( 61 | torch.IntTensor([5, 5, 0, 1]), torch.IntTensor([3])) 62 | self.assertRaises(AssertionError, f) 63 | 64 | # batch mode 65 | result = encoder.decode( 66 | torch.IntTensor([5, 6, 1, 1, 2]), torch.IntTensor([3, 2])) 67 | target = ['efa', 'ab'] 68 | self.assertTrue(equal(result, target)) 69 | 70 | def checkOneHot(self): 71 | v = torch.LongTensor([1, 2, 1, 2, 0]) 72 | v_length = torch.LongTensor([2, 3]) 73 | v_onehot = utils.oneHot(v, v_length, 4) 74 | target = torch.FloatTensor([[[0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 0]], 75 | [[0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]]]) 76 | assert target.equal(v_onehot) 77 | 78 | def checkAverager(self): 79 | acc = utils.averager() 80 | acc.add(Variable(torch.Tensor([1, 2]))) 81 | acc.add(Variable(torch.Tensor([[5, 6]]))) 82 | assert acc.val() == 3.5 83 | 84 | acc = utils.averager() 85 | acc.add(torch.Tensor([1, 2])) 86 | acc.add(torch.Tensor([[5, 6]])) 87 | assert acc.val() == 3.5 88 | 89 | def checkAssureRatio(self): 90 | img = torch.Tensor([[1], [3]]).view(1, 1, 2, 1) 91 | img = Variable(img) 92 | img = utils.assureRatio(img) 93 | assert torch.Size([1, 1, 2, 2]) == img.size() 94 | 95 | 96 | def _suite(): 97 | suite = unittest.TestSuite() 98 | suite.addTest(utilsTestCase("checkConverter")) 99 | suite.addTest(utilsTestCase("checkOneHot")) 100 | suite.addTest(utilsTestCase("checkAverager")) 101 | suite.addTest(utilsTestCase("checkAssureRatio")) 102 | return suite 103 | 104 | 105 | if __name__ == "__main__": 106 | suite = _suite() 107 | runner = unittest.TextTestRunner() 108 | runner.run(suite) 109 | -------------------------------------------------------------------------------- /crnnport.py: -------------------------------------------------------------------------------- 1 | 
#coding:utf-8 2 | 3 | import random 4 | import torch 5 | from torch.autograd import Variable 6 | import numpy as np 7 | import os 8 | import crnn_utils 9 | import dataset 10 | from PIL import Image 11 | import models.crnn as crnn 12 | import keys 13 | from math import * 14 | #import mahotas 15 | import cv2 16 | 17 | class CRNNRecognizer: 18 | 19 | def __init__(self, model_path): 20 | #def crnnSource(model_path, use_gpu=True): 21 | alphabet = keys.alphabet # Chinese words 22 | self.converter = crnn_utils.strLabelConverter(alphabet) 23 | # note that in https://github.com/bear63/sceneReco support multi GPU. 24 | # model = crnn.CRNN(32, 1, len(alphabet)+1, 256, 1).cuda() 25 | self.model = crnn.CRNN(32, 1, len(alphabet)+1, 256) 26 | self.cpu_model = crnn.CRNN(32, 1, len(alphabet)+1, 256) 27 | if torch.cuda.is_available(): 28 | self.model = self.model.cuda() 29 | print('loading pretrained model from %s' % model_path) 30 | #model_path = './crnn/samples/netCRNN63.pth' 31 | model_state_dict = torch.load(model_path) 32 | self.model.load_state_dict(model_state_dict) 33 | self.cpu_model.load_state_dict(model_state_dict) 34 | #self.use_gpu = use_gpu 35 | #return model,converter 36 | 37 | 38 | def crnnRec(self, im, text_recs, use_gpu=True): 39 | texts = [] 40 | index = 0 41 | for rec in text_recs: 42 | pt1 = (rec[0],rec[1]) 43 | pt2 = (rec[2],rec[3]) 44 | pt3 = (rec[6],rec[7]) 45 | pt4 = (rec[4],rec[5]) 46 | partImg = self.dumpRotateImage(im,degrees(atan2(pt2[1]-pt1[1],pt2[0]-pt1[0])),pt1,pt2,pt3,pt4) 47 | #mahotas.imsave('%s.jpg'%index, partImg) 48 | 49 | 50 | image = Image.fromarray(partImg).convert('L') 51 | #height,width,channel=partImg.shape[:3] 52 | #print(height,width,channel) 53 | #print(image.size) 54 | 55 | #image = Image.open('./img/t4.jpg').convert('L') 56 | scale = image.size[1]*1.0 / 32 57 | w = image.size[0] / scale 58 | w = int(w) 59 | #print(w) 60 | 61 | transformer = dataset.resizeNormalize((w, 32)) 62 | image = transformer(image) 63 | model = self.cpu_model 64 | if use_gpu and torch.cuda.is_available(): 65 | image = image.cuda() 66 | model = self.model 67 | 68 | image = image.view(1, *image.size()) 69 | image = Variable(image) 70 | model.eval() 71 | print(type(model),type(image)) 72 | preds = model(image) 73 | _, preds = preds.max(2) 74 | preds = preds.squeeze(0) 75 | preds = preds.transpose(1, 0).contiguous().view(-1) 76 | preds_size = Variable(torch.IntTensor([preds.size(0)])) 77 | raw_pred = self.converter.decode(preds.data, preds_size.data, raw=True) 78 | sim_pred = self.converter.decode(preds.data, preds_size.data, raw=False) 79 | print('%-20s => %-20s' % (raw_pred, sim_pred)) 80 | #print(index) 81 | #print(sim_pred) 82 | index = index + 1 83 | texts.append(sim_pred) 84 | 85 | return texts 86 | 87 | def dumpRotateImage(self, img,degree,pt1,pt2,pt3,pt4): 88 | height,width=img.shape[:2] 89 | heightNew = int(width * fabs(sin(radians(degree))) + height * fabs(cos(radians(degree)))) 90 | widthNew = int(height * fabs(sin(radians(degree))) + width * fabs(cos(radians(degree)))) 91 | matRotation=cv2.getRotationMatrix2D((width/2,height/2),degree,1) 92 | matRotation[0, 2] += (widthNew - width) / 2 93 | matRotation[1, 2] += (heightNew - height) / 2 94 | imgRotation = cv2.warpAffine(img, matRotation, (widthNew, heightNew), borderValue=(255, 255, 255)) 95 | pt1 = list(pt1) 96 | pt3 = list(pt3) 97 | 98 | 99 | [[pt1[0]], [pt1[1]]] = np.dot(matRotation, np.array([[pt1[0]], [pt1[1]], [1]])) 100 | [[pt3[0]], [pt3[1]]] = np.dot(matRotation, np.array([[pt3[0]], [pt3[1]], [1]])) 101 | 
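# crop the axis-aligned region spanned by the rotated corner points pt1 (top-left) and pt3 (bottom-right)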
imgOut=imgRotation[int(pt1[1]):int(pt3[1]),int(pt1[0]):int(pt3[0])] 102 | height,width=imgOut.shape[:2] 103 | return imgOut 104 | 105 | -------------------------------------------------------------------------------- /crnn.pytorch/dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # encoding: utf-8 3 | 4 | import random 5 | import torch 6 | from torch.utils.data import Dataset 7 | from torch.utils.data import sampler 8 | import torchvision.transforms as transforms 9 | import lmdb 10 | import six 11 | import sys 12 | from PIL import Image 13 | import numpy as np 14 | 15 | 16 | class lmdbDataset(Dataset): 17 | 18 | def __init__(self, root=None, transform=None, target_transform=None): 19 | self.env = lmdb.open( 20 | root, 21 | max_readers=1, 22 | readonly=True, 23 | lock=False, 24 | readahead=False, 25 | meminit=False) 26 | 27 | if not self.env: 28 | print('cannot create lmdb from %s' % (root)) 29 | sys.exit(0) 30 | 31 | with self.env.begin(write=False) as txn: 32 | nSamples = int(txn.get('num-samples')) 33 | self.nSamples = nSamples 34 | 35 | self.transform = transform 36 | self.target_transform = target_transform 37 | 38 | def __len__(self): 39 | return self.nSamples 40 | 41 | def __getitem__(self, index): 42 | assert index <= len(self), 'index range error' 43 | index += 1 44 | with self.env.begin(write=False) as txn: 45 | img_key = 'image-%09d' % index 46 | imgbuf = txn.get(img_key) 47 | 48 | buf = six.BytesIO() 49 | buf.write(imgbuf) 50 | buf.seek(0) 51 | try: 52 | img = Image.open(buf).convert('L') 53 | except IOError: 54 | print('Corrupted image for %d' % index) 55 | return self[index + 1] 56 | 57 | if self.transform is not None: 58 | img = self.transform(img) 59 | 60 | label_key = 'label-%09d' % index 61 | label = str(txn.get(label_key)) 62 | 63 | if self.target_transform is not None: 64 | label = self.target_transform(label) 65 | 66 | return (img, label) 67 | 68 | 69 | class resizeNormalize(object): 70 | 71 | def __init__(self, size, interpolation=Image.BILINEAR): 72 | self.size = size 73 | self.interpolation = interpolation 74 | self.toTensor = transforms.ToTensor() 75 | 76 | def __call__(self, img): 77 | img = img.resize(self.size, self.interpolation) 78 | img = self.toTensor(img) 79 | img.sub_(0.5).div_(0.5) 80 | return img 81 | 82 | 83 | class randomSequentialSampler(sampler.Sampler): 84 | 85 | def __init__(self, data_source, batch_size): 86 | self.num_samples = len(data_source) 87 | self.batch_size = batch_size 88 | 89 | def __iter__(self): 90 | n_batch = len(self) // self.batch_size 91 | tail = len(self) % self.batch_size 92 | index = torch.LongTensor(len(self)).fill_(0) 93 | for i in range(n_batch): 94 | random_start = random.randint(0, len(self) - self.batch_size) 95 | batch_index = random_start + torch.range(0, self.batch_size - 1) 96 | index[i * self.batch_size:(i + 1) * self.batch_size] = batch_index 97 | # deal with tail 98 | if tail: 99 | random_start = random.randint(0, len(self) - self.batch_size) 100 | tail_index = random_start + torch.range(0, tail - 1) 101 | index[(i + 1) * self.batch_size:] = tail_index 102 | 103 | return iter(index) 104 | 105 | def __len__(self): 106 | return self.num_samples 107 | 108 | 109 | class alignCollate(object): 110 | 111 | def __init__(self, imgH=32, imgW=100, keep_ratio=False, min_ratio=1): 112 | self.imgH = imgH 113 | self.imgW = imgW 114 | self.keep_ratio = keep_ratio 115 | self.min_ratio = min_ratio 116 | 117 | def __call__(self, batch): 118 | images, labels = 
zip(*batch) 119 | 120 | imgH = self.imgH 121 | imgW = self.imgW 122 | if self.keep_ratio: 123 | ratios = [] 124 | for image in images: 125 | w, h = image.size 126 | ratios.append(w / float(h)) 127 | ratios.sort() 128 | max_ratio = ratios[-1] 129 | imgW = int(np.floor(max_ratio * imgH)) 130 | imgW = max(imgH * self.min_ratio, imgW) # assure imgW >= imgH * min_ratio 131 | 132 | transform = resizeNormalize((imgW, imgH)) 133 | images = [transform(image) for image in images] 134 | images = torch.cat([t.unsqueeze(0) for t in images], 0) 135 | 136 | return images, labels 137 | -------------------------------------------------------------------------------- /crnn.pytorch/crnn_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # encoding: utf-8 3 | 4 | import torch 5 | import torch.nn as nn 6 | from torch.autograd import Variable 7 | import collections 8 | 9 | 10 | class strLabelConverter(object): 11 | """Convert between str and label. 12 | 13 | NOTE: 14 | Insert `blank` into the alphabet for CTC. 15 | 16 | Args: 17 | alphabet (str): set of the possible characters. 18 | ignore_case (bool, default=True): whether to ignore case. 19 | """ 20 | 21 | def __init__(self, alphabet, ignore_case=True): 22 | self._ignore_case = ignore_case 23 | if self._ignore_case: 24 | alphabet = alphabet.lower() 25 | self.alphabet = alphabet + '-' # for `-1` index 26 | 27 | self.dict = {} 28 | for i, char in enumerate(alphabet): 29 | # NOTE: 0 is reserved for 'blank' required by wrap_ctc 30 | self.dict[char] = i + 1 31 | 32 | def encode(self, text): 33 | """Support batch or single str. 34 | 35 | Args: 36 | text (str or list of str): texts to convert. 37 | 38 | Returns: 39 | torch.IntTensor [length_0 + length_1 + ... length_{n - 1}]: encoded texts. 40 | torch.IntTensor [n]: length of each text. 41 | """ 42 | if isinstance(text, str): 43 | text = [ 44 | self.dict[char.lower() if self._ignore_case else char] 45 | for char in text 46 | ] 47 | length = [len(text)] 48 | elif isinstance(text, collections.Iterable): 49 | length = [len(s) for s in text] 50 | text = ''.join(text) 51 | text, _ = self.encode(text) 52 | return (torch.IntTensor(text), torch.IntTensor(length)) 53 | 54 | def decode(self, t, length, raw=False): 55 | """Decode encoded texts back into strs. 56 | 57 | Args: 58 | torch.IntTensor [length_0 + length_1 + ... length_{n - 1}]: encoded texts. 59 | torch.IntTensor [n]: length of each text. 60 | 61 | Raises: 62 | AssertionError: when the texts and their lengths do not match. 63 | 64 | Returns: 65 | text (str or list of str): texts to convert. 66 | """ 67 | if length.numel() == 1: 68 | length = length[0] 69 | assert t.numel() == length, "text with length: {} does not match declared length: {}".format(t.numel(), length) 70 | if raw: 71 | return ''.join([self.alphabet[i - 1] for i in t]) 72 | else: 73 | char_list = [] 74 | for i in range(length): 75 | if t[i] != 0 and (not (i > 0 and t[i - 1] == t[i])): 76 | char_list.append(self.alphabet[t[i] - 1]) 77 | return ''.join(char_list) 78 | else: 79 | # batch mode 80 | assert t.numel() == length.sum(), "texts with length: {} does not match declared length: {}".format(t.numel(), length.sum()) 81 | texts = [] 82 | index = 0 83 | for i in range(length.numel()): 84 | l = length[i] 85 | texts.append( 86 | self.decode( 87 | t[index:index + l], torch.IntTensor([l]), raw=raw)) 88 | index += l 89 | return texts 90 | 91 | 92 | class averager(object): 93 | """Compute average for `torch.Variable` and `torch.Tensor`. 
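``add`` accumulates the element sum and count, ``val`` returns the running mean, and ``reset`` clears both.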
""" 94 | 95 | def __init__(self): 96 | self.reset() 97 | 98 | def add(self, v): 99 | if isinstance(v, Variable): 100 | count = v.data.numel() 101 | v = v.data.sum() 102 | elif isinstance(v, torch.Tensor): 103 | count = v.numel() 104 | v = v.sum() 105 | 106 | self.n_count += count 107 | self.sum += v 108 | 109 | def reset(self): 110 | self.n_count = 0 111 | self.sum = 0 112 | 113 | def val(self): 114 | res = 0 115 | if self.n_count != 0: 116 | res = self.sum / float(self.n_count) 117 | return res 118 | 119 | 120 | def oneHot(v, v_length, nc): 121 | batchSize = v_length.size(0) 122 | maxLength = v_length.max() 123 | v_onehot = torch.FloatTensor(batchSize, maxLength, nc).fill_(0) 124 | acc = 0 125 | for i in range(batchSize): 126 | length = v_length[i] 127 | label = v[acc:acc + length].view(-1, 1).long() 128 | v_onehot[i, :length].scatter_(1, label, 1.0) 129 | acc += length 130 | return v_onehot 131 | 132 | 133 | def loadData(v, data): 134 | v.data.resize_(data.size()).copy_(data) 135 | 136 | 137 | def prettyPrint(v): 138 | print('Size {0}, Type: {1}'.format(str(v.size()), v.data.type())) 139 | print('| Max: %f | Min: %f | Mean: %f' % (v.max().data[0], v.min().data[0], 140 | v.mean().data[0])) 141 | 142 | 143 | def assureRatio(img): 144 | """Ensure imgH <= imgW.""" 145 | b, c, h, w = img.size() 146 | if h > w: 147 | main = nn.UpsamplingBilinear2d(size=(h, h), scale_factor=None) 148 | img = main(img) 149 | return img 150 | -------------------------------------------------------------------------------- /crnn.pytorch/keys.py: -------------------------------------------------------------------------------- 1 | #coding:UTF-8 2 | alphabet = u'\'疗绚诚娇溜题贿者廖更纳加奉公一就汴计与路房原妇208-7其>:],,骑刈全消昏傈安久钟嗅不影处驽蜿资关椤地瘸专问忖票嫉炎韵要月田节陂鄙捌备拳伺眼网盎大傍心东愉汇蹿科每业里航晏字平录先13彤鲶产稍督腴有象岳注绍在泺文定核名水过理让偷率等这发”为含肥酉相鄱七编猥锛日镀蒂掰倒辆栾栗综涩州雌滑馀了机块司宰甙兴矽抚保用沧秩如收息滥页疑埠!!姥异橹钇向下跄的椴沫国绥獠报开民蜇何分凇长讥藏掏施羽中讲派嘟人提浼间世而古多倪唇饯控庚首赛蜓味断制觉技替艰溢潮夕钺外摘枋动双单啮户枇确锦曜杜或能效霜盒然侗电晁放步鹃新杖蜂吒濂瞬评总隍对独合也是府青天诲墙组滴级邀帘示已时骸仄泅和遨店雇疫持巍踮境只亨目鉴崤闲体泄杂作般轰化解迂诿蛭璀腾告版服省师小规程线海办引二桧牌砺洄裴修图痫胡许犊事郛基柴呼食研奶律蛋因葆察戏褒戒再李骁工貂油鹅章啄休场给睡纷豆器捎说敏学会浒设诊格廓查来霓室溆¢诡寥焕舜柒狐回戟砾厄实翩尿五入径惭喹股宇篝|;美期云九祺扮靠锝槌系企酰阊暂蚕忻豁本羹执条钦H獒限进季楦于芘玖铋茯未答粘括样精欠矢甥帷嵩扣令仔风皈行支部蓉刮站蜡救钊汗松嫌成可.鹤院从交政怕活调球局验髌第韫谗串到圆年米/*友忿检区看自敢刃个兹弄流留同没齿星聆轼湖什三建蛔儿椋汕震颧鲤跟力情璺铨陪务指族训滦鄣濮扒商箱十召慷辗所莞管护臭横硒嗓接侦六露党馋驾剖高侬妪幂猗绺骐央酐孝筝课徇缰门男西项句谙瞒秃篇教碲罚声呐景前富嘴鳌稀免朋啬睐去赈鱼住肩愕速旁波厅健茼厥鲟谅投攸炔数方击呋谈绩别愫僚躬鹧胪炳招喇膨泵蹦毛结54谱识陕粽婚拟构且搜任潘比郢妨醪陀桔碘扎选哈骷楷亿明缆脯监睫逻婵共赴淝凡惦及达揖谩澹减焰蛹番祁柏员禄怡峤龙白叽生闯起细装谕竟聚钙上导渊按艾辘挡耒盹饪臀记邮蕙受各医搂普滇朗茸带翻酚(光堤墟蔷万幻〓瑙辈昧盏亘蛀吉铰请子假闻税井诩哨嫂好面琐校馊鬣缂营访炖占农缀否经钚棵趟张亟吏茶谨捻论迸堂玉信吧瞠乡姬寺咬溏苄皿意赉宝尔钰艺特唳踉都荣倚登荐丧奇涵批炭近符傩感道着菊虹仲众懈濯颞眺南释北缝标既茗整撼迤贲挎耱拒某妍卫哇英矶藩治他元领膜遮穗蛾飞荒棺劫么市火温拈棚洼转果奕卸迪伸泳斗邡侄涨屯萋胭氡崮枞惧冒彩斜手豚随旭淑妞形菌吲沱争驯歹挟兆柱传至包内响临红功弩衡寂禁老棍耆渍织害氵渑布载靥嗬虽苹咨娄库雉榜帜嘲套瑚亲簸欧边6腿旮抛吹瞳得镓梗厨继漾愣憨士策窑抑躯襟脏参贸言干绸鳄穷藜音折详)举悍甸癌黎谴死罩迁寒驷袖媒蒋掘模纠恣观祖蛆碍位稿主澧跌筏京锏帝贴证糠才黄鲸略炯饱四出园犀牧容汉杆浈汰瑷造虫瘩怪驴济应花沣谔夙旅价矿以考su呦晒巡茅准肟瓴詹仟褂译桌混宁怦郑抿些余鄂饴攒珑群阖岔琨藓预环洮岌宀杲瀵最常囡周踊女鼓袭喉简范薯遐疏粱黜禧法箔斤遥汝奥直贞撑置绱集她馅逗钧橱魉[恙躁唤9旺膘待脾惫购吗依盲度瘿蠖俾之镗拇鲵厝簧续款展啃表剔品钻腭损清锶统涌寸滨贪链吠冈伎迥咏吁览防迅失汾阔逵绀蔑列川凭努熨揪利俱绉抢鸨我即责膦易毓鹊刹玷岿空嘞绊排术估锷违们苟铜播肘件烫审鲂广像铌惰铟巳胍鲍康憧色恢想拷尤疳知SYFDA峄裕帮握搔氐氘难墒沮雨叁缥悴藐湫娟苑稠颛簇后阕闭蕤缚怎佞码嘤蔡痊舱螯帕赫昵升烬岫、疵蜻髁蕨隶烛械丑盂梁强鲛由拘揉劭龟撤钩呕孛费妻漂求阑崖秤甘通深补赃坎床啪承吼量暇钼烨阂擎脱逮称P神属矗华届狍葑汹育患窒蛰佼静槎运鳗庆逝曼疱克代官此麸耧蚌晟例础榛副测唰缢迹灬霁身岁赭扛又菡乜雾板读陷徉贯郁虑变钓菜圾现琢式乐维渔浜左吾脑钡警T啵拴偌漱湿硕止骼魄积燥联踢玛|则窿见振畿送班钽您赵刨印讨踝籍谡舌崧汽蔽沪酥绒怖财帖肱私莎勋羔霸励哼帐将帅渠纪婴娩岭厘滕吻伤坝冠戊隆瘁介涧物黍并姗奢蹑掣垸锴命箍捉病辖琰眭迩艘绌繁寅若毋思诉类诈燮轲酮狂重反职筱县委磕绣奖晋濉志徽肠呈獐坻口片碰几村柿劳料获亩惕晕厌号罢池正鏖煨家棕复尝懋蜥锅岛扰队坠瘾钬@卧疣镇譬冰彷频黯据垄采八缪瘫型熹砰楠襁箐但嘶绳啤拍盥穆傲洗盯塘怔筛丿台恒喂葛永¥烟酒桦书砂蚝缉态瀚袄圳轻蛛超榧遛姒奘铮右荽望偻卡丶氰附做革索戚坨桷唁垅榻岐偎坛莨山殊微骇陈爨推嗝驹澡藁呤卤嘻糅逛侵郓酌德摇※鬃被慨殡羸昌泡戛鞋河宪沿玲鲨翅哽源铅语照邯址荃佬顺鸳町霭睾瓢夸椁晓酿痈咔侏券噎湍签嚷离午尚社锤背孟使浪缦潍鞅军姹驶笑鳟鲁》孽钜绿洱礴焯椰颖囔乌孔巴互性椽哞聘昨早暮胶炀隧低彗昝铁呓氽藉喔癖瑗姨权胱韦堑蜜酋楝砝毁靓歙锲究屋喳骨辨碑武鸠宫辜烊适坡殃培佩供走蜈迟翼况姣凛浔吃飘债犟金促苛崇坂莳畔绂兵蠕斋根砍亢欢恬崔剁餐榫快扶‖濒缠鳜当彭驭浦篮昀锆秸钳弋娣瞑夷龛苫拱致%嵊障隐弑初娓抉汩累蓖"唬助苓昙押毙破城郧逢嚏獭瞻溱婿赊跨恼璧萃姻貉灵炉密氛陶砸谬衔点琛沛枳层岱诺脍榈埂征冷裁打蹴素瘘逞蛐聊激腱萘踵飒蓟吆取咙簋涓矩曝挺揣座你史
舵焱尘苏笈脚溉榨诵樊邓焊义庶儋蟋蒲赦呷杞诠豪还试颓茉太除紫逃痴草充鳕珉祗墨渭烩蘸慕璇镶穴嵘恶骂险绋幕碉肺戳刘潞秣纾潜銮洛须罘销瘪汞兮屉r林厕质探划狸殚善煊烹〒锈逯宸辍泱柚袍远蹋嶙绝峥娥缍雀徵认镱谷=贩勉撩鄯斐洋非祚泾诒饿撬威晷搭芍锥笺蓦候琊档礁沼卵荠忑朝凹瑞头仪弧孵畏铆突衲车浩气茂悖厢枕酝戴湾邹飚攘锂写宵翁岷无喜丈挑嗟绛殉议槽具醇淞笃郴阅饼底壕砚弈询缕庹翟零筷暨舟闺甯撞麂茌蔼很珲捕棠角阉媛娲诽剿尉爵睬韩诰匣危糍镯立浏阳少盆舔擘匪申尬铣旯抖赘瓯居ˇ哮游锭茏歌坏甚秒舞沙仗劲潺阿燧郭嗖霏忠材奂耐跺砀输岖媳氟极摆灿今扔腻枝奎药熄吨话q额慑嘌协喀壳埭视著於愧陲翌峁颅佛腹聋侯咎叟秀颇存较罪哄岗扫栏钾羌己璨枭霉煌涸衿键镝益岢奏连夯睿冥均糖狞蹊稻爸刿胥煜丽肿璃掸跚灾垂樾濑乎莲窄犹撮战馄软络显鸢胸宾妲恕埔蝌份遇巧瞟粒恰剥桡博讯凯堇阶滤卖斌骚彬兑磺樱舷两娱福仃差找桁÷净把阴污戬雷碓蕲楚罡焖抽妫咒仑闱尽邑菁爱贷沥鞑牡嗉崴骤塌嗦订拮滓捡锻次坪杩臃箬融珂鹗宗枚降鸬妯阄堰盐毅必杨崃俺甬状莘货耸菱腼铸唏痤孚澳懒溅翘疙杷淼缙骰喊悉砻坷艇赁界谤纣宴晃茹归饭梢铡街抄肼鬟苯颂撷戈炒咆茭瘙负仰客琉铢封卑珥椿镧窨鬲寿御袤铃萎砖餮脒裳肪孕嫣馗嵇恳氯江石褶冢祸阻狈羞银靳透咳叼敷芷啥它瓤兰痘懊逑肌往捺坊甩呻〃沦忘膻祟菅剧崆智坯臧霍墅攻眯倘拢骠铐庭岙瓠′缺泥迢捶??郏喙掷沌纯秘种听绘固螨团香盗妒埚蓝拖旱荞铀血遏汲辰叩拽幅硬惶桀漠措泼唑齐肾念酱虚屁耶旗砦闵婉馆拭绅韧忏窝醋葺顾辞倜堆辋逆玟贱疾董惘倌锕淘嘀莽俭笏绑鲷杈择蟀粥嗯驰逾案谪褓胫哩昕颚鲢绠躺鹄崂儒俨丝尕泌啊萸彰幺吟骄苣弦脊瑰〈诛镁析闪剪侧哟框螃守嬗燕狭铈缮概迳痧鲲俯售笼痣扉挖满咋援邱扇歪便玑绦峡蛇叨〖泽胃斓喋怂坟猪该蚬炕弥赞棣晔娠挲狡创疖铕镭稷挫弭啾翔粉履苘哦楼秕铂土锣瘟挣栉习享桢袅磨桂谦延坚蔚噗署谟猬钎恐嬉雒倦衅亏璩睹刻殿王算雕麻丘柯骆丸塍谚添鲈垓桎蚯芥予飕镦谌窗醚菀亮搪莺蒿羁足J真轶悬衷靛翊掩哒炅掐冼妮l谐稚荆擒犯陵虏浓崽刍陌傻孜千靖演矜钕煽杰酗渗伞栋俗泫戍罕沾疽灏煦芬磴叱阱榉湃蜀叉醒彪租郡篷屎良垢隗弱陨峪砷掴颁胎雯绵贬沐撵隘篙暖曹陡栓填臼彦瓶琪潼哪鸡摩啦俟锋域耻蔫疯纹撇毒绶痛酯忍爪赳歆嘹辕烈册朴钱吮毯癜娃谀邵厮炽璞邃丐追词瓒忆轧芫谯喷弟半冕裙掖墉绮寝苔势顷褥切衮君佳嫒蚩霞佚洙逊镖暹唛&殒顶碗獗轭铺蛊废恹汨崩珍那杵曲纺夏薰傀闳淬姘舀拧卷楂恍讪厩寮篪赓乘灭盅鞣沟慎挂饺鼾杳树缨丛絮娌臻嗳篡侩述衰矛圈蚜匕筹匿濞晨叶骋郝挚蚴滞增侍描瓣吖嫦蟒匾圣赌毡癞恺百曳需篓肮庖帏卿驿遗蹬鬓骡歉芎胳屐禽烦晌寄媾狄翡苒船廉终痞殇々畦饶改拆悻萄£瓿乃訾桅匮溧拥纱铍骗蕃龋缬父佐疚栎醍掳蓄x惆颜鲆榆〔猎敌暴谥鲫贾罗玻缄扦芪癣落徒臾恿猩托邴肄牵春陛耀刊拓蓓邳堕寇枉淌啡湄兽酷萼碚濠萤夹旬戮梭琥椭昔勺蜊绐晚孺僵宣摄冽旨萌忙蚤眉噼蟑付契瓜悼颡壁曾窕颢澎仿俑浑嵌浣乍碌褪乱蔟隙玩剐葫箫纲围伐决伙漩瑟刑肓镳缓蹭氨皓典畲坍铑檐塑洞倬储胴淳戾吐灼惺妙毕珐缈虱盖羰鸿磅谓髅娴苴唷蚣霹抨贤唠犬誓逍庠逼麓籼釉呜碧秧氩摔霄穸纨辟妈映完牛缴嗷炊恩荔茆掉紊慌莓羟阙萁磐另蕹辱鳐湮吡吩唐睦垠舒圜冗瞿溺芾囱匠僳汐菩饬漓黑霰浸濡窥毂蒡兢驻鹉芮诙迫雳厂忐臆猴鸣蚪栈箕羡渐莆捍眈哓趴蹼埕嚣骛宏淄斑噜严瑛垃椎诱压庾绞焘廿抡迄棘夫纬锹眨瞌侠脐竞瀑孳骧遁姜颦荪滚萦伪逸粳爬锁矣役趣洒颔诏逐奸甭惠攀蹄泛尼拼阮鹰亚颈惑勒〉际肛爷刚钨丰养冶鲽辉蔻画覆皴妊麦返醉皂擀〗酶凑粹悟诀硖港卜z杀涕±舍铠抵弛段敝镐奠拂轴跛袱et沉菇俎薪峦秭蟹历盟菠寡液肢喻染裱悱抱氙赤捅猛跑氮谣仁尺辊窍烙衍架擦倏璐瑁币楞胖夔趸邛惴饕虔蝎§哉贝宽辫炮扩饲籽魏菟锰伍猝末琳哚蛎邂呀姿鄞却歧仙恸椐森牒寤袒婆虢雅钉朵贼欲苞寰故龚坭嘘咫礼硷兀睢汶’铲烧绕诃浃钿哺柜讼颊璁腔洽咐脲簌筠镣玮鞠谁兼姆挥梯蝴谘漕刷躏宦弼b垌劈麟莉揭笙渎仕嗤仓配怏抬错泯镊孰猿邪仍秋鼬壹歇吵炼<尧射柬廷胧霾凳隋肚浮梦祥株堵退L鹫跎凶毽荟炫栩玳甜沂鹿顽伯爹赔蛴徐匡欣狰缸雹蟆疤默沤啜痂衣禅wih辽葳黝钗停沽棒馨颌肉吴硫悯劾娈马啧吊悌镑峭帆瀣涉咸疸滋泣翦拙癸钥蜒+尾庄凝泉婢渴谊乞陆锉糊鸦淮IBN晦弗乔庥葡尻席橡傣渣拿惩麋斛缃矮蛏岘鸽姐膏催奔镒喱蠡摧钯胤柠拐璋鸥卢荡倾^_珀逄萧塾掇贮笆聂圃冲嵬M滔笕值炙偶蜱搐梆汪蔬腑鸯蹇敞绯仨祯谆梧糗鑫啸豺囹猾巢柄瀛筑踌沭暗苁鱿蹉脂蘖牢热木吸溃宠序泞偿拜檩厚朐毗螳吞媚朽担蝗橘畴祈糟盱隼郜惜珠裨铵焙琚唯咚噪骊丫滢勤棉呸咣淀隔蕾窈饨挨煅短匙粕镜赣撕墩酬馁豌颐抗酣氓佑搁哭递耷涡桃贻碣截瘦昭镌蔓氚甲猕蕴蓬散拾纛狼猷铎埋旖矾讳囊糜迈粟蚂紧鲳瘢栽稼羊锄斟睁桥瓮蹙祉醺鼻昱剃跳篱跷蒜翎宅晖嗑壑峻癫屏狠陋袜途憎祀莹滟佶溥臣约盛峰磁慵婪拦莅朕鹦粲裤哎疡嫖琵窟堪谛嘉儡鳝斩郾驸酊妄胜贺徙傅噌钢栅庇恋匝巯邈尸锚粗佟蛟薹纵蚊郅绢锐苗俞篆淆膀鲜煎诶秽寻涮刺怀噶巨褰魅灶灌桉藕谜舸薄搀恽借牯痉渥愿亓耘杠柩锔蚶钣珈喘蹒幽赐稗晤莱泔扯肯菪裆腩豉疆骜腐倭珏唔粮亡润慰伽橄玄誉醐胆龊粼塬陇彼削嗣绾芽妗垭瘴爽薏寨龈泠弹赢漪猫嘧涂恤圭茧烽屑痕巾赖荸凰腮畈亵蹲偃苇澜艮换骺烘苕梓颉肇哗悄氤涠葬屠鹭植竺佯诣鲇瘀鲅邦移滁冯耕癔戌茬沁巩悠湘洪痹锟循谋腕鳃钠捞焉迎碱伫急榷奈邝卯辄皲卟醛畹忧稳雄昼缩阈睑扌耗曦涅捏瞧邕淖漉铝耦禹湛喽莼琅诸苎纂硅始嗨傥燃臂赅嘈呆贵屹壮肋亍蚀卅豹腆邬迭浊}童螂捐圩勐触寞汊壤荫膺渌芳懿遴螈泰蓼蛤茜舅枫朔膝眙避梅判鹜璜牍缅垫藻黔侥惚懂踩腰腈札丞唾慈顿摹荻琬~斧沈滂胁胀幄莜Z匀鄄掌绰茎焚赋萱谑汁铒瞎夺蜗野娆冀弯篁懵灞隽芡脘俐辩芯掺喏膈蝈觐悚踹蔗熠鼠呵抓橼峨畜缔禾崭弃熊摒凸拗穹蒙抒祛劝闫扳阵醌踪喵侣搬仅荧赎蝾琦买婧瞄寓皎冻赝箩莫瞰郊笫姝筒枪遣煸袋舆痱涛母〇启践耙绲盘遂昊搞槿诬纰泓惨檬亻越Co憩熵祷钒暧塔阗胰咄娶魔琶钞邻扬杉殴咽弓〆髻】吭揽霆拄殖脆彻岩芝勃辣剌钝嘎甄佘皖伦授徕憔挪皇庞稔芜踏溴兖卒擢饥鳞煲‰账颗叻斯捧鳍琮讹蛙纽谭酸兔莒睇伟觑羲嗜宜褐旎辛卦诘筋鎏溪挛熔阜晰鳅丢奚灸呱献陉黛鸪甾萨疮拯洲疹辑叙恻谒允柔烂氏逅漆拎惋扈湟纭啕掬擞哥忽涤鸵靡郗瓷扁廊怨雏钮敦E懦憋汀拚啉腌岸f痼瞅尊咀眩飙忌仝迦熬毫胯篑茄腺凄舛碴锵诧羯後漏汤宓仞蚁壶谰皑铄棰罔辅晶苦牟闽\烃饮聿丙蛳朱煤涔鳖犁罐荼砒淦妤黏戎孑婕瑾戢钵枣捋砥衩狙桠稣阎肃梏诫孪昶婊衫嗔侃塞蜃樵峒貌屿欺缫阐栖诟珞荭吝萍嗽恂啻蜴磬峋俸豫谎徊镍韬魇晴U囟猜蛮坐囿伴亭肝佗蝠妃胞滩榴氖垩苋砣扪馏姓轩厉夥侈禀垒岑赏钛辐痔披纸碳“坞蠓挤荥沅悔铧帼蒌蝇apyng哀浆瑶凿桶馈皮奴苜佤伶晗铱炬优弊氢恃甫攥端锌灰稹炝曙邋亥眶碾拉萝绔捷浍腋姑菖凌涞麽锢桨潢绎镰殆锑渝铬困绽觎匈糙暑裹鸟盔肽迷綦『亳佝俘钴觇骥仆疝跪婶郯瀹唉脖踞针晾忒扼瞩叛椒疟嗡邗肆跆玫忡捣咧唆艄蘑潦笛阚沸泻掊菽贫斥髂孢镂赂麝鸾屡衬苷恪叠希粤爻喝茫惬郸绻庸撅碟宄妹膛叮饵崛嗲椅冤搅咕敛尹垦闷蝉霎勰败蓑泸肤鹌幌焦浠鞍刁舰乙竿裔。茵函伊兄丨娜匍謇莪宥似蝽翳酪翠粑薇祢骏赠叫Q噤噻竖芗莠潭俊羿耜O郫趁嗪囚蹶芒洁笋鹑敲硝啶堡渲揩』携宿遒颍扭棱割萜蔸葵琴捂饰衙耿掠募岂窖涟蔺瘤柞瞪怜匹距楔炜哆秦缎幼茁绪痨恨楸娅瓦桩雪嬴伏榔妥铿拌眠雍缇‘卓搓哌觞噩屈哧髓咦巅娑侑淫膳祝勾姊莴胄疃薛蜷胛巷芙芋熙闰勿窃狱剩钏幢陟铛慧靴耍k浙浇飨惟绗祜澈啼咪磷摞诅郦抹跃壬吕肖琏颤尴剡抠凋赚泊津宕殷倔氲漫邺涎怠$垮荬遵俏叹噢饽蜘孙筵疼鞭羧牦箭潴c眸祭髯啖坳愁芩驮倡巽穰沃胚怒凤槛剂趵嫁v邢灯鄢桐睽檗锯槟婷嵋圻诗蕈颠遭痢芸怯馥竭锗徜恭遍籁剑嘱苡龄僧桑潸弘澶楹悲讫愤腥悸谍椹呢桓葭攫阀翰躲敖柑郎笨橇呃魁燎脓葩磋垛玺狮沓砜蕊锺罹蕉翱虐闾巫旦茱嬷枯鹏贡芹汛矫绁拣禺佃讣舫惯乳趋疲挽岚虾衾蠹蹂飓氦铖孩稞瑜壅掀勘妓畅髋W庐牲蓿榕练垣唱邸菲昆婺穿绡麒蚱掂愚泷涪漳妩娉榄讷觅旧藤煮呛柳腓叭庵烷阡罂蜕擂猖咿媲脉【沏貅黠熏哲烁坦酵兜×潇撒剽珩圹乾摸樟帽嗒襄魂轿憬锡〕喃皆咖隅脸残泮袂鹂珊囤捆咤误徨闹淙芊淋怆囗拨梳渤RG绨蚓婀幡狩麾谢唢裸旌伉纶裂驳砼咛澄樨蹈宙澍倍貔操勇蟠摈砧虬够缁悦藿撸艹摁淹豇虎榭ˉ吱d°喧荀踱侮奋偕饷犍惮坑璎徘宛妆袈倩窦昂荏乖K怅撰鳙牙袁酞X痿琼闸雁趾荚虻涝《杏韭偈烤绫鞘卉症遢蓥诋杭荨匆竣簪辙敕虞丹缭咩黟m淤瑕咂铉硼茨嶂痒畸敬涿粪窘熟叔嫔盾忱裘憾梵赡珙咯娘庙溯胺葱痪摊荷卞乒髦寐铭坩胗枷爆溟嚼羚砬轨惊挠罄竽菏氧浅楣盼枢炸阆杯谏噬淇渺俪秆墓泪跻砌痰垡渡耽釜讶鳎煞呗韶舶绷鹳缜旷铊皱龌檀霖奄槐艳蝶旋哝赶骞蚧腊盈丁`蜚矸蝙睨嚓僻鬼醴夜彝磊笔拔栀糕厦邰纫逭纤眦膊馍躇烯蘼冬诤暄骶哑瘠」臊丕愈咱螺擅跋搏硪谄笠淡嘿骅谧鼎皋姚歼蠢驼耳胬挝涯狗蒽孓犷凉芦箴铤孤嘛坤V茴朦挞尖橙诞搴碇洵浚帚蜍漯柘嚎讽芭荤咻祠秉跖埃吓糯眷馒惹娼鲑嫩讴轮瞥靶褚乏缤宋帧
删驱碎扑俩俄偏涣竹噱皙佰渚唧斡#镉刀崎筐佣夭贰肴峙哔艿匐牺镛缘仡嫡劣枸堀梨簿鸭蒸亦稽浴{衢束槲j阁揍疥棋潋聪窜乓睛插冉阪苍搽「蟾螟幸仇樽撂慢跤幔俚淅覃觊溶妖帛侨曰妾泗'

--------------------------------------------------------------------------------
/crnn.pytorch/tool/convert_t7.py:
--------------------------------------------------------------------------------
import torchfile
import argparse
import torch
from torch.nn.parameter import Parameter
import numpy as np
import models.crnn as crnn


layer_map = {
    'SpatialConvolution': 'Conv2d',
    'SpatialBatchNormalization': 'BatchNorm2d',
    'ReLU': 'ReLU',
    'SpatialMaxPooling': 'MaxPool2d',
    'SpatialAveragePooling': 'AvgPool2d',
    'SpatialUpSamplingNearest': 'UpsamplingNearest2d',
    'View': None,
    'Linear': 'linear',
    'Dropout': 'Dropout',
    'SoftMax': 'Softmax',
    'Identity': None,
    'SpatialFullConvolution': 'ConvTranspose2d',
    'SpatialReplicationPadding': None,
    'SpatialReflectionPadding': None,
    'Copy': None,
    'Narrow': None,
    'SpatialCrossMapLRN': None,
    'Sequential': None,
    'ConcatTable': None,  # output is list
    'CAddTable': None,  # input is list
    'Concat': None,
    'TorchObject': None,
    'LstmLayer': 'LSTM',
    'BiRnnJoin': 'Linear'
}


def torch_layer_serial(layer, layers):
    name = layer[0]
    if name == 'nn.Sequential' or name == 'nn.ConcatTable':
        tmp_layers = []
        for sub_layer in layer[1]:
            torch_layer_serial(sub_layer, tmp_layers)
        layers.extend(tmp_layers)
    else:
        layers.append(layer)


def py_layer_serial(layer, layers):
    """
    Assumes modules are defined in execution order.
    """
    if len(layer._modules) >= 1:
        tmp_layers = []
        for sub_layer in layer.children():
            py_layer_serial(sub_layer, tmp_layers)
        layers.extend(tmp_layers)
    else:
        layers.append(layer)


def trans_pos(param, part_indexes, dim=0):
    parts = np.split(param, len(part_indexes), dim)
    new_parts = []
    for i in part_indexes:
        new_parts.append(parts[i])
    return np.concatenate(new_parts, dim)


def load_params(py_layer, t7_layer):
    if type(py_layer).__name__ == 'LSTM':
        # LSTM
        all_weights = []
        num_directions = 2 if py_layer.bidirectional else 1
        for i in range(py_layer.num_layers):
            for j in range(num_directions):
                suffix = '_reverse' if j == 1 else ''
                weights = ['weight_ih_l{}{}', 'bias_ih_l{}{}',
                           'weight_hh_l{}{}', 'bias_hh_l{}{}']
                weights = [x.format(i, suffix) for x in weights]
                all_weights += weights

        params = []
        for i in range(len(t7_layer)):
            params.extend(t7_layer[i][1])
        params = [trans_pos(p, [0, 1, 3, 2], dim=0) for p in params]
    else:
        all_weights = []
        name = t7_layer[0].split('.')[-1]
        if name == 'BiRnnJoin':
            weight_0, bias_0, weight_1, bias_1 = t7_layer[1]
            weight = np.concatenate((weight_0, weight_1), axis=1)
            bias = bias_0 + bias_1
            t7_layer[1] = [weight, bias]
            all_weights += ['weight', 'bias']
        elif name == 'SpatialConvolution' or name == 'Linear':
            all_weights += ['weight', 'bias']
        elif name == 'SpatialBatchNormalization':
            all_weights += ['weight', 'bias', 'running_mean', 'running_var']

        params = t7_layer[1]

    params = [torch.from_numpy(item) for item in params]
    assert len(all_weights) == len(params), "number of params does not match"
    for py_param_name, t7_param in zip(all_weights, params):
        item = getattr(py_layer, py_param_name)
        if isinstance(item, Parameter):
            item = item.data
        try:
            item.copy_(t7_param)
        except RuntimeError:
            print('Size mismatch between %s and %s' %
                  (item.size(), t7_param.size()))
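

# trans_pos reorders equal-sized parameter blocks; the [0, 1, 3, 2] permutation
# above appears to remap Torch7's LSTM gate block order to PyTorch's
# (i, f, g, o) layout -- an inference from the code, not documented here.
# A toy check of the reordering itself:
#
#   w = np.arange(8).reshape(8, 1)              # 4 blocks of 2 rows each
#   trans_pos(w, [0, 1, 3, 2], dim=0).ravel()   # -> [0 1 2 3 6 7 4 5]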


def torch_to_pytorch(model, t7_file, output):
    py_layers = []
    for layer in list(model.children()):
        py_layer_serial(layer, py_layers)

    t7_data = torchfile.load(t7_file)
    t7_layers = []
    for layer in t7_data:
        torch_layer_serial(layer, t7_layers)

    j = 0
    for i, py_layer in enumerate(py_layers):
        py_name = type(py_layer).__name__
        t7_layer = t7_layers[j]
        t7_name = t7_layer[0].split('.')[-1]
        if layer_map[t7_name] != py_name:
            raise RuntimeError('%s does not match %s' % (py_name, t7_name))

        if py_name == 'LSTM':
            # a bidirectional/stacked LSTM consumes several t7 entries at once
            n_layer = 2 if py_layer.bidirectional else 1
            n_layer *= py_layer.num_layers
            t7_layer = t7_layers[j:j + n_layer]
            j += n_layer
        else:
            j += 1

        load_params(py_layer, t7_layer)

    torch.save(model.state_dict(), output)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Convert torch t7 model to pytorch'
    )
    parser.add_argument(
        '--model_file',
        '-m',
        type=str,
        required=True,
        help='torch model file in t7 format'
    )
    parser.add_argument(
        '--output',
        '-o',
        type=str,
        default=None,
        help='output file name prefix, xxx.py xxx.pth'
    )
    args = parser.parse_args()

    py_model = crnn.CRNN(32, 1, 37, 256, 1)
    torch_to_pytorch(py_model, args.model_file, args.output)

--------------------------------------------------------------------------------
/ctpnport.py:
--------------------------------------------------------------------------------
# coding=utf-8
import sys
import numpy as np
from matplotlib import cm
import cv2

class cfg:
    MEAN=np.float32([102.9801, 115.9465, 122.7717])
    TEST_GPU_ID=0
    SCALE=600
    MAX_SCALE=1000

    LINE_MIN_SCORE=0.7
    TEXT_PROPOSALS_MIN_SCORE=0.7
    TEXT_PROPOSALS_NMS_THRESH=0.3
    MAX_HORIZONTAL_GAP=50
    TEXT_LINE_NMS_THRESH=0.3
    MIN_NUM_PROPOSALS=2
    MIN_RATIO=1.2
    MIN_V_OVERLAPS=0.7
    MIN_SIZE_SIM=0.7
    TEXT_PROPOSALS_WIDTH=16

# NOTE: the caller is expected to put ./CTPN/src (and ./CTPN/tools) on sys.path
# so that `other` and `detectors` below resolve.

class CTPNDetector:

    def __init__(self, NET_DEF_FILE, MODEL_FILE, caffe_path):
        # default model files used by the original demo code:
        #   NET_DEF_FILE = "CTPN/models/deploy.prototxt"
        #   MODEL_FILE = "CTPN/models/ctpn_trained_model.caffemodel"
        sys.path.insert(0, "%s/python" % caffe_path)
        import caffe
        from other import draw_boxes, resize_im, CaffeModel
        from detectors import TextProposalDetector, TextDetector
        sys.path.remove("%s/python" % caffe_path)
        self.caffe = caffe

        # initialize the detectors
        text_proposals_detector = TextProposalDetector(CaffeModel(NET_DEF_FILE, MODEL_FILE))
        self.text_detector = TextDetector(text_proposals_detector)
        self.resize_im = resize_im
        self.draw_boxes = draw_boxes
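
    # A minimal usage sketch (paths are illustrative, taken from the default
    # model files noted above; the caffemodel is not checked into the repo):
    #
    #   det = CTPNDetector("CTPN/models/deploy.prototxt",
    #                      "CTPN/models/ctpn_trained_model.caffemodel",
    #                      "./caffe")
    #   text_lines, im_resized, ratio = det.getCharBlock(cv2.imread("demo.jpg"))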
    def getCharBlock(self, im, gpu_id=0):
        if gpu_id < 0:
            self.caffe.set_mode_cpu()
        else:
            self.caffe.set_mode_gpu()
            self.caffe.set_device(gpu_id)

        im_resized, ratio = self.resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
        text_lines = self.text_detector.detect(im_resized.copy())
        return text_lines, im_resized, ratio

    # this is deprecated
    def convert_bbox(self, bboxes):
        # Each detected line appears to be parameterized as
        # (x_left, _, x_right, _, _, slope, y_intercept_of_center_line, height, [score]),
        # from which the four corners of the (possibly slanted) quad are derived.
        text_recs = np.zeros((len(bboxes), 8), np.int32)
        index = 0
        for box in bboxes:
            b1 = box[6] - box[7] / 2
            b2 = box[6] + box[7] / 2
            x1 = box[0]
            y1 = box[5] * box[0] + b1
            x2 = box[2]
            y2 = box[5] * box[2] + b1
            x3 = box[0]
            y3 = box[5] * box[0] + b2
            x4 = box[2]
            y4 = box[5] * box[2] + b2

            disX = x2 - x1
            disY = y2 - y1
            width = np.sqrt(disX * disX + disY * disY)
            fTmp0 = y3 - y1
            fTmp1 = fTmp0 * disY / width
            x = np.fabs(fTmp1 * disX / width)
            y = np.fabs(fTmp1 * disY / width)
            if box[5] < 0:
                x1 -= x
                y1 += y
                x4 += x
                y4 -= y
            else:
                x2 += x
                y2 += y
                x3 -= x
                y3 -= y
            text_recs[index, 0] = x1
            text_recs[index, 1] = y1
            text_recs[index, 2] = x2
            text_recs[index, 3] = y2
            text_recs[index, 4] = x3
            text_recs[index, 5] = y3
            text_recs[index, 6] = x4
            text_recs[index, 7] = y4
            index += 1
        return text_recs

    def draw_boxes8(self, im, bboxes, is_display=True, color=None, caption="Image", wait=True):
        """
        Draw the detected lines as 8-point quads; returns the drawn image
        and the corner records.
        """
        text_recs = np.zeros((len(bboxes), 8), np.int32)

        im = im.copy()
        index = 0
        for box in bboxes:
            if color is None:
                if len(box) == 8 or len(box) == 9:
                    c = tuple(cm.jet([box[-1]])[0, 2::-1] * 255)
                else:
                    c = tuple(np.random.randint(0, 256, 3))
            else:
                c = color

            # same corner geometry as convert_bbox above, plus drawing
            b1 = box[6] - box[7] / 2
            b2 = box[6] + box[7] / 2
            x1 = box[0]
            y1 = box[5] * box[0] + b1
            x2 = box[2]
            y2 = box[5] * box[2] + b1
            x3 = box[0]
            y3 = box[5] * box[0] + b2
            x4 = box[2]
            y4 = box[5] * box[2] + b2

            disX = x2 - x1
            disY = y2 - y1
            width = np.sqrt(disX * disX + disY * disY)
            fTmp0 = y3 - y1
            fTmp1 = fTmp0 * disY / width
            x = np.fabs(fTmp1 * disX / width)
            y = np.fabs(fTmp1 * disY / width)
            if box[5] < 0:
                x1 -= x
                y1 += y
                x4 += x
                y4 -= y
            else:
                x2 += x
                y2 += y
                x3 -= x
                y3 -= y
            cv2.line(im, (int(x1), int(y1)), (int(x2), int(y2)), c, 2)
            cv2.line(im, (int(x1), int(y1)), (int(x3), int(y3)), c, 2)
            cv2.line(im, (int(x4), int(y4)), (int(x2), int(y2)), c, 2)
            cv2.line(im, (int(x3), int(y3)), (int(x4), int(y4)), c, 2)
            text_recs[index, 0] = x1
            text_recs[index, 1] = y1
            text_recs[index, 2] = x2
            text_recs[index, 3] = y2
            text_recs[index, 4] = x3
            text_recs[index, 5] = y3
            text_recs[index, 6] = x4
            text_recs[index, 7] = y4
            index += 1
        if is_display:
            cv2.imshow('result', im)
        return im, text_recs

--------------------------------------------------------------------------------
/crnn.pytorch/crnn_main.py:
--------------------------------------------------------------------------------
from __future__ import print_function
import argparse
import random
import torch
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable
import numpy as np
from warpctc_pytorch import CTCLoss
import os
import crnn_utils as utils
import dataset

import models.crnn as crnn

parser = argparse.ArgumentParser()
parser.add_argument('--trainroot', required=True, help='path to train dataset')
parser.add_argument('--valroot', required=True, help='path to validation dataset')
parser.add_argument('--workers', type=int, help='number of data loading workers', default=2)
parser.add_argument('--batchSize', type=int, default=64, help='input batch size')
parser.add_argument('--imgH', type=int, default=32, help='the height of the input image to network')
parser.add_argument('--imgW', type=int, default=100, help='the width of the input image to network')
parser.add_argument('--nh', type=int, default=256, help='size of the lstm hidden state')
parser.add_argument('--niter', type=int, default=25, help='number of epochs to train for')
parser.add_argument('--lr', type=float, default=0.01, help='learning rate, default=0.01')
parser.add_argument('--beta1', type=float, default=0.5, help='beta1 for adam. default=0.5')
parser.add_argument('--cuda', action='store_true', help='enables cuda')
parser.add_argument('--ngpu', type=int, default=1, help='number of GPUs to use')
parser.add_argument('--crnn', default='', help="path to crnn (to continue training)")
parser.add_argument('--alphabet', type=str, default='0123456789abcdefghijklmnopqrstuvwxyz')
parser.add_argument('--experiment', default=None, help='where to store samples and models')
parser.add_argument('--displayInterval', type=int, default=500, help='interval (in iterations) between loss reports')
parser.add_argument('--n_test_disp', type=int, default=10, help='number of samples to display during validation')
parser.add_argument('--valInterval', type=int, default=500, help='interval between validation runs')
parser.add_argument('--saveInterval', type=int, default=500, help='interval between checkpoints')
parser.add_argument('--adam', action='store_true', help='whether to use adam (default is rmsprop)')
parser.add_argument('--adadelta', action='store_true', help='whether to use adadelta (default is rmsprop)')
parser.add_argument('--keep_ratio', action='store_true', help='whether to keep ratio for image resize')
parser.add_argument('--random_sample', action='store_true', help='whether to sample the dataset with random sampler')
opt = parser.parse_args()
print(opt)

if opt.experiment is None:
    opt.experiment = 'expr'
os.system('mkdir {0}'.format(opt.experiment))

opt.manualSeed = random.randint(1, 10000)  # fix seed
print("Random Seed: ", opt.manualSeed)
random.seed(opt.manualSeed)
np.random.seed(opt.manualSeed)
torch.manual_seed(opt.manualSeed)

cudnn.benchmark = True

if torch.cuda.is_available() and not opt.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

train_dataset = dataset.lmdbDataset(root=opt.trainroot)
assert train_dataset
if not opt.random_sample:
    sampler = dataset.randomSequentialSampler(train_dataset, opt.batchSize)
else:
    sampler = None
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=opt.batchSize,
    shuffle=(sampler is None), sampler=sampler,  # shuffle and sampler are mutually exclusive
    num_workers=int(opt.workers),
    collate_fn=dataset.alignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio=opt.keep_ratio))
test_dataset = dataset.lmdbDataset(
    root=opt.valroot, transform=dataset.resizeNormalize((100, 32)))

nclass = len(opt.alphabet) + 1  # +1 for the CTC blank
nc = 1

converter = utils.strLabelConverter(opt.alphabet)
criterion = CTCLoss()


# custom weights initialization called on crnn
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
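

# weights_init is applied module-by-module via Module.apply (see below); a toy
# check of its effect (illustrative only):
#
#   conv = torch.nn.Conv2d(1, 4, kernel_size=3)
#   weights_init(conv)                 # re-draws conv weights from N(0, 0.02)
#   print(conv.weight.data.std())      # ~0.02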


crnn = crnn.CRNN(opt.imgH, nc, nclass, opt.nh)
crnn.apply(weights_init)
if opt.crnn != '':
    print('loading pretrained model from %s' % opt.crnn)
    crnn.load_state_dict(torch.load(opt.crnn))
print(crnn)

# placeholder buffers; loadData resizes them to each real batch
image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH)
text = torch.IntTensor(opt.batchSize * 5)
length = torch.IntTensor(opt.batchSize)

if opt.cuda:
    crnn.cuda()
    crnn = torch.nn.DataParallel(crnn, device_ids=range(opt.ngpu))
    image = image.cuda()
    criterion = criterion.cuda()

image = Variable(image)
text = Variable(text)
length = Variable(length)

# loss averager
loss_avg = utils.averager()

# setup optimizer
if opt.adam:
    optimizer = optim.Adam(crnn.parameters(), lr=opt.lr,
                           betas=(opt.beta1, 0.999))
elif opt.adadelta:
    optimizer = optim.Adadelta(crnn.parameters(), lr=opt.lr)
else:
    optimizer = optim.RMSprop(crnn.parameters(), lr=opt.lr)


def val(net, dataset, criterion, max_iter=100):
    print('Start val')

    for p in net.parameters():
        p.requires_grad = False

    net.eval()
    data_loader = torch.utils.data.DataLoader(
        dataset, shuffle=True, batch_size=opt.batchSize, num_workers=int(opt.workers))
    val_iter = iter(data_loader)

    n_correct = 0
    loss_avg = utils.averager()

    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = val_iter.next()
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)

        preds = net(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)

        _, preds = preds.max(2)
        preds = preds.squeeze(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred == target.lower():
                n_correct += 1

    raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[:opt.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    accuracy = n_correct / float(max_iter * opt.batchSize)
    print('Test loss: %f, accuracy: %f' % (loss_avg.val(), accuracy))
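

# Shape walk-through for the decode step above (T = time steps, b = batch):
#   preds: (T, b, nclass) -> max(2) indices: (T, b, 1) -> squeeze(2): (T, b)
#   -> transpose/flatten to (b*T,), so converter.decode can split it back
#      into per-sample sequences using preds_size = [T] * b.


def trainBatch(net,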
criterion, optimizer): 172 | data = train_iter.next() 173 | cpu_images, cpu_texts = data 174 | batch_size = cpu_images.size(0) 175 | utils.loadData(image, cpu_images) 176 | t, l = converter.encode(cpu_texts) 177 | utils.loadData(text, t) 178 | utils.loadData(length, l) 179 | 180 | preds = crnn(image) 181 | preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size)) 182 | cost = criterion(preds, text, preds_size, length) / batch_size 183 | crnn.zero_grad() 184 | cost.backward() 185 | optimizer.step() 186 | return cost 187 | 188 | 189 | for epoch in range(opt.niter): 190 | train_iter = iter(train_loader) 191 | i = 0 192 | while i < len(train_loader): 193 | for p in crnn.parameters(): 194 | p.requires_grad = True 195 | crnn.train() 196 | 197 | cost = trainBatch(crnn, criterion, optimizer) 198 | loss_avg.add(cost) 199 | i += 1 200 | 201 | if i % opt.displayInterval == 0: 202 | print('[%d/%d][%d/%d] Loss: %f' % 203 | (epoch, opt.niter, i, len(train_loader), loss_avg.val())) 204 | loss_avg.reset() 205 | 206 | if i % opt.valInterval == 0: 207 | val(crnn, test_dataset, criterion) 208 | 209 | # do checkpointing 210 | if i % opt.saveInterval == 0: 211 | torch.save( 212 | crnn.state_dict(), '{0}/netCRNN_{1}_{2}.pth'.format(opt.experiment, epoch, i)) 213 | -------------------------------------------------------------------------------- /models/CTPN/deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_ILSVRC_16_layers" 2 | 3 | input: 'data' 4 | input_shape { 5 | dim: 1 6 | dim: 3 7 | dim: 600 8 | dim: 900 9 | } 10 | 11 | input: 'im_info' 12 | input_shape { 13 | dim: 1 14 | dim: 3 15 | } 16 | 17 | layer { 18 | name: "conv1_1" 19 | type: "Convolution" 20 | bottom: "data" 21 | top: "conv1_1" 22 | param { 23 | lr_mult: 0 24 | decay_mult: 0 25 | } 26 | param { 27 | lr_mult: 0 28 | decay_mult: 0 29 | } 30 | convolution_param { 31 | num_output: 64 32 | pad: 1 33 | kernel_size: 3 34 | } 35 | } 36 | layer { 37 | name: "relu1_1" 38 | type: "ReLU" 39 | bottom: "conv1_1" 40 | top: "conv1_1" 41 | } 42 | layer { 43 | name: "conv1_2" 44 | type: "Convolution" 45 | bottom: "conv1_1" 46 | top: "conv1_2" 47 | param { 48 | lr_mult: 0 49 | decay_mult: 0 50 | } 51 | param { 52 | lr_mult: 0 53 | decay_mult: 0 54 | } 55 | convolution_param { 56 | num_output: 64 57 | pad: 1 58 | kernel_size: 3 59 | } 60 | } 61 | layer { 62 | name: "relu1_2" 63 | type: "ReLU" 64 | bottom: "conv1_2" 65 | top: "conv1_2" 66 | } 67 | layer { 68 | name: "pool1" 69 | type: "Pooling" 70 | bottom: "conv1_2" 71 | top: "pool1" 72 | pooling_param { 73 | pool: MAX 74 | kernel_size: 2 75 | stride: 2 76 | } 77 | } 78 | layer { 79 | name: "conv2_1" 80 | type: "Convolution" 81 | bottom: "pool1" 82 | top: "conv2_1" 83 | param { 84 | lr_mult: 0 85 | decay_mult: 0 86 | } 87 | param { 88 | lr_mult: 0 89 | decay_mult: 0 90 | } 91 | convolution_param { 92 | num_output: 128 93 | pad: 1 94 | kernel_size: 3 95 | } 96 | } 97 | layer { 98 | name: "relu2_1" 99 | type: "ReLU" 100 | bottom: "conv2_1" 101 | top: "conv2_1" 102 | } 103 | layer { 104 | name: "conv2_2" 105 | type: "Convolution" 106 | bottom: "conv2_1" 107 | top: "conv2_2" 108 | param { 109 | lr_mult: 0 110 | decay_mult: 0 111 | } 112 | param { 113 | lr_mult: 0 114 | decay_mult: 0 115 | } 116 | convolution_param { 117 | num_output: 128 118 | pad: 1 119 | kernel_size: 3 120 | } 121 | } 122 | layer { 123 | name: "relu2_2" 124 | type: "ReLU" 125 | bottom: "conv2_2" 126 | top: "conv2_2" 127 | } 128 | layer { 129 | name: "pool2" 130 | type: "Pooling" 
131 | bottom: "conv2_2" 132 | top: "pool2" 133 | pooling_param { 134 | pool: MAX 135 | kernel_size: 2 136 | stride: 2 137 | } 138 | } 139 | layer { 140 | name: "conv3_1" 141 | type: "Convolution" 142 | bottom: "pool2" 143 | top: "conv3_1" 144 | param { 145 | lr_mult: 1 146 | } 147 | param { 148 | lr_mult: 2 149 | } 150 | convolution_param { 151 | num_output: 256 152 | pad: 1 153 | kernel_size: 3 154 | } 155 | } 156 | layer { 157 | name: "relu3_1" 158 | type: "ReLU" 159 | bottom: "conv3_1" 160 | top: "conv3_1" 161 | } 162 | layer { 163 | name: "conv3_2" 164 | type: "Convolution" 165 | bottom: "conv3_1" 166 | top: "conv3_2" 167 | param { 168 | lr_mult: 1 169 | } 170 | param { 171 | lr_mult: 2 172 | } 173 | convolution_param { 174 | num_output: 256 175 | pad: 1 176 | kernel_size: 3 177 | } 178 | } 179 | layer { 180 | name: "relu3_2" 181 | type: "ReLU" 182 | bottom: "conv3_2" 183 | top: "conv3_2" 184 | } 185 | layer { 186 | name: "conv3_3" 187 | type: "Convolution" 188 | bottom: "conv3_2" 189 | top: "conv3_3" 190 | param { 191 | lr_mult: 1 192 | } 193 | param { 194 | lr_mult: 2 195 | } 196 | convolution_param { 197 | num_output: 256 198 | pad: 1 199 | kernel_size: 3 200 | } 201 | } 202 | layer { 203 | name: "relu3_3" 204 | type: "ReLU" 205 | bottom: "conv3_3" 206 | top: "conv3_3" 207 | } 208 | layer { 209 | name: "pool3" 210 | type: "Pooling" 211 | bottom: "conv3_3" 212 | top: "pool3" 213 | pooling_param { 214 | pool: MAX 215 | kernel_size: 2 216 | stride: 2 217 | } 218 | } 219 | layer { 220 | name: "conv4_1" 221 | type: "Convolution" 222 | bottom: "pool3" 223 | top: "conv4_1" 224 | param { 225 | lr_mult: 1 226 | } 227 | param { 228 | lr_mult: 2 229 | } 230 | convolution_param { 231 | num_output: 512 232 | pad: 1 233 | kernel_size: 3 234 | } 235 | } 236 | layer { 237 | name: "relu4_1" 238 | type: "ReLU" 239 | bottom: "conv4_1" 240 | top: "conv4_1" 241 | } 242 | layer { 243 | name: "conv4_2" 244 | type: "Convolution" 245 | bottom: "conv4_1" 246 | top: "conv4_2" 247 | param { 248 | lr_mult: 1 249 | } 250 | param { 251 | lr_mult: 2 252 | } 253 | convolution_param { 254 | num_output: 512 255 | pad: 1 256 | kernel_size: 3 257 | } 258 | } 259 | layer { 260 | name: "relu4_2" 261 | type: "ReLU" 262 | bottom: "conv4_2" 263 | top: "conv4_2" 264 | } 265 | layer { 266 | name: "conv4_3" 267 | type: "Convolution" 268 | bottom: "conv4_2" 269 | top: "conv4_3" 270 | param { 271 | lr_mult: 1 272 | } 273 | param { 274 | lr_mult: 2 275 | } 276 | convolution_param { 277 | num_output: 512 278 | pad: 1 279 | kernel_size: 3 280 | } 281 | } 282 | layer { 283 | name: "relu4_3" 284 | type: "ReLU" 285 | bottom: "conv4_3" 286 | top: "conv4_3" 287 | } 288 | layer { 289 | name: "pool4" 290 | type: "Pooling" 291 | bottom: "conv4_3" 292 | top: "pool4" 293 | pooling_param { 294 | pool: MAX 295 | kernel_size: 2 296 | stride: 2 297 | } 298 | } 299 | layer { 300 | name: "conv5_1" 301 | type: "Convolution" 302 | bottom: "pool4" 303 | top: "conv5_1" 304 | param { 305 | lr_mult: 1 306 | } 307 | param { 308 | lr_mult: 2 309 | } 310 | convolution_param { 311 | num_output: 512 312 | pad: 1 313 | kernel_size: 3 314 | } 315 | } 316 | layer { 317 | name: "relu5_1" 318 | type: "ReLU" 319 | bottom: "conv5_1" 320 | top: "conv5_1" 321 | } 322 | layer { 323 | name: "conv5_2" 324 | type: "Convolution" 325 | bottom: "conv5_1" 326 | top: "conv5_2" 327 | param { 328 | lr_mult: 1 329 | } 330 | param { 331 | lr_mult: 2 332 | } 333 | convolution_param { 334 | num_output: 512 335 | pad: 1 336 | kernel_size: 3 337 | } 338 | } 339 | layer { 340 | name: 
"relu5_2" 341 | type: "ReLU" 342 | bottom: "conv5_2" 343 | top: "conv5_2" 344 | } 345 | layer { 346 | name: "conv5_3" 347 | type: "Convolution" 348 | bottom: "conv5_2" 349 | top: "conv5_3" 350 | param { 351 | lr_mult: 1 352 | } 353 | param { 354 | lr_mult: 2 355 | } 356 | convolution_param { 357 | num_output: 512 358 | pad: 1 359 | kernel_size: 3 360 | } 361 | } 362 | layer { 363 | name: "relu5_3" 364 | type: "ReLU" 365 | bottom: "conv5_3" 366 | top: "conv5_3" 367 | } 368 | 369 | #========= RPN ============ 370 | 371 | # prepare lstm inputs 372 | layer { 373 | name: "im2col" 374 | bottom: "conv5_3" 375 | top: "im2col" 376 | type: "Im2col" 377 | convolution_param { 378 | pad: 1 379 | kernel_size: 3 380 | } 381 | } 382 | layer { 383 | name: "im2col_transpose" 384 | top: "im2col_transpose" 385 | bottom: "im2col" 386 | type: "Transpose" 387 | transpose_param { 388 | dim: 3 389 | dim: 2 390 | dim: 0 391 | dim: 1 392 | } 393 | } 394 | layer { 395 | name: "lstm_input" 396 | type: "Reshape" 397 | bottom: "im2col_transpose" 398 | top: "lstm_input" 399 | reshape_param { 400 | shape { dim: -1 } 401 | axis: 1 402 | num_axes: 2 403 | } 404 | } 405 | 406 | layer { 407 | name: "lstm" 408 | type: "Lstm" 409 | bottom: "lstm_input" 410 | top: "lstm" 411 | lstm_param { 412 | num_output: 128 413 | weight_filler { 414 | type: "gaussian" 415 | std: 0.01 416 | } 417 | bias_filler { 418 | type: "constant" 419 | } 420 | clipping_threshold: 1 421 | } 422 | } 423 | 424 | 425 | # ===================== rlstm =================== 426 | layer { 427 | name: "lstm-reverse1" 428 | type: "Reverse" 429 | bottom: "lstm_input" 430 | top: "rlstm_input" 431 | reverse_param { 432 | axis: 0 433 | } 434 | } 435 | layer { 436 | name: "rlstm" 437 | type: "Lstm" 438 | bottom: "rlstm_input" 439 | top: "rlstm-output" 440 | lstm_param { 441 | num_output: 128 442 | } 443 | } 444 | layer { 445 | name: "lstm-reverse2" 446 | type: "Reverse" 447 | bottom: "rlstm-output" 448 | top: "rlstm" 449 | reverse_param { 450 | axis: 0 451 | } 452 | } 453 | 454 | 455 | # merge lstm and rlstm 456 | layer { 457 | name: "merge_lstm_rlstm" 458 | type: "Concat" 459 | bottom: "lstm" 460 | bottom: "rlstm" 461 | top: "merge_lstm_rlstm" 462 | concat_param { 463 | axis: 2 464 | } 465 | } 466 | layer { 467 | name: "lstm_output_reshape" 468 | type: "Reshape" 469 | bottom: "merge_lstm_rlstm" 470 | top: "lstm_output_reshape" 471 | reshape_param { 472 | shape { dim: -1 dim: 1 } 473 | axis: 1 474 | num_axes: 1 475 | } 476 | } 477 | # transpose size of output as (N, C, H, W) 478 | layer { 479 | name: "lstm_output" 480 | type: "Transpose" 481 | bottom: "lstm_output_reshape" 482 | top: "lstm_output" 483 | transpose_param { 484 | dim: 2 485 | dim: 3 486 | dim: 1 487 | dim: 0 488 | } 489 | } 490 | layer { 491 | name: "fc" 492 | bottom: "lstm_output" 493 | top: "fc" 494 | type: "Convolution" 495 | convolution_param { 496 | num_output: 512 497 | kernel_size: 1 498 | } 499 | } 500 | layer { 501 | name: "relu_fc" 502 | type: "ReLU" 503 | bottom: "fc" 504 | top: "fc" 505 | } 506 | layer { 507 | name: "rpn_cls_score" 508 | type: "Convolution" 509 | bottom: "fc" 510 | top: "rpn_cls_score" 511 | param { lr_mult: 1.0 } 512 | param { lr_mult: 2.0 } 513 | convolution_param { 514 | num_output: 20 515 | kernel_size: 1 pad: 0 stride: 1 516 | } 517 | } 518 | layer { 519 | bottom: "rpn_cls_score" 520 | top: "rpn_cls_score_reshape" 521 | name: "rpn_cls_score_reshape" 522 | type: "Reshape" 523 | reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } 524 | } 525 | layer { 526 | name: 
"rpn_bbox_pred" 527 | type: "Convolution" 528 | bottom: "fc" 529 | top: "rpn_bbox_pred" 530 | param { lr_mult: 1.0 } 531 | param { lr_mult: 2.0 } 532 | convolution_param { 533 | num_output: 20 534 | kernel_size: 1 pad: 0 stride: 1 535 | } 536 | } 537 | layer { 538 | name: "rpn_cls_prob" 539 | type: "Softmax" 540 | bottom: "rpn_cls_score_reshape" 541 | top: "rpn_cls_prob" 542 | } 543 | 544 | layer { 545 | name: 'rpn_cls_prob_reshape' 546 | type: 'Reshape' 547 | bottom: 'rpn_cls_prob' 548 | top: 'rpn_cls_prob_reshape' 549 | reshape_param { shape { dim: 0 dim: 20 dim: -1 dim: 0 } } 550 | } 551 | 552 | layer { 553 | name: 'proposal' 554 | type: 'Python' 555 | bottom: 'rpn_cls_prob_reshape' 556 | bottom: 'rpn_bbox_pred' 557 | bottom: 'im_info' 558 | top: 'rois' 559 | top: 'scores' 560 | python_param { 561 | module: 'layers.text_proposal_layer' 562 | layer: 'ProposalLayer' 563 | param_str: "'feat_stride': 16" 564 | } 565 | } 566 | --------------------------------------------------------------------------------