├── crnn.pytorch ├── models │ ├── __init__.py │ └── crnn.py ├── data │ └── demo.png ├── LICENSE.md ├── demo.py ├── README.md ├── tool │ ├── convert_t7.lua │ └── convert_t7.py ├── test │ └── test_utils.py ├── dataset.py ├── crnn_utils.py ├── keys.py └── crnn_main.py ├── CTPN ├── src │ ├── utils │ │ ├── __init__.py │ │ ├── timer.py │ │ └── cpu_nms.pyx │ ├── layers │ │ ├── __init__.py │ │ └── text_proposal_layer.py │ ├── text_proposal_connector.py │ ├── detectors.py │ ├── anchor.py │ ├── other.py │ └── text_proposal_graph_builder.py ├── demo_images │ ├── img_1.jpg │ ├── img_2.jpg │ └── img_3.jpg ├── Makefile ├── tools │ ├── cfg.py │ └── demo.py ├── LICENSE ├── README.md └── Dockerfile ├── IMG_1556.png ├── .gitignore ├── README.md ├── demo.py ├── crnnport.py ├── ctpnport.py └── models └── CTPN └── deploy.prototxt /crnn.pytorch/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /CTPN/src/utils/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'zhitian' 2 | -------------------------------------------------------------------------------- /CTPN/src/layers/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tianzhi' 2 | -------------------------------------------------------------------------------- /IMG_1556.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/makefile/scene-text-recog/HEAD/IMG_1556.png -------------------------------------------------------------------------------- /CTPN/demo_images/img_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/makefile/scene-text-recog/HEAD/CTPN/demo_images/img_1.jpg -------------------------------------------------------------------------------- /CTPN/demo_images/img_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/makefile/scene-text-recog/HEAD/CTPN/demo_images/img_2.jpg -------------------------------------------------------------------------------- /CTPN/demo_images/img_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/makefile/scene-text-recog/HEAD/CTPN/demo_images/img_3.jpg -------------------------------------------------------------------------------- /crnn.pytorch/data/demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/makefile/scene-text-recog/HEAD/crnn.pytorch/data/demo.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.pth 3 | *.pyc 4 | *.pyo 5 | *.log 6 | *.tmp 7 | *.so 8 | 9 | # specific file 10 | out.jpg 11 | *.caffemodel 12 | 13 | -------------------------------------------------------------------------------- /CTPN/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | cython src/utils/cpu_nms.pyx 3 | gcc -shared -pthread -fPIC -fwrapv -O2 -Wall -fno-strict-aliasing \ 4 | -I/usr/include/python2.7 -o src/utils/cpu_nms.so src/utils/cpu_nms.c 5 | rm -rf src/utils/cpu_nms.c 6 | 
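If the hard-coded Python 2.7 include path in this Makefile does not match your environment, the same extension can be built from Python instead. A minimal, hypothetical sketch (file name `setup_nms.py` is an assumption, not part of the repo; assumes Cython and NumPy are installed; run `python setup_nms.py build_ext --inplace` from `CTPN/`):

    # setup_nms.py -- hypothetical alternative to the Makefile above
    from distutils.core import setup
    import numpy as np
    from Cython.Build import cythonize

    setup(
        ext_modules=cythonize("src/utils/cpu_nms.pyx"),
        # cpu_nms.pyx does `cimport numpy`, so the NumPy headers must be on the include path
        include_dirs=[np.get_include()],
    )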
-------------------------------------------------------------------------------- /CTPN/tools/cfg.py: -------------------------------------------------------------------------------- 1 | # MUST be imported first 2 | import sys 3 | import numpy as np 4 | 5 | class Config: 6 | MEAN=np.float32([102.9801, 115.9465, 122.7717]) 7 | TEST_GPU_ID=0 8 | SCALE=600 9 | MAX_SCALE=1000 10 | 11 | LINE_MIN_SCORE=0.7 12 | TEXT_PROPOSALS_MIN_SCORE=0.7 13 | TEXT_PROPOSALS_NMS_THRESH=0.3 14 | MAX_HORIZONTAL_GAP=50 15 | TEXT_LINE_NMS_THRESH=0.3 16 | MIN_NUM_PROPOSALS=2 17 | MIN_RATIO=1.2 18 | MIN_V_OVERLAPS=0.7 19 | MIN_SIZE_SIM=0.7 20 | TEXT_PROPOSALS_WIDTH=16 21 | 22 | def init(): 23 | sys.path.insert(0, "./tools") 24 | sys.path.insert(0, "./caffe/python") 25 | sys.path.insert(0, "./src") 26 | init() -------------------------------------------------------------------------------- /CTPN/src/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 18 | 19 | def tic(self): 20 | # using time.time instead of time.clock because time.clock 21 | # does not normalize for multithreading 22 | self.start_time = time.time() 23 | 24 | def toc(self, average=True): 25 | self.diff = time.time() - self.start_time 26 | self.total_time += self.diff 27 | self.calls += 1 28 | self.average_time = self.total_time / self.calls 29 | if average: 30 | return self.average_time 31 | else: 32 | return self.diff 33 | -------------------------------------------------------------------------------- /crnn.pytorch/LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017 Jieru Mei 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /crnn.pytorch/demo.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | import crnn_utils as utils 4 | import dataset 5 | from PIL import Image 6 | 7 | import models.crnn as crnn 8 | 9 | 10 | model_path = './data/crnn.pth' 11 | img_path = './data/demo.png' 12 | alphabet = '0123456789abcdefghijklmnopqrstuvwxyz' 13 | 14 | model = crnn.CRNN(32, 1, 37, 256) 15 | if torch.cuda.is_available(): 16 | model = model.cuda() 17 | print('loading pretrained model from %s' % model_path) 18 | model.load_state_dict(torch.load(model_path)) 19 | 20 | converter = utils.strLabelConverter(alphabet) 21 | 22 | transformer = dataset.resizeNormalize((100, 32)) 23 | image = Image.open(img_path).convert('L') 24 | image = transformer(image) 25 | if torch.cuda.is_available(): 26 | image = image.cuda() 27 | image = image.view(1, *image.size()) 28 | image = Variable(image) 29 | 30 | model.eval() 31 | preds = model(image) 32 | 33 | _, preds = preds.max(2) 34 | preds = preds.transpose(1, 0).contiguous().view(-1) 35 | 36 | preds_size = Variable(torch.IntTensor([preds.size(0)])) 37 | raw_pred = converter.decode(preds.data, preds_size.data, raw=True) 38 | sim_pred = converter.decode(preds.data, preds_size.data, raw=False) 39 | print('%-20s => %-20s' % (raw_pred, sim_pred)) 40 | -------------------------------------------------------------------------------- /CTPN/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | ALL THIRD PARTY CODES ARE LICENSED TO YOU UNDER THEIR ORIGINAL LICENSE TERMS. -------------------------------------------------------------------------------- /crnn.pytorch/README.md: -------------------------------------------------------------------------------- 1 | Convolutional Recurrent Neural Network 2 | ====================================== 3 | 4 | This software implements the Convolutional Recurrent Neural Network (CRNN) in pytorch. 5 | The original software can be found in [crnn](https://github.com/bgshih/crnn). 6 | 7 | Run demo 8 | -------- 9 | A demo program can be found in ``demo.py``. 
Before running the demo, download a pretrained model 10 | from [Baidu Netdisk](https://pan.baidu.com/s/1pLbeCND) or [Dropbox](https://www.dropbox.com/s/dboqjk20qjkpta3/crnn.pth?dl=0). 11 | This pretrained model was converted from the one offered by the author, using the scripts in ``tool``. 12 | Put the downloaded model file ``crnn.pth`` into the ``data/`` directory. Then launch the demo by: 13 | 14 | python demo.py 15 | 16 | The demo reads an example image and recognizes its text content. 17 | 18 | Example image: 19 | ![Example Image](./data/demo.png) 20 | 21 | Expected output: 22 | loading pretrained model from ./data/crnn.pth 23 | a-----v--a-i-l-a-bb-l-ee-- => available 24 | 25 | Dependencies 26 | ---------- 27 | * [warp_ctc_pytorch](https://github.com/SeanNaren/warp-ctc/tree/pytorch_bindings/pytorch_binding) 28 | * lmdb 29 | 30 | Train a new model 31 | ----------------- 32 | 1. Construct the dataset following the original guide. For training with variable length, please sort the images by text length. 33 | 2. ``python crnn_main.py [--param val]``. Explore ``crnn_main.py`` for details. 34 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | scene-text-recog 2 | =================================== 3 | Scene text recognition combining CTPN + CRNN. 4 | 5 | Code based on [bear63/sceneReco commit-ae1731e](https://github.com/bear63/sceneReco/commit/ae1731e4a344359e1ff5a147ec8d3834afb16ba1) 6 | 7 | # CTPN: 8 | 9 | Caffe model in [frcnn](https://github.com/makefile/frcnn/tree/fpn/src/caffe/CTPN), adapted from [tianzhi0549/CTPN](https://github.com/tianzhi0549/CTPN), which targets an old Caffe. 10 | 11 | # CRNN: 12 | 13 | This repo uses the convolutional recurrent network in PyTorch: [meijieru/crnn.pytorch](https://github.com/meijieru/crnn.pytorch), while the original implementation [bgshih/crnn](https://github.com/bgshih/crnn) uses Torch. 14 | 15 | # Trained models: 16 | 17 | 1. Pure English: set `alphabet` to the English charset in `crnn.pytorch/keys.py` (see the sketch below). 18 | CPU model: [crnn.pth](https://www.dropbox.com/s/dboqjk20qjkpta3/crnn.pth?dl=0) 19 | 2. Chinese + English recognition: [netCRNN63.pth](https://drive.google.com/open?id=1R1tvM_HVo5eJLqnTDpxFgMANPRk4_QHB) for GPU, [netCRNNcpu.pth](https://drive.google.com/open?id=1p8yWQ3j3hHiRA9pBYmge542Y1xetcg1x) for CPU. 20 | 21 | Copy [ctpn_trained_model.caffemodel](https://drive.google.com/open?id=0B7c5Ix-XO7hqQWtKQ0lxTko4ZGs) to `./models/CTPN`. 22 | 23 | Copy the CRNN model to `./models`; the CPU model can be used for both CPU and GPU. 24 | 25 | The CTPN model was tested on Caffe 1.0 with CUDA 8.0; the CRNN models were tested on PyTorch 0.4.0 and 0.3.1. 
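For the pure-English case, a minimal sketch of what `crnn.pytorch/keys.py` reduces to (this is the same charset hard-coded in `crnn.pytorch/demo.py`; the shipped `keys.py` instead holds the long Chinese alphabet):

    #coding:UTF-8
    alphabet = u'0123456789abcdefghijklmnopqrstuvwxyz'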
26 | 27 | # Run demo 28 | 29 | python demo.py 30 | 31 | ![Example Image](./IMG_1556.png) -------------------------------------------------------------------------------- /CTPN/src/layers/text_proposal_layer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import yaml, caffe 3 | from other import clip_boxes 4 | from anchor import AnchorText 5 | 6 | 7 | class ProposalLayer(caffe.Layer): 8 | def setup(self, bottom, top): 9 | # parse the layer parameter string, which must be valid YAML 10 | layer_params = yaml.load(self.param_str) 11 | 12 | self._feat_stride = layer_params['feat_stride'] 13 | self.anchor_generator=AnchorText() 14 | self._num_anchors = self.anchor_generator.anchor_num 15 | 16 | top[0].reshape(1, 4) 17 | top[1].reshape(1, 1, 1, 1) 18 | 19 | def forward(self, bottom, top): 20 | assert bottom[0].data.shape[0]==1, \ 21 | 'Only single item batches are supported' 22 | 23 | scores = bottom[0].data[:, self._num_anchors:, :, :] 24 | 25 | bbox_deltas = bottom[1].data 26 | im_info = bottom[2].data[0, :] 27 | height, width = scores.shape[-2:] 28 | 29 | anchors=self.anchor_generator.locate_anchors((height, width), self._feat_stride) 30 | 31 | scores=scores.transpose((0, 2, 3, 1)).reshape(-1, 1) 32 | bbox_deltas=bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 2)) 33 | 34 | proposals=self.anchor_generator.apply_deltas_to_anchors(bbox_deltas, anchors) 35 | 36 | # clip the proposals in excess of the boundaries of the image 37 | proposals=clip_boxes(proposals, im_info[:2]) 38 | 39 | blob=proposals.astype(np.float32, copy=False) 40 | top[0].reshape(*(blob.shape)) 41 | top[0].data[...]=blob 42 | 43 | top[1].reshape(*(scores.shape)) 44 | top[1].data[...]=scores 45 | 46 | def backward(self, top, propagate_down, bottom): 47 | pass 48 | 49 | def reshape(self, bottom, top): 50 | pass 51 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | import sys 3 | caffe_path = '/home/s02/fyk/frcnn' 4 | sys.path.insert(0, "%s/python"%caffe_path) 5 | import caffe # import first to avoid problems like "CUDNN_STATUS_BAD_PARAM" 6 | sys.path.insert(0, "./CTPN/tools") 7 | sys.path.insert(1, "./CTPN/src") 8 | sys.path.append("./crnn.pytorch") 9 | 10 | from ctpnport import CTPNDetector 11 | from crnnport import CRNNRecognizer 12 | import time 13 | import cv2 14 | 15 | use_gpu = False 16 | #use_gpu = True 17 | base_dir = './models/' 18 | demo_dir = '/home/s02/hgf/text-recog/sceneReco/test/' 19 | gpu_id = -1 20 | if use_gpu: gpu_id = 0 21 | # GPU-only model: model_path = base_dir + 'netCRNN63.pth' 22 | # otherwise use the CPU model, which 23 | # can be used for both CPU/GPU: 24 | model_path = base_dir + 'netCRNNcpu.pth' 25 | #model_path = base_dir + 'crnn.pth' 26 | # another option is the English-only crnn.pth 27 | 28 | NET_DEF_FILE = base_dir + "CTPN/deploy.prototxt" 29 | MODEL_FILE = base_dir + "CTPN/ctpn_trained_model.caffemodel" 30 | 31 | #ctpn 32 | ctpn_detector = CTPNDetector(NET_DEF_FILE, MODEL_FILE, caffe_path) 33 | #crnn 34 | crnn_recog = CRNNRecognizer(model_path) 35 | 36 | #timer=Timer() 37 | print "\ninput exit to quit\n" 38 | while 1 : 39 | #im_name = raw_input("\nplease input file name:") 40 | im_name = 'image_8.jpg' 41 | if im_name == "exit": 42 | break 43 | im_path = demo_dir + im_name 44 | im = cv2.imread(im_path) 45 | if im is None: 46 | continue 47 | #timer.tic() 48 | start = time.time() 49 | text_lines, resize_im, resize_ratio = 
ctpn_detector.getCharBlock(im, gpu_id) 50 | print 'boxes:',len(text_lines) 51 | text_recs = ctpn_detector.convert_bbox(text_lines) 52 | print text_recs 53 | texts = crnn_recog.crnnRec(resize_im,text_recs, use_gpu) 54 | print texts 55 | end = time.time() 56 | #print "Time: %f"%timer.toc() 57 | print "Time ms: %f"%(end - start) 58 | box_im, text_recs = ctpn_detector.draw_boxes8(resize_im,text_lines, is_display=False) 59 | cv2.imwrite("out.jpg", box_im) 60 | break 61 | #cv2.waitKey(0) 62 | 63 | 64 | -------------------------------------------------------------------------------- /CTPN/tools/demo.py: -------------------------------------------------------------------------------- 1 | # 2 | # The codes are used for implementing CTPN for scene text detection, described in: 3 | # 4 | # Z. Tian, W. Huang, T. He, P. He and Y. Qiao: Detecting Text in Natural Image with 5 | # Connectionist Text Proposal Network, ECCV, 2016. 6 | # 7 | # Online demo is available at: textdet.com 8 | # 9 | # These demo codes (with our trained model) are for text-line detection (without 10 | # the side-refinement part). 11 | # 12 | # 13 | # ====== Copyright by Zhi Tian, Weilin Huang, Tong He, Pan He and Yu Qiao========== 14 | 15 | # Email: zhi.tian@siat.ac.cn; wl.huang@siat.ac.cn 16 | # 17 | # Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences 18 | # 19 | # 20 | import cv2, os, caffe, sys 21 | from cfg import Config as cfg 22 | from other import draw_boxes, resize_im, CaffeModel 23 | 24 | from detectors import TextProposalDetector, TextDetector 25 | import os.path as osp 26 | from utils.timer import Timer 27 | 28 | DEMO_IMAGE_DIR="demo_images/" 29 | NET_DEF_FILE="models/deploy.prototxt" 30 | MODEL_FILE="models/ctpn_trained_model.caffemodel" 31 | 32 | if len(sys.argv)>1 and sys.argv[1]=="--no-gpu": 33 | caffe.set_mode_cpu() 34 | else: 35 | caffe.set_mode_gpu() 36 | caffe.set_device(cfg.TEST_GPU_ID) 37 | 38 | # initialize the detectors 39 | text_proposals_detector=TextProposalDetector(CaffeModel(NET_DEF_FILE, MODEL_FILE)) 40 | text_detector=TextDetector(text_proposals_detector) 41 | 42 | demo_imnames=os.listdir(DEMO_IMAGE_DIR) 43 | timer=Timer() 44 | 45 | for im_name in demo_imnames: 46 | print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" 47 | print "Image: %s"%im_name 48 | 49 | im_file=osp.join(DEMO_IMAGE_DIR, im_name) 50 | im=cv2.imread(im_file) 51 | 52 | timer.tic() 53 | 54 | im, f=resize_im(im, cfg.SCALE, cfg.MAX_SCALE) 55 | text_lines=text_detector.detect(im) 56 | 57 | print "Number of the detected text lines: %s"%len(text_lines) 58 | print "Time: %f"%timer.toc() 59 | 60 | im_with_text_lines=draw_boxes(im, text_lines, caption=im_name, wait=False) 61 | 62 | print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" 63 | print "Thank you for trying our demo. Press any key to exit..." 
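# draw_boxes was called with wait=False above, so block here for a key press before moving on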
64 | cv2.waitKey(0) 65 | 66 | -------------------------------------------------------------------------------- /CTPN/src/utils/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /CTPN/src/text_proposal_connector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from other import clip_boxes 3 | from text_proposal_graph_builder import TextProposalGraphBuilder 4 | 5 | class TextProposalConnector: 6 | """ 7 | Connect text proposals into text lines 8 | """ 9 | def __init__(self): 10 | self.graph_builder=TextProposalGraphBuilder() 11 | 12 | def group_text_proposals(self, text_proposals, scores, im_size): 13 | graph=self.graph_builder.build_graph(text_proposals, scores, im_size) 14 | return graph.sub_graphs_connected() 15 | 16 | def fit_y(self, X, Y, x1, x2): 17 | assert len(X)!=0 18 | # if X only includes one point, the function returns the horizontal line y=Y[0] 19 | if np.sum(X==X[0])==len(X): 20 | return Y[0], Y[0] 21 | p=np.poly1d(np.polyfit(X, Y, 1)) 22 | return p(x1), p(x2) 23 | 24 | def get_text_lines(self, text_proposals, scores, im_size): 25 | # tp=text proposal 26 | tp_groups=self.group_text_proposals(text_proposals, scores, im_size) 27 | 
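# each row of text_lines: [x_min, y_min, x_max, y_max, score, line slope k, intercept b, mean proposal height + 2.5]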
text_lines=np.zeros((len(tp_groups), 8), np.float32) 28 | 29 | for index, tp_indices in enumerate(tp_groups): 30 | text_line_boxes=text_proposals[list(tp_indices)] 31 | num = np.size(text_line_boxes) 32 | X = (text_line_boxes[:,0] + text_line_boxes[:,2]) / 2 33 | Y = (text_line_boxes[:,1] + text_line_boxes[:,3]) / 2 34 | z1 = np.polyfit(X,Y,1) 35 | p1 = np.poly1d(z1) 36 | 37 | 38 | x0=np.min(text_line_boxes[:, 0]) 39 | x1=np.max(text_line_boxes[:, 2]) 40 | 41 | offset=(text_line_boxes[0, 2]-text_line_boxes[0, 0])*0.5 42 | 43 | lt_y, rt_y=self.fit_y(text_line_boxes[:, 0], text_line_boxes[:, 1], x0+offset, x1-offset) 44 | lb_y, rb_y=self.fit_y(text_line_boxes[:, 0], text_line_boxes[:, 3], x0+offset, x1-offset) 45 | 46 | # the score of a text line is the average score of the scores 47 | # of all text proposals contained in the text line 48 | score=scores[list(tp_indices)].sum()/float(len(tp_indices)) 49 | 50 | text_lines[index, 0]=x0 51 | text_lines[index, 1]=min(lt_y, rt_y) 52 | text_lines[index, 2]=x1 53 | text_lines[index, 3]=max(lb_y, rb_y) 54 | text_lines[index, 4]=score 55 | text_lines[index, 5]=z1[0] 56 | text_lines[index, 6]=z1[1] 57 | height = np.mean( (text_line_boxes[:,3]-text_line_boxes[:,1]) ) 58 | text_lines[index, 7]= height + 2.5 59 | #text_lines=clip_boxes(text_lines, im_size) 60 | 61 | 62 | return text_lines 63 | -------------------------------------------------------------------------------- /CTPN/src/detectors.py: -------------------------------------------------------------------------------- 1 | from cfg import Config as cfg 2 | from other import prepare_img, normalize 3 | import numpy as np 4 | from utils.cpu_nms import cpu_nms as nms 5 | from text_proposal_connector import TextProposalConnector 6 | 7 | 8 | class TextProposalDetector: 9 | """ 10 | Detect text proposals in an image 11 | """ 12 | def __init__(self, caffe_model): 13 | self.caffe_model=caffe_model 14 | 15 | def detect(self, im, mean): 16 | im_data=prepare_img(im, mean) 17 | _=self.caffe_model.forward2({ 18 | "data": im_data[np.newaxis, :], 19 | "im_info": np.array([[im_data.shape[1], im_data.shape[2]]], np.float32) 20 | }) 21 | rois=self.caffe_model.blob("rois") 22 | scores=self.caffe_model.blob("scores") 23 | return rois, scores 24 | 25 | 26 | class TextDetector: 27 | """ 28 | Detect text from an image 29 | """ 30 | def __init__(self, text_proposal_detector): 31 | self.text_proposal_detector=text_proposal_detector 32 | self.text_proposal_connector=TextProposalConnector() 33 | 34 | def detect(self, im): 35 | """ 36 | Detecting texts from an image 37 | :return: the bounding boxes of the detected texts 38 | """ 39 | text_proposals, scores=self.text_proposal_detector.detect(im, cfg.MEAN) 40 | keep_inds=np.where(scores>cfg.TEXT_PROPOSALS_MIN_SCORE)[0] 41 | text_proposals, scores=text_proposals[keep_inds], scores[keep_inds] 42 | 43 | sorted_indices=np.argsort(scores.ravel())[::-1] 44 | text_proposals, scores=text_proposals[sorted_indices], scores[sorted_indices] 45 | 46 | # nms for text proposals 47 | keep_inds=nms(np.hstack((text_proposals, scores)), cfg.TEXT_PROPOSALS_NMS_THRESH) 48 | text_proposals, scores=text_proposals[keep_inds], scores[keep_inds] 49 | 50 | scores=normalize(scores) 51 | 52 | text_lines=self.text_proposal_connector.get_text_lines(text_proposals, scores, im.shape[:2]) 53 | 54 | keep_inds=self.filter_boxes(text_lines) 55 | text_lines=text_lines[keep_inds] 56 | 57 | # nms for text lines 58 | if text_lines.shape[0]!=0: 59 | keep_inds=nms(text_lines, cfg.TEXT_LINE_NMS_THRESH) 60 | 
text_lines=text_lines[keep_inds] 61 | 62 | return text_lines 63 | 64 | def filter_boxes(self, boxes): 65 | heights=boxes[:, 3]-boxes[:, 1]+1 66 | widths=boxes[:, 2]-boxes[:, 0]+1 67 | scores=boxes[:, -1] 68 | return np.where((widths/heights>cfg.MIN_RATIO) & (scores>cfg.LINE_MIN_SCORE) & 69 | (widths>(cfg.TEXT_PROPOSALS_WIDTH*cfg.MIN_NUM_PROPOSALS)))[0] 70 | -------------------------------------------------------------------------------- /CTPN/README.md: -------------------------------------------------------------------------------- 1 | # Detecting Text in Natural Image with Connectionist Text Proposal Network 2 | The codes are used for implementing CTPN for scene text detection, described in: 3 | 4 | Z. Tian, W. Huang, T. He, P. He and Y. Qiao: Detecting Text in Natural Image with 5 | Connectionist Text Proposal Network, ECCV, 2016. 6 | 7 | Online demo is available at: [textdet.com](http://textdet.com) 8 | 9 | These demo codes (with our trained model) are for text-line detection (without 10 | the side-refinement part). 11 | 12 | # Required hardware 13 | You need a GPU. If you use CUDNN, about 1.5GB of free memory is required. If you don't use CUDNN, you will need about 5GB of free memory, and the testing time will increase slightly. Therefore, we strongly recommend using CUDNN. 14 | 15 | It's also possible to run the program on CPU only, but it's extremely slow due to the non-optimal CPU implementation. 16 | # Required software 17 | Python 2.7, Cython, and everything Caffe depends on. 18 | 19 | # How to run this code 20 | 21 | 1. Clone this repository with `git clone https://github.com/tianzhi0549/CTPN.git`. It will check out the CTPN code and the Caffe we ship. 22 | 23 | 2. Install the Caffe we ship with the steps below. 24 | * Install Caffe's dependencies. You can follow [this tutorial](http://caffe.berkeleyvision.org/installation.html). *Note: we need Python support. The CUDA version we need is 7.0.* 25 | * Enter the directory `caffe`. 26 | * Run `cp Makefile.config.example Makefile.config`. 27 | * Open Makefile.config and set `WITH_PYTHON_LAYER := 1`. If you want to use CUDNN, please also set `CUDNN := 1`. Uncomment `CPU_ONLY := 1` if you want to compile it without GPU support. 28 | 29 | *Note: To use CUDNN, you need to download CUDNN from NVIDIA's official website and install it in advance. The CUDNN version we use is 3.0.* 30 | * Run `make -j && make pycaffe`. 31 | 32 | 3. After Caffe is set up, you need to download a trained model (about 78M) from [Google Drive](https://drive.google.com/open?id=0B7c5Ix-XO7hqQWtKQ0lxTko4ZGs) or [our website](http://textdet.com/downloads/ctpn_trained_model.caffemodel), and then place it in the `models` directory. The model's name should be `ctpn_trained_model.caffemodel`. 33 | 34 | 4. Now, be sure you are in the root directory of the codes. Run `make` to compile some cython files. 35 | 36 | 5. Run `python tools/demo.py` for a demo. Or `python tools/demo.py --no-gpu` to run it under CPU mode. (A minimal API sketch is given at the end of this README.) 37 | 38 | # How to use other Caffe 39 | If you want to use another Caffe instead of the one we ship for some reason, you need to migrate the following layers into that Caffe. 40 | * Reverse 41 | * Transpose 42 | * Lstm 43 | 44 | # License 45 | The codes are released under the MIT License. 
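# API usage (sketch)

For reference, the detection flow driven by `tools/demo.py` reduces to a few calls. A minimal sketch, run from the repo root with the model placed as in step 3 above (the path setup mirrors `tools/cfg.py`):

    import sys
    sys.path.insert(0, "./tools"); sys.path.insert(0, "./src"); sys.path.insert(0, "./caffe/python")
    import cv2, caffe
    from cfg import Config as cfg
    from other import resize_im, CaffeModel
    from detectors import TextProposalDetector, TextDetector

    caffe.set_mode_gpu()
    caffe.set_device(cfg.TEST_GPU_ID)
    detector = TextDetector(TextProposalDetector(
        CaffeModel("models/deploy.prototxt", "models/ctpn_trained_model.caffemodel")))
    im, f = resize_im(cv2.imread("demo_images/img_1.jpg"), cfg.SCALE, cfg.MAX_SCALE)
    text_lines = detector.detect(im)  # one row per line: x1, y1, x2, y2, score, ...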
46 | -------------------------------------------------------------------------------- /CTPN/src/anchor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class AnchorText: 5 | def __init__(self): 6 | self.anchor_num=10 7 | 8 | def generate_basic_anchors(self, sizes, base_size=16): 9 | """ 10 | :param sizes: [(h1, w1), (h2, w2)...] 11 | :param base_size 12 | :return: 13 | """ 14 | assert(self.anchor_num==len(sizes)) 15 | base_anchor=np.array([0, 0, base_size-1, base_size-1], np.int32) 16 | anchors=np.zeros((len(sizes), 4), np.int32) 17 | index=0 18 | for h, w in sizes: 19 | anchors[index]=self.scale_anchor(base_anchor, h, w) 20 | index+=1 21 | return anchors 22 | 23 | def scale_anchor(self, anchor, h, w): 24 | x_ctr=(anchor[0]+anchor[2])*0.5 25 | y_ctr=(anchor[1]+anchor[3])*0.5 26 | scaled_anchor=anchor.copy() 27 | scaled_anchor[0]=x_ctr-w/2 28 | scaled_anchor[2]=x_ctr+w/2 29 | scaled_anchor[1]=y_ctr-h/2 30 | scaled_anchor[3]=y_ctr+h/2 31 | return scaled_anchor 32 | 33 | def apply_deltas_to_anchors(self, boxes_delta, anchors): 34 | """ 35 | :return [l t r b] 36 | """ 37 | anchor_y_ctr=(anchors[:, 1]+anchors[:, 3])/2. 38 | anchor_h=anchors[:, 3]-anchors[:, 1]+1. 39 | global_coords=np.zeros_like(boxes_delta, np.float32) 40 | global_coords[:, 1]=np.exp(boxes_delta[:, 1])*anchor_h 41 | global_coords[:, 0]=boxes_delta[:, 0]*anchor_h+anchor_y_ctr-global_coords[:, 1]/2. 42 | return np.hstack((anchors[:, [0]], global_coords[:, [0]], anchors[:, [2]], 43 | global_coords[:, [0]]+global_coords[:, [1]])).astype(np.float32) 44 | 45 | def basic_anchors(self): 46 | """ 47 | anchor [l t r b] 48 | """ 49 | heights=[11, 16, 23, 33, 48, 68, 97, 139, 198, 283] 50 | widths=[16] 51 | sizes=[] 52 | for h in heights: 53 | for w in widths: 54 | sizes.append((h, w)) 55 | return self.generate_basic_anchors(sizes) 56 | 57 | def locate_anchors(self, feat_map_size, feat_stride): 58 | """ 59 | return all anchors on the feature map 60 | """ 61 | basic_anchors_=self.basic_anchors() 62 | anchors=np.zeros((basic_anchors_.shape[0]*feat_map_size[0]*feat_map_size[1], 4), np.int32) 63 | index=0 64 | for y_ in range(feat_map_size[0]): 65 | for x_ in range(feat_map_size[1]): 66 | shift=np.array([x_, y_, x_, y_])*feat_stride 67 | anchors[index:index+basic_anchors_.shape[0], :]=basic_anchors_+shift 68 | index+=basic_anchors_.shape[0] 69 | return anchors 70 | -------------------------------------------------------------------------------- /crnn.pytorch/models/crnn.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class BidirectionalLSTM(nn.Module): 5 | 6 | def __init__(self, nIn, nHidden, nOut): 7 | super(BidirectionalLSTM, self).__init__() 8 | 9 | self.rnn = nn.LSTM(nIn, nHidden, bidirectional=True) 10 | self.embedding = nn.Linear(nHidden * 2, nOut) 11 | 12 | def forward(self, input): 13 | recurrent, _ = self.rnn(input) 14 | T, b, h = recurrent.size() 15 | t_rec = recurrent.view(T * b, h) 16 | 17 | output = self.embedding(t_rec) # [T * b, nOut] 18 | output = output.view(T, b, -1) 19 | 20 | return output 21 | 22 | 23 | class CRNN(nn.Module): 24 | 25 | def __init__(self, imgH, nc, nclass, nh, n_rnn=2, leakyRelu=False): 26 | super(CRNN, self).__init__() 27 | assert imgH % 16 == 0, 'imgH has to be a multiple of 16' 28 | 29 | ks = [3, 3, 3, 3, 3, 3, 2] 30 | ps = [1, 1, 1, 1, 1, 1, 0] 31 | ss = [1, 1, 1, 1, 1, 1, 1] 32 | nm = [64, 128, 256, 256, 512, 512, 512] 33 | 34 | cnn = nn.Sequential() 35 | 36 
| def convRelu(i, batchNormalization=False): 37 | nIn = nc if i == 0 else nm[i - 1] 38 | nOut = nm[i] 39 | cnn.add_module('conv{0}'.format(i), 40 | nn.Conv2d(nIn, nOut, ks[i], ss[i], ps[i])) 41 | if batchNormalization: 42 | cnn.add_module('batchnorm{0}'.format(i), nn.BatchNorm2d(nOut)) 43 | if leakyRelu: 44 | cnn.add_module('relu{0}'.format(i), 45 | nn.LeakyReLU(0.2, inplace=True)) 46 | else: 47 | cnn.add_module('relu{0}'.format(i), nn.ReLU(True)) 48 | 49 | convRelu(0) 50 | cnn.add_module('pooling{0}'.format(0), nn.MaxPool2d(2, 2)) # 64x16x64 51 | convRelu(1) 52 | cnn.add_module('pooling{0}'.format(1), nn.MaxPool2d(2, 2)) # 128x8x32 53 | convRelu(2, True) 54 | convRelu(3) 55 | cnn.add_module('pooling{0}'.format(2), 56 | nn.MaxPool2d((2, 2), (2, 1), (0, 1))) # 256x4x16 57 | convRelu(4, True) 58 | convRelu(5) 59 | cnn.add_module('pooling{0}'.format(3), 60 | nn.MaxPool2d((2, 2), (2, 1), (0, 1))) # 512x2x16 61 | convRelu(6, True) # 512x1x16 62 | 63 | self.cnn = cnn 64 | self.rnn = nn.Sequential( 65 | BidirectionalLSTM(512, nh, nh), 66 | BidirectionalLSTM(nh, nh, nclass)) 67 | 68 | def forward(self, input): 69 | # conv features 70 | conv = self.cnn(input) 71 | b, c, h, w = conv.size() 72 | assert h == 1, "the height of conv must be 1" 73 | conv = conv.squeeze(2) 74 | conv = conv.permute(2, 0, 1) # [w, b, c] 75 | 76 | # rnn features 77 | output = self.rnn(conv) 78 | 79 | return output 80 | -------------------------------------------------------------------------------- /crnn.pytorch/tool/convert_t7.lua: -------------------------------------------------------------------------------- 1 | require('table') 2 | require('torch') 3 | require('os') 4 | 5 | function clone(t) 6 | -- deep-copy a table 7 | if type(t) ~= "table" then return t end 8 | local meta = getmetatable(t) 9 | local target = {} 10 | for k, v in pairs(t) do 11 | if type(v) == "table" then 12 | target[k] = clone(v) 13 | else 14 | target[k] = v 15 | end 16 | end 17 | setmetatable(target, meta) 18 | return target 19 | end 20 | 21 | 22 | function tableMerge(lhs, rhs) 23 | output = clone(lhs) 24 | for _, v in pairs(rhs) do 25 | table.insert(output, v) 26 | end 27 | return output 28 | end 29 | 30 | 31 | function isInTable(val, val_list) 32 | for _, item in pairs(val_list) do 33 | if val == item then 34 | return true 35 | end 36 | end 37 | return false 38 | end 39 | 40 | 41 | function modelToList(model) 42 | local ignoreList = { 43 | 'nn.Copy', 44 | 'nn.AddConstant', 45 | 'nn.MulConstant', 46 | 'nn.View', 47 | 'nn.Transpose', 48 | 'nn.SplitTable', 49 | 'nn.SharedParallelTable', 50 | 'nn.JoinTable', 51 | } 52 | local state = {} 53 | local param 54 | for i, layer in pairs(model.modules) do 55 | local typeName = torch.type(layer) 56 | if not isInTable(typeName, ignoreList) then 57 | if typeName == 'nn.Sequential' or typeName == 'nn.ConcatTable' then 58 | param = modelToList(layer) 59 | elseif typeName == 'cudnn.SpatialConvolution' or typeName == 'nn.SpatialConvolution' then 60 | param = layer:parameters() 61 | elseif typeName == 'cudnn.SpatialBatchNormalization' or typeName == 'nn.SpatialBatchNormalization' then 62 | param = layer:parameters() 63 | bn_vars = {layer.running_mean, layer.running_var} 64 | param = tableMerge(param, bn_vars) 65 | elseif typeName == 'nn.LstmLayer' then 66 | param = layer:parameters() 67 | elseif typeName == 'nn.BiRnnJoin' then 68 | param = layer:parameters() 69 | elseif typeName == 'cudnn.SpatialMaxPooling' or typeName == 'nn.SpatialMaxPooling' then 70 | param = {} 71 | elseif typeName == 'cudnn.ReLU' or typeName == 
'nn.ReLU' then 72 | param = {} 73 | else 74 | print(string.format('Unknown class %s', typeName)) 75 | os.exit(0) 76 | end 77 | table.insert(state, {typeName, param}) 78 | else 79 | print(string.format('pass %s', typeName)) 80 | end 81 | end 82 | return state 83 | end 84 | 85 | 86 | function saveModel(model, output_path) 87 | local state = modelToList(model) 88 | torch.save(output_path, state) 89 | end 90 | -------------------------------------------------------------------------------- /CTPN/src/other.py: -------------------------------------------------------------------------------- 1 | import cv2, caffe 2 | import numpy as np 3 | from matplotlib import cm 4 | 5 | 6 | def prepare_img(im, mean): 7 | """ 8 | transform img into caffe's input img. 9 | """ 10 | im_data=np.transpose(im-mean, (2, 0, 1)) 11 | return im_data 12 | 13 | 14 | def draw_boxes(im, bboxes, is_display=True, color=None, caption="Image", wait=True): 15 | """ 16 | boxes: bounding boxes 17 | """ 18 | im=im.copy() 19 | for box in bboxes: 20 | if color==None: 21 | if len(box)==5 or len(box)==9: 22 | c=tuple(cm.jet([box[-1]])[0, 2::-1]*255) 23 | else: 24 | c=tuple(np.random.randint(0, 256, 3)) 25 | else: 26 | c=color 27 | cv2.rectangle(im, tuple(box[:2]), tuple(box[2:4]), c) 28 | if is_display: 29 | cv2.imshow(caption, im) 30 | if wait: 31 | cv2.waitKey(0) 32 | return im 33 | 34 | 35 | def threshold(coords, min_, max_): 36 | return np.maximum(np.minimum(coords, max_), min_) 37 | 38 | 39 | def clip_boxes(boxes, im_shape): 40 | """ 41 | Clip boxes to image boundaries. 42 | """ 43 | boxes[:, 0::2]=threshold(boxes[:, 0::2], 0, im_shape[1]-1) 44 | boxes[:, 1::2]=threshold(boxes[:, 1::2], 0, im_shape[0]-1) 45 | return boxes 46 | 47 | 48 | def normalize(data): 49 | if data.shape[0]==0: 50 | return data 51 | max_=data.max() 52 | min_=data.min() 53 | return (data-min_)/(max_-min_) if max_-min_!=0 else data-min_ 54 | 55 | 56 | def resize_im(im, scale, max_scale=None): 57 | f=float(scale)/min(im.shape[0], im.shape[1]) 58 | if max_scale!=None and f*max(im.shape[0], im.shape[1])>max_scale: 59 | f=float(max_scale)/max(im.shape[0], im.shape[1]) 60 | return cv2.resize(im, (0, 0), fx=f, fy=f), f 61 | 62 | 63 | class Graph: 64 | def __init__(self, graph): 65 | self.graph=graph 66 | 67 | def sub_graphs_connected(self): 68 | sub_graphs=[] 69 | for index in xrange(self.graph.shape[0]): 70 | if not self.graph[:, index].any() and self.graph[index, :].any(): 71 | v=index 72 | sub_graphs.append([v]) 73 | while self.graph[v, :].any(): 74 | v=np.where(self.graph[v, :])[0][0] 75 | sub_graphs[-1].append(v) 76 | return sub_graphs 77 | 78 | 79 | class CaffeModel: 80 | def __init__(self, net_def_file, model_file): 81 | self.net_def_file=net_def_file 82 | self.net=caffe.Net(net_def_file, model_file, caffe.TEST) 83 | 84 | def blob(self, key): 85 | return self.net.blobs[key].data.copy() 86 | 87 | def forward(self, input_data): 88 | return self.forward2({"data": input_data[np.newaxis, :]}) 89 | 90 | def forward2(self, input_data): 91 | for k, v in input_data.items(): 92 | self.net.blobs[k].reshape(*v.shape) 93 | self.net.blobs[k].data[...]=v 94 | return self.net.forward() 95 | 96 | def net_def_file(self): 97 | return self.net_def_file 98 | -------------------------------------------------------------------------------- /CTPN/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:7.0-runtime-ubuntu14.04 2 | MAINTAINER Varun Suresh 3 | 4 | RUN apt-get update && apt-get install -y --no-install-recommends \ 5 
| build-essential \ 6 | cmake \ 7 | git \ 8 | wget \ 9 | zip \ 10 | unzip \ 11 | libatlas-base-dev \ 12 | libboost-all-dev \ 13 | libgflags-dev \ 14 | libgoogle-glog-dev \ 15 | libhdf5-serial-dev \ 16 | libleveldb-dev \ 17 | liblmdb-dev \ 18 | libopencv-dev \ 19 | libprotobuf-dev \ 20 | libsnappy-dev \ 21 | protobuf-compiler \ 22 | python-dev \ 23 | python-numpy \ 24 | python-pip \ 25 | python-setuptools \ 26 | python-scipy && \ 27 | rm -rf /var/lib/apt/lists/* 28 | 29 | ENV CTPN_ROOT=/opt/ctpn 30 | WORKDIR $CTPN_ROOT 31 | 32 | RUN git clone --depth 1 https://github.com/tianzhi0549/CTPN.git 33 | WORKDIR $CTPN_ROOT/CTPN/caffe 34 | 35 | # Missing "packaging" package 36 | RUN pip install --upgrade pip 37 | RUN pip install packaging 38 | 39 | RUN cd python && for req in $(cat requirements.txt) pydot; do pip install $req; done && cd .. 40 | RUN git clone https://github.com/NVIDIA/nccl.git 41 | RUN apt-get update && apt-get install -y --no-install-recommends \ 42 | cuda=7.0-28 43 | WORKDIR / 44 | 45 | # Download the CUDA drivers from https://developer.nvidia.com/rdp/cudnn-archive and place it here : 46 | ADD cudnn-7.0-linux-x64-v3.0.8-prod.tgz / 47 | WORKDIR /cuda 48 | RUN cp -P include/cudnn.h /usr/include 49 | RUN cp -P lib64/libcudnn* /usr/lib/x86_64-linux-gnu/ 50 | 51 | WORKDIR $CTPN_ROOT/CTPN/caffe 52 | RUN cp Makefile.config.example Makefile.config 53 | RUN apt-get update && apt-get install -y --no-install-recommends \ 54 | vim 55 | RUN cd nccl && make -j install && cd .. && rm -rf nccl && \ 56 | mkdir build && cd build && \ 57 | cmake -DUSE_CUDNN=1 .. && \ 58 | WITH_PYTHON_LAYER=1 make -j"$(nproc)" && make pycaffe 59 | 60 | # Set the environment variables so that the paths are correctly configured 61 | ENV PYCAFFE_ROOT $CTPN_ROOT/CTPN/caffe/python 62 | ENV PYTHONPATH $PYCAFFE_ROOT:$PYTHONPATH 63 | ENV PATH $CTPN_ROOT/CTPN/caffe/build/tools:$PYCAFFE_ROOT:$PATH 64 | RUN echo "$CTPN_ROOT/CTPN/caffe/build/lib" >> /etc/ld.so.conf.d/caffe.conf && ldconfig 65 | 66 | # To make sure the python layer builds - Need to figure out a cleaner way to do this. 67 | RUN cp $CTPN_ROOT/CTPN/src/layers/* $CTPN_ROOT/CTPN/caffe/src/caffe/layers/ 68 | RUN cp $CTPN_ROOT/CTPN/src/*.py $CTPN_ROOT/CTPN/caffe/src/caffe/ 69 | RUN cp -r $CTPN_ROOT/CTPN/src/utils $CTPN_ROOT/CTPN/caffe/src/caffe/ 70 | 71 | # Install Opencv - 2.4.12 : 72 | 73 | RUN cd ~ && \ 74 | mkdir -p ocv-tmp && \ 75 | cd ocv-tmp && \ 76 | wget https://github.com/Itseez/opencv/archive/2.4.12.zip && \ 77 | unzip 2.4.12.zip && \ 78 | cd opencv-2.4.12 && \ 79 | mkdir release && \ 80 | cd release && \ 81 | cmake -D CMAKE_BUILD_TYPE=RELEASE \ 82 | -D CMAKE_INSTALL_PREFIX=/usr/local \ 83 | -D BUILD_PYTHON_SUPPORT=ON \ 84 | .. && \ 85 | make -j8 && \ 86 | make install && \ 87 | rm -rf ~/ocv-tmp 88 | 89 | WORKDIR $CTPN_ROOT/CTPN 90 | RUN make 91 | -------------------------------------------------------------------------------- /CTPN/src/text_proposal_graph_builder.py: -------------------------------------------------------------------------------- 1 | from cfg import Config as cfg 2 | import numpy as np 3 | from other import Graph 4 | 5 | 6 | class TextProposalGraphBuilder: 7 | """ 8 | Build Text proposals into a graph. 
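Each node is a text proposal; a directed edge i -> j means proposal j is the chosen horizontal succession of proposal i (see build_graph below).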
9 | """ 10 | def get_successions(self, index): 11 | box=self.text_proposals[index] 12 | results=[] 13 | for left in range(int(box[0])+1, min(int(box[0])+cfg.MAX_HORIZONTAL_GAP+1, self.im_size[1])): 14 | adj_box_indices=self.boxes_table[left] 15 | for adj_box_index in adj_box_indices: 16 | if self.meet_v_iou(adj_box_index, index): 17 | results.append(adj_box_index) 18 | if len(results)!=0: 19 | return results 20 | return results 21 | 22 | def get_precursors(self, index): 23 | box=self.text_proposals[index] 24 | results=[] 25 | for left in range(int(box[0])-1, max(int(box[0]-cfg.MAX_HORIZONTAL_GAP), 0)-1, -1): 26 | adj_box_indices=self.boxes_table[left] 27 | for adj_box_index in adj_box_indices: 28 | if self.meet_v_iou(adj_box_index, index): 29 | results.append(adj_box_index) 30 | if len(results)!=0: 31 | return results 32 | return results 33 | 34 | def is_succession_node(self, index, succession_index): 35 | precursors=self.get_precursors(succession_index) 36 | if self.scores[index]>=np.max(self.scores[precursors]): 37 | return True 38 | return False 39 | 40 | def meet_v_iou(self, index1, index2): 41 | def overlaps_v(index1, index2): 42 | h1=self.heights[index1] 43 | h2=self.heights[index2] 44 | y0=max(self.text_proposals[index2][1], self.text_proposals[index1][1]) 45 | y1=min(self.text_proposals[index2][3], self.text_proposals[index1][3]) 46 | return max(0, y1-y0+1)/min(h1, h2) 47 | 48 | def size_similarity(index1, index2): 49 | h1=self.heights[index1] 50 | h2=self.heights[index2] 51 | return min(h1, h2)/max(h1, h2) 52 | 53 | return overlaps_v(index1, index2)>=cfg.MIN_V_OVERLAPS and \ 54 | size_similarity(index1, index2)>=cfg.MIN_SIZE_SIM 55 | 56 | def build_graph(self, text_proposals, scores, im_size): 57 | self.text_proposals=text_proposals 58 | self.scores=scores 59 | self.im_size=im_size 60 | self.heights=text_proposals[:, 3]-text_proposals[:, 1]+1 61 | 62 | boxes_table=[[] for _ in range(self.im_size[1])] 63 | for index, box in enumerate(text_proposals): 64 | boxes_table[int(box[0])].append(index) 65 | self.boxes_table=boxes_table 66 | 67 | graph=np.zeros((text_proposals.shape[0], text_proposals.shape[0]), np.bool) 68 | 69 | for index, box in enumerate(text_proposals): 70 | successions=self.get_successions(index) 71 | if len(successions)==0: 72 | continue 73 | succession_index=successions[np.argmax(scores[successions])] 74 | if self.is_succession_node(index, succession_index): 75 | # NOTE: a box can have multiple successions(precursors) if multiple successions(precursors) 76 | # have equal scores. 
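# is_succession_node keeps the edge only when this box scores at least as high as
# every precursor of its chosen succession, so such ties are resolved consistently.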
77 | graph[index, succession_index]=True 78 | return Graph(graph) 79 | -------------------------------------------------------------------------------- /crnn.pytorch/test/test_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # encoding: utf-8 3 | 4 | import sys 5 | import unittest 6 | import torch 7 | from torch.autograd import Variable 8 | import collections 9 | origin_path = sys.path 10 | sys.path.append("..") 11 | import crnn_utils as utils 12 | sys.path = origin_path 13 | 14 | 15 | def equal(a, b): 16 | if isinstance(a, torch.Tensor): 17 | return a.equal(b) 18 | elif isinstance(a, str): 19 | return a == b 20 | elif isinstance(a, collections.Iterable): 21 | res = True 22 | for (x, y) in zip(a, b): 23 | res = res & equal(x, y) 24 | return res 25 | else: 26 | return a == b 27 | 28 | 29 | class utilsTestCase(unittest.TestCase): 30 | 31 | def checkConverter(self): 32 | encoder = utils.strLabelConverter('abcdefghijklmnopqrstuvwxyz') 33 | 34 | # Encode 35 | # trivial mode 36 | result = encoder.encode('efa') 37 | target = (torch.IntTensor([5, 6, 1]), torch.IntTensor([3])) 38 | self.assertTrue(equal(result, target)) 39 | 40 | # batch mode 41 | result = encoder.encode(['efa', 'ab']) 42 | target = (torch.IntTensor([5, 6, 1, 1, 2]), torch.IntTensor([3, 2])) 43 | self.assertTrue(equal(result, target)) 44 | 45 | # Decode 46 | # trivial mode 47 | result = encoder.decode( 48 | torch.IntTensor([5, 6, 1]), torch.IntTensor([3])) 49 | target = 'efa' 50 | self.assertTrue(equal(result, target)) 51 | 52 | # replicate mode 53 | result = encoder.decode( 54 | torch.IntTensor([5, 5, 0, 1]), torch.IntTensor([4])) 55 | target = 'ea' 56 | self.assertTrue(equal(result, target)) 57 | 58 | # raise AssertionError 59 | def f(): 60 | result = encoder.decode( 61 | torch.IntTensor([5, 5, 0, 1]), torch.IntTensor([3])) 62 | self.assertRaises(AssertionError, f) 63 | 64 | # batch mode 65 | result = encoder.decode( 66 | torch.IntTensor([5, 6, 1, 1, 2]), torch.IntTensor([3, 2])) 67 | target = ['efa', 'ab'] 68 | self.assertTrue(equal(result, target)) 69 | 70 | def checkOneHot(self): 71 | v = torch.LongTensor([1, 2, 1, 2, 0]) 72 | v_length = torch.LongTensor([2, 3]) 73 | v_onehot = utils.oneHot(v, v_length, 4) 74 | target = torch.FloatTensor([[[0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 0]], 75 | [[0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]]]) 76 | assert target.equal(v_onehot) 77 | 78 | def checkAverager(self): 79 | acc = utils.averager() 80 | acc.add(Variable(torch.Tensor([1, 2]))) 81 | acc.add(Variable(torch.Tensor([[5, 6]]))) 82 | assert acc.val() == 3.5 83 | 84 | acc = utils.averager() 85 | acc.add(torch.Tensor([1, 2])) 86 | acc.add(torch.Tensor([[5, 6]])) 87 | assert acc.val() == 3.5 88 | 89 | def checkAssureRatio(self): 90 | img = torch.Tensor([[1], [3]]).view(1, 1, 2, 1) 91 | img = Variable(img) 92 | img = utils.assureRatio(img) 93 | assert torch.Size([1, 1, 2, 2]) == img.size() 94 | 95 | 96 | def _suite(): 97 | suite = unittest.TestSuite() 98 | suite.addTest(utilsTestCase("checkConverter")) 99 | suite.addTest(utilsTestCase("checkOneHot")) 100 | suite.addTest(utilsTestCase("checkAverager")) 101 | suite.addTest(utilsTestCase("checkAssureRatio")) 102 | return suite 103 | 104 | 105 | if __name__ == "__main__": 106 | suite = _suite() 107 | runner = unittest.TextTestRunner() 108 | runner.run(suite) 109 | -------------------------------------------------------------------------------- /crnnport.py: -------------------------------------------------------------------------------- 1 | 
#coding:utf-8 2 | 3 | import random 4 | import torch 5 | from torch.autograd import Variable 6 | import numpy as np 7 | import os 8 | import crnn_utils 9 | import dataset 10 | from PIL import Image 11 | import models.crnn as crnn 12 | import keys 13 | from math import * 14 | #import mahotas 15 | import cv2 16 | 17 | class CRNNRecognizer: 18 | 19 | def __init__(self, model_path): 20 | #def crnnSource(model_path, use_gpu=True): 21 | alphabet = keys.alphabet # Chinese words 22 | self.converter = crnn_utils.strLabelConverter(alphabet) 23 | # note that in https://github.com/bear63/sceneReco support multi GPU. 24 | # model = crnn.CRNN(32, 1, len(alphabet)+1, 256, 1).cuda() 25 | self.model = crnn.CRNN(32, 1, len(alphabet)+1, 256) 26 | self.cpu_model = crnn.CRNN(32, 1, len(alphabet)+1, 256) 27 | if torch.cuda.is_available(): 28 | self.model = self.model.cuda() 29 | print('loading pretrained model from %s' % model_path) 30 | #model_path = './crnn/samples/netCRNN63.pth' 31 | model_state_dict = torch.load(model_path) 32 | self.model.load_state_dict(model_state_dict) 33 | self.cpu_model.load_state_dict(model_state_dict) 34 | #self.use_gpu = use_gpu 35 | #return model,converter 36 | 37 | 38 | def crnnRec(self, im, text_recs, use_gpu=True): 39 | texts = [] 40 | index = 0 41 | for rec in text_recs: 42 | pt1 = (rec[0],rec[1]) 43 | pt2 = (rec[2],rec[3]) 44 | pt3 = (rec[6],rec[7]) 45 | pt4 = (rec[4],rec[5]) 46 | partImg = self.dumpRotateImage(im,degrees(atan2(pt2[1]-pt1[1],pt2[0]-pt1[0])),pt1,pt2,pt3,pt4) 47 | #mahotas.imsave('%s.jpg'%index, partImg) 48 | 49 | 50 | image = Image.fromarray(partImg).convert('L') 51 | #height,width,channel=partImg.shape[:3] 52 | #print(height,width,channel) 53 | #print(image.size) 54 | 55 | #image = Image.open('./img/t4.jpg').convert('L') 56 | scale = image.size[1]*1.0 / 32 57 | w = image.size[0] / scale 58 | w = int(w) 59 | #print(w) 60 | 61 | transformer = dataset.resizeNormalize((w, 32)) 62 | image = transformer(image) 63 | model = self.cpu_model 64 | if use_gpu and torch.cuda.is_available(): 65 | image = image.cuda() 66 | model = self.model 67 | 68 | image = image.view(1, *image.size()) 69 | image = Variable(image) 70 | model.eval() 71 | print(type(model),type(image)) 72 | preds = model(image) 73 | _, preds = preds.max(2) 74 | preds = preds.squeeze(0) 75 | preds = preds.transpose(1, 0).contiguous().view(-1) 76 | preds_size = Variable(torch.IntTensor([preds.size(0)])) 77 | raw_pred = self.converter.decode(preds.data, preds_size.data, raw=True) 78 | sim_pred = self.converter.decode(preds.data, preds_size.data, raw=False) 79 | print('%-20s => %-20s' % (raw_pred, sim_pred)) 80 | #print(index) 81 | #print(sim_pred) 82 | index = index + 1 83 | texts.append(sim_pred) 84 | 85 | return texts 86 | 87 | def dumpRotateImage(self, img,degree,pt1,pt2,pt3,pt4): 88 | height,width=img.shape[:2] 89 | heightNew = int(width * fabs(sin(radians(degree))) + height * fabs(cos(radians(degree)))) 90 | widthNew = int(height * fabs(sin(radians(degree))) + width * fabs(cos(radians(degree)))) 91 | matRotation=cv2.getRotationMatrix2D((width/2,height/2),degree,1) 92 | matRotation[0, 2] += (widthNew - width) / 2 93 | matRotation[1, 2] += (heightNew - height) / 2 94 | imgRotation = cv2.warpAffine(img, matRotation, (widthNew, heightNew), borderValue=(255, 255, 255)) 95 | pt1 = list(pt1) 96 | pt3 = list(pt3) 97 | 98 | 99 | [[pt1[0]], [pt1[1]]] = np.dot(matRotation, np.array([[pt1[0]], [pt1[1]], [1]])) 100 | [[pt3[0]], [pt3[1]]] = np.dot(matRotation, np.array([[pt3[0]], [pt3[1]], [1]])) 101 | 
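# crop the axis-aligned region spanned by the rotated corner points pt1 (top-left) and pt3 (bottom-right)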
imgOut=imgRotation[int(pt1[1]):int(pt3[1]),int(pt1[0]):int(pt3[0])] 102 | height,width=imgOut.shape[:2] 103 | return imgOut 104 | 105 | -------------------------------------------------------------------------------- /crnn.pytorch/dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # encoding: utf-8 3 | 4 | import random 5 | import torch 6 | from torch.utils.data import Dataset 7 | from torch.utils.data import sampler 8 | import torchvision.transforms as transforms 9 | import lmdb 10 | import six 11 | import sys 12 | from PIL import Image 13 | import numpy as np 14 | 15 | 16 | class lmdbDataset(Dataset): 17 | 18 | def __init__(self, root=None, transform=None, target_transform=None): 19 | self.env = lmdb.open( 20 | root, 21 | max_readers=1, 22 | readonly=True, 23 | lock=False, 24 | readahead=False, 25 | meminit=False) 26 | 27 | if not self.env: 28 | print('cannot create lmdb from %s' % (root)) 29 | sys.exit(0) 30 | 31 | with self.env.begin(write=False) as txn: 32 | nSamples = int(txn.get('num-samples')) 33 | self.nSamples = nSamples 34 | 35 | self.transform = transform 36 | self.target_transform = target_transform 37 | 38 | def __len__(self): 39 | return self.nSamples 40 | 41 | def __getitem__(self, index): 42 | assert index <= len(self), 'index range error' 43 | index += 1 44 | with self.env.begin(write=False) as txn: 45 | img_key = 'image-%09d' % index 46 | imgbuf = txn.get(img_key) 47 | 48 | buf = six.BytesIO() 49 | buf.write(imgbuf) 50 | buf.seek(0) 51 | try: 52 | img = Image.open(buf).convert('L') 53 | except IOError: 54 | print('Corrupted image for %d' % index) 55 | return self[index + 1] 56 | 57 | if self.transform is not None: 58 | img = self.transform(img) 59 | 60 | label_key = 'label-%09d' % index 61 | label = str(txn.get(label_key)) 62 | 63 | if self.target_transform is not None: 64 | label = self.target_transform(label) 65 | 66 | return (img, label) 67 | 68 | 69 | class resizeNormalize(object): 70 | 71 | def __init__(self, size, interpolation=Image.BILINEAR): 72 | self.size = size 73 | self.interpolation = interpolation 74 | self.toTensor = transforms.ToTensor() 75 | 76 | def __call__(self, img): 77 | img = img.resize(self.size, self.interpolation) 78 | img = self.toTensor(img) 79 | img.sub_(0.5).div_(0.5) 80 | return img 81 | 82 | 83 | class randomSequentialSampler(sampler.Sampler): 84 | 85 | def __init__(self, data_source, batch_size): 86 | self.num_samples = len(data_source) 87 | self.batch_size = batch_size 88 | 89 | def __iter__(self): 90 | n_batch = len(self) // self.batch_size 91 | tail = len(self) % self.batch_size 92 | index = torch.LongTensor(len(self)).fill_(0) 93 | for i in range(n_batch): 94 | random_start = random.randint(0, len(self) - self.batch_size) 95 | batch_index = random_start + torch.range(0, self.batch_size - 1) 96 | index[i * self.batch_size:(i + 1) * self.batch_size] = batch_index 97 | # deal with tail 98 | if tail: 99 | random_start = random.randint(0, len(self) - self.batch_size) 100 | tail_index = random_start + torch.range(0, tail - 1) 101 | index[(i + 1) * self.batch_size:] = tail_index 102 | 103 | return iter(index) 104 | 105 | def __len__(self): 106 | return self.num_samples 107 | 108 | 109 | class alignCollate(object): 110 | 111 | def __init__(self, imgH=32, imgW=100, keep_ratio=False, min_ratio=1): 112 | self.imgH = imgH 113 | self.imgW = imgW 114 | self.keep_ratio = keep_ratio 115 | self.min_ratio = min_ratio 116 | 117 | def __call__(self, batch): 118 | images, labels = 
zip(*batch) 119 | 120 | imgH = self.imgH 121 | imgW = self.imgW 122 | if self.keep_ratio: 123 | ratios = [] 124 | for image in images: 125 | w, h = image.size 126 | ratios.append(w / float(h)) 127 | ratios.sort() 128 | max_ratio = ratios[-1] 129 | imgW = int(np.floor(max_ratio * imgH)) 130 | imgW = max(imgH * self.min_ratio, imgW) # assure imgW >= imgH * min_ratio 131 | 132 | transform = resizeNormalize((imgW, imgH)) 133 | images = [transform(image) for image in images] 134 | images = torch.cat([t.unsqueeze(0) for t in images], 0) 135 | 136 | return images, labels 137 | -------------------------------------------------------------------------------- /crnn.pytorch/crnn_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # encoding: utf-8 3 | 4 | import torch 5 | import torch.nn as nn 6 | from torch.autograd import Variable 7 | import collections 8 | 9 | 10 | class strLabelConverter(object): 11 | """Convert between str and label. 12 | 13 | NOTE: 14 | Insert `blank` into the alphabet for CTC. 15 | 16 | Args: 17 | alphabet (str): set of the possible characters. 18 | ignore_case (bool, default=True): whether to ignore case. 19 | """ 20 | 21 | def __init__(self, alphabet, ignore_case=True): 22 | self._ignore_case = ignore_case 23 | if self._ignore_case: 24 | alphabet = alphabet.lower() 25 | self.alphabet = alphabet + '-' # for `-1` index 26 | 27 | self.dict = {} 28 | for i, char in enumerate(alphabet): 29 | # NOTE: 0 is reserved for 'blank' required by wrap_ctc 30 | self.dict[char] = i + 1 31 | 32 | def encode(self, text): 33 | """Support batch or single str. 34 | 35 | Args: 36 | text (str or list of str): texts to convert. 37 | 38 | Returns: 39 | torch.IntTensor [length_0 + length_1 + ... length_{n - 1}]: encoded texts. 40 | torch.IntTensor [n]: length of each text. 41 | """ 42 | if isinstance(text, str): 43 | text = [ 44 | self.dict[char.lower() if self._ignore_case else char] 45 | for char in text 46 | ] 47 | length = [len(text)] 48 | elif isinstance(text, collections.Iterable): 49 | length = [len(s) for s in text] 50 | text = ''.join(text) 51 | text, _ = self.encode(text) 52 | return (torch.IntTensor(text), torch.IntTensor(length)) 53 | 54 | def decode(self, t, length, raw=False): 55 | """Decode encoded texts back into strs. 56 | 57 | Args: 58 | torch.IntTensor [length_0 + length_1 + ... length_{n - 1}]: encoded texts. 59 | torch.IntTensor [n]: length of each text. 60 | 61 | Raises: 62 | AssertionError: when the texts and their lengths do not match. 63 | 64 | Returns: 65 | text (str or list of str): texts to convert. 66 | """ 67 | if length.numel() == 1: 68 | length = length[0] 69 | assert t.numel() == length, "text with length: {} does not match declared length: {}".format(t.numel(), length) 70 | if raw: 71 | return ''.join([self.alphabet[i - 1] for i in t]) 72 | else: 73 | char_list = [] 74 | for i in range(length): 75 | if t[i] != 0 and (not (i > 0 and t[i - 1] == t[i])): 76 | char_list.append(self.alphabet[t[i] - 1]) 77 | return ''.join(char_list) 78 | else: 79 | # batch mode 80 | assert t.numel() == length.sum(), "texts with length: {} does not match declared length: {}".format(t.numel(), length.sum()) 81 | texts = [] 82 | index = 0 83 | for i in range(length.numel()): 84 | l = length[i] 85 | texts.append( 86 | self.decode( 87 | t[index:index + l], torch.IntTensor([l]), raw=raw)) 88 | index += l 89 | return texts 90 | 91 | 92 | class averager(object): 93 | """Compute average for `torch.Variable` and `torch.Tensor`. 
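``add`` accumulates the element sum and count, ``val`` returns the running mean, and ``reset`` clears both.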
""" 94 | 95 | def __init__(self): 96 | self.reset() 97 | 98 | def add(self, v): 99 | if isinstance(v, Variable): 100 | count = v.data.numel() 101 | v = v.data.sum() 102 | elif isinstance(v, torch.Tensor): 103 | count = v.numel() 104 | v = v.sum() 105 | 106 | self.n_count += count 107 | self.sum += v 108 | 109 | def reset(self): 110 | self.n_count = 0 111 | self.sum = 0 112 | 113 | def val(self): 114 | res = 0 115 | if self.n_count != 0: 116 | res = self.sum / float(self.n_count) 117 | return res 118 | 119 | 120 | def oneHot(v, v_length, nc): 121 | batchSize = v_length.size(0) 122 | maxLength = v_length.max() 123 | v_onehot = torch.FloatTensor(batchSize, maxLength, nc).fill_(0) 124 | acc = 0 125 | for i in range(batchSize): 126 | length = v_length[i] 127 | label = v[acc:acc + length].view(-1, 1).long() 128 | v_onehot[i, :length].scatter_(1, label, 1.0) 129 | acc += length 130 | return v_onehot 131 | 132 | 133 | def loadData(v, data): 134 | v.data.resize_(data.size()).copy_(data) 135 | 136 | 137 | def prettyPrint(v): 138 | print('Size {0}, Type: {1}'.format(str(v.size()), v.data.type())) 139 | print('| Max: %f | Min: %f | Mean: %f' % (v.max().data[0], v.min().data[0], 140 | v.mean().data[0])) 141 | 142 | 143 | def assureRatio(img): 144 | """Ensure imgH <= imgW.""" 145 | b, c, h, w = img.size() 146 | if h > w: 147 | main = nn.UpsamplingBilinear2d(size=(h, h), scale_factor=None) 148 | img = main(img) 149 | return img 150 | -------------------------------------------------------------------------------- /crnn.pytorch/keys.py: -------------------------------------------------------------------------------- 1 | #coding:UTF-8 2 | alphabet = u'\'疗绚诚娇溜题贿者廖更纳加奉公一就汴计与路房原妇208-7其>:],,骑刈全消昏傈安久钟嗅不影处驽蜿资关椤地瘸专问忖票嫉炎韵要月田节陂鄙捌备拳伺眼网盎大傍心东愉汇蹿科每业里航晏字平录先13彤鲶产稍督腴有象岳注绍在泺文定核名水过理让偷率等这发”为含肥酉相鄱七编猥锛日镀蒂掰倒辆栾栗综涩州雌滑馀了机块司宰甙兴矽抚保用沧秩如收息滥页疑埠!!姥异橹钇向下跄的椴沫国绥獠报开民蜇何分凇长讥藏掏施羽中讲派嘟人提浼间世而古多倪唇饯控庚首赛蜓味断制觉技替艰溢潮夕钺外摘枋动双单啮户枇确锦曜杜或能效霜盒然侗电晁放步鹃新杖蜂吒濂瞬评总隍对独合也是府青天诲墙组滴级邀帘示已时骸仄泅和遨店雇疫持巍踮境只亨目鉴崤闲体泄杂作般轰化解迂诿蛭璀腾告版服省师小规程线海办引二桧牌砺洄裴修图痫胡许犊事郛基柴呼食研奶律蛋因葆察戏褒戒再李骁工貂油鹅章啄休场给睡纷豆器捎说敏学会浒设诊格廓查来霓室溆¢诡寥焕舜柒狐回戟砾厄实翩尿五入径惭喹股宇篝|;美期云九祺扮靠锝槌系企酰阊暂蚕忻豁本羹执条钦H獒限进季楦于芘玖铋茯未答粘括样精欠矢甥帷嵩扣令仔风皈行支部蓉刮站蜡救钊汗松嫌成可.鹤院从交政怕活调球局验髌第韫谗串到圆年米/*友忿检区看自敢刃个兹弄流留同没齿星聆轼湖什三建蛔儿椋汕震颧鲤跟力情璺铨陪务指族训滦鄣濮扒商箱十召慷辗所莞管护臭横硒嗓接侦六露党馋驾剖高侬妪幂猗绺骐央酐孝筝课徇缰门男西项句谙瞒秃篇教碲罚声呐景前富嘴鳌稀免朋啬睐去赈鱼住肩愕速旁波厅健茼厥鲟谅投攸炔数方击呋谈绩别愫僚躬鹧胪炳招喇膨泵蹦毛结54谱识陕粽婚拟构且搜任潘比郢妨醪陀桔碘扎选哈骷楷亿明缆脯监睫逻婵共赴淝凡惦及达揖谩澹减焰蛹番祁柏员禄怡峤龙白叽生闯起细装谕竟聚钙上导渊按艾辘挡耒盹饪臀记邮蕙受各医搂普滇朗茸带翻酚(光堤墟蔷万幻〓瑙辈昧盏亘蛀吉铰请子假闻税井诩哨嫂好面琐校馊鬣缂营访炖占农缀否经钚棵趟张亟吏茶谨捻论迸堂玉信吧瞠乡姬寺咬溏苄皿意赉宝尔钰艺特唳踉都荣倚登荐丧奇涵批炭近符傩感道着菊虹仲众懈濯颞眺南释北缝标既茗整撼迤贲挎耱拒某妍卫哇英矶藩治他元领膜遮穗蛾飞荒棺劫么市火温拈棚洼转果奕卸迪伸泳斗邡侄涨屯萋胭氡崮枞惧冒彩斜手豚随旭淑妞形菌吲沱争驯歹挟兆柱传至包内响临红功弩衡寂禁老棍耆渍织害氵渑布载靥嗬虽苹咨娄库雉榜帜嘲套瑚亲簸欧边6腿旮抛吹瞳得镓梗厨继漾愣憨士策窑抑躯襟脏参贸言干绸鳄穷藜音折详)举悍甸癌黎谴死罩迁寒驷袖媒蒋掘模纠恣观祖蛆碍位稿主澧跌筏京锏帝贴证糠才黄鲸略炯饱四出园犀牧容汉杆浈汰瑷造虫瘩怪驴济应花沣谔夙旅价矿以考su呦晒巡茅准肟瓴詹仟褂译桌混宁怦郑抿些余鄂饴攒珑群阖岔琨藓预环洮岌宀杲瀵最常囡周踊女鼓袭喉简范薯遐疏粱黜禧法箔斤遥汝奥直贞撑置绱集她馅逗钧橱魉[恙躁唤9旺膘待脾惫购吗依盲度瘿蠖俾之镗拇鲵厝簧续款展啃表剔品钻腭损清锶统涌寸滨贪链吠冈伎迥咏吁览防迅失汾阔逵绀蔑列川凭努熨揪利俱绉抢鸨我即责膦易毓鹊刹玷岿空嘞绊排术估锷违们苟铜播肘件烫审鲂广像铌惰铟巳胍鲍康憧色恢想拷尤疳知SYFDA峄裕帮握搔氐氘难墒沮雨叁缥悴藐湫娟苑稠颛簇后阕闭蕤缚怎佞码嘤蔡痊舱螯帕赫昵升烬岫、疵蜻髁蕨隶烛械丑盂梁强鲛由拘揉劭龟撤钩呕孛费妻漂求阑崖秤甘通深补赃坎床啪承吼量暇钼烨阂擎脱逮称P神属矗华届狍葑汹育患窒蛰佼静槎运鳗庆逝曼疱克代官此麸耧蚌晟例础榛副测唰缢迹灬霁身岁赭扛又菡乜雾板读陷徉贯郁虑变钓菜圾现琢式乐维渔浜左吾脑钡警T啵拴偌漱湿硕止骼魄积燥联踢玛|则窿见振畿送班钽您赵刨印讨踝籍谡舌崧汽蔽沪酥绒怖财帖肱私莎勋羔霸励哼帐将帅渠纪婴娩岭厘滕吻伤坝冠戊隆瘁介涧物黍并姗奢蹑掣垸锴命箍捉病辖琰眭迩艘绌繁寅若毋思诉类诈燮轲酮狂重反职筱县委磕绣奖晋濉志徽肠呈獐坻口片碰几村柿劳料获亩惕晕厌号罢池正鏖煨家棕复尝懋蜥锅岛扰队坠瘾钬@卧疣镇譬冰彷频黯据垄采八缪瘫型熹砰楠襁箐但嘶绳啤拍盥穆傲洗盯塘怔筛丿台恒喂葛永¥烟酒桦书砂蚝缉态瀚袄圳轻蛛超榧遛姒奘铮右荽望偻卡丶氰附做革索戚坨桷唁垅榻岐偎坛莨山殊微骇陈爨推嗝驹澡藁呤卤嘻糅逛侵郓酌德摇※鬃被慨殡羸昌泡戛鞋河宪沿玲鲨翅哽源铅语照邯址荃佬顺鸳町霭睾瓢夸椁晓酿痈咔侏券噎湍签嚷离午尚社锤背孟使浪缦潍鞅军姹驶笑鳟鲁》孽钜绿洱礴焯椰颖囔乌孔巴互性椽哞聘昨早暮胶炀隧低彗昝铁呓氽藉喔癖瑗姨权胱韦堑蜜酋楝砝毁靓歙锲究屋喳骨辨碑武鸠宫辜烊适坡殃培佩供走蜈迟翼况姣凛浔吃飘债犟金促苛崇坂莳畔绂兵蠕斋根砍亢欢恬崔剁餐榫快扶‖濒缠鳜当彭驭浦篮昀锆秸钳弋娣瞑夷龛苫拱致%嵊障隐弑初娓抉汩累蓖"唬助苓昙押毙破城郧逢嚏獭瞻溱婿赊跨恼璧萃姻貉灵炉密氛陶砸谬衔点琛沛枳层岱诺脍榈埂征冷裁打蹴素瘘逞蛐聊激腱萘踵飒蓟吆取咙簋涓矩曝挺揣座你史
舵焱尘苏笈脚溉榨诵樊邓焊义庶儋蟋蒲赦呷杞诠豪还试颓茉太除紫逃痴草充鳕珉祗墨渭烩蘸慕璇镶穴嵘恶骂险绋幕碉肺戳刘潞秣纾潜銮洛须罘销瘪汞兮屉r林厕质探划狸殚善煊烹〒锈逯宸辍泱柚袍远蹋嶙绝峥娥缍雀徵认镱谷=贩勉撩鄯斐洋非祚泾诒饿撬威晷搭芍锥笺蓦候琊档礁沼卵荠忑朝凹瑞头仪弧孵畏铆突衲车浩气茂悖厢枕酝戴湾邹飚攘锂写宵翁岷无喜丈挑嗟绛殉议槽具醇淞笃郴阅饼底壕砚弈询缕庹翟零筷暨舟闺甯撞麂茌蔼很珲捕棠角阉媛娲诽剿尉爵睬韩诰匣危糍镯立浏阳少盆舔擘匪申尬铣旯抖赘瓯居ˇ哮游锭茏歌坏甚秒舞沙仗劲潺阿燧郭嗖霏忠材奂耐跺砀输岖媳氟极摆灿今扔腻枝奎药熄吨话q额慑嘌协喀壳埭视著於愧陲翌峁颅佛腹聋侯咎叟秀颇存较罪哄岗扫栏钾羌己璨枭霉煌涸衿键镝益岢奏连夯睿冥均糖狞蹊稻爸刿胥煜丽肿璃掸跚灾垂樾濑乎莲窄犹撮战馄软络显鸢胸宾妲恕埔蝌份遇巧瞟粒恰剥桡博讯凯堇阶滤卖斌骚彬兑磺樱舷两娱福仃差找桁÷净把阴污戬雷碓蕲楚罡焖抽妫咒仑闱尽邑菁爱贷沥鞑牡嗉崴骤塌嗦订拮滓捡锻次坪杩臃箬融珂鹗宗枚降鸬妯阄堰盐毅必杨崃俺甬状莘货耸菱腼铸唏痤孚澳懒溅翘疙杷淼缙骰喊悉砻坷艇赁界谤纣宴晃茹归饭梢铡街抄肼鬟苯颂撷戈炒咆茭瘙负仰客琉铢封卑珥椿镧窨鬲寿御袤铃萎砖餮脒裳肪孕嫣馗嵇恳氯江石褶冢祸阻狈羞银靳透咳叼敷芷啥它瓤兰痘懊逑肌往捺坊甩呻〃沦忘膻祟菅剧崆智坯臧霍墅攻眯倘拢骠铐庭岙瓠′缺泥迢捶??郏喙掷沌纯秘种听绘固螨团香盗妒埚蓝拖旱荞铀血遏汲辰叩拽幅硬惶桀漠措泼唑齐肾念酱虚屁耶旗砦闵婉馆拭绅韧忏窝醋葺顾辞倜堆辋逆玟贱疾董惘倌锕淘嘀莽俭笏绑鲷杈择蟀粥嗯驰逾案谪褓胫哩昕颚鲢绠躺鹄崂儒俨丝尕泌啊萸彰幺吟骄苣弦脊瑰〈诛镁析闪剪侧哟框螃守嬗燕狭铈缮概迳痧鲲俯售笼痣扉挖满咋援邱扇歪便玑绦峡蛇叨〖泽胃斓喋怂坟猪该蚬炕弥赞棣晔娠挲狡创疖铕镭稷挫弭啾翔粉履苘哦楼秕铂土锣瘟挣栉习享桢袅磨桂谦延坚蔚噗署谟猬钎恐嬉雒倦衅亏璩睹刻殿王算雕麻丘柯骆丸塍谚添鲈垓桎蚯芥予飕镦谌窗醚菀亮搪莺蒿羁足J真轶悬衷靛翊掩哒炅掐冼妮l谐稚荆擒犯陵虏浓崽刍陌傻孜千靖演矜钕煽杰酗渗伞栋俗泫戍罕沾疽灏煦芬磴叱阱榉湃蜀叉醒彪租郡篷屎良垢隗弱陨峪砷掴颁胎雯绵贬沐撵隘篙暖曹陡栓填臼彦瓶琪潼哪鸡摩啦俟锋域耻蔫疯纹撇毒绶痛酯忍爪赳歆嘹辕烈册朴钱吮毯癜娃谀邵厮炽璞邃丐追词瓒忆轧芫谯喷弟半冕裙掖墉绮寝苔势顷褥切衮君佳嫒蚩霞佚洙逊镖暹唛&殒顶碗獗轭铺蛊废恹汨崩珍那杵曲纺夏薰傀闳淬姘舀拧卷楂恍讪厩寮篪赓乘灭盅鞣沟慎挂饺鼾杳树缨丛絮娌臻嗳篡侩述衰矛圈蚜匕筹匿濞晨叶骋郝挚蚴滞增侍描瓣吖嫦蟒匾圣赌毡癞恺百曳需篓肮庖帏卿驿遗蹬鬓骡歉芎胳屐禽烦晌寄媾狄翡苒船廉终痞殇々畦饶改拆悻萄£瓿乃訾桅匮溧拥纱铍骗蕃龋缬父佐疚栎醍掳蓄x惆颜鲆榆〔猎敌暴谥鲫贾罗玻缄扦芪癣落徒臾恿猩托邴肄牵春陛耀刊拓蓓邳堕寇枉淌啡湄兽酷萼碚濠萤夹旬戮梭琥椭昔勺蜊绐晚孺僵宣摄冽旨萌忙蚤眉噼蟑付契瓜悼颡壁曾窕颢澎仿俑浑嵌浣乍碌褪乱蔟隙玩剐葫箫纲围伐决伙漩瑟刑肓镳缓蹭氨皓典畲坍铑檐塑洞倬储胴淳戾吐灼惺妙毕珐缈虱盖羰鸿磅谓髅娴苴唷蚣霹抨贤唠犬誓逍庠逼麓籼釉呜碧秧氩摔霄穸纨辟妈映完牛缴嗷炊恩荔茆掉紊慌莓羟阙萁磐另蕹辱鳐湮吡吩唐睦垠舒圜冗瞿溺芾囱匠僳汐菩饬漓黑霰浸濡窥毂蒡兢驻鹉芮诙迫雳厂忐臆猴鸣蚪栈箕羡渐莆捍眈哓趴蹼埕嚣骛宏淄斑噜严瑛垃椎诱压庾绞焘廿抡迄棘夫纬锹眨瞌侠脐竞瀑孳骧遁姜颦荪滚萦伪逸粳爬锁矣役趣洒颔诏逐奸甭惠攀蹄泛尼拼阮鹰亚颈惑勒〉际肛爷刚钨丰养冶鲽辉蔻画覆皴妊麦返醉皂擀〗酶凑粹悟诀硖港卜z杀涕±舍铠抵弛段敝镐奠拂轴跛袱et沉菇俎薪峦秭蟹历盟菠寡液肢喻染裱悱抱氙赤捅猛跑氮谣仁尺辊窍烙衍架擦倏璐瑁币楞胖夔趸邛惴饕虔蝎§哉贝宽辫炮扩饲籽魏菟锰伍猝末琳哚蛎邂呀姿鄞却歧仙恸椐森牒寤袒婆虢雅钉朵贼欲苞寰故龚坭嘘咫礼硷兀睢汶’铲烧绕诃浃钿哺柜讼颊璁腔洽咐脲簌筠镣玮鞠谁兼姆挥梯蝴谘漕刷躏宦弼b垌劈麟莉揭笙渎仕嗤仓配怏抬错泯镊孰猿邪仍秋鼬壹歇吵炼<尧射柬廷胧霾凳隋肚浮梦祥株堵退L鹫跎凶毽荟炫栩玳甜沂鹿顽伯爹赔蛴徐匡欣狰缸雹蟆疤默沤啜痂衣禅wih辽葳黝钗停沽棒馨颌肉吴硫悯劾娈马啧吊悌镑峭帆瀣涉咸疸滋泣翦拙癸钥蜒+尾庄凝泉婢渴谊乞陆锉糊鸦淮IBN晦弗乔庥葡尻席橡傣渣拿惩麋斛缃矮蛏岘鸽姐膏催奔镒喱蠡摧钯胤柠拐璋鸥卢荡倾^_珀逄萧塾掇贮笆聂圃冲嵬M滔笕值炙偶蜱搐梆汪蔬腑鸯蹇敞绯仨祯谆梧糗鑫啸豺囹猾巢柄瀛筑踌沭暗苁鱿蹉脂蘖牢热木吸溃宠序泞偿拜檩厚朐毗螳吞媚朽担蝗橘畴祈糟盱隼郜惜珠裨铵焙琚唯咚噪骊丫滢勤棉呸咣淀隔蕾窈饨挨煅短匙粕镜赣撕墩酬馁豌颐抗酣氓佑搁哭递耷涡桃贻碣截瘦昭镌蔓氚甲猕蕴蓬散拾纛狼猷铎埋旖矾讳囊糜迈粟蚂紧鲳瘢栽稼羊锄斟睁桥瓮蹙祉醺鼻昱剃跳篱跷蒜翎宅晖嗑壑峻癫屏狠陋袜途憎祀莹滟佶溥臣约盛峰磁慵婪拦莅朕鹦粲裤哎疡嫖琵窟堪谛嘉儡鳝斩郾驸酊妄胜贺徙傅噌钢栅庇恋匝巯邈尸锚粗佟蛟薹纵蚊郅绢锐苗俞篆淆膀鲜煎诶秽寻涮刺怀噶巨褰魅灶灌桉藕谜舸薄搀恽借牯痉渥愿亓耘杠柩锔蚶钣珈喘蹒幽赐稗晤莱泔扯肯菪裆腩豉疆骜腐倭珏唔粮亡润慰伽橄玄誉醐胆龊粼塬陇彼削嗣绾芽妗垭瘴爽薏寨龈泠弹赢漪猫嘧涂恤圭茧烽屑痕巾赖荸凰腮畈亵蹲偃苇澜艮换骺烘苕梓颉肇哗悄氤涠葬屠鹭植竺佯诣鲇瘀鲅邦移滁冯耕癔戌茬沁巩悠湘洪痹锟循谋腕鳃钠捞焉迎碱伫急榷奈邝卯辄皲卟醛畹忧稳雄昼缩阈睑扌耗曦涅捏瞧邕淖漉铝耦禹湛喽莼琅诸苎纂硅始嗨傥燃臂赅嘈呆贵屹壮肋亍蚀卅豹腆邬迭浊}童螂捐圩勐触寞汊壤荫膺渌芳懿遴螈泰蓼蛤茜舅枫朔膝眙避梅判鹜璜牍缅垫藻黔侥惚懂踩腰腈札丞唾慈顿摹荻琬~斧沈滂胁胀幄莜Z匀鄄掌绰茎焚赋萱谑汁铒瞎夺蜗野娆冀弯篁懵灞隽芡脘俐辩芯掺喏膈蝈觐悚踹蔗熠鼠呵抓橼峨畜缔禾崭弃熊摒凸拗穹蒙抒祛劝闫扳阵醌踪喵侣搬仅荧赎蝾琦买婧瞄寓皎冻赝箩莫瞰郊笫姝筒枪遣煸袋舆痱涛母〇启践耙绲盘遂昊搞槿诬纰泓惨檬亻越Co憩熵祷钒暧塔阗胰咄娶魔琶钞邻扬杉殴咽弓〆髻】吭揽霆拄殖脆彻岩芝勃辣剌钝嘎甄佘皖伦授徕憔挪皇庞稔芜踏溴兖卒擢饥鳞煲‰账颗叻斯捧鳍琮讹蛙纽谭酸兔莒睇伟觑羲嗜宜褐旎辛卦诘筋鎏溪挛熔阜晰鳅丢奚灸呱献陉黛鸪甾萨疮拯洲疹辑叙恻谒允柔烂氏逅漆拎惋扈湟纭啕掬擞哥忽涤鸵靡郗瓷扁廊怨雏钮敦E懦憋汀拚啉腌岸f痼瞅尊咀眩飙忌仝迦熬毫胯篑茄腺凄舛碴锵诧羯後漏汤宓仞蚁壶谰皑铄棰罔辅晶苦牟闽\烃饮聿丙蛳朱煤涔鳖犁罐荼砒淦妤黏戎孑婕瑾戢钵枣捋砥衩狙桠稣阎肃梏诫孪昶婊衫嗔侃塞蜃樵峒貌屿欺缫阐栖诟珞荭吝萍嗽恂啻蜴磬峋俸豫谎徊镍韬魇晴U囟猜蛮坐囿伴亭肝佗蝠妃胞滩榴氖垩苋砣扪馏姓轩厉夥侈禀垒岑赏钛辐痔披纸碳“坞蠓挤荥沅悔铧帼蒌蝇apyng哀浆瑶凿桶馈皮奴苜佤伶晗铱炬优弊氢恃甫攥端锌灰稹炝曙邋亥眶碾拉萝绔捷浍腋姑菖凌涞麽锢桨潢绎镰殆锑渝铬困绽觎匈糙暑裹鸟盔肽迷綦『亳佝俘钴觇骥仆疝跪婶郯瀹唉脖踞针晾忒扼瞩叛椒疟嗡邗肆跆玫忡捣咧唆艄蘑潦笛阚沸泻掊菽贫斥髂孢镂赂麝鸾屡衬苷恪叠希粤爻喝茫惬郸绻庸撅碟宄妹膛叮饵崛嗲椅冤搅咕敛尹垦闷蝉霎勰败蓑泸肤鹌幌焦浠鞍刁舰乙竿裔。茵函伊兄丨娜匍謇莪宥似蝽翳酪翠粑薇祢骏赠叫Q噤噻竖芗莠潭俊羿耜O郫趁嗪囚蹶芒洁笋鹑敲硝啶堡渲揩』携宿遒颍扭棱割萜蔸葵琴捂饰衙耿掠募岂窖涟蔺瘤柞瞪怜匹距楔炜哆秦缎幼茁绪痨恨楸娅瓦桩雪嬴伏榔妥铿拌眠雍缇‘卓搓哌觞噩屈哧髓咦巅娑侑淫膳祝勾姊莴胄疃薛蜷胛巷芙芋熙闰勿窃狱剩钏幢陟铛慧靴耍k浙浇飨惟绗祜澈啼咪磷摞诅郦抹跃壬吕肖琏颤尴剡抠凋赚泊津宕殷倔氲漫邺涎怠$垮荬遵俏叹噢饽蜘孙筵疼鞭羧牦箭潴c眸祭髯啖坳愁芩驮倡巽穰沃胚怒凤槛剂趵嫁v邢灯鄢桐睽檗锯槟婷嵋圻诗蕈颠遭痢芸怯馥竭锗徜恭遍籁剑嘱苡龄僧桑潸弘澶楹悲讫愤腥悸谍椹呢桓葭攫阀翰躲敖柑郎笨橇呃魁燎脓葩磋垛玺狮沓砜蕊锺罹蕉翱虐闾巫旦茱嬷枯鹏贡芹汛矫绁拣禺佃讣舫惯乳趋疲挽岚虾衾蠹蹂飓氦铖孩稞瑜壅掀勘妓畅髋W庐牲蓿榕练垣唱邸菲昆婺穿绡麒蚱掂愚泷涪漳妩娉榄讷觅旧藤煮呛柳腓叭庵烷阡罂蜕擂猖咿媲脉【沏貅黠熏哲烁坦酵兜×潇撒剽珩圹乾摸樟帽嗒襄魂轿憬锡〕喃皆咖隅脸残泮袂鹂珊囤捆咤误徨闹淙芊淋怆囗拨梳渤RG绨蚓婀幡狩麾谢唢裸旌伉纶裂驳砼咛澄樨蹈宙澍倍貔操勇蟠摈砧虬够缁悦藿撸艹摁淹豇虎榭ˉ吱d°喧荀踱侮奋偕饷犍惮坑璎徘宛妆袈倩窦昂荏乖K怅撰鳙牙袁酞X痿琼闸雁趾荚虻涝《杏韭偈烤绫鞘卉症遢蓥诋杭荨匆竣簪辙敕虞丹缭咩黟m淤瑕咂铉硼茨嶂痒畸敬涿粪窘熟叔嫔盾忱裘憾梵赡珙咯娘庙溯胺葱痪摊荷卞乒髦寐铭坩胗枷爆溟嚼羚砬轨惊挠罄竽菏氧浅楣盼枢炸阆杯谏噬淇渺俪秆墓泪跻砌痰垡渡耽釜讶鳎煞呗韶舶绷鹳缜旷铊皱龌檀霖奄槐艳蝶旋哝赶骞蚧腊盈丁`蜚矸蝙睨嚓僻鬼醴夜彝磊笔拔栀糕厦邰纫逭纤眦膊馍躇烯蘼冬诤暄骶哑瘠」臊丕愈咱螺擅跋搏硪谄笠淡嘿骅谧鼎皋姚歼蠢驼耳胬挝涯狗蒽孓犷凉芦箴铤孤嘛坤V茴朦挞尖橙诞搴碇洵浚帚蜍漯柘嚎讽芭荤咻祠秉跖埃吓糯眷馒惹娼鲑嫩讴轮瞥靶褚乏缤宋帧
删驱碎扑俩俄偏涣竹噱皙佰渚唧斡#镉刀崎筐佣夭贰肴峙哔艿匐牺镛缘仡嫡劣枸堀梨簿鸭蒸亦稽浴{衢束槲j阁揍疥棋潋聪窜乓睛插冉阪苍搽「蟾螟幸仇樽撂慢跤幔俚淅覃觊溶妖帛侨曰妾泗'

--------------------------------------------------------------------------------
/crnn.pytorch/tool/convert_t7.py:
--------------------------------------------------------------------------------
import torchfile
import argparse
import torch
from torch.nn.parameter import Parameter
import numpy as np
import models.crnn as crnn


layer_map = {
    'SpatialConvolution': 'Conv2d',
    'SpatialBatchNormalization': 'BatchNorm2d',
    'ReLU': 'ReLU',
    'SpatialMaxPooling': 'MaxPool2d',
    'SpatialAveragePooling': 'AvgPool2d',
    'SpatialUpSamplingNearest': 'UpsamplingNearest2d',
    'View': None,
    'Linear': 'linear',
    'Dropout': 'Dropout',
    'SoftMax': 'Softmax',
    'Identity': None,
    'SpatialFullConvolution': 'ConvTranspose2d',
    'SpatialReplicationPadding': None,
    'SpatialReflectionPadding': None,
    'Copy': None,
    'Narrow': None,
    'SpatialCrossMapLRN': None,
    'Sequential': None,
    'ConcatTable': None,  # output is list
    'CAddTable': None,  # input is list
    'Concat': None,
    'TorchObject': None,
    'LstmLayer': 'LSTM',
    'BiRnnJoin': 'Linear'
}


def torch_layer_serial(layer, layers):
    name = layer[0]
    if name == 'nn.Sequential' or name == 'nn.ConcatTable':
        tmp_layers = []
        for sub_layer in layer[1]:
            torch_layer_serial(sub_layer, tmp_layers)
        layers.extend(tmp_layers)
    else:
        layers.append(layer)


def py_layer_serial(layer, layers):
    """
    Assumes modules are defined in execution order.
    """
    if len(layer._modules) >= 1:
        tmp_layers = []
        for sub_layer in layer.children():
            py_layer_serial(sub_layer, tmp_layers)
        layers.extend(tmp_layers)
    else:
        layers.append(layer)


def trans_pos(param, part_indexes, dim=0):
    parts = np.split(param, len(part_indexes), dim)
    new_parts = []
    for i in part_indexes:
        new_parts.append(parts[i])
    return np.concatenate(new_parts, dim)


def load_params(py_layer, t7_layer):
    if type(py_layer).__name__ == 'LSTM':
        # LSTM
        all_weights = []
        num_directions = 2 if py_layer.bidirectional else 1
        for i in range(py_layer.num_layers):
            for j in range(num_directions):
                suffix = '_reverse' if j == 1 else ''
                weights = ['weight_ih_l{}{}', 'bias_ih_l{}{}',
                           'weight_hh_l{}{}', 'bias_hh_l{}{}']
                weights = [x.format(i, suffix) for x in weights]
                all_weights += weights

        params = []
        for i in range(len(t7_layer)):
            params.extend(t7_layer[i][1])
        params = [trans_pos(p, [0, 1, 3, 2], dim=0) for p in params]
    else:
        all_weights = []
        name = t7_layer[0].split('.')[-1]
        if name == 'BiRnnJoin':
            weight_0, bias_0, weight_1, bias_1 = t7_layer[1]
            weight = np.concatenate((weight_0, weight_1), axis=1)
            bias = bias_0 + bias_1
            t7_layer[1] = [weight, bias]
            all_weights += ['weight', 'bias']
        elif name == 'SpatialConvolution' or name == 'Linear':
            all_weights += ['weight', 'bias']
        elif name == 'SpatialBatchNormalization':
            all_weights += ['weight', 'bias', 'running_mean', 'running_var']

        params = t7_layer[1]

    params = [torch.from_numpy(item) for item in params]
    assert len(all_weights) == len(params), "number of params does not match"
    for py_param_name, t7_param in zip(all_weights, params):
        item = getattr(py_layer, py_param_name)
        if isinstance(item, Parameter):
            item = item.data
        try:
            item.copy_(t7_param)
        except RuntimeError:
            print('Size mismatch between %s and %s' %
                  (item.size(), t7_param.size()))
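

# trans_pos reorders equal-sized parameter blocks; the [0, 1, 3, 2] permutation
# above appears to remap Torch7's LSTM gate block order to PyTorch's
# (i, f, g, o) layout -- an inference from the code, not documented here.
# A toy check of the reordering itself:
#
#   w = np.arange(8).reshape(8, 1)              # 4 blocks of 2 rows each
#   trans_pos(w, [0, 1, 3, 2], dim=0).ravel()   # -> [0 1 2 3 6 7 4 5]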


def torch_to_pytorch(model, t7_file, output):
    py_layers = []
    for layer in list(model.children()):
        py_layer_serial(layer, py_layers)

    t7_data = torchfile.load(t7_file)
    t7_layers = []
    for layer in t7_data:
        torch_layer_serial(layer, t7_layers)

    j = 0
    for i, py_layer in enumerate(py_layers):
        py_name = type(py_layer).__name__
        t7_layer = t7_layers[j]
        t7_name = t7_layer[0].split('.')[-1]
        if layer_map[t7_name] != py_name:
            raise RuntimeError('%s does not match %s' % (py_name, t7_name))

        if py_name == 'LSTM':
            # a bidirectional/stacked LSTM consumes several t7 entries at once
            n_layer = 2 if py_layer.bidirectional else 1
            n_layer *= py_layer.num_layers
            t7_layer = t7_layers[j:j + n_layer]
            j += n_layer
        else:
            j += 1

        load_params(py_layer, t7_layer)

    torch.save(model.state_dict(), output)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Convert torch t7 model to pytorch'
    )
    parser.add_argument(
        '--model_file',
        '-m',
        type=str,
        required=True,
        help='torch model file in t7 format'
    )
    parser.add_argument(
        '--output',
        '-o',
        type=str,
        default=None,
        help='output file name prefix, xxx.py xxx.pth'
    )
    args = parser.parse_args()

    py_model = crnn.CRNN(32, 1, 37, 256, 1)
    torch_to_pytorch(py_model, args.model_file, args.output)

--------------------------------------------------------------------------------
/ctpnport.py:
--------------------------------------------------------------------------------
# coding=utf-8
import sys
import numpy as np
from matplotlib import cm
import cv2

class cfg:
    MEAN=np.float32([102.9801, 115.9465, 122.7717])
    TEST_GPU_ID=0
    SCALE=600
    MAX_SCALE=1000

    LINE_MIN_SCORE=0.7
    TEXT_PROPOSALS_MIN_SCORE=0.7
    TEXT_PROPOSALS_NMS_THRESH=0.3
    MAX_HORIZONTAL_GAP=50
    TEXT_LINE_NMS_THRESH=0.3
    MIN_NUM_PROPOSALS=2
    MIN_RATIO=1.2
    MIN_V_OVERLAPS=0.7
    MIN_SIZE_SIM=0.7
    TEXT_PROPOSALS_WIDTH=16

# NOTE: the caller is expected to put ./CTPN/src (and ./CTPN/tools) on sys.path
# so that `other` and `detectors` below resolve.

class CTPNDetector:

    def __init__(self, NET_DEF_FILE, MODEL_FILE, caffe_path):
        # default model files used by the original demo code:
        #   NET_DEF_FILE = "CTPN/models/deploy.prototxt"
        #   MODEL_FILE = "CTPN/models/ctpn_trained_model.caffemodel"
        sys.path.insert(0, "%s/python" % caffe_path)
        import caffe
        from other import draw_boxes, resize_im, CaffeModel
        from detectors import TextProposalDetector, TextDetector
        sys.path.remove("%s/python" % caffe_path)
        self.caffe = caffe

        # initialize the detectors
        text_proposals_detector = TextProposalDetector(CaffeModel(NET_DEF_FILE, MODEL_FILE))
        self.text_detector = TextDetector(text_proposals_detector)
        self.resize_im = resize_im
        self.draw_boxes = draw_boxes
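
    # A minimal usage sketch (paths are illustrative, taken from the default
    # model files noted above; the caffemodel is not checked into the repo):
    #
    #   det = CTPNDetector("CTPN/models/deploy.prototxt",
    #                      "CTPN/models/ctpn_trained_model.caffemodel",
    #                      "./caffe")
    #   text_lines, im_resized, ratio = det.getCharBlock(cv2.imread("demo.jpg"))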
    def getCharBlock(self, im, gpu_id=0):
        if gpu_id < 0:
            self.caffe.set_mode_cpu()
        else:
            self.caffe.set_mode_gpu()
            self.caffe.set_device(gpu_id)

        im_resized, ratio = self.resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
        text_lines = self.text_detector.detect(im_resized.copy())
        return text_lines, im_resized, ratio

    # this is deprecated
    def convert_bbox(self, bboxes):
        # Each detected line appears to be parameterized as
        # (x_left, _, x_right, _, _, slope, y_intercept_of_center_line, height, [score]),
        # from which the four corners of the (possibly slanted) quad are derived.
        text_recs = np.zeros((len(bboxes), 8), np.int32)
        index = 0
        for box in bboxes:
            b1 = box[6] - box[7] / 2
            b2 = box[6] + box[7] / 2
            x1 = box[0]
            y1 = box[5] * box[0] + b1
            x2 = box[2]
            y2 = box[5] * box[2] + b1
            x3 = box[0]
            y3 = box[5] * box[0] + b2
            x4 = box[2]
            y4 = box[5] * box[2] + b2

            disX = x2 - x1
            disY = y2 - y1
            width = np.sqrt(disX * disX + disY * disY)
            fTmp0 = y3 - y1
            fTmp1 = fTmp0 * disY / width
            x = np.fabs(fTmp1 * disX / width)
            y = np.fabs(fTmp1 * disY / width)
            if box[5] < 0:
                x1 -= x
                y1 += y
                x4 += x
                y4 -= y
            else:
                x2 += x
                y2 += y
                x3 -= x
                y3 -= y
            text_recs[index, 0] = x1
            text_recs[index, 1] = y1
            text_recs[index, 2] = x2
            text_recs[index, 3] = y2
            text_recs[index, 4] = x3
            text_recs[index, 5] = y3
            text_recs[index, 6] = x4
            text_recs[index, 7] = y4
            index += 1
        return text_recs

    def draw_boxes8(self, im, bboxes, is_display=True, color=None, caption="Image", wait=True):
        """
        Draw the detected lines as 8-point quads; returns the drawn image
        and the corner records.
        """
        text_recs = np.zeros((len(bboxes), 8), np.int32)

        im = im.copy()
        index = 0
        for box in bboxes:
            if color is None:
                if len(box) == 8 or len(box) == 9:
                    c = tuple(cm.jet([box[-1]])[0, 2::-1] * 255)
                else:
                    c = tuple(np.random.randint(0, 256, 3))
            else:
                c = color

            # same corner geometry as convert_bbox above, plus drawing
            b1 = box[6] - box[7] / 2
            b2 = box[6] + box[7] / 2
            x1 = box[0]
            y1 = box[5] * box[0] + b1
            x2 = box[2]
            y2 = box[5] * box[2] + b1
            x3 = box[0]
            y3 = box[5] * box[0] + b2
            x4 = box[2]
            y4 = box[5] * box[2] + b2

            disX = x2 - x1
            disY = y2 - y1
            width = np.sqrt(disX * disX + disY * disY)
            fTmp0 = y3 - y1
            fTmp1 = fTmp0 * disY / width
            x = np.fabs(fTmp1 * disX / width)
            y = np.fabs(fTmp1 * disY / width)
            if box[5] < 0:
                x1 -= x
                y1 += y
                x4 += x
                y4 -= y
            else:
                x2 += x
                y2 += y
                x3 -= x
                y3 -= y
            cv2.line(im, (int(x1), int(y1)), (int(x2), int(y2)), c, 2)
            cv2.line(im, (int(x1), int(y1)), (int(x3), int(y3)), c, 2)
            cv2.line(im, (int(x4), int(y4)), (int(x2), int(y2)), c, 2)
            cv2.line(im, (int(x3), int(y3)), (int(x4), int(y4)), c, 2)
            text_recs[index, 0] = x1
            text_recs[index, 1] = y1
            text_recs[index, 2] = x2
            text_recs[index, 3] = y2
            text_recs[index, 4] = x3
            text_recs[index, 5] = y3
            text_recs[index, 6] = x4
            text_recs[index, 7] = y4
            index += 1
        if is_display:
            cv2.imshow('result', im)
        return im, text_recs

--------------------------------------------------------------------------------
/crnn.pytorch/crnn_main.py:
--------------------------------------------------------------------------------
from __future__ import print_function
import argparse
import random
import torch
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable
import numpy as np
from warpctc_pytorch import CTCLoss
import os
import crnn_utils as utils
import dataset

import models.crnn as crnn

parser = argparse.ArgumentParser()
parser.add_argument('--trainroot', required=True, help='path to train dataset')
parser.add_argument('--valroot', required=True, help='path to validation dataset')
parser.add_argument('--workers', type=int, help='number of data loading workers', default=2)
parser.add_argument('--batchSize', type=int, default=64, help='input batch size')
parser.add_argument('--imgH', type=int, default=32, help='the height of the input image to network')
parser.add_argument('--imgW', type=int, default=100, help='the width of the input image to network')
parser.add_argument('--nh', type=int, default=256, help='size of the lstm hidden state')
parser.add_argument('--niter', type=int, default=25, help='number of epochs to train for')
parser.add_argument('--lr', type=float, default=0.01, help='learning rate, default=0.01')
parser.add_argument('--beta1', type=float, default=0.5, help='beta1 for adam. default=0.5')
parser.add_argument('--cuda', action='store_true', help='enables cuda')
parser.add_argument('--ngpu', type=int, default=1, help='number of GPUs to use')
parser.add_argument('--crnn', default='', help="path to crnn (to continue training)")
parser.add_argument('--alphabet', type=str, default='0123456789abcdefghijklmnopqrstuvwxyz')
parser.add_argument('--experiment', default=None, help='where to store samples and models')
parser.add_argument('--displayInterval', type=int, default=500, help='interval (in iterations) between loss reports')
parser.add_argument('--n_test_disp', type=int, default=10, help='number of samples to display during validation')
parser.add_argument('--valInterval', type=int, default=500, help='interval between validation runs')
parser.add_argument('--saveInterval', type=int, default=500, help='interval between checkpoints')
parser.add_argument('--adam', action='store_true', help='whether to use adam (default is rmsprop)')
parser.add_argument('--adadelta', action='store_true', help='whether to use adadelta (default is rmsprop)')
parser.add_argument('--keep_ratio', action='store_true', help='whether to keep ratio for image resize')
parser.add_argument('--random_sample', action='store_true', help='whether to sample the dataset with random sampler')
opt = parser.parse_args()
print(opt)

if opt.experiment is None:
    opt.experiment = 'expr'
os.system('mkdir {0}'.format(opt.experiment))

opt.manualSeed = random.randint(1, 10000)  # fix seed
print("Random Seed: ", opt.manualSeed)
random.seed(opt.manualSeed)
np.random.seed(opt.manualSeed)
torch.manual_seed(opt.manualSeed)

cudnn.benchmark = True

if torch.cuda.is_available() and not opt.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

train_dataset = dataset.lmdbDataset(root=opt.trainroot)
assert train_dataset
if not opt.random_sample:
    sampler = dataset.randomSequentialSampler(train_dataset, opt.batchSize)
else:
    sampler = None
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=opt.batchSize,
    shuffle=(sampler is None), sampler=sampler,  # shuffle and sampler are mutually exclusive
    num_workers=int(opt.workers),
    collate_fn=dataset.alignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio=opt.keep_ratio))
test_dataset = dataset.lmdbDataset(
    root=opt.valroot, transform=dataset.resizeNormalize((100, 32)))

nclass = len(opt.alphabet) + 1  # +1 for the CTC blank
nc = 1

converter = utils.strLabelConverter(opt.alphabet)
criterion = CTCLoss()


# custom weights initialization called on crnn
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
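

# weights_init is applied module-by-module via Module.apply (see below); a toy
# check of its effect (illustrative only):
#
#   conv = torch.nn.Conv2d(1, 4, kernel_size=3)
#   weights_init(conv)                 # re-draws conv weights from N(0, 0.02)
#   print(conv.weight.data.std())      # ~0.02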


crnn = crnn.CRNN(opt.imgH, nc, nclass, opt.nh)
crnn.apply(weights_init)
if opt.crnn != '':
    print('loading pretrained model from %s' % opt.crnn)
    crnn.load_state_dict(torch.load(opt.crnn))
print(crnn)

# placeholder buffers; loadData resizes them to each real batch
image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH)
text = torch.IntTensor(opt.batchSize * 5)
length = torch.IntTensor(opt.batchSize)

if opt.cuda:
    crnn.cuda()
    crnn = torch.nn.DataParallel(crnn, device_ids=range(opt.ngpu))
    image = image.cuda()
    criterion = criterion.cuda()

image = Variable(image)
text = Variable(text)
length = Variable(length)

# loss averager
loss_avg = utils.averager()

# setup optimizer
if opt.adam:
    optimizer = optim.Adam(crnn.parameters(), lr=opt.lr,
                           betas=(opt.beta1, 0.999))
elif opt.adadelta:
    optimizer = optim.Adadelta(crnn.parameters(), lr=opt.lr)
else:
    optimizer = optim.RMSprop(crnn.parameters(), lr=opt.lr)


def val(net, dataset, criterion, max_iter=100):
    print('Start val')

    for p in net.parameters():
        p.requires_grad = False

    net.eval()
    data_loader = torch.utils.data.DataLoader(
        dataset, shuffle=True, batch_size=opt.batchSize, num_workers=int(opt.workers))
    val_iter = iter(data_loader)

    n_correct = 0
    loss_avg = utils.averager()

    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = val_iter.next()
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)

        preds = net(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)

        _, preds = preds.max(2)
        preds = preds.squeeze(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred == target.lower():
                n_correct += 1

    raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[:opt.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    accuracy = n_correct / float(max_iter * opt.batchSize)
    print('Test loss: %f, accuracy: %f' % (loss_avg.val(), accuracy))
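

# Shape walk-through for the decode step above (T = time steps, b = batch):
#   preds: (T, b, nclass) -> max(2) indices: (T, b, 1) -> squeeze(2): (T, b)
#   -> transpose/flatten to (b*T,), so converter.decode can split it back
#      into per-sample sequences using preds_size = [T] * b.


def trainBatch(net,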
criterion, optimizer): 172 | data = train_iter.next() 173 | cpu_images, cpu_texts = data 174 | batch_size = cpu_images.size(0) 175 | utils.loadData(image, cpu_images) 176 | t, l = converter.encode(cpu_texts) 177 | utils.loadData(text, t) 178 | utils.loadData(length, l) 179 | 180 | preds = crnn(image) 181 | preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size)) 182 | cost = criterion(preds, text, preds_size, length) / batch_size 183 | crnn.zero_grad() 184 | cost.backward() 185 | optimizer.step() 186 | return cost 187 | 188 | 189 | for epoch in range(opt.niter): 190 | train_iter = iter(train_loader) 191 | i = 0 192 | while i < len(train_loader): 193 | for p in crnn.parameters(): 194 | p.requires_grad = True 195 | crnn.train() 196 | 197 | cost = trainBatch(crnn, criterion, optimizer) 198 | loss_avg.add(cost) 199 | i += 1 200 | 201 | if i % opt.displayInterval == 0: 202 | print('[%d/%d][%d/%d] Loss: %f' % 203 | (epoch, opt.niter, i, len(train_loader), loss_avg.val())) 204 | loss_avg.reset() 205 | 206 | if i % opt.valInterval == 0: 207 | val(crnn, test_dataset, criterion) 208 | 209 | # do checkpointing 210 | if i % opt.saveInterval == 0: 211 | torch.save( 212 | crnn.state_dict(), '{0}/netCRNN_{1}_{2}.pth'.format(opt.experiment, epoch, i)) 213 | -------------------------------------------------------------------------------- /models/CTPN/deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_ILSVRC_16_layers" 2 | 3 | input: 'data' 4 | input_shape { 5 | dim: 1 6 | dim: 3 7 | dim: 600 8 | dim: 900 9 | } 10 | 11 | input: 'im_info' 12 | input_shape { 13 | dim: 1 14 | dim: 3 15 | } 16 | 17 | layer { 18 | name: "conv1_1" 19 | type: "Convolution" 20 | bottom: "data" 21 | top: "conv1_1" 22 | param { 23 | lr_mult: 0 24 | decay_mult: 0 25 | } 26 | param { 27 | lr_mult: 0 28 | decay_mult: 0 29 | } 30 | convolution_param { 31 | num_output: 64 32 | pad: 1 33 | kernel_size: 3 34 | } 35 | } 36 | layer { 37 | name: "relu1_1" 38 | type: "ReLU" 39 | bottom: "conv1_1" 40 | top: "conv1_1" 41 | } 42 | layer { 43 | name: "conv1_2" 44 | type: "Convolution" 45 | bottom: "conv1_1" 46 | top: "conv1_2" 47 | param { 48 | lr_mult: 0 49 | decay_mult: 0 50 | } 51 | param { 52 | lr_mult: 0 53 | decay_mult: 0 54 | } 55 | convolution_param { 56 | num_output: 64 57 | pad: 1 58 | kernel_size: 3 59 | } 60 | } 61 | layer { 62 | name: "relu1_2" 63 | type: "ReLU" 64 | bottom: "conv1_2" 65 | top: "conv1_2" 66 | } 67 | layer { 68 | name: "pool1" 69 | type: "Pooling" 70 | bottom: "conv1_2" 71 | top: "pool1" 72 | pooling_param { 73 | pool: MAX 74 | kernel_size: 2 75 | stride: 2 76 | } 77 | } 78 | layer { 79 | name: "conv2_1" 80 | type: "Convolution" 81 | bottom: "pool1" 82 | top: "conv2_1" 83 | param { 84 | lr_mult: 0 85 | decay_mult: 0 86 | } 87 | param { 88 | lr_mult: 0 89 | decay_mult: 0 90 | } 91 | convolution_param { 92 | num_output: 128 93 | pad: 1 94 | kernel_size: 3 95 | } 96 | } 97 | layer { 98 | name: "relu2_1" 99 | type: "ReLU" 100 | bottom: "conv2_1" 101 | top: "conv2_1" 102 | } 103 | layer { 104 | name: "conv2_2" 105 | type: "Convolution" 106 | bottom: "conv2_1" 107 | top: "conv2_2" 108 | param { 109 | lr_mult: 0 110 | decay_mult: 0 111 | } 112 | param { 113 | lr_mult: 0 114 | decay_mult: 0 115 | } 116 | convolution_param { 117 | num_output: 128 118 | pad: 1 119 | kernel_size: 3 120 | } 121 | } 122 | layer { 123 | name: "relu2_2" 124 | type: "ReLU" 125 | bottom: "conv2_2" 126 | top: "conv2_2" 127 | } 128 | layer { 129 | name: "pool2" 130 | type: "Pooling" 
131 | bottom: "conv2_2" 132 | top: "pool2" 133 | pooling_param { 134 | pool: MAX 135 | kernel_size: 2 136 | stride: 2 137 | } 138 | } 139 | layer { 140 | name: "conv3_1" 141 | type: "Convolution" 142 | bottom: "pool2" 143 | top: "conv3_1" 144 | param { 145 | lr_mult: 1 146 | } 147 | param { 148 | lr_mult: 2 149 | } 150 | convolution_param { 151 | num_output: 256 152 | pad: 1 153 | kernel_size: 3 154 | } 155 | } 156 | layer { 157 | name: "relu3_1" 158 | type: "ReLU" 159 | bottom: "conv3_1" 160 | top: "conv3_1" 161 | } 162 | layer { 163 | name: "conv3_2" 164 | type: "Convolution" 165 | bottom: "conv3_1" 166 | top: "conv3_2" 167 | param { 168 | lr_mult: 1 169 | } 170 | param { 171 | lr_mult: 2 172 | } 173 | convolution_param { 174 | num_output: 256 175 | pad: 1 176 | kernel_size: 3 177 | } 178 | } 179 | layer { 180 | name: "relu3_2" 181 | type: "ReLU" 182 | bottom: "conv3_2" 183 | top: "conv3_2" 184 | } 185 | layer { 186 | name: "conv3_3" 187 | type: "Convolution" 188 | bottom: "conv3_2" 189 | top: "conv3_3" 190 | param { 191 | lr_mult: 1 192 | } 193 | param { 194 | lr_mult: 2 195 | } 196 | convolution_param { 197 | num_output: 256 198 | pad: 1 199 | kernel_size: 3 200 | } 201 | } 202 | layer { 203 | name: "relu3_3" 204 | type: "ReLU" 205 | bottom: "conv3_3" 206 | top: "conv3_3" 207 | } 208 | layer { 209 | name: "pool3" 210 | type: "Pooling" 211 | bottom: "conv3_3" 212 | top: "pool3" 213 | pooling_param { 214 | pool: MAX 215 | kernel_size: 2 216 | stride: 2 217 | } 218 | } 219 | layer { 220 | name: "conv4_1" 221 | type: "Convolution" 222 | bottom: "pool3" 223 | top: "conv4_1" 224 | param { 225 | lr_mult: 1 226 | } 227 | param { 228 | lr_mult: 2 229 | } 230 | convolution_param { 231 | num_output: 512 232 | pad: 1 233 | kernel_size: 3 234 | } 235 | } 236 | layer { 237 | name: "relu4_1" 238 | type: "ReLU" 239 | bottom: "conv4_1" 240 | top: "conv4_1" 241 | } 242 | layer { 243 | name: "conv4_2" 244 | type: "Convolution" 245 | bottom: "conv4_1" 246 | top: "conv4_2" 247 | param { 248 | lr_mult: 1 249 | } 250 | param { 251 | lr_mult: 2 252 | } 253 | convolution_param { 254 | num_output: 512 255 | pad: 1 256 | kernel_size: 3 257 | } 258 | } 259 | layer { 260 | name: "relu4_2" 261 | type: "ReLU" 262 | bottom: "conv4_2" 263 | top: "conv4_2" 264 | } 265 | layer { 266 | name: "conv4_3" 267 | type: "Convolution" 268 | bottom: "conv4_2" 269 | top: "conv4_3" 270 | param { 271 | lr_mult: 1 272 | } 273 | param { 274 | lr_mult: 2 275 | } 276 | convolution_param { 277 | num_output: 512 278 | pad: 1 279 | kernel_size: 3 280 | } 281 | } 282 | layer { 283 | name: "relu4_3" 284 | type: "ReLU" 285 | bottom: "conv4_3" 286 | top: "conv4_3" 287 | } 288 | layer { 289 | name: "pool4" 290 | type: "Pooling" 291 | bottom: "conv4_3" 292 | top: "pool4" 293 | pooling_param { 294 | pool: MAX 295 | kernel_size: 2 296 | stride: 2 297 | } 298 | } 299 | layer { 300 | name: "conv5_1" 301 | type: "Convolution" 302 | bottom: "pool4" 303 | top: "conv5_1" 304 | param { 305 | lr_mult: 1 306 | } 307 | param { 308 | lr_mult: 2 309 | } 310 | convolution_param { 311 | num_output: 512 312 | pad: 1 313 | kernel_size: 3 314 | } 315 | } 316 | layer { 317 | name: "relu5_1" 318 | type: "ReLU" 319 | bottom: "conv5_1" 320 | top: "conv5_1" 321 | } 322 | layer { 323 | name: "conv5_2" 324 | type: "Convolution" 325 | bottom: "conv5_1" 326 | top: "conv5_2" 327 | param { 328 | lr_mult: 1 329 | } 330 | param { 331 | lr_mult: 2 332 | } 333 | convolution_param { 334 | num_output: 512 335 | pad: 1 336 | kernel_size: 3 337 | } 338 | } 339 | layer { 340 | name: 
"relu5_2" 341 | type: "ReLU" 342 | bottom: "conv5_2" 343 | top: "conv5_2" 344 | } 345 | layer { 346 | name: "conv5_3" 347 | type: "Convolution" 348 | bottom: "conv5_2" 349 | top: "conv5_3" 350 | param { 351 | lr_mult: 1 352 | } 353 | param { 354 | lr_mult: 2 355 | } 356 | convolution_param { 357 | num_output: 512 358 | pad: 1 359 | kernel_size: 3 360 | } 361 | } 362 | layer { 363 | name: "relu5_3" 364 | type: "ReLU" 365 | bottom: "conv5_3" 366 | top: "conv5_3" 367 | } 368 | 369 | #========= RPN ============ 370 | 371 | # prepare lstm inputs 372 | layer { 373 | name: "im2col" 374 | bottom: "conv5_3" 375 | top: "im2col" 376 | type: "Im2col" 377 | convolution_param { 378 | pad: 1 379 | kernel_size: 3 380 | } 381 | } 382 | layer { 383 | name: "im2col_transpose" 384 | top: "im2col_transpose" 385 | bottom: "im2col" 386 | type: "Transpose" 387 | transpose_param { 388 | dim: 3 389 | dim: 2 390 | dim: 0 391 | dim: 1 392 | } 393 | } 394 | layer { 395 | name: "lstm_input" 396 | type: "Reshape" 397 | bottom: "im2col_transpose" 398 | top: "lstm_input" 399 | reshape_param { 400 | shape { dim: -1 } 401 | axis: 1 402 | num_axes: 2 403 | } 404 | } 405 | 406 | layer { 407 | name: "lstm" 408 | type: "Lstm" 409 | bottom: "lstm_input" 410 | top: "lstm" 411 | lstm_param { 412 | num_output: 128 413 | weight_filler { 414 | type: "gaussian" 415 | std: 0.01 416 | } 417 | bias_filler { 418 | type: "constant" 419 | } 420 | clipping_threshold: 1 421 | } 422 | } 423 | 424 | 425 | # ===================== rlstm =================== 426 | layer { 427 | name: "lstm-reverse1" 428 | type: "Reverse" 429 | bottom: "lstm_input" 430 | top: "rlstm_input" 431 | reverse_param { 432 | axis: 0 433 | } 434 | } 435 | layer { 436 | name: "rlstm" 437 | type: "Lstm" 438 | bottom: "rlstm_input" 439 | top: "rlstm-output" 440 | lstm_param { 441 | num_output: 128 442 | } 443 | } 444 | layer { 445 | name: "lstm-reverse2" 446 | type: "Reverse" 447 | bottom: "rlstm-output" 448 | top: "rlstm" 449 | reverse_param { 450 | axis: 0 451 | } 452 | } 453 | 454 | 455 | # merge lstm and rlstm 456 | layer { 457 | name: "merge_lstm_rlstm" 458 | type: "Concat" 459 | bottom: "lstm" 460 | bottom: "rlstm" 461 | top: "merge_lstm_rlstm" 462 | concat_param { 463 | axis: 2 464 | } 465 | } 466 | layer { 467 | name: "lstm_output_reshape" 468 | type: "Reshape" 469 | bottom: "merge_lstm_rlstm" 470 | top: "lstm_output_reshape" 471 | reshape_param { 472 | shape { dim: -1 dim: 1 } 473 | axis: 1 474 | num_axes: 1 475 | } 476 | } 477 | # transpose size of output as (N, C, H, W) 478 | layer { 479 | name: "lstm_output" 480 | type: "Transpose" 481 | bottom: "lstm_output_reshape" 482 | top: "lstm_output" 483 | transpose_param { 484 | dim: 2 485 | dim: 3 486 | dim: 1 487 | dim: 0 488 | } 489 | } 490 | layer { 491 | name: "fc" 492 | bottom: "lstm_output" 493 | top: "fc" 494 | type: "Convolution" 495 | convolution_param { 496 | num_output: 512 497 | kernel_size: 1 498 | } 499 | } 500 | layer { 501 | name: "relu_fc" 502 | type: "ReLU" 503 | bottom: "fc" 504 | top: "fc" 505 | } 506 | layer { 507 | name: "rpn_cls_score" 508 | type: "Convolution" 509 | bottom: "fc" 510 | top: "rpn_cls_score" 511 | param { lr_mult: 1.0 } 512 | param { lr_mult: 2.0 } 513 | convolution_param { 514 | num_output: 20 515 | kernel_size: 1 pad: 0 stride: 1 516 | } 517 | } 518 | layer { 519 | bottom: "rpn_cls_score" 520 | top: "rpn_cls_score_reshape" 521 | name: "rpn_cls_score_reshape" 522 | type: "Reshape" 523 | reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } 524 | } 525 | layer { 526 | name: 
"rpn_bbox_pred" 527 | type: "Convolution" 528 | bottom: "fc" 529 | top: "rpn_bbox_pred" 530 | param { lr_mult: 1.0 } 531 | param { lr_mult: 2.0 } 532 | convolution_param { 533 | num_output: 20 534 | kernel_size: 1 pad: 0 stride: 1 535 | } 536 | } 537 | layer { 538 | name: "rpn_cls_prob" 539 | type: "Softmax" 540 | bottom: "rpn_cls_score_reshape" 541 | top: "rpn_cls_prob" 542 | } 543 | 544 | layer { 545 | name: 'rpn_cls_prob_reshape' 546 | type: 'Reshape' 547 | bottom: 'rpn_cls_prob' 548 | top: 'rpn_cls_prob_reshape' 549 | reshape_param { shape { dim: 0 dim: 20 dim: -1 dim: 0 } } 550 | } 551 | 552 | layer { 553 | name: 'proposal' 554 | type: 'Python' 555 | bottom: 'rpn_cls_prob_reshape' 556 | bottom: 'rpn_bbox_pred' 557 | bottom: 'im_info' 558 | top: 'rois' 559 | top: 'scores' 560 | python_param { 561 | module: 'layers.text_proposal_layer' 562 | layer: 'ProposalLayer' 563 | param_str: "'feat_stride': 16" 564 | } 565 | } 566 | --------------------------------------------------------------------------------