├── 01.jpg ├── 02.jpg ├── 03.jpg ├── 06.jpg ├── CTPN ├── .gitignore ├── Makefile ├── models │ └── deploy.prototxt ├── src │ ├── anchor.py │ ├── detectors.py │ ├── layers │ │ ├── __init__.py │ │ └── text_proposal_layer.py │ ├── other.py │ ├── text_proposal_connector.py │ ├── text_proposal_graph_builder.py │ └── utils │ │ ├── __init__.py │ │ ├── cpu_nms.pyx │ │ └── timer.py ├── tools │ ├── cfg.py │ └── demo.py └── 编译运行说明.txt ├── README.md ├── crnn ├── dataset.py ├── dataset.pyc ├── keys.py ├── keys.pyc ├── models │ ├── __init__.py │ ├── __init__.pyc │ ├── crnn.py │ ├── crnn.pyc │ ├── utils.py │ └── utils.pyc ├── test.py ├── util.py └── util.pyc ├── crnnport.py ├── crnnport.pyc ├── ctpnport.py ├── ctpnport.pyc ├── demo.py └── img ├── 1755.jpg ├── 21.bmp ├── 6408.bmp └── t37.jpg /01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bear63/sceneReco/ae1731e4a344359e1ff5a147ec8d3834afb16ba1/01.jpg -------------------------------------------------------------------------------- /02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bear63/sceneReco/ae1731e4a344359e1ff5a147ec8d3834afb16ba1/02.jpg -------------------------------------------------------------------------------- /03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bear63/sceneReco/ae1731e4a344359e1ff5a147ec8d3834afb16ba1/03.jpg -------------------------------------------------------------------------------- /06.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bear63/sceneReco/ae1731e4a344359e1ff5a147ec8d3834afb16ba1/06.jpg -------------------------------------------------------------------------------- /CTPN/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.caffemodel 3 | .idea 4 | *.so 5 | results 6 | *.xml 7 | 8 | -------------------------------------------------------------------------------- /CTPN/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | cython src/utils/cpu_nms.pyx 3 | gcc -shared -pthread -fPIC -fwrapv -O2 -Wall -fno-strict-aliasing \ 4 | -I/usr/include/python2.7 -o src/utils/cpu_nms.so src/utils/cpu_nms.c 5 | rm -rf src/utils/cpu_nms.c 6 | -------------------------------------------------------------------------------- /CTPN/models/deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_ILSVRC_16_layers" 2 | 3 | input: 'data' 4 | input_shape { 5 | dim: 1 6 | dim: 3 7 | dim: 600 8 | dim: 900 9 | } 10 | 11 | input: 'im_info' 12 | input_shape { 13 | dim: 1 14 | dim: 3 15 | } 16 | 17 | layer { 18 | name: "conv1_1" 19 | type: "Convolution" 20 | bottom: "data" 21 | top: "conv1_1" 22 | param { 23 | lr_mult: 0 24 | decay_mult: 0 25 | } 26 | param { 27 | lr_mult: 0 28 | decay_mult: 0 29 | } 30 | convolution_param { 31 | num_output: 64 32 | pad: 1 33 | kernel_size: 3 34 | } 35 | } 36 | layer { 37 | name: "relu1_1" 38 | type: "ReLU" 39 | bottom: "conv1_1" 40 | top: "conv1_1" 41 | } 42 | layer { 43 | name: "conv1_2" 44 | type: "Convolution" 45 | bottom: "conv1_1" 46 | top: "conv1_2" 47 | param { 48 | lr_mult: 0 49 | decay_mult: 0 50 | } 51 | param { 52 | lr_mult: 0 53 | decay_mult: 0 54 | } 55 | convolution_param { 56 | num_output: 64 57 | pad: 1 58 | kernel_size: 3 59 | } 60 | } 61 | 
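# NOTE: conv1_1 through conv5_3 below mirror the 13 convolutional layers of
# VGG-16. The first two stages are frozen (lr_mult: 0, decay_mult: 0), stages
# 3-5 stay trainable, and the text-specific head begins at the RPN section
# further down.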
layer { 62 | name: "relu1_2" 63 | type: "ReLU" 64 | bottom: "conv1_2" 65 | top: "conv1_2" 66 | } 67 | layer { 68 | name: "pool1" 69 | type: "Pooling" 70 | bottom: "conv1_2" 71 | top: "pool1" 72 | pooling_param { 73 | pool: MAX 74 | kernel_size: 2 75 | stride: 2 76 | } 77 | } 78 | layer { 79 | name: "conv2_1" 80 | type: "Convolution" 81 | bottom: "pool1" 82 | top: "conv2_1" 83 | param { 84 | lr_mult: 0 85 | decay_mult: 0 86 | } 87 | param { 88 | lr_mult: 0 89 | decay_mult: 0 90 | } 91 | convolution_param { 92 | num_output: 128 93 | pad: 1 94 | kernel_size: 3 95 | } 96 | } 97 | layer { 98 | name: "relu2_1" 99 | type: "ReLU" 100 | bottom: "conv2_1" 101 | top: "conv2_1" 102 | } 103 | layer { 104 | name: "conv2_2" 105 | type: "Convolution" 106 | bottom: "conv2_1" 107 | top: "conv2_2" 108 | param { 109 | lr_mult: 0 110 | decay_mult: 0 111 | } 112 | param { 113 | lr_mult: 0 114 | decay_mult: 0 115 | } 116 | convolution_param { 117 | num_output: 128 118 | pad: 1 119 | kernel_size: 3 120 | } 121 | } 122 | layer { 123 | name: "relu2_2" 124 | type: "ReLU" 125 | bottom: "conv2_2" 126 | top: "conv2_2" 127 | } 128 | layer { 129 | name: "pool2" 130 | type: "Pooling" 131 | bottom: "conv2_2" 132 | top: "pool2" 133 | pooling_param { 134 | pool: MAX 135 | kernel_size: 2 136 | stride: 2 137 | } 138 | } 139 | layer { 140 | name: "conv3_1" 141 | type: "Convolution" 142 | bottom: "pool2" 143 | top: "conv3_1" 144 | param { 145 | lr_mult: 1 146 | } 147 | param { 148 | lr_mult: 2 149 | } 150 | convolution_param { 151 | num_output: 256 152 | pad: 1 153 | kernel_size: 3 154 | } 155 | } 156 | layer { 157 | name: "relu3_1" 158 | type: "ReLU" 159 | bottom: "conv3_1" 160 | top: "conv3_1" 161 | } 162 | layer { 163 | name: "conv3_2" 164 | type: "Convolution" 165 | bottom: "conv3_1" 166 | top: "conv3_2" 167 | param { 168 | lr_mult: 1 169 | } 170 | param { 171 | lr_mult: 2 172 | } 173 | convolution_param { 174 | num_output: 256 175 | pad: 1 176 | kernel_size: 3 177 | } 178 | } 179 | layer { 180 | name: "relu3_2" 181 | type: "ReLU" 182 | bottom: "conv3_2" 183 | top: "conv3_2" 184 | } 185 | layer { 186 | name: "conv3_3" 187 | type: "Convolution" 188 | bottom: "conv3_2" 189 | top: "conv3_3" 190 | param { 191 | lr_mult: 1 192 | } 193 | param { 194 | lr_mult: 2 195 | } 196 | convolution_param { 197 | num_output: 256 198 | pad: 1 199 | kernel_size: 3 200 | } 201 | } 202 | layer { 203 | name: "relu3_3" 204 | type: "ReLU" 205 | bottom: "conv3_3" 206 | top: "conv3_3" 207 | } 208 | layer { 209 | name: "pool3" 210 | type: "Pooling" 211 | bottom: "conv3_3" 212 | top: "pool3" 213 | pooling_param { 214 | pool: MAX 215 | kernel_size: 2 216 | stride: 2 217 | } 218 | } 219 | layer { 220 | name: "conv4_1" 221 | type: "Convolution" 222 | bottom: "pool3" 223 | top: "conv4_1" 224 | param { 225 | lr_mult: 1 226 | } 227 | param { 228 | lr_mult: 2 229 | } 230 | convolution_param { 231 | num_output: 512 232 | pad: 1 233 | kernel_size: 3 234 | } 235 | } 236 | layer { 237 | name: "relu4_1" 238 | type: "ReLU" 239 | bottom: "conv4_1" 240 | top: "conv4_1" 241 | } 242 | layer { 243 | name: "conv4_2" 244 | type: "Convolution" 245 | bottom: "conv4_1" 246 | top: "conv4_2" 247 | param { 248 | lr_mult: 1 249 | } 250 | param { 251 | lr_mult: 2 252 | } 253 | convolution_param { 254 | num_output: 512 255 | pad: 1 256 | kernel_size: 3 257 | } 258 | } 259 | layer { 260 | name: "relu4_2" 261 | type: "ReLU" 262 | bottom: "conv4_2" 263 | top: "conv4_2" 264 | } 265 | layer { 266 | name: "conv4_3" 267 | type: "Convolution" 268 | bottom: "conv4_2" 269 | top: "conv4_3" 
270 | param { 271 | lr_mult: 1 272 | } 273 | param { 274 | lr_mult: 2 275 | } 276 | convolution_param { 277 | num_output: 512 278 | pad: 1 279 | kernel_size: 3 280 | } 281 | } 282 | layer { 283 | name: "relu4_3" 284 | type: "ReLU" 285 | bottom: "conv4_3" 286 | top: "conv4_3" 287 | } 288 | layer { 289 | name: "pool4" 290 | type: "Pooling" 291 | bottom: "conv4_3" 292 | top: "pool4" 293 | pooling_param { 294 | pool: MAX 295 | kernel_size: 2 296 | stride: 2 297 | } 298 | } 299 | layer { 300 | name: "conv5_1" 301 | type: "Convolution" 302 | bottom: "pool4" 303 | top: "conv5_1" 304 | param { 305 | lr_mult: 1 306 | } 307 | param { 308 | lr_mult: 2 309 | } 310 | convolution_param { 311 | num_output: 512 312 | pad: 1 313 | kernel_size: 3 314 | } 315 | } 316 | layer { 317 | name: "relu5_1" 318 | type: "ReLU" 319 | bottom: "conv5_1" 320 | top: "conv5_1" 321 | } 322 | layer { 323 | name: "conv5_2" 324 | type: "Convolution" 325 | bottom: "conv5_1" 326 | top: "conv5_2" 327 | param { 328 | lr_mult: 1 329 | } 330 | param { 331 | lr_mult: 2 332 | } 333 | convolution_param { 334 | num_output: 512 335 | pad: 1 336 | kernel_size: 3 337 | } 338 | } 339 | layer { 340 | name: "relu5_2" 341 | type: "ReLU" 342 | bottom: "conv5_2" 343 | top: "conv5_2" 344 | } 345 | layer { 346 | name: "conv5_3" 347 | type: "Convolution" 348 | bottom: "conv5_2" 349 | top: "conv5_3" 350 | param { 351 | lr_mult: 1 352 | } 353 | param { 354 | lr_mult: 2 355 | } 356 | convolution_param { 357 | num_output: 512 358 | pad: 1 359 | kernel_size: 3 360 | } 361 | } 362 | layer { 363 | name: "relu5_3" 364 | type: "ReLU" 365 | bottom: "conv5_3" 366 | top: "conv5_3" 367 | } 368 | 369 | #========= RPN ============ 370 | 371 | # prepare lstm inputs 372 | layer { 373 | name: "im2col" 374 | bottom: "conv5_3" 375 | top: "im2col" 376 | type: "Im2col" 377 | convolution_param { 378 | pad: 1 379 | kernel_size: 3 380 | } 381 | } 382 | layer { 383 | name: "im2col_transpose" 384 | top: "im2col_transpose" 385 | bottom: "im2col" 386 | type: "Transpose" 387 | transpose_param { 388 | dim: 3 389 | dim: 2 390 | dim: 0 391 | dim: 1 392 | } 393 | } 394 | layer { 395 | name: "lstm_input" 396 | type: "Reshape" 397 | bottom: "im2col_transpose" 398 | top: "lstm_input" 399 | reshape_param { 400 | shape { dim: -1 } 401 | axis: 1 402 | num_axes: 2 403 | } 404 | } 405 | 406 | layer { 407 | name: "lstm" 408 | type: "Lstm" 409 | bottom: "lstm_input" 410 | top: "lstm" 411 | lstm_param { 412 | num_output: 128 413 | weight_filler { 414 | type: "gaussian" 415 | std: 0.01 416 | } 417 | bias_filler { 418 | type: "constant" 419 | } 420 | clipping_threshold: 1 421 | } 422 | } 423 | 424 | 425 | # ===================== rlstm =================== 426 | layer { 427 | name: "lstm-reverse1" 428 | type: "Reverse" 429 | bottom: "lstm_input" 430 | top: "rlstm_input" 431 | reverse_param { 432 | axis: 0 433 | } 434 | } 435 | layer { 436 | name: "rlstm" 437 | type: "Lstm" 438 | bottom: "rlstm_input" 439 | top: "rlstm-output" 440 | lstm_param { 441 | num_output: 128 442 | } 443 | } 444 | layer { 445 | name: "lstm-reverse2" 446 | type: "Reverse" 447 | bottom: "rlstm-output" 448 | top: "rlstm" 449 | reverse_param { 450 | axis: 0 451 | } 452 | } 453 | 454 | 455 | # merge lstm and rlstm 456 | layer { 457 | name: "merge_lstm_rlstm" 458 | type: "Concat" 459 | bottom: "lstm" 460 | bottom: "rlstm" 461 | top: "merge_lstm_rlstm" 462 | concat_param { 463 | axis: 2 464 | } 465 | } 466 | layer { 467 | name: "lstm_output_reshape" 468 | type: "Reshape" 469 | bottom: "merge_lstm_rlstm" 470 | top: 
"lstm_output_reshape" 471 | reshape_param { 472 | shape { dim: -1 dim: 1 } 473 | axis: 1 474 | num_axes: 1 475 | } 476 | } 477 | # transpose size of output as (N, C, H, W) 478 | layer { 479 | name: "lstm_output" 480 | type: "Transpose" 481 | bottom: "lstm_output_reshape" 482 | top: "lstm_output" 483 | transpose_param { 484 | dim: 2 485 | dim: 3 486 | dim: 1 487 | dim: 0 488 | } 489 | } 490 | layer { 491 | name: "fc" 492 | bottom: "lstm_output" 493 | top: "fc" 494 | type: "Convolution" 495 | convolution_param { 496 | num_output: 512 497 | kernel_size: 1 498 | } 499 | } 500 | layer { 501 | name: "relu_fc" 502 | type: "ReLU" 503 | bottom: "fc" 504 | top: "fc" 505 | } 506 | layer { 507 | name: "rpn_cls_score" 508 | type: "Convolution" 509 | bottom: "fc" 510 | top: "rpn_cls_score" 511 | param { lr_mult: 1.0 } 512 | param { lr_mult: 2.0 } 513 | convolution_param { 514 | num_output: 20 515 | kernel_size: 1 pad: 0 stride: 1 516 | } 517 | } 518 | layer { 519 | bottom: "rpn_cls_score" 520 | top: "rpn_cls_score_reshape" 521 | name: "rpn_cls_score_reshape" 522 | type: "Reshape" 523 | reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } 524 | } 525 | layer { 526 | name: "rpn_bbox_pred" 527 | type: "Convolution" 528 | bottom: "fc" 529 | top: "rpn_bbox_pred" 530 | param { lr_mult: 1.0 } 531 | param { lr_mult: 2.0 } 532 | convolution_param { 533 | num_output: 20 534 | kernel_size: 1 pad: 0 stride: 1 535 | } 536 | } 537 | layer { 538 | name: "rpn_cls_prob" 539 | type: "Softmax" 540 | bottom: "rpn_cls_score_reshape" 541 | top: "rpn_cls_prob" 542 | } 543 | 544 | layer { 545 | name: 'rpn_cls_prob_reshape' 546 | type: 'Reshape' 547 | bottom: 'rpn_cls_prob' 548 | top: 'rpn_cls_prob_reshape' 549 | reshape_param { shape { dim: 0 dim: 20 dim: -1 dim: 0 } } 550 | } 551 | 552 | layer { 553 | name: 'proposal' 554 | type: 'Python' 555 | bottom: 'rpn_cls_prob_reshape' 556 | bottom: 'rpn_bbox_pred' 557 | bottom: 'im_info' 558 | top: 'rois' 559 | top: 'scores' 560 | python_param { 561 | module: 'layers.text_proposal_layer' 562 | layer: 'ProposalLayer' 563 | param_str: "'feat_stride': 16" 564 | } 565 | } 566 | -------------------------------------------------------------------------------- /CTPN/src/anchor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class AnchorText: 5 | def __init__(self): 6 | self.anchor_num=10 7 | 8 | def generate_basic_anchors(self, sizes, base_size=16): 9 | """ 10 | :param sizes: [(h1, w1), (h2, w2)...] 11 | :param base_size 12 | :return: 13 | """ 14 | assert(self.anchor_num==len(sizes)) 15 | base_anchor=np.array([0, 0, base_size-1, base_size-1], np.int32) 16 | anchors=np.zeros((len(sizes), 4), np.int32) 17 | index=0 18 | for h, w in sizes: 19 | anchors[index]=self.scale_anchor(base_anchor, h, w) 20 | index+=1 21 | return anchors 22 | 23 | def scale_anchor(self, anchor, h, w): 24 | x_ctr=(anchor[0]+anchor[2])*0.5 25 | y_ctr=(anchor[1]+anchor[3])*0.5 26 | scaled_anchor=anchor.copy() 27 | scaled_anchor[0]=x_ctr-w/2 28 | scaled_anchor[2]=x_ctr+w/2 29 | scaled_anchor[1]=y_ctr-h/2 30 | scaled_anchor[3]=y_ctr+h/2 31 | return scaled_anchor 32 | 33 | def apply_deltas_to_anchors(self, boxes_delta, anchors): 34 | """ 35 | :return [l t r b] 36 | """ 37 | anchor_y_ctr=(anchors[:, 1]+anchors[:, 3])/2. 38 | anchor_h=anchors[:, 3]-anchors[:, 1]+1. 
39 | global_coords=np.zeros_like(boxes_delta, np.float32) 40 | global_coords[:, 1]=np.exp(boxes_delta[:, 1])*anchor_h 41 | global_coords[:, 0]=boxes_delta[:, 0]*anchor_h+anchor_y_ctr-global_coords[:, 1]/2. 42 | return np.hstack((anchors[:, [0]], global_coords[:, [0]], anchors[:, [2]], 43 | global_coords[:, [0]]+global_coords[:, [1]])).astype(np.float32) 44 | 45 | def basic_anchors(self): 46 | """ 47 | anchor [l t r b] 48 | """ 49 | heights=[11, 16, 23, 33, 48, 68, 97, 139, 198, 283] 50 | widths=[16] 51 | sizes=[] 52 | for h in heights: 53 | for w in widths: 54 | sizes.append((h, w)) 55 | return self.generate_basic_anchors(sizes) 56 | 57 | def locate_anchors(self, feat_map_size, feat_stride): 58 | """ 59 | return all anchors on the feature map 60 | """ 61 | basic_anchors_=self.basic_anchors() 62 | anchors=np.zeros((basic_anchors_.shape[0]*feat_map_size[0]*feat_map_size[1], 4), np.int32) 63 | index=0 64 | for y_ in range(feat_map_size[0]): 65 | for x_ in range(feat_map_size[1]): 66 | shift=np.array([x_, y_, x_, y_])*feat_stride 67 | anchors[index:index+basic_anchors_.shape[0], :]=basic_anchors_+shift 68 | index+=basic_anchors_.shape[0] 69 | return anchors 70 | -------------------------------------------------------------------------------- /CTPN/src/detectors.py: -------------------------------------------------------------------------------- 1 | from cfg import Config as cfg 2 | from other import prepare_img, normalize 3 | import numpy as np 4 | from utils.cpu_nms import cpu_nms as nms 5 | from text_proposal_connector import TextProposalConnector 6 | 7 | 8 | class TextProposalDetector: 9 | """ 10 | Detect text proposals in an image 11 | """ 12 | def __init__(self, caffe_model): 13 | self.caffe_model=caffe_model 14 | 15 | def detect(self, im, mean): 16 | im_data=prepare_img(im, mean) 17 | _=self.caffe_model.forward2({ 18 | "data": im_data[np.newaxis, :], 19 | "im_info": np.array([[im_data.shape[1], im_data.shape[2]]], np.float32) 20 | }) 21 | rois=self.caffe_model.blob("rois") 22 | scores=self.caffe_model.blob("scores") 23 | return rois, scores 24 | 25 | 26 | class TextDetector: 27 | """ 28 | Detect text from an image 29 | """ 30 | def __init__(self, text_proposal_detector): 31 | self.text_proposal_detector=text_proposal_detector 32 | self.text_proposal_connector=TextProposalConnector() 33 | 34 | def detect(self, im): 35 | """ 36 | Detecting texts from an image 37 | :return: the bounding boxes of the detected texts 38 | """ 39 | text_proposals, scores=self.text_proposal_detector.detect(im, cfg.MEAN) 40 | keep_inds=np.where(scores>cfg.TEXT_PROPOSALS_MIN_SCORE)[0] 41 | text_proposals, scores=text_proposals[keep_inds], scores[keep_inds] 42 | 43 | sorted_indices=np.argsort(scores.ravel())[::-1] 44 | text_proposals, scores=text_proposals[sorted_indices], scores[sorted_indices] 45 | 46 | # nms for text proposals 47 | keep_inds=nms(np.hstack((text_proposals, scores)), cfg.TEXT_PROPOSALS_NMS_THRESH) 48 | text_proposals, scores=text_proposals[keep_inds], scores[keep_inds] 49 | 50 | scores=normalize(scores) 51 | 52 | text_lines=self.text_proposal_connector.get_text_lines(text_proposals, scores, im.shape[:2]) 53 | 54 | keep_inds=self.filter_boxes(text_lines) 55 | text_lines=text_lines[keep_inds] 56 | 57 | if text_lines.shape[0]!=0: 58 | keep_inds=nms(text_lines, cfg.TEXT_LINE_NMS_THRESH) 59 | text_lines=text_lines[keep_inds] 60 | 61 | return text_lines 62 | 63 | def filter_boxes(self, boxes): 64 | heights=boxes[:, 3]-boxes[:, 1]+1 65 | widths=boxes[:, 2]-boxes[:, 0]+1 66 | scores=boxes[:, -1] 67 | 
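        # A text line survives only if it is wide relative to its height
        # (widths/heights > MIN_RATIO), scores above LINE_MIN_SCORE, and spans
        # at least MIN_NUM_PROPOSALS of the 16-pixel proposals (see tools/cfg.py).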
return np.where((widths/heights>cfg.MIN_RATIO) & (scores>cfg.LINE_MIN_SCORE) & 68 | (widths>(cfg.TEXT_PROPOSALS_WIDTH*cfg.MIN_NUM_PROPOSALS)))[0] 69 | -------------------------------------------------------------------------------- /CTPN/src/layers/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tianzhi' 2 | -------------------------------------------------------------------------------- /CTPN/src/layers/text_proposal_layer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import yaml, caffe 3 | from other import clip_boxes 4 | from anchor import AnchorText 5 | 6 | 7 | class ProposalLayer(caffe.Layer): 8 | def setup(self, bottom, top): 9 | # parse the layer parameter string, which must be valid YAML 10 | layer_params = yaml.load(self.param_str_) 11 | 12 | self._feat_stride = layer_params['feat_stride'] 13 | self.anchor_generator=AnchorText() 14 | self._num_anchors = self.anchor_generator.anchor_num 15 | 16 | top[0].reshape(1, 4) 17 | top[1].reshape(1, 1, 1, 1) 18 | 19 | def forward(self, bottom, top): 20 | assert bottom[0].data.shape[0]==1, \ 21 | 'Only single item batches are supported' 22 | 23 | scores = bottom[0].data[:, self._num_anchors:, :, :] 24 | 25 | bbox_deltas = bottom[1].data 26 | im_info = bottom[2].data[0, :] 27 | height, width = scores.shape[-2:] 28 | 29 | anchors=self.anchor_generator.locate_anchors((height, width), self._feat_stride) 30 | 31 | scores=scores.transpose((0, 2, 3, 1)).reshape(-1, 1) 32 | bbox_deltas=bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 2)) 33 | 34 | proposals=self.anchor_generator.apply_deltas_to_anchors(bbox_deltas, anchors) 35 | 36 | # clip the proposals in excess of the boundaries of the image 37 | proposals=clip_boxes(proposals, im_info[:2]) 38 | 39 | blob=proposals.astype(np.float32, copy=False) 40 | top[0].reshape(*(blob.shape)) 41 | top[0].data[...]=blob 42 | 43 | top[1].reshape(*(scores.shape)) 44 | top[1].data[...]=scores 45 | 46 | def backward(self, top, propagate_down, bottom): 47 | pass 48 | 49 | def reshape(self, bottom, top): 50 | pass 51 | -------------------------------------------------------------------------------- /CTPN/src/other.py: -------------------------------------------------------------------------------- 1 | import cv2, caffe 2 | import numpy as np 3 | from matplotlib import cm 4 | 5 | 6 | def prepare_img(im, mean): 7 | """ 8 | transform img into caffe's input img. 
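    Subtracts the per-channel BGR mean and reorders HxWxC (OpenCV layout)
    to CxHxW, the layout caffe expects for its data blob.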
9 | """ 10 | im_data=np.transpose(im-mean, (2, 0, 1)) 11 | return im_data 12 | 13 | 14 | def draw_boxes(im, bboxes, is_display=True, color=None, caption="Image", wait=True): 15 | """ 16 | boxes: bounding boxes 17 | """ 18 | text_recs=np.zeros((len(bboxes), 8), np.int) 19 | 20 | im=im.copy() 21 | index = 0 22 | for box in bboxes: 23 | if color==None: 24 | if len(box)==8 or len(box)==9: 25 | c=tuple(cm.jet([box[-1]])[0, 2::-1]*255) 26 | else: 27 | c=tuple(np.random.randint(0, 256, 3)) 28 | else: 29 | c=color 30 | 31 | b1 = box[6] - box[7] / 2 32 | b2 = box[6] + box[7] / 2 33 | x1 = box[0] 34 | y1 = box[5] * box[0] + b1 35 | x2 = box[2] 36 | y2 = box[5] * box[2] + b1 37 | x3 = box[0] 38 | y3 = box[5] * box[0] + b2 39 | x4 = box[2] 40 | y4 = box[5] * box[2] + b2 41 | 42 | disX = x2 - x1 43 | disY = y2 - y1 44 | width = np.sqrt(disX*disX + disY*disY) 45 | fTmp0 = y3 - y1 46 | fTmp1 = fTmp0 * disY / width 47 | x = np.fabs(fTmp1*disX / width) 48 | y = np.fabs(fTmp1*disY / width) 49 | if box[5] < 0: 50 | x1 -= x 51 | y1 += y 52 | x4 += x 53 | y4 -= y 54 | else: 55 | x2 += x 56 | y2 += y 57 | x3 -= x 58 | y3 -= y 59 | cv2.line(im,(int(x1),int(y1)),(int(x2),int(y2)),c,2) 60 | cv2.line(im,(int(x1),int(y1)),(int(x3),int(y3)),c,2) 61 | cv2.line(im,(int(x4),int(y4)),(int(x2),int(y2)),c,2) 62 | cv2.line(im,(int(x3),int(y3)),(int(x4),int(y4)),c,2) 63 | text_recs[index, 0] = x1 64 | text_recs[index, 1] = y1 65 | text_recs[index, 2] = x2 66 | text_recs[index, 3] = y2 67 | text_recs[index, 4] = x3 68 | text_recs[index, 5] = y3 69 | text_recs[index, 6] = x4 70 | text_recs[index, 7] = y4 71 | index = index + 1 72 | #cv2.rectangle(im, tuple(box[:2]), tuple(box[2:4]), c,2) 73 | if is_display: 74 | cv2.imshow('result', im) 75 | #if wait: 76 | #cv2.waitKey(0) 77 | return text_recs 78 | 79 | 80 | def threshold(coords, min_, max_): 81 | return np.maximum(np.minimum(coords, max_), min_) 82 | 83 | 84 | def clip_boxes(boxes, im_shape): 85 | """ 86 | Clip boxes to image boundaries. 
87 | """ 88 | boxes[:, 0::2]=threshold(boxes[:, 0::2], 0, im_shape[1]-1) 89 | boxes[:, 1::2]=threshold(boxes[:, 1::2], 0, im_shape[0]-1) 90 | return boxes 91 | 92 | 93 | def normalize(data): 94 | if data.shape[0]==0: 95 | return data 96 | max_=data.max() 97 | min_=data.min() 98 | return (data-min_)/(max_-min_) if max_-min_!=0 else data-min_ 99 | 100 | 101 | def resize_im(im, scale, max_scale=None): 102 | f=float(scale)/min(im.shape[0], im.shape[1]) 103 | if max_scale!=None and f*max(im.shape[0], im.shape[1])>max_scale: 104 | f=float(max_scale)/max(im.shape[0], im.shape[1]) 105 | return cv2.resize(im, (0, 0), fx=f, fy=f), f 106 | 107 | 108 | class Graph: 109 | def __init__(self, graph): 110 | self.graph=graph 111 | 112 | def sub_graphs_connected(self): 113 | sub_graphs=[] 114 | for index in xrange(self.graph.shape[0]): 115 | if not self.graph[:, index].any() and self.graph[index, :].any(): 116 | v=index 117 | sub_graphs.append([v]) 118 | while self.graph[v, :].any(): 119 | v=np.where(self.graph[v, :])[0][0] 120 | sub_graphs[-1].append(v) 121 | return sub_graphs 122 | 123 | 124 | class CaffeModel: 125 | def __init__(self, net_def_file, model_file): 126 | self.net_def_file=net_def_file 127 | self.net=caffe.Net(net_def_file, model_file, caffe.TEST) 128 | 129 | def blob(self, key): 130 | return self.net.blobs[key].data.copy() 131 | 132 | def forward(self, input_data): 133 | return self.forward2({"data": input_data[np.newaxis, :]}) 134 | 135 | def forward2(self, input_data): 136 | for k, v in input_data.items(): 137 | self.net.blobs[k].reshape(*v.shape) 138 | self.net.blobs[k].data[...]=v 139 | return self.net.forward() 140 | 141 | def net_def_file(self): 142 | return self.net_def_file 143 | -------------------------------------------------------------------------------- /CTPN/src/text_proposal_connector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from other import clip_boxes 3 | from text_proposal_graph_builder import TextProposalGraphBuilder 4 | 5 | class TextProposalConnector: 6 | """ 7 | Connect text proposals into text lines 8 | """ 9 | def __init__(self): 10 | self.graph_builder=TextProposalGraphBuilder() 11 | 12 | def group_text_proposals(self, text_proposals, scores, im_size): 13 | graph=self.graph_builder.build_graph(text_proposals, scores, im_size) 14 | return graph.sub_graphs_connected() 15 | 16 | def fit_y(self, X, Y, x1, x2): 17 | len(X)!=0 18 | # if X only include one point, the function will get line y=Y[0] 19 | if np.sum(X==X[0])==len(X): 20 | return Y[0], Y[0] 21 | p=np.poly1d(np.polyfit(X, Y, 1)) 22 | return p(x1), p(x2) 23 | 24 | def get_text_lines(self, text_proposals, scores, im_size): 25 | # tp=text proposal 26 | tp_groups=self.group_text_proposals(text_proposals, scores, im_size) 27 | text_lines=np.zeros((len(tp_groups), 8), np.float32) 28 | 29 | for index, tp_indices in enumerate(tp_groups): 30 | text_line_boxes=text_proposals[list(tp_indices)] 31 | num = np.size(text_line_boxes) 32 | X = (text_line_boxes[:,0] + text_line_boxes[:,2]) / 2 33 | Y = (text_line_boxes[:,1] + text_line_boxes[:,3]) / 2 34 | z1 = np.polyfit(X,Y,1) 35 | p1 = np.poly1d(z1) 36 | 37 | 38 | x0=np.min(text_line_boxes[:, 0]) 39 | x1=np.max(text_line_boxes[:, 2]) 40 | 41 | offset=(text_line_boxes[0, 2]-text_line_boxes[0, 0])*0.5 42 | 43 | lt_y, rt_y=self.fit_y(text_line_boxes[:, 0], text_line_boxes[:, 1], x0+offset, x1-offset) 44 | lb_y, rb_y=self.fit_y(text_line_boxes[:, 0], text_line_boxes[:, 3], x0+offset, x1-offset) 45 | 46 | # the 
score of a text line is the average score of the scores 47 | # of all text proposals contained in the text line 48 | score=scores[list(tp_indices)].sum()/float(len(tp_indices)) 49 | 50 | text_lines[index, 0]=x0 51 | text_lines[index, 1]=min(lt_y, rt_y) 52 | text_lines[index, 2]=x1 53 | text_lines[index, 3]=max(lb_y, rb_y) 54 | text_lines[index, 4]=score 55 | text_lines[index, 5]=z1[0] 56 | text_lines[index, 6]=z1[1] 57 | height = np.mean( (text_line_boxes[:,3]-text_line_boxes[:,1]) ) 58 | text_lines[index, 7]= height + 2.5 59 | #text_lines=clip_boxes(text_lines, im_size) 60 | 61 | 62 | return text_lines 63 | -------------------------------------------------------------------------------- /CTPN/src/text_proposal_graph_builder.py: -------------------------------------------------------------------------------- 1 | from cfg import Config as cfg 2 | import numpy as np 3 | from other import Graph 4 | 5 | 6 | class TextProposalGraphBuilder: 7 | """ 8 | Build Text proposals into a graph. 9 | """ 10 | def get_successions(self, index): 11 | box=self.text_proposals[index] 12 | results=[] 13 | for left in range(int(box[0])+1, min(int(box[0])+cfg.MAX_HORIZONTAL_GAP+1, self.im_size[1])): 14 | adj_box_indices=self.boxes_table[left] 15 | for adj_box_index in adj_box_indices: 16 | if self.meet_v_iou(adj_box_index, index): 17 | results.append(adj_box_index) 18 | if len(results)!=0: 19 | return results 20 | return results 21 | 22 | def get_precursors(self, index): 23 | box=self.text_proposals[index] 24 | results=[] 25 | for left in range(int(box[0])-1, max(int(box[0]-cfg.MAX_HORIZONTAL_GAP), 0)-1, -1): 26 | adj_box_indices=self.boxes_table[left] 27 | for adj_box_index in adj_box_indices: 28 | if self.meet_v_iou(adj_box_index, index): 29 | results.append(adj_box_index) 30 | if len(results)!=0: 31 | return results 32 | return results 33 | 34 | def is_succession_node(self, index, succession_index): 35 | precursors=self.get_precursors(succession_index) 36 | if self.scores[index]>=np.max(self.scores[precursors]): 37 | return True 38 | return False 39 | 40 | def meet_v_iou(self, index1, index2): 41 | def overlaps_v(index1, index2): 42 | h1=self.heights[index1] 43 | h2=self.heights[index2] 44 | y0=max(self.text_proposals[index2][1], self.text_proposals[index1][1]) 45 | y1=min(self.text_proposals[index2][3], self.text_proposals[index1][3]) 46 | return max(0, y1-y0+1)/min(h1, h2) 47 | 48 | def size_similarity(index1, index2): 49 | h1=self.heights[index1] 50 | h2=self.heights[index2] 51 | return min(h1, h2)/max(h1, h2) 52 | 53 | return overlaps_v(index1, index2)>=cfg.MIN_V_OVERLAPS and \ 54 | size_similarity(index1, index2)>=cfg.MIN_SIZE_SIM 55 | 56 | def build_graph(self, text_proposals, scores, im_size): 57 | self.text_proposals=text_proposals 58 | self.scores=scores 59 | self.im_size=im_size 60 | self.heights=text_proposals[:, 3]-text_proposals[:, 1]+1 61 | 62 | boxes_table=[[] for _ in range(self.im_size[1])] 63 | for index, box in enumerate(text_proposals): 64 | boxes_table[int(box[0])].append(index) 65 | self.boxes_table=boxes_table 66 | 67 | graph=np.zeros((text_proposals.shape[0], text_proposals.shape[0]), np.bool) 68 | 69 | for index, box in enumerate(text_proposals): 70 | successions=self.get_successions(index) 71 | if len(successions)==0: 72 | continue 73 | succession_index=successions[np.argmax(scores[successions])] 74 | if self.is_succession_node(index, succession_index): 75 | # NOTE: a box can have multiple successions(precursors) if multiple successions(precursors) 76 | # have equal scores. 
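                # The edge is added only when this box scores at least as high
                # as every precursor of its best succession (is_succession_node
                # above), so mutually best-scoring pairs form the chains that
                # sub_graphs_connected() later walks into text lines.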
77 | graph[index, succession_index]=True 78 | return Graph(graph) 79 | -------------------------------------------------------------------------------- /CTPN/src/utils/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'zhitian' 2 | -------------------------------------------------------------------------------- /CTPN/src/utils/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /CTPN/src/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 
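    # Usage (as in tools/demo.py): call timer.tic() before the timed call and
    # timer.toc() after it; toc(average=True) returns the running mean over
    # all calls, toc(average=False) the duration of just the last interval.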
18 | 19 | def tic(self): 20 | # using time.time instead of time.clock because time.clock 21 | # does not normalize for multithreading 22 | self.start_time = time.time() 23 | 24 | def toc(self, average=True): 25 | self.diff = time.time() - self.start_time 26 | self.total_time += self.diff 27 | self.calls += 1 28 | self.average_time = self.total_time / self.calls 29 | if average: 30 | return self.average_time 31 | else: 32 | return self.diff 33 | -------------------------------------------------------------------------------- /CTPN/tools/cfg.py: -------------------------------------------------------------------------------- 1 | # MUST be imported first 2 | import sys 3 | import numpy as np 4 | 5 | class Config: 6 | MEAN=np.float32([102.9801, 115.9465, 122.7717]) 7 | TEST_GPU_ID=0 8 | SCALE=600 9 | MAX_SCALE=1000 10 | 11 | LINE_MIN_SCORE=0.7 12 | TEXT_PROPOSALS_MIN_SCORE=0.7 13 | TEXT_PROPOSALS_NMS_THRESH=0.3 14 | MAX_HORIZONTAL_GAP=50 15 | TEXT_LINE_NMS_THRESH=0.3 16 | MIN_NUM_PROPOSALS=2 17 | MIN_RATIO=1.2 18 | MIN_V_OVERLAPS=0.7 19 | MIN_SIZE_SIM=0.7 20 | TEXT_PROPOSALS_WIDTH=16 21 | 22 | def init(): 23 | sys.path.insert(0, "./tools") 24 | sys.path.insert(0, "./caffe/python") 25 | sys.path.insert(0, "./src") 26 | init() 27 | -------------------------------------------------------------------------------- /CTPN/tools/demo.py: -------------------------------------------------------------------------------- 1 | # 2 | # This code implements CTPN for scene text detection, described in: 3 | # 4 | # Z. Tian, W. Huang, T. He, P. He and Y. Qiao: Detecting Text in Natural Image with 5 | # Connectionist Text Proposal Network, ECCV, 2016. 6 | # 7 | # Online demo is available at: textdet.com 8 | # 9 | # This demo code (with our trained model) performs text-line detection (without the 10 | # side-refinement part).
11 | # 12 | # 13 | # ====== Copyright by Zhi Tian, Weilin Huang, Tong He, Pan He and Yu Qiao========== 14 | 15 | # Email: zhi.tian@siat.ac.cn; wl.huang@siat.ac.cn 16 | # 17 | # Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences 18 | # 19 | # 20 | 21 | from cfg import Config as cfg 22 | from other import draw_boxes, resize_im, CaffeModel 23 | import cv2, os, caffe, sys 24 | from detectors import TextProposalDetector, TextDetector 25 | import os.path as osp 26 | from utils.timer import Timer 27 | 28 | DEMO_IMAGE_DIR = "demo_images/" 29 | NET_DEF_FILE = "models/deploy.prototxt" 30 | MODEL_FILE = "models/ctpn_trained_model.caffemodel" 31 | 32 | if len(sys.argv) > 1 and sys.argv[1] == "--no-gpu": 33 | caffe.set_mode_cpu() 34 | else: 35 | caffe.set_mode_gpu() 36 | caffe.set_device(cfg.TEST_GPU_ID) 37 | 38 | # initialize the detectors 39 | text_proposals_detector = TextProposalDetector(CaffeModel(NET_DEF_FILE, MODEL_FILE)) 40 | 41 | text_detector = TextDetector(text_proposals_detector) 42 | 43 | path = os.path.abspath(os.curdir) 44 | timer=Timer() 45 | print "\ninput exit break\n" 46 | while 1 : 47 | im_name = raw_input("\nplease input file name:") 48 | if im_name == "exit": 49 | break 50 | im_path = path + "/demo_images/" + im_name 51 | 52 | im = cv2.imread(im_path) 53 | if im is None: 54 | continue 55 | 56 | im, f=resize_im(im, cfg.SCALE, cfg.MAX_SCALE) 57 | cv2.imshow("src", im) 58 | tmp = im.copy() 59 | timer.tic() 60 | text_lines=text_detector.detect(im) 61 | 62 | print "Number of the detected text lines: %s"%len(text_lines) 63 | print "Time: %f"%timer.toc() 64 | 65 | im_with_text_lines=draw_boxes(tmp, text_lines, caption=im_name, wait=True) 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /CTPN/编译运行说明.txt: -------------------------------------------------------------------------------- 1 | 1: Install the common caffe dependencies (see http://blog.csdn.net/u013832707/article/details/53159071). GPU support requires CUDA and cuDNN (the matching cuDNN version is 7.0 v4.0). 2 | 3 | Install Cython: sudo apt-get install cython 4 | 2: Enter code/ctpn/caffe 5 | make -j4 (pick the -j value according to your machine's memory) 6 | <1>: If hdf5-related errors appear: hdf5 lives in different places on different systems, so adjust the paths in Makefile.config; on my machine, for example: 7 | # Whatever else you find you need goes here. 8 | INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include /usr/include/hdf5/serial 9 | LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib /usr/lib/x86_64-linux-gnu/ /usr/lib/x86_64-linux-gnu/hdf5/serial 10 | 11 | make pycaffe 12 | 13 | Return to the ctpn directory and run make; note the path must not contain Chinese characters. 14 | 3: From code/ctpn run python ./tools/demo.py --no-gpu (CPU version) 15 | python ./tools/demo.py (GPU version) 16 | Problems that may come up: 17 | <1> cv2 module not found 18 | Fix: sudo apt-get install libopencv-dev python-opencv 19 | <2> No module named skimage.io 20 | Fix: pip install scikit-image 21 | <3> No module named google.protobuf.internal 22 | Fix: sudo pip install --upgrade protobuf 23 | <4> No module named yaml 24 | Fix: sudo apt-get install python-yaml 25 | <5> libcudnn.so.4 is not a symbolic link 26 | echo 'export PATH=/usr/local/cuda/bin:$PATH' >> ~/.bashrc 27 | echo 'export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH' >> ~/.bashrc 28 | 4: Type the file name of an image in the demo_images folder to process that image.
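A quick way to confirm the build steps above succeeded is to try the imports the demo needs. The snippet below is a hypothetical smoke test, not a file in the repo; run it from the CTPN directory after `make` and `make pycaffe` (the path inserts mirror tools/cfg.py):

```python
# smoke_test.py (hypothetical helper, not part of the repo)
import sys
sys.path.insert(0, "./caffe/python")  # pycaffe, built by `make pycaffe`
sys.path.insert(0, "./src")           # CTPN sources

import caffe                          # fails if pycaffe was not built
from utils.cpu_nms import cpu_nms     # fails if the Makefile step was skipped
import cv2, yaml                      # runtime deps from the list above
print("all imports succeeded")
```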
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | sceneReco 2 | =================================== 3 | CTPN + CRNN scene text recognition 4 | 5 | # ctpn: 6 | 7 | https://github.com/tianzhi0549/CTPN 8 | 9 | # crnn: 10 | 11 | https://github.com/bgshih/crnn 12 | 13 | # model: 14 | 15 | https://pan.baidu.com/s/19outaxa-vRjPkWaDuTNUPg 16 | 17 | copy ctpn_trained_model.caffemodel to ./CTPN/models 18 | 19 | https://pan.baidu.com/s/1Tap1VuKRT98ThZo8pzyztw 20 | https://pan.baidu.com/s/1b5CuFCjXe2gAIuK9Yu4L2g 21 | 22 | copy netCRNN63.pth to ./crnn/samples 23 | 24 | # Run demo 25 | 26 | python demo.py 27 | 28 | 29 | # Example image: 30 | 31 | ![Example Image](./01.jpg) 32 | ![Example Image](./03.jpg) 33 | -------------------------------------------------------------------------------- /crnn/dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # encoding: utf-8 3 | 4 | import random 5 | import torch 6 | from torch.utils.data import Dataset 7 | from torch.utils.data import sampler 8 | import torchvision.transforms as transforms 9 | import lmdb 10 | import six 11 | import sys 12 | from PIL import Image 13 | import numpy as np 14 | 15 | 16 | class lmdbDataset(Dataset): 17 | 18 | def __init__(self, root=None, transform=None, target_transform=None): 19 | self.env = lmdb.open( 20 | root, 21 | max_readers=1, 22 | readonly=True, 23 | lock=False, 24 | readahead=False, 25 | meminit=False) 26 | 27 | if not self.env: 28 | print('cannot create lmdb from %s' % (root)) 29 | sys.exit(0) 30 | 31 | with self.env.begin(write=False) as txn: 32 | nSamples = int(txn.get('num-samples')) 33 | self.nSamples = nSamples 34 | 35 | self.transform = transform 36 | self.target_transform = target_transform 37 | 38 | def __len__(self): 39 | return self.nSamples 40 | 41 | def __getitem__(self, index): 42 | assert index <= len(self), 'index range error' 43 | # lmdb keys are 1-based ('image-%09d'/'label-%09d'), hence the increment 44 | index += 1 45 | with self.env.begin(write=False) as txn: 46 | img_key = 'image-%09d' % index 47 | imgbuf = txn.get(img_key) 48 | 49 | buf = six.BytesIO() 50 | buf.write(imgbuf) 51 | buf.seek(0) 52 | try: 53 | img = Image.open(buf).convert('L') 54 | except IOError: 55 | print('Corrupted image for %d' % index) 56 | # skip a corrupted sample by recursing into the next one 57 | return self[index + 1] 58 | 59 | if self.transform is not None: 60 | img = self.transform(img) 61 | 62 | label_key = 'label-%09d' % index 63 | label = str(txn.get(label_key)) 64 | if self.target_transform is not None: 65 | label = self.target_transform(label) 66 | 67 | return (img, label) 68 | 69 |
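resizeNormalize below is the glue between PIL image crops and the CRNN input tensor; crnn/test.py and crnnport.py both call it the same way. A minimal sketch of that call pattern, assuming it runs with the crnn directory on the path and a sample image from the repo's img folder (adjust the path as needed):

```python
# Sketch of the preprocessing used in crnn/test.py (assumes `dataset` is
# importable, i.e. the crnn directory is on sys.path).
from PIL import Image
import dataset

image = Image.open("img/t37.jpg").convert("L")   # grayscale crop
w = int(image.size[0] / (image.size[1] / 32.0))  # keep aspect, height -> 32
transformer = dataset.resizeNormalize((w, 32))
tensor = transformer(image)               # CxHxW float tensor in [-1, 1]
tensor = tensor.view(1, *tensor.size())   # add the batch dimension
```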
class resizeNormalize(object): 70 | 71 | def __init__(self, size, interpolation=Image.BILINEAR): 72 | self.size = size 73 | self.interpolation = interpolation 74 | self.toTensor = transforms.ToTensor() 75 | 76 | def __call__(self, img): 77 | img = img.resize(self.size, self.interpolation) 78 | img = self.toTensor(img) 79 | img.sub_(0.5).div_(0.5) 80 | return img 81 | 82 | 83 | class randomSequentialSampler(sampler.Sampler): 84 | 85 | def __init__(self, data_source, batch_size): 86 | self.num_samples = len(data_source) 87 | self.batch_size = batch_size 88 | 89 | def __iter__(self): 90 | n_batch = len(self) // self.batch_size 91 | tail = len(self) % self.batch_size 92 | index = torch.LongTensor(len(self)).fill_(0) 93 | for i in range(n_batch): 94 | random_start = random.randint(0, len(self) - self.batch_size) 95 | batch_index = random_start + torch.range(0, self.batch_size - 1) 96 | index[i * self.batch_size:(i + 1) * self.batch_size] = batch_index 97 | # deal with tail 98 | if tail: 99 | random_start = random.randint(0, len(self) - self.batch_size) 100 | tail_index = random_start + torch.range(0, tail - 1) 101 | index[(i + 1) * self.batch_size:] = tail_index 102 | 103 | return iter(index) 104 | 105 | def __len__(self): 106 | return self.num_samples 107 | 108 | 109 | class alignCollate(object): 110 | 111 | def __init__(self, imgH=32, imgW=128, keep_ratio=False, min_ratio=1): 112 | self.imgH = imgH 113 | self.imgW = imgW 114 | self.keep_ratio = keep_ratio 115 | self.min_ratio = min_ratio 116 | 117 | def __call__(self, batch): 118 | images, labels = zip(*batch) 119 | 120 | imgH = self.imgH 121 | imgW = self.imgW 122 | if self.keep_ratio: 123 | ratios = [] 124 | for image in images: 125 | w, h = image.size 126 | ratios.append(w / float(h)) 127 | ratios.sort() 128 | max_ratio = ratios[-1] 129 | imgW = int(np.floor(max_ratio * imgH)) 130 | imgW = max(imgH * self.min_ratio, imgW) # ensure imgW >= imgH * min_ratio 131 | 132 | transform = resizeNormalize((imgW, imgH)) 133 | images = [transform(image) for image in images] 134 | images = torch.cat([t.unsqueeze(0) for t in images], 0) 135 | 136 | return images, labels 137 | -------------------------------------------------------------------------------- /crnn/dataset.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bear63/sceneReco/ae1731e4a344359e1ff5a147ec8d3834afb16ba1/crnn/dataset.pyc -------------------------------------------------------------------------------- /crnn/keys.py: -------------------------------------------------------------------------------- 1 | #coding:UTF-8 2 | alphabet = 
u'\'疗绚诚娇溜题贿者廖更纳加奉公一就汴计与路房原妇208-7其>:],,骑刈全消昏傈安久钟嗅不影处驽蜿资关椤地瘸专问忖票嫉炎韵要月田节陂鄙捌备拳伺眼网盎大傍心东愉汇蹿科每业里航晏字平录先13彤鲶产稍督腴有象岳注绍在泺文定核名水过理让偷率等这发”为含肥酉相鄱七编猥锛日镀蒂掰倒辆栾栗综涩州雌滑馀了机块司宰甙兴矽抚保用沧秩如收息滥页疑埠!!姥异橹钇向下跄的椴沫国绥獠报开民蜇何分凇长讥藏掏施羽中讲派嘟人提浼间世而古多倪唇饯控庚首赛蜓味断制觉技替艰溢潮夕钺外摘枋动双单啮户枇确锦曜杜或能效霜盒然侗电晁放步鹃新杖蜂吒濂瞬评总隍对独合也是府青天诲墙组滴级邀帘示已时骸仄泅和遨店雇疫持巍踮境只亨目鉴崤闲体泄杂作般轰化解迂诿蛭璀腾告版服省师小规程线海办引二桧牌砺洄裴修图痫胡许犊事郛基柴呼食研奶律蛋因葆察戏褒戒再李骁工貂油鹅章啄休场给睡纷豆器捎说敏学会浒设诊格廓查来霓室溆¢诡寥焕舜柒狐回戟砾厄实翩尿五入径惭喹股宇篝|;美期云九祺扮靠锝槌系企酰阊暂蚕忻豁本羹执条钦H獒限进季楦于芘玖铋茯未答粘括样精欠矢甥帷嵩扣令仔风皈行支部蓉刮站蜡救钊汗松嫌成可.鹤院从交政怕活调球局验髌第韫谗串到圆年米/*友忿检区看自敢刃个兹弄流留同没齿星聆轼湖什三建蛔儿椋汕震颧鲤跟力情璺铨陪务指族训滦鄣濮扒商箱十召慷辗所莞管护臭横硒嗓接侦六露党馋驾剖高侬妪幂猗绺骐央酐孝筝课徇缰门男西项句谙瞒秃篇教碲罚声呐景前富嘴鳌稀免朋啬睐去赈鱼住肩愕速旁波厅健茼厥鲟谅投攸炔数方击呋谈绩别愫僚躬鹧胪炳招喇膨泵蹦毛结54谱识陕粽婚拟构且搜任潘比郢妨醪陀桔碘扎选哈骷楷亿明缆脯监睫逻婵共赴淝凡惦及达揖谩澹减焰蛹番祁柏员禄怡峤龙白叽生闯起细装谕竟聚钙上导渊按艾辘挡耒盹饪臀记邮蕙受各医搂普滇朗茸带翻酚(光堤墟蔷万幻〓瑙辈昧盏亘蛀吉铰请子假闻税井诩哨嫂好面琐校馊鬣缂营访炖占农缀否经钚棵趟张亟吏茶谨捻论迸堂玉信吧瞠乡姬寺咬溏苄皿意赉宝尔钰艺特唳踉都荣倚登荐丧奇涵批炭近符傩感道着菊虹仲众懈濯颞眺南释北缝标既茗整撼迤贲挎耱拒某妍卫哇英矶藩治他元领膜遮穗蛾飞荒棺劫么市火温拈棚洼转果奕卸迪伸泳斗邡侄涨屯萋胭氡崮枞惧冒彩斜手豚随旭淑妞形菌吲沱争驯歹挟兆柱传至包内响临红功弩衡寂禁老棍耆渍织害氵渑布载靥嗬虽苹咨娄库雉榜帜嘲套瑚亲簸欧边6腿旮抛吹瞳得镓梗厨继漾愣憨士策窑抑躯襟脏参贸言干绸鳄穷藜音折详)举悍甸癌黎谴死罩迁寒驷袖媒蒋掘模纠恣观祖蛆碍位稿主澧跌筏京锏帝贴证糠才黄鲸略炯饱四出园犀牧容汉杆浈汰瑷造虫瘩怪驴济应花沣谔夙旅价矿以考su呦晒巡茅准肟瓴詹仟褂译桌混宁怦郑抿些余鄂饴攒珑群阖岔琨藓预环洮岌宀杲瀵最常囡周踊女鼓袭喉简范薯遐疏粱黜禧法箔斤遥汝奥直贞撑置绱集她馅逗钧橱魉[恙躁唤9旺膘待脾惫购吗依盲度瘿蠖俾之镗拇鲵厝簧续款展啃表剔品钻腭损清锶统涌寸滨贪链吠冈伎迥咏吁览防迅失汾阔逵绀蔑列川凭努熨揪利俱绉抢鸨我即责膦易毓鹊刹玷岿空嘞绊排术估锷违们苟铜播肘件烫审鲂广像铌惰铟巳胍鲍康憧色恢想拷尤疳知SYFDA峄裕帮握搔氐氘难墒沮雨叁缥悴藐湫娟苑稠颛簇后阕闭蕤缚怎佞码嘤蔡痊舱螯帕赫昵升烬岫、疵蜻髁蕨隶烛械丑盂梁强鲛由拘揉劭龟撤钩呕孛费妻漂求阑崖秤甘通深补赃坎床啪承吼量暇钼烨阂擎脱逮称P神属矗华届狍葑汹育患窒蛰佼静槎运鳗庆逝曼疱克代官此麸耧蚌晟例础榛副测唰缢迹灬霁身岁赭扛又菡乜雾板读陷徉贯郁虑变钓菜圾现琢式乐维渔浜左吾脑钡警T啵拴偌漱湿硕止骼魄积燥联踢玛|则窿见振畿送班钽您赵刨印讨踝籍谡舌崧汽蔽沪酥绒怖财帖肱私莎勋羔霸励哼帐将帅渠纪婴娩岭厘滕吻伤坝冠戊隆瘁介涧物黍并姗奢蹑掣垸锴命箍捉病辖琰眭迩艘绌繁寅若毋思诉类诈燮轲酮狂重反职筱县委磕绣奖晋濉志徽肠呈獐坻口片碰几村柿劳料获亩惕晕厌号罢池正鏖煨家棕复尝懋蜥锅岛扰队坠瘾钬@卧疣镇譬冰彷频黯据垄采八缪瘫型熹砰楠襁箐但嘶绳啤拍盥穆傲洗盯塘怔筛丿台恒喂葛永¥烟酒桦书砂蚝缉态瀚袄圳轻蛛超榧遛姒奘铮右荽望偻卡丶氰附做革索戚坨桷唁垅榻岐偎坛莨山殊微骇陈爨推嗝驹澡藁呤卤嘻糅逛侵郓酌德摇※鬃被慨殡羸昌泡戛鞋河宪沿玲鲨翅哽源铅语照邯址荃佬顺鸳町霭睾瓢夸椁晓酿痈咔侏券噎湍签嚷离午尚社锤背孟使浪缦潍鞅军姹驶笑鳟鲁》孽钜绿洱礴焯椰颖囔乌孔巴互性椽哞聘昨早暮胶炀隧低彗昝铁呓氽藉喔癖瑗姨权胱韦堑蜜酋楝砝毁靓歙锲究屋喳骨辨碑武鸠宫辜烊适坡殃培佩供走蜈迟翼况姣凛浔吃飘债犟金促苛崇坂莳畔绂兵蠕斋根砍亢欢恬崔剁餐榫快扶‖濒缠鳜当彭驭浦篮昀锆秸钳弋娣瞑夷龛苫拱致%嵊障隐弑初娓抉汩累蓖"唬助苓昙押毙破城郧逢嚏獭瞻溱婿赊跨恼璧萃姻貉灵炉密氛陶砸谬衔点琛沛枳层岱诺脍榈埂征冷裁打蹴素瘘逞蛐聊激腱萘踵飒蓟吆取咙簋涓矩曝挺揣座你史舵焱尘苏笈脚溉榨诵樊邓焊义庶儋蟋蒲赦呷杞诠豪还试颓茉太除紫逃痴草充鳕珉祗墨渭烩蘸慕璇镶穴嵘恶骂险绋幕碉肺戳刘潞秣纾潜銮洛须罘销瘪汞兮屉r林厕质探划狸殚善煊烹〒锈逯宸辍泱柚袍远蹋嶙绝峥娥缍雀徵认镱谷=贩勉撩鄯斐洋非祚泾诒饿撬威晷搭芍锥笺蓦候琊档礁沼卵荠忑朝凹瑞头仪弧孵畏铆突衲车浩气茂悖厢枕酝戴湾邹飚攘锂写宵翁岷无喜丈挑嗟绛殉议槽具醇淞笃郴阅饼底壕砚弈询缕庹翟零筷暨舟闺甯撞麂茌蔼很珲捕棠角阉媛娲诽剿尉爵睬韩诰匣危糍镯立浏阳少盆舔擘匪申尬铣旯抖赘瓯居ˇ哮游锭茏歌坏甚秒舞沙仗劲潺阿燧郭嗖霏忠材奂耐跺砀输岖媳氟极摆灿今扔腻枝奎药熄吨话q额慑嘌协喀壳埭视著於愧陲翌峁颅佛腹聋侯咎叟秀颇存较罪哄岗扫栏钾羌己璨枭霉煌涸衿键镝益岢奏连夯睿冥均糖狞蹊稻爸刿胥煜丽肿璃掸跚灾垂樾濑乎莲窄犹撮战馄软络显鸢胸宾妲恕埔蝌份遇巧瞟粒恰剥桡博讯凯堇阶滤卖斌骚彬兑磺樱舷两娱福仃差找桁÷净把阴污戬雷碓蕲楚罡焖抽妫咒仑闱尽邑菁爱贷沥鞑牡嗉崴骤塌嗦订拮滓捡锻次坪杩臃箬融珂鹗宗枚降鸬妯阄堰盐毅必杨崃俺甬状莘货耸菱腼铸唏痤孚澳懒溅翘疙杷淼缙骰喊悉砻坷艇赁界谤纣宴晃茹归饭梢铡街抄肼鬟苯颂撷戈炒咆茭瘙负仰客琉铢封卑珥椿镧窨鬲寿御袤铃萎砖餮脒裳肪孕嫣馗嵇恳氯江石褶冢祸阻狈羞银靳透咳叼敷芷啥它瓤兰痘懊逑肌往捺坊甩呻〃沦忘膻祟菅剧崆智坯臧霍墅攻眯倘拢骠铐庭岙瓠′缺泥迢捶??郏喙掷沌纯秘种听绘固螨团香盗妒埚蓝拖旱荞铀血遏汲辰叩拽幅硬惶桀漠措泼唑齐肾念酱虚屁耶旗砦闵婉馆拭绅韧忏窝醋葺顾辞倜堆辋逆玟贱疾董惘倌锕淘嘀莽俭笏绑鲷杈择蟀粥嗯驰逾案谪褓胫哩昕颚鲢绠躺鹄崂儒俨丝尕泌啊萸彰幺吟骄苣弦脊瑰〈诛镁析闪剪侧哟框螃守嬗燕狭铈缮概迳痧鲲俯售笼痣扉挖满咋援邱扇歪便玑绦峡蛇叨〖泽胃斓喋怂坟猪该蚬炕弥赞棣晔娠挲狡创疖铕镭稷挫弭啾翔粉履苘哦楼秕铂土锣瘟挣栉习享桢袅磨桂谦延坚蔚噗署谟猬钎恐嬉雒倦衅亏璩睹刻殿王算雕麻丘柯骆丸塍谚添鲈垓桎蚯芥予飕镦谌窗醚菀亮搪莺蒿羁足J真轶悬衷靛翊掩哒炅掐冼妮l谐稚荆擒犯陵虏浓崽刍陌傻孜千靖演矜钕煽杰酗渗伞栋俗泫戍罕沾疽灏煦芬磴叱阱榉湃蜀叉醒彪租郡篷屎良垢隗弱陨峪砷掴颁胎雯绵贬沐撵隘篙暖曹陡栓填臼彦瓶琪潼哪鸡摩啦俟锋域耻蔫疯纹撇毒绶痛酯忍爪赳歆嘹辕烈册朴钱吮毯癜娃谀邵厮炽璞邃丐追词瓒忆轧芫谯喷弟半冕裙掖墉绮寝苔势顷褥切衮君佳嫒蚩霞佚洙逊镖暹唛&殒顶碗獗轭铺蛊废恹汨崩珍那杵曲纺夏薰傀闳淬姘舀拧卷楂恍讪厩寮篪赓乘灭盅鞣沟慎挂饺鼾杳树缨丛絮娌臻嗳篡侩述衰矛圈蚜匕筹匿濞晨叶骋郝挚蚴滞增侍描瓣吖嫦蟒匾圣赌毡癞恺百曳需篓肮庖帏卿驿遗蹬鬓骡歉芎胳屐禽烦晌寄媾狄翡苒船廉终痞殇々畦饶改拆悻萄£瓿乃訾桅匮溧拥纱铍骗蕃龋缬父佐疚栎醍掳蓄x惆颜鲆榆〔猎敌暴谥鲫贾罗玻缄扦芪癣落徒臾恿猩托邴肄牵春陛耀刊拓蓓邳堕寇枉淌啡湄兽酷萼碚濠萤夹旬戮梭琥椭昔勺蜊绐晚孺僵宣摄冽旨萌忙蚤眉噼蟑付契瓜悼颡壁曾窕颢澎仿俑浑嵌浣乍碌褪乱蔟隙玩剐葫箫纲围伐决伙漩瑟刑肓镳缓蹭氨皓典畲坍铑檐塑洞倬储胴淳戾吐灼惺妙毕珐缈虱盖羰鸿磅谓髅娴苴唷蚣霹抨贤唠犬誓逍庠逼麓籼釉呜碧秧氩摔霄穸纨辟妈映完牛缴嗷炊恩荔茆掉紊慌莓羟阙萁磐另蕹辱鳐湮吡吩唐睦垠舒圜冗瞿溺芾囱匠僳汐菩饬漓黑霰浸濡窥毂蒡兢驻鹉芮诙迫雳厂忐臆猴鸣蚪栈箕羡渐莆捍眈哓趴蹼埕嚣骛宏淄斑噜严瑛垃椎诱压庾绞焘廿抡迄棘夫纬锹眨瞌侠脐竞瀑孳骧遁姜颦荪滚萦伪逸粳爬锁矣役趣洒颔诏逐奸甭惠攀蹄泛尼拼阮鹰亚颈惑勒〉际肛爷刚钨丰养冶鲽辉蔻画覆皴妊麦返醉皂擀〗酶凑粹悟诀硖港卜z杀涕±舍铠抵弛段敝镐奠拂轴跛袱et沉菇俎薪峦秭蟹历盟菠寡液肢喻染裱悱抱氙赤捅猛跑氮谣仁尺辊窍烙衍架擦倏璐瑁币楞胖夔趸邛惴饕虔蝎§哉贝宽辫炮扩饲籽魏菟锰伍猝末琳哚蛎邂呀姿鄞却歧仙恸椐森牒寤袒婆虢雅钉朵贼欲苞寰故龚坭嘘咫礼硷兀睢汶’铲烧绕诃浃钿哺柜讼颊璁腔洽咐脲簌筠镣玮鞠谁兼姆挥梯蝴谘漕刷躏宦弼b垌劈麟莉揭笙渎仕嗤仓配怏抬错泯镊孰猿邪仍秋鼬壹歇吵炼<尧射柬廷胧霾凳隋肚浮梦祥株堵退L鹫跎凶毽荟炫栩玳甜沂鹿顽伯爹赔蛴徐匡欣狰缸雹蟆疤默沤啜痂衣禅wih辽葳黝钗停沽棒馨颌肉吴硫
悯劾娈马啧吊悌镑峭帆瀣涉咸疸滋泣翦拙癸钥蜒+尾庄凝泉婢渴谊乞陆锉糊鸦淮IBN晦弗乔庥葡尻席橡傣渣拿惩麋斛缃矮蛏岘鸽姐膏催奔镒喱蠡摧钯胤柠拐璋鸥卢荡倾^_珀逄萧塾掇贮笆聂圃冲嵬M滔笕值炙偶蜱搐梆汪蔬腑鸯蹇敞绯仨祯谆梧糗鑫啸豺囹猾巢柄瀛筑踌沭暗苁鱿蹉脂蘖牢热木吸溃宠序泞偿拜檩厚朐毗螳吞媚朽担蝗橘畴祈糟盱隼郜惜珠裨铵焙琚唯咚噪骊丫滢勤棉呸咣淀隔蕾窈饨挨煅短匙粕镜赣撕墩酬馁豌颐抗酣氓佑搁哭递耷涡桃贻碣截瘦昭镌蔓氚甲猕蕴蓬散拾纛狼猷铎埋旖矾讳囊糜迈粟蚂紧鲳瘢栽稼羊锄斟睁桥瓮蹙祉醺鼻昱剃跳篱跷蒜翎宅晖嗑壑峻癫屏狠陋袜途憎祀莹滟佶溥臣约盛峰磁慵婪拦莅朕鹦粲裤哎疡嫖琵窟堪谛嘉儡鳝斩郾驸酊妄胜贺徙傅噌钢栅庇恋匝巯邈尸锚粗佟蛟薹纵蚊郅绢锐苗俞篆淆膀鲜煎诶秽寻涮刺怀噶巨褰魅灶灌桉藕谜舸薄搀恽借牯痉渥愿亓耘杠柩锔蚶钣珈喘蹒幽赐稗晤莱泔扯肯菪裆腩豉疆骜腐倭珏唔粮亡润慰伽橄玄誉醐胆龊粼塬陇彼削嗣绾芽妗垭瘴爽薏寨龈泠弹赢漪猫嘧涂恤圭茧烽屑痕巾赖荸凰腮畈亵蹲偃苇澜艮换骺烘苕梓颉肇哗悄氤涠葬屠鹭植竺佯诣鲇瘀鲅邦移滁冯耕癔戌茬沁巩悠湘洪痹锟循谋腕鳃钠捞焉迎碱伫急榷奈邝卯辄皲卟醛畹忧稳雄昼缩阈睑扌耗曦涅捏瞧邕淖漉铝耦禹湛喽莼琅诸苎纂硅始嗨傥燃臂赅嘈呆贵屹壮肋亍蚀卅豹腆邬迭浊}童螂捐圩勐触寞汊壤荫膺渌芳懿遴螈泰蓼蛤茜舅枫朔膝眙避梅判鹜璜牍缅垫藻黔侥惚懂踩腰腈札丞唾慈顿摹荻琬~斧沈滂胁胀幄莜Z匀鄄掌绰茎焚赋萱谑汁铒瞎夺蜗野娆冀弯篁懵灞隽芡脘俐辩芯掺喏膈蝈觐悚踹蔗熠鼠呵抓橼峨畜缔禾崭弃熊摒凸拗穹蒙抒祛劝闫扳阵醌踪喵侣搬仅荧赎蝾琦买婧瞄寓皎冻赝箩莫瞰郊笫姝筒枪遣煸袋舆痱涛母〇启践耙绲盘遂昊搞槿诬纰泓惨檬亻越Co憩熵祷钒暧塔阗胰咄娶魔琶钞邻扬杉殴咽弓〆髻】吭揽霆拄殖脆彻岩芝勃辣剌钝嘎甄佘皖伦授徕憔挪皇庞稔芜踏溴兖卒擢饥鳞煲‰账颗叻斯捧鳍琮讹蛙纽谭酸兔莒睇伟觑羲嗜宜褐旎辛卦诘筋鎏溪挛熔阜晰鳅丢奚灸呱献陉黛鸪甾萨疮拯洲疹辑叙恻谒允柔烂氏逅漆拎惋扈湟纭啕掬擞哥忽涤鸵靡郗瓷扁廊怨雏钮敦E懦憋汀拚啉腌岸f痼瞅尊咀眩飙忌仝迦熬毫胯篑茄腺凄舛碴锵诧羯後漏汤宓仞蚁壶谰皑铄棰罔辅晶苦牟闽\烃饮聿丙蛳朱煤涔鳖犁罐荼砒淦妤黏戎孑婕瑾戢钵枣捋砥衩狙桠稣阎肃梏诫孪昶婊衫嗔侃塞蜃樵峒貌屿欺缫阐栖诟珞荭吝萍嗽恂啻蜴磬峋俸豫谎徊镍韬魇晴U囟猜蛮坐囿伴亭肝佗蝠妃胞滩榴氖垩苋砣扪馏姓轩厉夥侈禀垒岑赏钛辐痔披纸碳“坞蠓挤荥沅悔铧帼蒌蝇apyng哀浆瑶凿桶馈皮奴苜佤伶晗铱炬优弊氢恃甫攥端锌灰稹炝曙邋亥眶碾拉萝绔捷浍腋姑菖凌涞麽锢桨潢绎镰殆锑渝铬困绽觎匈糙暑裹鸟盔肽迷綦『亳佝俘钴觇骥仆疝跪婶郯瀹唉脖踞针晾忒扼瞩叛椒疟嗡邗肆跆玫忡捣咧唆艄蘑潦笛阚沸泻掊菽贫斥髂孢镂赂麝鸾屡衬苷恪叠希粤爻喝茫惬郸绻庸撅碟宄妹膛叮饵崛嗲椅冤搅咕敛尹垦闷蝉霎勰败蓑泸肤鹌幌焦浠鞍刁舰乙竿裔。茵函伊兄丨娜匍謇莪宥似蝽翳酪翠粑薇祢骏赠叫Q噤噻竖芗莠潭俊羿耜O郫趁嗪囚蹶芒洁笋鹑敲硝啶堡渲揩』携宿遒颍扭棱割萜蔸葵琴捂饰衙耿掠募岂窖涟蔺瘤柞瞪怜匹距楔炜哆秦缎幼茁绪痨恨楸娅瓦桩雪嬴伏榔妥铿拌眠雍缇‘卓搓哌觞噩屈哧髓咦巅娑侑淫膳祝勾姊莴胄疃薛蜷胛巷芙芋熙闰勿窃狱剩钏幢陟铛慧靴耍k浙浇飨惟绗祜澈啼咪磷摞诅郦抹跃壬吕肖琏颤尴剡抠凋赚泊津宕殷倔氲漫邺涎怠$垮荬遵俏叹噢饽蜘孙筵疼鞭羧牦箭潴c眸祭髯啖坳愁芩驮倡巽穰沃胚怒凤槛剂趵嫁v邢灯鄢桐睽檗锯槟婷嵋圻诗蕈颠遭痢芸怯馥竭锗徜恭遍籁剑嘱苡龄僧桑潸弘澶楹悲讫愤腥悸谍椹呢桓葭攫阀翰躲敖柑郎笨橇呃魁燎脓葩磋垛玺狮沓砜蕊锺罹蕉翱虐闾巫旦茱嬷枯鹏贡芹汛矫绁拣禺佃讣舫惯乳趋疲挽岚虾衾蠹蹂飓氦铖孩稞瑜壅掀勘妓畅髋W庐牲蓿榕练垣唱邸菲昆婺穿绡麒蚱掂愚泷涪漳妩娉榄讷觅旧藤煮呛柳腓叭庵烷阡罂蜕擂猖咿媲脉【沏貅黠熏哲烁坦酵兜×潇撒剽珩圹乾摸樟帽嗒襄魂轿憬锡〕喃皆咖隅脸残泮袂鹂珊囤捆咤误徨闹淙芊淋怆囗拨梳渤RG绨蚓婀幡狩麾谢唢裸旌伉纶裂驳砼咛澄樨蹈宙澍倍貔操勇蟠摈砧虬够缁悦藿撸艹摁淹豇虎榭ˉ吱d°喧荀踱侮奋偕饷犍惮坑璎徘宛妆袈倩窦昂荏乖K怅撰鳙牙袁酞X痿琼闸雁趾荚虻涝《杏韭偈烤绫鞘卉症遢蓥诋杭荨匆竣簪辙敕虞丹缭咩黟m淤瑕咂铉硼茨嶂痒畸敬涿粪窘熟叔嫔盾忱裘憾梵赡珙咯娘庙溯胺葱痪摊荷卞乒髦寐铭坩胗枷爆溟嚼羚砬轨惊挠罄竽菏氧浅楣盼枢炸阆杯谏噬淇渺俪秆墓泪跻砌痰垡渡耽釜讶鳎煞呗韶舶绷鹳缜旷铊皱龌檀霖奄槐艳蝶旋哝赶骞蚧腊盈丁`蜚矸蝙睨嚓僻鬼醴夜彝磊笔拔栀糕厦邰纫逭纤眦膊馍躇烯蘼冬诤暄骶哑瘠」臊丕愈咱螺擅跋搏硪谄笠淡嘿骅谧鼎皋姚歼蠢驼耳胬挝涯狗蒽孓犷凉芦箴铤孤嘛坤V茴朦挞尖橙诞搴碇洵浚帚蜍漯柘嚎讽芭荤咻祠秉跖埃吓糯眷馒惹娼鲑嫩讴轮瞥靶褚乏缤宋帧删驱碎扑俩俄偏涣竹噱皙佰渚唧斡#镉刀崎筐佣夭贰肴峙哔艿匐牺镛缘仡嫡劣枸堀梨簿鸭蒸亦稽浴{衢束槲j阁揍疥棋潋聪窜乓睛插冉阪苍搽「蟾螟幸仇樽撂慢跤幔俚淅覃觊溶妖帛侨曰妾泗' 3 | -------------------------------------------------------------------------------- /crnn/keys.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bear63/sceneReco/ae1731e4a344359e1ff5a147ec8d3834afb16ba1/crnn/keys.pyc -------------------------------------------------------------------------------- /crnn/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bear63/sceneReco/ae1731e4a344359e1ff5a147ec8d3834afb16ba1/crnn/models/__init__.py -------------------------------------------------------------------------------- /crnn/models/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bear63/sceneReco/ae1731e4a344359e1ff5a147ec8d3834afb16ba1/crnn/models/__init__.pyc -------------------------------------------------------------------------------- /crnn/models/crnn.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import utils 3 | 4 | 5 | class BidirectionalLSTM(nn.Module): 6 | 7 | def __init__(self, nIn, nHidden, nOut, ngpu): 8 | super(BidirectionalLSTM, self).__init__() 9 | self.ngpu = ngpu 10 | 11 | self.rnn = nn.LSTM(nIn, nHidden, bidirectional=True) 12 | self.embedding = nn.Linear(nHidden * 2, nOut) 13 | 14 | def forward(self, input): 15 | recurrent, _ = utils.data_parallel( 16 | self.rnn, input, self.ngpu) # [T, b, h * 2] 17 | 18 | T, b, h = recurrent.size() 19 | 
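        # Fold time and batch together so one Linear maps every timestep's
        # 2*nHidden features to nOut; the view() below restores [T, b, nOut].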
t_rec = recurrent.view(T * b, h) 20 | output = utils.data_parallel( 21 | self.embedding, t_rec, self.ngpu) # [T * b, nOut] 22 | output = output.view(T, b, -1) 23 | 24 | return output 25 | 26 | 27 | class CRNN(nn.Module): 28 | 29 | def __init__(self, imgH, nc, nclass, nh, ngpu, n_rnn=2, leakyRelu=False): 30 | super(CRNN, self).__init__() 31 | self.ngpu = ngpu 32 | assert imgH % 16 == 0, 'imgH has to be a multiple of 16' 33 | 34 | ks = [3, 3, 3, 3, 3, 3, 2] 35 | ps = [1, 1, 1, 1, 1, 1, 0] 36 | ss = [1, 1, 1, 1, 1, 1, 1] 37 | nm = [64, 128, 256, 256, 512, 512, 512] 38 | 39 | cnn = nn.Sequential() 40 | 41 | def convRelu(i, batchNormalization=False): 42 | nIn = nc if i == 0 else nm[i - 1] 43 | nOut = nm[i] 44 | cnn.add_module('conv{0}'.format(i), 45 | nn.Conv2d(nIn, nOut, ks[i], ss[i], ps[i])) 46 | if batchNormalization: 47 | cnn.add_module('batchnorm{0}'.format(i), nn.BatchNorm2d(nOut)) 48 | if leakyRelu: 49 | cnn.add_module('relu{0}'.format(i), 50 | nn.LeakyReLU(0.2, inplace=True)) 51 | else: 52 | cnn.add_module('relu{0}'.format(i), nn.ReLU(True)) 53 | 54 | convRelu(0) 55 | cnn.add_module('pooling{0}'.format(0), nn.MaxPool2d(2, 2)) # 64x16x64 56 | convRelu(1) 57 | cnn.add_module('pooling{0}'.format(1), nn.MaxPool2d(2, 2)) # 128x8x32 58 | convRelu(2, True) 59 | convRelu(3) 60 | cnn.add_module('pooling{0}'.format(2), nn.MaxPool2d((2, 2), 61 | (2, 1), 62 | (0, 1))) # 256x4x16 63 | convRelu(4, True) 64 | convRelu(5) 65 | cnn.add_module('pooling{0}'.format(3), nn.MaxPool2d((2, 2), 66 | (2, 1), 67 | (0, 1))) # 512x2x16 68 | convRelu(6, True) # 512x1x16 69 | 70 | self.cnn = cnn 71 | self.rnn = nn.Sequential( 72 | BidirectionalLSTM(512, nh, nh, ngpu), 73 | BidirectionalLSTM(nh, nh, nclass, ngpu) 74 | ) 75 | 76 | def forward(self, input): 77 | # conv features 78 | conv = utils.data_parallel(self.cnn, input, self.ngpu) 79 | b, c, h, w = conv.size() 80 | assert h == 1, "the height of conv must be 1" 81 | conv = conv.squeeze(2) 82 | conv = conv.permute(2, 0, 1) # [w, b, c] 83 | 84 | # rnn features 85 | output = utils.data_parallel(self.rnn, conv, self.ngpu) 86 | 87 | return output 88 | -------------------------------------------------------------------------------- /crnn/models/crnn.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bear63/sceneReco/ae1731e4a344359e1ff5a147ec8d3834afb16ba1/crnn/models/crnn.pyc -------------------------------------------------------------------------------- /crnn/models/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # encoding: utf-8 3 | 4 | import torch.nn as nn 5 | import torch.nn.parallel 6 | 7 | 8 | def data_parallel(model, input, ngpu): 9 | if isinstance(input.data, torch.cuda.FloatTensor) and ngpu > 1: 10 | output = nn.parallel.data_parallel(model, input, range(ngpu)) 11 | else: 12 | output = model(input) 13 | return output 14 | -------------------------------------------------------------------------------- /crnn/models/utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bear63/sceneReco/ae1731e4a344359e1ff5a147ec8d3834afb16ba1/crnn/models/utils.pyc -------------------------------------------------------------------------------- /crnn/test.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | 3 | import random 4 | import torch 5 | import torch.backends.cudnn as cudnn 6 | import torch.optim as optim 7 | 
import torch.utils.data 8 | from torch.autograd import Variable 9 | import numpy as np 10 | import os 11 | import util 12 | import dataset 13 | from PIL import Image 14 | import models.crnn as crnn 15 | import keys 16 | alphabet = keys.alphabet 17 | print(len(alphabet)) 18 | raw_input('\ninput:') 19 | converter = util.strLabelConverter(alphabet) 20 | model = crnn.CRNN(32, 1, len(alphabet)+1, 256, 1).cuda() 21 | path = './samples/netCRNN63.pth' 22 | model.load_state_dict(torch.load(path)) 23 | print(model) 24 | 25 | 26 | while 1: 27 | im_name = raw_input("\nplease input file name:") 28 | im_path = "./img/" + im_name 29 | image = Image.open(im_path).convert('L') 30 | scale = image.size[1]*1.0 / 32 31 | w = image.size[0] / scale 32 | w = int(w) 33 | print(w) 34 | 35 | transformer = dataset.resizeNormalize((w, 32)) 36 | image = transformer(image).cuda() 37 | image = image.view(1, *image.size()) 38 | image = Variable(image) 39 | model.eval() 40 | preds = model(image) 41 | _, preds = preds.max(2) 42 | preds = preds.squeeze(2) 43 | preds = preds.transpose(1, 0).contiguous().view(-1) 44 | preds_size = Variable(torch.IntTensor([preds.size(0)])) 45 | raw_pred = converter.decode(preds.data, preds_size.data, raw=True) 46 | sim_pred = converter.decode(preds.data, preds_size.data, raw=False) 47 | print('%-20s => %-20s' % (raw_pred, sim_pred)) 48 | 49 | -------------------------------------------------------------------------------- /crnn/util.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # encoding: utf-8 3 | 4 | import torch 5 | import torch.nn as nn 6 | import collections 7 | 8 | 9 | class strLabelConverter(object): 10 | 11 | def __init__(self, alphabet): 12 | self.alphabet = alphabet + '-' # for `-1` index 13 | self.dict = {} 14 | for i, char in enumerate(alphabet): 15 | # NOTE: 0 is reserved for 'blank' required by wrap_ctc 16 | self.dict[char] = i + 1 17 | def encode(self, text, depth=0): 18 | """Support batch or single str.""" 19 | length = [] 20 | result=[] 21 | for str in text: 22 | str = unicode(str,"utf8") 23 | length.append(len(str)) 24 | for char in str: 25 | #print(char) 26 | index = self.dict[char] 27 | result.append(index) 28 | text = result 29 | return (torch.IntTensor(text), torch.IntTensor(length)) 30 | 31 | def decode(self, t, length, raw=False): 32 | if length.numel() == 1: 33 | length = length[0] 34 | t = t[:length] 35 | if raw: 36 | return ''.join([self.alphabet[i - 1] for i in t]) 37 | else: 38 | char_list = [] 39 | for i in range(length): 40 | if t[i] != 0 and (not (i > 0 and t[i - 1] == t[i])): 41 | char_list.append(self.alphabet[t[i] - 1]) 42 | return ''.join(char_list) 43 | else: 44 | texts = [] 45 | index = 0 46 | for i in range(length.numel()): 47 | l = length[i] 48 | texts.append(self.decode( 49 | t[index:index + l], torch.IntTensor([l]), raw=raw)) 50 | index += l 51 | return texts 52 | 53 | 54 | class averager(object): 55 | 56 | def __init__(self): 57 | self.reset() 58 | 59 | def add(self, v): 60 | self.n_count += v.data.numel() 61 | # NOTE: not `+= v.sum()`, which will add a node in the compute graph, 62 | # which lead to memory leak 63 | self.sum += v.data.sum() 64 | 65 | def reset(self): 66 | self.n_count = 0 67 | self.sum = 0 68 | 69 | def val(self): 70 | res = 0 71 | if self.n_count != 0: 72 | res = self.sum / float(self.n_count) 73 | return res 74 | 75 | 76 | def oneHot(v, v_length, nc): 77 | batchSize = v_length.size(0) 78 | maxLength = v_length.max() 79 | v_onehot = torch.FloatTensor(batchSize, 
/crnn/util.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # encoding: utf-8
3 | 
4 | import torch
5 | import torch.nn as nn
6 | import collections
7 | 
8 | 
9 | class strLabelConverter(object):
10 | 
11 |     def __init__(self, alphabet):
12 |         self.alphabet = alphabet + '-'  # for `-1` index
13 |         self.dict = {}
14 |         for i, char in enumerate(alphabet):
15 |             # NOTE: 0 is reserved for 'blank' required by wrap_ctc
16 |             self.dict[char] = i + 1
17 | 
18 |     def encode(self, text, depth=0):
19 |         """Support batch or single str."""
20 |         length = []
21 |         result = []
22 |         for item in text:
23 |             item = unicode(item, "utf8")  # Python 2: decode bytes to unicode
24 |             length.append(len(item))
25 |             for char in item:
26 |                 result.append(self.dict[char])
27 |         return (torch.IntTensor(result), torch.IntTensor(length))
28 | 
29 |     def decode(self, t, length, raw=False):
30 |         if length.numel() == 1:
31 |             length = length[0]
32 |             t = t[:length]
33 |             if raw:
34 |                 return ''.join([self.alphabet[i - 1] for i in t])
35 |             else:
36 |                 char_list = []
37 |                 for i in range(length):
38 |                     # keep a label only if it is not blank (0) and not a
39 |                     # repeat of the previous frame (CTC collapse rule)
40 |                     if t[i] != 0 and (not (i > 0 and t[i - 1] == t[i])):
41 |                         char_list.append(self.alphabet[t[i] - 1])
42 |                 return ''.join(char_list)
43 |         else:
44 |             texts = []
45 |             index = 0
46 |             for i in range(length.numel()):
47 |                 l = length[i]
48 |                 texts.append(self.decode(
49 |                     t[index:index + l], torch.IntTensor([l]), raw=raw))
50 |                 index += l
51 |             return texts
52 | 
53 | 
54 | class averager(object):
55 | 
56 |     def __init__(self):
57 |         self.reset()
58 | 
59 |     def add(self, v):
60 |         self.n_count += v.data.numel()
61 |         # NOTE: not `+= v.sum()`, which would add a node to the compute
62 |         # graph and leak memory
63 |         self.sum += v.data.sum()
64 | 
65 |     def reset(self):
66 |         self.n_count = 0
67 |         self.sum = 0
68 | 
69 |     def val(self):
70 |         res = 0
71 |         if self.n_count != 0:
72 |             res = self.sum / float(self.n_count)
73 |         return res
74 | 
75 | 
76 | def oneHot(v, v_length, nc):
77 |     batchSize = v_length.size(0)
78 |     maxLength = v_length.max()
79 |     v_onehot = torch.FloatTensor(batchSize, maxLength, nc).fill_(0)
80 |     acc = 0
81 |     for i in range(batchSize):
82 |         length = v_length[i]
83 |         label = v[acc:acc + length].view(-1, 1).long()
84 |         v_onehot[i, :length].scatter_(1, label, 1.0)
85 |         acc += length
86 |     return v_onehot
87 | 
88 | 
89 | def loadData(v, data):
90 |     v.data.resize_(data.size()).copy_(data)
91 | 
92 | 
93 | def prettyPrint(v):
94 |     print('Size {0}, Type: {1}'.format(str(v.size()), v.data.type()))
95 |     print('| Max: %f | Min: %f | Mean: %f' % (v.max().data[0],
96 |                                               v.min().data[0],
97 |                                               v.mean().data[0]))
98 | 
99 | 
100 | def assureRatio(img):
101 |     """Ensure imgH <= imgW (portrait crops are upsampled to a square)."""
102 |     b, c, h, w = img.size()
103 |     if h > w:
104 |         main = nn.UpsamplingBilinear2d(size=(h, h), scale_factor=None)
105 |         img = main(img)
106 |     return img
107 | 
--------------------------------------------------------------------------------
/crnn/util.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bear63/sceneReco/ae1731e4a344359e1ff5a147ec8d3834afb16ba1/crnn/util.pyc
--------------------------------------------------------------------------------
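A hedged toy run of util.oneHot above, not repo code and with made-up labels; it relies on the legacy-PyTorch indexing semantics the rest of the repo assumes. The flat label vector is split by per-sample lengths and scattered into a zero-padded one-hot tensor:

# Hypothetical illustration (not part of the repo): packing a flat label
# vector into per-sample one-hot rows, padded to the longest label.
import torch
import util

v = torch.IntTensor([1, 3, 2])             # flat labels for the whole batch
v_length = torch.IntTensor([2, 1])         # sample 0 has 2 chars, sample 1 has 1
onehot = util.oneHot(v, v_length, 4)       # batch x maxLength x nc
print(onehot.size())                       # (2, 2, 4)
--------------------------------------------------------------------------------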
/crnnport.py:
--------------------------------------------------------------------------------
1 | #coding:utf-8
2 | import sys
3 | sys.path.insert(1, "./crnn")
4 | 
5 | import random
6 | import torch
7 | import torch.backends.cudnn as cudnn
8 | import torch.optim as optim
9 | import torch.utils.data
10 | from torch.autograd import Variable
11 | import numpy as np
12 | import os
13 | import util
14 | import dataset
15 | from PIL import Image
16 | import models.crnn as crnn
17 | import keys
18 | from math import fabs, sin, cos, radians, degrees, atan2
19 | import mahotas  # only needed for the commented-out imsave debug call below
20 | import cv2
21 | 
22 | def dumpRotateImage(img, degree, pt1, pt2, pt3, pt4):
23 |     """Rotate img by `degree` and crop the box spanned by pt1 and pt3."""
24 |     height, width = img.shape[:2]
25 |     heightNew = int(width * fabs(sin(radians(degree))) + height * fabs(cos(radians(degree))))
26 |     widthNew = int(height * fabs(sin(radians(degree))) + width * fabs(cos(radians(degree))))
27 |     matRotation = cv2.getRotationMatrix2D((width / 2, height / 2), degree, 1)
28 |     matRotation[0, 2] += (widthNew - width) / 2
29 |     matRotation[1, 2] += (heightNew - height) / 2
30 |     imgRotation = cv2.warpAffine(img, matRotation, (widthNew, heightNew), borderValue=(255, 255, 255))
31 |     pt1 = list(pt1)
32 |     pt3 = list(pt3)
33 |     # map the two opposite corners through the rotation matrix
34 |     [[pt1[0]], [pt1[1]]] = np.dot(matRotation, np.array([[pt1[0]], [pt1[1]], [1]]))
35 |     [[pt3[0]], [pt3[1]]] = np.dot(matRotation, np.array([[pt3[0]], [pt3[1]], [1]]))
36 |     imgOut = imgRotation[int(pt1[1]):int(pt3[1]), int(pt1[0]):int(pt3[0])]
37 |     return imgOut
38 | 
39 | def crnnSource():
40 |     alphabet = keys.alphabet
41 |     converter = util.strLabelConverter(alphabet)
42 |     model = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1).cuda()
43 |     path = './crnn/samples/netCRNN63.pth'
44 |     model.load_state_dict(torch.load(path))
45 |     return model, converter
46 | 
47 | def crnnRec(model, converter, im, text_recs):
48 |     index = 0
49 |     for rec in text_recs:
50 |         pt1 = (rec[0], rec[1])
51 |         pt2 = (rec[2], rec[3])
52 |         pt3 = (rec[6], rec[7])
53 |         pt4 = (rec[4], rec[5])
54 |         partImg = dumpRotateImage(im, degrees(atan2(pt2[1] - pt1[1], pt2[0] - pt1[0])), pt1, pt2, pt3, pt4)
55 |         # mahotas.imsave('%s.jpg' % index, partImg)
56 | 
57 |         image = Image.fromarray(partImg).convert('L')
58 |         # rescale the crop to height 32, keeping the aspect ratio
59 |         scale = image.size[1] * 1.0 / 32
60 |         w = int(image.size[0] / scale)
61 | 
62 |         transformer = dataset.resizeNormalize((w, 32))
63 |         image = transformer(image).cuda()
64 |         image = image.view(1, *image.size())
65 |         image = Variable(image)
66 |         model.eval()
67 |         preds = model(image)
68 |         _, preds = preds.max(2)
69 |         preds = preds.squeeze(2)
70 |         preds = preds.transpose(1, 0).contiguous().view(-1)
71 |         preds_size = Variable(torch.IntTensor([preds.size(0)]))
72 |         raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
73 |         sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
74 |         print(index)
75 |         print(sim_pred)
76 |         index = index + 1
77 | 
--------------------------------------------------------------------------------
/crnnport.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bear63/sceneReco/ae1731e4a344359e1ff5a147ec8d3834afb16ba1/crnnport.pyc
--------------------------------------------------------------------------------
/ctpnport.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import numpy as np
3 | 
4 | class cfg:
5 |     MEAN = np.float32([102.9801, 115.9465, 122.7717])
6 |     TEST_GPU_ID = 0
7 |     SCALE = 600
8 |     MAX_SCALE = 1000
9 | 
10 |     LINE_MIN_SCORE = 0.7
11 |     TEXT_PROPOSALS_MIN_SCORE = 0.7
12 |     TEXT_PROPOSALS_NMS_THRESH = 0.3
13 |     MAX_HORIZONTAL_GAP = 50
14 |     TEXT_LINE_NMS_THRESH = 0.3
15 |     MIN_NUM_PROPOSALS = 2
16 |     MIN_RATIO = 1.2
17 |     MIN_V_OVERLAPS = 0.7
18 |     MIN_SIZE_SIM = 0.7
19 |     TEXT_PROPOSALS_WIDTH = 16
20 | 
21 | def init():
22 |     # the CTPN modules must be importable before the imports below run
23 |     sys.path.insert(0, "./CTPN/tools")
24 |     sys.path.insert(0, "./CTPN/caffe/python")
25 |     sys.path.insert(0, "./CTPN/src")
26 | init()
27 | 
28 | from other import draw_boxes, resize_im, CaffeModel
29 | import cv2, os, caffe
30 | from detectors import TextProposalDetector, TextDetector
31 | import os.path as osp
32 | from utils.timer import Timer
33 | 
34 | def ctpnSource():
35 |     DEMO_IMAGE_DIR = "img/"
36 |     NET_DEF_FILE = "CTPN/models/deploy.prototxt"
37 |     MODEL_FILE = "CTPN/models/ctpn_trained_model.caffemodel"
38 |     caffe.set_mode_gpu()
39 |     caffe.set_device(cfg.TEST_GPU_ID)
40 |     # initialize the detectors
41 |     text_proposals_detector = TextProposalDetector(CaffeModel(NET_DEF_FILE, MODEL_FILE))
42 |     text_detector = TextDetector(text_proposals_detector)
43 |     return text_detector
44 | 
45 | def getCharBlock(text_detector, im):
46 |     im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
47 |     cv2.imshow("src", im)
48 |     tmp = im.copy()
49 |     text_lines = text_detector.detect(im)
50 |     text_recs = draw_boxes(tmp, text_lines, caption='im_name', wait=True)
51 |     return tmp, text_recs
52 | 
--------------------------------------------------------------------------------
/ctpnport.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bear63/sceneReco/ae1731e4a344359e1ff5a147ec8d3834afb16ba1/ctpnport.pyc
--------------------------------------------------------------------------------
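crnnRec above pulls the four corners out of each 8-value detection row in a fixed order and derives the deskew angle from the top edge. A hedged illustration with made-up coordinates, not repo code:

# Hypothetical illustration (not part of the repo): the corner layout crnnRec
# assumes for each box in text_recs, and the rotation angle it computes.
from math import atan2, degrees

rec = [10, 20, 110, 24, 12, 52, 112, 56]     # made-up detection row
pt1 = (rec[0], rec[1])                       # top-left
pt2 = (rec[2], rec[3])                       # top-right
pt3 = (rec[6], rec[7])                       # bottom-right
pt4 = (rec[4], rec[5])                       # bottom-left
angle = degrees(atan2(pt2[1] - pt1[1], pt2[0] - pt1[0]))
print(angle)                                 # ~2.3 degrees of skew to undo
--------------------------------------------------------------------------------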
/demo.py:
--------------------------------------------------------------------------------
1 | from ctpnport import *
2 | from crnnport import *
3 | 
4 | # ctpn
5 | text_detector = ctpnSource()
6 | # crnn
7 | model, converter = crnnSource()
8 | 
9 | timer = Timer()
10 | print "\ntype 'exit' to quit\n"
11 | while 1:
12 |     im_name = raw_input("\nplease input file name:")
13 |     if im_name == "exit":
14 |         break
15 |     im_path = "./img/" + im_name
16 |     im = cv2.imread(im_path)
17 |     if im is None:
18 |         continue
19 |     timer.tic()
20 |     img, text_recs = getCharBlock(text_detector, im)
21 |     crnnRec(model, converter, img, text_recs)
22 |     print "Time: %f" % timer.toc()
23 |     cv2.waitKey(0)
24 | 
--------------------------------------------------------------------------------
/img/1755.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bear63/sceneReco/ae1731e4a344359e1ff5a147ec8d3834afb16ba1/img/1755.jpg
--------------------------------------------------------------------------------
/img/21.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bear63/sceneReco/ae1731e4a344359e1ff5a147ec8d3834afb16ba1/img/21.bmp
--------------------------------------------------------------------------------
/img/6408.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bear63/sceneReco/ae1731e4a344359e1ff5a147ec8d3834afb16ba1/img/6408.bmp
--------------------------------------------------------------------------------
/img/t37.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bear63/sceneReco/ae1731e4a344359e1ff5a147ec8d3834afb16ba1/img/t37.jpg
--------------------------------------------------------------------------------
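For reference, a hedged condensation of the pipeline demo.py wires together, not repo code: CTPN proposes text-line boxes, then crnnRec rotates, crops, and recognizes each box. The image name is an example, and the CTPN caffemodel and CRNN .pth weights must be obtained separately.

# Hypothetical condensed pipeline (mirrors demo.py above).
import cv2
from ctpnport import ctpnSource, getCharBlock
from crnnport import crnnSource, crnnRec

text_detector = ctpnSource()                  # loads CTPN/models/*.caffemodel
model, converter = crnnSource()               # loads crnn/samples/netCRNN63.pth
im = cv2.imread('./img/t37.jpg')
img, text_recs = getCharBlock(text_detector, im)
crnnRec(model, converter, img, text_recs)     # prints one string per box
--------------------------------------------------------------------------------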