├── .gitignore
├── demo
│   └── re1.jpg
├── location
│   ├── README.md
│   ├── output_crop
│   │   ├── 07010974437.jpg
│   │   ├── 18893039105.jpg
│   │   ├── 38661425763.jpg
│   │   ├── 50758360544.jpg
│   │   ├── 55038743175.jpg
│   │   ├── 070109744371.jpg
│   │   ├── 070109744372.jpg
│   │   ├── 070109744373.jpg
│   │   ├── 188930391052.jpg
│   │   ├── 188930391053.jpg
│   │   ├── 293072937641.jpg
│   │   ├── 309392130741.jpg
│   │   └── 311990100321.jpg
│   ├── test_imgs
│   │   ├── 07010974437.jpg
│   │   ├── 070109744371.jpg
│   │   ├── 070109744372.jpg
│   │   ├── 070109744373.jpg
│   │   ├── 18893039105.jpg
│   │   ├── 188930391051.jpg
│   │   ├── 188930391052.jpg
│   │   ├── 188930391053.jpg
│   │   ├── 293072937641.jpg
│   │   ├── 309392130741.jpg
│   │   ├── 311990100321.jpg
│   │   ├── 38661425763.jpg
│   │   ├── 50758360544.jpg
│   │   └── 55038743175.jpg
│   ├── output
│   │   ├── 070109744371_predict.jpg
│   │   ├── 070109744372_predict.jpg
│   │   ├── 070109744373_predict.jpg
│   │   ├── 07010974437_predict.jpg
│   │   ├── 188930391052_predict.jpg
│   │   ├── 188930391053_predict.jpg
│   │   ├── 18893039105_predict.jpg
│   │   ├── 293072937641_predict.jpg
│   │   ├── 309392130741_predict.jpg
│   │   ├── 311990100321_predict.jpg
│   │   ├── 38661425763_predict.jpg
│   │   ├── 50758360544_predict.jpg
│   │   └── 55038743175_predict.jpg
│   ├── LICENSE
│   ├── data_loader.py
│   ├── cfg.py
│   ├── train.py
│   ├── losses.py
│   ├── nms.py
│   ├── network.py
│   ├── predict.py
│   ├── label.py
│   └── preprocess.py
├── recognition
│   ├── test
│   │   ├── 12233418739.jpg
│   │   ├── 22046298859.jpg
│   │   ├── 28510715459.jpg
│   │   ├── 37774346979.jpg
│   │   ├── 41679405336.jpg
│   │   ├── 84999825604.jpg
│   │   ├── 97785067838.jpg
│   │   └── 99851544924.jpg
│   ├── result.txt
│   ├── README.md
│   ├── cfg.py
│   ├── predict.py
│   ├── train.py
│   ├── data_loader.py
│   └── network.py
├── cfg.py
├── LICENSE
├── README.md
├── loc_and_reg.py
└── util.py
/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | data_util/ 3 | .idea/ 4 | -------------------------------------------------------------------------------- /demo/re1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/demo/re1.jpg -------------------------------------------------------------------------------- /location/README.md: -------------------------------------------------------------------------------- 1 | The number detection network for this project is based on [Advanced_EAST](https://github.com/huoyijie/AdvancedEAST) 2 | -------------------------------------------------------------------------------- /recognition/test/12233418739.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/recognition/test/12233418739.jpg -------------------------------------------------------------------------------- /recognition/test/22046298859.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/recognition/test/22046298859.jpg -------------------------------------------------------------------------------- /recognition/test/28510715459.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/recognition/test/28510715459.jpg -------------------------------------------------------------------------------- /recognition/test/37774346979.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/recognition/test/37774346979.jpg 
-------------------------------------------------------------------------------- /recognition/test/41679405336.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/recognition/test/41679405336.jpg -------------------------------------------------------------------------------- /recognition/test/84999825604.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/recognition/test/84999825604.jpg -------------------------------------------------------------------------------- /recognition/test/97785067838.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/recognition/test/97785067838.jpg -------------------------------------------------------------------------------- /recognition/test/99851544924.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/recognition/test/99851544924.jpg -------------------------------------------------------------------------------- /location/output_crop/07010974437.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output_crop/07010974437.jpg -------------------------------------------------------------------------------- /location/output_crop/18893039105.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output_crop/18893039105.jpg -------------------------------------------------------------------------------- /location/output_crop/38661425763.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output_crop/38661425763.jpg -------------------------------------------------------------------------------- /location/output_crop/50758360544.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output_crop/50758360544.jpg -------------------------------------------------------------------------------- /location/output_crop/55038743175.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output_crop/55038743175.jpg -------------------------------------------------------------------------------- /location/test_imgs/07010974437.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/test_imgs/07010974437.jpg -------------------------------------------------------------------------------- /location/test_imgs/070109744371.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/test_imgs/070109744371.jpg -------------------------------------------------------------------------------- /location/test_imgs/070109744372.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/test_imgs/070109744372.jpg -------------------------------------------------------------------------------- /location/test_imgs/070109744373.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/test_imgs/070109744373.jpg -------------------------------------------------------------------------------- /location/test_imgs/18893039105.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/test_imgs/18893039105.jpg -------------------------------------------------------------------------------- /location/test_imgs/188930391051.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/test_imgs/188930391051.jpg -------------------------------------------------------------------------------- /location/test_imgs/188930391052.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/test_imgs/188930391052.jpg -------------------------------------------------------------------------------- /location/test_imgs/188930391053.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/test_imgs/188930391053.jpg -------------------------------------------------------------------------------- /location/test_imgs/293072937641.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/test_imgs/293072937641.jpg -------------------------------------------------------------------------------- /location/test_imgs/309392130741.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/test_imgs/309392130741.jpg -------------------------------------------------------------------------------- /location/test_imgs/311990100321.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/test_imgs/311990100321.jpg -------------------------------------------------------------------------------- /location/test_imgs/38661425763.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/test_imgs/38661425763.jpg -------------------------------------------------------------------------------- /location/test_imgs/50758360544.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/test_imgs/50758360544.jpg -------------------------------------------------------------------------------- /location/test_imgs/55038743175.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/test_imgs/55038743175.jpg -------------------------------------------------------------------------------- /location/output_crop/070109744371.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output_crop/070109744371.jpg -------------------------------------------------------------------------------- /location/output_crop/070109744372.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output_crop/070109744372.jpg -------------------------------------------------------------------------------- /location/output_crop/070109744373.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output_crop/070109744373.jpg -------------------------------------------------------------------------------- /location/output_crop/188930391052.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output_crop/188930391052.jpg -------------------------------------------------------------------------------- /location/output_crop/188930391053.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output_crop/188930391053.jpg -------------------------------------------------------------------------------- /location/output_crop/293072937641.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output_crop/293072937641.jpg -------------------------------------------------------------------------------- /location/output_crop/309392130741.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output_crop/309392130741.jpg -------------------------------------------------------------------------------- /location/output_crop/311990100321.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output_crop/311990100321.jpg -------------------------------------------------------------------------------- /location/output/070109744371_predict.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output/070109744371_predict.jpg -------------------------------------------------------------------------------- /location/output/070109744372_predict.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output/070109744372_predict.jpg -------------------------------------------------------------------------------- /location/output/070109744373_predict.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output/070109744373_predict.jpg -------------------------------------------------------------------------------- /location/output/07010974437_predict.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output/07010974437_predict.jpg -------------------------------------------------------------------------------- /location/output/188930391052_predict.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output/188930391052_predict.jpg -------------------------------------------------------------------------------- /location/output/188930391053_predict.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output/188930391053_predict.jpg -------------------------------------------------------------------------------- /location/output/18893039105_predict.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output/18893039105_predict.jpg -------------------------------------------------------------------------------- /location/output/293072937641_predict.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output/293072937641_predict.jpg -------------------------------------------------------------------------------- /location/output/309392130741_predict.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output/309392130741_predict.jpg -------------------------------------------------------------------------------- /location/output/311990100321_predict.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output/311990100321_predict.jpg -------------------------------------------------------------------------------- /location/output/38661425763_predict.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output/38661425763_predict.jpg -------------------------------------------------------------------------------- /location/output/50758360544_predict.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output/50758360544_predict.jpg -------------------------------------------------------------------------------- /location/output/55038743175_predict.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bobo-y/number_detection_recognition/HEAD/location/output/55038743175_predict.jpg -------------------------------------------------------------------------------- /recognition/result.txt: -------------------------------------------------------------------------------- 1 | 22046298859.jpg : 22046298859 2 | 
97785067838.jpg : 97785067838 3 | 84999825604.jpg : 84999825604 4 | 99851544924.jpg : 99851544924 5 | 28510715459.jpg : 28510715459 6 | 12233418739.jpg : 12233418739 7 | 41679405336.jpg : 41679405336 8 | 37774346979.jpg : 37774346979 9 | -------------------------------------------------------------------------------- /cfg.py: -------------------------------------------------------------------------------- 1 | image_size = 512 2 | pixel_threshold = 0.9 3 | side_vertex_pixel_threshold = 0.9 4 | trunc_threshold = 0.1 5 | pixel_size = 4 6 | width = 200 7 | height = 31 8 | label_len = 11 9 | characters = '0123456789' + '-' 10 | label_classes = len(characters) 11 | location_model = "loc.h5" 12 | recognition_model = "recog.h5" 13 | epsilon = 1e-4 -------------------------------------------------------------------------------- /recognition/README.md: -------------------------------------------------------------------------------- 1 | Number recognition based on CRNN (CNN + Bi-LSTM) 2 | 3 | train: 4 | * prepare your dataset, in the same format as the [MJSynth data](http://www.robots.ox.ac.uk/~vgg/data/text/mjsynth.tar.gz) 5 | * modify the config params in cfg.py (see the default values) 6 | * python train.py 7 | 8 | predict: 9 | * specify the test images dir 10 | * python predict.py -------------------------------------------------------------------------------- /recognition/cfg.py: -------------------------------------------------------------------------------- 1 | width = 200 2 | height = 31 3 | label_len = 11 # we recognize phone numbers and assume a maximum length of 11; it can be changed at will 4 | # according to actual needs 5 | characters = '0123456789' + '-' # recognized characters are 0 to 9; '-' serves as the blank for the CTC loss 6 | label_classes = len(characters) # number of character classes to recognize 7 | ocr_dataset_path = "dataset/imgs" 8 | save_model_path = "saved_model/weights.h5" 9 | log_dir = "logs" 10 | load_model = True 11 | load_model_path = "model/weights_base.h5" 12 | checkpoint_path = "save_model/val_model.h5" 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 lulu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /location/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright © 2018 huoyijie, https://github.com/huoyijie/AdvancedEAST 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /location/data_loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from keras.preprocessing import image 4 | from keras.applications.vgg16 import preprocess_input 5 | 6 | import cfg 7 | 8 | 9 | def gen(batch_size=cfg.batch_size, is_val=False): 10 | img_h, img_w = cfg.max_train_img_size, cfg.max_train_img_size 11 | x = np.zeros((batch_size, img_h, img_w, cfg.num_channels), dtype=np.float32) 12 | pixel_num_h = img_h // cfg.pixel_size 13 | pixel_num_w = img_w // cfg.pixel_size 14 | y = np.zeros((batch_size, pixel_num_h, pixel_num_w, 7), dtype=np.float32) 15 | if is_val: 16 | with open(os.path.join(cfg.data_dir, cfg.val_fname), 'r') as f_val: 17 | f_list = f_val.readlines() 18 | else: 19 | with open(os.path.join(cfg.data_dir, cfg.train_fname), 'r') as f_train: 20 | f_list = f_train.readlines() 21 | while True: 22 | for i in range(batch_size): 23 | # random gen an image name 24 | random_img = np.random.choice(f_list) 25 | img_filename = str(random_img).strip().split(',')[0] 26 | # load img and img anno 27 | img_path = os.path.join(cfg.data_dir, 28 | cfg.train_image_dir_name, 29 | img_filename) 30 | img = image.load_img(img_path) 31 | img = image.img_to_array(img) 32 | x[i] = preprocess_input(img, mode='tf') 33 | gt_file = os.path.join(cfg.data_dir, 34 | cfg.train_label_dir_name, 35 | img_filename[:-4] + '_gt.npy') 36 | y[i] = np.load(gt_file) 37 | yield x, y 38 | -------------------------------------------------------------------------------- /location/cfg.py: -------------------------------------------------------------------------------- 1 | epoch_num = 24 2 | lr = 1e-3 3 | decay = 5e-4 4 | patience = 5 5 | load_weights = False 6 | lambda_inside_score_loss = 4.0 7 | lambda_side_vertex_code_loss = 1.0 8 | lambda_side_vertex_coord_loss = 1.0 9 | 10 | total_img = 10000 11 | validation_split_ratio = 0.1 12 | image_size = 512 # (height == width, in [256, 384, 512, 640, 736]) 13 | batch_size = 4 14 | steps_per_epoch = total_img * (1 - validation_split_ratio) // batch_size 15 | 
validation_steps = total_img * validation_split_ratio // batch_size 16 | 17 | data_dir = 'icpr/' 18 | origin_image_dir_name = 'image_10000/' 19 | origin_txt_dir_name = 'txt_10000/' 20 | train_image_dir_name = 'images_%s/' % image_size 21 | train_label_dir_name = 'labels_%s/' % image_size 22 | show_gt_image_dir_name = 'show_gt_images_%s/' % image_size 23 | show_act_image_dir_name = 'show_act_images_%s/' % image_size 24 | gen_origin_img = True 25 | draw_gt_quad = True 26 | draw_act_quad = True 27 | val_fname = 'val_%s.txt' % image_size 28 | train_fname = 'train_%s.txt' % image_size 29 | # in the paper it's 0.3; maybe too large for this problem 30 | shrink_ratio = 0.2 31 | # pixels between 0.2 and 0.6 are side pixels 32 | shrink_side_ratio = 0.6 33 | epsilon = 1e-4 34 | 35 | num_channels = 3 36 | feature_layers_range = range(5, 1, -1) 37 | # feature_layers_range = range(3, 0, -1) 38 | feature_layers_num = len(feature_layers_range) 39 | # pixel_size = 4 40 | pixel_size = 2 ** feature_layers_range[-1] 41 | 42 | model_weights_path = 'saved_model/weights_%s.h5' % image_size 43 | saved_model_file_path = 'saved_model/model_%s.h5' % image_size 44 | saved_model_weights_file_path = 'model/weights_base.h5' 45 | 46 | pixel_threshold = 0.9 47 | side_vertex_pixel_threshold = 0.9 48 | trunc_threshold = 0.1 49 | predict_write2txt = False 50 | detection_box_crop = True 51 | initial_epoch = 0 # train.py passes cfg.initial_epoch to fit_generator; 0 is an assumed default 52 | -------------------------------------------------------------------------------- /recognition/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | import warnings 5 | from network import CRNN 6 | import cfg 7 | import sys 8 | import keras.backend as ktf 9 | 10 | 11 | warnings.filterwarnings('ignore') 12 | _, model = CRNN(cfg.width, cfg.height, cfg.label_len, cfg.characters).network() 13 | model.load_weights(cfg.save_model_path) 14 | # model.summary() 15 | 16 | 17 | def predict(infer_model, img_path): 18 | img = cv2.imread(img_path) 19 | img_size = img.shape 20 | if (img_size[1] / img_size[0] * 1.0) < 6: 21 | img_reshape = cv2.resize(img, (int(31.0 / img_size[0] * img_size[1]), cfg.height)) 22 | 23 | mat_ori = np.zeros((cfg.height, cfg.width - int(31.0 / img_size[0] * img_size[1]), 3), dtype=np.uint8) 24 | out_img = np.concatenate([img_reshape, mat_ori], axis=1).transpose([1, 0, 2]) 25 | else: 26 | out_img = cv2.resize(img, (cfg.width, cfg.height), interpolation=cv2.INTER_CUBIC) 27 | out_img = np.asarray(out_img) 28 | out_img = out_img.transpose([1, 0, 2]) 29 | 30 | y_pred = infer_model.predict(np.expand_dims(out_img, axis=0)) 31 | shape = y_pred[:, 2:, :].shape 32 | ctc_decode = ktf.ctc_decode(y_pred[:, 2:, :], input_length=np.ones(shape[0]) * shape[1])[0][0] 33 | out = ktf.get_value(ctc_decode)[:, :cfg.label_len] 34 | result = ''.join([cfg.characters[k] for k in out[0]]) 35 | return result 36 | 37 | 38 | def main(): 39 | imgs_list = os.listdir('test') 40 | result_txt = open('result.txt', 'a+') 41 | for img_name in imgs_list: 42 | result = predict(model, os.path.join('test', img_name)) 43 | result = img_name + " : " + result + "\n" 44 | print(result) 45 | result_txt.write(result) 46 | result_txt.close() 47 | 48 | 49 | if __name__ == '__main__': 50 | main() 51 | 52 | -------------------------------------------------------------------------------- /location/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | from keras.callbacks import EarlyStopping, ModelCheckpoint 3 | from keras.optimizers 
import Adam 4 | import keras.backend as ktf 5 | import tensorflow as tf 6 | import cfg 7 | from network import East 8 | from losses import quad_loss 9 | from data_loader import gen 10 | 11 | 12 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 13 | tf_config = tf.ConfigProto() 14 | tf_config.gpu_options.per_process_gpu_memory_fraction = 0.5 15 | session = tf.Session(config=tf_config) 16 | ktf.set_session(session) 17 | 18 | east = East() 19 | east_network = east.east_network() 20 | east_network.summary() 21 | east_network.compile(loss=quad_loss, optimizer=Adam(lr=cfg.lr, 22 | decay=cfg.decay)) 23 | if cfg.load_weights and os.path.exists(cfg.saved_model_weights_file_path): 24 | east_network.load_weights(cfg.saved_model_weights_file_path) 25 | 26 | east_network.fit_generator(generator=gen(), 27 | steps_per_epoch=cfg.steps_per_epoch, 28 | epochs=cfg.epoch_num, 29 | validation_data=gen(is_val=True), 30 | validation_steps=cfg.validation_steps, 31 | verbose=1, 32 | initial_epoch=cfg.initial_epoch, 33 | callbacks=[EarlyStopping(patience=cfg.patience, verbose=1), 34 | ModelCheckpoint(filepath=cfg.model_weights_path, 35 | save_best_only=True, 36 | save_weights_only=True, 37 | verbose=1)]) 38 | east_network.save(cfg.saved_model_file_path) 39 | east_network.save_weights(cfg.saved_model_weights_file_path) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | number detection and recognition based on [AdvancedEast](https://github.com/huoyijie/AdvancedEAST) and [CRNN](https://arxiv.org/abs/1507.05717) 2 | ____ 3 | Detection and Crop: 4 | 5 | 6 | ____ 7 | Recognition: 8 | 9 | * 22046298859.jpg : 22046298859 10 | * 97785067838.jpg : 97785067838 11 | * 84999825604.jpg : 84999825604 12 | * 99851544924.jpg : 99851544924 13 | * 28510715459.jpg : 28510715459 14 | * 12233418739.jpg : 12233418739 15 | * 41679405336.jpg : 41679405336 16 | * 37774346979.jpg : 37774346979 17 | 18 | ____ 19 | limitations: 20 | *When the two models are tested on their respective validation sets, each reaches an accuracy of about 0.9. However, the training data I generated for the recognizer is horizontal, while the numbers in the cropped detection results introduce rotation and other distortions, so the results are noticeably worse when the two models are used in combination.* 21 | 22 | 23 | ---- 24 | # Detection 25 | 26 | ## training 27 | * prepare training data; for the data format refer to [ICPR](https://tianchi.aliyun.com/competition/introduction.htm?spm=5176.100066.0.0.3bcad780oQ9Ce4&raceId=231651) 28 | * modify params in cfg.py 29 | * run python preprocess.py to resize images and generate .npy training files 30 | * run python label.py 31 | * run python train.py to train the network 32 | ## testing 33 | * modify your images' dir in predict.py, and run python predict.py; we then get three outputs: bounding boxes on the original 34 | images, the cropped images, and the coordinates (txt file); a minimal usage sketch follows. 
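For reference, here is a minimal usage sketch in Python that mirrors the `__main__` block of location/predict.py (it assumes trained weights already exist at cfg.model_weights_path and that the output/ and output_crop/ directories have been created):

import os
import cfg
from network import East
from predict import predict

# build the AdvancedEAST detector and load the trained weights
east_detect = East().east_network()
east_detect.load_weights(cfg.model_weights_path)
# run detection on every image in test_imgs/; annotated images go to output/, crops to output_crop/
for name in os.listdir('test_imgs'):
    predict(east_detect, os.path.join('test_imgs', name), cfg.pixel_threshold)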
35 | 36 | *for more details please refer to [AdvancedEast](https://github.com/huoyijie/AdvancedEAST)* 37 | 38 | ----- 39 | # Recognition 40 | 41 | ## training 42 | * prepare training data; for the data format refer to [MJSynth data](http://www.robots.ox.ac.uk/~vgg/data/text/mjsynth.tar.gz) 43 | * modify params in cfg.py 44 | * modify input_shape=(None, 50, 7, 512) in train.py line 55; the input_shape corresponds to bn_shape = bn4.get_shape() in network.py (with width=200 and height=31, the two 2x2 max-poolings give a feature map of shape (None, 200/4, 31/4, 512) = (None, 50, 7, 512)) 45 | * run python train.py 46 | ## testing 47 | * modify your images' dir in predict.py, then run python predict.py 48 | ---- -------------------------------------------------------------------------------- /recognition/train.py: -------------------------------------------------------------------------------- 1 | from keras.callbacks import ModelCheckpoint, Callback 2 | from keras.callbacks import TensorBoard 3 | from data_loader import * 4 | from network import * 5 | import keras.backend as ktf 6 | import cfg 7 | import os 8 | import tensorflow as tf 9 | 10 | 11 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 12 | tf_config = tf.ConfigProto() 13 | tf_config.gpu_options.per_process_gpu_memory_fraction = 0.5 14 | session = tf.Session(config=tf_config) 15 | ktf.set_session(session) 16 | 17 | 18 | class Evaluate(Callback): 19 | 20 | def on_epoch_end(self, epoch, logs=None): 21 | 22 | def evaluate(input_model): 23 | correct_prediction = 0 24 | generator = img_gen_val_lexicon() 25 | x_test, y_test = next(generator) 26 | y_pred = input_model.predict(x_test) 27 | shape = y_pred[:, 2:, :].shape 28 | ctc_decode = ktf.ctc_decode(y_pred[:, 2:, :], input_length=np.ones(shape[0]) * shape[1])[0][0] 29 | out = ktf.get_value(ctc_decode)[:, :cfg.label_len] 30 | 31 | for m in range(1000): 32 | result_str = ''.join([cfg.characters[k] for k in out[m]]) 33 | result_str = result_str.replace('-', '') 34 | if result_str == y_test[m]: 35 | correct_prediction += 1 36 | else: 37 | print(result_str, y_test[m]) 38 | 39 | return correct_prediction * 1.0 / 10 # 1000 val samples, so this value is a percentage 40 | acc = evaluate(infer_model) 41 | print('') 42 | print('acc:'+str(acc)+"%") 43 | 44 | 45 | evaluator = Evaluate() 46 | 47 | 48 | checkpoint = ModelCheckpoint(cfg.checkpoint_path, monitor='loss', verbose=1, save_best_only=True, mode='min') 49 | 50 | 51 | train_model, infer_model = CRNN(cfg.width, cfg.height, cfg.label_len, cfg.characters).network() 52 | if cfg.load_model: 53 | train_model.load_weights(cfg.load_model_path, by_name=True, skip_mismatch=True) 54 | train_model.summary() 55 | train_model.fit_generator(img_gen_lexicon(input_shape=(None, 50, 7, 512)), steps_per_epoch=2000, epochs=50, verbose=1, 56 | callbacks=[evaluator, 57 | checkpoint, 58 | TensorBoard(log_dir=cfg.log_dir)] 59 | ) 60 | infer_model.save(cfg.save_model_path) 61 | 62 | -------------------------------------------------------------------------------- /location/losses.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | import cfg 4 | 5 | 6 | def quad_loss(y_true, y_pred): 7 | # loss for inside_score 8 | logits = y_pred[:, :, :, :1] 9 | labels = y_true[:, :, :, :1] 10 | # balance positive and negative samples in an image 11 | beta = 1 - tf.reduce_mean(labels) 12 | # first apply sigmoid activation 13 | predicts = tf.nn.sigmoid(logits) 14 | # log + epsilon for numerical stability 15 | inside_score_loss = tf.reduce_mean( 16 | -1 * (beta * labels * tf.log(predicts + cfg.epsilon) + 17 | (1 - beta) * (1 - labels) * tf.log(1 - predicts + cfg.epsilon))) 18 | inside_score_loss *= 
cfg.lambda_inside_score_loss 19 | 20 | # loss for side_vertex_code 21 | vertex_logits = y_pred[:, :, :, 1:3] 22 | vertex_labels = y_true[:, :, :, 1:3] 23 | vertex_beta = 1 - (tf.reduce_mean(y_true[:, :, :, 1:2]) 24 | / (tf.reduce_mean(labels) + cfg.epsilon)) 25 | vertex_predicts = tf.nn.sigmoid(vertex_logits) 26 | pos = -1 * vertex_beta * vertex_labels * tf.log(vertex_predicts + 27 | cfg.epsilon) 28 | neg = -1 * (1 - vertex_beta) * (1 - vertex_labels) * tf.log( 29 | 1 - vertex_predicts + cfg.epsilon) 30 | positive_weights = tf.cast(tf.equal(y_true[:, :, :, 0], 1), tf.float32) 31 | side_vertex_code_loss = \ 32 | tf.reduce_sum(tf.reduce_sum(pos + neg, axis=-1) * positive_weights) / ( 33 | tf.reduce_sum(positive_weights) + cfg.epsilon) 34 | side_vertex_code_loss *= cfg.lambda_side_vertex_code_loss 35 | 36 | # loss for side_vertex_coord delta 37 | g_hat = y_pred[:, :, :, 3:] 38 | g_true = y_true[:, :, :, 3:] 39 | vertex_weights = tf.cast(tf.equal(y_true[:, :, :, 1], 1), tf.float32) 40 | pixel_wise_smooth_l1norm = smooth_l1_loss(g_hat, g_true, vertex_weights) 41 | side_vertex_coord_loss = tf.reduce_sum(pixel_wise_smooth_l1norm) / ( 42 | tf.reduce_sum(vertex_weights) + cfg.epsilon) 43 | side_vertex_coord_loss *= cfg.lambda_side_vertex_coord_loss 44 | return inside_score_loss + side_vertex_code_loss + side_vertex_coord_loss 45 | 46 | 47 | def smooth_l1_loss(prediction_tensor, target_tensor, weights): 48 | n_q = tf.reshape(quad_norm(target_tensor), tf.shape(weights)) 49 | diff = prediction_tensor - target_tensor 50 | abs_diff = tf.abs(diff) 51 | abs_diff_lt_1 = tf.less(abs_diff, 1) 52 | pixel_wise_smooth_l1norm = (tf.reduce_sum( 53 | tf.where(abs_diff_lt_1, 0.5 * tf.square(abs_diff), abs_diff - 0.5), 54 | axis=-1) / n_q) * weights 55 | return pixel_wise_smooth_l1norm 56 | 57 | 58 | def quad_norm(g_true): 59 | shape = tf.shape(g_true) 60 | delta_xy_matrix = tf.reshape(g_true, [-1, 2, 2]) 61 | diff = delta_xy_matrix[:, 0:1, :] - delta_xy_matrix[:, 1:2, :] 62 | square = tf.square(diff) 63 | distance = tf.sqrt(tf.reduce_sum(square, axis=-1)) 64 | distance *= 4.0 65 | distance += cfg.epsilon 66 | return tf.reshape(distance, shape[:-1]) 67 | -------------------------------------------------------------------------------- /location/nms.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import numpy as np 3 | 4 | import cfg 5 | 6 | 7 | def should_merge(region, i, j): 8 | neighbor = {(i, j - 1)} 9 | return not region.isdisjoint(neighbor) 10 | 11 | 12 | def region_neighbor(region_set): 13 | region_pixels = np.array(list(region_set)) 14 | j_min = np.amin(region_pixels, axis=0)[1] - 1 15 | j_max = np.amax(region_pixels, axis=0)[1] + 1 16 | i_m = np.amin(region_pixels, axis=0)[0] + 1 17 | region_pixels[:, 0] += 1 18 | neighbor = {(region_pixels[n, 0], region_pixels[n, 1]) for n in 19 | range(len(region_pixels))} 20 | neighbor.add((i_m, j_min)) 21 | neighbor.add((i_m, j_max)) 22 | return neighbor 23 | 24 | 25 | def region_group(region_list): 26 | S = [i for i in range(len(region_list))] 27 | D = [] 28 | while len(S) > 0: 29 | m = S.pop(0) 30 | if len(S) == 0: 31 | # S has only one element, put it into D 32 | D.append([m]) 33 | else: 34 | D.append(rec_region_merge(region_list, m, S)) 35 | return D 36 | 37 | 38 | def rec_region_merge(region_list, m, S): 39 | rows = [m] 40 | tmp = [] 41 | for n in S: 42 | if not region_neighbor(region_list[m]).isdisjoint(region_list[n]) or \ 43 | not region_neighbor(region_list[n]).isdisjoint(region_list[m]): 44 | # regions m and n intersect 45 
| tmp.append(n) 46 | for d in tmp: 47 | S.remove(d) 48 | for e in tmp: 49 | rows.extend(rec_region_merge(region_list, e, S)) 50 | return rows 51 | 52 | 53 | def nms(predict, activation_pixels, threshold=cfg.side_vertex_pixel_threshold): 54 | region_list = [] 55 | for i, j in zip(activation_pixels[0], activation_pixels[1]): 56 | merge = False 57 | for k in range(len(region_list)): 58 | if should_merge(region_list[k], i, j): 59 | region_list[k].add((i, j)) 60 | merge = True 61 | # Fixme: overlapping text regions; some pixels are adjacent to multiple regions, so merge them all for now 62 | # break 63 | if not merge: 64 | region_list.append({(i, j)}) 65 | D = region_group(region_list) 66 | quad_list = np.zeros((len(D), 4, 2)) 67 | score_list = np.zeros((len(D), 4)) 68 | for group, g_th in zip(D, range(len(D))): 69 | total_score = np.zeros((4, 2)) 70 | for row in group: 71 | for ij in region_list[row]: 72 | score = predict[ij[0], ij[1], 1] 73 | if score >= threshold: 74 | ith_score = predict[ij[0], ij[1], 2:3] 75 | if not (cfg.trunc_threshold <= ith_score < 1 - 76 | cfg.trunc_threshold): 77 | ith = int(np.around(ith_score)) 78 | total_score[ith * 2:(ith + 1) * 2] += score 79 | px = (ij[1] + 0.5) * cfg.pixel_size 80 | py = (ij[0] + 0.5) * cfg.pixel_size 81 | p_v = [px, py] + np.reshape(predict[ij[0], ij[1], 3:7], 82 | (2, 2)) 83 | quad_list[g_th, ith * 2:(ith + 1) * 2] += score * p_v 84 | score_list[g_th] = total_score[:, 0] 85 | quad_list[g_th] /= (total_score + cfg.epsilon) 86 | return score_list, quad_list 87 | -------------------------------------------------------------------------------- /loc_and_reg.py: -------------------------------------------------------------------------------- 1 | from keras.models import load_model 2 | from PIL import Image, ImageDraw 3 | from keras.preprocessing import image 4 | from keras.applications.vgg16 import preprocess_input 5 | from util import * 6 | import numpy as np 7 | import os 8 | import cv2 9 | import keras.backend as ktf 10 | import cfg 11 | 12 | 13 | def location(model, img_path, pixel_threshold): 14 | img = image.load_img(img_path) 15 | d_wight, d_height = resize_image(img, cfg.image_size) 16 | img = img.resize((d_wight, d_height), Image.NEAREST).convert('RGB') 17 | img = image.img_to_array(img) 18 | img = preprocess_input(img, mode='tf') 19 | x = np.expand_dims(img, axis=0) 20 | y = model.predict(x) 21 | y = np.squeeze(y, axis=0) 22 | y[:, :, :3] = sigmoid(y[:, :, :3]) 23 | cond = np.greater_equal(y[:, :, 0], pixel_threshold) 24 | activation_pixels = np.where(cond) 25 | quad_scores, quad_after_nms = nms(y, activation_pixels) 26 | results = [] 27 | with Image.open(img_path) as im: 28 | d_wight, d_height = resize_image(im, cfg.image_size) 29 | scale_ratio_w = d_wight / im.width 30 | scale_ratio_h = d_height / im.height 31 | im = im.resize((d_wight, d_height), Image.NEAREST).convert('RGB') 32 | for score, geo, s in zip(quad_scores, quad_after_nms, 33 | range(len(quad_scores))): 34 | if np.amin(score) > 0: 35 | rescaled_geo = geo / [scale_ratio_w, scale_ratio_h] 36 | im = cv2.cvtColor(np.asarray(im), cv2.COLOR_RGB2BGR) 37 | im = crop_rectangle(im, rescaled_geo) 38 | results.append(im) 39 | return results 40 | 41 | 42 | def recognition(model, img): 43 | img_size = img.shape 44 | if (img_size[1] / img_size[0] * 1.0) < 6: # narrow image: scale to target height, then pad the width with black 45 | img_reshape = cv2.resize(img, (int(31.0 / img_size[0] * img_size[1]), cfg.height)) 46 | 47 | mat_ori = np.zeros((cfg.height, cfg.width - int(31.0 / img_size[0] * img_size[1]), 3), dtype=np.uint8) 48 | out_img = np.concatenate([img_reshape, mat_ori], axis=1).transpose([1, 0, 2]) 49 | else: 50 
| out_img = cv2.resize(img, (cfg.width, cfg.height), interpolation=cv2.INTER_CUBIC) 51 | out_img = np.asarray(out_img) 52 | out_img = out_img.transpose([1, 0, 2]) 53 | 54 | y_pred = model.predict(np.expand_dims(out_img, axis=0)) 55 | shape = y_pred[:, 2:, :].shape 56 | ctc_decode = ktf.ctc_decode(y_pred[:, 2:, :], input_length=np.ones(shape[0]) * shape[1])[0][0] 57 | out = ktf.get_value(ctc_decode)[:, :cfg.label_len] 58 | result = ''.join([cfg.characters[k] for k in out[0]]) 59 | return result 60 | 61 | 62 | def main(): 63 | location_model = load_model(cfg.location_model) 64 | recognition_model = load_model(cfg.recognition_model) 65 | imgs = os.listdir('test') 66 | result_txt = open('result.txt', 'a+') 67 | for im in imgs: 68 | img_path = os.path.join('test', im) 69 | ims_re = location(location_model, img_path, cfg.pixel_threshold) 70 | if len(ims_re) > 0: 71 | for i in range(len(ims_re)): 72 | re_text = recognition(recognition_model, ims_re[i]) 73 | result = im + " : " + re_text + "\n" 74 | result_txt.write(result) 75 | result_txt.close() 76 | 77 | 78 | main() -------------------------------------------------------------------------------- /location/network.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from keras import Input, Model 3 | from keras.applications.vgg16 import VGG16 4 | from keras.layers import Concatenate, Conv2D, UpSampling2D, BatchNormalization 5 | import tensorflow as tf 6 | import cfg 7 | from keras.layers import Layer 8 | 9 | 10 | class Att(Layer): 11 | def __init__(self, **kwargs): 12 | super(Att, self).__init__(**kwargs) 13 | 14 | def call(self, layer_input, **kwargs): 15 | x = layer_input 16 | attention = tf.reduce_mean(layer_input, axis=-1, keep_dims=True) 17 | importance_map = tf.sigmoid(attention) 18 | output = tf.multiply(x, importance_map) 19 | return output 20 | 21 | def compute_output_shape(self, input_shape): 22 | return input_shape 23 | 24 | 25 | class East: 26 | 27 | def __init__(self): 28 | self.input_img = Input(name='input_img', 29 | shape=(None, None, cfg.num_channels), 30 | dtype='float32') 31 | vgg16 = VGG16(input_tensor=self.input_img, 32 | weights=None, 33 | include_top=False) 34 | self.vgg_pools = [vgg16.get_layer('block%d_pool' % i).output 35 | for i in range(2, 6)] 36 | 37 | def east_network(self): 38 | 39 | def decoder(layer_input, skip_input, channel): 40 | concat = Concatenate(axis=-1)([layer_input, skip_input]) 41 | bn1 = BatchNormalization()(concat) 42 | conv_1 = Conv2D(channel, 1, 43 | activation='relu', padding='same')(bn1) 44 | bn2 = BatchNormalization()(conv_1) 45 | conv_3 = Conv2D(channel, 3, 46 | activation='relu', padding='same')(bn2) 47 | return conv_3 48 | 49 | # d1 = Att()(self.vgg_pools[3]) 50 | # d1 = decoder(UpSampling2D((2, 2))(d1), self.vgg_pools[2], 128) 51 | d1 = decoder(UpSampling2D((2, 2))(self.vgg_pools[3]), self.vgg_pools[2], 128) 52 | # d1 = Att()(d1) 53 | d2 = decoder(UpSampling2D((2, 2))(d1), self.vgg_pools[1], 64) 54 | # d2 = Att()(d2) 55 | d3 = decoder(UpSampling2D((2, 2))(d2), self.vgg_pools[0], 32) 56 | # d3 = Att()(d3) 57 | bn = BatchNormalization()(d3) 58 | before_output = Conv2D(32, 3, activation='relu', padding='same')(bn) 59 | inside_score = Conv2D(1, 1, padding='same', name='inside_score' 60 | )(before_output) 61 | side_v_code = Conv2D(2, 1, padding='same', name='side_vertex_code' 62 | )(before_output) 63 | side_v_coord = Conv2D(4, 1, padding='same', name='side_vertex_coord' 64 | )(before_output) 65 | east_detect = Concatenate(axis=-1, 66 | 
name='east_detect')([inside_score, 67 | side_v_code, 68 | side_v_coord]) 69 | model = Model(inputs=self.input_img, outputs=[east_detect]) 70 | # model.summary() 71 | return model 72 | 73 | 74 | if __name__ == '__main__': 75 | east = East() 76 | east_network = east.east_network() 77 | east_network.load_weights('saved_model/east_model_weights_origin.h5') 78 | east_network.summary() 79 | -------------------------------------------------------------------------------- /recognition/data_loader.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import linecache 4 | import os 5 | import cfg 6 | 7 | 8 | train_imgs = open(os.path.join(cfg.ocr_dataset_path, "annotation_train.txt"), 'r').readlines() 9 | train_imgs_num = len(train_imgs) 10 | 11 | val_imgs = open(os.path.join(cfg.ocr_dataset_path, "annotation_val.txt"), 'r').readlines() 12 | val_imgs_num = len(val_imgs) 13 | 14 | lexicon_dic_path = os.path.join(cfg.ocr_dataset_path, "lexicon.txt") 15 | 16 | 17 | def img_gen_lexicon(batch_size=50, input_shape=None): 18 | imgs = np.zeros((batch_size, cfg.width, cfg.height, 3), dtype=np.uint8) 19 | labels = np.zeros((batch_size, cfg.label_len), dtype=np.uint8) 20 | 21 | while True: 22 | for i in range(batch_size): 23 | while True: 24 | pick_index = np.random.randint(0, train_imgs_num - 1) 25 | train_imgs_split = [m for m in train_imgs[pick_index].split()] 26 | lexicon = linecache.getline(lexicon_dic_path, int(train_imgs_split[1]) + 1).strip("\n") 27 | img_path = cfg.ocr_dataset_path + train_imgs_split[0][1:] 28 | img = cv2.imread(img_path) 29 | if (img is not None) and len(lexicon) <= cfg.label_len: 30 | img_size = img.shape # (height, width, channels) 31 | if img_size[1] > 2 and img_size[0] > 2: 32 | break 33 | if (img_size[1]/(img_size[0]*1.0)) < 6.4: 34 | img_reshape = cv2.resize(img, (int(31.0/img_size[0]*img_size[1]), cfg.height)) 35 | mat_ori = np.zeros((cfg.height, cfg.width - int(31.0/img_size[0]*img_size[1]), 3), dtype=np.uint8) 36 | out_img = np.concatenate([img_reshape, mat_ori], axis=1).transpose([1, 0, 2]) 37 | else: 38 | out_img = cv2.resize(img, (cfg.width, cfg.height), interpolation=cv2.INTER_CUBIC) 39 | out_img = np.asarray(out_img).transpose([1, 0, 2]) 40 | 41 | # pad the label to label_len with '-'; per the ctc_loss definition it may be better not to use '-' (the blank) for padding 42 | while len(lexicon) < cfg.label_len: 43 | lexicon += "-" 44 | 45 | imgs[i] = out_img 46 | labels[i] = [cfg.characters.find(c) for c in lexicon] 47 | yield [imgs, labels, np.ones(batch_size) * int(input_shape[1] - 2), np.ones(batch_size) * cfg.label_len], labels 48 | 49 | 50 | def img_gen_val_lexicon(batch_size=1000): 51 | imgs = np.zeros((batch_size, cfg.width, cfg.height, 3), dtype=np.uint8) 52 | 53 | while True: 54 | labels = [] # reset every round so labels stay aligned with the current imgs 55 | for i in range(batch_size): 56 | 57 | while True: 58 | pick_index = np.random.randint(0, val_imgs_num - 1) 59 | train_imgs_split = [m for m in val_imgs[pick_index].split()] 60 | lexicon = linecache.getline(lexicon_dic_path, int(train_imgs_split[1]) + 1).strip("\n") 61 | img_path = cfg.ocr_dataset_path + train_imgs_split[0][1:] 62 | img = cv2.imread(img_path) 63 | 64 | if (img is not None) and len(lexicon) <= cfg.label_len: 65 | img_size = img.shape # (height, width, channels) 66 | if img_size[1] > 2 and img_size[0] > 2: 67 | break 68 | if (img_size[1]/(img_size[0]*1.0)) < 6.4: 69 | img_reshape = cv2.resize(img, (int(31.0/img_size[0]*img_size[1]), cfg.height)) 70 | mat_ori = np.zeros((cfg.height, cfg.width - int(31.0/img_size[0]*img_size[1]), 3), 
dtype=np.uint8) 71 | out_img = np.concatenate([img_reshape, mat_ori], axis=1).transpose([1, 0, 2]) 72 | else: 73 | out_img = cv2.resize(img, (cfg.width, cfg.height), interpolation=cv2.INTER_CUBIC) 74 | out_img = np.asarray(out_img).transpose([1, 0, 2]) 75 | 76 | imgs[i] = out_img 77 | labels.append(lexicon) 78 | yield imgs, labels 79 | 80 | -------------------------------------------------------------------------------- /recognition/network.py: -------------------------------------------------------------------------------- 1 | from keras import backend as ktf 2 | from keras.layers import Conv2D, LSTM, Lambda, BatchNormalization, MaxPooling2D, Reshape, Dense, Dropout, add, concatenate, Bidirectional 3 | from keras.models import Model, Input 4 | from keras.optimizers import SGD 5 | 6 | 7 | class CRNN: 8 | def __init__(self, width, height, label_len, characters): 9 | self.height = height 10 | self.width = width 11 | self.label_len = label_len 12 | self.characters = characters 13 | self.label_classes = len(self.characters) 14 | 15 | def ctc_loss(self, args): 16 | iy_pred, ilabels, iinput_length, ilabel_length = args 17 | # the 2 is critical here since the first couple outputs of the RNN 18 | # tend to be garbage: 19 | iy_pred = iy_pred[:, 2:, :] # (in our tests this slicing had no real influence) 20 | return ktf.ctc_batch_cost(ilabels, iy_pred, iinput_length, ilabel_length) 21 | 22 | def network(self): 23 | input_im = Input(shape=(self.width, self.height, 3)) 24 | 25 | conv_1 = Conv2D(64, (3, 3), activation='relu', padding='same')(input_im) 26 | bn1 = BatchNormalization()(conv_1) 27 | 28 | conv_2_1 = Conv2D(128, (3, 3), activation='relu', padding='same')(bn1) 29 | conv_2_2 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv_2_1) 30 | bn2 = BatchNormalization()(conv_2_2) 31 | pool_1 = MaxPooling2D(pool_size=(2, 2))(bn2) 32 | 33 | conv_3_1 = Conv2D(256, (3, 3), activation='relu', padding='same')(pool_1) 34 | conv_3_2 = Conv2D(512, (3, 3), activation='relu', padding='same')(conv_3_1) 35 | bn3 = BatchNormalization()(conv_3_2) 36 | pool_2 = MaxPooling2D(pool_size=(2, 2))(bn3) 37 | 38 | conv_4_1 = Conv2D(512, (3, 3), activation='relu', padding='same')(pool_2) 39 | conv_4_2 = Conv2D(512, (3, 3), activation='relu', padding='same')(conv_4_1) 40 | bn4 = BatchNormalization()(conv_4_2) 41 | 42 | bn_shape = bn4.get_shape() 43 | 44 | x_reshape = Reshape(target_shape=(int(bn_shape[1]), int(bn_shape[2] * bn_shape[3])))(bn4) 45 | 46 | fc_1 = Dense(128, activation='relu')(x_reshape) 47 | 48 | rnn_1 = LSTM(128, kernel_initializer="he_normal", return_sequences=True)(fc_1) 49 | rnn_1b = LSTM(128, kernel_initializer="he_normal", go_backwards=True, return_sequences=True)(fc_1) 50 | rnn1_merged = add([rnn_1, rnn_1b]) 51 | # bi_lstm1 = Bidirectional(LSTM(128, kernel_initializer="he_normal", return_sequences=True), merge_mode='sum')(fc_1) 52 | # 53 | rnn_2 = LSTM(128, kernel_initializer="he_normal", return_sequences=True)(rnn1_merged) 54 | rnn_2b = LSTM(128, kernel_initializer="he_normal", go_backwards=True, return_sequences=True)(rnn1_merged) 55 | rnn2_merged = concatenate([rnn_2, rnn_2b]) 56 | # bi_lstm2 = Bidirectional(LSTM(128, kernel_initializer="he_normal", return_sequences=True), merge_mode='concat')(bi_lstm1) 57 | 58 | drop_1 = Dropout(0.25)(rnn2_merged) 59 | # drop_1 = Dropout(0.25)(bi_lstm2) 60 | 61 | fc_2 = Dense(self.label_classes, kernel_initializer='he_normal', activation='softmax')(drop_1) 62 | 63 | infer_model = Model(inputs=input_im, outputs=fc_2) 64 | 65 | labels = Input(name='the_labels', shape=[self.label_len], 
dtype='float32') 66 | input_length = Input(name='input_length', shape=[1], dtype='int64') 67 | label_length = Input(name='label_length', shape=[1], dtype='int64') 68 | 69 | loss_out = Lambda(self.ctc_loss, output_shape=(1,), name='ctc')([fc_2, labels, input_length, 70 | label_length]) 71 | 72 | train_model = Model(inputs=[input_im, labels, input_length, label_length], outputs=[loss_out]) 73 | sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5) 74 | 75 | train_model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd) 76 | infer_model.summary() 77 | train_model.summary() 78 | 79 | return train_model, infer_model 80 | 81 | 82 | if __name__ == '__main__': 83 | import string 84 | CRNN(200, 31, 11, '0123456789'+'-').network() 85 | -------------------------------------------------------------------------------- /location/predict.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image, ImageDraw 3 | from keras.preprocessing import image 4 | from keras.applications.vgg16 import preprocess_input 5 | import cv2 6 | import cfg 7 | from network import East 8 | from preprocess import resize_image 9 | from nms import nms 10 | import os 11 | 12 | 13 | def sigmoid(x): 14 | """`y = 1 / (1 + exp(-x))`""" 15 | return 1 / (1 + np.exp(-x)) 16 | 17 | 18 | def crop_rectangle(img, geo): 19 | rect = cv2.minAreaRect(geo.astype(int)) 20 | center, size, angle = rect[0], rect[1], rect[2] 21 | if(angle > -45): 22 | center = tuple(map(int, center)) 23 | size = tuple([int(rect[1][0] + 10), int(rect[1][1] + 10)]) 24 | height, width = img.shape[0], img.shape[1] 25 | M = cv2.getRotationMatrix2D(center, angle, 1) 26 | img_rot = cv2.warpAffine(img, M, (width, height)) 27 | img_crop = cv2.getRectSubPix(img_rot, size, center) 28 | else: 29 | center = tuple(map(int, center)) 30 | size = tuple([int(rect[1][1] + 10), int(rect[1][0]) + 10]) 31 | angle -= 270 32 | height, width = img.shape[0], img.shape[1] 33 | M = cv2.getRotationMatrix2D(center, angle, 1) 34 | img_rot = cv2.warpAffine(img, M, (width, height)) 35 | img_crop = cv2.getRectSubPix(img_rot, size, center) 36 | return img_crop 37 | 38 | 39 | def predict(east_detect, img_path, pixel_threshold, quiet=False): 40 | img = image.load_img(img_path) 41 | d_wight, d_height = resize_image(img, cfg.image_size) 42 | img = img.resize((d_wight, d_height), Image.NEAREST).convert('RGB') 43 | img = image.img_to_array(img) 44 | img = preprocess_input(img, mode='tf') 45 | x = np.expand_dims(img, axis=0) 46 | y = east_detect.predict(x) 47 | y = np.squeeze(y, axis=0) 48 | y[:, :, :3] = sigmoid(y[:, :, :3]) 49 | cond = np.greater_equal(y[:, :, 0], pixel_threshold) 50 | activation_pixels = np.where(cond) 51 | quad_scores, quad_after_nms = nms(y, activation_pixels) 52 | with Image.open(img_path) as im: 53 | im_array = image.img_to_array(im.convert('RGB')) 54 | d_wight, d_height = resize_image(im, cfg.image_size) 55 | scale_ratio_w = d_wight / im.width 56 | scale_ratio_h = d_height / im.height 57 | im = im.resize((d_wight, d_height), Image.NEAREST).convert('RGB') 58 | quad_im = im.copy() 59 | quad_draw = ImageDraw.Draw(quad_im) 60 | txt_items = [] 61 | flag = False 62 | for score, geo, s in zip(quad_scores, quad_after_nms, 63 | range(len(quad_scores))): 64 | if np.amin(score) > 0: 65 | flag = True 66 | quad_draw.line([tuple(geo[0]), 67 | tuple(geo[1]), 68 | tuple(geo[2]), 69 | tuple(geo[3]), 70 | tuple(geo[0])], width=2, fill='blue') 71 | rescaled_geo = geo / [scale_ratio_w, 
scale_ratio_h] 72 | rescaled_geo_list = np.reshape(rescaled_geo, (8,)).tolist() 73 | txt_item = ','.join(map(str, rescaled_geo_list)) 74 | txt_items.append(txt_item + '\n') 75 | if cfg.detection_box_crop: 76 | img_crop = crop_rectangle(im_array, rescaled_geo) 77 | cv2.imwrite(os.path.join('output_crop', img_path.split('/')[-1].split('.')[0] + '.jpg'), img_crop) 78 | elif not quiet: 79 | print('quad invalid with vertex num less than 4.') 80 | if flag: 81 | quad_im.save(os.path.join('output', img_path.split('/')[-1].split('.')[0] + '_predict.jpg')) 82 | if cfg.predict_write2txt and len(txt_items) > 0: 83 | with open(os.path.join("output_txt", img_path.split('/')[-1].split('.')[0] + '.txt'), 'w') as f_txt: 84 | f_txt.writelines(txt_items) 85 | 86 | 87 | if __name__ == '__main__': 88 | east = East() 89 | east_detect = east.east_network() 90 | east_detect.summary() 91 | east_detect.load_weights(cfg.model_weights_path) 92 | img_list = os.listdir('test_imgs') 93 | for img_path in img_list: 94 | predict(east_detect, os.path.join('test_imgs', img_path), cfg.pixel_threshold) 95 | -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cfg as cfg 3 | import cv2 4 | 5 | 6 | def sigmoid(x): 7 | """`y = 1 / (1 + exp(-x))`""" 8 | return 1 / (1 + np.exp(-x)) 9 | 10 | 11 | def resize_image(im, max_img_size=cfg.image_size): 12 | im_width = np.minimum(im.width, max_img_size) 13 | if im_width == max_img_size < im.width: 14 | im_height = int((im_width / im.width) * im.height) 15 | else: 16 | im_height = im.height 17 | o_height = np.minimum(im_height, max_img_size) 18 | if o_height == max_img_size < im_height: 19 | o_width = int((o_height / im_height) * im_width) 20 | else: 21 | o_width = im_width 22 | d_wight = o_width - (o_width % 32) 23 | d_height = o_height - (o_height % 32) 24 | return d_wight, d_height 25 | 26 | 27 | def should_merge(region, i, j): 28 | neighbor = {(i, j - 1)} 29 | return not region.isdisjoint(neighbor) 30 | 31 | 32 | def region_neighbor(region_set): 33 | region_pixels = np.array(list(region_set)) 34 | j_min = np.amin(region_pixels, axis=0)[1] - 1 35 | j_max = np.amax(region_pixels, axis=0)[1] + 1 36 | i_m = np.amin(region_pixels, axis=0)[0] + 1 37 | region_pixels[:, 0] += 1 38 | neighbor = {(region_pixels[n, 0], region_pixels[n, 1]) for n in 39 | range(len(region_pixels))} 40 | neighbor.add((i_m, j_min)) 41 | neighbor.add((i_m, j_max)) 42 | return neighbor 43 | 44 | 45 | def region_group(region_list): 46 | S = [i for i in range(len(region_list))] 47 | D = [] 48 | while len(S) > 0: 49 | m = S.pop(0) 50 | if len(S) == 0: 51 | # S has only one element, put it into D 52 | D.append([m]) 53 | else: 54 | D.append(rec_region_merge(region_list, m, S)) 55 | return D 56 | 57 | 58 | def rec_region_merge(region_list, m, S): 59 | rows = [m] 60 | tmp = [] 61 | for n in S: 62 | if not region_neighbor(region_list[m]).isdisjoint(region_list[n]) or \ 63 | not region_neighbor(region_list[n]).isdisjoint(region_list[m]): 64 | # regions m and n intersect 65 | tmp.append(n) 66 | for d in tmp: 67 | S.remove(d) 68 | for e in tmp: 69 | rows.extend(rec_region_merge(region_list, e, S)) 70 | return rows 71 | 72 | 73 | def nms(predict, activation_pixels, threshold=cfg.side_vertex_pixel_threshold): 74 | region_list = [] 75 | for i, j in zip(activation_pixels[0], activation_pixels[1]): 76 | merge = False 77 | for k in range(len(region_list)): 78 | if should_merge(region_list[k], i, j): 79 
80 |                 merge = True
81 |         if not merge:
82 |             region_list.append({(i, j)})
83 |     D = region_group(region_list)
84 |     quad_list = np.zeros((len(D), 4, 2))
85 |     score_list = np.zeros((len(D), 4))
86 |     for group, g_th in zip(D, range(len(D))):
87 |         total_score = np.zeros((4, 2))
88 |         for row in group:
89 |             for ij in region_list[row]:
90 |                 score = predict[ij[0], ij[1], 1]
91 |                 if score >= threshold:
92 |                     ith_score = predict[ij[0], ij[1], 2:3]
93 |                     if not (cfg.trunc_threshold <= ith_score < 1 -
94 |                             cfg.trunc_threshold):
95 |                         ith = int(np.around(ith_score))
96 |                         total_score[ith * 2:(ith + 1) * 2] += score
97 |                         px = (ij[1] + 0.5) * cfg.pixel_size
98 |                         py = (ij[0] + 0.5) * cfg.pixel_size
99 |                         p_v = [px, py] + np.reshape(predict[ij[0], ij[1], 3:7],
100 |                                                     (2, 2))
101 |                         quad_list[g_th, ith * 2:(ith + 1) * 2] += score * p_v
102 |         score_list[g_th] = total_score[:, 0]
103 |         quad_list[g_th] /= (total_score + cfg.epsilon)
104 |     return score_list, quad_list
105 |
106 |
107 | def crop_rectangle(img, geo):
108 |     rect = cv2.minAreaRect(geo.astype(int))
109 |     center, size, angle = rect[0], rect[1], rect[2]
110 |     if angle > -45:
111 |         center = tuple(map(int, center))
112 |         size = tuple([int(rect[1][0] + 10), int(rect[1][1] + 10)])
113 |         height, width = img.shape[0], img.shape[1]
114 |         M = cv2.getRotationMatrix2D(center, angle, 1)
115 |         img_rot = cv2.warpAffine(img, M, (width, height))
116 |         img_crop = cv2.getRectSubPix(img_rot, size, center)
117 |     else:
118 |         center = tuple(map(int, center))
119 |         size = tuple([int(rect[1][1] + 10), int(rect[1][0] + 10)])
120 |         angle -= 270  # equivalent to rotating by angle + 90; width/height are swapped for near-vertical boxes
121 |         height, width = img.shape[0], img.shape[1]
122 |         M = cv2.getRotationMatrix2D(center, angle, 1)
123 |         img_rot = cv2.warpAffine(img, M, (width, height))
124 |         img_crop = cv2.getRectSubPix(img_rot, size, center)
125 |     return img_crop
--------------------------------------------------------------------------------
/location/label.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | from PIL import Image, ImageDraw
4 | from tqdm import tqdm
5 | import cfg
6 |
7 |
8 | def point_inside_of_quad(px, py, quad_xy_list, p_min, p_max):
9 |     if (p_min[0] <= px <= p_max[0]) and (p_min[1] <= py <= p_max[1]):
10 |         xy_list = np.zeros((4, 2))
11 |         xy_list[:3, :] = quad_xy_list[1:4, :] - quad_xy_list[:3, :]
12 |         xy_list[3] = quad_xy_list[0, :] - quad_xy_list[3, :]
13 |         yx_list = np.zeros((4, 2))
14 |         yx_list[:, :] = quad_xy_list[:, -1:-3:-1]
15 |         a = xy_list * ([py, px] - yx_list)
16 |         b = a[:, 0] - a[:, 1]
17 |         if np.amin(b) >= 0 or np.amax(b) <= 0:
18 |             return True
19 |         else:
20 |             return False
21 |     else:
22 |         return False
23 |
24 |
25 | def point_inside_of_nth_quad(px, py, xy_list, shrink_1, long_edge):
26 |     nth = -1
27 |     vs = [[[0, 0, 3, 3, 0], [1, 1, 2, 2, 1]],
28 |           [[0, 0, 1, 1, 0], [2, 2, 3, 3, 2]]]
29 |     for ith in range(2):
30 |         quad_xy_list = np.concatenate((
31 |             np.reshape(xy_list[vs[long_edge][ith][0]], (1, 2)),
32 |             np.reshape(shrink_1[vs[long_edge][ith][1]], (1, 2)),
33 |             np.reshape(shrink_1[vs[long_edge][ith][2]], (1, 2)),
34 |             np.reshape(xy_list[vs[long_edge][ith][3]], (1, 2))), axis=0)
35 |         p_min = np.amin(quad_xy_list, axis=0)
36 |         p_max = np.amax(quad_xy_list, axis=0)
37 |         if point_inside_of_quad(px, py, quad_xy_list, p_min, p_max):
38 |             if nth == -1:
39 |                 nth = ith
40 |             else:
41 |                 nth = -1
42 |                 break
43 |     return nth
44 |
45 |
46 | def shrink(xy_list, ratio=cfg.shrink_ratio):
47 |     # note: ratio == 0.0 is handled inside shrink_edge (a no-op), so we fall
48 |     # through and still compute long_edge for the three-value return below
49 |     diff_1to3 = xy_list[:3, :] - xy_list[1:4, :]
50 |     diff_4 = xy_list[3:4, :] - xy_list[0:1, :]
51 |     diff = np.concatenate((diff_1to3, diff_4), axis=0)
52 |     dis = np.sqrt(np.sum(np.square(diff), axis=-1))
53 |     # determine which edges are long and which are short
54 |     long_edge = int(np.argmax(np.sum(np.reshape(dis, (2, 2)), axis=0)))
55 |     short_edge = 1 - long_edge
56 |     # compute the r length array
57 |     r = [np.minimum(dis[i], dis[(i + 1) % 4]) for i in range(4)]
58 |     # compute the theta array
59 |     diff_abs = np.abs(diff)
60 |     diff_abs[:, 0] += cfg.epsilon
61 |     theta = np.arctan(diff_abs[:, 1] / diff_abs[:, 0])
62 |     # shrink the two long edges
63 |     temp_new_xy_list = np.copy(xy_list)
64 |     shrink_edge(xy_list, temp_new_xy_list, long_edge, r, theta, ratio)
65 |     shrink_edge(xy_list, temp_new_xy_list, long_edge + 2, r, theta, ratio)
66 |     # shrink the two short edges
67 |     new_xy_list = np.copy(temp_new_xy_list)
68 |     shrink_edge(temp_new_xy_list, new_xy_list, short_edge, r, theta, ratio)
69 |     shrink_edge(temp_new_xy_list, new_xy_list, short_edge + 2, r, theta, ratio)
70 |     return temp_new_xy_list, new_xy_list, long_edge
71 |
72 |
73 | def shrink_edge(xy_list, new_xy_list, edge, r, theta, ratio=cfg.shrink_ratio):
74 |     if ratio == 0.0:
75 |         return
76 |     start_point = edge
77 |     end_point = (edge + 1) % 4
78 |     long_start_sign_x = np.sign(
79 |         xy_list[end_point, 0] - xy_list[start_point, 0])
80 |     new_xy_list[start_point, 0] = \
81 |         xy_list[start_point, 0] + \
82 |         long_start_sign_x * ratio * r[start_point] * np.cos(theta[start_point])
83 |     long_start_sign_y = np.sign(
84 |         xy_list[end_point, 1] - xy_list[start_point, 1])
85 |     new_xy_list[start_point, 1] = \
86 |         xy_list[start_point, 1] + \
87 |         long_start_sign_y * ratio * r[start_point] * np.sin(theta[start_point])
88 |     # long edge one, end point
89 |     long_end_sign_x = -1 * long_start_sign_x
90 |     new_xy_list[end_point, 0] = \
91 |         xy_list[end_point, 0] + \
92 |         long_end_sign_x * ratio * r[end_point] * np.cos(theta[start_point])
93 |     long_end_sign_y = -1 * long_start_sign_y
94 |     new_xy_list[end_point, 1] = \
95 |         xy_list[end_point, 1] + \
96 |         long_end_sign_y * ratio * r[end_point] * np.sin(theta[start_point])
97 |
98 |
99 | def process_label(data_dir=cfg.data_dir):
100 |     with open(os.path.join(data_dir, cfg.val_fname), 'r') as f_val:
101 |         f_list = f_val.readlines()
102 |     with open(os.path.join(data_dir, cfg.train_fname), 'r') as f_train:
103 |         f_list.extend(f_train.readlines())
104 |     for line, _ in zip(f_list, tqdm(range(len(f_list)))):
105 |         line_cols = str(line).strip().split(',')
106 |         img_name, width, height = \
107 |             line_cols[0].strip(), int(line_cols[1].strip()), \
108 |             int(line_cols[2].strip())
109 |         gt = np.zeros((height // cfg.pixel_size, width // cfg.pixel_size, 7))
110 |         train_label_dir = os.path.join(data_dir, cfg.train_label_dir_name)
111 |         xy_list_array = np.load(os.path.join(train_label_dir,
112 |                                              img_name[:-4] + '.npy'))
113 |         train_image_dir = os.path.join(data_dir, cfg.train_image_dir_name)
114 |         with Image.open(os.path.join(train_image_dir, img_name)) as im:
115 |             draw = ImageDraw.Draw(im)
116 |             for xy_list in xy_list_array:
117 |                 _, shrink_xy_list, _ = shrink(xy_list, cfg.shrink_ratio)
118 |                 shrink_1, _, long_edge = shrink(xy_list, cfg.shrink_side_ratio)
119 |                 p_min = np.amin(shrink_xy_list, axis=0)
120 |                 p_max = np.amax(shrink_xy_list, axis=0)
121 |                 # floor of the float
122 |                 ji_min = (p_min / cfg.pixel_size - 0.5).astype(int) - 1
123 |                 # +1 for ceil of the float and +1 to include the end
124 |                 ji_max = (p_max / cfg.pixel_size - 0.5).astype(int) + 3
125 |                 imin = np.maximum(0, ji_min[1])
126 |                 imax = np.minimum(height // cfg.pixel_size, ji_max[1])
127 |                 jmin = np.maximum(0, ji_min[0])
128 |                 jmax = np.minimum(width // cfg.pixel_size, ji_max[0])
129 |                 for i in range(imin, imax):
130 |                     for j in range(jmin, jmax):
131 |                         px = (j + 0.5) * cfg.pixel_size
132 |                         py = (i + 0.5) * cfg.pixel_size
133 |                         if point_inside_of_quad(px, py,
134 |                                                 shrink_xy_list, p_min, p_max):
135 |                             gt[i, j, 0] = 1
136 |                             line_width, line_color = 1, 'red'
137 |                             ith = point_inside_of_nth_quad(px, py,
138 |                                                            xy_list,
139 |                                                            shrink_1,
140 |                                                            long_edge)
141 |                             vs = [[[3, 0], [1, 2]], [[0, 1], [2, 3]]]
142 |                             if ith in range(2):
143 |                                 gt[i, j, 1] = 1
144 |                                 if ith == 0:
145 |                                     line_width, line_color = 2, 'yellow'
146 |                                 else:
147 |                                     line_width, line_color = 2, 'green'
148 |                                 gt[i, j, 2:3] = ith
149 |                                 gt[i, j, 3:5] = \
150 |                                     xy_list[vs[long_edge][ith][0]] - [px, py]
151 |                                 gt[i, j, 5:] = \
152 |                                     xy_list[vs[long_edge][ith][1]] - [px, py]
153 |                             draw.line([(px - 0.5 * cfg.pixel_size,
154 |                                         py - 0.5 * cfg.pixel_size),
155 |                                        (px + 0.5 * cfg.pixel_size,
156 |                                         py - 0.5 * cfg.pixel_size),
157 |                                        (px + 0.5 * cfg.pixel_size,
158 |                                         py + 0.5 * cfg.pixel_size),
159 |                                        (px - 0.5 * cfg.pixel_size,
160 |                                         py + 0.5 * cfg.pixel_size),
161 |                                        (px - 0.5 * cfg.pixel_size,
162 |                                         py - 0.5 * cfg.pixel_size)],
163 |                                       width=line_width, fill=line_color)
164 |             act_image_dir = os.path.join(cfg.data_dir,
165 |                                          cfg.show_act_image_dir_name)
166 |             if cfg.draw_act_quad:
167 |                 im.save(os.path.join(act_image_dir, img_name))
168 |         train_label_dir = os.path.join(data_dir, cfg.train_label_dir_name)
169 |         np.save(os.path.join(train_label_dir,
170 |                              img_name[:-4] + '_gt.npy'), gt)
171 |
172 |
173 | if __name__ == '__main__':
174 |     process_label()
175 |
--------------------------------------------------------------------------------
/location/preprocess.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from PIL import Image, ImageDraw
3 | import os
4 | import random
5 | from tqdm import tqdm
6 | from label import shrink
7 | import cfg
8 |
9 |
10 | def batch_reorder_vertexes(xy_list_array):
11 |     reorder_xy_list_array = np.zeros_like(xy_list_array)
12 |     for xy_list, i in zip(xy_list_array, range(len(xy_list_array))):
13 |         reorder_xy_list_array[i] = reorder_vertexes(xy_list)
14 |     return reorder_xy_list_array
15 |
16 |
17 | def reorder_vertexes(xy_list):
18 |     reorder_xy_list = np.zeros_like(xy_list)
19 |     # determine the first point, the one with the smallest x;
20 |     # if two points share the same x, choose the one with the smaller y
21 |     ordered = np.argsort(xy_list, axis=0)
22 |     xmin1_index = ordered[0, 0]
23 |     xmin2_index = ordered[1, 0]
24 |     if xy_list[xmin1_index, 0] == xy_list[xmin2_index, 0]:
25 |         if xy_list[xmin1_index, 1] <= xy_list[xmin2_index, 1]:
26 |             reorder_xy_list[0] = xy_list[xmin1_index]
27 |             first_v = xmin1_index
28 |         else:
29 |             reorder_xy_list[0] = xy_list[xmin2_index]
30 |             first_v = xmin2_index
31 |     else:
32 |         reorder_xy_list[0] = xy_list[xmin1_index]
33 |         first_v = xmin1_index
34 |     # connect the first point to the others; the third point is the one on
35 |     # the other side of the line with the middle slope
36 |     others = list(range(4))
37 |     others.remove(first_v)
38 |     k = np.zeros((len(others),))
39 |     for index, i in zip(others, range(len(others))):
40 |         k[i] = (xy_list[index, 1] - xy_list[first_v, 1]) \
41 |             / (xy_list[index, 0] - xy_list[first_v, 0] + cfg.epsilon)
42 |     k_mid = np.argsort(k)[1]
43 |     third_v = others[k_mid]
44 |     reorder_xy_list[2] = xy_list[third_v]
45 |     # determine the second point, which is on the bigger side of the middle line
46 |     others.remove(third_v)
47 |     b_mid = xy_list[first_v, 1] - k[k_mid] * xy_list[first_v, 0]
48 |     second_v, fourth_v = 0, 0
49 |     for index, i in zip(others, range(len(others))):
50 |         # delta = y - (k * x + b)
51 |         delta_y = xy_list[index, 1] - (k[k_mid] * xy_list[index, 0] + b_mid)
52 |         if delta_y > 0:
53 |             second_v = index
54 |         else:
55 |             fourth_v = index
56 |     reorder_xy_list[1] = xy_list[second_v]
57 |     reorder_xy_list[3] = xy_list[fourth_v]
58 |     # compare the slopes of diagonals 1-3 and 2-4 to determine the final order
59 |     k13 = k[k_mid]
60 |     k24 = (xy_list[second_v, 1] - xy_list[fourth_v, 1]) / (
61 |         xy_list[second_v, 0] - xy_list[fourth_v, 0] + cfg.epsilon)
62 |     if k13 < k24:
63 |         tmp_x, tmp_y = reorder_xy_list[3, 0], reorder_xy_list[3, 1]
64 |         for i in range(2, -1, -1):
65 |             reorder_xy_list[i + 1] = reorder_xy_list[i]
66 |         reorder_xy_list[0, 0], reorder_xy_list[0, 1] = tmp_x, tmp_y
67 |     return reorder_xy_list
68 |
69 |
70 | def resize_image(im, max_img_size=cfg.image_size):
71 |     im_width = np.minimum(im.width, max_img_size)
72 |     if im_width == max_img_size < im.width:
73 |         im_height = int((im_width / im.width) * im.height)
74 |     else:
75 |         im_height = im.height
76 |     o_height = np.minimum(im_height, max_img_size)
77 |     if o_height == max_img_size < im_height:
78 |         o_width = int((o_height / im_height) * im_width)
79 |     else:
80 |         o_width = im_width
81 |     d_wight = o_width - (o_width % 32)
82 |     d_height = o_height - (o_height % 32)
83 |     return d_wight, d_height
84 |
85 |
86 | def preprocess():
87 |     data_dir = cfg.data_dir
88 |     origin_image_dir = os.path.join(data_dir, cfg.origin_image_dir_name)
89 |     origin_txt_dir = os.path.join(data_dir, cfg.origin_txt_dir_name)
90 |     train_image_dir = os.path.join(data_dir, cfg.train_image_dir_name)
91 |     train_label_dir = os.path.join(data_dir, cfg.train_label_dir_name)
92 |     if not os.path.exists(train_image_dir):
93 |         os.mkdir(train_image_dir)
94 |     if not os.path.exists(train_label_dir):
95 |         os.mkdir(train_label_dir)
96 |     draw_gt_quad = cfg.draw_gt_quad
97 |     show_gt_image_dir = os.path.join(data_dir, cfg.show_gt_image_dir_name)
98 |     if not os.path.exists(show_gt_image_dir):
99 |         os.mkdir(show_gt_image_dir)
100 |     show_act_image_dir = os.path.join(cfg.data_dir, cfg.show_act_image_dir_name)
101 |     if not os.path.exists(show_act_image_dir):
102 |         os.mkdir(show_act_image_dir)
103 |
104 |     o_img_list = os.listdir(origin_image_dir)
105 |     print('found %d origin images.' % len(o_img_list))
106 |     train_val_set = []
107 |     for o_img_fname, _ in zip(o_img_list, tqdm(range(len(o_img_list)))):
108 |         with Image.open(os.path.join(origin_image_dir, o_img_fname)) as im:
109 |             # d_wight, d_height = resize_image(im)
110 |             d_wight, d_height = cfg.image_size, cfg.image_size
111 |             scale_ratio_w = d_wight / im.width
112 |             scale_ratio_h = d_height / im.height
113 |             try:
114 |                 im = im.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
115 |                 show_gt_im = im.copy()
116 |                 # draw on the image
117 |                 draw = ImageDraw.Draw(show_gt_im)
118 |                 with open(os.path.join(origin_txt_dir,
119 |                                        o_img_fname[:-4] + '.txt'), 'r') as f:
120 |                     anno_list = f.readlines()
121 |                 xy_list_array = np.zeros((len(anno_list), 4, 2))
122 |                 for anno, i in zip(anno_list, range(len(anno_list))):
123 |                     anno_colums = anno.strip().split(',')
124 |                     anno_array = np.array(anno_colums)
125 |                     xy_list = np.reshape(anno_array[:8].astype(float), (4, 2))
126 |                     xy_list[:, 0] = xy_list[:, 0] * scale_ratio_w
127 |                     xy_list[:, 1] = xy_list[:, 1] * scale_ratio_h
128 |                     xy_list = reorder_vertexes(xy_list)
129 |                     xy_list_array[i] = xy_list
130 |                     _, shrink_xy_list, _ = shrink(xy_list, cfg.shrink_ratio)
131 |                     shrink_1, _, long_edge = shrink(xy_list, cfg.shrink_side_ratio)
132 |                     if draw_gt_quad:
133 |                         draw.line([tuple(xy_list[0]), tuple(xy_list[1]),
134 |                                    tuple(xy_list[2]), tuple(xy_list[3]),
135 |                                    tuple(xy_list[0])
136 |                                    ],
137 |                                   width=2, fill='green')
138 |                         draw.line([tuple(shrink_xy_list[0]),
139 |                                    tuple(shrink_xy_list[1]),
140 |                                    tuple(shrink_xy_list[2]),
141 |                                    tuple(shrink_xy_list[3]),
142 |                                    tuple(shrink_xy_list[0])
143 |                                    ],
144 |                                   width=2, fill='blue')
145 |                         vs = [[[0, 0, 3, 3, 0], [1, 1, 2, 2, 1]],
146 |                               [[0, 0, 1, 1, 0], [2, 2, 3, 3, 2]]]
147 |                         for q_th in range(2):
148 |                             draw.line([tuple(xy_list[vs[long_edge][q_th][0]]),
149 |                                        tuple(shrink_1[vs[long_edge][q_th][1]]),
150 |                                        tuple(shrink_1[vs[long_edge][q_th][2]]),
151 |                                        tuple(xy_list[vs[long_edge][q_th][3]]),
152 |                                        tuple(xy_list[vs[long_edge][q_th][4]])],
153 |                                       width=3, fill='yellow')
154 |                 if cfg.gen_origin_img:
155 |                     im.save(os.path.join(train_image_dir, o_img_fname))
156 |                 np.save(os.path.join(
157 |                     train_label_dir,
158 |                     o_img_fname[:-4] + '.npy'),
159 |                     xy_list_array)
160 |                 if draw_gt_quad:
161 |                     show_gt_im.save(os.path.join(show_gt_image_dir, o_img_fname))
162 |                 train_val_set.append('{},{},{}\n'.format(o_img_fname,
163 |                                                          d_wight,
164 |                                                          d_height))
165 |             except Exception as e:
166 |                 print('skipped %s: %s' % (o_img_fname, e))  # report instead of silently swallowing errors
167 |
168 |     train_img_list = os.listdir(train_image_dir)
169 |     print('found %d train images.' % len(train_img_list))
170 |     train_label_list = os.listdir(train_label_dir)
171 |     print('found %d train labels.' % len(train_label_list))
172 |
173 |     random.shuffle(train_val_set)
174 |     val_count = int(cfg.validation_split_ratio * len(train_val_set))
175 |     with open(os.path.join(data_dir, cfg.val_fname), 'w') as f_val:
176 |         f_val.writelines(train_val_set[:val_count])
177 |     with open(os.path.join(data_dir, cfg.train_fname), 'w') as f_train:
178 |         f_train.writelines(train_val_set[val_count:])
179 |
180 |
181 | if __name__ == '__main__':
182 |     preprocess()
183 |
--------------------------------------------------------------------------------
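
For reference, the resize rule that appears twice above (util.py resize_image and location/preprocess.py resize_image) can be exercised standalone. A minimal sketch, assuming a max size of 736 (the actual value of cfg.image_size lives in cfg.py and is not shown here):

# Standalone sketch of the resize_image() rule: cap each side at
# max_img_size while preserving the aspect ratio, then round both sides
# down to multiples of 32 so the EAST feature maps (1/32 of the input)
# divide evenly. max_img_size=736 is an assumed example value.
def target_size(width, height, max_img_size=736):
    if width > max_img_size:
        height = int(max_img_size / width * height)
        width = max_img_size
    if height > max_img_size:
        width = int(max_img_size / height * width)
        height = max_img_size
    return width - width % 32, height - height % 32

print(target_size(1920, 1080))  # -> (736, 384)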
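Similarly, the region growing used by nms() in util.py (should_merge / region_group / rec_region_merge) amounts to connected-component grouping of activation pixels: a pixel joins an existing region when its left neighbour is already in it, and region_group then merges regions that touch across rows. A toy illustration of the first, row-wise step, on hypothetical input not taken from the repo:

# Toy illustration of the merge rule in should_merge(): a pixel (i, j)
# joins an existing region iff (i, j - 1) is already in that region.
activation_pixels = [(0, 0), (0, 1), (0, 3), (1, 3), (1, 4)]  # assumed toy input

region_list = []
for i, j in activation_pixels:
    merged = False
    for region in region_list:
        if (i, j - 1) in region:  # same row, immediately to the left
            region.add((i, j))
            merged = True
    if not merged:
        region_list.append({(i, j)})

print(region_list)  # e.g. [{(0, 0), (0, 1)}, {(0, 3)}, {(1, 3), (1, 4)}] (set order may vary)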