├── .gitignore
├── README.md
├── bin
│   ├── docker.build
│   ├── docker.train
│   ├── pred.sh
│   ├── tboard.sh
│   └── train.sh
├── conf.py
├── config
│   ├── Dockerfile
│   ├── charset.4100.txt
│   ├── charset.txt
│   ├── memory_usage.xls
│   ├── pip.conf
│   └── sources.list
├── main
│   ├── __init__.py
│   ├── pred.py
│   └── train.py
├── network
│   ├── __init__.py
│   ├── layers
│   │   ├── __init__.py
│   │   ├── class_branch_layer.py
│   │   ├── fcn_layer.py
│   │   ├── geometry_branch_layer.py
│   │   └── word_formation_layer.py
│   └── model.py
├── requirements.txt
├── test
│   ├── __init__.py
│   ├── gaussian_filter.py
│   ├── make_decouple_map.py
│   ├── test_accuracy.py
│   ├── test_call.py
│   ├── test_customized_layer.py
│   ├── test_draw_charactor_segment.py
│   ├── test_file_process.py
│   ├── test_heirachy.py
│   ├── test_image_process.py
│   ├── test_krnn.py
│   ├── test_label_maker.py
│   ├── test_summary_image.py
│   └── test_tensor_process.py
└── utils
    ├── __init__.py
    ├── image_utils.py
    ├── label
    │   ├── __init__.py
    │   ├── label.py
    │   ├── label_maker.py
    │   └── label_utils.py
    ├── logger.py
    ├── sequence.py
    ├── util.py
    ├── val_sequence.py
    └── visualise_callback.py

/.gitignore:
--------------------------------------------------------------------------------
1 | logs/
2 | 
3 | data/
4 | data/*
5 | data
6 | 
7 | .idea
8 | *.pyc
9 | */*.pyc
10 | __pycache__
11 | model
12 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | This is an implementation of the [TextScanner paper](https://arxiv.org/abs/1912.12422).
2 | 
3 | For notes on understanding the paper, please read my blog post: [TextScanner的一些研究](http://www.piginzoo.com/machine-learning/2020/04/14/ocr-fa-textscanner)
4 | 
5 | # implementation list (ongoing...)
6 | - [X] implement the network
7 | - [ ] implement the mutual-supervision mechanism
8 | - [X] implement the loss functions
9 | - [X] create the character-level annotation GT, and prepare the non-character-level GT
10 | - [X] implement the training code
11 | - [X] implement the evaluation code
12 | - [X] train the model
13 | 
14 | # development logs
15 | - 2020.4.24 created the project and implemented its skeleton
16 | - 2020.4.30 implemented the network code, and finished the GT generator and the loss functions
17 | - 2020.5.12 the network finally works, after hundreds of rounds of troubleshooting; TF2.0/tf.keras is full of pitfalls
18 | - 2020.6.03 made a [new branch](https://github.com/piginzoo/textscanner/tree/b_troubleshooting_OOM) to solve the OOM issue
19 | 
20 | # Branches
21 | - *[b_troubleshooting_OOM](https://github.com/piginzoo/textscanner/tree/b_troubleshooting_OOM): tries to fix the GPU OOM issue.
22 |   This is the branch I currently work on, mainly.<------*
23 | - [b_wordform_in_model](https://github.com/piginzoo/textscanner/tree/b_wordform_in_model): implements word formation as an internal layer of the model;
24 |   this branch does not deal with the OOM issue (reducing the charset size) and focuses on the elegance of the code implementation.
25 | - [b_multiple_gpus_train](https://github.com/piginzoo/textscanner/tree/b_multiple_gpus_train): implements multi-GPU training
26 | 
27 | # implementation details
28 | Development details can be tracked in my [textscanner implementation issues](https://www.notion.so/piginzoospace/Textscanner-254a700668714f0d811afe2ab8124046).
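29 | 
30 | # Quick start (sketch)
31 | A minimal usage sketch based on the scripts under `bin/`; the GPU id and paths are examples, not fixed values:
32 | 
33 | ```bash
34 | bin/docker.build                             # build the docker image (pass "proxy" to use the proxy configured in the script)
35 | bin/docker.train 0                           # train inside the container on GPU 0, with data mounted as described in bin/docker.train
36 | bin/train.sh debug                           # quick debug run on the tiny dataset under data/test, without docker
37 | bin/pred.sh --image=<image> --model=<model>  # predict one image with a trained model (arguments are forwarded to main/pred.py)
38 | ```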
--------------------------------------------------------------------------------
/bin/docker.build:
--------------------------------------------------------------------------------
1 | if [ "$1" == "proxy" ]; then
2 |     echo "Build the docker image with proxy"
3 |     docker build \
4 |         --no-cache \
5 |         --network host \
6 |         --build-arg http_proxy="http://172.17.0.1:8123" \
7 |         --build-arg https_proxy="http://172.17.0.1:8123" \
8 |         --build-arg HTTP_PROXY="http://172.17.0.1:8123" \
9 |         --build-arg HTTPS_PROXY="http://172.17.0.1:8123" \
10 |         -f config/Dockerfile \
11 |         -t textscanner.img .
12 |     exit
13 | fi
14 | 
15 | docker build -f config/Dockerfile -t textscanner.img .
--------------------------------------------------------------------------------
/bin/docker.train:
--------------------------------------------------------------------------------
1 | if [ "$1" == "" ]; then
2 |     echo "Usage: bin/docker.train <GPU id>"
3 |     exit
4 | fi
5 | 
6 | PWD=`pwd`
7 | echo $PWD
8 | docker run --rm -it \
9 |     -e NVIDIA_VISIBLE_DEVICES=$1 \
10 |     --runtime=nvidia \
11 |     --mount type=bind,source=$PWD,target=/root/textscanner \
12 |     --mount type=bind,source=/root/.keras,target=/root/.keras \
13 |     --mount type=bind,source=/app/data/textscanner/20200602_syntext/,target=/root/textscanner/data \
14 |     --name textscanner \
15 |     --workdir /root/textscanner \
16 |     textscanner.img \
17 |     bin/train.sh $2
--------------------------------------------------------------------------------
/bin/pred.sh:
--------------------------------------------------------------------------------
1 | if [ "$1" == "" ] || [ "$1" == "help" ]; then
2 |     echo "Usage:"
3 |     echo "\tpred.sh --image=<image path> --model=<model path>"
4 |     exit
5 | fi
6 | 
7 | echo "Start predicting ..."
8 | 
9 | python -m main.pred $1 $2
--------------------------------------------------------------------------------
/bin/tboard.sh:
--------------------------------------------------------------------------------
1 | if [ "$1" == "" ]; then
2 |     echo "Usage: tboard.sh <port>"
3 |     exit
4 | fi
5 | 
6 | nohup /root/py3/bin/tensorboard --port=$1 --logdir=./logs/tboard >/dev/null 2>&1 &
--------------------------------------------------------------------------------
/bin/train.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Parameter notes:
3 | # python -m main.train \
4 | #    --name=attention_ocr \
5 | #    --epochs=200 \            # 200 epochs, though training may not run them all, because of early stop
6 | #    --steps_per_epoch=1000 \  # batches per epoch; strictly it should be total_samples/batch_size, but we have millions of samples, which is too slow, so we take only 1000 batches
7 | #                              # as one "epoch". Why? Because Keras only fires its callbacks (validate, early stop, etc.) at the end of each epoch
8 | #    --batch=64 \
9 | #    --learning_rate=0.001 \
10 | #    --validation_batch=64 \
11 | #    --retrain=True \          # retrain from scratch, or continue from a checkpoint
12 | #    --validation_steps=10 \   # how many batches to validate; "steps" is a poor name, it should be "batches"; it could actually be computed, here 64x10=640 samples are validated in total
13 | #    --workers=10 \
14 | #    --preprocess_num=100 \
15 | #    --early_stop=10 \         # stop if there is no improvement for 10 epochs, roughly 10k batches
16 | 
17 | echo "Begin to train ..."
18 | 
19 | Date=$(date +%Y%m%d%H%M)
20 | export CUDA_VISIBLE_DEVICES=0
21 | 
22 | 
23 | if [ "$1" == "console" ] || [ "$1" == "debug" ]; then
24 | 
25 |     if [ "$1" == "debug" ]; then
26 |         echo "_/_/_/_/_/_/ Start PDB Debugging... _/_/_/_/_/_/"
27 |         sed -i '1i\import pdb; pdb.set_trace()\n' main/train.py
28 |     fi
29 | 
30 |     echo "In DEBUG mode ..."
31 |     # --validation_steps=1 \
32 |     # Test notes:
33 |     #   training: 10 images, but with steps_per_epoch=2 and batch=3, a reshuffle is expected after 6 images
34 |     #   validation: with a Sequence, the validation_steps argument is not needed; it computes len(data)/batch itself.
35 |     #   If you set it anyway, it must be smaller than that. Also still to verify: does it average per-batch results, or compute over the whole set?
36 |     export CUDA_VISIBLE_DEVICES=0  # no GPU is needed for debugging
37 |     python -m main.train \
38 |         --name=textscanner \
39 |         --epochs=1 \
40 |         --debug_mode \
41 |         --debug_step=1 \
42 |         --steps_per_epoch=1 \
43 |         --batch=3 \
44 |         --retrain=True \
45 |         --learning_rate=0.001 \
46 |         --train_label_dir=data/test \
47 |         --validate_label_dir=data/test \
48 |         --validation_batch=1 \
49 |         --validation_steps=1 \
50 |         --preprocess_num=1 \
51 |         --workers=3 \
52 |         --early_stop=1
53 | 
54 |     if [ "$1" == "debug" ]; then
55 |         # restore the source file, to avoid committing the change to git
56 |         sed -i '1d' main/train.py
57 |     fi
58 | 
59 |     exit
60 | fi
61 | 
62 | if [ "$1" = "stop" ]; then
63 |     echo "Stop Training!"
64 |     ps aux|grep python|grep name=textscanner|awk '{print $2}'|xargs kill -9
65 |     exit
66 | fi
67 | 
68 | 
69 | echo "Production Mode ..."
70 | echo "Using GPU #$CUDA_VISIBLE_DEVICES"
71 | 
72 | nohup python -m main.train \
73 |     --name=textscanner \
74 |     --steps_per_epoch=1000 \
75 |     --epochs=5000000 \
76 |     --debug_step=1000 \
77 |     --batch=32 \
78 |     --retrain=True \
79 |     --learning_rate=0.001 \
80 |     --validation_batch=64 \
81 |     --validation_steps=10 \
82 |     --workers=10 \
83 |     --early_stop=100 \
84 |     >> ./logs/Attention_GPU${CUDA_VISIBLE_DEVICES}_$Date.log 2>&1 &
--------------------------------------------------------------------------------
/conf.py:
--------------------------------------------------------------------------------
1 | import argparse,sys
2 | 
3 | '''
4 | define the basic configuration parameters,
5 | also define one command-line argument parsing method: init_args
6 | '''
7 | MAX_SEQUENCE = 30  # maximum length of a recognized text, in characters
8 | MASK_VALUE = 0
9 | CHARSET = "config/charset.4100.txt"  # level-1 charset + punctuation + digits + common place/person-name characters from the level-2 charset (made by TianT.)
10 | INPUT_IMAGE_HEIGHT = 64  # normalized image height
11 | INPUT_IMAGE_WIDTH = 256  # maximum image width
12 | GRU_HIDDEN_SIZE = 64  # number of hidden units in the GRU
13 | FEATURE_MAP_REDUCE = 8  # downscale factor of the feature map vs the original image (the feature map fed to the bi-GRU decoder); currently 8, because ResNet50 downscales by 8
14 | FILTER_NUM = 64  # default number of hidden units in the custom layers
15 | 
16 | DEBUG = True
17 | 
18 | DIR_LOGS="logs"
19 | DIR_TBOARD="logs/tboard"
20 | DIR_MODEL="model"
21 | DIR_CHECKPOINT="model/checkpoint"
22 | LABLE_FORMAT="plaintext"  # label format: "labelme" (JSON) or "plaintext" (plain text)
23 | 
24 | # dislike the flags style of tensorflow, instead using plain argparse
25 | 
26 | def init_args():
27 |     parser = argparse.ArgumentParser()
28 |     parser.add_argument("--name", default="attention_ocr", type=str, help="")
29 |     parser.add_argument("--train_label_dir", default="data/train", type=str, help="")
30 |     parser.add_argument("--validate_label_dir", default="data/train", type=str, help="")
31 |     parser.add_argument("--train_label_file", default="data/train/train.txt", type=str, help="")
32 |     parser.add_argument("--validate_label_file", default="data/train/train.txt", type=str, help="")
33 |     parser.add_argument("--epochs", default=1, type=int, help="")
34 |     parser.add_argument("--debug_mode", default=False, action='store_true', help="")
35 |     parser.add_argument("--debug_step", default=1, type=int, help="")  # print the attention every this many steps
36 |     parser.add_argument("--steps_per_epoch", default=None, type=int, help="")
37 |     parser.add_argument("--batch", default=1, type=int, help="")
38 |     parser.add_argument("--learning_rate", default=0.001, type=float, help="")
39 |     parser.add_argument("--workers", default=1, type=int, help="")
40 |     parser.add_argument("--retrain", default=False, type=lambda s: str(s).lower() in ("true", "1", "yes"), help="")  # note: argparse's type=bool would treat any non-empty string, even "False", as True
41 |     parser.add_argument("--preprocess_num", default=None, type=int, help="")  # total number of samples to use, for debugging; None means all samples
42 |     parser.add_argument("--validation_steps", default=1, type=int, help="")
43 |     parser.add_argument("--validation_batch", default=1, type=int, help="")
44 |     parser.add_argument("--early_stop", default=1, type=int, help="")
45 |     args = parser.parse_args()
46 |     print("==============================")
47 |     print("       Configurations :       ")
48 |     print("==============================")
49 |     print(args)
50 | 
51 |     sys.modules[__name__].DEBUG = args.debug_mode
52 | 
53 |     # if args.debug_mode:
54 |     #     print("Running in DEBUG mode!")
55 |     #     sys.modules[__name__].FILTER_NUM = 1
56 | 
57 |     return args
58 | 
59 | 
60 | def init_pred_args():
61 |     parser = argparse.ArgumentParser()
62 |     parser.add_argument("--image", default=1, type=str, help="")
63 |     parser.add_argument("--model", default=1, type=str, help="")
64 |     args = parser.parse_args()
65 |     return args
--------------------------------------------------------------------------------
/config/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM tensorflow/tensorflow:2.1.0-gpu-py3
2 | 
3 | MAINTAINER piginzoo
4 | 
5 | RUN cp /etc/apt/sources.list /etc/apt/sources.list.backup
6 | ADD config/sources.list /etc/apt/sources.list
7 | RUN apt-get update
8 | RUN apt-get install -y vim build-essential libglib2.0-0 libsm6 libxext6 libxrender-dev
9 | 
10 | RUN mkdir /root/.pip
11 | ADD config/pip.conf /root/.pip
12 | ADD requirements.txt /root/requirements.txt
13 | RUN pip install -r /root/requirements.txt
--------------------------------------------------------------------------------
/config/charset.4100.txt:
--------------------------------------------------------------------------------
1 | 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
2 | !@#$%^&*()-_+=×{}[]|\<>,.;:?/"'~
3 | 《》①②③④⑤⑥⑦⑧⑨⑩【】。、“”‘’°¥○●□■
4 | 啊阿埃挨哎唉哀皑癌蔼矮艾碍爱隘鞍氨安俺按暗岸胺案肮昂盎凹敖熬翱袄傲奥懊澳芭捌扒叭吧笆八疤巴拔跋靶把耙坝霸罢爸白柏百摆佰败拜稗斑班搬扳般颁板版扮拌伴瓣半办绊邦帮梆榜膀绑棒磅蚌镑傍谤苞胞包褒剥薄雹保堡饱宝抱报暴豹鲍爆杯碑悲卑北辈背贝
钡倍狈备惫焙被奔苯本笨崩绷甭泵蹦迸逼鼻比鄙笔彼碧蓖蔽毕毙毖币庇痹闭敝弊必辟壁臂避陛鞭边编贬扁便变卞辨辩辫遍标彪膘表鳖憋别瘪彬斌濒滨宾摈兵冰柄丙秉饼炳病并玻菠播拨钵波博勃搏铂箔伯帛舶脖膊渤泊驳捕卜哺补埠不布步簿部怖擦猜裁材才财睬踩采彩菜蔡餐参蚕残惭惨灿苍舱仓沧藏操糙槽曹草厕策侧册测层蹭插叉茬茶查碴搽察岔差诧拆柴豺搀掺蝉馋谗缠铲产阐颤昌猖场尝常长偿肠厂敞畅唱倡超抄钞朝嘲潮巢吵炒车扯撤掣彻澈郴臣辰尘晨忱沉陈趁衬撑称城橙成呈乘程惩澄诚承逞骋秤吃痴持匙池迟弛驰耻齿侈尺赤翅斥炽充冲虫崇宠抽酬畴踌稠愁筹仇绸瞅丑臭初出橱厨躇锄雏滁除楚础储矗搐触处揣川穿椽传船喘串疮窗幢床闯创吹炊捶锤垂春椿醇唇淳纯蠢戳绰疵茨磁雌辞慈瓷词此刺赐次聪葱囱匆从丛凑粗醋簇促蹿篡窜摧崔催脆瘁粹淬翠村存寸磋撮搓措挫错搭达答瘩打大呆歹傣戴带殆代贷袋待逮怠耽担丹单郸掸胆旦氮但惮淡诞弹蛋当挡党荡档刀捣蹈倒岛祷导到稻悼道盗德得的蹬灯登等瞪凳邓堤低滴迪敌笛狄涤翟嫡抵底地蒂第帝弟递缔颠掂滇碘点典靛垫电佃甸店惦奠淀殿碉叼雕凋刁掉吊钓调跌爹碟蝶迭谍叠丁盯叮钉顶鼎锭定订丢东冬董懂动栋侗恫冻洞兜抖斗陡豆逗痘都督毒犊独读堵睹赌杜镀肚度渡妒端短锻段断缎堆兑队对墩吨蹲敦顿囤钝盾遁掇哆多夺垛躲朵跺舵剁惰堕蛾峨鹅俄额讹娥恶厄扼遏鄂饿恩而儿耳尔饵洱二贰发罚筏伐乏阀法珐藩帆番翻樊矾钒繁凡烦反返范贩犯饭泛坊芳方肪房防妨仿访纺放菲非啡飞肥匪诽吠肺废沸费芬酚吩氛分纷坟焚汾粉奋份忿愤粪丰封枫蜂峰锋风疯烽逢冯缝讽奉凤佛否夫敷肤孵扶拂辐幅氟符伏俘服浮涪福袱弗甫抚辅俯釜斧脯腑府腐赴副覆赋复傅付阜父腹负富讣附妇缚咐噶嘎该改概钙盖溉干甘杆柑竿肝赶感秆敢赣冈刚钢缸肛纲岗港杠篙皋高膏羔糕搞镐稿告哥歌搁戈鸽胳疙割革葛格蛤阁隔铬个各给根跟耕更庚羹埂耿梗工攻功恭龚供躬公宫弓巩汞拱贡共钩勾沟苟狗垢构购够辜菇咕箍估沽孤姑鼓古蛊骨谷股故顾固雇刮瓜剐寡挂褂乖拐怪棺关官冠观管馆罐惯灌贯光广逛瑰规圭硅归龟闺轨鬼诡癸桂柜跪贵刽辊滚棍锅郭国果裹过哈骸孩海氦亥害骇酣憨邯韩含涵寒函喊罕翰撼捍旱憾悍焊汗汉夯杭航壕嚎豪毫郝好耗号浩呵喝荷菏核禾和何合盒貉阂河涸赫褐鹤贺嘿黑痕很狠恨哼亨横衡恒轰哄烘虹鸿洪宏弘红喉侯猴吼厚候后呼乎忽瑚壶葫胡蝴狐糊湖弧虎唬护互沪户花哗华猾滑画划化话槐徊怀淮坏欢环桓还缓换患唤痪豢焕涣宦幻荒慌黄磺蝗簧皇凰惶煌晃幌恍谎灰挥辉徽恢蛔回毁悔慧卉惠晦贿秽会烩汇讳诲绘荤昏婚魂浑混豁活伙火获或惑霍货祸击圾基机畸稽积箕肌饥迹激讥鸡姬绩缉吉极棘辑籍集及急疾汲即嫉级挤几脊己蓟技冀季伎祭剂悸济寄寂计记既忌际妓继纪嘉枷夹佳家加荚颊贾甲钾假稼价架驾嫁歼监坚尖笺间煎兼肩艰奸缄茧检柬碱硷拣捡简俭剪减荐槛鉴践贱见键箭件健舰剑饯渐溅涧建僵姜将浆江疆蒋桨奖讲匠酱降蕉椒礁焦胶交郊浇骄娇嚼搅铰矫侥脚狡角饺缴绞剿教酵轿较叫窖揭接皆秸街阶截劫节桔杰捷睫竭洁结解姐戒藉芥界借介疥诫届巾筋斤金今津襟紧锦仅谨进靳晋禁近烬浸尽劲荆兢茎睛晶鲸京惊精粳经井警景颈静境敬镜径痉靖竟竞净炯窘揪究纠玖韭久灸九酒厩救旧臼舅咎就疚鞠拘狙疽居驹菊局咀矩举沮聚拒据巨具距踞锯俱句惧炬剧捐鹃娟倦眷卷绢撅攫抉掘倔爵觉决诀绝均菌钧军君峻俊竣浚郡骏喀咖卡咯开揩楷凯慨刊堪勘坎砍看康慷糠扛抗亢炕考拷烤靠坷苛柯棵磕颗科壳咳可渴克刻客课肯啃垦恳坑吭空恐孔控抠口扣寇枯哭窟苦酷库裤夸垮挎跨胯块筷侩快宽款匡筐狂框矿眶旷况亏盔岿窥葵奎魁傀馈愧溃坤昆捆困括扩廓阔垃拉喇蜡腊辣啦莱来赖蓝婪栏拦篮阑兰澜谰揽览懒缆烂滥琅榔狼廊郎朗浪捞劳牢老佬姥酪烙涝勒乐雷镭蕾磊累儡垒擂肋类泪棱楞冷厘梨犁黎篱狸离漓理李里鲤礼莉荔吏栗丽厉励砾历利傈例俐痢立粒沥隶力璃哩俩联莲连镰廉怜涟帘敛脸链恋炼练粮凉梁粱良两辆量晾亮谅撩聊僚疗燎寥辽潦了撂镣廖料列裂烈劣猎琳林磷霖临邻鳞淋凛赁吝拎玲菱零龄铃伶羚凌灵陵岭领另令溜琉榴硫馏留刘瘤流柳六龙聋咙笼窿隆垄拢陇楼娄搂篓漏陋芦卢颅庐炉掳卤虏鲁麓碌露路赂鹿潞禄录陆戮驴吕铝侣旅履屡缕虑氯律率滤绿峦挛孪滦卵乱掠略抡轮伦仑沦纶论萝螺罗逻锣箩骡裸落洛骆络妈麻玛码蚂马骂嘛吗埋买麦卖迈脉瞒馒蛮满蔓曼慢漫谩芒茫盲氓忙莽猫茅锚毛矛铆卯茂冒帽貌贸么玫枚梅酶霉煤没眉媒镁每美昧寐妹媚门闷们萌蒙檬盟锰猛梦孟眯醚靡糜迷谜弥米秘觅泌蜜密幂棉眠绵冕免勉娩缅面苗描瞄藐秒渺庙妙蔑灭民抿皿敏悯闽明螟鸣铭名命谬摸摹蘑模膜磨摩魔抹末莫墨默沫漠寞陌谋牟某拇牡亩姆母墓暮幕募慕木目睦牧穆拿哪呐钠那娜纳氖乃奶耐奈南男难囊挠脑恼闹淖呢馁内嫩能妮霓倪泥尼拟你匿腻逆溺蔫拈年碾撵捻念娘酿鸟尿捏聂孽啮镊镍涅您柠狞凝宁拧泞牛扭钮纽脓浓农弄奴努怒女暖虐疟挪懦糯诺哦欧鸥殴藕呕偶沤啪趴爬帕怕琶拍排牌徘湃派攀潘盘磐盼畔判叛乓庞旁耪胖抛咆刨炮袍跑泡呸胚培裴赔陪配佩沛喷盆砰抨烹澎彭蓬棚硼篷膨朋鹏捧碰坯砒霹批披劈琵毗啤脾疲皮匹痞僻屁譬篇偏片骗飘漂瓢票撇瞥拼频贫品聘乒坪苹萍平凭瓶评屏坡泼颇婆破魄迫粕剖扑铺仆莆葡菩蒲埔朴圃普浦谱曝瀑期欺栖戚妻七凄漆柒沏其棋奇歧畦崎脐齐旗祈祁骑起岂乞企启契砌器气迄弃汽泣讫掐恰洽牵扦钎铅千迁签仟谦乾黔钱钳前潜遣浅谴堑嵌欠歉枪呛腔羌墙蔷强抢橇锹敲悄桥瞧乔侨巧鞘撬翘峭俏窍切茄且怯窃钦侵亲秦琴勤芹擒禽寝沁青轻氢倾卿清擎晴氰情顷请庆琼穷秋丘邱球求囚酋泅趋区蛆曲躯屈驱渠取娶龋趣去圈颧权醛泉全痊拳犬券劝缺炔瘸却鹊榷确雀裙群然燃冉染瓤壤攘嚷让饶扰绕惹热壬仁人忍韧任认刃妊纫扔仍日戎茸蓉荣融熔溶容绒冗揉柔肉茹蠕儒孺如辱乳汝入褥软阮蕊瑞锐闰润若弱撒洒萨腮鳃塞赛三叁伞散桑嗓丧搔骚扫嫂瑟色涩森僧莎砂杀刹沙纱傻啥煞筛晒珊苫杉山删煽衫闪陕擅赡膳善汕扇缮墒伤商赏晌上尚裳梢捎稍烧芍勺韶少哨邵绍奢赊蛇舌舍赦摄射慑涉社设砷申呻伸身深娠绅神沈审婶甚肾慎渗声生甥牲升绳省盛剩胜圣师失狮施湿诗尸虱十石拾时什食蚀实识史矢使屎驶始式示士世柿事拭誓逝势是嗜噬适仕侍释饰氏市恃室视试收手首守寿授售受瘦兽蔬枢梳殊抒输叔舒淑疏书赎孰熟薯暑曙署蜀黍鼠属术述树束戍竖墅庶数漱恕刷耍摔衰甩帅栓拴霜双爽谁水睡税吮瞬顺舜说硕朔烁斯撕嘶思私司丝死肆寺嗣四伺似饲巳松耸怂颂送宋讼诵搜艘擞嗽苏酥俗素速粟僳塑溯宿诉肃酸蒜算虽隋随绥髓碎岁穗遂隧祟孙损笋蓑梭唆缩琐索锁所塌他它她塔獭挞蹋踏胎苔抬台泰酞太态汰坍摊贪瘫滩坛檀痰潭谭谈坦毯袒碳探叹炭汤塘搪堂棠膛唐糖倘躺淌趟烫掏涛滔绦萄桃逃淘陶讨套特藤腾疼誊梯剔踢锑提题蹄啼体替嚏惕涕剃屉天添填田甜恬舔腆挑条迢眺跳贴铁帖厅听烃汀廷停亭庭挺艇通桐酮瞳同铜彤童桶捅筒统痛偷投头透凸秃突图徒途涂屠土吐兔湍团推颓腿蜕褪退吞屯臀拖托脱鸵陀驮驼椭妥拓唾挖哇蛙洼娃瓦袜歪外豌弯湾玩顽丸烷完碗挽晚皖惋宛婉万腕汪王亡枉网往旺望忘妄威巍微危韦违桅围唯惟为潍维苇萎委伟伪尾纬未蔚味畏胃喂魏位渭谓尉慰卫瘟温蚊文闻纹吻稳紊问嗡翁瓮挝蜗涡窝我斡卧握沃巫呜钨乌污诬屋无芜梧吾吴毋武五捂午舞伍侮坞戊雾晤物勿务悟误昔熙析西硒矽晰嘻吸锡牺稀息希悉膝夕惜熄烯溪汐犀檄袭席习媳喜铣洗系隙戏细瞎虾匣霞辖暇峡侠狭下厦夏吓掀锨先仙鲜纤咸贤衔舷闲涎弦嫌显险现献县腺馅羡宪陷限线相厢镶香箱襄湘乡翔祥详想响享项巷橡像向象萧硝霄削哮嚣销消宵淆晓小孝校肖啸笑效楔些歇蝎鞋协挟携邪斜胁谐写械卸蟹懈泄泻谢屑薪芯锌欣辛新忻心信衅星腥猩惺兴刑型形邢行醒幸杏性姓兄凶胸匈汹雄熊休修羞朽嗅锈秀袖绣墟戌需虚嘘须徐许蓄酗叙旭序畜恤絮婿绪续轩喧宣悬旋玄选癣眩绚靴薛学穴雪血勋熏循旬询寻驯巡殉汛训讯逊迅压押鸦鸭呀丫芽牙蚜崖衙涯雅哑亚讶焉咽阉烟淹盐严研蜒岩延言颜阎炎沿奄掩眼衍演艳堰燕厌砚雁唁彦焰宴谚验殃央鸯秧杨扬佯疡羊洋阳氧仰痒养样漾邀腰妖瑶摇尧遥窑谣姚咬舀药要耀椰噎耶爷野冶也页掖业叶曳腋夜液一壹医揖铱依伊衣颐夷遗移仪胰疑沂宜姨彝椅蚁倚已乙矣以艺抑易邑屹亿役臆逸肄疫亦裔意毅忆义益溢诣议谊译异翼翌绎茵荫因殷音阴姻吟银淫寅饮尹引隐印英樱婴鹰应缨莹萤营荧蝇迎赢盈影颖硬映哟拥佣臃痈庸雍踊蛹咏泳涌永恿勇用幽优悠忧尤由邮铀犹油游酉有友右佑釉诱又幼迂淤于盂榆虞愚舆余俞逾鱼愉渝渔隅予娱雨与屿禹宇语羽玉域芋郁吁遇喻峪御愈欲狱育誉浴寓裕预豫驭鸳渊冤元垣袁原援辕园员圆猿源缘远苑愿怨院曰约越跃钥岳粤月悦阅耘云郧匀陨允运蕴酝晕韵孕匝砸杂栽哉灾宰载再在咱攒暂赞赃脏葬遭糟凿藻枣早澡蚤躁噪造皂灶燥责择则泽贼怎增憎曾赠扎喳渣札轧铡闸眨栅榨咋乍炸诈摘斋宅窄债寨瞻毡詹粘沾盏斩辗崭展蘸栈占战站湛绽樟章彰漳张掌涨杖丈帐账仗胀瘴障招昭找沼赵照罩兆肇召遮折哲蛰辙者锗蔗这浙珍斟真甄砧臻贞针侦枕疹诊震振镇阵蒸挣睁征狰争怔整拯正政帧症郑证芝枝支吱蜘知肢脂汁之织职直植殖执值侄址指止趾只旨纸志挚掷至致置帜峙制智秩稚质炙痔滞治窒中盅忠钟衷终种肿重仲众舟周州洲诌粥轴肘帚咒皱宙昼骤珠株蛛朱猪诸诛逐竹烛煮拄瞩嘱主著柱助蛀贮铸筑住注祝
驻抓爪拽专砖转撰赚篆桩庄装妆撞壮状椎锥追赘坠缀谆准捉拙卓桌琢茁酌啄着灼浊兹咨资姿滋淄孜紫仔籽滓子自渍字鬃棕踪宗综总纵邹走奏揍租足卒族祖诅阻组钻纂嘴醉最罪尊遵昨左佐柞做作坐座 5 | 衢亳濮漯圳莞儋泸泗颍佤岚泾潼祜赉桦洮睢沅陉栾涞涿绛溧沭瓯浔嵊婺岱弋谯璧旌柘汶莒荥嵩淇驿澧圩榕岑梓仡麟勐湟坻藁妃蠡骅猗稷芮岢隰磴岫鲅蛟珲讷箐闵邺邳盱眙邗鄞暨缙畲鸠庵濉枞歙黟琊埇砀芗诏濂鄱崂峄滕罘朐兖郯茌莘棣郓鄄杞瀍偃郏陟鄢郾渑淅浉潢硚陂猇秭浠蕲芙浏淞渌攸醴晖耒汨溆芷禺浈濠禅邕覃仫碚綦郫邛崃蔺邡犍沐阆珙筠蓥孚湄阡谟麒蒗濞迦灞鄠岐崆峒岷宕晏坂鄯耆伽 6 | 婷晗鑫祺瑾琪倩媛楠馨缤罡闫昊珂睿瑛裱炜怡妍芸宸缪苡烨畈嘟炫鞫邸摽窦雯薇玮钊淼琦珞佥曦钰煜渎璐姣娅晟恪 -------------------------------------------------------------------------------- /config/charset.txt: -------------------------------------------------------------------------------- 1 | 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!@#$%^&*()-_+={}[]|\<>,.。;:、?/'" 2 | 《》①②③④⑤⑥⑦⑧⑨⑩【】¥ 3 | 啊阿埃挨哎唉哀皑癌蔼矮艾碍爱隘鞍氨安俺按暗岸胺案肮昂盎凹敖熬翱袄傲奥懊澳芭捌扒叭吧笆八疤巴拔跋靶把耙坝霸罢爸白柏百摆佰败拜稗斑班搬扳般颁板版扮拌伴瓣半办绊邦帮梆榜膀绑棒磅蚌镑傍谤苞胞包褒剥薄雹保堡饱宝抱报暴豹鲍爆杯碑悲卑北辈背贝钡倍狈备惫焙被奔苯本笨崩绷甭泵蹦迸逼鼻比鄙笔彼碧蓖蔽毕毙毖币庇痹闭敝弊必辟壁臂避陛鞭边编贬扁便变卞辨辩辫遍标彪膘表鳖憋别瘪彬斌濒滨宾摈兵冰柄丙秉饼炳病并玻菠播拨钵波博勃搏铂箔伯帛舶脖膊渤泊驳捕卜哺补埠不布步簿部怖擦猜裁材才财睬踩采彩菜蔡餐参蚕残惭惨灿苍舱仓沧藏操糙槽曹草厕策侧册测层蹭插叉茬茶查碴搽察岔差诧拆柴豺搀掺蝉馋谗缠铲产阐颤昌猖场尝常长偿肠厂敞畅唱倡超抄钞朝嘲潮巢吵炒车扯撤掣彻澈郴臣辰尘晨忱沉陈趁衬撑称城橙成呈乘程惩澄诚承逞骋秤吃痴持匙池迟弛驰耻齿侈尺赤翅斥炽充冲虫崇宠抽酬畴踌稠愁筹仇绸瞅丑臭初出橱厨躇锄雏滁除楚础储矗搐触处揣川穿椽传船喘串疮窗幢床闯创吹炊捶锤垂春椿醇唇淳纯蠢戳绰疵茨磁雌辞慈瓷词此刺赐次聪葱囱匆从丛凑粗醋簇促蹿篡窜摧崔催脆瘁粹淬翠村存寸磋撮搓措挫错搭达答瘩打大呆歹傣戴带殆代贷袋待逮怠耽担丹单郸掸胆旦氮但惮淡诞弹蛋当挡党荡档刀捣蹈倒岛祷导到稻悼道盗德得的蹬灯登等瞪凳邓堤低滴迪敌笛狄涤翟嫡抵底地蒂第帝弟递缔颠掂滇碘点典靛垫电佃甸店惦奠淀殿碉叼雕凋刁掉吊钓调跌爹碟蝶迭谍叠丁盯叮钉顶鼎锭定订丢东冬董懂动栋侗恫冻洞兜抖斗陡豆逗痘都督毒犊独读堵睹赌杜镀肚度渡妒端短锻段断缎堆兑队对墩吨蹲敦顿囤钝盾遁掇哆多夺垛躲朵跺舵剁惰堕蛾峨鹅俄额讹娥恶厄扼遏鄂饿恩而儿耳尔饵洱二贰发罚筏伐乏阀法珐藩帆番翻樊矾钒繁凡烦反返范贩犯饭泛坊芳方肪房防妨仿访纺放菲非啡飞肥匪诽吠肺废沸费芬酚吩氛分纷坟焚汾粉奋份忿愤粪丰封枫蜂峰锋风疯烽逢冯缝讽奉凤佛否夫敷肤孵扶拂辐幅氟符伏俘服浮涪福袱弗甫抚辅俯釜斧脯腑府腐赴副覆赋复傅付阜父腹负富讣附妇缚咐噶嘎该改概钙盖溉干甘杆柑竿肝赶感秆敢赣冈刚钢缸肛纲岗港杠篙皋高膏羔糕搞镐稿告哥歌搁戈鸽胳疙割革葛格蛤阁隔铬个各给根跟耕更庚羹埂耿梗工攻功恭龚供躬公宫弓巩汞拱贡共钩勾沟苟狗垢构购够辜菇咕箍估沽孤姑鼓古蛊骨谷股故顾固雇刮瓜剐寡挂褂乖拐怪棺关官冠观管馆罐惯灌贯光广逛瑰规圭硅归龟闺轨鬼诡癸桂柜跪贵刽辊滚棍锅郭国果裹过哈骸孩海氦亥害骇酣憨邯韩含涵寒函喊罕翰撼捍旱憾悍焊汗汉夯杭航壕嚎豪毫郝好耗号浩呵喝荷菏核禾和何合盒貉阂河涸赫褐鹤贺嘿黑痕很狠恨哼亨横衡恒轰哄烘虹鸿洪宏弘红喉侯猴吼厚候后呼乎忽瑚壶葫胡蝴狐糊湖弧虎唬护互沪户花哗华猾滑画划化话槐徊怀淮坏欢环桓还缓换患唤痪豢焕涣宦幻荒慌黄磺蝗簧皇凰惶煌晃幌恍谎灰挥辉徽恢蛔回毁悔慧卉惠晦贿秽会烩汇讳诲绘荤昏婚魂浑混豁活伙火获或惑霍货祸击圾基机畸稽积箕肌饥迹激讥鸡姬绩缉吉极棘辑籍集及急疾汲即嫉级挤几脊己蓟技冀季伎祭剂悸济寄寂计记既忌际妓继纪嘉枷夹佳家加荚颊贾甲钾假稼价架驾嫁歼监坚尖笺间煎兼肩艰奸缄茧检柬碱硷拣捡简俭剪减荐槛鉴践贱见键箭件健舰剑饯渐溅涧建僵姜将浆江疆蒋桨奖讲匠酱降蕉椒礁焦胶交郊浇骄娇嚼搅铰矫侥脚狡角饺缴绞剿教酵轿较叫窖揭接皆秸街阶截劫节桔杰捷睫竭洁结解姐戒藉芥界借介疥诫届巾筋斤金今津襟紧锦仅谨进靳晋禁近烬浸尽劲荆兢茎睛晶鲸京惊精粳经井警景颈静境敬镜径痉靖竟竞净炯窘揪究纠玖韭久灸九酒厩救旧臼舅咎就疚鞠拘狙疽居驹菊局咀矩举沮聚拒据巨具距踞锯俱句惧炬剧捐鹃娟倦眷卷绢撅攫抉掘倔爵觉决诀绝均菌钧军君峻俊竣浚郡骏喀咖卡咯开揩楷凯慨刊堪勘坎砍看康慷糠扛抗亢炕考拷烤靠坷苛柯棵磕颗科壳咳可渴克刻客课肯啃垦恳坑吭空恐孔控抠口扣寇枯哭窟苦酷库裤夸垮挎跨胯块筷侩快宽款匡筐狂框矿眶旷况亏盔岿窥葵奎魁傀馈愧溃坤昆捆困括扩廓阔垃拉喇蜡腊辣啦莱来赖蓝婪栏拦篮阑兰澜谰揽览懒缆烂滥琅榔狼廊郎朗浪捞劳牢老佬姥酪烙涝勒乐雷镭蕾磊累儡垒擂肋类泪棱楞冷厘梨犁黎篱狸离漓理李里鲤礼莉荔吏栗丽厉励砾历利傈例俐痢立粒沥隶力璃哩俩联莲连镰廉怜涟帘敛脸链恋炼练粮凉梁粱良两辆量晾亮谅撩聊僚疗燎寥辽潦了撂镣廖料列裂烈劣猎琳林磷霖临邻鳞淋凛赁吝拎玲菱零龄铃伶羚凌灵陵岭领另令溜琉榴硫馏留刘瘤流柳六龙聋咙笼窿隆垄拢陇楼娄搂篓漏陋芦卢颅庐炉掳卤虏鲁麓碌露路赂鹿潞禄录陆戮驴吕铝侣旅履屡缕虑氯律率滤绿峦挛孪滦卵乱掠略抡轮伦仑沦纶论萝螺罗逻锣箩骡裸落洛骆络妈麻玛码蚂马骂嘛吗埋买麦卖迈脉瞒馒蛮满蔓曼慢漫谩芒茫盲氓忙莽猫茅锚毛矛铆卯茂冒帽貌贸么玫枚梅酶霉煤没眉媒镁每美昧寐妹媚门闷们萌蒙檬盟锰猛梦孟眯醚靡糜迷谜弥米秘觅泌蜜密幂棉眠绵冕免勉娩缅面苗描瞄藐秒渺庙妙蔑灭民抿皿敏悯闽明螟鸣铭名命谬摸摹蘑模膜磨摩魔抹末莫墨默沫漠寞陌谋牟某拇牡亩姆母墓暮幕募慕木目睦牧穆拿哪呐钠那娜纳氖乃奶耐奈南男难囊挠脑恼闹淖呢馁内嫩能妮霓倪泥尼拟你匿腻逆溺蔫拈年碾撵捻念娘酿鸟尿捏聂孽啮镊镍涅您柠狞凝宁拧泞牛扭钮纽脓浓农弄奴努怒女暖虐疟挪懦糯诺哦欧鸥殴藕呕偶沤啪趴爬帕怕琶拍排牌徘湃派攀潘盘磐盼畔判叛乓庞旁耪胖抛咆刨炮袍跑泡呸胚培裴赔陪配佩沛喷盆砰抨烹澎彭蓬棚硼篷膨朋鹏捧碰坯砒霹批披劈琵毗啤脾疲皮匹痞僻屁譬篇偏片骗飘漂瓢票撇瞥拼频贫品聘乒坪苹萍平凭瓶评屏坡泼颇婆破魄迫粕剖扑铺仆莆葡菩蒲埔朴圃普浦谱曝瀑期欺栖戚妻七凄漆柒沏其棋奇歧畦崎脐齐旗祈祁骑起岂乞企启契砌器气迄弃汽泣讫掐恰洽牵扦钎铅千迁签仟谦乾黔钱钳前潜遣浅谴堑嵌欠歉枪呛腔羌墙蔷强抢橇锹敲悄桥瞧乔侨巧鞘撬翘峭俏窍切茄且怯窃钦侵亲秦琴勤芹擒禽寝沁青轻氢倾卿清擎晴氰情顷请庆琼穷秋丘邱球求囚酋泅趋区蛆曲躯屈驱渠取娶龋趣去圈颧权醛泉全痊拳犬券劝缺炔瘸却鹊榷确雀裙群然燃冉染瓤壤攘嚷让饶扰绕惹热壬仁人忍韧任认刃妊纫扔仍日戎茸蓉荣融熔溶容绒冗揉柔肉茹蠕儒孺如辱乳汝入褥软阮蕊瑞锐闰润若弱撒洒萨腮鳃塞赛三叁伞散桑嗓丧搔骚扫嫂瑟色涩森僧莎砂杀刹沙纱傻啥煞筛晒珊苫杉山删煽衫闪陕擅赡膳善汕扇缮墒伤商赏晌上尚裳梢捎稍烧芍勺韶少哨邵绍奢赊蛇舌舍赦摄射慑涉社设砷申呻伸身深娠绅神沈审婶甚肾慎渗声生甥牲升绳省盛剩胜圣师失狮施湿诗尸虱十石拾时什食蚀实识史矢使屎驶始式示士世柿事拭誓逝势是嗜噬适仕侍释饰氏市恃室视试收手首守寿授售受瘦兽蔬枢梳殊抒输叔舒淑疏书赎孰熟薯暑曙署蜀黍鼠属术述树束戍竖墅庶数漱恕刷耍摔衰甩帅栓拴霜双爽谁水睡税吮瞬顺舜说硕朔烁斯撕嘶思私司丝死肆寺嗣四伺似饲巳松耸怂颂送宋讼诵搜艘擞嗽苏酥俗素速粟僳塑溯宿诉肃酸蒜算虽隋随绥髓碎岁穗遂隧祟孙损笋蓑梭唆缩琐索锁所塌他它她塔獭挞蹋踏胎苔抬台泰酞太态汰坍摊贪瘫滩坛檀痰潭谭谈坦毯袒碳探叹炭汤塘搪堂棠膛唐糖倘躺淌趟烫掏涛滔绦萄桃逃淘陶讨套特藤腾疼誊梯剔踢锑提题蹄啼体替嚏惕涕剃屉天添填田甜恬舔腆挑条迢眺跳贴铁帖厅听烃汀廷停亭庭挺艇通桐酮瞳同铜彤童桶捅筒统痛偷投头透凸秃突图徒途涂屠土吐兔湍团推颓腿蜕褪退吞屯臀拖托脱鸵陀驮驼椭妥拓唾挖哇蛙洼娃瓦袜歪外豌弯湾玩顽丸烷完碗挽晚皖惋宛婉万腕汪王亡枉网往旺望忘妄威巍微危韦违桅围唯惟为潍维苇萎委伟伪尾纬未蔚味畏胃喂魏位渭谓尉慰卫瘟温蚊文闻纹吻稳紊问嗡翁瓮挝蜗涡窝我斡卧握沃巫呜钨乌污诬屋无芜梧吾吴毋武五捂午舞伍侮坞戊雾晤物勿务悟误昔熙析西硒矽晰嘻吸锡牺稀息希悉膝夕惜熄烯溪汐犀檄袭席习
媳喜铣洗系隙戏细瞎虾匣霞辖暇峡侠狭下厦夏吓掀锨先仙鲜纤咸贤衔舷闲涎弦嫌显险现献县腺馅羡宪陷限线相厢镶香箱襄湘乡翔祥详想响享项巷橡像向象萧硝霄削哮嚣销消宵淆晓小孝校肖啸笑效楔些歇蝎鞋协挟携邪斜胁谐写械卸蟹懈泄泻谢屑薪芯锌欣辛新忻心信衅星腥猩惺兴刑型形邢行醒幸杏性姓兄凶胸匈汹雄熊休修羞朽嗅锈秀袖绣墟戌需虚嘘须徐许蓄酗叙旭序畜恤絮婿绪续轩喧宣悬旋玄选癣眩绚靴薛学穴雪血勋熏循旬询寻驯巡殉汛训讯逊迅压押鸦鸭呀丫芽牙蚜崖衙涯雅哑亚讶焉咽阉烟淹盐严研蜒岩延言颜阎炎沿奄掩眼衍演艳堰燕厌砚雁唁彦焰宴谚验殃央鸯秧杨扬佯疡羊洋阳氧仰痒养样漾邀腰妖瑶摇尧遥窑谣姚咬舀药要耀椰噎耶爷野冶也页掖业叶曳腋夜液一壹医揖铱依伊衣颐夷遗移仪胰疑沂宜姨彝椅蚁倚已乙矣以艺抑易邑屹亿役臆逸肄疫亦裔意毅忆义益溢诣议谊译异翼翌绎茵荫因殷音阴姻吟银淫寅饮尹引隐印英樱婴鹰应缨莹萤营荧蝇迎赢盈影颖硬映哟拥佣臃痈庸雍踊蛹咏泳涌永恿勇用幽优悠忧尤由邮铀犹油游酉有友右佑釉诱又幼迂淤于盂榆虞愚舆余俞逾鱼愉渝渔隅予娱雨与屿禹宇语羽玉域芋郁吁遇喻峪御愈欲狱育誉浴寓裕预豫驭鸳渊冤元垣袁原援辕园员圆猿源缘远苑愿怨院曰约越跃钥岳粤月悦阅耘云郧匀陨允运蕴酝晕韵孕匝砸杂栽哉灾宰载再在咱攒暂赞赃脏葬遭糟凿藻枣早澡蚤躁噪造皂灶燥责择则泽贼怎增憎曾赠扎喳渣札轧铡闸眨栅榨咋乍炸诈摘斋宅窄债寨瞻毡詹粘沾盏斩辗崭展蘸栈占战站湛绽樟章彰漳张掌涨杖丈帐账仗胀瘴障招昭找沼赵照罩兆肇召遮折哲蛰辙者锗蔗这浙珍斟真甄砧臻贞针侦枕疹诊震振镇阵蒸挣睁征狰争怔整拯正政帧症郑证芝枝支吱蜘知肢脂汁之织职直植殖执值侄址指止趾只旨纸志挚掷至致置帜峙制智秩稚质炙痔滞治窒中盅忠钟衷终种肿重仲众舟周州洲诌粥轴肘帚咒皱宙昼骤珠株蛛朱猪诸诛逐竹烛煮拄瞩嘱主著柱助蛀贮铸筑住注祝驻抓爪拽专砖转撰赚篆桩庄装妆撞壮状椎锥追赘坠缀谆准捉拙卓桌琢茁酌啄着灼浊兹咨资姿滋淄孜紫仔籽滓子自渍字鬃棕踪宗综总纵邹走奏揍租足卒族祖诅阻组钻纂嘴醉最罪尊遵昨左佐柞做作坐座 -------------------------------------------------------------------------------- /config/memory_usage.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/piginzoo/textscanner/a934102e3f7c9c7301c74fc86f6491da55ea5ba6/config/memory_usage.xls -------------------------------------------------------------------------------- /config/pip.conf: -------------------------------------------------------------------------------- 1 | [global] 2 | index-url = http://pypi.douban.com/simple 3 | [install] 4 | trusted-host=pypi.douban.com 5 | -------------------------------------------------------------------------------- /config/sources.list: -------------------------------------------------------------------------------- 1 | # deb-src http://archive.ubuntu.com/ubuntu xenial main restricted #Added by software-properties 2 | deb http://mirrors.aliyun.com/ubuntu/ xenial main restricted 3 | deb-src http://mirrors.aliyun.com/ubuntu/ xenial main restricted multiverse universe #Added by software-properties 4 | deb http://mirrors.aliyun.com/ubuntu/ xenial-updates main restricted 5 | deb-src http://mirrors.aliyun.com/ubuntu/ xenial-updates main restricted multiverse universe #Added by software-properties 6 | deb http://mirrors.aliyun.com/ubuntu/ xenial universe 7 | deb http://mirrors.aliyun.com/ubuntu/ xenial-updates universe 8 | deb http://mirrors.aliyun.com/ubuntu/ xenial multiverse 9 | deb http://mirrors.aliyun.com/ubuntu/ xenial-updates multiverse 10 | deb http://mirrors.aliyun.com/ubuntu/ xenial-backports main restricted universe multiverse 11 | deb-src http://mirrors.aliyun.com/ubuntu/ xenial-backports main restricted universe multiverse #Added by software-properties 12 | deb http://archive.canonical.com/ubuntu xenial partner 13 | deb-src http://archive.canonical.com/ubuntu xenial partner 14 | deb http://mirrors.aliyun.com/ubuntu/ xenial-security main restricted 15 | deb-src http://mirrors.aliyun.com/ubuntu/ xenial-security main restricted multiverse universe #Added by software-properties 16 | deb http://mirrors.aliyun.com/ubuntu/ xenial-security universe 17 | deb http://mirrors.aliyun.com/ubuntu/ xenial-security multiverse -------------------------------------------------------------------------------- /main/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/piginzoo/textscanner/a934102e3f7c9c7301c74fc86f6491da55ea5ba6/main/__init__.py -------------------------------------------------------------------------------- /main/pred.py: -------------------------------------------------------------------------------- 1 | from utils import logger as log,label_utils 
2 | import conf
3 | from network import model as _model
4 | import logging,cv2
5 | import numpy as np
6 | from tensorflow.keras.utils import to_categorical
7 | from tensorflow.keras.preprocessing.sequence import pad_sequences
8 | 
9 | logger = logging.getLogger("Train")
10 | 
11 | def pred(args):
12 |     charset = label_utils.get_charset(conf.CHARSET)
13 |     CHARSET_SIZE = len(charset)
14 | 
15 |     # define the model
16 |     _, decoder_model, encoder_model = _model.model(conf, args)
17 | 
18 |     # load the weights for each model separately
19 |     encoder_model.load_weights(args.model)
20 |     decoder_model.load_weights(args.model)
21 |     logger.info("Loaded model: %s", args.model)
22 | 
23 |     logger.info("Start predicting image: %s", args.image)
24 |     image = cv2.imread(args.image)
25 | 
26 |     # the encoder predicts first
27 |     encoder_out_states, encoder_fwd_state, encoder_back_state = encoder_model.predict(image)
28 | 
29 |     # prepare the decoder's initial input state
30 |     decoder_init_state = np.concatenate([encoder_fwd_state, encoder_back_state], axis=-1)
31 | 
32 |     attention_weights = []
33 | 
34 |     # start with STX
35 |     from utils.label_utils import convert_to_id
36 |     decoder_index = convert_to_id([conf.CHAR_STX], charset)
37 |     decoder_state = decoder_init_state
38 | 
39 |     result = ""
40 | 
41 |     # start predicting characters
42 |     for i in range(conf.MAX_SEQUENCE):
43 | 
44 |         # despite the padding etc., this is really just one character; it is only done to match the expected input dimensions
45 |         decoder_inputs = pad_sequences(decoder_index, maxlen=conf.MAX_SEQUENCE, padding="post", value=0)
46 |         decoder_inputs = to_categorical(decoder_inputs, num_classes=CHARSET_SIZE)
47 | 
48 |         # infer_decoder_model : Model(inputs=[decoder_inputs, encoder_out_states, decoder_init_state],
49 |         #                             outputs=[decoder_pred, attn_states, decoder_state])
50 |         # encoder_out_states -> used by the attention
51 |         decoder_out, attention, decoder_state = \
52 |             decoder_model.predict([decoder_inputs, encoder_out_states, decoder_state])
53 | 
54 |         # beam search impl (WIP): take the 3 highest-scoring candidates
55 |         max_k_index = decoder_out.argsort()[-3:][::-1]
56 |         max_prob = decoder_out[max_k_index]
57 |         max_labels = label_utils.id2strs(max_k_index)  # TODO id2strs
58 | 
59 |         # the output at the current step is a probability distribution over the charset, so argmax it to get an id
60 |         decoder_index = np.argmax(decoder_out, axis=-1)[0, 0]
61 | 
62 |         if decoder_index == 2:  # ==> conf.CHAR_ETX
63 |             logger.info("Predicted ETX, stop decoding")
64 |             break
65 | 
66 |         attention_weights.append(attention)
67 | 
68 |         pred_char = label_utils.ids2str(decoder_index, charset=charset)
69 | 
70 |         logger.info("Predicted character: %s", pred_char)
71 |         result += pred_char
72 | 
73 |     if len(result) >= conf.MAX_SEQUENCE:
74 |         logger.debug("Prediction '%s' reached the maximum decoding length", result)
75 |     else:
76 |         logger.debug("Prediction '%s' ended with ETX", result)
77 | 
78 |     return result, attention_weights
79 | 
80 | def sents2sequences(tokenizer, sentences, reverse=False, pad_length=None, padding_type='post'):
81 |     encoded_text = tokenizer.texts_to_sequences(sentences)
82 |     preproc_text = pad_sequences(encoded_text, padding=padding_type, maxlen=pad_length, value=0)
83 |     if reverse:
84 |         preproc_text = np.flip(preproc_text, axis=1)
85 | 
86 |     return preproc_text
87 | 
88 | 
89 | if __name__ == "__main__":
90 |     log.init()
91 |     args = conf.init_pred_args()
92 |     result, attention_probs = pred(args)
93 |     logger.info("Predicted string: %s", result)
94 |     logger.info("Attention probabilities: %r", attention_probs)
--------------------------------------------------------------------------------
/main/train.py:
--------------------------------------------------------------------------------
1 | from tensorflow.keras.callbacks import TensorBoard
2 | from tensorflow.keras.callbacks import ModelCheckpoint
3 | from tensorflow.keras.callbacks import EarlyStopping
4 | from tensorflow.keras.models import load_model
5 | from network.model import TextScannerModel
6 | from utils.visualise_callback import TBoardVisual
7 | from utils.sequence import SequenceData
8 | from utils.label import label_utils
9 | from utils import logger as log
10 | from utils import util
11 | import logging
12 | import conf
13 | import os
14 | 
15 | logger = logging.getLogger(__name__)
16 | 
17 | 
18 | def train(args):
19 |     charset = label_utils.get_charset(conf.CHARSET)
20 |     conf.CHARSET_SIZE = len(charset)
21 | 
22 |     model = TextScannerModel(conf, charset)
23 |     model.compile_model()
24 | 
25 |     train_sequence = SequenceData(name="Train",
26 |                                   label_dir=args.train_label_dir,
27 |                                   label_file=args.train_label_file,
28 |                                   charsets=charset,
29 |                                   conf=conf,
30 |                                   args=args,
31 |                                   batch_size=args.batch)
32 |     valid_sequence = SequenceData(name="Validate",
33 |                                   label_dir=args.validate_label_dir,
34 |                                   label_file=args.validate_label_file,
35 |                                   charsets=charset,
36 |                                   conf=conf,
37 |                                   args=args,
38 |                                   batch_size=args.validation_batch)
39 | 
40 |     timestamp = util.timestamp_s()
41 |     tb_log_name = os.path.join(conf.DIR_TBOARD, timestamp)
42 |     # checkpoint_path = conf.DIR_MODEL + "/model-" + timestamp + "-epoch{epoch:03d}-acc{accuracy:.4f}-val{val_accuracy:.4f}.hdf5"
43 |     checkpoint_path = conf.DIR_MODEL + "/model-" + timestamp + "-epoch{epoch:03d}.hdf5"
44 | 
45 |     # if a checkpoint file exists, load it
46 |     if args.retrain:
47 |         logger.info("Train from beginning ...")
48 |     else:
49 |         logger.info("Train from the previous checkpoint ...")
50 |         _checkpoint_path = util.get_checkpoint(conf.DIR_CHECKPOINT)
51 |         if _checkpoint_path is not None:
52 |             model = load_model(_checkpoint_path)
53 |             logger.info("Loaded the checkpoint model [%s]", _checkpoint_path)
54 |         else:
55 |             logger.warning("No checkpoint found, training from scratch")
56 | 
57 |     logger.info("Train begin:")
58 | 
59 |     tboard = TensorBoard(log_dir=tb_log_name, histogram_freq=1, batch_size=2, write_grads=True)
60 |     early_stop = EarlyStopping(patience=args.early_stop, verbose=1, mode='max')
61 |     checkpoint = ModelCheckpoint(filepath=checkpoint_path, verbose=1, mode='max')
62 |     visibility_debug = TBoardVisual('Attention Visibility', tb_log_name, charset, args, valid_sequence)
63 | 
64 |     model.fit(
65 |         x=train_sequence,
66 |         steps_per_epoch=args.steps_per_epoch,  # strictly this should be len(train_sequence), but that is too slow, so I cap it with a smaller number, e.g. 1000
67 |         epochs=args.epochs,
68 |         workers=args.workers,  # how many processes load data concurrently
69 |         callbacks=[tboard, checkpoint, early_stop, visibility_debug],
70 |         use_multiprocessing=True,
71 |         validation_data=valid_sequence,
72 |         validation_steps=args.validation_steps,
73 |         verbose=2)
74 | 
75 |     logger.info("Train end!")
76 | 
77 |     model_path = conf.DIR_MODEL + "/textscanner-{}.hdf5".format(util.timestamp_s())
78 |     model.save(model_path)
79 |     logger.info("Saved the model to: %s", model_path)
80 | 
81 | 
82 | if __name__ == "__main__":
83 |     log.init()
84 |     args = conf.init_args()
85 |     train(args)
86 | 
--------------------------------------------------------------------------------
/network/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piginzoo/textscanner/a934102e3f7c9c7301c74fc86f6491da55ea5ba6/network/__init__.py
--------------------------------------------------------------------------------
/network/layers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piginzoo/textscanner/a934102e3f7c9c7301c74fc86f6491da55ea5ba6/network/layers/__init__.py
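
Before the layer listings below, a small NumPy sketch (an illustration only, not a file in this repo) of the word-formation integral these layers build toward: given a character-segmentation map G of shape [H,W,C] and one order map per character slot, the k-th character's class distribution is p_k = sum over all pixels of H_k * G, which is what WordFormation.call in network/layers/word_formation_layer.py computes with K.sum. The sizes here are made up for readability:

    import numpy as np

    # hypothetical sizes: a 4x8 feature map, 5 character classes, 3 sequence slots
    H_, W_, C, S = 4, 8, 5, 3
    G = np.random.rand(H_, W_, C)            # character segmentation: per-pixel class probabilities
    G /= G.sum(axis=-1, keepdims=True)       # normalize over the class axis
    order_maps = np.random.rand(H_, W_, S)   # one heat map per character slot

    # p[k] integrates G weighted by the k-th order map over all pixels
    p = np.einsum('hws,hwc->sc', order_maps, G)
    print(p.shape)  # (3, 5): per-slot class scores; argmax over axis 1 decodes the characters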
--------------------------------------------------------------------------------
/network/layers/class_branch_layer.py:
--------------------------------------------------------------------------------
1 | from tensorflow.keras.layers import Layer
2 | from tensorflow.keras.layers import Convolution2D
3 | from tensorflow.keras.layers import Softmax
4 | from utils.util import call_debug as _call
5 | 
6 | 
7 | class ClassBranchLayer(Layer):
8 |     """
9 |     [H,W,E] => [H,W,C]
10 |     E: encoding output channels
11 |     C: character class number
12 |     """
13 | 
14 |     def __init__(self, name, charset_size, filter_num):
15 |         super().__init__(name=name)
16 |         self.charset_size = charset_size
17 |         self.filter_num = filter_num
18 | 
19 |     def build(self, input_shape):
20 |         self.conv1 = Convolution2D(filters=self.filter_num,
21 |                                    kernel_size=(3, 3),
22 |                                    padding="same",
23 |                                    name="class_branch_conv1")
24 |         # the number of classes is charset size + 1
25 |         self.conv2 = Convolution2D(filters=self.charset_size + 1,
26 |                                    kernel_size=(1, 1),
27 |                                    padding="same",
28 |                                    name="class_branch_conv2")
29 |         self.softmax = Softmax(name="class_branch_softmax")
30 | 
31 |     def call(self, inputs, training=None):
32 |         x = _call(self.conv1, inputs)
33 |         x = _call(self.conv2, x)
34 |         x = _call(self.softmax, x)
35 |         return x
--------------------------------------------------------------------------------
/network/layers/fcn_layer.py:
--------------------------------------------------------------------------------
1 | from tensorflow.keras.layers import Conv2D, Dropout, Conv2DTranspose, Add, Cropping2D, Layer
2 | from tensorflow.keras.models import Model
3 | from utils.util import call_debug as _call
4 | 
5 | 
6 | class FCNLayer(Layer):
7 |     """
8 |     # Resnet: http://www.piginzoo.com/machine-learning/2019/08/28/east & https://i.stack.imgur.com/tkUYS.png
9 |     # FCN: https://github.com/divamgupta/image-segmentation-keras/blob/master/keras_segmentation/models/fcn.py
10 |     # Resnet50+FCN: see http://www.piginzoo.com/machine-learning/2020/04/23/fcn-unet#resnet50%E7%9A%84fcn
11 |     This implements FCN-8s
12 |     """
13 | 
14 |     def __init__(self, name, filter_num, resnet50_model):
15 |         super().__init__(name=name)
16 |         resnet50_model.layers.pop()
17 |         # resnet50_model.summary()
18 |         self.resnet50_model = resnet50_model
19 |         self.filter_num = filter_num
20 | 
21 |     def build(self, input_image):
22 | 
23 |         ############################
24 |         # encoder part
25 |         ############################
26 | 
27 |         layer_names = [
28 |             "conv3_block4_out",  # 1/8
29 |             "conv4_block6_out",  # 1/16
30 |             "conv5_block3_out",  # 1/32
31 |         ]
32 |         layers = [self.resnet50_model.get_layer(name).output for name in layer_names]
33 |         self.FCN_left = Model(inputs=self.resnet50_model.input, outputs=layers)
34 | 
35 |         ############################
36 |         # decoder part
37 |         ############################
38 | 
39 |         # pool5 (1/32) ==> 1/16
40 |         self.pool5_conv1 = Conv2D(filters=self.filter_num,
41 |                                   kernel_size=(2, 2),
42 |                                   activation='relu',
43 |                                   padding='same',
44 |                                   name="fcn_pool5_conv1")  # 2x2 because the minimum height is 2 pixels after Resnet
45 |         self.pool5_drop1 = Dropout(0.25, name="fcn_pool5_drop1")
46 |         self.pool5_conv2 = Conv2D(filters=self.filter_num,
47 |                                   kernel_size=(1, 1),
48 |                                   activation='relu',
49 |                                   padding='same',
50 |                                   name="fcn_pool5_conv2")
51 |         self.pool5_drop2 = Dropout(0.25, name="fcn_pool5_drop2")
52 |         self.pool5_conv3 = Conv2D(filters=self.filter_num,
53 |                                   kernel_size=(1, 1),
54 |                                   kernel_initializer='he_normal',
55 |                                   name="fcn_pool5_conv3")
56 |         self.pool5_dconv1 = Conv2DTranspose(filters=self.filter_num,
57 |                                             kernel_size=(3, 3),
58 |                                             strides=(2, 2),
59 |                                             use_bias=False,
60 |                                             name="fcn_pool5_dconv1")  # with stride=2 the deconv map grows from 2x8 => 5x17 (zeros padded between pixels), upsampled with a 3x3 kernel
61 | 
62 |         # pool4 (1/16) + dconv ==> 1/8
63 |         self.pool4_conv1 = Conv2D(filters=self.filter_num,
64 |                                   kernel_size=(1, 1),
65 |                                   kernel_initializer='he_normal',
66 |                                   name="fcn_pool4_conv1")  # pool4 after a 1x1 conv + the deconvolved pool5, restoring 1/16 of the original image
67 |         self.pool4_add1 = Add(name="fcn_pool4_add1")
68 |         self.pool4_dconv1 = Conv2DTranspose(filters=self.filter_num,
69 |                                             kernel_size=(3, 3),
70 |                                             strides=(2, 2),
71 |                                             use_bias=False,
72 |                                             name="fcn_pool4_dconv1")  # deconvolve (pool4 + upsampled pool5) again, restoring the size to 1/8 of the original image
73 | 
74 |         # pool3 (1/8) + dconv ==> original size
75 |         self.pool3_conv1 = Conv2D(filters=self.filter_num,
76 |                                   kernel_size=(1, 1),
77 |                                   kernel_initializer='he_normal',
78 |                                   name="fcn_pool3_conv1")  # pool3 after a 1x1 conv is fused with the result above
79 |         self.pool3_add1 = Add(name="fcn_pool3_add1")
80 |         self.pool3_dconv1 = Conv2DTranspose(filters=self.filter_num,
81 |                                             kernel_size=(3, 3),
82 |                                             strides=(8, 8),
83 |                                             use_bias=False,
84 |                                             name="fcn_pool3_dconv1")  # the last deconvolution restores the size from 1/8 directly to the original image size (stride=8)
85 | 
86 |     def call(self, input_image, training=True):
87 | 
88 |         pool3, pool4, pool5 = _call(self.FCN_left, input_image)
89 |         o = _call(self.pool5_conv1, pool5)
90 |         o = _call(self.pool5_drop1, o)
91 |         o = _call(self.pool5_conv2, o)
92 |         o = _call(self.pool5_drop2, o)
93 |         o = _call(self.pool5_conv3, o)
94 |         o5 = _call(self.pool5_dconv1, o)
95 | 
96 |         o4 = _call(self.pool4_conv1, pool4)
97 |         o5, o4 = self.crop(o5, o4)
98 |         o45 = _call(self.pool4_add1, [o5, o4])
99 |         o45 = _call(self.pool4_dconv1, o45)
100 | 
101 |         o3 = _call(self.pool3_conv1, pool3)
102 |         o45, o3 = self.crop(o45, o3)
103 |         o = _call(self.pool3_add1, [o45, o3])
104 |         o = _call(self.pool3_dconv1, o)
105 | 
106 |         return o
107 | 
108 |     # crop the larger tensor so that both match the smaller one
109 |     def crop(self, o1, o2):
110 |         o1_height, o1_width = o1.shape[1], o1.shape[2]
111 |         o2_height, o2_width = o2.shape[1], o2.shape[2]
112 | 
113 |         cx = abs(o1_width - o2_width)
114 |         cy = abs(o1_height - o2_height)
115 | 
116 |         if o1_width > o2_width:
117 |             o1 = Cropping2D(cropping=((0, 0), (0, cx)))(o1)
118 |         else:
119 |             o2 = Cropping2D(cropping=((0, 0), (0, cx)))(o2)
120 | 
121 |         if o1_height > o2_height:
122 |             o1 = Cropping2D(cropping=((0, cy), (0, 0)))(o1)
123 |         else:
124 |             o2 = Cropping2D(cropping=((0, cy), (0, 0)))(o2)
125 | 
126 |         return o1, o2
--------------------------------------------------------------------------------
/network/layers/geometry_branch_layer.py:
--------------------------------------------------------------------------------
1 | from tensorflow.keras.layers import Layer
2 | from tensorflow.keras.layers import Convolution2D
3 | from tensorflow.keras.layers import Conv2DTranspose
4 | from tensorflow.keras.layers import GRU
5 | from tensorflow.keras.layers import Permute
6 | from tensorflow.keras.layers import Softmax
7 | from tensorflow.keras.layers import Activation
8 | from utils.util import call_debug as _call
9 | import tensorflow as tf
10 | 
11 | 
12 | class GeometryBranch(Layer):
13 |     """
14 |     [H,W,E] => order maps [H,W,S], localization map [H,W,1], order segmentation [H,W,S]
15 |     E: encoding output channels
16 |     S: max sequence length
17 |     """
18 | 
19 |     def __init__(self, name, conf):
20 |         super().__init__(name=name)
21 |         self.image_area = conf.INPUT_IMAGE_HEIGHT * conf.INPUT_IMAGE_WIDTH
22 |         self.sequence_length = conf.MAX_SEQUENCE
23 |         self.conf = conf
24 |         self.filter_num = conf.FILTER_NUM
25 | 
26 |     def build(self, input_shape):
27 |         # 
######################################################################## 28 | # order segment generation network 29 | 30 | # 1. Convs 31 | self.conv_order_seg1 = Convolution2D(filters=self.filter_num, kernel_size=(3, 3), strides=2, 32 | name="conv_order_seg1", padding="same") # 1/2 33 | self.conv_order_seg2 = Convolution2D(filters=self.filter_num, kernel_size=(3, 3), strides=2, 34 | name="conv_order_seg2", padding="same") # 1/4 35 | self.conv_order_seg3 = Convolution2D(filters=self.filter_num, kernel_size=(3, 3), strides=2, 36 | name="conv_order_seg3", padding="same") # 1/8 37 | 38 | # 2. GRU 39 | self.transpose1 = Permute((2, 1, 3)) # [B,H,W,C] => [B,W,H,C] 40 | # self.reshape1 = Reshape((-1,self.conf.INPUT_IMAGE_WIDTH,self.conf.INPUT_IMAGE_HEIGHT*self.filter_num)) # [B,W,H,C] => [B,W,H*C] 41 | self.gru_order_seg = GRU(units=self.filter_num * (input_shape[1] // 8), return_sequences=True, 42 | name="gru_order_seg") 43 | # self.reshape2 = Reshape((-1,self.conf.INPUT_IMAGE_WIDTH,self.conf.INPUT_IMAGE_HEIGHT,self.filter_num)) # [B,W,H*C] => [B,W,H,C] 44 | self.transpose2 = Permute((2, 1, 3)) # [B,W,H,C] => [B,H,W,C] 45 | 46 | # 3. DeConvs 47 | self.dconv_order_seg3 = Conv2DTranspose(filters=self.filter_num, kernel_size=(3, 3), strides=2, 48 | name="dconv_order_seg3", padding="same") # 1 49 | self.dconv_order_seg2 = Conv2DTranspose(filters=self.filter_num, kernel_size=(3, 3), strides=2, 50 | name="dconv_order_seg2", padding="same") # 1/2 51 | self.dconv_order_seg1 = Conv2DTranspose(filters=self.sequence_length, kernel_size=(3, 3), strides=2, 52 | name="dconv_order_seg1", 53 | padding="same") # 1/4 54 | self.softmax = Softmax(name="softmax") 55 | 56 | # ######################################################################## 57 | # localization map generation network 58 | self.conv_loc_map1 = Convolution2D(filters=self.filter_num, kernel_size=(3, 3), padding="same", 59 | name="conv_loc_map1") 60 | self.conv_loc_map2 = Convolution2D(filters=1, kernel_size=(1, 1), padding="same", 61 | name="conv_loc_map2") 62 | self.sigmoid = Activation("sigmoid", name="sigmoid") 63 | 64 | def call(self, inputs, training=None): 65 | # convs 66 | x = inputs 67 | x = s1 = _call(self.conv_order_seg1, x) 68 | x = s2 = _call(self.conv_order_seg2, x) 69 | x = _call(self.conv_order_seg3, x) 70 | 71 | # gru 72 | x = _call(self.transpose1, x) 73 | height = x.shape[2] 74 | channel = x.shape[3] 75 | target_shape = [-1, x.shape[1], height * channel] 76 | x = _call(tf.reshape, x, target_shape) 77 | x = _call(self.gru_order_seg, x) 78 | target_shape = [-1, x.shape[1], height, channel] 79 | x = _call(tf.reshape, x, target_shape) 80 | x = _call(self.transpose2, x) 81 | 82 | # de-convs,get seg 83 | x = _call(self.dconv_order_seg3, x) 84 | x = _call(self.dconv_order_seg2, x + s2) 85 | x = _call(self.dconv_order_seg1, x + s1) 86 | order_segment = _call(self.softmax, x) 87 | 88 | # generate Localization Map 89 | q = _call(self.conv_loc_map1, inputs) 90 | q = _call(self.conv_loc_map2, q) 91 | localization_map = _call(self.sigmoid, q) 92 | 93 | # multiply S[B,H,W,N] * Q[B,H,W,1] => [B,H,W,N] 94 | order_map = order_segment * localization_map # multiply together 95 | 96 | return order_map, localization_map, order_segment 97 | -------------------------------------------------------------------------------- /network/layers/word_formation_layer.py: -------------------------------------------------------------------------------- 1 | import tensorflow.keras.backend as K 2 | from tensorflow.keras.layers import Layer 3 | import tensorflow as 
tf
4 | 
5 | 
6 | class WordFormation(Layer):
7 |     """
8 |     Integrates the product of the "Character Segmentation" & the "Order Maps",
9 |     and infers the character probabilities.
10 |     The threshold is 0.3 (the paper says "Other Details: ... The score threshold L_score is set to 0.3 empirically ...")
11 |     """
12 | 
13 |     def __init__(self, name):
14 |         super().__init__(name=name)
15 | 
16 |     def call(self, G, H, training=None):
17 |         """
18 |         G [Character Segmentation] : [N,H,W,C] - N: batch, C: charset size (3770)
19 |         H [Order Map] : [N,H,W,S] - S: sequence length (30)
20 | 
21 |         The return value is [N,S,C], i.e. the probabilities of each character.
22 |         """
23 |         p_k_list = []
24 |         for i in range(H.shape[-1]):
25 |             H_k = H[:, :, :, i]
26 |             H_k = H_k[:, :, :, tf.newaxis]
27 |             GH = H_k * G
28 |             p_k = K.sum(GH, axis=(1, 2))
29 |             p_k_list.append(p_k)
30 | 
31 |         pks = K.stack(p_k_list)  # P_k: (30, 10, 4100)
32 |         pks = K.permute_dimensions(pks, (1, 0, 2))
33 |         return pks
--------------------------------------------------------------------------------
/network/model.py:
--------------------------------------------------------------------------------
1 | from network.layers.class_branch_layer import ClassBranchLayer
2 | from network.layers.geometry_branch_layer import GeometryBranch
3 | from network.layers.word_formation_layer import WordFormation
4 | from tensorflow.keras.applications.resnet import ResNet50
5 | from network.layers.fcn_layer import FCNLayer
6 | from tensorflow.keras.optimizers import Adam
7 | from utils.util import call_debug as _call
8 | from tensorflow.keras import backend as K
9 | from tensorflow.keras.models import Model
10 | from tensorflow.keras.layers import Input
11 | import tensorflow as tf
12 | import logging
13 | 
14 | logger = logging.getLogger(__name__)
15 | 
16 | HUBER_DELTA = 0.5
17 | 
18 | 
19 | class TextScannerModel(Model):
20 |     """
21 |     TextScanner Core Model
22 |     """
23 | 
24 |     def __init__(self, conf, charset):
25 |         super(TextScannerModel, self).__init__()
26 |         self.input_image = Input(shape=(conf.INPUT_IMAGE_HEIGHT, conf.INPUT_IMAGE_WIDTH, 3), name='input_image')
27 |         self.class_branch = ClassBranchLayer(name="ClassBranchLayer", charset_size=len(charset),
28 |                                              filter_num=conf.FILTER_NUM)
29 |         self.geometry_branch = GeometryBranch(name="GeometryBranchLayer", conf=conf)
30 |         self.word_formation = WordFormation(name="WordFormationLayer")
31 |         self.resnet50_model = ResNet50(include_top=False, weights='imagenet')  # Resnet50+FCN: see http://www.piginzoo.com/machine-learning/2020/04/23/fcn-unet#resnet50%E7%9A%84fcn
32 |         self.resnet50_model.summary()
33 |         self.fcn = FCNLayer(name="FCNLayer", filter_num=conf.FILTER_NUM, resnet50_model=self.resnet50_model)
34 | 
35 |     def call(self, inputs, training=None):
36 |         fcn_features = _call(self.fcn, inputs)
37 |         character_segmentation = _call(self.class_branch, fcn_features)
38 |         order_map, localization_map, order_segment = _call(self.geometry_branch, fcn_features)
39 |         words = _call(self.word_formation, character_segmentation, order_map)
40 |         return character_segmentation, order_segment, localization_map, words  # their order is critical for the losses & metrics
41 | 
42 |     def localization_map_loss(self):
43 |         def smoothL1(y_true, y_pred):
44 |             x = K.abs(y_true - y_pred)
45 |             x = K.switch(x < HUBER_DELTA, 0.5 * x ** 2, HUBER_DELTA * (x - 0.5 * HUBER_DELTA))
46 |             return K.sum(x)
47 | 
48 |         return smoothL1
49 | 
50 |     def compile_model(self):
51 |         # the model's outputs are: character_segmentation(G), order_segment(S), localization_map(Q), words
52 |         # the loss for the last output, "words", is unused; it is masked by its zero weight and kept only for metrics
53 |         losses = ['categorical_crossentropy',
54 |                   'categorical_crossentropy',
55 |                   self.localization_map_loss(),
56 |                   'categorical_crossentropy']
57 |         loss_weights = [1, 10, 10, 0]  # weight values follow the paper; the last 0 masks out the words loss
58 | 
59 |         # metrics
60 |         metrics = ['categorical_accuracy',
61 |                    'categorical_accuracy',
62 |                    'binary_accuracy',
63 |                    'categorical_accuracy']
64 | 
65 |         self.compile(Adam(),
66 |                      loss=losses,
67 |                      loss_weights=loss_weights,
68 |                      metrics=metrics,
69 |                      run_eagerly=True)
70 |         logger.info("######## TextScanner Model Structure ########")
71 |         self.build(self.input_image.shape)
72 |         self.summary()
73 |         logger.info("TextScanner Model was compiled.")
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | Keras-Preprocessing
2 | python-levenshtein
3 | Keras-Applications
4 | opencv-python
5 | matplotlib
6 | pyclipper
7 | pillow
8 | keras
9 | numpy
--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piginzoo/textscanner/a934102e3f7c9c7301c74fc86f6491da55ea5ba6/test/__init__.py
--------------------------------------------------------------------------------
/test/gaussian_filter.py:
--------------------------------------------------------------------------------
1 | #coding=utf-8
2 | from PIL import Image
3 | import numpy as np
4 | from scipy.ndimage import filters
5 | import matplotlib.pyplot as plt
6 | import scipy.ndimage.filters as fi
7 | 
8 | 
9 | def render_image():
10 |     im = np.array(Image.open('messi.jpg'))
11 | 
12 |     index = 141  # plot 1 row x 4 columns, same as (1,4,1)
13 |     plt.subplot(index)
14 |     plt.imshow(im)
15 | 
16 |     for sigma in (2, 5, 10):
17 |         im_blur = np.zeros(im.shape, dtype=np.uint8)
18 |         for i in range(3):  # apply the Gaussian filter to every channel of the image
19 |             im_blur[:,:,i] = filters.gaussian_filter(im[:,:,i], sigma)
20 |         index += 1
21 |         plt.subplot(index)
22 |         plt.imshow(im_blur)
23 | 
24 |     plt.show()
25 | 
26 | 
27 | def render_gaussian(h,w,box):
28 |     canvas = np.zeros((h,w), dtype=np.int32)
29 |     xmin, xmax,ymin, ymax = box
30 |     out = np.zeros_like(canvas).astype(np.float32)
31 |     h, w = canvas.shape[:2]
32 |     sigma = 2
33 | 
34 |     # find the center point
35 |     y = (ymax+ymin+1)//2
36 |     x = (xmax+xmin+1)//2
37 | 
38 |     # set that single point to 1
39 |     out[y, x] = 1.
40 | print("============================================================") 41 | print("原始out") 42 | print(out) 43 | # 44 | h, w = canvas.shape[:2] 45 | fi.gaussian_filter(out, (sigma, sigma),output=out, mode='mirror') 46 | 47 | print("============================================================") 48 | print("高斯过滤后out") 49 | print(out) 50 | plt.subplot(131)#画1行四列的图,与 1,4,1 同 51 | plt.imshow(out) 52 | 53 | out = out / out.max() 54 | print("============================================================") 55 | print("归一化后out") 56 | print(out) 57 | plt.subplot(132)#画1行四列的图,与 1,4,1 同 58 | plt.imshow(canvas) 59 | 60 | canvas[out > canvas] = out[out > canvas] 61 | print("============================================================") 62 | print("重新填充后的canvas") 63 | print(out) 64 | plt.subplot(133)#画1行四列的图,与 1,4,1 同 65 | plt.imshow(canvas) 66 | 67 | plt.show() 68 | 69 | 70 | def render_gaussian_thresh(h,w,box): 71 | canvas = np.zeros((h,w), dtype=np.int32) 72 | xmin, xmax,ymin, ymax = box 73 | value=7 74 | thresh=0.2 75 | shrink=0.6 76 | sigma = 2 77 | out = np.zeros_like(canvas) 78 | h, w = canvas.shape[:2] 79 | y = (ymax+ymin+1)//2 80 | x = (xmax+xmin+1)//2 81 | 82 | out = np.zeros_like(canvas).astype(np.float32) 83 | print(out.shape) 84 | out[y, x] = 1. 85 | print("============================================================") 86 | print("原始out") 87 | print(out) 88 | 89 | # out = filters.gaussian_filter(out,sigma=3) 90 | fi.gaussian_filter(out, (sigma, sigma),output=out, mode='mirror') 91 | print("============================================================") 92 | print("高斯滤波后out") 93 | print(out) 94 | out = out / out.max() 95 | print("============================================================") 96 | print("归一化后out") 97 | print(out) 98 | canvas[out > thresh] = value 99 | print("============================================================") 100 | print("out大于0.2复制了%d的canvas" % value) 101 | print(canvas) 102 | plt.imshow(canvas) 103 | plt.show() 104 | 105 | if __name__ == '__main__': 106 | #render_gaussian_thresh(64,256,(30,50,30,50)) 107 | render_gaussian(8,8,(3,5,3,5)) -------------------------------------------------------------------------------- /test/make_decouple_map.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.ndimage.filters as fi 3 | import ipdb 4 | from concern.config import State 5 | 6 | from .data_process import DataProcess 7 | 8 | 9 | class MakeDecoupleMap(DataProcess): 10 | max_size = State(default=32) # ???什么size,是最多的字符数么? 
11 |     shape = State(default=(64, 256))  # the standard (h, w) of the image
12 |     sigma = State(default=2)  # oh, the variance is set to 2
13 |     summation = State(default=False)
14 |     box_key = State(default='charboxes')
15 |     function = State(default='gaussian')
16 |     thresh = State(default=0.2)
17 |     order_dest = State(default='ordermaps')
18 |     mask_dest = State(default='charmaps')
19 |     shape_dest = State(default='shapemaps')
20 | 
21 |     def process(self, data):
22 |         assert self.box_key in data, '%s in data is required' % self.box_key
23 |         shape = data['image'].shape[:2]  # h,w
24 |         boxes = np.array(data[self.box_key])  # these are the per-character boxes
25 | 
26 |         ratio_x = shape[1] / self.shape[1]  # width ratio
27 |         boxes[:, :, 0] = (boxes[:, :, 0] / ratio_x).clip(0, self.shape[1])  # boxes is presumably all the boxes: [b,N,2]
28 |         ratio_y = shape[0] / self.shape[0]  # height ratio
29 |         boxes[:, :, 1] = (boxes[:, :, 1] / ratio_y).clip(0, self.shape[0])
30 |         boxes = (boxes + .5).astype(np.int32)
31 |         xmins = boxes[:, :, 0].min(axis=1)  # find the minimum x
32 |         xmaxs = np.maximum(boxes[:, :, 0].max(axis=1), xmins + 1)  # find the maximum x
33 |         ymins = boxes[:, :, 1].min(axis=1)
34 |         ymaxs = np.maximum(boxes[:, :, 1].max(axis=1), ymins + 1)
35 | 
36 |         # make an empty (h, w) map, all zeros
37 |         shapemaps = np.zeros((self.shape[0], self.shape[1], 2), dtype=np.int32)
38 | 
39 | 
40 |         if self.summation:
41 |             # this seems to be the GT prepared for the localization map
42 |             canvas = np.zeros(self.shape, dtype=np.int32)
43 |         else:
44 |             # 3-dimensional; looks like the GT for the order maps
45 |             canvas = np.zeros((self.max_size+1, *self.shape), dtype=np.float32)
46 | 
47 |         mask = np.zeros(self.shape, dtype=np.float32)
48 |         # generate the order indices 1~30
49 |         orders = self.orders(data)
50 | 
51 |         # process each character
52 |         for i in range(xmins.shape[0]):
53 |             # initialize an (h, w) zero map
54 |             temp = np.zeros(self.shape, dtype=np.float32)
55 |             function = getattr(self, 'render_' + self.function)
56 |             order = min(orders[i], self.max_size)
57 |             if self.summation:
58 |                 function(canvas, xmins[i], xmaxs[i], ymins[i], ymaxs[i],
59 |                          value=order+1, shrink=0.6)
60 |             else:
61 |                 # one map per character
62 |                 function(canvas[order+1], xmins[i], xmaxs[i], ymins[i], ymaxs[i])
63 |             self.render_gaussian(mask, xmins[i], xmaxs[i], ymins[i], ymaxs[i])
64 |             self.render_gaussian(temp, xmins[i], xmaxs[i], ymins[i], ymaxs[i])
65 |             w, h = xmaxs[i]-xmins[i], ymaxs[i]-ymins[i]
66 |             shapemaps[temp > 0.4] = np.array([w, h])
67 |         data[self.order_dest] = canvas
68 |         data[self.mask_dest] = mask
69 |         data[self.shape_dest] = shapemaps.transpose(2, 0, 1)
70 |         return data
71 | 
72 |     def render_gaussian(self, canvas, xmin, xmax, ymin, ymax):
73 |         out = np.zeros_like(canvas)
74 |         h, w = canvas.shape[:2]
75 |         # find the center point
76 |         y = (ymax+ymin+1)//2
77 |         x = (xmax+xmin+1)//2
78 |         if not (w > x and h > y): return
79 |         # set that single point to 1
80 |         out[y, x] = 1.
81 |         h, w = canvas.shape[:2]
82 |         fi.gaussian_filter(out, (self.sigma, self.sigma), output=out, mode='mirror')
83 |         out = out / out.max()
84 |         canvas[out > canvas] = out[out > canvas]  # <---
85 | 
86 |     def render_gaussian_thresh(self, canvas, xmin, xmax, ymin, ymax,
87 |                                value=1, thresh=None, shrink=None):
88 |         if thresh is None: thresh = self.thresh
89 |         h, w = canvas.shape[:2]
90 |         y = (ymax+ymin+1)//2
91 |         x = (xmax+xmin+1)//2
92 |         if not (w > x and h > y): return
93 |         out = np.zeros_like(canvas).astype(np.float32)
94 |         out[y, x] = 1.
95 |         out = fi.gaussian_filter(out, (self.sigma, self.sigma), output=out, mode='mirror')
96 |         out = out / out.max()
97 |         canvas[out > thresh] = value
98 | 
99 | 
100 |     def render_gaussian_fast(self, canvas, xmin, xmax, ymin, ymax):
101 |         out = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.float32)
102 |         out[(ymax-ymin+1)//2, (xmax-xmin+1)//2] = 1.
103 |         h, w = canvas.shape[:2]
104 |         fi.gaussian_filter(out, (self.sigma, self.sigma),
105 |                            output=out, mode='mirror')
106 |         out = out / out.max()
107 |         canvas[ymin:ymax+1, xmin:xmax+1] = np.maximum(out, canvas[ymin:ymax+1, xmin:xmax+1])
108 | 
109 |     def orders(self, data):
110 |         orders = []
111 |         if 'lines' in data:  # what on earth is "lines"?
112 |             for text in data['lines'].texts:
113 |                 orders += list(range(min(len(text), self.max_size)))
114 |         else:
115 |             # as I understand it, this just generates indices 1:max_size (data[self.box_key] is the boxes)
116 |             orders = list(range(min(data[self.box_key].shape[0], self.max_size)))
117 |         return orders
--------------------------------------------------------------------------------
/test/test_accuracy.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | 
4 | def _p(t,name):
5 |     print("graph-definition debug: "+name, t)
6 |     return tf.Print(t,[t],name,summarize=300)
7 | 
8 | # y_pred is [batch,seq,charset_size]
9 | # pred = np.random.rand(3,3,3)
10 | # label = np.random.rand(3,3,3)
11 | 
12 | pred = np.array(
13 |     [
14 |         [[1,0,0],[1,0,0],[1,0,0]],
15 |         [[1,0,0],[1,0,0],[1,0,0]],
16 |         [[1,0,0],[1,0,0],[1,0,0]]
17 |     ])
18 | 
19 | label = np.array(
20 |     [
21 |         [[0.5,0.2,0.3],[0.5,0.2,0.3],[0.5,0.2,0.3]],  # true,true,true => true
22 |         [[0.5,0.2,0.3],[0.5,0.2,0.3],[0.2,0.5,0.3]],  # true,true,false => false
23 |         [[0.2,0.3,0.5],[0.2,0.3,0.5],[0.2,0.3,0.5]]   # false,false,false => false
24 |     ])
25 | 
26 | # the accuracy should be 0.333
27 | 
28 | def accuracy(y_true, y_pred):
29 |     max_idx_p = tf.argmax(y_pred, axis=2)
30 |     max_idx_l = tf.argmax(y_true, axis=2)
31 |     max_idx_p = _p(max_idx_p,"max_idx_p")
32 |     correct_pred = tf.equal(max_idx_p, max_idx_l)
33 |     _result = tf.map_fn(fn=lambda e: tf.reduce_all(e), elems=correct_pred, dtype=tf.bool)
34 |     return tf.reduce_mean(tf.cast(_result, tf.float32))
35 | 
36 | s = tf.Session()
37 | 
38 | x = tf.placeholder(tf.float32, shape=[None, None,3], name='x')
39 | y = tf.placeholder(tf.float32, shape=[None, None,3], name='y')
40 | m = accuracy(x,y)
41 | r = s.run(m,feed_dict={x:pred,y:label})
42 | print(r)
--------------------------------------------------------------------------------
/test/test_call.py:
--------------------------------------------------------------------------------
1 | 
2 | def call1(p1):
3 |     print("call1!")
4 |     print(p1)
5 | 
6 | def call2(p1,p2):
7 |     print("call2!")
8 |     print(p1)
9 |     print(p2)
10 | 
11 | def call3(p_list):
12 |     print("call3!")
13 |     print(p_list)
14 | 
15 | 
16 | def test_func(c,*param):
17 |     print(type(param))
18 |     c(*param)
19 | 
20 | test_func(call1, "aaaa")
21 | test_func(call2, "bbbb","cccc")
22 | test_func(call3, ["bbbb","cccc"])
--------------------------------------------------------------------------------
/test/test_customized_layer.py:
--------------------------------------------------------------------------------
1 | # from tensorflow.keras.layers import Conv2D
2 | # from tensorflow.keras.layers import LeakyReLU
3 | # from tensorflow.keras.layers import MaxPooling2D
4 | # from tensorflow.keras.layers import BatchNormalization
5 | # from tensorflow.keras.layers import Lambda
6 | # from tensorflow.keras.layers import Layer
7 | # from tensorflow.keras.backend import squeeze
8 | from keras.layers import Conv2D
9 | from keras.layers import LeakyReLU
10 | from keras.layers import MaxPooling2D
11 | from keras.layers import BatchNormalization
12 | from keras.layers import Lambda
13 | from keras.layers import Layer,Flatten,Dense
14 | from keras.backend import squeeze
15 | from keras.optimizers import 
Adam
16 | from keras.models import Model
17 | from keras.layers import Input
18 | import numpy as np
19 | 
20 | class Conv(Layer):
21 | 
22 |     # [N,1,256/4,512] => [N,256/4,512]
23 |     def squeeze_wrapper(self,tensor):
24 |         print("tensor:",tensor)
25 |         return squeeze(tensor, axis=1)
26 | 
27 |     def __init__(self, **kwargs):
28 |         super(Conv, self).__init__(**kwargs)
29 | 
30 |     '''
31 |     # The feature-extraction CNN
32 |     # https://blog.csdn.net/Quincuntial/article/details/77679463
33 |     In the CRNN model, the convolutional component is built from the conv and max-pooling layers of a
34 |     standard CNN model (the fully-connected layers are removed). It extracts a sequential feature
35 |     representation from the input image. Before entering the network, all images must be scaled to the
36 |     same height. Then a sequence of feature vectors is extracted from the feature maps produced by the
37 |     conv component; this sequence is the input of the recurrent layers. Specifically, each feature vector
38 |     of the sequence is generated on the feature maps column by column, from left to right, which means the
39 |     i-th feature vector is the concatenation of the i-th columns of all the feature maps. In our setting
40 |     the width of each column is fixed to a single pixel.
41 | 
42 |     # Since the conv, max-pooling and element-wise activation layers operate on local regions, they are
43 |     translation invariant. Hence each column of the feature maps corresponds to a rectangular region of
44 |     the original image (its receptive field), and those regions have the same left-to-right order as the
45 |     corresponding columns of the feature maps. As in Fig. 2, each vector in the feature sequence is
46 |     associated with a receptive field and can be regarded as the image descriptor of that region.
47 |     :param inputdata: e.g. batch*32*100*3, NHWC format
48 |         |
49 |      Conv1 --> H*W*64            # dims after the conv
50 |      Relu1
51 |      Pool1 H/2 * W/2 * 64        # dims after the pooling
52 |         |
53 |      Conv2 H/2 * W/2 * 128
54 |      Relu2
55 |      Pool2 H/4 * W/4 * 128
56 |         |
57 |      Conv3 H/4 * W/4 * 256
58 |      Relu3
59 |         |
60 |      Conv4 H/4 * W/4 * 256
61 |      Relu4
62 |      Pool4 H/8 * W/4 * 64
63 |         |
64 |      Conv5 H/8 * W/4 * 512
65 |      Relu5
66 |      BatchNormal5
67 |         |
68 |      Conv6 H/8 * W/4 * 512
69 |      Relu6
70 |      BatchNormal6
71 |      Pool6 H/16 * W/4 * 512
72 |         |
73 |      Conv7
74 |      Relu7 H/32 * W/4 * 512
75 |         |
76 |      20 layers in total
77 |     '''
78 |     # the custom conv stack: 32x100 => 1x25, i.e. (1/32, 1/4)
79 |     def call(self,inputs):
80 |         x = inputs
81 |         for layer in self.layers:
82 |             # print(x)
83 |             x = layer(x)
84 | 
85 |         return x
86 | 
87 | 
88 |     def build(self, input_shape):
89 |         self.layers = []
90 |         # Block 1
91 |         self.layers.append(Conv2D(64, (3, 3), padding='same', name='block1_conv1'))
92 |         self.layers.append(LeakyReLU())
93 |         # self.layers.append(BatchNormalization())
94 |         self.layers.append(MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool'))  # 1/2
95 | 
96 |         # Block 2
97 |         self.layers.append(Conv2D(128, (3, 3), padding='same', name='block2_conv1'))
98 |         self.layers.append(LeakyReLU())
99 |         # self.layers.append(BatchNormalization())
100 |         self.layers.append(MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool'))  # 1/2
101 | 
102 |         # Block 3
103 |         self.layers.append(Conv2D(256, (3, 3), padding='same', name='block3_conv1'))
104 |         self.layers.append(LeakyReLU())
105 |         # self.layers.append(BatchNormalization())
106 | 
107 |         # Block 4
108 |         self.layers.append(Conv2D(256, (3, 3), padding='same', name='block4_conv1'))
109 |         # self.layers.append(BatchNormalization())
110 |         self.layers.append(LeakyReLU())
111 |         self.layers.append(MaxPooling2D((2, 1), strides=(2, 1), name='block4_pool'))  # 1/2 <------ pool kernel is (2,1)!!!!!
112 | 
113 |         # Block 5
114 |         self.layers.append(Conv2D(512, (3, 3), padding='same', name='block5_conv1'))
115 |         self.layers.append(LeakyReLU())
116 |         self.layers.append(BatchNormalization())
117 | 
118 |         # Block 6
119 |         self.layers.append(Conv2D(512, (3, 3), padding='same', name='block6_conv1'))
120 |         self.layers.append(LeakyReLU())
121 |         self.layers.append(BatchNormalization())
122 |         self.layers.append(MaxPooling2D((2, 1), strides=(2, 1), name='block6_pool'))  # 1/2 <------ pool kernel is (2,1)!!!!!
118 | 
119 |         # Block 7
120 |         self.layers.append(Conv2D(512, (2, 2), strides=[2, 1], padding='same', name='block7_conv1'))  # 1/2
121 |         self.layers.append(LeakyReLU())
122 | 
123 |         # the output is (batch,1,Width/4,512); after the squeeze it becomes (batch,Width/4,512)
124 |         self.layers.append(Lambda(self.squeeze_wrapper))
125 | 
126 |         super(Conv, self).build(input_shape)
127 | 
128 |     # input_shape[N,H,W,512] => output_shape[N,W/4,512]
129 |     def compute_output_shape(self, input_shape):
130 |         print("input_shape:", input_shape)
131 |         return (None, int(input_shape[2] / 4), 512)
132 | 
133 | if __name__ == '__main__':
134 | 
135 |     input_image = Input(shape=(32, 256, 3))
136 |     conv = Conv()
137 |     conv_output = conv(input_image)  # output (None, 64, 512)
138 |     print(conv_output)
139 |     flat = Flatten()(conv_output)
140 |     output = Dense(4, activation='softmax', input_shape=(-1,))(flat)
141 | 
142 |     train_model = Model(inputs=input_image, outputs=output)
143 |     adam = Adam()
144 |     train_model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
145 |     train_data = np.random.random((10, 32, 256, 3))
146 |     train_labels = np.random.random((10, 4))
147 |     train_model.fit(train_data, train_labels, epochs=1, batch_size=1)
148 | 
--------------------------------------------------------------------------------
/test/test_draw_charactor_segment.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | 
4 | csi = character_segment_image = np.random.random((64, 256, 3840))
5 | csi = np.argmax(csi, axis=-1)  # collapse the 3840-class dimension into a single map of class ids
6 | 
7 | plt.imshow(csi)
8 | plt.show()
--------------------------------------------------------------------------------
/test/test_file_process.py:
--------------------------------------------------------------------------------
1 | f = open("../data/small_vocab_en.txt", 'r')
2 | for l in f:
3 |     print(l)
--------------------------------------------------------------------------------
/test/test_heirachy.py:
--------------------------------------------------------------------------------
1 | class Parent():
2 |     def __init__(self, name, *args):
3 |         self.name = name
4 |         print("i am parent,name=", name)
5 | 
6 | class Child(Parent):
7 |     def __init__(self, name, *args):
8 |         super().__init__(name, args)
9 |         print("i am child,name=", name)
10 | 
11 | 
12 | class GrandChild(Child):
13 |     def __init__(self, name, *args):
14 |         super().__init__(name, args)
15 |         print("i am grandchild,name=", name)
16 | 
17 | 
18 | gc = GrandChild("grand_child_hello", "...")
19 | print("finally, got:", gc.name)
20 | 
--------------------------------------------------------------------------------
/test/test_image_process.py:
--------------------------------------------------------------------------------
1 | # Test whether samples can be resized correctly
2 | from utils import image_utils
3 | import conf
4 | import matplotlib.pyplot as plt, cv2
5 | #
6 | # plt.title("processed images", fontsize='large', fontweight='bold')
7 | #
8 | # resize_images = image_utils.read_and_resize_image(["test/data/test/test1.jpg"], conf)
9 | # plt.imshow(cv2.cvtColor(resize_images[0], cv2.COLOR_BGR2RGB))
10 | # plt.show()
11 | #
12 | # resize_images = image_utils.read_and_resize_image(["test/data/test/test2.jpg"], conf)
13 | # plt.imshow(cv2.cvtColor(resize_images[0], cv2.COLOR_BGR2RGB))
14 | # plt.show()
15 | 
16 | 
17 | # Test the polygon shrinking algorithm
18 | import numpy as np
19 | poly = np.array([[100, 130], [140, 126], [160, 129], [170, 140], [144, 142], [124, 135]])
20 | shrinked_poly = image_utils.shrink_poly(poly, 0.75)
21 | from matplotlib import pyplot as plt
22 | fig = plt.figure()
23 | ax = fig.add_subplot(121)
24 | ax.fill(poly[:, 0], poly[:, 1], 'g')
25 | ax = fig.add_subplot(121)
26 | ax.fill(shrinked_poly[:, 0], shrinked_poly[:, 1], 'r', alpha=0.8)
27 | ax = fig.add_subplot(122)
28 | ax.fill(shrinked_poly[:, 0], shrinked_poly[:, 1], 'r', alpha=0.8)
29 | plt.show()
--------------------------------------------------------------------------------
/test/test_krnn.py:
--------------------------------------------------------------------------------
1 | from keras.layers import Layer
2 | import keras.backend as K
3 | from keras.layers import LSTM, Input, GRU, Dense, Concatenate, TimeDistributed, Bidirectional
4 | from keras.models import Sequential
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 | import tensorflow as tf
8 | 
9 | class My_RNN(Layer):
10 | 
11 |     def __init__(self, output_dim, **kwargs):
12 |         self.output_dim = output_dim  # output dimension
13 |         super(My_RNN, self).__init__(**kwargs)
14 | 
15 |     def build(self, input_shape):  # define the trainable weights
16 |         self.kernel1 = self.add_weight(name='kernel1',
17 |                                        shape=(self.output_dim, self.output_dim),
18 |                                        initializer='glorot_normal',
19 |                                        trainable=True)
20 |         self.kernel2 = self.add_weight(name='kernel2',
21 |                                        shape=(input_shape[-1], self.output_dim),
22 |                                        initializer='glorot_normal',
23 |                                        trainable=True)
24 |         self.bias = self.add_weight(name='bias',  # was name='kernel', a misleading duplicate name
25 |                                     shape=(self.output_dim,),
26 |                                     initializer='glorot_normal',
27 |                                     trainable=True)
28 | 
29 |     def step_do(self, step_in, states):  # one step of the recurrence
30 |         print("step_in:", step_in)
31 |         print("states:", states)
32 |         step_in = tf.Print(step_in, [tf.shape(step_in)], "step_in")
33 |         states = tf.Print(states, [tf.shape(states)], "states")
34 |         step_out = K.tanh(K.dot(states[0], self.kernel1) +
35 |                           K.dot(step_in, self.kernel2) +
36 |                           self.bias)
37 |         return step_out, [step_out]
38 | 
39 |     def call(self, inputs):  # the actual forward computation
40 |         init_states = [K.zeros((K.shape(inputs)[0], self.output_dim))]  # initial state (all zeros)
41 |         print("init_states.shape:", init_states)
42 |         outputs = K.rnn(self.step_do, inputs, init_states)  # run step_do over the time axis
43 |         return outputs[0]  # outputs is a tuple: outputs[0] is the last timestep's output,
44 |                            # outputs[1] is the whole output sequence over time,
45 |                            # and outputs[2] is a list of the intermediate hidden states.
46 | 
47 |     def compute_output_shape(self, input_shape):
48 |         return (input_shape[0], self.output_dim)
49 | 
50 | 
51 | 
52 | train_X = np.random.rand(10, 5, 3)
53 | train_y = np.random.rand(10, 5)
54 | 
55 | model = Sequential()
56 | model.add(My_RNN(output_dim=4, input_shape=(train_X.shape[1], train_X.shape[2])))
57 | model.add(Dense(5))
58 | model.compile(loss='mae', optimizer='adam')
59 | model.summary()
60 | # fit network
61 | history = model.fit(train_X, train_y, epochs=2, batch_size=72, verbose=2, shuffle=False)
--------------------------------------------------------------------------------
/test/test_label_maker.py:
--------------------------------------------------------------------------------
1 | from utils.label.label_maker import LabelGenerater
2 | from utils.label.label import ImageLabel
3 | from utils.label import label_utils
4 | import matplotlib.pyplot as plt
5 | import numpy as np
6 | import logging
7 | import os, cv2
8 | import conf
9 | 
10 | debug_dir = "data/debug"
11 | charset_path = "config/charset.4100.txt"
12 | shape = (conf.INPUT_IMAGE_WIDTH, conf.INPUT_IMAGE_HEIGHT)
13 | 
14 | """
15 | This script tests the label (sample) generation. It mainly checks:
16 | 1. Whether the generated maps look right visually:
17 |    - whether the rendered order_segment exactly wraps each character
18 |    - whether the localization map looks like a Gaussian around each character's center
19 |    - whether each order map draws the Gaussian of the N-th character in order
20 | 2. Whether the original string can be recovered from the generated labels via the
21 |    word formulation, which indirectly verifies the correctness of all the maps.
22 | """
23 | 
24 | 
25 | def save_bbox_image(image_label, image_path):
26 |     image = image_label.image
27 |     bboxes = image_label.bboxes
28 |     cv2.polylines(image, bboxes, True, (0, 0, 255))
29 |     cv2.imwrite(image_path, image)
30 | 
31 | 
32 | def save_image(name, gt, image=None, highlight=False):
33 |     image = cv2.resize(image, shape)
34 |     image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
35 | 
36 |     if highlight:
37 |         gt_mask = gt.copy()
38 |         gt_mask[gt_mask > 0] = 1
39 |         gt = 155 + 100 * gt / (gt.max() + 0.001)
40 |         gt = gt * gt_mask
41 |     else:
42 |         gt = 255 * gt / (gt.max() + 0.001)
43 | 
44 |     image = np.ubyte(0.5 * gt + 0.5 * image)
45 |     plt.clf()
46 |     # plt.imshow(image)
47 |     plt.imsave(name, image)  # plt must be used here to get a color image
48 | 
49 |     # cv2.imwrite(name,image)  # with cv2 the saved image is grayscale; plt applies a colormap to single-channel data, which is why its output is colored
50 | 
51 | 
52 | def test_make_label(image_path, charset):
53 |     dir, image_name = os.path.split(image_path)
54 |     name, ext = os.path.splitext(image_name)
55 |     if ext != ".png": return
56 |     json_path = os.path.join(dir, name + ".txt")
57 | 
58 |     print("----------------------------------------------")
59 |     print("Image: ", image_name)
60 | 
61 |     image = cv2.imread(image_path)
62 | 
63 |     f = open(json_path, encoding="utf-8")
64 |     data = f.readlines()
65 | 
66 |     image_label = ImageLabel(image,
67 |                              data,
68 |                              format="plaintext",
69 |                              target_size=(conf.INPUT_IMAGE_WIDTH, conf.INPUT_IMAGE_HEIGHT))
70 | 
71 |     generator = LabelGenerater(conf.MAX_SEQUENCE,
72 |                                target_image_shape=(conf.INPUT_IMAGE_HEIGHT, conf.INPUT_IMAGE_WIDTH),
73 |                                charset=charset)
74 | 
75 |     character_segment, order_maps, localization_map = generator.process(image_label)
76 | 
77 |     if not os.path.exists(debug_dir): os.makedirs(debug_dir)
78 | 
79 |     save_bbox_image(image_label, os.path.join(debug_dir, f"{name}.jpg"))
80 |     save_image(os.path.join(debug_dir, f"{name}_character_segment.jpg"), character_segment, image, True)
81 |     save_image(os.path.join(debug_dir, f"{name}_localization_map.jpg"), localization_map, image)
82 |     order_maps = order_maps.transpose(2, 0, 1)  # (H,W,S) => (S,H,W)
83 | 
84 |     for i, order_map in enumerate(order_maps):
85 |         save_image(os.path.join(debug_dir, f"{name}_order_map_{i + 1}.jpg"), order_map, image)
86 | 
87 |     test_word_formulation(character_segment, charset, image_label, order_maps)
88 | 
89 | 
90 | # Try to decode the labels back and check whether the original characters can be recovered;
91 | # this mainly verifies that decoding this way is reasonable (the GT maps should be the easiest case to decode correctly).
92 | def test_word_formulation(character_segment_G, charset, image_label, order_maps_H):
93 |     G = np.eye(len(charset))[character_segment_G]  # indexing the identity matrix one-hot encodes the categorical map
94 |     H = order_maps_H
95 |     # print("character_segment_G.shape:", character_segment_G.shape)
96 |     # print("G.shape:", G.shape)
97 |     # print("order_maps.shape/H:", order_maps_H.shape)
98 | 
99 |     pred = ""
100 |     indices, max_sum = None, None
101 |     for i, H_k in enumerate(H):
102 |         # G[H,W,C:4100] * H_k[H,W,1]
103 |         # G holds the per-pixel character probabilities (over the 4100 classes)
104 |         # H_k is the Gaussian (order map) of the k-th character
105 |         # (G*H_k)           ===> [H,W,4100]
106 |         # sum = \sum(G*H_k) ===> [4100]
107 | 
108 |         _H_k = H_k[:, :, np.newaxis]  # [H,W] => [H,W,1]
109 |         GH_k = (G * _H_k)
110 |         sum = np.sum(GH_k, axis=(0, 1))
111 |         id = sum.argmax()
112 |         print("sum max value:", sum[id])
113 | 
114 |         # print("max id of 4100:", id, ", max value is :", sum[id])
115 |         if id == 0:
116 |             indices = sum.argsort()
117 |             max_sum = sum[indices]
118 |             # print("top2 id:", indices[2:])
119 |             # print("top2 prob:", sum[indices])
120 |             break
121 | 
122 |         c = label_utils.id2str([int(id)], charset)
123 |         pred += c
124 | 
125 |     if image_label.label != pred:
126 |         print("Predict:[%s]" % pred)
127 |         print("Label  :[%s]" % image_label.label)
128 |         if indices is not None:  # only available when decoding stopped at the padding class
129 |             top = 2
130 |             print(f"Top {top}  :", indices[-top:])
131 |             print(f"Prob {top} :", max_sum[-top:])
132 |             print("Missed :", label_utils.id2str(indices[-top:].tolist(), charset))
133 | 
134 | 
135 | if __name__ == "__main__":
136 |     logging.basicConfig(format="%(levelname)s %(message)s", level=logging.DEBUG)
137 | 
138 |     charset = label_utils.get_charset(charset_path)
139 | 
140 |     # test every image in the directory
141 |     # dir = "data/train"
142 |     # files = os.listdir(dir)
143 |     # for f in files:
144 |     #     image_path = os.path.join(dir, f)
145 |     #     test_make_label(image_path, charset)
146 | 
147 |     # test a single image
148 |     test_make_label("data/train/3-5.png", charset)
149 |     # test_make_label("data/train/0-6.png", charset)
150 |     # test_make_label("data/train/0-23.png", charset)
151 |     # test_make_label("data/train/2-16.png", charset)
152 |     # test_make_label("data/train/1-22.png", charset)
--------------------------------------------------------------------------------
/test/test_summary_image.py:
--------------------------------------------------------------------------------
1 | import io
2 | from tensorflow.keras.callbacks import Callback
3 | from tensorflow.python.framework.ops import EagerTensor
4 | from PIL import Image, ImageDraw, ImageFont
5 | import matplotlib.pyplot as plt
6 | import tensorflow as tf
7 | import numpy as np
8 | import logging
9 | import cv2
10 | 
11 | image = np.random.random((32, 256))
12 | buffer = io.BytesIO()
13 | plt.imsave(buffer, image, format='jpg')
14 | image = Image.open(buffer).convert('RGB')
15 | image.save("../data/test.jpg")
16 | image = np.array(image)
17 | buffer.close()
18 | print(image.shape)
19 | 
20 | writer = tf.summary.create_file_writer("../data/tboard")
21 | with writer.as_default():
22 |     tf.summary.image("test123", np.array([image]), step=0)
--------------------------------------------------------------------------------
/test/test_tensor_process.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tensorflow.keras.backend as K
3 | import numpy as np
4 | G = np.random.random((10, 64, 256, 512))   # (N,H,W,charset), toy charset size 512
5 | G = tf.convert_to_tensor(G)
6 | H = np.random.random((10, 64, 256, 3))     # (N,H,W,S), toy sequence length 3
7 | H = tf.convert_to_tensor(H)
8 | 
9 | 
10 | p_k_list = []
11 | for i in range(H.shape[-1]):
12 |     H_k = H[:, :, :, i]
13 |     H_k = H_k[:, :, :, tf.newaxis]
14 |     print("H_k:", H_k.shape)
15 |     GH = H_k * G
16 |     print("GH:", GH.shape)
17 |     p_k = K.sum(GH, axis=(1, 2))
18 |     print("p_k:", p_k.shape)
19 |     print("------------")
20 |     p_k_list.append(p_k)
21 | pks = tf.stack(p_k_list)  # (S,N,charset): (3,10,512) here, (30,10,4100) with the real sizes
22 | pks = K.permute_dimensions(pks, (1, 0, 2))
23 | print("P_k:", pks.shape)  # (N,S,charset): (10,3,512) here, (10,30,4100) with the real sizes
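Note: the per-k loop above can be collapsed into one contraction. A minimal equivalent sketch (same toy tensors G and H as above; assumes a TensorFlow version with tf.einsum, not code from this repo):

    # sum over H and W for every order map k at once:
    # pks2[n,k,c] = sum_{h,w} G[n,h,w,c] * H[n,h,w,k]
    pks2 = tf.einsum('bhwc,bhwk->bkc', G, H)   # (10, 3, 512), i.e. (N, S, charset)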
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piginzoo/textscanner/a934102e3f7c9c7301c74fc86f6491da55ea5ba6/utils/__init__.py
--------------------------------------------------------------------------------
/utils/image_utils.py:
--------------------------------------------------------------------------------
1 | import cv2, numpy as np
2 | import logging
3 | import pyclipper
4 | logger = logging.getLogger(__name__)
5 | 
6 | 
7 | def show_image(img):
8 |     # if img: plt.imshow(img)
9 |     pass
10 | 
11 | # Resize images: the height is fixed (INPUT_IMAGE_HEIGHT, 64); the width is padded up (or resized down) to INPUT_IMAGE_WIDTH
12 | def read_and_resize_image(image_names: list, conf):
13 | 
14 |     padded_images = []
15 | 
16 |     for image_name in image_names:
17 |         image = cv2.imread(image_name, cv2.IMREAD_COLOR)
18 |         if image is None:
19 |             logger.warning("Failed to read image %s", image_name)
20 |             continue
21 |         # logger.debug("Read file [%s]: %r", image_name, image.shape)
22 |         h, w, _ = image.shape
23 |         ratio = conf.INPUT_IMAGE_HEIGHT / h  # INPUT_IMAGE_HEIGHT
24 |         image = cv2.resize(image, None, fx=ratio, fy=ratio, interpolation=cv2.INTER_AREA)
25 |         show_image(image)
26 |         # after resizing, compare the actual width with the required width (256 by default)
27 |         dim_difference = conf.INPUT_IMAGE_WIDTH - image.shape[1]
28 |         if (dim_difference < 0):
29 |             # if the image is too wide, resize it down to the maximum width
30 |             padded_image = cv2.resize(image, (conf.INPUT_IMAGE_WIDTH, conf.INPUT_IMAGE_HEIGHT))
31 |         else:
32 |             # otherwise pad with black: [(0,0),(0,dim_difference),(0,0)] => [no padding on height, pad only at the tail of width, no padding on channels]
33 |             padded_image = np.pad(image, [(0, 0), (0, dim_difference), (0, 0)], 'constant', constant_values=(0))
34 |         # show_image(padded_image)
35 |         # cv2.imwrite("data/test.jpg", padded_image)
36 |         padded_images.append(padded_image)
37 |         # logger.debug("Resized file [%s]: %r", image_name, padded_image.shape)
38 | 
39 |     images = np.stack(padded_images, axis=0)
40 |     # logger.debug("Images shape: %r", images.shape)
41 |     return images
42 | 
43 | def perimeter(polys):
44 |     # compute the perimeter
45 |     p = 0
46 |     nums = polys.shape[0]
47 |     for i in range(nums):
48 |         p += abs(np.linalg.norm(polys[i % nums] - polys[(i + 1) % nums]))
49 |     # logger.debug('perimeter:{}'.format(p))
50 |     return p
51 | 
52 | # Reference: https://blog.csdn.net/m_buddy/article/details/105614620
53 | # polys[N,2]
54 | def shrink_poly(polys, ratio=0.5):
55 |     """
56 |     Shrink a polygon
57 |     :param polys: the polygon
58 |     :param ratio: the shrink ratio
59 |     :return:
60 |     """
61 |     if type(polys) == list:
62 |         polys = np.array(polys)
63 | 
64 |     if ratio == 1: return polys
65 | 
66 |     area = abs(pyclipper.Area(polys))  # area
67 |     _perimeter = perimeter(polys)      # perimeter
68 | 
69 |     pco = pyclipper.PyclipperOffset()
70 |     if _perimeter:
71 |         # TODO: why is d computed this way? (see the note below)
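        # Note (added explanation, not in the original source): for a small uniform
        # inward offset d, the removed area is approximately perimeter * d. Shrinking
        # every linear dimension by `ratio` scales the area by ratio**2, so the area
        # to remove is area * (1 - ratio**2), which gives
        #     d = area * (1 - ratio**2) / perimeter
        # This is the same offset distance used with pyclipper for label shrinking in
        # PSENet-style text detectors.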
72 |         d = area * (1 - ratio * ratio) / _perimeter
73 |         pco.AddPath(polys, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
74 |         # execute the offset and return the shrunken polygon
75 |         polys_shrink = pco.Execute(-d)
76 |     else:
77 |         logger.warning("The polygon's perimeter is 0")
78 |         return None
79 | 
80 |     if len(polys_shrink) == 0:
81 |         logger.debug("Shrinking the polygon [area=%f] failed, falling back to the original coordinates", area)
82 |         return polys
83 |     shrinked_bbox = np.array(polys_shrink[0])
84 |     return shrinked_bbox
85 | 
86 | if __name__ == "__main__":
87 |     import conf
88 |     read_and_resize_image("data/test.jpg", conf)
--------------------------------------------------------------------------------
/utils/label/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piginzoo/textscanner/a934102e3f7c9c7301c74fc86f6491da55ea5ba6/utils/label/__init__.py
--------------------------------------------------------------------------------
/utils/label/label.py:
--------------------------------------------------------------------------------
1 | from utils import util
2 | import numpy as np
3 | import json, cv2
4 | import logging
5 | 
6 | logger = logging.getLogger(__name__)
7 | 
8 | 
9 | class ImageLabel:
10 |     """
11 |     Wraps the image and its label data.
12 |     There are 2 supported formats:
13 |     - labelme format: see https://github.com/wkentaro/labelme/blob/master/examples/tutorial/apc2016_obj3.json
14 |     - plaintext: like
15 |     >>>
16 |     你好,世界
17 |     11,12,21,22,31,32,41,42,你
18 |     ...
19 |     <<<
20 |     Moreover, ImageLabel is in charge of resizing to the standard size (64x256).
21 |     """
22 | 
23 |     def __init__(self, image, data, format, target_size):
24 |         self.format = format
25 |         self.image = cv2.resize(image, target_size)  # do the standard resizing
26 | 
27 |         self.target_size = target_size  # (W,H)
28 |         self.orignal_size = (image.shape[1], image.shape[0])  # (W,H)
29 | 
30 |         self.labels = self.load(data)
31 | 
32 |     def load(self, data):
33 | 
34 |         if self.format == "labelme":
35 |             return self._load_labelme(data)
36 | 
37 |         if self.format == "plaintext":
38 |             return self._load_plaintext(data)
39 | 
40 |         raise ValueError("Unknown label type:", self.format)
41 | 
42 |     # labelme json format reference: https://github.com/wkentaro/labelme/blob/master/examples/tutorial/apc2016_obj3.json
43 |     def _load_labelme(self, data):
44 | 
45 |         assert type(data) == list
46 | 
47 |         data = "".join(data)
48 | 
49 |         image_labels = json.loads(data)
50 |         shapes = image_labels['shapes']
51 |         labels = []
52 |         for s in shapes:
53 |             label = s['label']
54 |             points = s['points']
55 |             points = util.resize_bboxes(points, original_size=self.orignal_size, target_size=self.target_size)
56 |             labels.append(Label(label, points))
57 |         return labels
58 | 
59 |     # format:
60 |     # 你好,世界
61 |     # 11,12,21,22,31,32,41,42,你
62 |     # 11,12,21,22,31,32,41,42,好
63 |     # ....
64 |     def _load_plaintext(self, data):
65 | 
66 |         assert type(data) == list
67 | 
68 |         # skip data[0], the first line, which is the full label string
69 | 
70 |         # parse from line 2 to the end
71 |         labels = []
72 |         for i in range(1, len(data)):
73 |             # "11,12,21,22,31,32,41,42,你"
74 |             line = data[i]
75 |             line = line.replace(" ", "")
76 |             line = line.replace("\n", "")
77 | 
78 |             line_data = line.split(",")
79 |             points = line_data[:8]
80 |             label = line_data[8]
81 | 
82 |             # handle the exceptional case: "11,12,21,22,31,32,41,42,,"
83 |             if line[-2:] == ",,":
84 |                 label = ","
85 | 
86 |             # "11,12,21,22,31,32,41,42" => [[11,12],[21,22],[31,32],[41,42]]
87 |             points = [int(p.strip()) for p in points]
88 |             points = np.array(points)
89 |             points = np.reshape(points, (4, 2))
90 | 
91 |             # adjust all bboxes' coordinates
92 |             points = util.resize_bboxes(points, original_size=self.orignal_size, target_size=self.target_size)
93 | 
94 |             # logger.debug("resized bbox:%r", points)
95 | 
96 |             labels.append(Label(label, points))
97 |         return labels
98 | 
99 |     @property
100 |     def bboxes(self):
101 |         return np.array([l.bbox for l in self.labels])
102 | 
103 |     @property
104 |     def label(self):
105 |         return "".join([l.label for l in self.labels])
106 | 
107 | class Label:
108 |     """
109 |     Single word label format:
110 |     "label": "X",
111 |     "points": [ [x1,y1],....,[xn,yn]]
112 |     """
113 | 
114 |     def __init__(self, label, bbox):
115 |         if type(bbox) == list:
116 |             bbox = np.array(bbox)
117 |         assert bbox.shape == (4, 2)
118 |         assert label is not None and label != " " and label != ""
119 |         self.bbox = bbox
120 |         self.label = label
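Note: a minimal usage sketch of the plaintext format (the file path and box coordinates below are made up for illustration; only the ImageLabel API itself comes from this repo):

    import cv2

    image = cv2.imread("data/train/3-5.png")         # any sample image
    data = ["你好",                                   # line 1: the full label string
            "11,12,21,22,31,32,41,42,你",             # one character + its 4-point box per line
            "50,12,60,22,70,32,80,42,好"]
    il = ImageLabel(image, data, format="plaintext", target_size=(256, 64))  # (W,H)
    print(il.label)          # "你好"
    print(il.bboxes.shape)   # (2, 4, 2)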
--------------------------------------------------------------------------------
/utils/label/label_maker.py:
--------------------------------------------------------------------------------
1 | from utils.label import label_utils
2 | import scipy.ndimage.filters as fi
3 | from utils import image_utils
4 | import numpy as np
5 | import logging
6 | import cv2
7 | 
8 | logger = logging.getLogger(__name__)
9 | 
10 | 
11 | class LabelGenerater():
12 |     """
13 |     This class generates the GT labels.
14 |     The loss function needs 3 GTs: Q, H, G.
15 |     Refer to: http://www.piginzoo.com/machine-learning/2020/04/14/ocr-fa-textscanner#%E5%85%B3%E4%BA%8E%E6%8D%9F%E5%A4%B1%E5%87%BD%E6%95%B0
16 |     - Order map GT              : H
17 |     - Localization map GT       : Q
18 |     - Character segmentation GT : G
19 |     """
20 |     shrink = 1  # shrink ratio for each character's wrapping polygon
21 |     ζ = 0.5     # threshold for normalization
22 |     δ = 5       # variance of the Gaussian distribution
23 | 
24 |     def __init__(self, max_sequence, target_image_shape, charset):
25 |         self.max_sequence = max_sequence
26 |         self.target_image_shape = target_image_shape  # [H,W]: [64,256]
27 |         self.target_width = target_image_shape[1]
28 |         self.target_height = target_image_shape[0]
29 |         self.charset = charset
30 | 
31 |     # # adjust all polygons' coordinates
32 |     # def _adjust_by_size(self, boxes, original_shape):
33 |     #     assert len(boxes.shape) == 2 or len(boxes.shape) == 3
34 |     #
35 |     #     ratio_x = original_shape[1] / self.target_width
36 |     #     ratio_y = original_shape[0] / self.target_height
37 |     #
38 |     #     if len(boxes.shape) == 3:
39 |     #         boxes[:, :, 0] = (boxes[:, :, 0] / ratio_x).clip(0, self.target_width)
40 |     #         boxes[:, :, 1] = (boxes[:, :, 1] / ratio_y).clip(0, self.target_heigth)
41 |     #     else:
42 |     #         boxes[:, 0] = (boxes[:, 0] / ratio_x).clip(0, self.target_width)
43 |     #         boxes[:, 1] = (boxes[:, 1] / ratio_y).clip(0, self.target_heigth)
44 |     #
45 |     #     boxes = (boxes + .5).astype(np.int32)
46 |     #     return boxes
47 | 
48 |     # data is ImageLabel{image,[Label]}
49 |     def process(self, image_labels):
50 | 
51 |         # adjust the coordinates
52 |         shape = image_labels.image.shape[:2]  # h,w
53 |         boxes = image_labels.bboxes  # [N,4,2]  N: number of characters
54 |         label = image_labels.label
55 | 
56 |         # # find each bbox's boundary
57 |         # xmins = boxes[:, :, 0].min(axis=1)
58 |         # xmaxs = np.maximum(boxes[:, :, 0].max(axis=1), xmins + 1)
59 |         # ymins = boxes[:, :, 1].min(axis=1)
60 |         # ymaxs = np.maximum(boxes[:, :, 1].max(axis=1), ymins + 1)
61 | 
62 |         character_segment = self.render_character_segemention(image_labels)
63 |         localization_map = np.zeros(self.target_image_shape, dtype=np.float32)
64 |         order_segments = np.zeros((*self.target_image_shape, self.max_sequence), dtype=np.float32)
65 |         # order_maps = np.zeros((*self.target_image_shape, self.max_sequence), dtype=np.float32)
66 | 
67 |         assert boxes.shape[0] <= self.max_sequence, \
68 |             f"the train/validate label text length[{len(image_labels.labels)}] must be no longer than the pre-defined max sequence length[{self.max_sequence}]"
69 | 
70 |         # process each character
71 |         for i in range(boxes.shape[0]):
72 |             # Y_hat_k is the normalized Gaussian map, matching the notation in the paper
73 |             Y_hat_k = self.generate_Y_hat_k_by_gaussian_normalize(self.target_image_shape,
74 |                                                                   boxes[i])  # xmins[i], xmaxs[i], ymins[i], ymaxs[i]
75 |             if Y_hat_k is None:
76 |                 logger.warning("Generating Y_%d failed: char [%s] of [%s]", i, label[i], label)
77 |                 Y_hat_k = np.zeros((self.target_image_shape))
78 | 
79 |             self.render_order_segment(order_segments[:, :, i], Y_hat_k, threshold=self.ζ)
80 |             localization_map = self.render_localization_map(localization_map, Y_hat_k)
81 |             # order_maps = order_segments * localization_map[:, :, np.newaxis]
82 | 
83 |         return character_segment, order_segments, localization_map
84 | 
85 |     # Build a Gaussian distribution around the character's center point; because each
86 |     # point's probability would be tiny, normalize so that the values fall into [0,1].
87 |     # def gaussian_normalize(self, shape, xmin, xmax, ymin, ymax):
88 |     # @return an "image" of shape [H,W], filled with a Gaussian distribution
89 |     def generate_Y_hat_k_by_gaussian_normalize(self, shape, one_word_bboxes):  # one_word_bboxes[4,2]
90 |         # logger.debug("The word bbox: %r, image shape: %r", one_word_bboxes, shape)
91 | 
92 |         # find the bbox boundary
93 |         xmin = one_word_bboxes[:, 0].min()
94 |         xmax = one_word_bboxes[:, 0].max()
95 |         ymin = one_word_bboxes[:, 1].min()
96 |         ymax = one_word_bboxes[:, 1].max()
97 | 
98 |         out = np.zeros(shape)
99 |         h, w = shape[:2]
100 |         # find the "center" of the polygon
101 |         y = (ymax + ymin + 1) // 2
102 |         x = (xmax + xmin + 1) // 2
103 |         if x > w or y > h:
104 |             logger.warning("Annotation is outside the image, failed to generate the Gaussian sample: (xmin:%f, xmax:%f, ymin:%f, ymax:%f, w:%f, x:%f, h:%f, y:%f)", xmin, xmax,
105 |                            ymin, ymax, w, x, h, y)
106 |             return None
107 | 
108 |         # prepare the Gaussian distribution, refer to paper <