├── .gitignore
├── README.md
├── bin
│   ├── docker.build
│   ├── docker.train
│   ├── pred.sh
│   ├── tboard.sh
│   └── train.sh
├── conf.py
├── config
│   ├── Dockerfile
│   ├── charset.4100.txt
│   ├── charset.txt
│   ├── memory_usage.xls
│   ├── pip.conf
│   └── sources.list
├── main
│   ├── __init__.py
│   ├── pred.py
│   └── train.py
├── network
│   ├── __init__.py
│   ├── layers
│   │   ├── __init__.py
│   │   ├── class_branch_layer.py
│   │   ├── fcn_layer.py
│   │   ├── geometry_branch_layer.py
│   │   └── word_formation_layer.py
│   └── model.py
├── requirements.txt
├── test
│   ├── __init__.py
│   ├── gaussian_filter.py
│   ├── make_decouple_map.py
│   ├── test_accuracy.py
│   ├── test_call.py
│   ├── test_customized_layer.py
│   ├── test_draw_charactor_segment.py
│   ├── test_file_process.py
│   ├── test_heirachy.py
│   ├── test_image_process.py
│   ├── test_krnn.py
│   ├── test_label_maker.py
│   ├── test_summary_image.py
│   └── test_tensor_process.py
└── utils
    ├── __init__.py
    ├── image_utils.py
    ├── label
    │   ├── __init__.py
    │   ├── label.py
    │   ├── label_maker.py
    │   └── label_utils.py
    ├── logger.py
    ├── sequence.py
    ├── util.py
    ├── val_sequence.py
    └── visualise_callback.py

/.gitignore:
--------------------------------------------------------------------------------
1 | logs/
2 | 
3 | data/
4 | data/*
5 | data
6 | 
7 | .idea
8 | *.pyc
9 | */*.pyc
10 | __pycache__
11 | model
12 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | This is an implementation of the [TextScanner paper](https://arxiv.org/abs/1912.12422).
2 | 
3 | For notes on understanding the paper, please read my blog post: [TextScanner的一些研究](http://www.piginzoo.com/machine-learning/2020/04/14/ocr-fa-textscanner)
4 | 
5 | # implementation list (ongoing...)
6 | - [X] implement the network
7 | - [ ] implement the mutual-supervision mechanism
8 | - [X] implement the loss functions
9 | - [X] create the character-level annotation GT, and prepare the non-character-level GT
10 | - [X] implement the training code
11 | - [X] implement the evaluation code
12 | - [X] train the model
13 | 
14 | # development logs
15 | - 2020.4.24 created the project and implemented its skeleton
16 | - 2020.4.30 implemented the network code, and finished the GT generator and the loss functions
17 | - 2020.5.12 the network finally works, after hundreds of rounds of troubleshooting; TF2.0/tf.keras is full of pitfalls
18 | - 2020.6.03 made a [new branch](https://github.com/piginzoo/textscanner/tree/b_troubleshooting_OOM) to solve the OOM issue
19 | 
20 | # Branches
21 | - *[b_troubleshooting_OOM](https://github.com/piginzoo/textscanner/tree/b_troubleshooting_OOM): tries to fix the GPU OOM issue.
22 |   This is the branch I currently work on, mainly.<------*
23 | - [b_wordform_in_model](https://github.com/piginzoo/textscanner/tree/b_wordform_in_model): implements word formation as an internal layer of the model;
24 |   this branch does not deal with the OOM issue (reducing the charset size) and focuses on the elegance of the code implementation.
25 | - [b_multiple_gpus_train](https://github.com/piginzoo/textscanner/tree/b_multiple_gpus_train): implements multi-GPU training
26 | 
27 | # implementation details
28 | Development details can be tracked in my [textscanner implementation issues](https://www.notion.so/piginzoospace/Textscanner-254a700668714f0d811afe2ab8124046).
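29 | 
30 | # Quick start (sketch)
31 | A minimal usage sketch based on the scripts under `bin/`; the GPU id and paths are examples, not fixed values:
32 | 
33 | ```bash
34 | bin/docker.build                             # build the docker image (pass "proxy" to use the proxy configured in the script)
35 | bin/docker.train 0                           # train inside the container on GPU 0, with data mounted as described in bin/docker.train
36 | bin/train.sh debug                           # quick debug run on the tiny dataset under data/test, without docker
37 | bin/pred.sh --image=<image> --model=<model>  # predict one image with a trained model (arguments are forwarded to main/pred.py)
38 | ```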
--------------------------------------------------------------------------------
/bin/docker.build:
--------------------------------------------------------------------------------
1 | if [ "$1" == "proxy" ]; then
2 |     echo "Build the docker image with proxy"
3 |     docker build \
4 |         --no-cache \
5 |         --network host \
6 |         --build-arg http_proxy="http://172.17.0.1:8123" \
7 |         --build-arg https_proxy="http://172.17.0.1:8123" \
8 |         --build-arg HTTP_PROXY="http://172.17.0.1:8123" \
9 |         --build-arg HTTPS_PROXY="http://172.17.0.1:8123" \
10 |         -f config/Dockerfile \
11 |         -t textscanner.img .
12 |     exit
13 | fi
14 | 
15 | docker build -f config/Dockerfile -t textscanner.img .
--------------------------------------------------------------------------------
/bin/docker.train:
--------------------------------------------------------------------------------
1 | if [ "$1" == "" ]; then
2 |     echo "Usage: bin/docker.train <GPU id>"
3 |     exit
4 | fi
5 | 
6 | PWD=`pwd`
7 | echo $PWD
8 | docker run --rm -it \
9 |     -e NVIDIA_VISIBLE_DEVICES=$1 \
10 |     --runtime=nvidia \
11 |     --mount type=bind,source=$PWD,target=/root/textscanner \
12 |     --mount type=bind,source=/root/.keras,target=/root/.keras \
13 |     --mount type=bind,source=/app/data/textscanner/20200602_syntext/,target=/root/textscanner/data \
14 |     --name textscanner \
15 |     --workdir /root/textscanner \
16 |     textscanner.img \
17 |     bin/train.sh $2
--------------------------------------------------------------------------------
/bin/pred.sh:
--------------------------------------------------------------------------------
1 | if [ "$1" == "" ] || [ "$1" == "help" ]; then
2 |     echo "Usage:"
3 |     echo "\tpred.sh --image=<image path> --model=<model path>"
4 |     exit
5 | fi
6 | 
7 | echo "Start predicting ..."
8 | 
9 | python -m main.pred $1 $2
--------------------------------------------------------------------------------
/bin/tboard.sh:
--------------------------------------------------------------------------------
1 | if [ "$1" == "" ]; then
2 |     echo "Usage: tboard.sh <port>"
3 |     exit
4 | fi
5 | 
6 | nohup /root/py3/bin/tensorboard --port=$1 --logdir=./logs/tboard >/dev/null 2>&1 &
--------------------------------------------------------------------------------
/bin/train.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Parameter notes:
3 | # python -m main.train \
4 | #    --name=attention_ocr \
5 | #    --epochs=200 \            # 200 epochs, though training may not run them all, because of early stop
6 | #    --steps_per_epoch=1000 \  # batches per epoch; strictly it should be total_samples/batch_size, but we have millions of samples, which is too slow, so we take only 1000 batches
7 | #                              # as one "epoch". Why? Because Keras only fires its callbacks (validate, early stop, etc.) at the end of each epoch
8 | #    --batch=64 \
9 | #    --learning_rate=0.001 \
10 | #    --validation_batch=64 \
11 | #    --retrain=True \          # retrain from scratch, or continue from a checkpoint
12 | #    --validation_steps=10 \   # how many batches to validate; "steps" is a poor name, it should be "batches"; it could actually be computed, here 64x10=640 samples are validated in total
13 | #    --workers=10 \
14 | #    --preprocess_num=100 \
15 | #    --early_stop=10 \         # stop if there is no improvement for 10 epochs, roughly 10k batches
16 | 
17 | echo "Begin to train ..."
18 | 
19 | Date=$(date +%Y%m%d%H%M)
20 | export CUDA_VISIBLE_DEVICES=0
21 | 
22 | 
23 | if [ "$1" == "console" ] || [ "$1" == "debug" ]; then
24 | 
25 |     if [ "$1" == "debug" ]; then
26 |         echo "_/_/_/_/_/_/ Start PDB Debugging... _/_/_/_/_/_/"
27 |         sed -i '1i\import pdb; pdb.set_trace()\n' main/train.py
28 |     fi
29 | 
30 |     echo "In DEBUG mode ..."
31 |     # --validation_steps=1 \
32 |     # Test notes:
33 |     #   training: 10 images, but with steps_per_epoch=2 and batch=3, a reshuffle is expected after 6 images
34 |     #   validation: with a Sequence, the validation_steps argument is not needed; it computes len(data)/batch itself.
35 |     #   If you set it anyway, it must be smaller than that. Also still to verify: does it average per-batch results, or compute over the whole set?
36 |     export CUDA_VISIBLE_DEVICES=0  # no GPU is needed for debugging
37 |     python -m main.train \
38 |         --name=textscanner \
39 |         --epochs=1 \
40 |         --debug_mode \
41 |         --debug_step=1 \
42 |         --steps_per_epoch=1 \
43 |         --batch=3 \
44 |         --retrain=True \
45 |         --learning_rate=0.001 \
46 |         --train_label_dir=data/test \
47 |         --validate_label_dir=data/test \
48 |         --validation_batch=1 \
49 |         --validation_steps=1 \
50 |         --preprocess_num=1 \
51 |         --workers=3 \
52 |         --early_stop=1
53 | 
54 |     if [ "$1" == "debug" ]; then
55 |         # restore the source file, to avoid committing the change to git
56 |         sed -i '1d' main/train.py
57 |     fi
58 | 
59 |     exit
60 | fi
61 | 
62 | if [ "$1" = "stop" ]; then
63 |     echo "Stop Training!"
64 |     ps aux|grep python|grep name=textscanner|awk '{print $2}'|xargs kill -9
65 |     exit
66 | fi
67 | 
68 | 
69 | echo "Production Mode ..."
70 | echo "Using GPU #$CUDA_VISIBLE_DEVICES"
71 | 
72 | nohup python -m main.train \
73 |     --name=textscanner \
74 |     --steps_per_epoch=1000 \
75 |     --epochs=5000000 \
76 |     --debug_step=1000 \
77 |     --batch=32 \
78 |     --retrain=True \
79 |     --learning_rate=0.001 \
80 |     --validation_batch=64 \
81 |     --validation_steps=10 \
82 |     --workers=10 \
83 |     --early_stop=100 \
84 |     >> ./logs/Attention_GPU${CUDA_VISIBLE_DEVICES}_$Date.log 2>&1 &
--------------------------------------------------------------------------------
/conf.py:
--------------------------------------------------------------------------------
1 | import argparse,sys
2 | 
3 | '''
4 | define the basic configuration parameters,
5 | also define one command-line argument parsing method: init_args
6 | '''
7 | MAX_SEQUENCE = 30  # maximum length of a recognized text, in characters
8 | MASK_VALUE = 0
9 | CHARSET = "config/charset.4100.txt"  # level-1 charset + punctuation + digits + common place/person-name characters from the level-2 charset (made by TianT.)
10 | INPUT_IMAGE_HEIGHT = 64  # normalized image height
11 | INPUT_IMAGE_WIDTH = 256  # maximum image width
12 | GRU_HIDDEN_SIZE = 64  # number of hidden units in the GRU
13 | FEATURE_MAP_REDUCE = 8  # downscale factor of the feature map vs the original image (the feature map fed to the bi-GRU decoder); currently 8, because ResNet50 downscales by 8
14 | FILTER_NUM = 64  # default number of hidden units in the custom layers
15 | 
16 | DEBUG = True
17 | 
18 | DIR_LOGS="logs"
19 | DIR_TBOARD="logs/tboard"
20 | DIR_MODEL="model"
21 | DIR_CHECKPOINT="model/checkpoint"
22 | LABLE_FORMAT="plaintext"  # label format: "labelme" (JSON) or "plaintext" (plain text)
23 | 
24 | # dislike the flags style of tensorflow, instead using plain argparse
25 | 
26 | def init_args():
27 |     parser = argparse.ArgumentParser()
28 |     parser.add_argument("--name", default="attention_ocr", type=str, help="")
29 |     parser.add_argument("--train_label_dir", default="data/train", type=str, help="")
30 |     parser.add_argument("--validate_label_dir", default="data/train", type=str, help="")
31 |     parser.add_argument("--train_label_file", default="data/train/train.txt", type=str, help="")
32 |     parser.add_argument("--validate_label_file", default="data/train/train.txt", type=str, help="")
33 |     parser.add_argument("--epochs", default=1, type=int, help="")
34 |     parser.add_argument("--debug_mode", default=False, action='store_true', help="")
35 |     parser.add_argument("--debug_step", default=1, type=int, help="")  # print the attention every this many steps
36 |     parser.add_argument("--steps_per_epoch", default=None, type=int, help="")
37 |     parser.add_argument("--batch", default=1, type=int, help="")
38 |     parser.add_argument("--learning_rate", default=0.001, type=float, help="")
39 |     parser.add_argument("--workers", default=1, type=int, help="")
40 |     parser.add_argument("--retrain", default=False, type=lambda s: str(s).lower() in ("true", "1", "yes"), help="")  # note: argparse's type=bool would treat any non-empty string, even "False", as True
41 |     parser.add_argument("--preprocess_num", default=None, type=int, help="")  # total number of samples to use, for debugging; None means all samples
42 |     parser.add_argument("--validation_steps", default=1, type=int, help="")
43 |     parser.add_argument("--validation_batch", default=1, type=int, help="")
44 |     parser.add_argument("--early_stop", default=1, type=int, help="")
45 |     args = parser.parse_args()
46 |     print("==============================")
47 |     print("       Configurations :       ")
48 |     print("==============================")
49 |     print(args)
50 | 
51 |     sys.modules[__name__].DEBUG = args.debug_mode
52 | 
53 |     # if args.debug_mode:
54 |     #     print("Running in DEBUG mode!")
55 |     #     sys.modules[__name__].FILTER_NUM = 1
56 | 
57 |     return args
58 | 
59 | 
60 | def init_pred_args():
61 |     parser = argparse.ArgumentParser()
62 |     parser.add_argument("--image", default=1, type=str, help="")
63 |     parser.add_argument("--model", default=1, type=str, help="")
64 |     args = parser.parse_args()
65 |     return args
--------------------------------------------------------------------------------
/config/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM tensorflow/tensorflow:2.1.0-gpu-py3
2 | 
3 | MAINTAINER piginzoo
4 | 
5 | RUN cp /etc/apt/sources.list /etc/apt/sources.list.backup
6 | ADD config/sources.list /etc/apt/sources.list
7 | RUN apt-get update
8 | RUN apt-get install -y vim build-essential libglib2.0-0 libsm6 libxext6 libxrender-dev
9 | 
10 | RUN mkdir /root/.pip
11 | ADD config/pip.conf /root/.pip
12 | ADD requirements.txt /root/requirements.txt
13 | RUN pip install -r /root/requirements.txt
--------------------------------------------------------------------------------
/config/charset.4100.txt:
--------------------------------------------------------------------------------
1 | 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
2 | !@#$%^&*()-_+=×{}[]|\<>,.;:?/"'~
3 | 《》①②③④⑤⑥⑦⑧⑨⑩【】。、“”‘’°¥○●□■
4 | 啊阿埃挨哎唉哀皑癌蔼矮艾碍爱隘鞍氨安俺按暗岸胺案肮昂盎凹敖熬翱袄傲奥懊澳芭捌扒叭吧笆八疤巴拔跋靶把耙坝霸罢爸白柏百摆佰败拜稗斑班搬扳般颁板版扮拌伴瓣半办绊邦帮梆榜膀绑棒磅蚌镑傍谤苞胞包褒剥薄雹保堡饱宝抱报暴豹鲍爆杯碑悲卑北辈背贝
钡倍狈备惫焙被奔苯本笨崩绷甭泵蹦迸逼鼻比鄙笔彼碧蓖蔽毕毙毖币庇痹闭敝弊必辟壁臂避陛鞭边编贬扁便变卞辨辩辫遍标彪膘表鳖憋别瘪彬斌濒滨宾摈兵冰柄丙秉饼炳病并玻菠播拨钵波博勃搏铂箔伯帛舶脖膊渤泊驳捕卜哺补埠不布步簿部怖擦猜裁材才财睬踩采彩菜蔡餐参蚕残惭惨灿苍舱仓沧藏操糙槽曹草厕策侧册测层蹭插叉茬茶查碴搽察岔差诧拆柴豺搀掺蝉馋谗缠铲产阐颤昌猖场尝常长偿肠厂敞畅唱倡超抄钞朝嘲潮巢吵炒车扯撤掣彻澈郴臣辰尘晨忱沉陈趁衬撑称城橙成呈乘程惩澄诚承逞骋秤吃痴持匙池迟弛驰耻齿侈尺赤翅斥炽充冲虫崇宠抽酬畴踌稠愁筹仇绸瞅丑臭初出橱厨躇锄雏滁除楚础储矗搐触处揣川穿椽传船喘串疮窗幢床闯创吹炊捶锤垂春椿醇唇淳纯蠢戳绰疵茨磁雌辞慈瓷词此刺赐次聪葱囱匆从丛凑粗醋簇促蹿篡窜摧崔催脆瘁粹淬翠村存寸磋撮搓措挫错搭达答瘩打大呆歹傣戴带殆代贷袋待逮怠耽担丹单郸掸胆旦氮但惮淡诞弹蛋当挡党荡档刀捣蹈倒岛祷导到稻悼道盗德得的蹬灯登等瞪凳邓堤低滴迪敌笛狄涤翟嫡抵底地蒂第帝弟递缔颠掂滇碘点典靛垫电佃甸店惦奠淀殿碉叼雕凋刁掉吊钓调跌爹碟蝶迭谍叠丁盯叮钉顶鼎锭定订丢东冬董懂动栋侗恫冻洞兜抖斗陡豆逗痘都督毒犊独读堵睹赌杜镀肚度渡妒端短锻段断缎堆兑队对墩吨蹲敦顿囤钝盾遁掇哆多夺垛躲朵跺舵剁惰堕蛾峨鹅俄额讹娥恶厄扼遏鄂饿恩而儿耳尔饵洱二贰发罚筏伐乏阀法珐藩帆番翻樊矾钒繁凡烦反返范贩犯饭泛坊芳方肪房防妨仿访纺放菲非啡飞肥匪诽吠肺废沸费芬酚吩氛分纷坟焚汾粉奋份忿愤粪丰封枫蜂峰锋风疯烽逢冯缝讽奉凤佛否夫敷肤孵扶拂辐幅氟符伏俘服浮涪福袱弗甫抚辅俯釜斧脯腑府腐赴副覆赋复傅付阜父腹负富讣附妇缚咐噶嘎该改概钙盖溉干甘杆柑竿肝赶感秆敢赣冈刚钢缸肛纲岗港杠篙皋高膏羔糕搞镐稿告哥歌搁戈鸽胳疙割革葛格蛤阁隔铬个各给根跟耕更庚羹埂耿梗工攻功恭龚供躬公宫弓巩汞拱贡共钩勾沟苟狗垢构购够辜菇咕箍估沽孤姑鼓古蛊骨谷股故顾固雇刮瓜剐寡挂褂乖拐怪棺关官冠观管馆罐惯灌贯光广逛瑰规圭硅归龟闺轨鬼诡癸桂柜跪贵刽辊滚棍锅郭国果裹过哈骸孩海氦亥害骇酣憨邯韩含涵寒函喊罕翰撼捍旱憾悍焊汗汉夯杭航壕嚎豪毫郝好耗号浩呵喝荷菏核禾和何合盒貉阂河涸赫褐鹤贺嘿黑痕很狠恨哼亨横衡恒轰哄烘虹鸿洪宏弘红喉侯猴吼厚候后呼乎忽瑚壶葫胡蝴狐糊湖弧虎唬护互沪户花哗华猾滑画划化话槐徊怀淮坏欢环桓还缓换患唤痪豢焕涣宦幻荒慌黄磺蝗簧皇凰惶煌晃幌恍谎灰挥辉徽恢蛔回毁悔慧卉惠晦贿秽会烩汇讳诲绘荤昏婚魂浑混豁活伙火获或惑霍货祸击圾基机畸稽积箕肌饥迹激讥鸡姬绩缉吉极棘辑籍集及急疾汲即嫉级挤几脊己蓟技冀季伎祭剂悸济寄寂计记既忌际妓继纪嘉枷夹佳家加荚颊贾甲钾假稼价架驾嫁歼监坚尖笺间煎兼肩艰奸缄茧检柬碱硷拣捡简俭剪减荐槛鉴践贱见键箭件健舰剑饯渐溅涧建僵姜将浆江疆蒋桨奖讲匠酱降蕉椒礁焦胶交郊浇骄娇嚼搅铰矫侥脚狡角饺缴绞剿教酵轿较叫窖揭接皆秸街阶截劫节桔杰捷睫竭洁结解姐戒藉芥界借介疥诫届巾筋斤金今津襟紧锦仅谨进靳晋禁近烬浸尽劲荆兢茎睛晶鲸京惊精粳经井警景颈静境敬镜径痉靖竟竞净炯窘揪究纠玖韭久灸九酒厩救旧臼舅咎就疚鞠拘狙疽居驹菊局咀矩举沮聚拒据巨具距踞锯俱句惧炬剧捐鹃娟倦眷卷绢撅攫抉掘倔爵觉决诀绝均菌钧军君峻俊竣浚郡骏喀咖卡咯开揩楷凯慨刊堪勘坎砍看康慷糠扛抗亢炕考拷烤靠坷苛柯棵磕颗科壳咳可渴克刻客课肯啃垦恳坑吭空恐孔控抠口扣寇枯哭窟苦酷库裤夸垮挎跨胯块筷侩快宽款匡筐狂框矿眶旷况亏盔岿窥葵奎魁傀馈愧溃坤昆捆困括扩廓阔垃拉喇蜡腊辣啦莱来赖蓝婪栏拦篮阑兰澜谰揽览懒缆烂滥琅榔狼廊郎朗浪捞劳牢老佬姥酪烙涝勒乐雷镭蕾磊累儡垒擂肋类泪棱楞冷厘梨犁黎篱狸离漓理李里鲤礼莉荔吏栗丽厉励砾历利傈例俐痢立粒沥隶力璃哩俩联莲连镰廉怜涟帘敛脸链恋炼练粮凉梁粱良两辆量晾亮谅撩聊僚疗燎寥辽潦了撂镣廖料列裂烈劣猎琳林磷霖临邻鳞淋凛赁吝拎玲菱零龄铃伶羚凌灵陵岭领另令溜琉榴硫馏留刘瘤流柳六龙聋咙笼窿隆垄拢陇楼娄搂篓漏陋芦卢颅庐炉掳卤虏鲁麓碌露路赂鹿潞禄录陆戮驴吕铝侣旅履屡缕虑氯律率滤绿峦挛孪滦卵乱掠略抡轮伦仑沦纶论萝螺罗逻锣箩骡裸落洛骆络妈麻玛码蚂马骂嘛吗埋买麦卖迈脉瞒馒蛮满蔓曼慢漫谩芒茫盲氓忙莽猫茅锚毛矛铆卯茂冒帽貌贸么玫枚梅酶霉煤没眉媒镁每美昧寐妹媚门闷们萌蒙檬盟锰猛梦孟眯醚靡糜迷谜弥米秘觅泌蜜密幂棉眠绵冕免勉娩缅面苗描瞄藐秒渺庙妙蔑灭民抿皿敏悯闽明螟鸣铭名命谬摸摹蘑模膜磨摩魔抹末莫墨默沫漠寞陌谋牟某拇牡亩姆母墓暮幕募慕木目睦牧穆拿哪呐钠那娜纳氖乃奶耐奈南男难囊挠脑恼闹淖呢馁内嫩能妮霓倪泥尼拟你匿腻逆溺蔫拈年碾撵捻念娘酿鸟尿捏聂孽啮镊镍涅您柠狞凝宁拧泞牛扭钮纽脓浓农弄奴努怒女暖虐疟挪懦糯诺哦欧鸥殴藕呕偶沤啪趴爬帕怕琶拍排牌徘湃派攀潘盘磐盼畔判叛乓庞旁耪胖抛咆刨炮袍跑泡呸胚培裴赔陪配佩沛喷盆砰抨烹澎彭蓬棚硼篷膨朋鹏捧碰坯砒霹批披劈琵毗啤脾疲皮匹痞僻屁譬篇偏片骗飘漂瓢票撇瞥拼频贫品聘乒坪苹萍平凭瓶评屏坡泼颇婆破魄迫粕剖扑铺仆莆葡菩蒲埔朴圃普浦谱曝瀑期欺栖戚妻七凄漆柒沏其棋奇歧畦崎脐齐旗祈祁骑起岂乞企启契砌器气迄弃汽泣讫掐恰洽牵扦钎铅千迁签仟谦乾黔钱钳前潜遣浅谴堑嵌欠歉枪呛腔羌墙蔷强抢橇锹敲悄桥瞧乔侨巧鞘撬翘峭俏窍切茄且怯窃钦侵亲秦琴勤芹擒禽寝沁青轻氢倾卿清擎晴氰情顷请庆琼穷秋丘邱球求囚酋泅趋区蛆曲躯屈驱渠取娶龋趣去圈颧权醛泉全痊拳犬券劝缺炔瘸却鹊榷确雀裙群然燃冉染瓤壤攘嚷让饶扰绕惹热壬仁人忍韧任认刃妊纫扔仍日戎茸蓉荣融熔溶容绒冗揉柔肉茹蠕儒孺如辱乳汝入褥软阮蕊瑞锐闰润若弱撒洒萨腮鳃塞赛三叁伞散桑嗓丧搔骚扫嫂瑟色涩森僧莎砂杀刹沙纱傻啥煞筛晒珊苫杉山删煽衫闪陕擅赡膳善汕扇缮墒伤商赏晌上尚裳梢捎稍烧芍勺韶少哨邵绍奢赊蛇舌舍赦摄射慑涉社设砷申呻伸身深娠绅神沈审婶甚肾慎渗声生甥牲升绳省盛剩胜圣师失狮施湿诗尸虱十石拾时什食蚀实识史矢使屎驶始式示士世柿事拭誓逝势是嗜噬适仕侍释饰氏市恃室视试收手首守寿授售受瘦兽蔬枢梳殊抒输叔舒淑疏书赎孰熟薯暑曙署蜀黍鼠属术述树束戍竖墅庶数漱恕刷耍摔衰甩帅栓拴霜双爽谁水睡税吮瞬顺舜说硕朔烁斯撕嘶思私司丝死肆寺嗣四伺似饲巳松耸怂颂送宋讼诵搜艘擞嗽苏酥俗素速粟僳塑溯宿诉肃酸蒜算虽隋随绥髓碎岁穗遂隧祟孙损笋蓑梭唆缩琐索锁所塌他它她塔獭挞蹋踏胎苔抬台泰酞太态汰坍摊贪瘫滩坛檀痰潭谭谈坦毯袒碳探叹炭汤塘搪堂棠膛唐糖倘躺淌趟烫掏涛滔绦萄桃逃淘陶讨套特藤腾疼誊梯剔踢锑提题蹄啼体替嚏惕涕剃屉天添填田甜恬舔腆挑条迢眺跳贴铁帖厅听烃汀廷停亭庭挺艇通桐酮瞳同铜彤童桶捅筒统痛偷投头透凸秃突图徒途涂屠土吐兔湍团推颓腿蜕褪退吞屯臀拖托脱鸵陀驮驼椭妥拓唾挖哇蛙洼娃瓦袜歪外豌弯湾玩顽丸烷完碗挽晚皖惋宛婉万腕汪王亡枉网往旺望忘妄威巍微危韦违桅围唯惟为潍维苇萎委伟伪尾纬未蔚味畏胃喂魏位渭谓尉慰卫瘟温蚊文闻纹吻稳紊问嗡翁瓮挝蜗涡窝我斡卧握沃巫呜钨乌污诬屋无芜梧吾吴毋武五捂午舞伍侮坞戊雾晤物勿务悟误昔熙析西硒矽晰嘻吸锡牺稀息希悉膝夕惜熄烯溪汐犀檄袭席习媳喜铣洗系隙戏细瞎虾匣霞辖暇峡侠狭下厦夏吓掀锨先仙鲜纤咸贤衔舷闲涎弦嫌显险现献县腺馅羡宪陷限线相厢镶香箱襄湘乡翔祥详想响享项巷橡像向象萧硝霄削哮嚣销消宵淆晓小孝校肖啸笑效楔些歇蝎鞋协挟携邪斜胁谐写械卸蟹懈泄泻谢屑薪芯锌欣辛新忻心信衅星腥猩惺兴刑型形邢行醒幸杏性姓兄凶胸匈汹雄熊休修羞朽嗅锈秀袖绣墟戌需虚嘘须徐许蓄酗叙旭序畜恤絮婿绪续轩喧宣悬旋玄选癣眩绚靴薛学穴雪血勋熏循旬询寻驯巡殉汛训讯逊迅压押鸦鸭呀丫芽牙蚜崖衙涯雅哑亚讶焉咽阉烟淹盐严研蜒岩延言颜阎炎沿奄掩眼衍演艳堰燕厌砚雁唁彦焰宴谚验殃央鸯秧杨扬佯疡羊洋阳氧仰痒养样漾邀腰妖瑶摇尧遥窑谣姚咬舀药要耀椰噎耶爷野冶也页掖业叶曳腋夜液一壹医揖铱依伊衣颐夷遗移仪胰疑沂宜姨彝椅蚁倚已乙矣以艺抑易邑屹亿役臆逸肄疫亦裔意毅忆义益溢诣议谊译异翼翌绎茵荫因殷音阴姻吟银淫寅饮尹引隐印英樱婴鹰应缨莹萤营荧蝇迎赢盈影颖硬映哟拥佣臃痈庸雍踊蛹咏泳涌永恿勇用幽优悠忧尤由邮铀犹油游酉有友右佑釉诱又幼迂淤于盂榆虞愚舆余俞逾鱼愉渝渔隅予娱雨与屿禹宇语羽玉域芋郁吁遇喻峪御愈欲狱育誉浴寓裕预豫驭鸳渊冤元垣袁原援辕园员圆猿源缘远苑愿怨院曰约越跃钥岳粤月悦阅耘云郧匀陨允运蕴酝晕韵孕匝砸杂栽哉灾宰载再在咱攒暂赞赃脏葬遭糟凿藻枣早澡蚤躁噪造皂灶燥责择则泽贼怎增憎曾赠扎喳渣札轧铡闸眨栅榨咋乍炸诈摘斋宅窄债寨瞻毡詹粘沾盏斩辗崭展蘸栈占战站湛绽樟章彰漳张掌涨杖丈帐账仗胀瘴障招昭找沼赵照罩兆肇召遮折哲蛰辙者锗蔗这浙珍斟真甄砧臻贞针侦枕疹诊震振镇阵蒸挣睁征狰争怔整拯正政帧症郑证芝枝支吱蜘知肢脂汁之织职直植殖执值侄址指止趾只旨纸志挚掷至致置帜峙制智秩稚质炙痔滞治窒中盅忠钟衷终种肿重仲众舟周州洲诌粥轴肘帚咒皱宙昼骤珠株蛛朱猪诸诛逐竹烛煮拄瞩嘱主著柱助蛀贮铸筑住注祝
驻抓爪拽专砖转撰赚篆桩庄装妆撞壮状椎锥追赘坠缀谆准捉拙卓桌琢茁酌啄着灼浊兹咨资姿滋淄孜紫仔籽滓子自渍字鬃棕踪宗综总纵邹走奏揍租足卒族祖诅阻组钻纂嘴醉最罪尊遵昨左佐柞做作坐座 5 | 衢亳濮漯圳莞儋泸泗颍佤岚泾潼祜赉桦洮睢沅陉栾涞涿绛溧沭瓯浔嵊婺岱弋谯璧旌柘汶莒荥嵩淇驿澧圩榕岑梓仡麟勐湟坻藁妃蠡骅猗稷芮岢隰磴岫鲅蛟珲讷箐闵邺邳盱眙邗鄞暨缙畲鸠庵濉枞歙黟琊埇砀芗诏濂鄱崂峄滕罘朐兖郯茌莘棣郓鄄杞瀍偃郏陟鄢郾渑淅浉潢硚陂猇秭浠蕲芙浏淞渌攸醴晖耒汨溆芷禺浈濠禅邕覃仫碚綦郫邛崃蔺邡犍沐阆珙筠蓥孚湄阡谟麒蒗濞迦灞鄠岐崆峒岷宕晏坂鄯耆伽 6 | 婷晗鑫祺瑾琪倩媛楠馨缤罡闫昊珂睿瑛裱炜怡妍芸宸缪苡烨畈嘟炫鞫邸摽窦雯薇玮钊淼琦珞佥曦钰煜渎璐姣娅晟恪 -------------------------------------------------------------------------------- /config/charset.txt: -------------------------------------------------------------------------------- 1 | 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!@#$%^&*()-_+={}[]|\<>,.。;:、?/'" 2 | 《》①②③④⑤⑥⑦⑧⑨⑩【】¥ 3 | 啊阿埃挨哎唉哀皑癌蔼矮艾碍爱隘鞍氨安俺按暗岸胺案肮昂盎凹敖熬翱袄傲奥懊澳芭捌扒叭吧笆八疤巴拔跋靶把耙坝霸罢爸白柏百摆佰败拜稗斑班搬扳般颁板版扮拌伴瓣半办绊邦帮梆榜膀绑棒磅蚌镑傍谤苞胞包褒剥薄雹保堡饱宝抱报暴豹鲍爆杯碑悲卑北辈背贝钡倍狈备惫焙被奔苯本笨崩绷甭泵蹦迸逼鼻比鄙笔彼碧蓖蔽毕毙毖币庇痹闭敝弊必辟壁臂避陛鞭边编贬扁便变卞辨辩辫遍标彪膘表鳖憋别瘪彬斌濒滨宾摈兵冰柄丙秉饼炳病并玻菠播拨钵波博勃搏铂箔伯帛舶脖膊渤泊驳捕卜哺补埠不布步簿部怖擦猜裁材才财睬踩采彩菜蔡餐参蚕残惭惨灿苍舱仓沧藏操糙槽曹草厕策侧册测层蹭插叉茬茶查碴搽察岔差诧拆柴豺搀掺蝉馋谗缠铲产阐颤昌猖场尝常长偿肠厂敞畅唱倡超抄钞朝嘲潮巢吵炒车扯撤掣彻澈郴臣辰尘晨忱沉陈趁衬撑称城橙成呈乘程惩澄诚承逞骋秤吃痴持匙池迟弛驰耻齿侈尺赤翅斥炽充冲虫崇宠抽酬畴踌稠愁筹仇绸瞅丑臭初出橱厨躇锄雏滁除楚础储矗搐触处揣川穿椽传船喘串疮窗幢床闯创吹炊捶锤垂春椿醇唇淳纯蠢戳绰疵茨磁雌辞慈瓷词此刺赐次聪葱囱匆从丛凑粗醋簇促蹿篡窜摧崔催脆瘁粹淬翠村存寸磋撮搓措挫错搭达答瘩打大呆歹傣戴带殆代贷袋待逮怠耽担丹单郸掸胆旦氮但惮淡诞弹蛋当挡党荡档刀捣蹈倒岛祷导到稻悼道盗德得的蹬灯登等瞪凳邓堤低滴迪敌笛狄涤翟嫡抵底地蒂第帝弟递缔颠掂滇碘点典靛垫电佃甸店惦奠淀殿碉叼雕凋刁掉吊钓调跌爹碟蝶迭谍叠丁盯叮钉顶鼎锭定订丢东冬董懂动栋侗恫冻洞兜抖斗陡豆逗痘都督毒犊独读堵睹赌杜镀肚度渡妒端短锻段断缎堆兑队对墩吨蹲敦顿囤钝盾遁掇哆多夺垛躲朵跺舵剁惰堕蛾峨鹅俄额讹娥恶厄扼遏鄂饿恩而儿耳尔饵洱二贰发罚筏伐乏阀法珐藩帆番翻樊矾钒繁凡烦反返范贩犯饭泛坊芳方肪房防妨仿访纺放菲非啡飞肥匪诽吠肺废沸费芬酚吩氛分纷坟焚汾粉奋份忿愤粪丰封枫蜂峰锋风疯烽逢冯缝讽奉凤佛否夫敷肤孵扶拂辐幅氟符伏俘服浮涪福袱弗甫抚辅俯釜斧脯腑府腐赴副覆赋复傅付阜父腹负富讣附妇缚咐噶嘎该改概钙盖溉干甘杆柑竿肝赶感秆敢赣冈刚钢缸肛纲岗港杠篙皋高膏羔糕搞镐稿告哥歌搁戈鸽胳疙割革葛格蛤阁隔铬个各给根跟耕更庚羹埂耿梗工攻功恭龚供躬公宫弓巩汞拱贡共钩勾沟苟狗垢构购够辜菇咕箍估沽孤姑鼓古蛊骨谷股故顾固雇刮瓜剐寡挂褂乖拐怪棺关官冠观管馆罐惯灌贯光广逛瑰规圭硅归龟闺轨鬼诡癸桂柜跪贵刽辊滚棍锅郭国果裹过哈骸孩海氦亥害骇酣憨邯韩含涵寒函喊罕翰撼捍旱憾悍焊汗汉夯杭航壕嚎豪毫郝好耗号浩呵喝荷菏核禾和何合盒貉阂河涸赫褐鹤贺嘿黑痕很狠恨哼亨横衡恒轰哄烘虹鸿洪宏弘红喉侯猴吼厚候后呼乎忽瑚壶葫胡蝴狐糊湖弧虎唬护互沪户花哗华猾滑画划化话槐徊怀淮坏欢环桓还缓换患唤痪豢焕涣宦幻荒慌黄磺蝗簧皇凰惶煌晃幌恍谎灰挥辉徽恢蛔回毁悔慧卉惠晦贿秽会烩汇讳诲绘荤昏婚魂浑混豁活伙火获或惑霍货祸击圾基机畸稽积箕肌饥迹激讥鸡姬绩缉吉极棘辑籍集及急疾汲即嫉级挤几脊己蓟技冀季伎祭剂悸济寄寂计记既忌际妓继纪嘉枷夹佳家加荚颊贾甲钾假稼价架驾嫁歼监坚尖笺间煎兼肩艰奸缄茧检柬碱硷拣捡简俭剪减荐槛鉴践贱见键箭件健舰剑饯渐溅涧建僵姜将浆江疆蒋桨奖讲匠酱降蕉椒礁焦胶交郊浇骄娇嚼搅铰矫侥脚狡角饺缴绞剿教酵轿较叫窖揭接皆秸街阶截劫节桔杰捷睫竭洁结解姐戒藉芥界借介疥诫届巾筋斤金今津襟紧锦仅谨进靳晋禁近烬浸尽劲荆兢茎睛晶鲸京惊精粳经井警景颈静境敬镜径痉靖竟竞净炯窘揪究纠玖韭久灸九酒厩救旧臼舅咎就疚鞠拘狙疽居驹菊局咀矩举沮聚拒据巨具距踞锯俱句惧炬剧捐鹃娟倦眷卷绢撅攫抉掘倔爵觉决诀绝均菌钧军君峻俊竣浚郡骏喀咖卡咯开揩楷凯慨刊堪勘坎砍看康慷糠扛抗亢炕考拷烤靠坷苛柯棵磕颗科壳咳可渴克刻客课肯啃垦恳坑吭空恐孔控抠口扣寇枯哭窟苦酷库裤夸垮挎跨胯块筷侩快宽款匡筐狂框矿眶旷况亏盔岿窥葵奎魁傀馈愧溃坤昆捆困括扩廓阔垃拉喇蜡腊辣啦莱来赖蓝婪栏拦篮阑兰澜谰揽览懒缆烂滥琅榔狼廊郎朗浪捞劳牢老佬姥酪烙涝勒乐雷镭蕾磊累儡垒擂肋类泪棱楞冷厘梨犁黎篱狸离漓理李里鲤礼莉荔吏栗丽厉励砾历利傈例俐痢立粒沥隶力璃哩俩联莲连镰廉怜涟帘敛脸链恋炼练粮凉梁粱良两辆量晾亮谅撩聊僚疗燎寥辽潦了撂镣廖料列裂烈劣猎琳林磷霖临邻鳞淋凛赁吝拎玲菱零龄铃伶羚凌灵陵岭领另令溜琉榴硫馏留刘瘤流柳六龙聋咙笼窿隆垄拢陇楼娄搂篓漏陋芦卢颅庐炉掳卤虏鲁麓碌露路赂鹿潞禄录陆戮驴吕铝侣旅履屡缕虑氯律率滤绿峦挛孪滦卵乱掠略抡轮伦仑沦纶论萝螺罗逻锣箩骡裸落洛骆络妈麻玛码蚂马骂嘛吗埋买麦卖迈脉瞒馒蛮满蔓曼慢漫谩芒茫盲氓忙莽猫茅锚毛矛铆卯茂冒帽貌贸么玫枚梅酶霉煤没眉媒镁每美昧寐妹媚门闷们萌蒙檬盟锰猛梦孟眯醚靡糜迷谜弥米秘觅泌蜜密幂棉眠绵冕免勉娩缅面苗描瞄藐秒渺庙妙蔑灭民抿皿敏悯闽明螟鸣铭名命谬摸摹蘑模膜磨摩魔抹末莫墨默沫漠寞陌谋牟某拇牡亩姆母墓暮幕募慕木目睦牧穆拿哪呐钠那娜纳氖乃奶耐奈南男难囊挠脑恼闹淖呢馁内嫩能妮霓倪泥尼拟你匿腻逆溺蔫拈年碾撵捻念娘酿鸟尿捏聂孽啮镊镍涅您柠狞凝宁拧泞牛扭钮纽脓浓农弄奴努怒女暖虐疟挪懦糯诺哦欧鸥殴藕呕偶沤啪趴爬帕怕琶拍排牌徘湃派攀潘盘磐盼畔判叛乓庞旁耪胖抛咆刨炮袍跑泡呸胚培裴赔陪配佩沛喷盆砰抨烹澎彭蓬棚硼篷膨朋鹏捧碰坯砒霹批披劈琵毗啤脾疲皮匹痞僻屁譬篇偏片骗飘漂瓢票撇瞥拼频贫品聘乒坪苹萍平凭瓶评屏坡泼颇婆破魄迫粕剖扑铺仆莆葡菩蒲埔朴圃普浦谱曝瀑期欺栖戚妻七凄漆柒沏其棋奇歧畦崎脐齐旗祈祁骑起岂乞企启契砌器气迄弃汽泣讫掐恰洽牵扦钎铅千迁签仟谦乾黔钱钳前潜遣浅谴堑嵌欠歉枪呛腔羌墙蔷强抢橇锹敲悄桥瞧乔侨巧鞘撬翘峭俏窍切茄且怯窃钦侵亲秦琴勤芹擒禽寝沁青轻氢倾卿清擎晴氰情顷请庆琼穷秋丘邱球求囚酋泅趋区蛆曲躯屈驱渠取娶龋趣去圈颧权醛泉全痊拳犬券劝缺炔瘸却鹊榷确雀裙群然燃冉染瓤壤攘嚷让饶扰绕惹热壬仁人忍韧任认刃妊纫扔仍日戎茸蓉荣融熔溶容绒冗揉柔肉茹蠕儒孺如辱乳汝入褥软阮蕊瑞锐闰润若弱撒洒萨腮鳃塞赛三叁伞散桑嗓丧搔骚扫嫂瑟色涩森僧莎砂杀刹沙纱傻啥煞筛晒珊苫杉山删煽衫闪陕擅赡膳善汕扇缮墒伤商赏晌上尚裳梢捎稍烧芍勺韶少哨邵绍奢赊蛇舌舍赦摄射慑涉社设砷申呻伸身深娠绅神沈审婶甚肾慎渗声生甥牲升绳省盛剩胜圣师失狮施湿诗尸虱十石拾时什食蚀实识史矢使屎驶始式示士世柿事拭誓逝势是嗜噬适仕侍释饰氏市恃室视试收手首守寿授售受瘦兽蔬枢梳殊抒输叔舒淑疏书赎孰熟薯暑曙署蜀黍鼠属术述树束戍竖墅庶数漱恕刷耍摔衰甩帅栓拴霜双爽谁水睡税吮瞬顺舜说硕朔烁斯撕嘶思私司丝死肆寺嗣四伺似饲巳松耸怂颂送宋讼诵搜艘擞嗽苏酥俗素速粟僳塑溯宿诉肃酸蒜算虽隋随绥髓碎岁穗遂隧祟孙损笋蓑梭唆缩琐索锁所塌他它她塔獭挞蹋踏胎苔抬台泰酞太态汰坍摊贪瘫滩坛檀痰潭谭谈坦毯袒碳探叹炭汤塘搪堂棠膛唐糖倘躺淌趟烫掏涛滔绦萄桃逃淘陶讨套特藤腾疼誊梯剔踢锑提题蹄啼体替嚏惕涕剃屉天添填田甜恬舔腆挑条迢眺跳贴铁帖厅听烃汀廷停亭庭挺艇通桐酮瞳同铜彤童桶捅筒统痛偷投头透凸秃突图徒途涂屠土吐兔湍团推颓腿蜕褪退吞屯臀拖托脱鸵陀驮驼椭妥拓唾挖哇蛙洼娃瓦袜歪外豌弯湾玩顽丸烷完碗挽晚皖惋宛婉万腕汪王亡枉网往旺望忘妄威巍微危韦违桅围唯惟为潍维苇萎委伟伪尾纬未蔚味畏胃喂魏位渭谓尉慰卫瘟温蚊文闻纹吻稳紊问嗡翁瓮挝蜗涡窝我斡卧握沃巫呜钨乌污诬屋无芜梧吾吴毋武五捂午舞伍侮坞戊雾晤物勿务悟误昔熙析西硒矽晰嘻吸锡牺稀息希悉膝夕惜熄烯溪汐犀檄袭席习
媳喜铣洗系隙戏细瞎虾匣霞辖暇峡侠狭下厦夏吓掀锨先仙鲜纤咸贤衔舷闲涎弦嫌显险现献县腺馅羡宪陷限线相厢镶香箱襄湘乡翔祥详想响享项巷橡像向象萧硝霄削哮嚣销消宵淆晓小孝校肖啸笑效楔些歇蝎鞋协挟携邪斜胁谐写械卸蟹懈泄泻谢屑薪芯锌欣辛新忻心信衅星腥猩惺兴刑型形邢行醒幸杏性姓兄凶胸匈汹雄熊休修羞朽嗅锈秀袖绣墟戌需虚嘘须徐许蓄酗叙旭序畜恤絮婿绪续轩喧宣悬旋玄选癣眩绚靴薛学穴雪血勋熏循旬询寻驯巡殉汛训讯逊迅压押鸦鸭呀丫芽牙蚜崖衙涯雅哑亚讶焉咽阉烟淹盐严研蜒岩延言颜阎炎沿奄掩眼衍演艳堰燕厌砚雁唁彦焰宴谚验殃央鸯秧杨扬佯疡羊洋阳氧仰痒养样漾邀腰妖瑶摇尧遥窑谣姚咬舀药要耀椰噎耶爷野冶也页掖业叶曳腋夜液一壹医揖铱依伊衣颐夷遗移仪胰疑沂宜姨彝椅蚁倚已乙矣以艺抑易邑屹亿役臆逸肄疫亦裔意毅忆义益溢诣议谊译异翼翌绎茵荫因殷音阴姻吟银淫寅饮尹引隐印英樱婴鹰应缨莹萤营荧蝇迎赢盈影颖硬映哟拥佣臃痈庸雍踊蛹咏泳涌永恿勇用幽优悠忧尤由邮铀犹油游酉有友右佑釉诱又幼迂淤于盂榆虞愚舆余俞逾鱼愉渝渔隅予娱雨与屿禹宇语羽玉域芋郁吁遇喻峪御愈欲狱育誉浴寓裕预豫驭鸳渊冤元垣袁原援辕园员圆猿源缘远苑愿怨院曰约越跃钥岳粤月悦阅耘云郧匀陨允运蕴酝晕韵孕匝砸杂栽哉灾宰载再在咱攒暂赞赃脏葬遭糟凿藻枣早澡蚤躁噪造皂灶燥责择则泽贼怎增憎曾赠扎喳渣札轧铡闸眨栅榨咋乍炸诈摘斋宅窄债寨瞻毡詹粘沾盏斩辗崭展蘸栈占战站湛绽樟章彰漳张掌涨杖丈帐账仗胀瘴障招昭找沼赵照罩兆肇召遮折哲蛰辙者锗蔗这浙珍斟真甄砧臻贞针侦枕疹诊震振镇阵蒸挣睁征狰争怔整拯正政帧症郑证芝枝支吱蜘知肢脂汁之织职直植殖执值侄址指止趾只旨纸志挚掷至致置帜峙制智秩稚质炙痔滞治窒中盅忠钟衷终种肿重仲众舟周州洲诌粥轴肘帚咒皱宙昼骤珠株蛛朱猪诸诛逐竹烛煮拄瞩嘱主著柱助蛀贮铸筑住注祝驻抓爪拽专砖转撰赚篆桩庄装妆撞壮状椎锥追赘坠缀谆准捉拙卓桌琢茁酌啄着灼浊兹咨资姿滋淄孜紫仔籽滓子自渍字鬃棕踪宗综总纵邹走奏揍租足卒族祖诅阻组钻纂嘴醉最罪尊遵昨左佐柞做作坐座 -------------------------------------------------------------------------------- /config/memory_usage.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/piginzoo/textscanner/a934102e3f7c9c7301c74fc86f6491da55ea5ba6/config/memory_usage.xls -------------------------------------------------------------------------------- /config/pip.conf: -------------------------------------------------------------------------------- 1 | [global] 2 | index-url = http://pypi.douban.com/simple 3 | [install] 4 | trusted-host=pypi.douban.com 5 | -------------------------------------------------------------------------------- /config/sources.list: -------------------------------------------------------------------------------- 1 | # deb-src http://archive.ubuntu.com/ubuntu xenial main restricted #Added by software-properties 2 | deb http://mirrors.aliyun.com/ubuntu/ xenial main restricted 3 | deb-src http://mirrors.aliyun.com/ubuntu/ xenial main restricted multiverse universe #Added by software-properties 4 | deb http://mirrors.aliyun.com/ubuntu/ xenial-updates main restricted 5 | deb-src http://mirrors.aliyun.com/ubuntu/ xenial-updates main restricted multiverse universe #Added by software-properties 6 | deb http://mirrors.aliyun.com/ubuntu/ xenial universe 7 | deb http://mirrors.aliyun.com/ubuntu/ xenial-updates universe 8 | deb http://mirrors.aliyun.com/ubuntu/ xenial multiverse 9 | deb http://mirrors.aliyun.com/ubuntu/ xenial-updates multiverse 10 | deb http://mirrors.aliyun.com/ubuntu/ xenial-backports main restricted universe multiverse 11 | deb-src http://mirrors.aliyun.com/ubuntu/ xenial-backports main restricted universe multiverse #Added by software-properties 12 | deb http://archive.canonical.com/ubuntu xenial partner 13 | deb-src http://archive.canonical.com/ubuntu xenial partner 14 | deb http://mirrors.aliyun.com/ubuntu/ xenial-security main restricted 15 | deb-src http://mirrors.aliyun.com/ubuntu/ xenial-security main restricted multiverse universe #Added by software-properties 16 | deb http://mirrors.aliyun.com/ubuntu/ xenial-security universe 17 | deb http://mirrors.aliyun.com/ubuntu/ xenial-security multiverse -------------------------------------------------------------------------------- /main/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/piginzoo/textscanner/a934102e3f7c9c7301c74fc86f6491da55ea5ba6/main/__init__.py -------------------------------------------------------------------------------- /main/pred.py: -------------------------------------------------------------------------------- 1 | from utils import logger as log,label_utils 
2 | import conf
3 | from network import model as _model
4 | import logging,cv2
5 | import numpy as np
6 | from tensorflow.keras.utils import to_categorical
7 | from tensorflow.keras.preprocessing.sequence import pad_sequences
8 | 
9 | logger = logging.getLogger("Train")
10 | 
11 | def pred(args):
12 |     charset = label_utils.get_charset(conf.CHARSET)
13 |     CHARSET_SIZE = len(charset)
14 | 
15 |     # define the model
16 |     _, decoder_model, encoder_model = _model.model(conf, args)
17 | 
18 |     # load the weights for each model separately
19 |     encoder_model.load_weights(args.model)
20 |     decoder_model.load_weights(args.model)
21 |     logger.info("Loaded model: %s", args.model)
22 | 
23 |     logger.info("Start predicting image: %s", args.image)
24 |     image = cv2.imread(args.image)
25 | 
26 |     # the encoder predicts first
27 |     encoder_out_states, encoder_fwd_state, encoder_back_state = encoder_model.predict(image)
28 | 
29 |     # prepare the decoder's initial input state
30 |     decoder_init_state = np.concatenate([encoder_fwd_state, encoder_back_state], axis=-1)
31 | 
32 |     attention_weights = []
33 | 
34 |     # start with STX
35 |     from utils.label_utils import convert_to_id
36 |     decoder_index = convert_to_id([conf.CHAR_STX], charset)
37 |     decoder_state = decoder_init_state
38 | 
39 |     result = ""
40 | 
41 |     # start predicting characters
42 |     for i in range(conf.MAX_SEQUENCE):
43 | 
44 |         # despite the padding etc., this is really just one character; it is only done to match the expected input dimensions
45 |         decoder_inputs = pad_sequences(decoder_index, maxlen=conf.MAX_SEQUENCE, padding="post", value=0)
46 |         decoder_inputs = to_categorical(decoder_inputs, num_classes=CHARSET_SIZE)
47 | 
48 |         # infer_decoder_model : Model(inputs=[decoder_inputs, encoder_out_states, decoder_init_state],
49 |         #                             outputs=[decoder_pred, attn_states, decoder_state])
50 |         # encoder_out_states -> used by the attention
51 |         decoder_out, attention, decoder_state = \
52 |             decoder_model.predict([decoder_inputs, encoder_out_states, decoder_state])
53 | 
54 |         # beam search impl (WIP): take the 3 highest-scoring candidates
55 |         max_k_index = decoder_out.argsort()[-3:][::-1]
56 |         max_prob = decoder_out[max_k_index]
57 |         max_labels = label_utils.id2strs(max_k_index)  # TODO id2strs
58 | 
59 |         # the output at the current step is a probability distribution over the charset, so argmax it to get an id
60 |         decoder_index = np.argmax(decoder_out, axis=-1)[0, 0]
61 | 
62 |         if decoder_index == 2:  # ==> conf.CHAR_ETX
63 |             logger.info("Predicted ETX, stop decoding")
64 |             break
65 | 
66 |         attention_weights.append(attention)
67 | 
68 |         pred_char = label_utils.ids2str(decoder_index, charset=charset)
69 | 
70 |         logger.info("Predicted character: %s", pred_char)
71 |         result += pred_char
72 | 
73 |     if len(result) >= conf.MAX_SEQUENCE:
74 |         logger.debug("Prediction '%s' reached the maximum decoding length", result)
75 |     else:
76 |         logger.debug("Prediction '%s' ended with ETX", result)
77 | 
78 |     return result, attention_weights
79 | 
80 | def sents2sequences(tokenizer, sentences, reverse=False, pad_length=None, padding_type='post'):
81 |     encoded_text = tokenizer.texts_to_sequences(sentences)
82 |     preproc_text = pad_sequences(encoded_text, padding=padding_type, maxlen=pad_length, value=0)
83 |     if reverse:
84 |         preproc_text = np.flip(preproc_text, axis=1)
85 | 
86 |     return preproc_text
87 | 
88 | 
89 | if __name__ == "__main__":
90 |     log.init()
91 |     args = conf.init_pred_args()
92 |     result, attention_probs = pred(args)
93 |     logger.info("Predicted string: %s", result)
94 |     logger.info("Attention probabilities: %r", attention_probs)
--------------------------------------------------------------------------------
/main/train.py:
--------------------------------------------------------------------------------
1 | from tensorflow.keras.callbacks import TensorBoard
2 | from tensorflow.keras.callbacks import ModelCheckpoint
3 | from tensorflow.keras.callbacks import EarlyStopping
4 | from tensorflow.keras.models import load_model
5 | from network.model import TextScannerModel
6 | from utils.visualise_callback import TBoardVisual
7 | from utils.sequence import SequenceData
8 | from utils.label import label_utils
9 | from utils import logger as log
10 | from utils import util
11 | import logging
12 | import conf
13 | import os
14 | 
15 | logger = logging.getLogger(__name__)
16 | 
17 | 
18 | def train(args):
19 |     charset = label_utils.get_charset(conf.CHARSET)
20 |     conf.CHARSET_SIZE = len(charset)
21 | 
22 |     model = TextScannerModel(conf, charset)
23 |     model.compile_model()
24 | 
25 |     train_sequence = SequenceData(name="Train",
26 |                                   label_dir=args.train_label_dir,
27 |                                   label_file=args.train_label_file,
28 |                                   charsets=charset,
29 |                                   conf=conf,
30 |                                   args=args,
31 |                                   batch_size=args.batch)
32 |     valid_sequence = SequenceData(name="Validate",
33 |                                   label_dir=args.validate_label_dir,
34 |                                   label_file=args.validate_label_file,
35 |                                   charsets=charset,
36 |                                   conf=conf,
37 |                                   args=args,
38 |                                   batch_size=args.validation_batch)
39 | 
40 |     timestamp = util.timestamp_s()
41 |     tb_log_name = os.path.join(conf.DIR_TBOARD, timestamp)
42 |     # checkpoint_path = conf.DIR_MODEL + "/model-" + timestamp + "-epoch{epoch:03d}-acc{accuracy:.4f}-val{val_accuracy:.4f}.hdf5"
43 |     checkpoint_path = conf.DIR_MODEL + "/model-" + timestamp + "-epoch{epoch:03d}.hdf5"
44 | 
45 |     # if a checkpoint file exists, load it
46 |     if args.retrain:
47 |         logger.info("Train from beginning ...")
48 |     else:
49 |         logger.info("Train from the previous checkpoint ...")
50 |         _checkpoint_path = util.get_checkpoint(conf.DIR_CHECKPOINT)
51 |         if _checkpoint_path is not None:
52 |             model = load_model(_checkpoint_path)
53 |             logger.info("Loaded the checkpoint model [%s]", _checkpoint_path)
54 |         else:
55 |             logger.warning("No checkpoint found, training from scratch")
56 | 
57 |     logger.info("Train begin:")
58 | 
59 |     tboard = TensorBoard(log_dir=tb_log_name, histogram_freq=1, batch_size=2, write_grads=True)
60 |     early_stop = EarlyStopping(patience=args.early_stop, verbose=1, mode='max')
61 |     checkpoint = ModelCheckpoint(filepath=checkpoint_path, verbose=1, mode='max')
62 |     visibility_debug = TBoardVisual('Attention Visibility', tb_log_name, charset, args, valid_sequence)
63 | 
64 |     model.fit(
65 |         x=train_sequence,
66 |         steps_per_epoch=args.steps_per_epoch,  # strictly this should be len(train_sequence), but that is too slow, so I cap it with a smaller number, e.g. 1000
67 |         epochs=args.epochs,
68 |         workers=args.workers,  # how many processes load data concurrently
69 |         callbacks=[tboard, checkpoint, early_stop, visibility_debug],
70 |         use_multiprocessing=True,
71 |         validation_data=valid_sequence,
72 |         validation_steps=args.validation_steps,
73 |         verbose=2)
74 | 
75 |     logger.info("Train end!")
76 | 
77 |     model_path = conf.DIR_MODEL + "/textscanner-{}.hdf5".format(util.timestamp_s())
78 |     model.save(model_path)
79 |     logger.info("Saved the model to: %s", model_path)
80 | 
81 | 
82 | if __name__ == "__main__":
83 |     log.init()
84 |     args = conf.init_args()
85 |     train(args)
86 | 
--------------------------------------------------------------------------------
/network/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piginzoo/textscanner/a934102e3f7c9c7301c74fc86f6491da55ea5ba6/network/__init__.py
--------------------------------------------------------------------------------
/network/layers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piginzoo/textscanner/a934102e3f7c9c7301c74fc86f6491da55ea5ba6/network/layers/__init__.py
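
Before the layer listings below, a small NumPy sketch (an illustration only, not a file in this repo) of the word-formation integral these layers build toward: given a character-segmentation map G of shape [H,W,C] and one order map per character slot, the k-th character's class distribution is p_k = sum over all pixels of H_k * G, which is what WordFormation.call in network/layers/word_formation_layer.py computes with K.sum. The sizes here are made up for readability:

    import numpy as np

    # hypothetical sizes: a 4x8 feature map, 5 character classes, 3 sequence slots
    H_, W_, C, S = 4, 8, 5, 3
    G = np.random.rand(H_, W_, C)            # character segmentation: per-pixel class probabilities
    G /= G.sum(axis=-1, keepdims=True)       # normalize over the class axis
    order_maps = np.random.rand(H_, W_, S)   # one heat map per character slot

    # p[k] integrates G weighted by the k-th order map over all pixels
    p = np.einsum('hws,hwc->sc', order_maps, G)
    print(p.shape)  # (3, 5): per-slot class scores; argmax over axis 1 decodes the characters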
--------------------------------------------------------------------------------
/network/layers/class_branch_layer.py:
--------------------------------------------------------------------------------
1 | from tensorflow.keras.layers import Layer
2 | from tensorflow.keras.layers import Convolution2D
3 | from tensorflow.keras.layers import Softmax
4 | from utils.util import call_debug as _call
5 | 
6 | 
7 | class ClassBranchLayer(Layer):
8 |     """
9 |     [H,W,E] => [H,W,C]
10 |     E: encoding output channels
11 |     C: character class number
12 |     """
13 | 
14 |     def __init__(self, name, charset_size, filter_num):
15 |         super().__init__(name=name)
16 |         self.charset_size = charset_size
17 |         self.filter_num = filter_num
18 | 
19 |     def build(self, input_shape):
20 |         self.conv1 = Convolution2D(filters=self.filter_num,
21 |                                    kernel_size=(3, 3),
22 |                                    padding="same",
23 |                                    name="class_branch_conv1")
24 |         # the number of classes is charset size + 1
25 |         self.conv2 = Convolution2D(filters=self.charset_size + 1,
26 |                                    kernel_size=(1, 1),
27 |                                    padding="same",
28 |                                    name="class_branch_conv2")
29 |         self.softmax = Softmax(name="class_branch_softmax")
30 | 
31 |     def call(self, inputs, training=None):
32 |         x = _call(self.conv1, inputs)
33 |         x = _call(self.conv2, x)
34 |         x = _call(self.softmax, x)
35 |         return x
--------------------------------------------------------------------------------
/network/layers/fcn_layer.py:
--------------------------------------------------------------------------------
1 | from tensorflow.keras.layers import Conv2D, Dropout, Conv2DTranspose, Add, Cropping2D, Layer
2 | from tensorflow.keras.models import Model
3 | from utils.util import call_debug as _call
4 | 
5 | 
6 | class FCNLayer(Layer):
7 |     """
8 |     # Resnet: http://www.piginzoo.com/machine-learning/2019/08/28/east & https://i.stack.imgur.com/tkUYS.png
9 |     # FCN: https://github.com/divamgupta/image-segmentation-keras/blob/master/keras_segmentation/models/fcn.py
10 |     # Resnet50+FCN: see http://www.piginzoo.com/machine-learning/2020/04/23/fcn-unet#resnet50%E7%9A%84fcn
11 |     This implements FCN-8s
12 |     """
13 | 
14 |     def __init__(self, name, filter_num, resnet50_model):
15 |         super().__init__(name=name)
16 |         resnet50_model.layers.pop()
17 |         # resnet50_model.summary()
18 |         self.resnet50_model = resnet50_model
19 |         self.filter_num = filter_num
20 | 
21 |     def build(self, input_image):
22 | 
23 |         ############################
24 |         # encoder part
25 |         ############################
26 | 
27 |         layer_names = [
28 |             "conv3_block4_out",  # 1/8
29 |             "conv4_block6_out",  # 1/16
30 |             "conv5_block3_out",  # 1/32
31 |         ]
32 |         layers = [self.resnet50_model.get_layer(name).output for name in layer_names]
33 |         self.FCN_left = Model(inputs=self.resnet50_model.input, outputs=layers)
34 | 
35 |         ############################
36 |         # decoder part
37 |         ############################
38 | 
39 |         # pool5 (1/32) ==> 1/16
40 |         self.pool5_conv1 = Conv2D(filters=self.filter_num,
41 |                                   kernel_size=(2, 2),
42 |                                   activation='relu',
43 |                                   padding='same',
44 |                                   name="fcn_pool5_conv1")  # 2x2 because the minimum height is 2 pixels after Resnet
45 |         self.pool5_drop1 = Dropout(0.25, name="fcn_pool5_drop1")
46 |         self.pool5_conv2 = Conv2D(filters=self.filter_num,
47 |                                   kernel_size=(1, 1),
48 |                                   activation='relu',
49 |                                   padding='same',
50 |                                   name="fcn_pool5_conv2")
51 |         self.pool5_drop2 = Dropout(0.25, name="fcn_pool5_drop2")
52 |         self.pool5_conv3 = Conv2D(filters=self.filter_num,
53 |                                   kernel_size=(1, 1),
54 |                                   kernel_initializer='he_normal',
55 |                                   name="fcn_pool5_conv3")
56 |         self.pool5_dconv1 = Conv2DTranspose(filters=self.filter_num,
57 |                                             kernel_size=(3, 3),
58 |                                             strides=(2, 2),
59 |                                             use_bias=False,
60 |                                             name="fcn_pool5_dconv1")  # with stride=2 the deconv map grows from 2x8 => 5x17 (zeros padded between pixels), upsampled with a 3x3 kernel
61 | 
62 |         # pool4 (1/16) + dconv ==> 1/8
63 |         self.pool4_conv1 = Conv2D(filters=self.filter_num,
64 |                                   kernel_size=(1, 1),
65 |                                   kernel_initializer='he_normal',
66 |                                   name="fcn_pool4_conv1")  # pool4 after a 1x1 conv + the deconvolved pool5, restoring 1/16 of the original image
67 |         self.pool4_add1 = Add(name="fcn_pool4_add1")
68 |         self.pool4_dconv1 = Conv2DTranspose(filters=self.filter_num,
69 |                                             kernel_size=(3, 3),
70 |                                             strides=(2, 2),
71 |                                             use_bias=False,
72 |                                             name="fcn_pool4_dconv1")  # deconvolve (pool4 + upsampled pool5) again, restoring the size to 1/8 of the original image
73 | 
74 |         # pool3 (1/8) + dconv ==> original size
75 |         self.pool3_conv1 = Conv2D(filters=self.filter_num,
76 |                                   kernel_size=(1, 1),
77 |                                   kernel_initializer='he_normal',
78 |                                   name="fcn_pool3_conv1")  # pool3 after a 1x1 conv is fused with the result above
79 |         self.pool3_add1 = Add(name="fcn_pool3_add1")
80 |         self.pool3_dconv1 = Conv2DTranspose(filters=self.filter_num,
81 |                                             kernel_size=(3, 3),
82 |                                             strides=(8, 8),
83 |                                             use_bias=False,
84 |                                             name="fcn_pool3_dconv1")  # the last deconvolution restores the size from 1/8 directly to the original image size (stride=8)
85 | 
86 |     def call(self, input_image, training=True):
87 | 
88 |         pool3, pool4, pool5 = _call(self.FCN_left, input_image)
89 |         o = _call(self.pool5_conv1, pool5)
90 |         o = _call(self.pool5_drop1, o)
91 |         o = _call(self.pool5_conv2, o)
92 |         o = _call(self.pool5_drop2, o)
93 |         o = _call(self.pool5_conv3, o)
94 |         o5 = _call(self.pool5_dconv1, o)
95 | 
96 |         o4 = _call(self.pool4_conv1, pool4)
97 |         o5, o4 = self.crop(o5, o4)
98 |         o45 = _call(self.pool4_add1, [o5, o4])
99 |         o45 = _call(self.pool4_dconv1, o45)
100 | 
101 |         o3 = _call(self.pool3_conv1, pool3)
102 |         o45, o3 = self.crop(o45, o3)
103 |         o = _call(self.pool3_add1, [o45, o3])
104 |         o = _call(self.pool3_dconv1, o)
105 | 
106 |         return o
107 | 
108 |     # crop the larger tensor so that both match the smaller one
109 |     def crop(self, o1, o2):
110 |         o1_height, o1_width = o1.shape[1], o1.shape[2]
111 |         o2_height, o2_width = o2.shape[1], o2.shape[2]
112 | 
113 |         cx = abs(o1_width - o2_width)
114 |         cy = abs(o1_height - o2_height)
115 | 
116 |         if o1_width > o2_width:
117 |             o1 = Cropping2D(cropping=((0, 0), (0, cx)))(o1)
118 |         else:
119 |             o2 = Cropping2D(cropping=((0, 0), (0, cx)))(o2)
120 | 
121 |         if o1_height > o2_height:
122 |             o1 = Cropping2D(cropping=((0, cy), (0, 0)))(o1)
123 |         else:
124 |             o2 = Cropping2D(cropping=((0, cy), (0, 0)))(o2)
125 | 
126 |         return o1, o2
--------------------------------------------------------------------------------
/network/layers/geometry_branch_layer.py:
--------------------------------------------------------------------------------
1 | from tensorflow.keras.layers import Layer
2 | from tensorflow.keras.layers import Convolution2D
3 | from tensorflow.keras.layers import Conv2DTranspose
4 | from tensorflow.keras.layers import GRU
5 | from tensorflow.keras.layers import Permute
6 | from tensorflow.keras.layers import Softmax
7 | from tensorflow.keras.layers import Activation
8 | from utils.util import call_debug as _call
9 | import tensorflow as tf
10 | 
11 | 
12 | class GeometryBranch(Layer):
13 |     """
14 |     [H,W,E] => order maps [H,W,S], localization map [H,W,1], order segmentation [H,W,S]
15 |     E: encoding output channels
16 |     S: max sequence length
17 |     """
18 | 
19 |     def __init__(self, name, conf):
20 |         super().__init__(name=name)
21 |         self.image_area = conf.INPUT_IMAGE_HEIGHT * conf.INPUT_IMAGE_WIDTH
22 |         self.sequence_length = conf.MAX_SEQUENCE
23 |         self.conf = conf
24 |         self.filter_num = conf.FILTER_NUM
25 | 
26 |     def build(self, input_shape):
27 |         # 
######################################################################## 28 | # order segment generation network 29 | 30 | # 1. Convs 31 | self.conv_order_seg1 = Convolution2D(filters=self.filter_num, kernel_size=(3, 3), strides=2, 32 | name="conv_order_seg1", padding="same") # 1/2 33 | self.conv_order_seg2 = Convolution2D(filters=self.filter_num, kernel_size=(3, 3), strides=2, 34 | name="conv_order_seg2", padding="same") # 1/4 35 | self.conv_order_seg3 = Convolution2D(filters=self.filter_num, kernel_size=(3, 3), strides=2, 36 | name="conv_order_seg3", padding="same") # 1/8 37 | 38 | # 2. GRU 39 | self.transpose1 = Permute((2, 1, 3)) # [B,H,W,C] => [B,W,H,C] 40 | # self.reshape1 = Reshape((-1,self.conf.INPUT_IMAGE_WIDTH,self.conf.INPUT_IMAGE_HEIGHT*self.filter_num)) # [B,W,H,C] => [B,W,H*C] 41 | self.gru_order_seg = GRU(units=self.filter_num * (input_shape[1] // 8), return_sequences=True, 42 | name="gru_order_seg") 43 | # self.reshape2 = Reshape((-1,self.conf.INPUT_IMAGE_WIDTH,self.conf.INPUT_IMAGE_HEIGHT,self.filter_num)) # [B,W,H*C] => [B,W,H,C] 44 | self.transpose2 = Permute((2, 1, 3)) # [B,W,H,C] => [B,H,W,C] 45 | 46 | # 3. DeConvs 47 | self.dconv_order_seg3 = Conv2DTranspose(filters=self.filter_num, kernel_size=(3, 3), strides=2, 48 | name="dconv_order_seg3", padding="same") # 1 49 | self.dconv_order_seg2 = Conv2DTranspose(filters=self.filter_num, kernel_size=(3, 3), strides=2, 50 | name="dconv_order_seg2", padding="same") # 1/2 51 | self.dconv_order_seg1 = Conv2DTranspose(filters=self.sequence_length, kernel_size=(3, 3), strides=2, 52 | name="dconv_order_seg1", 53 | padding="same") # 1/4 54 | self.softmax = Softmax(name="softmax") 55 | 56 | # ######################################################################## 57 | # localization map generation network 58 | self.conv_loc_map1 = Convolution2D(filters=self.filter_num, kernel_size=(3, 3), padding="same", 59 | name="conv_loc_map1") 60 | self.conv_loc_map2 = Convolution2D(filters=1, kernel_size=(1, 1), padding="same", 61 | name="conv_loc_map2") 62 | self.sigmoid = Activation("sigmoid", name="sigmoid") 63 | 64 | def call(self, inputs, training=None): 65 | # convs 66 | x = inputs 67 | x = s1 = _call(self.conv_order_seg1, x) 68 | x = s2 = _call(self.conv_order_seg2, x) 69 | x = _call(self.conv_order_seg3, x) 70 | 71 | # gru 72 | x = _call(self.transpose1, x) 73 | height = x.shape[2] 74 | channel = x.shape[3] 75 | target_shape = [-1, x.shape[1], height * channel] 76 | x = _call(tf.reshape, x, target_shape) 77 | x = _call(self.gru_order_seg, x) 78 | target_shape = [-1, x.shape[1], height, channel] 79 | x = _call(tf.reshape, x, target_shape) 80 | x = _call(self.transpose2, x) 81 | 82 | # de-convs,get seg 83 | x = _call(self.dconv_order_seg3, x) 84 | x = _call(self.dconv_order_seg2, x + s2) 85 | x = _call(self.dconv_order_seg1, x + s1) 86 | order_segment = _call(self.softmax, x) 87 | 88 | # generate Localization Map 89 | q = _call(self.conv_loc_map1, inputs) 90 | q = _call(self.conv_loc_map2, q) 91 | localization_map = _call(self.sigmoid, q) 92 | 93 | # multiply S[B,H,W,N] * Q[B,H,W,1] => [B,H,W,N] 94 | order_map = order_segment * localization_map # multiply together 95 | 96 | return order_map, localization_map, order_segment 97 | -------------------------------------------------------------------------------- /network/layers/word_formation_layer.py: -------------------------------------------------------------------------------- 1 | import tensorflow.keras.backend as K 2 | from tensorflow.keras.layers import Layer 3 | import tensorflow as 
tf
4 | 
5 | 
6 | class WordFormation(Layer):
7 |     """
8 |     Integrates the product of the "Character Segmentation" & the "Order Maps",
9 |     and infers the character probabilities.
10 |     The threshold is 0.3 (the paper says "Other Details: ... The score threshold L_score is set to 0.3 empirically ...")
11 |     """
12 | 
13 |     def __init__(self, name):
14 |         super().__init__(name=name)
15 | 
16 |     def call(self, G, H, training=None):
17 |         """
18 |         G [Character Segmentation] : [N,H,W,C] - N: batch, C: charset size (3770)
19 |         H [Order Map] : [N,H,W,S] - S: sequence length (30)
20 | 
21 |         The return value is [N,S,C], i.e. the probabilities of each character.
22 |         """
23 |         p_k_list = []
24 |         for i in range(H.shape[-1]):
25 |             H_k = H[:, :, :, i]
26 |             H_k = H_k[:, :, :, tf.newaxis]
27 |             GH = H_k * G
28 |             p_k = K.sum(GH, axis=(1, 2))
29 |             p_k_list.append(p_k)
30 | 
31 |         pks = K.stack(p_k_list)  # P_k: (30, 10, 4100)
32 |         pks = K.permute_dimensions(pks, (1, 0, 2))
33 |         return pks
--------------------------------------------------------------------------------
/network/model.py:
--------------------------------------------------------------------------------
1 | from network.layers.class_branch_layer import ClassBranchLayer
2 | from network.layers.geometry_branch_layer import GeometryBranch
3 | from network.layers.word_formation_layer import WordFormation
4 | from tensorflow.keras.applications.resnet import ResNet50
5 | from network.layers.fcn_layer import FCNLayer
6 | from tensorflow.keras.optimizers import Adam
7 | from utils.util import call_debug as _call
8 | from tensorflow.keras import backend as K
9 | from tensorflow.keras.models import Model
10 | from tensorflow.keras.layers import Input
11 | import tensorflow as tf
12 | import logging
13 | 
14 | logger = logging.getLogger(__name__)
15 | 
16 | HUBER_DELTA = 0.5
17 | 
18 | 
19 | class TextScannerModel(Model):
20 |     """
21 |     TextScanner Core Model
22 |     """
23 | 
24 |     def __init__(self, conf, charset):
25 |         super(TextScannerModel, self).__init__()
26 |         self.input_image = Input(shape=(conf.INPUT_IMAGE_HEIGHT, conf.INPUT_IMAGE_WIDTH, 3), name='input_image')
27 |         self.class_branch = ClassBranchLayer(name="ClassBranchLayer", charset_size=len(charset),
28 |                                              filter_num=conf.FILTER_NUM)
29 |         self.geometry_branch = GeometryBranch(name="GeometryBranchLayer", conf=conf)
30 |         self.word_formation = WordFormation(name="WordFormationLayer")
31 |         self.resnet50_model = ResNet50(include_top=False, weights='imagenet')  # Resnet50+FCN: see http://www.piginzoo.com/machine-learning/2020/04/23/fcn-unet#resnet50%E7%9A%84fcn
32 |         self.resnet50_model.summary()
33 |         self.fcn = FCNLayer(name="FCNLayer", filter_num=conf.FILTER_NUM, resnet50_model=self.resnet50_model)
34 | 
35 |     def call(self, inputs, training=None):
36 |         fcn_features = _call(self.fcn, inputs)
37 |         character_segmentation = _call(self.class_branch, fcn_features)
38 |         order_map, localization_map, order_segment = _call(self.geometry_branch, fcn_features)
39 |         words = _call(self.word_formation, character_segmentation, order_map)
40 |         return character_segmentation, order_segment, localization_map, words  # their order is critical for the losses & metrics
41 | 
42 |     def localization_map_loss(self):
43 |         def smoothL1(y_true, y_pred):
44 |             x = K.abs(y_true - y_pred)
45 |             x = K.switch(x < HUBER_DELTA, 0.5 * x ** 2, HUBER_DELTA * (x - 0.5 * HUBER_DELTA))
46 |             return K.sum(x)
47 | 
48 |         return smoothL1
49 | 
50 |     def compile_model(self):
51 |         # the model's outputs are: character_segmentation(G), order_segment(S), localization_map(Q), words
52 |         # the loss for the last output, "words", is unused; it is masked by its zero weight and kept only for metrics
53 |         losses = ['categorical_crossentropy',
54 |                   'categorical_crossentropy',
55 |                   self.localization_map_loss(),
56 |                   'categorical_crossentropy']
57 |         loss_weights = [1, 10, 10, 0]  # weight values follow the paper; the last 0 masks out the words loss
58 | 
59 |         # metrics
60 |         metrics = ['categorical_accuracy',
61 |                    'categorical_accuracy',
62 |                    'binary_accuracy',
63 |                    'categorical_accuracy']
64 | 
65 |         self.compile(Adam(),
66 |                      loss=losses,
67 |                      loss_weights=loss_weights,
68 |                      metrics=metrics,
69 |                      run_eagerly=True)
70 |         logger.info("######## TextScanner Model Structure ########")
71 |         self.build(self.input_image.shape)
72 |         self.summary()
73 |         logger.info("TextScanner Model was compiled.")
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | Keras-Preprocessing
2 | python-levenshtein
3 | Keras-Applications
4 | opencv-python
5 | matplotlib
6 | pyclipper
7 | pillow
8 | keras
9 | numpy
--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piginzoo/textscanner/a934102e3f7c9c7301c74fc86f6491da55ea5ba6/test/__init__.py
--------------------------------------------------------------------------------
/test/gaussian_filter.py:
--------------------------------------------------------------------------------
1 | #coding=utf-8
2 | from PIL import Image
3 | import numpy as np
4 | from scipy.ndimage import filters
5 | import matplotlib.pyplot as plt
6 | import scipy.ndimage.filters as fi
7 | 
8 | 
9 | def render_image():
10 |     im = np.array(Image.open('messi.jpg'))
11 | 
12 |     index = 141  # plot 1 row x 4 columns, same as (1,4,1)
13 |     plt.subplot(index)
14 |     plt.imshow(im)
15 | 
16 |     for sigma in (2, 5, 10):
17 |         im_blur = np.zeros(im.shape, dtype=np.uint8)
18 |         for i in range(3):  # apply the Gaussian filter to every channel of the image
19 |             im_blur[:,:,i] = filters.gaussian_filter(im[:,:,i], sigma)
20 |         index += 1
21 |         plt.subplot(index)
22 |         plt.imshow(im_blur)
23 | 
24 |     plt.show()
25 | 
26 | 
27 | def render_gaussian(h,w,box):
28 |     canvas = np.zeros((h,w), dtype=np.int32)
29 |     xmin, xmax,ymin, ymax = box
30 |     out = np.zeros_like(canvas).astype(np.float32)
31 |     h, w = canvas.shape[:2]
32 |     sigma = 2
33 | 
34 |     # find the center point
35 |     y = (ymax+ymin+1)//2
36 |     x = (xmax+xmin+1)//2
37 | 
38 |     # set that single point to 1
39 |     out[y, x] = 1.
40 | print("============================================================") 41 | print("原始out") 42 | print(out) 43 | # 44 | h, w = canvas.shape[:2] 45 | fi.gaussian_filter(out, (sigma, sigma),output=out, mode='mirror') 46 | 47 | print("============================================================") 48 | print("高斯过滤后out") 49 | print(out) 50 | plt.subplot(131)#画1行四列的图,与 1,4,1 同 51 | plt.imshow(out) 52 | 53 | out = out / out.max() 54 | print("============================================================") 55 | print("归一化后out") 56 | print(out) 57 | plt.subplot(132)#画1行四列的图,与 1,4,1 同 58 | plt.imshow(canvas) 59 | 60 | canvas[out > canvas] = out[out > canvas] 61 | print("============================================================") 62 | print("重新填充后的canvas") 63 | print(out) 64 | plt.subplot(133)#画1行四列的图,与 1,4,1 同 65 | plt.imshow(canvas) 66 | 67 | plt.show() 68 | 69 | 70 | def render_gaussian_thresh(h,w,box): 71 | canvas = np.zeros((h,w), dtype=np.int32) 72 | xmin, xmax,ymin, ymax = box 73 | value=7 74 | thresh=0.2 75 | shrink=0.6 76 | sigma = 2 77 | out = np.zeros_like(canvas) 78 | h, w = canvas.shape[:2] 79 | y = (ymax+ymin+1)//2 80 | x = (xmax+xmin+1)//2 81 | 82 | out = np.zeros_like(canvas).astype(np.float32) 83 | print(out.shape) 84 | out[y, x] = 1. 85 | print("============================================================") 86 | print("原始out") 87 | print(out) 88 | 89 | # out = filters.gaussian_filter(out,sigma=3) 90 | fi.gaussian_filter(out, (sigma, sigma),output=out, mode='mirror') 91 | print("============================================================") 92 | print("高斯滤波后out") 93 | print(out) 94 | out = out / out.max() 95 | print("============================================================") 96 | print("归一化后out") 97 | print(out) 98 | canvas[out > thresh] = value 99 | print("============================================================") 100 | print("out大于0.2复制了%d的canvas" % value) 101 | print(canvas) 102 | plt.imshow(canvas) 103 | plt.show() 104 | 105 | if __name__ == '__main__': 106 | #render_gaussian_thresh(64,256,(30,50,30,50)) 107 | render_gaussian(8,8,(3,5,3,5)) -------------------------------------------------------------------------------- /test/make_decouple_map.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.ndimage.filters as fi 3 | import ipdb 4 | from concern.config import State 5 | 6 | from .data_process import DataProcess 7 | 8 | 9 | class MakeDecoupleMap(DataProcess): 10 | max_size = State(default=32) # ???什么size,是最多的字符数么? 
11 |     shape = State(default=(64, 256))  # the standard (h, w) of the image
12 |     sigma = State(default=2)  # oh, the variance is set to 2
13 |     summation = State(default=False)
14 |     box_key = State(default='charboxes')
15 |     function = State(default='gaussian')
16 |     thresh = State(default=0.2)
17 |     order_dest = State(default='ordermaps')
18 |     mask_dest = State(default='charmaps')
19 |     shape_dest = State(default='shapemaps')
20 | 
21 |     def process(self, data):
22 |         assert self.box_key in data, '%s in data is required' % self.box_key
23 |         shape = data['image'].shape[:2]  # h,w
24 |         boxes = np.array(data[self.box_key])  # these are the per-character boxes
25 | 
26 |         ratio_x = shape[1] / self.shape[1]  # width ratio
27 |         boxes[:, :, 0] = (boxes[:, :, 0] / ratio_x).clip(0, self.shape[1])  # boxes is presumably all the boxes: [b,N,2]
28 |         ratio_y = shape[0] / self.shape[0]  # height ratio
29 |         boxes[:, :, 1] = (boxes[:, :, 1] / ratio_y).clip(0, self.shape[0])
30 |         boxes = (boxes + .5).astype(np.int32)
31 |         xmins = boxes[:, :, 0].min(axis=1)  # find the minimum x
32 |         xmaxs = np.maximum(boxes[:, :, 0].max(axis=1), xmins + 1)  # find the maximum x
33 |         ymins = boxes[:, :, 1].min(axis=1)
34 |         ymaxs = np.maximum(boxes[:, :, 1].max(axis=1), ymins + 1)
35 | 
36 |         # make an empty (h, w) map, all zeros
37 |         shapemaps = np.zeros((self.shape[0], self.shape[1], 2), dtype=np.int32)
38 | 
39 | 
40 |         if self.summation:
41 |             # this seems to be the GT prepared for the localization map
42 |             canvas = np.zeros(self.shape, dtype=np.int32)
43 |         else:
44 |             # 3-dimensional; looks like the GT for the order maps
45 |             canvas = np.zeros((self.max_size+1, *self.shape), dtype=np.float32)
46 | 
47 |         mask = np.zeros(self.shape, dtype=np.float32)
48 |         # generate the order indices 1~30
49 |         orders = self.orders(data)
50 | 
51 |         # process each character
52 |         for i in range(xmins.shape[0]):
53 |             # initialize an (h, w) zero map
54 |             temp = np.zeros(self.shape, dtype=np.float32)
55 |             function = getattr(self, 'render_' + self.function)
56 |             order = min(orders[i], self.max_size)
57 |             if self.summation:
58 |                 function(canvas, xmins[i], xmaxs[i], ymins[i], ymaxs[i],
59 |                          value=order+1, shrink=0.6)
60 |             else:
61 |                 # one map per character
62 |                 function(canvas[order+1], xmins[i], xmaxs[i], ymins[i], ymaxs[i])
63 |             self.render_gaussian(mask, xmins[i], xmaxs[i], ymins[i], ymaxs[i])
64 |             self.render_gaussian(temp, xmins[i], xmaxs[i], ymins[i], ymaxs[i])
65 |             w, h = xmaxs[i]-xmins[i], ymaxs[i]-ymins[i]
66 |             shapemaps[temp > 0.4] = np.array([w, h])
67 |         data[self.order_dest] = canvas
68 |         data[self.mask_dest] = mask
69 |         data[self.shape_dest] = shapemaps.transpose(2, 0, 1)
70 |         return data
71 | 
72 |     def render_gaussian(self, canvas, xmin, xmax, ymin, ymax):
73 |         out = np.zeros_like(canvas)
74 |         h, w = canvas.shape[:2]
75 |         # find the center point
76 |         y = (ymax+ymin+1)//2
77 |         x = (xmax+xmin+1)//2
78 |         if not (w > x and h > y): return
79 |         # set that single point to 1
80 |         out[y, x] = 1.
81 |         h, w = canvas.shape[:2]
82 |         fi.gaussian_filter(out, (self.sigma, self.sigma), output=out, mode='mirror')
83 |         out = out / out.max()
84 |         canvas[out > canvas] = out[out > canvas]  # <---
85 | 
86 |     def render_gaussian_thresh(self, canvas, xmin, xmax, ymin, ymax,
87 |                                value=1, thresh=None, shrink=None):
88 |         if thresh is None: thresh = self.thresh
89 |         h, w = canvas.shape[:2]
90 |         y = (ymax+ymin+1)//2
91 |         x = (xmax+xmin+1)//2
92 |         if not (w > x and h > y): return
93 |         out = np.zeros_like(canvas).astype(np.float32)
94 |         out[y, x] = 1.
95 |         out = fi.gaussian_filter(out, (self.sigma, self.sigma), output=out, mode='mirror')
96 |         out = out / out.max()
97 |         canvas[out > thresh] = value
98 | 
99 | 
100 |     def render_gaussian_fast(self, canvas, xmin, xmax, ymin, ymax):
101 |         out = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.float32)
102 |         out[(ymax-ymin+1)//2, (xmax-xmin+1)//2] = 1.
103 |         h, w = canvas.shape[:2]
104 |         fi.gaussian_filter(out, (self.sigma, self.sigma),
105 |                            output=out, mode='mirror')
106 |         out = out / out.max()
107 |         canvas[ymin:ymax+1, xmin:xmax+1] = np.maximum(out, canvas[ymin:ymax+1, xmin:xmax+1])
108 | 
109 |     def orders(self, data):
110 |         orders = []
111 |         if 'lines' in data:  # what on earth is "lines"?
112 |             for text in data['lines'].texts:
113 |                 orders += list(range(min(len(text), self.max_size)))
114 |         else:
115 |             # as I understand it, this just generates indices 1:max_size (data[self.box_key] is the boxes)
116 |             orders = list(range(min(data[self.box_key].shape[0], self.max_size)))
117 |         return orders
--------------------------------------------------------------------------------
/test/test_accuracy.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | 
4 | def _p(t,name):
5 |     print("graph-definition debug: "+name, t)
6 |     return tf.Print(t,[t],name,summarize=300)
7 | 
8 | # y_pred is [batch,seq,charset_size]
9 | # pred = np.random.rand(3,3,3)
10 | # label = np.random.rand(3,3,3)
11 | 
12 | pred = np.array(
13 |     [
14 |         [[1,0,0],[1,0,0],[1,0,0]],
15 |         [[1,0,0],[1,0,0],[1,0,0]],
16 |         [[1,0,0],[1,0,0],[1,0,0]]
17 |     ])
18 | 
19 | label = np.array(
20 |     [
21 |         [[0.5,0.2,0.3],[0.5,0.2,0.3],[0.5,0.2,0.3]],  # true,true,true => true
22 |         [[0.5,0.2,0.3],[0.5,0.2,0.3],[0.2,0.5,0.3]],  # true,true,false => false
23 |         [[0.2,0.3,0.5],[0.2,0.3,0.5],[0.2,0.3,0.5]]   # false,false,false => false
24 |     ])
25 | 
26 | # the accuracy should be 0.333
27 | 
28 | def accuracy(y_true, y_pred):
29 |     max_idx_p = tf.argmax(y_pred, axis=2)
30 |     max_idx_l = tf.argmax(y_true, axis=2)
31 |     max_idx_p = _p(max_idx_p,"max_idx_p")
32 |     correct_pred = tf.equal(max_idx_p, max_idx_l)
33 |     _result = tf.map_fn(fn=lambda e: tf.reduce_all(e), elems=correct_pred, dtype=tf.bool)
34 |     return tf.reduce_mean(tf.cast(_result, tf.float32))
35 | 
36 | s = tf.Session()
37 | 
38 | x = tf.placeholder(tf.float32, shape=[None, None,3], name='x')
39 | y = tf.placeholder(tf.float32, shape=[None, None,3], name='y')
40 | m = accuracy(x,y)
41 | r = s.run(m,feed_dict={x:pred,y:label})
42 | print(r)
--------------------------------------------------------------------------------
/test/test_call.py:
--------------------------------------------------------------------------------
1 | 
2 | def call1(p1):
3 |     print("call1!")
4 |     print(p1)
5 | 
6 | def call2(p1,p2):
7 |     print("call2!")
8 |     print(p1)
9 |     print(p2)
10 | 
11 | def call3(p_list):
12 |     print("call3!")
13 |     print(p_list)
14 | 
15 | 
16 | def test_func(c,*param):
17 |     print(type(param))
18 |     c(*param)
19 | 
20 | test_func(call1, "aaaa")
21 | test_func(call2, "bbbb","cccc")
22 | test_func(call3, ["bbbb","cccc"])
--------------------------------------------------------------------------------
/test/test_customized_layer.py:
--------------------------------------------------------------------------------
1 | # from tensorflow.keras.layers import Conv2D
2 | # from tensorflow.keras.layers import LeakyReLU
3 | # from tensorflow.keras.layers import MaxPooling2D
4 | # from tensorflow.keras.layers import BatchNormalization
5 | # from tensorflow.keras.layers import Lambda
6 | # from tensorflow.keras.layers import Layer
7 | # from tensorflow.keras.backend import squeeze
8 | from keras.layers import Conv2D
9 | from keras.layers import LeakyReLU
10 | from keras.layers import MaxPooling2D
11 | from keras.layers import BatchNormalization
12 | from keras.layers import Lambda
13 | from keras.layers import Layer,Flatten,Dense
14 | from keras.backend import squeeze
15 | from keras.optimizers import 
Adam
16 | from keras.models import Model
17 | from keras.layers import Input
18 | import numpy as np
19 | 
20 | class Conv(Layer):
21 | 
22 |     # [N,1,256/4,512] => [N,256/4,512]
23 |     def squeeze_wrapper(self,tensor):
24 |         print("tensor:",tensor)
25 |         return squeeze(tensor, axis=1)
26 | 
27 |     def __init__(self, **kwargs):
28 |         super(Conv, self).__init__(**kwargs)
29 | 
30 |     '''
31 |     # The feature-extraction CNN
32 |     # https://blog.csdn.net/Quincuntial/article/details/77679463
33 |     In the CRNN model, the convolutional component is built from the conv and max-pooling layers of a
34 |     standard CNN model (the fully-connected layers are removed). It extracts a sequential feature
35 |     representation from the input image. Before entering the network, all images must be scaled to the
36 |     same height. Then a sequence of feature vectors is extracted from the feature maps produced by the
37 |     conv component; this sequence is the input of the recurrent layers. Specifically, each feature vector
38 |     of the sequence is generated on the feature maps column by column, from left to right, which means the
39 |     i-th feature vector is the concatenation of the i-th columns of all the feature maps. In our setting
40 |     the width of each column is fixed to a single pixel.
41 | 
42 |     # Since the conv, max-pooling and element-wise activation layers operate on local regions, they are
43 |     translation invariant. Hence each column of the feature maps corresponds to a rectangular region of
44 |     the original image (its receptive field), and those regions have the same left-to-right order as the
45 |     corresponding columns of the feature maps. As in Fig. 2, each vector in the feature sequence is
46 |     associated with a receptive field and can be regarded as the image descriptor of that region.
47 |     :param inputdata: e.g. batch*32*100*3, NHWC format
48 |         |
49 |      Conv1 --> H*W*64            # dims after the conv
50 |      Relu1
51 |      Pool1 H/2 * W/2 * 64        # dims after the pooling
52 |         |
53 |      Conv2 H/2 * W/2 * 128
54 |      Relu2
55 |      Pool2 H/4 * W/4 * 128
56 |         |
57 |      Conv3 H/4 * W/4 * 256
58 |      Relu3
59 |         |
60 |      Conv4 H/4 * W/4 * 256
61 |      Relu4
62 |      Pool4 H/8 * W/4 * 64
63 |         |
64 |      Conv5 H/8 * W/4 * 512
65 |      Relu5
66 |      BatchNormal5
67 |         |
68 |      Conv6 H/8 * W/4 * 512
69 |      Relu6
70 |      BatchNormal6
71 |      Pool6 H/16 * W/4 * 512
72 |         |
73 |      Conv7
74 |      Relu7 H/32 * W/4 * 512
75 |         |
76 |      20 layers in total
77 |     '''
78 |     # the custom conv stack: 32x100 => 1x25, i.e. (1/32, 1/4)
79 |     def call(self,inputs):
80 |         x = inputs
81 |         for layer in self.layers:
82 |             # print(x)
83 |             x = layer(x)
84 | 
85 |         return x
86 | 
87 | 
88 |     def build(self, input_shape):
89 |         self.layers = []
90 |         # Block 1
91 |         self.layers.append(Conv2D(64, (3, 3), padding='same', name='block1_conv1'))
92 |         self.layers.append(LeakyReLU())
93 |         # self.layers.append(BatchNormalization())
94 |         self.layers.append(MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool'))  # 1/2
95 | 
96 |         # Block 2
97 |         self.layers.append(Conv2D(128, (3, 3), padding='same', name='block2_conv1'))
98 |         self.layers.append(LeakyReLU())
99 |         # self.layers.append(BatchNormalization())
100 |         self.layers.append(MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool'))  # 1/2
101 | 
102 |         # Block 3
103 |         self.layers.append(Conv2D(256, (3, 3), padding='same', name='block3_conv1'))
104 |         self.layers.append(LeakyReLU())
105 |         # self.layers.append(BatchNormalization())
106 | 
107 |         # Block 4
108 |         self.layers.append(Conv2D(256, (3, 3), padding='same', name='block4_conv1'))
109 |         # self.layers.append(BatchNormalization())
110 |         self.layers.append(LeakyReLU())
111 |         self.layers.append(MaxPooling2D((2, 1), strides=(2, 1), name='block4_pool'))  # 1/2 <------ pool kernel is (2,1)!!!!!
112 | 
113 |         # Block 5
114 |         self.layers.append(Conv2D(512, (3, 3), padding='same', name='block5_conv1'))
115 |         self.layers.append(LeakyReLU())
116 |         self.layers.append(BatchNormalization())
117 | 
118 |         # Block 6
119 |         self.layers.append(Conv2D(512, (3, 3), padding='same', name='block6_conv1'))
120 |         self.layers.append(LeakyReLU())
121 |         self.layers.append(BatchNormalization())
122 |         self.layers.append(MaxPooling2D((2, 1), strides=(2, 1), name='block6_pool'))  # 1/2 <------ pool kernel is (2,1)!!!!!
118 | 
119 |         # Block 7
120 |         self.layers.append(Conv2D(512, (2, 2), strides=[2, 1], padding='same', name='block7_conv1'))  # 1/2
121 |         self.layers.append(LeakyReLU())
122 | 
123 |         # the output is (batch,1,Width/4,512); after the squeeze it becomes (batch,Width/4,512)
124 |         self.layers.append(Lambda(self.squeeze_wrapper))
125 | 
126 |         super(Conv, self).build(input_shape)
127 | 
128 |     # input_shape[N,H,W,512] => output_shape[N,W/4,512]
129 |     def compute_output_shape(self, input_shape):
130 |         print("input_shape:", input_shape)
131 |         return (None, int(input_shape[2] / 4), 512)
132 | 
133 | if __name__ == '__main__':
134 | 
135 |     input_image = Input(shape=(32, 256, 3))
136 |     conv = Conv()
137 |     conv_output = conv(input_image)  # output (None, 64, 512)
138 |     print(conv_output)
139 |     flat = Flatten()(conv_output)
140 |     output = Dense(4, activation='softmax', input_shape=(-1,))(flat)
141 | 
142 |     train_model = Model(inputs=input_image, outputs=output)
143 |     adam = Adam()
144 |     train_model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
145 |     train_data = np.random.random((10, 32, 256, 3))
146 |     train_labels = np.random.random((10, 4))
147 |     train_model.fit(train_data, train_labels, epochs=1, batch_size=1)
148 | 
--------------------------------------------------------------------------------
/test/test_draw_charactor_segment.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | 
4 | csi = character_segment_image = np.random.random((64, 256, 3840))
5 | csi = np.argmax(csi, axis=-1)  # collapse the 3840-class dimension into a single map of class ids
6 | 
7 | plt.imshow(csi)
8 | plt.show()
--------------------------------------------------------------------------------
/test/test_file_process.py:
--------------------------------------------------------------------------------
1 | f = open("../data/small_vocab_en.txt", 'r')
2 | for l in f:
3 |     print(l)
--------------------------------------------------------------------------------
/test/test_heirachy.py:
--------------------------------------------------------------------------------
1 | class Parent():
2 |     def __init__(self, name, *args):
3 |         self.name = name
4 |         print("i am parent,name=", name)
5 | 
6 | class Child(Parent):
7 |     def __init__(self, name, *args):
8 |         super().__init__(name, args)
9 |         print("i am child,name=", name)
10 | 
11 | 
12 | class GrandChild(Child):
13 |     def __init__(self, name, *args):
14 |         super().__init__(name, args)
15 |         print("i am grandchild,name=", name)
16 | 
17 | 
18 | gc = GrandChild("grand_child_hello", "...")
19 | print("finally, got:", gc.name)
20 | 
--------------------------------------------------------------------------------
/test/test_image_process.py:
--------------------------------------------------------------------------------
1 | # Test whether samples can be resized correctly
2 | from utils import image_utils
3 | import conf
4 | import matplotlib.pyplot as plt, cv2
5 | #
6 | # plt.title("processed images", fontsize='large', fontweight='bold')
7 | #
8 | # resize_images = image_utils.read_and_resize_image(["test/data/test/test1.jpg"], conf)
9 | # plt.imshow(cv2.cvtColor(resize_images[0], cv2.COLOR_BGR2RGB))
10 | # plt.show()
11 | #
12 | # resize_images = image_utils.read_and_resize_image(["test/data/test/test2.jpg"], conf)
13 | # plt.imshow(cv2.cvtColor(resize_images[0], cv2.COLOR_BGR2RGB))
14 | # plt.show()
15 | 
16 | 
17 | # Test the polygon shrinking algorithm
18 | import numpy as np
19 | poly = np.array([[100, 130], [140, 126], [160, 129], [170, 140], [144, 142], [124, 135]])
20 | shrinked_poly = image_utils.shrink_poly(poly, 0.75)
21 | from matplotlib import pyplot as plt
22 | fig = plt.figure()
23 | ax = fig.add_subplot(121)
24 | ax.fill(poly[:, 0], poly[:, 1], 'g')
25 | ax = fig.add_subplot(121)
26 | ax.fill(shrinked_poly[:, 0], shrinked_poly[:, 1], 'r', alpha=0.8)
27 | ax = fig.add_subplot(122)
28 | ax.fill(shrinked_poly[:, 0], shrinked_poly[:, 1], 'r', alpha=0.8)
29 | plt.show()
--------------------------------------------------------------------------------
/test/test_krnn.py:
--------------------------------------------------------------------------------
1 | from keras.layers import Layer
2 | import keras.backend as K
3 | from keras.layers import LSTM, Input, GRU, Dense, Concatenate, TimeDistributed, Bidirectional
4 | from keras.models import Sequential
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 | import tensorflow as tf
8 | 
9 | class My_RNN(Layer):
10 | 
11 |     def __init__(self, output_dim, **kwargs):
12 |         self.output_dim = output_dim  # output dimension
13 |         super(My_RNN, self).__init__(**kwargs)
14 | 
15 |     def build(self, input_shape):  # define the trainable weights
16 |         self.kernel1 = self.add_weight(name='kernel1',
17 |                                        shape=(self.output_dim, self.output_dim),
18 |                                        initializer='glorot_normal',
19 |                                        trainable=True)
20 |         self.kernel2 = self.add_weight(name='kernel2',
21 |                                        shape=(input_shape[-1], self.output_dim),
22 |                                        initializer='glorot_normal',
23 |                                        trainable=True)
24 |         self.bias = self.add_weight(name='bias',  # was name='kernel', a misleading duplicate name
25 |                                     shape=(self.output_dim,),
26 |                                     initializer='glorot_normal',
27 |                                     trainable=True)
28 | 
29 |     def step_do(self, step_in, states):  # one step of the recurrence
30 |         print("step_in:", step_in)
31 |         print("states:", states)
32 |         step_in = tf.Print(step_in, [tf.shape(step_in)], "step_in")
33 |         states = tf.Print(states, [tf.shape(states)], "states")
34 |         step_out = K.tanh(K.dot(states[0], self.kernel1) +
35 |                           K.dot(step_in, self.kernel2) +
36 |                           self.bias)
37 |         return step_out, [step_out]
38 | 
39 |     def call(self, inputs):  # the actual forward computation
40 |         init_states = [K.zeros((K.shape(inputs)[0], self.output_dim))]  # initial state (all zeros)
41 |         print("init_states.shape:", init_states)
42 |         outputs = K.rnn(self.step_do, inputs, init_states)  # run step_do over the time axis
43 |         return outputs[0]  # outputs is a tuple: outputs[0] is the last timestep's output,
44 |                            # outputs[1] is the whole output sequence over time,
45 |                            # and outputs[2] is a list of the intermediate hidden states.
46 | 
47 |     def compute_output_shape(self, input_shape):
48 |         return (input_shape[0], self.output_dim)
49 | 
50 | 
51 | 
52 | train_X = np.random.rand(10, 5, 3)
53 | train_y = np.random.rand(10, 5)
54 | 
55 | model = Sequential()
56 | model.add(My_RNN(output_dim=4, input_shape=(train_X.shape[1], train_X.shape[2])))
57 | model.add(Dense(5))
58 | model.compile(loss='mae', optimizer='adam')
59 | model.summary()
60 | # fit network
61 | history = model.fit(train_X, train_y, epochs=2, batch_size=72, verbose=2, shuffle=False)
--------------------------------------------------------------------------------
/test/test_label_maker.py:
--------------------------------------------------------------------------------
1 | from utils.label.label_maker import LabelGenerater
2 | from utils.label.label import ImageLabel
3 | from utils.label import label_utils
4 | import matplotlib.pyplot as plt
5 | import numpy as np
6 | import logging
7 | import os, cv2
8 | import conf
9 | 
10 | debug_dir = "data/debug"
11 | charset_path = "config/charset.4100.txt"
12 | shape = (conf.INPUT_IMAGE_WIDTH, conf.INPUT_IMAGE_HEIGHT)
13 | 
14 | """
15 | This script tests the label (sample) generation. It mainly checks:
16 | 1. Whether the generated maps look right visually:
17 |    - whether the rendered order_segment exactly wraps each character
18 |    - whether the localization map looks like a Gaussian around each character's center
19 |    - whether each order map draws the Gaussian of the N-th character in order
20 | 2. Whether the original string can be recovered from the generated labels via the
21 |    word formulation, which indirectly verifies the correctness of all the maps.
22 | """
23 | 
24 | 
25 | def save_bbox_image(image_label, image_path):
26 |     image = image_label.image
27 |     bboxes = image_label.bboxes
28 |     cv2.polylines(image, bboxes, True, (0, 0, 255))
29 |     cv2.imwrite(image_path, image)
30 | 
31 | 
32 | def save_image(name, gt, image=None, highlight=False):
33 |     image = cv2.resize(image, shape)
34 |     image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
35 | 
36 |     if highlight:
37 |         gt_mask = gt.copy()
38 |         gt_mask[gt_mask > 0] = 1
39 |         gt = 155 + 100 * gt / (gt.max() + 0.001)
40 |         gt = gt * gt_mask
41 |     else:
42 |         gt = 255 * gt / (gt.max() + 0.001)
43 | 
44 |     image = np.ubyte(0.5 * gt + 0.5 * image)
45 |     plt.clf()
46 |     # plt.imshow(image)
47 |     plt.imsave(name, image)  # plt must be used here to get a color image
48 | 
49 |     # cv2.imwrite(name,image)  # with cv2 the saved image is grayscale; plt applies a colormap to single-channel data, which is why its output is colored
50 | 
51 | 
52 | def test_make_label(image_path, charset):
53 |     dir, image_name = os.path.split(image_path)
54 |     name, ext = os.path.splitext(image_name)
55 |     if ext != ".png": return
56 |     json_path = os.path.join(dir, name + ".txt")
57 | 
58 |     print("----------------------------------------------")
59 |     print("Image: ", image_name)
60 | 
61 |     image = cv2.imread(image_path)
62 | 
63 |     f = open(json_path, encoding="utf-8")
64 |     data = f.readlines()
65 | 
66 |     image_label = ImageLabel(image,
67 |                              data,
68 |                              format="plaintext",
69 |                              target_size=(conf.INPUT_IMAGE_WIDTH, conf.INPUT_IMAGE_HEIGHT))
70 | 
71 |     generator = LabelGenerater(conf.MAX_SEQUENCE,
72 |                                target_image_shape=(conf.INPUT_IMAGE_HEIGHT, conf.INPUT_IMAGE_WIDTH),
73 |                                charset=charset)
74 | 
75 |     character_segment, order_maps, localization_map = generator.process(image_label)
76 | 
77 |     if not os.path.exists(debug_dir): os.makedirs(debug_dir)
78 | 
79 |     save_bbox_image(image_label, os.path.join(debug_dir, f"{name}.jpg"))
80 |     save_image(os.path.join(debug_dir, f"{name}_character_segment.jpg"), character_segment, image, True)
81 |     save_image(os.path.join(debug_dir, f"{name}_localization_map.jpg"), localization_map, image)
82 |     order_maps = order_maps.transpose(2, 0, 1)  # (H,W,S) => (S,H,W)
83 | 
84 |     for i, order_map in enumerate(order_maps):
85 |         save_image(os.path.join(debug_dir, f"{name}_order_map_{i + 1}.jpg"), order_map, image)
86 | 
87 |     test_word_formulation(character_segment, charset, image_label, order_maps)
88 | 
89 | 
90 | # Try to decode the labels back and check whether the original characters can be recovered;
91 | # this mainly verifies that decoding this way is reasonable (the GT maps should be the easiest case to decode correctly).
92 | def test_word_formulation(character_segment_G, charset, image_label, order_maps_H):
93 |     G = np.eye(len(charset))[character_segment_G]  # indexing the identity matrix one-hot encodes the categorical map
94 |     H = order_maps_H
95 |     # print("character_segment_G.shape:", character_segment_G.shape)
96 |     # print("G.shape:", G.shape)
97 |     # print("order_maps.shape/H:", order_maps_H.shape)
98 | 
99 |     pred = ""
100 |     indices, max_sum = None, None
101 |     for i, H_k in enumerate(H):
102 |         # G[H,W,C:4100] * H_k[H,W,1]
103 |         # G holds the per-pixel character probabilities (over the 4100 classes)
104 |         # H_k is the Gaussian (order map) of the k-th character
105 |         # (G*H_k)           ===> [H,W,4100]
106 |         # sum = \sum(G*H_k) ===> [4100]
107 | 
108 |         _H_k = H_k[:, :, np.newaxis]  # [H,W] => [H,W,1]
109 |         GH_k = (G * _H_k)
110 |         sum = np.sum(GH_k, axis=(0, 1))
111 |         id = sum.argmax()
112 |         print("sum max value:", sum[id])
113 | 
114 |         # print("max id of 4100:", id, ", max value is :", sum[id])
115 |         if id == 0:
116 |             indices = sum.argsort()
117 |             max_sum = sum[indices]
118 |             # print("top2 id:", indices[2:])
119 |             # print("top2 prob:", sum[indices])
120 |             break
121 | 
122 |         c = label_utils.id2str([int(id)], charset)
123 |         pred += c
124 | 
125 |     if image_label.label != pred:
126 |         print("Predict:[%s]" % pred)
127 |         print("Label  :[%s]" % image_label.label)
128 |         if indices is not None:  # only available when decoding stopped at the padding class
129 |             top = 2
130 |             print(f"Top {top}  :", indices[-top:])
131 |             print(f"Prob {top} :", max_sum[-top:])
132 |             print("Missed :", label_utils.id2str(indices[-top:].tolist(), charset))
133 | 
134 | 
135 | if __name__ == "__main__":
136 |     logging.basicConfig(format="%(levelname)s %(message)s", level=logging.DEBUG)
137 | 
138 |     charset = label_utils.get_charset(charset_path)
139 | 
140 |     # test every image in the directory
141 |     # dir = "data/train"
142 |     # files = os.listdir(dir)
143 |     # for f in files:
144 |     #     image_path = os.path.join(dir, f)
145 |     #     test_make_label(image_path, charset)
146 | 
147 |     # test a single image
148 |     test_make_label("data/train/3-5.png", charset)
149 |     # test_make_label("data/train/0-6.png", charset)
150 |     # test_make_label("data/train/0-23.png", charset)
151 |     # test_make_label("data/train/2-16.png", charset)
152 |     # test_make_label("data/train/1-22.png", charset)
--------------------------------------------------------------------------------
/test/test_summary_image.py:
--------------------------------------------------------------------------------
1 | import io
2 | from tensorflow.keras.callbacks import Callback
3 | from tensorflow.python.framework.ops import EagerTensor
4 | from PIL import Image, ImageDraw, ImageFont
5 | import matplotlib.pyplot as plt
6 | import tensorflow as tf
7 | import numpy as np
8 | import logging
9 | import cv2
10 | 
11 | image = np.random.random((32, 256))
12 | buffer = io.BytesIO()
13 | plt.imsave(buffer, image, format='jpg')
14 | image = Image.open(buffer).convert('RGB')
15 | image.save("../data/test.jpg")
16 | image = np.array(image)
17 | buffer.close()
18 | print(image.shape)
19 | 
20 | writer = tf.summary.create_file_writer("../data/tboard")
21 | with writer.as_default():
22 |     tf.summary.image("test123", np.array([image]), step=0)
--------------------------------------------------------------------------------
/test/test_tensor_process.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tensorflow.keras.backend as K
3 | import numpy as np
4 | G = np.random.random((10, 64, 256, 512))   # (N,H,W,charset), toy charset size 512
5 | G = tf.convert_to_tensor(G)
6 | H = np.random.random((10, 64, 256, 3))     # (N,H,W,S), toy sequence length 3
7 | H = tf.convert_to_tensor(H)
8 | 
9 | 
10 | p_k_list = []
11 | for i in range(H.shape[-1]):
12 |     H_k = H[:, :, :, i]
13 |     H_k = H_k[:, :, :, tf.newaxis]
14 |     print("H_k:", H_k.shape)
15 |     GH = H_k * G
16 |     print("GH:", GH.shape)
17 |     p_k = K.sum(GH, axis=(1, 2))
18 |     print("p_k:", p_k.shape)
19 |     print("------------")
20 |     p_k_list.append(p_k)
21 | pks = tf.stack(p_k_list)  # (S,N,charset): (3,10,512) here, (30,10,4100) with the real sizes
22 | pks = K.permute_dimensions(pks, (1, 0, 2))
23 | print("P_k:", pks.shape)  # (N,S,charset): (10,3,512) here, (10,30,4100) with the real sizes
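Note: the per-k loop above can be collapsed into one contraction. A minimal equivalent sketch (same toy tensors G and H as above; assumes a TensorFlow version with tf.einsum, not code from this repo):

    # sum over H and W for every order map k at once:
    # pks2[n,k,c] = sum_{h,w} G[n,h,w,c] * H[n,h,w,k]
    pks2 = tf.einsum('bhwc,bhwk->bkc', G, H)   # (10, 3, 512), i.e. (N, S, charset)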
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piginzoo/textscanner/a934102e3f7c9c7301c74fc86f6491da55ea5ba6/utils/__init__.py
--------------------------------------------------------------------------------
/utils/image_utils.py:
--------------------------------------------------------------------------------
1 | import cv2, numpy as np
2 | import logging
3 | import pyclipper
4 | logger = logging.getLogger(__name__)
5 | 
6 | 
7 | def show_image(img):
8 |     # if img: plt.imshow(img)
9 |     pass
10 | 
11 | # Resize images: the height is fixed (INPUT_IMAGE_HEIGHT, 64); the width is padded up (or resized down) to INPUT_IMAGE_WIDTH
12 | def read_and_resize_image(image_names: list, conf):
13 | 
14 |     padded_images = []
15 | 
16 |     for image_name in image_names:
17 |         image = cv2.imread(image_name, cv2.IMREAD_COLOR)
18 |         if image is None:
19 |             logger.warning("Failed to read image %s", image_name)
20 |             continue
21 |         # logger.debug("Read file [%s]: %r", image_name, image.shape)
22 |         h, w, _ = image.shape
23 |         ratio = conf.INPUT_IMAGE_HEIGHT / h  # INPUT_IMAGE_HEIGHT
24 |         image = cv2.resize(image, None, fx=ratio, fy=ratio, interpolation=cv2.INTER_AREA)
25 |         show_image(image)
26 |         # after resizing, compare the actual width with the required width (256 by default)
27 |         dim_difference = conf.INPUT_IMAGE_WIDTH - image.shape[1]
28 |         if (dim_difference < 0):
29 |             # if the image is too wide, resize it down to the maximum width
30 |             padded_image = cv2.resize(image, (conf.INPUT_IMAGE_WIDTH, conf.INPUT_IMAGE_HEIGHT))
31 |         else:
32 |             # otherwise pad with black: [(0,0),(0,dim_difference),(0,0)] => [no padding on height, pad only at the tail of width, no padding on channels]
33 |             padded_image = np.pad(image, [(0, 0), (0, dim_difference), (0, 0)], 'constant', constant_values=(0))
34 |         # show_image(padded_image)
35 |         # cv2.imwrite("data/test.jpg", padded_image)
36 |         padded_images.append(padded_image)
37 |         # logger.debug("Resized file [%s]: %r", image_name, padded_image.shape)
38 | 
39 |     images = np.stack(padded_images, axis=0)
40 |     # logger.debug("Images shape: %r", images.shape)
41 |     return images
42 | 
43 | def perimeter(polys):
44 |     # compute the perimeter
45 |     p = 0
46 |     nums = polys.shape[0]
47 |     for i in range(nums):
48 |         p += abs(np.linalg.norm(polys[i % nums] - polys[(i + 1) % nums]))
49 |     # logger.debug('perimeter:{}'.format(p))
50 |     return p
51 | 
52 | # Reference: https://blog.csdn.net/m_buddy/article/details/105614620
53 | # polys[N,2]
54 | def shrink_poly(polys, ratio=0.5):
55 |     """
56 |     Shrink a polygon
57 |     :param polys: the polygon
58 |     :param ratio: the shrink ratio
59 |     :return:
60 |     """
61 |     if type(polys) == list:
62 |         polys = np.array(polys)
63 | 
64 |     if ratio == 1: return polys
65 | 
66 |     area = abs(pyclipper.Area(polys))  # area
67 |     _perimeter = perimeter(polys)      # perimeter
68 | 
69 |     pco = pyclipper.PyclipperOffset()
70 |     if _perimeter:
71 |         # TODO: why is d computed this way? (see the note below)
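        # Note (added explanation, not in the original source): for a small uniform
        # inward offset d, the removed area is approximately perimeter * d. Shrinking
        # every linear dimension by `ratio` scales the area by ratio**2, so the area
        # to remove is area * (1 - ratio**2), which gives
        #     d = area * (1 - ratio**2) / perimeter
        # This is the same offset distance used with pyclipper for label shrinking in
        # PSENet-style text detectors.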
72 |         d = area * (1 - ratio * ratio) / _perimeter
73 |         pco.AddPath(polys, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
74 |         # execute the offset and return the shrunken polygon
75 |         polys_shrink = pco.Execute(-d)
76 |     else:
77 |         logger.warning("The polygon's perimeter is 0")
78 |         return None
79 | 
80 |     if len(polys_shrink) == 0:
81 |         logger.debug("Shrinking the polygon [area=%f] failed, falling back to the original coordinates", area)
82 |         return polys
83 |     shrinked_bbox = np.array(polys_shrink[0])
84 |     return shrinked_bbox
85 | 
86 | if __name__ == "__main__":
87 |     import conf
88 |     read_and_resize_image("data/test.jpg", conf)
--------------------------------------------------------------------------------
/utils/label/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piginzoo/textscanner/a934102e3f7c9c7301c74fc86f6491da55ea5ba6/utils/label/__init__.py
--------------------------------------------------------------------------------
/utils/label/label.py:
--------------------------------------------------------------------------------
1 | from utils import util
2 | import numpy as np
3 | import json, cv2
4 | import logging
5 | 
6 | logger = logging.getLogger(__name__)
7 | 
8 | 
9 | class ImageLabel:
10 |     """
11 |     Wraps the image and its label data.
12 |     There are 2 supported formats:
13 |     - labelme format: see https://github.com/wkentaro/labelme/blob/master/examples/tutorial/apc2016_obj3.json
14 |     - plaintext: like
15 |     >>>
16 |     你好,世界
17 |     11,12,21,22,31,32,41,42,你
18 |     ...
19 |     <<<
20 |     Moreover, ImageLabel is in charge of resizing to the standard size (64x256).
21 |     """
22 | 
23 |     def __init__(self, image, data, format, target_size):
24 |         self.format = format
25 |         self.image = cv2.resize(image, target_size)  # do the standard resizing
26 | 
27 |         self.target_size = target_size  # (W,H)
28 |         self.orignal_size = (image.shape[1], image.shape[0])  # (W,H)
29 | 
30 |         self.labels = self.load(data)
31 | 
32 |     def load(self, data):
33 | 
34 |         if self.format == "labelme":
35 |             return self._load_labelme(data)
36 | 
37 |         if self.format == "plaintext":
38 |             return self._load_plaintext(data)
39 | 
40 |         raise ValueError("Unknown label type:", self.format)
41 | 
42 |     # labelme json format reference: https://github.com/wkentaro/labelme/blob/master/examples/tutorial/apc2016_obj3.json
43 |     def _load_labelme(self, data):
44 | 
45 |         assert type(data) == list
46 | 
47 |         data = "".join(data)
48 | 
49 |         image_labels = json.loads(data)
50 |         shapes = image_labels['shapes']
51 |         labels = []
52 |         for s in shapes:
53 |             label = s['label']
54 |             points = s['points']
55 |             points = util.resize_bboxes(points, original_size=self.orignal_size, target_size=self.target_size)
56 |             labels.append(Label(label, points))
57 |         return labels
58 | 
59 |     # format:
60 |     # 你好,世界
61 |     # 11,12,21,22,31,32,41,42,你
62 |     # 11,12,21,22,31,32,41,42,好
63 |     # ....
64 |     def _load_plaintext(self, data):
65 | 
66 |         assert type(data) == list
67 | 
68 |         # skip data[0], the first line, which is the full label string
69 | 
70 |         # parse from line 2 to the end
71 |         labels = []
72 |         for i in range(1, len(data)):
73 |             # "11,12,21,22,31,32,41,42,你"
74 |             line = data[i]
75 |             line = line.replace(" ", "")
76 |             line = line.replace("\n", "")
77 | 
78 |             line_data = line.split(",")
79 |             points = line_data[:8]
80 |             label = line_data[8]
81 | 
82 |             # handle the exceptional case: "11,12,21,22,31,32,41,42,,"
83 |             if line[-2:] == ",,":
84 |                 label = ","
85 | 
86 |             # "11,12,21,22,31,32,41,42" => [[11,12],[21,22],[31,32],[41,42]]
87 |             points = [int(p.strip()) for p in points]
88 |             points = np.array(points)
89 |             points = np.reshape(points, (4, 2))
90 | 
91 |             # adjust all bboxes' coordinates
92 |             points = util.resize_bboxes(points, original_size=self.orignal_size, target_size=self.target_size)
93 | 
94 |             # logger.debug("resized bbox:%r", points)
95 | 
96 |             labels.append(Label(label, points))
97 |         return labels
98 | 
99 |     @property
100 |     def bboxes(self):
101 |         return np.array([l.bbox for l in self.labels])
102 | 
103 |     @property
104 |     def label(self):
105 |         return "".join([l.label for l in self.labels])
106 | 
107 | class Label:
108 |     """
109 |     Single word label format:
110 |     "label": "X",
111 |     "points": [ [x1,y1],....,[xn,yn]]
112 |     """
113 | 
114 |     def __init__(self, label, bbox):
115 |         if type(bbox) == list:
116 |             bbox = np.array(bbox)
117 |         assert bbox.shape == (4, 2)
118 |         assert label is not None and label != " " and label != ""
119 |         self.bbox = bbox
120 |         self.label = label
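Note: a minimal usage sketch of the plaintext format (the file path and box coordinates below are made up for illustration; only the ImageLabel API itself comes from this repo):

    import cv2

    image = cv2.imread("data/train/3-5.png")         # any sample image
    data = ["你好",                                   # line 1: the full label string
            "11,12,21,22,31,32,41,42,你",             # one character + its 4-point box per line
            "50,12,60,22,70,32,80,42,好"]
    il = ImageLabel(image, data, format="plaintext", target_size=(256, 64))  # (W,H)
    print(il.label)          # "你好"
    print(il.bboxes.shape)   # (2, 4, 2)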
--------------------------------------------------------------------------------
/utils/label/label_maker.py:
--------------------------------------------------------------------------------
1 | from utils.label import label_utils
2 | import scipy.ndimage.filters as fi
3 | from utils import image_utils
4 | import numpy as np
5 | import logging
6 | import cv2
7 | 
8 | logger = logging.getLogger(__name__)
9 | 
10 | 
11 | class LabelGenerater():
12 |     """
13 |     This class generates the GT labels.
14 |     The loss function needs 3 GTs: Q, H, G.
15 |     Refer to: http://www.piginzoo.com/machine-learning/2020/04/14/ocr-fa-textscanner#%E5%85%B3%E4%BA%8E%E6%8D%9F%E5%A4%B1%E5%87%BD%E6%95%B0
16 |     - Order map GT              : H
17 |     - Localization map GT       : Q
18 |     - Character segmentation GT : G
19 |     """
20 |     shrink = 1  # shrink ratio for each character's wrapping polygon
21 |     ζ = 0.5     # threshold for normalization
22 |     δ = 5       # variance of the Gaussian distribution
23 | 
24 |     def __init__(self, max_sequence, target_image_shape, charset):
25 |         self.max_sequence = max_sequence
26 |         self.target_image_shape = target_image_shape  # [H,W]: [64,256]
27 |         self.target_width = target_image_shape[1]
28 |         self.target_height = target_image_shape[0]
29 |         self.charset = charset
30 | 
31 |     # # adjust all polygons' coordinates
32 |     # def _adjust_by_size(self, boxes, original_shape):
33 |     #     assert len(boxes.shape) == 2 or len(boxes.shape) == 3
34 |     #
35 |     #     ratio_x = original_shape[1] / self.target_width
36 |     #     ratio_y = original_shape[0] / self.target_height
37 |     #
38 |     #     if len(boxes.shape) == 3:
39 |     #         boxes[:, :, 0] = (boxes[:, :, 0] / ratio_x).clip(0, self.target_width)
40 |     #         boxes[:, :, 1] = (boxes[:, :, 1] / ratio_y).clip(0, self.target_heigth)
41 |     #     else:
42 |     #         boxes[:, 0] = (boxes[:, 0] / ratio_x).clip(0, self.target_width)
43 |     #         boxes[:, 1] = (boxes[:, 1] / ratio_y).clip(0, self.target_heigth)
44 |     #
45 |     #     boxes = (boxes + .5).astype(np.int32)
46 |     #     return boxes
47 | 
48 |     # data is ImageLabel{image,[Label]}
49 |     def process(self, image_labels):
50 | 
51 |         # adjust the coordinates
52 |         shape = image_labels.image.shape[:2]  # h,w
53 |         boxes = image_labels.bboxes  # [N,4,2]  N: number of characters
54 |         label = image_labels.label
55 | 
56 |         # # find each bbox's boundary
57 |         # xmins = boxes[:, :, 0].min(axis=1)
58 |         # xmaxs = np.maximum(boxes[:, :, 0].max(axis=1), xmins + 1)
59 |         # ymins = boxes[:, :, 1].min(axis=1)
60 |         # ymaxs = np.maximum(boxes[:, :, 1].max(axis=1), ymins + 1)
61 | 
62 |         character_segment = self.render_character_segemention(image_labels)
63 |         localization_map = np.zeros(self.target_image_shape, dtype=np.float32)
64 |         order_segments = np.zeros((*self.target_image_shape, self.max_sequence), dtype=np.float32)
65 |         # order_maps = np.zeros((*self.target_image_shape, self.max_sequence), dtype=np.float32)
66 | 
67 |         assert boxes.shape[0] <= self.max_sequence, \
68 |             f"the train/validate label text length[{len(image_labels.labels)}] must be no longer than the pre-defined max sequence length[{self.max_sequence}]"
69 | 
70 |         # process each character
71 |         for i in range(boxes.shape[0]):
72 |             # Y_hat_k is the normalized Gaussian map, matching the notation in the paper
73 |             Y_hat_k = self.generate_Y_hat_k_by_gaussian_normalize(self.target_image_shape,
74 |                                                                   boxes[i])  # xmins[i], xmaxs[i], ymins[i], ymaxs[i]
75 |             if Y_hat_k is None:
76 |                 logger.warning("Generating Y_%d failed: char [%s] of [%s]", i, label[i], label)
77 |                 Y_hat_k = np.zeros((self.target_image_shape))
78 | 
79 |             self.render_order_segment(order_segments[:, :, i], Y_hat_k, threshold=self.ζ)
80 |             localization_map = self.render_localization_map(localization_map, Y_hat_k)
81 |             # order_maps = order_segments * localization_map[:, :, np.newaxis]
82 | 
83 |         return character_segment, order_segments, localization_map
84 | 
85 |     # Build a Gaussian distribution around the character's center point; because each
86 |     # point's probability would be tiny, normalize so that the values fall into [0,1].
87 |     # def gaussian_normalize(self, shape, xmin, xmax, ymin, ymax):
88 |     # @return an "image" of shape [H,W], filled with a Gaussian distribution
89 |     def generate_Y_hat_k_by_gaussian_normalize(self, shape, one_word_bboxes):  # one_word_bboxes[4,2]
90 |         # logger.debug("The word bbox: %r, image shape: %r", one_word_bboxes, shape)
91 | 
92 |         # find the bbox boundary
93 |         xmin = one_word_bboxes[:, 0].min()
94 |         xmax = one_word_bboxes[:, 0].max()
95 |         ymin = one_word_bboxes[:, 1].min()
96 |         ymax = one_word_bboxes[:, 1].max()
97 | 
98 |         out = np.zeros(shape)
99 |         h, w = shape[:2]
100 |         # find the "center" of the polygon
101 |         y = (ymax + ymin + 1) // 2
102 |         x = (xmax + xmin + 1) // 2
103 |         if x > w or y > h:
104 |             logger.warning("Annotation is outside the image, failed to generate the Gaussian sample: (xmin:%f, xmax:%f, ymin:%f, ymax:%f, w:%f, x:%f, h:%f, y:%f)", xmin, xmax,
105 |                            ymin, ymax, w, x, h, y)
106 |             return None
107 | 
108 |         # prepare the Gaussian distribution, refer to paper <