├── README.md
├── images
└── framwork.png
├── mmocr
├── .circleci
│ ├── config.yml
│ ├── docker
│ │ └── Dockerfile
│ └── test.yml
├── .codespellrc
├── .coveragerc
├── .dev_scripts
│ ├── benchmark_full_models.txt
│ ├── benchmark_options.py
│ ├── benchmark_train_models.txt
│ ├── covignore.cfg
│ └── diff_coverage_test.sh
├── .gitignore
├── .owners.yml
├── .pre-commit-config.yaml
├── .pylintrc
├── .readthedocs.yml
├── CITATION.cff
├── LICENSE
├── MANIFEST.in
├── README.md
├── configs
│ ├── backbone
│ │ └── oclip
│ │ │ ├── README.md
│ │ │ └── metafile.yml
│ ├── kie
│ │ ├── _base_
│ │ │ ├── datasets
│ │ │ │ ├── wildreceipt-openset.py
│ │ │ │ └── wildreceipt.py
│ │ │ ├── default_runtime.py
│ │ │ └── schedules
│ │ │ │ └── schedule_adam_60e.py
│ │ └── sdmgr
│ │ │ ├── README.md
│ │ │ ├── _base_sdmgr_novisual.py
│ │ │ ├── _base_sdmgr_unet16.py
│ │ │ ├── metafile.yml
│ │ │ ├── sdmgr_novisual_60e_wildreceipt-openset.py
│ │ │ ├── sdmgr_novisual_60e_wildreceipt.py
│ │ │ └── sdmgr_unet16_60e_wildreceipt.py
│ ├── textdet
│ │ ├── _base_
│ │ │ ├── datasets
│ │ │ │ ├── ctw1500.py
│ │ │ │ ├── icdar2013.py
│ │ │ │ ├── icdar2015.py
│ │ │ │ ├── icdar2017.py
│ │ │ │ ├── synthtext.py
│ │ │ │ ├── totaltext.py
│ │ │ │ └── toy_data.py
│ │ │ ├── default_runtime.py
│ │ │ ├── pretrain_runtime.py
│ │ │ └── schedules
│ │ │ │ ├── schedule_adam_600e.py
│ │ │ │ ├── schedule_sgd_100k.py
│ │ │ │ ├── schedule_sgd_1200e.py
│ │ │ │ └── schedule_sgd_base.py
│ │ ├── dbnet
│ │ │ ├── README.md
│ │ │ ├── _base_dbnet_resnet18_fpnc.py
│ │ │ ├── _base_dbnet_resnet50-dcnv2_fpnc.py
│ │ │ ├── dbnet_resnet18_fpnc_100k_synthtext.py
│ │ │ ├── dbnet_resnet18_fpnc_1200e_icdar2015.py
│ │ │ ├── dbnet_resnet18_fpnc_1200e_totaltext.py
│ │ │ ├── dbnet_resnet50-dcnv2_fpnc_100k_synthtext.py
│ │ │ ├── dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py
│ │ │ ├── dbnet_resnet50-oclip_1200e_icdar2015.py
│ │ │ ├── dbnet_resnet50_1200e_icdar2015.py
│ │ │ ├── dbnet_resnet50_120e_synth_train_ic15_test.py
│ │ │ ├── metafile.yml
│ │ │ ├── synth_data_train_100k_ic15_test.py
│ │ │ ├── synth_finetune_from_pretrain_ctw1500.py
│ │ │ └── synth_finetune_from_pretrain_ic15_test.py
│ │ ├── dbnetpp
│ │ │ ├── README.md
│ │ │ ├── _base_dbnetpp_resnet50-dcnv2_fpnc.py
│ │ │ ├── dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py
│ │ │ ├── dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py
│ │ │ ├── dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015.py
│ │ │ ├── dbnetpp_resnet50_fpnc_1200e_icdar2015.py
│ │ │ └── metafile.yml
│ │ ├── drrg
│ │ │ ├── README.md
│ │ │ ├── _base_drrg_resnet50_fpn-unet.py
│ │ │ ├── drrg_resnet50-oclip_fpn-unet_1200e_ctw1500.py
│ │ │ ├── drrg_resnet50_fpn-unet_1200e_ctw1500.py
│ │ │ └── metafile.yml
│ │ ├── fcenet
│ │ │ ├── README.md
│ │ │ ├── _base_fcenet_resnet50-dcnv2_fpn.py
│ │ │ ├── _base_fcenet_resnet50_fpn.py
│ │ │ ├── fcenet_resnet50-dcnv2_fpn_1500e_ctw1500.py
│ │ │ ├── fcenet_resnet50-oclip_fpn_1500e_ctw1500.py
│ │ │ ├── fcenet_resnet50-oclip_fpn_1500e_icdar2015.py
│ │ │ ├── fcenet_resnet50_fpn_1500e_icdar2015.py
│ │ │ ├── fcenet_resnet50_fpn_1500e_totaltext.py
│ │ │ ├── finetune.py
│ │ │ ├── metafile.yml
│ │ │ └── synth_data_train_real_data_test.py
│ │ ├── maskrcnn
│ │ │ ├── README.md
│ │ │ ├── _base_mask-rcnn_resnet50_fpn.py
│ │ │ ├── mask-rcnn_resnet50-oclip_fpn_160e_ctw1500.py
│ │ │ ├── mask-rcnn_resnet50-oclip_fpn_160e_icdar2015.py
│ │ │ ├── mask-rcnn_resnet50_fpn_160e_ctw1500.py
│ │ │ ├── mask-rcnn_resnet50_fpn_160e_icdar2015.py
│ │ │ ├── mask-rcnn_resnet50_fpn_160e_icdar2017.py
│ │ │ └── metafile.yml
│ │ ├── panet
│ │ │ ├── README.md
│ │ │ ├── _base_panet_resnet18_fpem-ffm.py
│ │ │ ├── _base_panet_resnet50_fpem-ffm.py
│ │ │ ├── metafile.yml
│ │ │ ├── panet_resnet18_fpem-ffm_600e_ctw1500.py
│ │ │ ├── panet_resnet18_fpem-ffm_600e_icdar2015.py
│ │ │ ├── panet_resnet50_fpem-ffm_600e_icdar2017.py
│ │ │ └── synth_data_train_real_data_test.py
│ │ ├── psenet
│ │ │ ├── README.md
│ │ │ ├── _base_psenet_resnet50_fpnf.py
│ │ │ ├── metafile.yml
│ │ │ ├── psenet_resnet50-oclip_fpnf_600e_ctw1500.py
│ │ │ ├── psenet_resnet50-oclip_fpnf_600e_icdar2015.py
│ │ │ ├── psenet_resnet50_fpnf_600e_ctw1500.py
│ │ │ ├── psenet_resnet50_fpnf_600e_icdar2015.py
│ │ │ ├── psenet_resnet50_fpnf_600e_icdar2017.py
│ │ │ └── psenet_resnet50_synth_train_ic15_test.py
│ │ └── textsnake
│ │ │ ├── README.md
│ │ │ ├── _base_textsnake_resnet50_fpn-unet.py
│ │ │ ├── metafile.yml
│ │ │ ├── textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500.py
│ │ │ └── textsnake_resnet50_fpn-unet_1200e_ctw1500.py
│ └── textrecog
│ │ ├── _base_
│ │ ├── datasets
│ │ │ ├── coco_text_v1.py
│ │ │ ├── cute80.py
│ │ │ ├── icdar2011.py
│ │ │ ├── icdar2013.py
│ │ │ ├── icdar2015.py
│ │ │ ├── iiit5k.py
│ │ │ ├── mjsynth.py
│ │ │ ├── svt.py
│ │ │ ├── svtp.py
│ │ │ ├── synthtext.py
│ │ │ ├── synthtext_add.py
│ │ │ ├── totaltext.py
│ │ │ └── toy_data.py
│ │ ├── default_runtime.py
│ │ └── schedules
│ │ │ ├── schedule_adadelta_5e.py
│ │ │ ├── schedule_adam_base.py
│ │ │ ├── schedule_adam_step_5e.py
│ │ │ └── schedule_adamw_cos_6e.py
│ │ ├── abinet
│ │ ├── README.md
│ │ ├── _base_abinet-vision.py
│ │ ├── _base_abinet.py
│ │ ├── abinet-vision_20e_st-an_mj.py
│ │ ├── abinet_20e_st-an_mj.py
│ │ └── metafile.yml
│ │ ├── aster
│ │ ├── README.md
│ │ ├── _base_aster.py
│ │ ├── aster_resnet45_6e_st_mj.py
│ │ └── metafile.yml
│ │ ├── crnn
│ │ ├── README.md
│ │ ├── _base_crnn_mini-vgg.py
│ │ ├── crnn_mini-vgg_5e_mj.py
│ │ ├── crnn_mini-vgg_5e_toy.py
│ │ └── metafile.yml
│ │ ├── master
│ │ ├── README.md
│ │ ├── _base_master_resnet31.py
│ │ ├── master_resnet31_12e_st_mj_sa.py
│ │ ├── master_resnet31_12e_toy.py
│ │ └── metafile.yml
│ │ ├── nrtr
│ │ ├── README.md
│ │ ├── _base_nrtr_modality-transform.py
│ │ ├── _base_nrtr_resnet31.py
│ │ ├── metafile.yml
│ │ ├── nrtr_modality-transform_6e_st_mj.py
│ │ ├── nrtr_modality-transform_6e_toy.py
│ │ ├── nrtr_resnet31-1by16-1by8_6e_st_mj.py
│ │ └── nrtr_resnet31-1by8-1by4_6e_st_mj.py
│ │ ├── robust_scanner
│ │ ├── README.md
│ │ ├── _base_robustscanner_resnet31.py
│ │ ├── metafile.yml
│ │ ├── robustscanner_resnet31_5e_st-sub_mj-sub_sa_real.py
│ │ └── robustscanner_resnet31_5e_toy.py
│ │ ├── sar
│ │ ├── README.md
│ │ ├── _base_sar_resnet31_parallel-decoder.py
│ │ ├── metafile.yml
│ │ ├── sar_resnet31_parallel-decoder_5e_st-sub_mj-sub_sa_real.py
│ │ ├── sar_resnet31_parallel-decoder_5e_toy.py
│ │ └── sar_resnet31_sequential-decoder_5e_st-sub_mj-sub_sa_real.py
│ │ ├── satrn
│ │ ├── README.md
│ │ ├── _base_satrn_shallow.py
│ │ ├── metafile.yml
│ │ ├── satrn_shallow-small_5e_st_mj.py
│ │ └── satrn_shallow_5e_st_mj.py
│ │ └── svtr
│ │ ├── README.md
│ │ ├── _base_svtr-tiny.py
│ │ ├── metafile.yml
│ │ ├── svtr-base_20e_st_mj.py
│ │ ├── svtr-large_20e_st_mj.py
│ │ ├── svtr-small_20e_st_mj.py
│ │ └── svtr-tiny_20e_st_mj.py
├── dataset_zoo
│ ├── cocotextv2
│ │ ├── metafile.yml
│ │ ├── sample_anno.md
│ │ ├── textdet.py
│ │ ├── textrecog.py
│ │ └── textspotting.py
│ ├── ctw1500
│ │ ├── metafile.yml
│ │ ├── textdet.py
│ │ ├── textrecog.py
│ │ └── textspotting.py
│ ├── cute80
│ │ ├── metafile.yml
│ │ ├── sample_anno.md
│ │ └── textrecog.py
│ ├── funsd
│ │ ├── metafile.yml
│ │ ├── sample_anno.md
│ │ ├── textdet.py
│ │ ├── textrecog.py
│ │ └── textspotting.py
│ ├── icdar2013
│ │ ├── metafile.yml
│ │ ├── sample_anno.md
│ │ ├── textdet.py
│ │ ├── textrecog.py
│ │ └── textspotting.py
│ ├── icdar2015
│ │ ├── metafile.yml
│ │ ├── sample_anno.md
│ │ ├── textdet.py
│ │ ├── textrecog.py
│ │ └── textspotting.py
│ ├── iiit5k
│ │ ├── metafile.yml
│ │ ├── sample_anno.md
│ │ └── textrecog.py
│ ├── mjsynth
│ │ ├── metafile.yml
│ │ ├── sample_anno.md
│ │ └── textrecog.py
│ ├── naf
│ │ ├── metafile.yml
│ │ ├── sample_anno.md
│ │ ├── textdet.py
│ │ ├── textrecog.py
│ │ └── textspotting.py
│ ├── sroie
│ │ ├── metafile.yml
│ │ ├── sample_anno.md
│ │ ├── textdet.py
│ │ ├── textrecog.py
│ │ └── textspotting.py
│ ├── svt
│ │ ├── metafile.yml
│ │ ├── sample_anno.md
│ │ ├── textdet.py
│ │ ├── textrecog.py
│ │ └── textspotting.py
│ ├── svtp
│ │ ├── metafile.yml
│ │ ├── sample_anno.md
│ │ └── textrecog.py
│ ├── synthtext
│ │ ├── metafile.yml
│ │ ├── sample_anno.md
│ │ ├── textdet.py
│ │ ├── textrecog.py
│ │ └── textspotting.py
│ ├── textocr
│ │ ├── metafile.yml
│ │ ├── sample_anno.md
│ │ ├── textdet.py
│ │ ├── textrecog.py
│ │ └── textspotting.py
│ ├── totaltext
│ │ ├── metafile.yml
│ │ ├── sample_anno.md
│ │ ├── textdet.py
│ │ ├── textrecog.py
│ │ └── textspotting.py
│ └── wildreceipt
│ │ ├── kie.py
│ │ ├── metafile.yml
│ │ ├── sample_anno.md
│ │ ├── textdet.py
│ │ ├── textrecog.py
│ │ └── textspotting.py
├── demo
│ ├── demo_densetext_det.jpg
│ ├── demo_kie.jpeg
│ ├── demo_text_det.jpg
│ ├── demo_text_ocr.jpg
│ ├── demo_text_recog.jpg
│ └── resources
│ │ ├── demo_kie_pred.png
│ │ ├── det_vis.png
│ │ ├── kie_vis.png
│ │ ├── log_analysis_demo.png
│ │ └── rec_vis.png
├── dicts
│ ├── chinese_english_digits.txt
│ ├── english_digits_symbols.txt
│ ├── english_digits_symbols_space.txt
│ ├── lower_english_digits.txt
│ ├── lower_english_digits_space.txt
│ └── sdmgr_dict.txt
├── docker
│ ├── Dockerfile
│ └── serve
│ │ ├── Dockerfile
│ │ ├── config.properties
│ │ └── entrypoint.sh
├── docs
│ ├── en
│ │ ├── Makefile
│ │ ├── _static
│ │ │ ├── css
│ │ │ │ └── readthedocs.css
│ │ │ ├── images
│ │ │ │ └── mmocr.png
│ │ │ └── js
│ │ │ │ ├── collapsed.js
│ │ │ │ └── table.js
│ │ ├── _templates
│ │ │ └── classtemplate.rst
│ │ ├── api
│ │ │ ├── apis.rst
│ │ │ ├── datasets.rst
│ │ │ ├── engine.rst
│ │ │ ├── evaluation.rst
│ │ │ ├── models.rst
│ │ │ ├── structures.rst
│ │ │ ├── transforms.rst
│ │ │ ├── utils.rst
│ │ │ └── visualization.rst
│ │ ├── basic_concepts
│ │ │ ├── convention.md
│ │ │ ├── data_flow.md
│ │ │ ├── datasets.md
│ │ │ ├── engine.md
│ │ │ ├── evaluation.md
│ │ │ ├── models.md
│ │ │ ├── overview.md
│ │ │ ├── structures.md
│ │ │ ├── transforms.md
│ │ │ └── visualizers.md
│ │ ├── conf.py
│ │ ├── contact.md
│ │ ├── dataset_zoo.py
│ │ ├── docutils.conf
│ │ ├── get_started
│ │ │ ├── faq.md
│ │ │ ├── install.md
│ │ │ ├── overview.md
│ │ │ └── quick_run.md
│ │ ├── index.rst
│ │ ├── make.bat
│ │ ├── merge_docs.sh
│ │ ├── migration
│ │ │ ├── branches.md
│ │ │ ├── code.md
│ │ │ ├── dataset.md
│ │ │ ├── model.md
│ │ │ ├── news.md
│ │ │ ├── overview.md
│ │ │ └── transforms.md
│ │ ├── notes
│ │ │ ├── branches.md
│ │ │ ├── changelog.md
│ │ │ ├── changelog_v0.x.md
│ │ │ └── contribution_guide.md
│ │ ├── project_zoo.py
│ │ ├── requirements.txt
│ │ ├── stats.py
│ │ ├── switch_language.md
│ │ ├── user_guides
│ │ │ ├── config.md
│ │ │ ├── data_prepare
│ │ │ │ ├── dataset_preparer.md
│ │ │ │ ├── det.md
│ │ │ │ ├── kie.md
│ │ │ │ └── recog.md
│ │ │ ├── dataset_prepare.md
│ │ │ ├── inference.md
│ │ │ ├── train_test.md
│ │ │ ├── useful_tools.md
│ │ │ └── visualization.md
│ │ └── weight_list.py
│ └── zh_cn
│ │ ├── Makefile
│ │ ├── _static
│ │ ├── css
│ │ │ └── readthedocs.css
│ │ ├── images
│ │ │ └── mmocr.png
│ │ └── js
│ │ │ ├── collapsed.js
│ │ │ └── table.js
│ │ ├── _templates
│ │ └── classtemplate.rst
│ │ ├── api
│ │ ├── apis.rst
│ │ ├── datasets.rst
│ │ ├── engine.rst
│ │ ├── evaluation.rst
│ │ ├── models.rst
│ │ ├── structures.rst
│ │ ├── transforms.rst
│ │ ├── utils.rst
│ │ └── visualization.rst
│ │ ├── basic_concepts
│ │ ├── convention.md
│ │ ├── data_flow.md
│ │ ├── datasets.md
│ │ ├── engine.md
│ │ ├── evaluation.md
│ │ ├── models.md
│ │ ├── overview.md
│ │ ├── structures.md
│ │ ├── transforms.md
│ │ └── visualizers.md
│ │ ├── conf.py
│ │ ├── contact.md
│ │ ├── cp_origin_docs.sh
│ │ ├── dataset_zoo.py
│ │ ├── docutils.conf
│ │ ├── get_started
│ │ ├── install.md
│ │ ├── overview.md
│ │ └── quick_run.md
│ │ ├── index.rst
│ │ ├── make.bat
│ │ ├── merge_docs.sh
│ │ ├── migration
│ │ ├── branches.md
│ │ ├── code.md
│ │ ├── dataset.md
│ │ ├── model.md
│ │ ├── news.md
│ │ ├── overview.md
│ │ └── transforms.md
│ │ ├── notes
│ │ ├── branches.md
│ │ └── contribution_guide.md
│ │ ├── project_zoo.py
│ │ ├── stats.py
│ │ ├── switch_language.md
│ │ ├── user_guides
│ │ ├── config.md
│ │ ├── data_prepare
│ │ │ ├── dataset_preparer.md
│ │ │ └── kie.md
│ │ ├── dataset_prepare.md
│ │ ├── inference.md
│ │ ├── train_test.md
│ │ ├── useful_tools.md
│ │ └── visualization.md
│ │ └── weight_list.py
├── mmocr
│ ├── __init__.py
│ ├── apis
│ │ ├── __init__.py
│ │ └── inferencers
│ │ │ ├── __init__.py
│ │ │ ├── base_mmocr_inferencer.py
│ │ │ ├── kie_inferencer.py
│ │ │ ├── mmocr_inferencer.py
│ │ │ ├── textdet_inferencer.py
│ │ │ ├── textrec_inferencer.py
│ │ │ └── textspot_inferencer.py
│ ├── datasets
│ │ ├── __init__.py
│ │ ├── dataset_wrapper.py
│ │ ├── icdar_dataset.py
│ │ ├── ocr_dataset.py
│ │ ├── preparers
│ │ │ ├── __init__.py
│ │ │ ├── config_generators
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── textdet_config_generator.py
│ │ │ │ ├── textrecog_config_generator.py
│ │ │ │ └── textspotting_config_generator.py
│ │ │ ├── data_preparer.py
│ │ │ ├── dumpers
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── json_dumper.py
│ │ │ │ ├── lmdb_dumper.py
│ │ │ │ └── wild_receipt_openset_dumper.py
│ │ │ ├── gatherers
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── mono_gatherer.py
│ │ │ │ ├── naf_gatherer.py
│ │ │ │ └── pair_gatherer.py
│ │ │ ├── obtainers
│ │ │ │ ├── __init__.py
│ │ │ │ └── naive_data_obtainer.py
│ │ │ ├── packers
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── textdet_packer.py
│ │ │ │ ├── textrecog_packer.py
│ │ │ │ ├── textspotting_packer.py
│ │ │ │ └── wildreceipt_packer.py
│ │ │ └── parsers
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── coco_parser.py
│ │ │ │ ├── ctw1500_parser.py
│ │ │ │ ├── funsd_parser.py
│ │ │ │ ├── icdar_txt_parser.py
│ │ │ │ ├── mjsynth_parser.py
│ │ │ │ ├── naf_parser.py
│ │ │ │ ├── sroie_parser.py
│ │ │ │ ├── svt_parser.py
│ │ │ │ ├── synthtext_parser.py
│ │ │ │ ├── totaltext_parser.py
│ │ │ │ └── wildreceipt_parser.py
│ │ ├── recog_lmdb_dataset.py
│ │ ├── recog_text_dataset.py
│ │ ├── samplers
│ │ │ ├── __init__.py
│ │ │ └── batch_aug.py
│ │ ├── transforms
│ │ │ ├── __init__.py
│ │ │ ├── adapters.py
│ │ │ ├── formatting.py
│ │ │ ├── loading.py
│ │ │ ├── ocr_transforms.py
│ │ │ ├── textdet_transforms.py
│ │ │ ├── textrecog_transforms.py
│ │ │ └── wrappers.py
│ │ └── wildreceipt_dataset.py
│ ├── engine
│ │ ├── __init__.py
│ │ └── hooks
│ │ │ ├── __init__.py
│ │ │ └── visualization_hook.py
│ ├── evaluation
│ │ ├── __init__.py
│ │ ├── evaluator
│ │ │ ├── __init__.py
│ │ │ └── multi_datasets_evaluator.py
│ │ ├── functional
│ │ │ ├── __init__.py
│ │ │ └── hmean.py
│ │ └── metrics
│ │ │ ├── __init__.py
│ │ │ ├── f_metric.py
│ │ │ ├── hmean_iou_metric.py
│ │ │ └── recog_metric.py
│ ├── models
│ │ ├── __init__.py
│ │ ├── common
│ │ │ ├── __init__.py
│ │ │ ├── backbones
│ │ │ │ ├── __init__.py
│ │ │ │ ├── clip_resnet.py
│ │ │ │ └── unet.py
│ │ │ ├── dictionary
│ │ │ │ ├── __init__.py
│ │ │ │ └── dictionary.py
│ │ │ ├── layers
│ │ │ │ ├── __init__.py
│ │ │ │ └── transformer_layers.py
│ │ │ ├── losses
│ │ │ │ ├── __init__.py
│ │ │ │ ├── bce_loss.py
│ │ │ │ ├── ce_loss.py
│ │ │ │ ├── dice_loss.py
│ │ │ │ └── l1_loss.py
│ │ │ ├── modules
│ │ │ │ ├── __init__.py
│ │ │ │ └── transformer_module.py
│ │ │ └── plugins
│ │ │ │ ├── __init__.py
│ │ │ │ └── common.py
│ │ ├── kie
│ │ │ ├── __init__.py
│ │ │ ├── extractors
│ │ │ │ ├── __init__.py
│ │ │ │ └── sdmgr.py
│ │ │ ├── heads
│ │ │ │ ├── __init__.py
│ │ │ │ └── sdmgr_head.py
│ │ │ ├── module_losses
│ │ │ │ ├── __init__.py
│ │ │ │ └── sdmgr_module_loss.py
│ │ │ └── postprocessors
│ │ │ │ ├── __init__.py
│ │ │ │ └── sdmgr_postprocessor.py
│ │ ├── textdet
│ │ │ ├── __init__.py
│ │ │ ├── data_preprocessors
│ │ │ │ ├── __init__.py
│ │ │ │ └── data_preprocessor.py
│ │ │ ├── detectors
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── dbnet.py
│ │ │ │ ├── drrg.py
│ │ │ │ ├── fcenet.py
│ │ │ │ ├── mmdet_wrapper.py
│ │ │ │ ├── panet.py
│ │ │ │ ├── psenet.py
│ │ │ │ ├── single_stage_text_detector.py
│ │ │ │ └── textsnake.py
│ │ │ ├── heads
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── db_head.py
│ │ │ │ ├── drrg_head.py
│ │ │ │ ├── fce_head.py
│ │ │ │ ├── pan_head.py
│ │ │ │ ├── pse_head.py
│ │ │ │ └── textsnake_head.py
│ │ │ ├── module_losses
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── db_module_loss.py
│ │ │ │ ├── drrg_module_loss.py
│ │ │ │ ├── fce_module_loss.py
│ │ │ │ ├── pan_module_loss.py
│ │ │ │ ├── pse_module_loss.py
│ │ │ │ ├── seg_based_module_loss.py
│ │ │ │ └── textsnake_module_loss.py
│ │ │ ├── necks
│ │ │ │ ├── __init__.py
│ │ │ │ ├── fpem_ffm.py
│ │ │ │ ├── fpn_cat.py
│ │ │ │ ├── fpn_unet.py
│ │ │ │ └── fpnf.py
│ │ │ └── postprocessors
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── db_postprocessor.py
│ │ │ │ ├── drrg_postprocessor.py
│ │ │ │ ├── fce_postprocessor.py
│ │ │ │ ├── pan_postprocessor.py
│ │ │ │ ├── pse_postprocessor.py
│ │ │ │ └── textsnake_postprocessor.py
│ │ └── textrecog
│ │ │ ├── __init__.py
│ │ │ ├── backbones
│ │ │ ├── __init__.py
│ │ │ ├── mini_vgg.py
│ │ │ ├── mobilenet_v2.py
│ │ │ ├── nrtr_modality_transformer.py
│ │ │ ├── resnet.py
│ │ │ ├── resnet31_ocr.py
│ │ │ ├── resnet_abi.py
│ │ │ └── shallow_cnn.py
│ │ │ ├── data_preprocessors
│ │ │ ├── __init__.py
│ │ │ └── data_preprocessor.py
│ │ │ ├── decoders
│ │ │ ├── __init__.py
│ │ │ ├── abi_fuser.py
│ │ │ ├── abi_language_decoder.py
│ │ │ ├── abi_vision_decoder.py
│ │ │ ├── aster_decoder.py
│ │ │ ├── base.py
│ │ │ ├── crnn_decoder.py
│ │ │ ├── master_decoder.py
│ │ │ ├── nrtr_decoder.py
│ │ │ ├── position_attention_decoder.py
│ │ │ ├── robust_scanner_fuser.py
│ │ │ ├── sar_decoder.py
│ │ │ ├── sar_decoder_with_bs.py
│ │ │ ├── sequence_attention_decoder.py
│ │ │ └── svtr_decoder.py
│ │ │ ├── encoders
│ │ │ ├── __init__.py
│ │ │ ├── abi_encoder.py
│ │ │ ├── aster_encoder.py
│ │ │ ├── base.py
│ │ │ ├── channel_reduction_encoder.py
│ │ │ ├── nrtr_encoder.py
│ │ │ ├── sar_encoder.py
│ │ │ ├── satrn_encoder.py
│ │ │ └── svtr_encoder.py
│ │ │ ├── layers
│ │ │ ├── __init__.py
│ │ │ ├── conv_layer.py
│ │ │ ├── dot_product_attention_layer.py
│ │ │ ├── lstm_layer.py
│ │ │ ├── position_aware_layer.py
│ │ │ ├── robust_scanner_fusion_layer.py
│ │ │ └── satrn_layers.py
│ │ │ ├── module_losses
│ │ │ ├── __init__.py
│ │ │ ├── abi_module_loss.py
│ │ │ ├── base.py
│ │ │ ├── ce_module_loss.py
│ │ │ └── ctc_module_loss.py
│ │ │ ├── plugins
│ │ │ ├── __init__.py
│ │ │ └── common.py
│ │ │ ├── postprocessors
│ │ │ ├── __init__.py
│ │ │ ├── attn_postprocessor.py
│ │ │ ├── base.py
│ │ │ └── ctc_postprocessor.py
│ │ │ ├── preprocessors
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ └── tps_preprocessor.py
│ │ │ └── recognizers
│ │ │ ├── __init__.py
│ │ │ ├── abinet.py
│ │ │ ├── aster.py
│ │ │ ├── base.py
│ │ │ ├── crnn.py
│ │ │ ├── encoder_decoder_recognizer.py
│ │ │ ├── encoder_decoder_recognizer_tta.py
│ │ │ ├── master.py
│ │ │ ├── nrtr.py
│ │ │ ├── robust_scanner.py
│ │ │ ├── sar.py
│ │ │ ├── satrn.py
│ │ │ └── svtr.py
│ ├── registry.py
│ ├── structures
│ │ ├── __init__.py
│ │ ├── kie_data_sample.py
│ │ ├── textdet_data_sample.py
│ │ ├── textrecog_data_sample.py
│ │ └── textspotting_data_sample.py
│ ├── testing
│ │ ├── __init__.py
│ │ └── data.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── bbox_utils.py
│ │ ├── bezier_utils.py
│ │ ├── check_argument.py
│ │ ├── collect_env.py
│ │ ├── data_converter_utils.py
│ │ ├── fileio.py
│ │ ├── img_utils.py
│ │ ├── mask_utils.py
│ │ ├── parsers.py
│ │ ├── point_utils.py
│ │ ├── polygon_utils.py
│ │ ├── processing.py
│ │ ├── setup_env.py
│ │ ├── string_utils.py
│ │ ├── transform_utils.py
│ │ └── typing_utils.py
│ ├── version.py
│ └── visualization
│ │ ├── __init__.py
│ │ ├── base_visualizer.py
│ │ ├── kie_visualizer.py
│ │ ├── textdet_visualizer.py
│ │ ├── textrecog_visualizer.py
│ │ └── textspotting_visualizer.py
├── model-index.yml
├── my_test.sh
├── my_train.sh
├── projects
│ ├── ABCNet
│ │ ├── README.md
│ │ ├── README_V2.md
│ │ ├── abcnet
│ │ │ ├── __init__.py
│ │ │ ├── metric
│ │ │ │ ├── __init__.py
│ │ │ │ └── e2e_hmean_iou_metric.py
│ │ │ ├── model
│ │ │ │ ├── __init__.py
│ │ │ │ ├── abcnet.py
│ │ │ │ ├── abcnet_det_head.py
│ │ │ │ ├── abcnet_det_module_loss.py
│ │ │ │ ├── abcnet_det_postprocessor.py
│ │ │ │ ├── abcnet_postprocessor.py
│ │ │ │ ├── abcnet_rec.py
│ │ │ │ ├── abcnet_rec_backbone.py
│ │ │ │ ├── abcnet_rec_decoder.py
│ │ │ │ ├── abcnet_rec_encoder.py
│ │ │ │ ├── base_roi_extractor.py
│ │ │ │ ├── base_roi_head.py
│ │ │ │ ├── bezier_roi_extractor.py
│ │ │ │ ├── bifpn.py
│ │ │ │ ├── coordinate_head.py
│ │ │ │ ├── rec_roi_head.py
│ │ │ │ └── two_stage_text_spotting.py
│ │ │ └── utils
│ │ │ │ ├── __init__.py
│ │ │ │ └── bezier_utils.py
│ │ ├── config
│ │ │ ├── _base_
│ │ │ │ ├── datasets
│ │ │ │ │ └── icdar2015.py
│ │ │ │ ├── default_runtime.py
│ │ │ │ └── schedules
│ │ │ │ │ └── schedule_sgd_500e.py
│ │ │ ├── abcnet
│ │ │ │ ├── _base_abcnet_resnet50_fpn.py
│ │ │ │ └── abcnet_resnet50_fpn_500e_icdar2015.py
│ │ │ └── abcnet_v2
│ │ │ │ ├── _base_abcnet-v2_resnet50_bifpn.py
│ │ │ │ └── abcnet-v2_resnet50_bifpn_500e_icdar2015.py
│ │ └── dicts
│ │ │ └── abcnet.txt
│ ├── README.md
│ ├── SPTS
│ │ ├── README.md
│ │ ├── config
│ │ │ ├── _base_
│ │ │ │ ├── datasets
│ │ │ │ │ ├── ctw1500-spts.py
│ │ │ │ │ ├── icdar2013-spts.py
│ │ │ │ │ ├── icdar2013.py
│ │ │ │ │ ├── icdar2015-spts.py
│ │ │ │ │ ├── icdar2015.py
│ │ │ │ │ ├── mlt-spts.py
│ │ │ │ │ ├── syntext1-spts.py
│ │ │ │ │ ├── syntext2-spts.py
│ │ │ │ │ ├── totaltext-spts.py
│ │ │ │ │ └── totaltext.py
│ │ │ │ └── default_runtime.py
│ │ │ └── spts
│ │ │ │ ├── _base_spts_resnet50.py
│ │ │ │ ├── _base_spts_resnet50_mmocr.py
│ │ │ │ ├── spts_resnet50_8xb8-150e_pretrain-spts.py
│ │ │ │ ├── spts_resnet50_8xb8-200e_icdar2013.py
│ │ │ │ ├── spts_resnet50_8xb8-200e_icdar2015.py
│ │ │ │ └── spts_resnet50_8xb8-200e_totaltext.py
│ │ ├── dicts
│ │ │ └── spts.txt
│ │ ├── spts
│ │ │ ├── __init__.py
│ │ │ ├── datasets
│ │ │ │ ├── __init__.py
│ │ │ │ ├── adel_dataset.py
│ │ │ │ └── transforms
│ │ │ │ │ └── spts_transforms.py
│ │ │ ├── metric
│ │ │ │ ├── __init__.py
│ │ │ │ └── e2e_point_metric.py
│ │ │ └── model
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base_text_spotter.py
│ │ │ │ ├── encoder_decoder_text_spotter.py
│ │ │ │ ├── position_embedding.py
│ │ │ │ ├── spts.py
│ │ │ │ ├── spts_decoder.py
│ │ │ │ ├── spts_dictionary.py
│ │ │ │ ├── spts_encoder.py
│ │ │ │ ├── spts_module_loss.py
│ │ │ │ └── spts_postprocessor.py
│ │ └── tools
│ │ │ └── ckpt_adapter.py
│ ├── example_project
│ │ ├── README.md
│ │ ├── configs
│ │ │ └── dbnet_dummy-resnet_fpnc_1200e_icdar2015.py
│ │ └── dummy
│ │ │ ├── __init__.py
│ │ │ └── dummy_resnet.py
│ ├── faq.md
│ └── selected.txt
├── requirements.txt
├── requirements
│ ├── albu.txt
│ ├── build.txt
│ ├── docs.txt
│ ├── mminstall.txt
│ ├── optional.txt
│ ├── readthedocs.txt
│ ├── runtime.txt
│ └── tests.txt
├── resources
│ ├── illustration.jpg
│ ├── kie.jpg
│ ├── mmocr-logo.png
│ ├── textdet.jpg
│ ├── textrecog.jpg
│ └── verification.png
├── setup.cfg
├── setup.py
├── tests
│ ├── models
│ │ └── textrecog
│ │ │ └── test_preprocessors
│ │ │ └── test_tps_preprocessor.py
│ ├── test_apis
│ │ └── test_inferencers
│ │ │ ├── test_kie_inferencer.py
│ │ │ ├── test_mmocr_inferencer.py
│ │ │ ├── test_textdet_inferencer.py
│ │ │ └── test_textrec_inferencer.py
│ ├── test_datasets
│ │ ├── test_dataset_wrapper.py
│ │ ├── test_icdar_dataset.py
│ │ ├── test_preparers
│ │ │ ├── test_config_generators
│ │ │ │ ├── test_textdet_config_generator.py
│ │ │ │ ├── test_textrecog_config_generator.py
│ │ │ │ └── test_textspotting_config_generator.py
│ │ │ ├── test_data_preparer.py
│ │ │ ├── test_dumpers
│ │ │ │ └── test_dumpers.py
│ │ │ ├── test_gatherers
│ │ │ │ ├── test_mono_gatherer.py
│ │ │ │ └── test_pair_gatherer.py
│ │ │ ├── test_packers
│ │ │ │ ├── test_textdet_packer.py
│ │ │ │ ├── test_textrecog_packer.py
│ │ │ │ └── test_textspotting_packer.py
│ │ │ └── test_parsers
│ │ │ │ ├── test_ctw1500_parser.py
│ │ │ │ ├── test_funsd_parser.py
│ │ │ │ ├── test_icdar_txt_parsers.py
│ │ │ │ ├── test_naf_parser.py
│ │ │ │ ├── test_sroie_parser.py
│ │ │ │ ├── test_svt_parsers.py
│ │ │ │ ├── test_tt_parsers.py
│ │ │ │ └── test_wildreceipt_parsers.py
│ │ ├── test_recog_lmdb_dataset.py
│ │ ├── test_recog_text_dataset.py
│ │ ├── test_samplers
│ │ │ └── test_batch_aug.py
│ │ ├── test_transforms
│ │ │ ├── test_adapters.py
│ │ │ ├── test_formatting.py
│ │ │ ├── test_loading.py
│ │ │ ├── test_ocr_transforms.py
│ │ │ ├── test_textdet_transforms.py
│ │ │ ├── test_textrecog_transforms.py
│ │ │ └── test_wrappers.py
│ │ └── test_wildreceipt_dataset.py
│ ├── test_engine
│ │ └── test_hooks
│ │ │ └── test_visualization_hook.py
│ ├── test_evaluation
│ │ ├── test_evaluator
│ │ │ └── test_multi_datasets_evaluator.py
│ │ ├── test_functional
│ │ │ └── test_hmean.py
│ │ └── test_metrics
│ │ │ ├── test_f_metric.py
│ │ │ ├── test_hmean_iou_metric.py
│ │ │ └── test_recog_metric.py
│ ├── test_init.py
│ ├── test_models
│ │ ├── test_common
│ │ │ ├── test_backbones
│ │ │ │ └── test_clip_resnet.py
│ │ │ ├── test_layers
│ │ │ │ └── test_transformer_layers.py
│ │ │ ├── test_losses
│ │ │ │ ├── test_bce_loss.py
│ │ │ │ ├── test_dice_loss.py
│ │ │ │ └── test_l1_loss.py
│ │ │ ├── test_modules
│ │ │ │ └── test_transformer_module.py
│ │ │ └── test_plugins
│ │ │ │ └── test_avgpool.py
│ │ ├── test_kie
│ │ │ ├── test_extractors
│ │ │ │ └── test_sdmgr.py
│ │ │ ├── test_heads
│ │ │ │ └── test_sdmgr_head.py
│ │ │ ├── test_module_losses
│ │ │ │ └── test_sdmgr_module_loss.py
│ │ │ └── test_postprocessors
│ │ │ │ └── test_sdmgr_postprocessor.py
│ │ ├── test_textdet
│ │ │ ├── test_data_preprocessors
│ │ │ │ └── test_textdet_data_preprocessor.py
│ │ │ ├── test_detectors
│ │ │ │ └── test_drrg.py
│ │ │ ├── test_heads
│ │ │ │ ├── test_base_head.py
│ │ │ │ ├── test_db_head.py
│ │ │ │ ├── test_drrg_head.py
│ │ │ │ ├── test_fce_head.py
│ │ │ │ ├── test_pan_head.py
│ │ │ │ ├── test_pse_head.py
│ │ │ │ └── test_textsnake_head.py
│ │ │ ├── test_module_losses
│ │ │ │ ├── test_db_module_loss.py
│ │ │ │ ├── test_drrg_module_loss.py
│ │ │ │ ├── test_fce_module_loss.py
│ │ │ │ ├── test_pan_module_loss.py
│ │ │ │ ├── test_pse_module_loss.py
│ │ │ │ └── test_textsnake_module_loss.py
│ │ │ ├── test_necks
│ │ │ │ ├── test_fpem_ffm.py
│ │ │ │ ├── test_fpn_cat.py
│ │ │ │ ├── test_fpn_unet.py
│ │ │ │ └── test_fpnf.py
│ │ │ ├── test_postprocessors
│ │ │ │ ├── test_base_postprocessor.py
│ │ │ │ ├── test_db_postprocessor.py
│ │ │ │ ├── test_drrg_postprocessor.py
│ │ │ │ ├── test_fce_postprocessor.py
│ │ │ │ ├── test_pan_postprocessor.py
│ │ │ │ ├── test_pse_postprocessor.py
│ │ │ │ └── test_textsnake_postprocessor.py
│ │ │ └── test_wrappers
│ │ │ │ └── test_mmdet_wrapper.py
│ │ └── test_textrecog
│ │ │ ├── test_backbones
│ │ │ ├── test_mini_vgg.py
│ │ │ ├── test_mobilenet_v2.py
│ │ │ ├── test_nrtr_modality_transformer.py
│ │ │ ├── test_resnet.py
│ │ │ ├── test_resnet31_ocr.py
│ │ │ ├── test_resnet_abi.py
│ │ │ └── test_shallow_cnn.py
│ │ │ ├── test_data_preprocessors
│ │ │ └── test_data_preprocessor.py
│ │ │ ├── test_decoders
│ │ │ ├── test_abi_fuser.py
│ │ │ ├── test_abi_language_decoder.py
│ │ │ ├── test_abi_vision_decoder.py
│ │ │ ├── test_aster_decoder.py
│ │ │ ├── test_base_decoder.py
│ │ │ ├── test_crnn_decoder.py
│ │ │ ├── test_master_decoder.py
│ │ │ ├── test_nrtr_decoder.py
│ │ │ ├── test_position_attention_decoder.py
│ │ │ ├── test_robust_scanner_fuser.py
│ │ │ ├── test_sar_decoder.py
│ │ │ ├── test_sequence_attention_decoder.py
│ │ │ └── test_svtr_decoder.py
│ │ │ ├── test_dictionary
│ │ │ └── test_dictionary.py
│ │ │ ├── test_encoders
│ │ │ ├── test_abi_encoder.py
│ │ │ ├── test_aster_encoder.py
│ │ │ ├── test_channel_reduction_encoder.py
│ │ │ ├── test_nrtr_encoder.py
│ │ │ ├── test_sar_encoder.py
│ │ │ ├── test_satrn_decoder.py
│ │ │ └── test_svtr_encoder.py
│ │ │ ├── test_layers
│ │ │ └── test_conv_layer.py
│ │ │ ├── test_module_losses
│ │ │ ├── test_abi_module_loss.py
│ │ │ ├── test_base_recog_module_loss.py
│ │ │ ├── test_ce_module_loss.py
│ │ │ └── test_ctc_module_loss.py
│ │ │ ├── test_plugins
│ │ │ ├── test_gcamodule.py
│ │ │ └── test_maxpool.py
│ │ │ ├── test_postprocessors
│ │ │ ├── test_attn_postprocessor.py
│ │ │ ├── test_base_textrecog_postprocessor.py
│ │ │ └── test_ctc_postprocessor.py
│ │ │ └── test_recognizers
│ │ │ ├── test_encoder_decoder_recognizer.py
│ │ │ └── test_encoder_decoder_recognizer_tta.py
│ ├── test_structures
│ │ ├── test_kie_data_sample.py
│ │ ├── test_textdet_data_sample.py
│ │ ├── test_textrecog_data_sample.py
│ │ └── test_textspotting_data_sample.py
│ ├── test_utils
│ │ ├── test_bbox_utils.py
│ │ ├── test_check_argument.py
│ │ ├── test_data_converter_utils.py
│ │ ├── test_fileio.py
│ │ ├── test_img_utils.py
│ │ ├── test_mask_utils.py
│ │ ├── test_parsers.py
│ │ ├── test_point_utils.py
│ │ ├── test_polygon_utils.py
│ │ ├── test_processing.py
│ │ ├── test_string_utils.py
│ │ └── test_transform_utils.py
│ └── test_visualization
│ │ ├── test_base_visualizer.py
│ │ ├── test_kie_visualizer.py
│ │ ├── test_textdet_visualizer.py
│ │ ├── test_textrecog_visualizer.py
│ │ └── test_textspotting_visualizer.py
└── tools
│ ├── analysis_tools
│ ├── browse_dataset.py
│ ├── get_flops.py
│ ├── offline_eval.py
│ └── print_config.py
│ ├── dataset_converters
│ ├── common
│ │ ├── curvedsyntext_converter.py
│ │ └── extract_kaist.py
│ ├── kie
│ │ └── closeset_to_openset.py
│ ├── prepare_dataset.py
│ ├── textdet
│ │ ├── art_converter.py
│ │ ├── bid_converter.py
│ │ ├── coco_to_line_dict.py
│ │ ├── cocotext_converter.py
│ │ ├── data_migrator.py
│ │ ├── detext_converter.py
│ │ ├── funsd_converter.py
│ │ ├── hiertext_converter.py
│ │ ├── ic11_converter.py
│ │ ├── ilst_converter.py
│ │ ├── imgur_converter.py
│ │ ├── kaist_converter.py
│ │ ├── lsvt_converter.py
│ │ ├── lv_converter.py
│ │ ├── mtwi_converter.py
│ │ ├── naf_converter.py
│ │ ├── rctw_converter.py
│ │ ├── rects_converter.py
│ │ ├── sroie_converter.py
│ │ └── vintext_converter.py
│ └── textrecog
│ │ ├── art_converter.py
│ │ ├── bid_converter.py
│ │ ├── cocotext_converter.py
│ │ ├── data_migrator.py
│ │ ├── detext_converter.py
│ │ ├── funsd_converter.py
│ │ ├── hiertext_converter.py
│ │ ├── ic11_converter.py
│ │ ├── ilst_converter.py
│ │ ├── imgur_converter.py
│ │ ├── kaist_converter.py
│ │ ├── lmdb_converter.py
│ │ ├── lsvt_converter.py
│ │ ├── lv_converter.py
│ │ ├── mtwi_converter.py
│ │ ├── naf_converter.py
│ │ ├── openvino_converter.py
│ │ ├── rctw_converter.py
│ │ ├── rects_converter.py
│ │ ├── sroie_converter.py
│ │ └── vintext_converter.py
│ ├── dist_test.sh
│ ├── dist_train.sh
│ ├── infer.py
│ ├── model_converters
│ └── publish_model.py
│ ├── slurm_test.sh
│ ├── slurm_train.sh
│ ├── test.py
│ └── train.py
└── textfussion
├── CITATION.cff
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── Makefile
├── PHILOSOPHY.md
├── README.md
├── _typos.toml
├── build
└── lib
│ └── diffusers
│ ├── __init__.py
│ ├── commands
│ ├── __init__.py
│ ├── diffusers_cli.py
│ └── env.py
│ ├── configuration_utils.py
│ ├── dependency_versions_check.py
│ ├── dependency_versions_table.py
│ ├── experimental
│ ├── __init__.py
│ └── rl
│ │ ├── __init__.py
│ │ └── value_guided_sampling.py
│ ├── image_processor.py
│ ├── loaders.py
│ ├── models
│ ├── __init__.py
│ ├── attention.py
│ ├── attention_flax.py
│ ├── attention_processor.py
│ ├── autoencoder_kl.py
│ ├── controlnet.py
│ ├── controlnet_flax.py
│ ├── cross_attention.py
│ ├── dual_transformer_2d.py
│ ├── embeddings.py
│ ├── embeddings_flax.py
│ ├── modeling_flax_pytorch_utils.py
│ ├── modeling_flax_utils.py
│ ├── modeling_pytorch_flax_utils.py
│ ├── modeling_utils.py
│ ├── prior_transformer.py
│ ├── resnet.py
│ ├── resnet_flax.py
│ ├── t5_film_transformer.py
│ ├── transformer_2d.py
│ ├── transformer_temporal.py
│ ├── unet_1d.py
│ ├── unet_1d_blocks.py
│ ├── unet_2d.py
│ ├── unet_2d_blocks.py
│ ├── unet_2d_blocks_flax.py
│ ├── unet_2d_condition.py
│ ├── unet_2d_condition_flax.py
│ ├── unet_3d_blocks.py
│ ├── unet_3d_condition.py
│ ├── vae.py
│ ├── vae_flax.py
│ └── vq_model.py
│ ├── optimization.py
│ ├── pipeline_utils.py
│ ├── pipelines
│ ├── __init__.py
│ ├── alt_diffusion
│ │ ├── __init__.py
│ │ ├── modeling_roberta_series.py
│ │ ├── pipeline_alt_diffusion.py
│ │ └── pipeline_alt_diffusion_img2img.py
│ ├── audio_diffusion
│ │ ├── __init__.py
│ │ ├── mel.py
│ │ └── pipeline_audio_diffusion.py
│ ├── audioldm
│ │ ├── __init__.py
│ │ └── pipeline_audioldm.py
│ ├── dance_diffusion
│ │ ├── __init__.py
│ │ └── pipeline_dance_diffusion.py
│ ├── ddim
│ │ ├── __init__.py
│ │ └── pipeline_ddim.py
│ ├── ddpm
│ │ ├── __init__.py
│ │ └── pipeline_ddpm.py
│ ├── dit
│ │ ├── __init__.py
│ │ └── pipeline_dit.py
│ ├── latent_diffusion
│ │ ├── __init__.py
│ │ ├── pipeline_latent_diffusion.py
│ │ └── pipeline_latent_diffusion_superresolution.py
│ ├── latent_diffusion_uncond
│ │ ├── __init__.py
│ │ └── pipeline_latent_diffusion_uncond.py
│ ├── onnx_utils.py
│ ├── paint_by_example
│ │ ├── __init__.py
│ │ ├── image_encoder.py
│ │ └── pipeline_paint_by_example.py
│ ├── pipeline_flax_utils.py
│ ├── pipeline_utils.py
│ ├── pndm
│ │ ├── __init__.py
│ │ └── pipeline_pndm.py
│ ├── repaint
│ │ ├── __init__.py
│ │ └── pipeline_repaint.py
│ ├── score_sde_ve
│ │ ├── __init__.py
│ │ └── pipeline_score_sde_ve.py
│ ├── semantic_stable_diffusion
│ │ ├── __init__.py
│ │ └── pipeline_semantic_stable_diffusion.py
│ ├── spectrogram_diffusion
│ │ ├── __init__.py
│ │ ├── continous_encoder.py
│ │ ├── midi_utils.py
│ │ ├── notes_encoder.py
│ │ └── pipeline_spectrogram_diffusion.py
│ ├── stable_diffusion
│ │ ├── __init__.py
│ │ ├── convert_from_ckpt.py
│ │ ├── pipeline_cycle_diffusion.py
│ │ ├── pipeline_flax_stable_diffusion.py
│ │ ├── pipeline_flax_stable_diffusion_controlnet.py
│ │ ├── pipeline_flax_stable_diffusion_img2img.py
│ │ ├── pipeline_flax_stable_diffusion_inpaint.py
│ │ ├── pipeline_onnx_stable_diffusion.py
│ │ ├── pipeline_onnx_stable_diffusion_img2img.py
│ │ ├── pipeline_onnx_stable_diffusion_inpaint.py
│ │ ├── pipeline_onnx_stable_diffusion_inpaint_legacy.py
│ │ ├── pipeline_onnx_stable_diffusion_upscale.py
│ │ ├── pipeline_stable_diffusion.py
│ │ ├── pipeline_stable_diffusion_attend_and_excite.py
│ │ ├── pipeline_stable_diffusion_controlnet.py
│ │ ├── pipeline_stable_diffusion_depth2img.py
│ │ ├── pipeline_stable_diffusion_image_variation.py
│ │ ├── pipeline_stable_diffusion_img2img.py
│ │ ├── pipeline_stable_diffusion_inpaint.py
│ │ ├── pipeline_stable_diffusion_inpaint_legacy.py
│ │ ├── pipeline_stable_diffusion_instruct_pix2pix.py
│ │ ├── pipeline_stable_diffusion_k_diffusion.py
│ │ ├── pipeline_stable_diffusion_latent_upscale.py
│ │ ├── pipeline_stable_diffusion_model_editing.py
│ │ ├── pipeline_stable_diffusion_panorama.py
│ │ ├── pipeline_stable_diffusion_pix2pix_zero.py
│ │ ├── pipeline_stable_diffusion_sag.py
│ │ ├── pipeline_stable_diffusion_upscale.py
│ │ ├── pipeline_stable_unclip.py
│ │ ├── pipeline_stable_unclip_img2img.py
│ │ ├── safety_checker.py
│ │ ├── safety_checker_flax.py
│ │ └── stable_unclip_image_normalizer.py
│ ├── stable_diffusion_safe
│ │ ├── __init__.py
│ │ ├── pipeline_stable_diffusion_safe.py
│ │ └── safety_checker.py
│ ├── stochastic_karras_ve
│ │ ├── __init__.py
│ │ └── pipeline_stochastic_karras_ve.py
│ ├── text_to_video_synthesis
│ │ ├── __init__.py
│ │ └── pipeline_text_to_video_synth.py
│ ├── unclip
│ │ ├── __init__.py
│ │ ├── pipeline_unclip.py
│ │ ├── pipeline_unclip_image_variation.py
│ │ └── text_proj.py
│ ├── versatile_diffusion
│ │ ├── __init__.py
│ │ ├── modeling_text_unet.py
│ │ ├── pipeline_versatile_diffusion.py
│ │ ├── pipeline_versatile_diffusion_dual_guided.py
│ │ ├── pipeline_versatile_diffusion_image_variation.py
│ │ └── pipeline_versatile_diffusion_text_to_image.py
│ └── vq_diffusion
│ │ ├── __init__.py
│ │ └── pipeline_vq_diffusion.py
│ ├── schedulers
│ ├── __init__.py
│ ├── scheduling_ddim.py
│ ├── scheduling_ddim_flax.py
│ ├── scheduling_ddim_inverse.py
│ ├── scheduling_ddpm.py
│ ├── scheduling_ddpm_flax.py
│ ├── scheduling_deis_multistep.py
│ ├── scheduling_dpmsolver_multistep.py
│ ├── scheduling_dpmsolver_multistep_flax.py
│ ├── scheduling_dpmsolver_singlestep.py
│ ├── scheduling_euler_ancestral_discrete.py
│ ├── scheduling_euler_discrete.py
│ ├── scheduling_heun_discrete.py
│ ├── scheduling_ipndm.py
│ ├── scheduling_k_dpm_2_ancestral_discrete.py
│ ├── scheduling_k_dpm_2_discrete.py
│ ├── scheduling_karras_ve.py
│ ├── scheduling_karras_ve_flax.py
│ ├── scheduling_lms_discrete.py
│ ├── scheduling_lms_discrete_flax.py
│ ├── scheduling_pndm.py
│ ├── scheduling_pndm_flax.py
│ ├── scheduling_repaint.py
│ ├── scheduling_sde_ve.py
│ ├── scheduling_sde_ve_flax.py
│ ├── scheduling_sde_vp.py
│ ├── scheduling_unclip.py
│ ├── scheduling_unipc_multistep.py
│ ├── scheduling_utils.py
│ ├── scheduling_utils_flax.py
│ └── scheduling_vq_diffusion.py
│ ├── training_utils.py
│ └── utils
│ ├── __init__.py
│ ├── accelerate_utils.py
│ ├── constants.py
│ ├── deprecation_utils.py
│ ├── doc_utils.py
│ ├── dummy_flax_and_transformers_objects.py
│ ├── dummy_flax_objects.py
│ ├── dummy_note_seq_objects.py
│ ├── dummy_onnx_objects.py
│ ├── dummy_pt_objects.py
│ ├── dummy_torch_and_librosa_objects.py
│ ├── dummy_torch_and_scipy_objects.py
│ ├── dummy_torch_and_transformers_and_k_diffusion_objects.py
│ ├── dummy_torch_and_transformers_and_onnx_objects.py
│ ├── dummy_torch_and_transformers_objects.py
│ ├── dummy_transformers_and_torch_and_note_seq_objects.py
│ ├── dynamic_modules_utils.py
│ ├── hub_utils.py
│ ├── import_utils.py
│ ├── logging.py
│ ├── model_card_template.md
│ ├── outputs.py
│ ├── pil_utils.py
│ ├── testing_utils.py
│ └── torch_utils.py
├── docker
├── diffusers-flax-cpu
│ └── Dockerfile
├── diffusers-flax-tpu
│ └── Dockerfile
├── diffusers-onnxruntime-cpu
│ └── Dockerfile
├── diffusers-onnxruntime-cuda
│ └── Dockerfile
├── diffusers-pytorch-cpu
│ └── Dockerfile
└── diffusers-pytorch-cuda
│ └── Dockerfile
├── docs
├── README.md
├── TRANSLATING.md
└── source
│ ├── _config.py
│ ├── en
│ ├── _toctree.yml
│ ├── api
│ │ ├── configuration.mdx
│ │ ├── diffusion_pipeline.mdx
│ │ ├── experimental
│ │ │ └── rl.mdx
│ │ ├── loaders.mdx
│ │ ├── logging.mdx
│ │ ├── models.mdx
│ │ ├── outputs.mdx
│ │ ├── pipelines
│ │ │ ├── alt_diffusion.mdx
│ │ │ ├── audio_diffusion.mdx
│ │ │ ├── audioldm.mdx
│ │ │ ├── cycle_diffusion.mdx
│ │ │ ├── dance_diffusion.mdx
│ │ │ ├── ddim.mdx
│ │ │ ├── ddpm.mdx
│ │ │ ├── dit.mdx
│ │ │ ├── latent_diffusion.mdx
│ │ │ ├── latent_diffusion_uncond.mdx
│ │ │ ├── overview.mdx
│ │ │ ├── paint_by_example.mdx
│ │ │ ├── pndm.mdx
│ │ │ ├── repaint.mdx
│ │ │ ├── score_sde_ve.mdx
│ │ │ ├── semantic_stable_diffusion.mdx
│ │ │ ├── spectrogram_diffusion.mdx
│ │ │ ├── stable_diffusion
│ │ │ │ ├── attend_and_excite.mdx
│ │ │ │ ├── controlnet.mdx
│ │ │ │ ├── depth2img.mdx
│ │ │ │ ├── image_variation.mdx
│ │ │ │ ├── img2img.mdx
│ │ │ │ ├── inpaint.mdx
│ │ │ │ ├── latent_upscale.mdx
│ │ │ │ ├── model_editing.mdx
│ │ │ │ ├── overview.mdx
│ │ │ │ ├── panorama.mdx
│ │ │ │ ├── pix2pix.mdx
│ │ │ │ ├── pix2pix_zero.mdx
│ │ │ │ ├── self_attention_guidance.mdx
│ │ │ │ ├── text2img.mdx
│ │ │ │ └── upscale.mdx
│ │ │ ├── stable_diffusion_2.mdx
│ │ │ ├── stable_diffusion_safe.mdx
│ │ │ ├── stable_unclip.mdx
│ │ │ ├── stochastic_karras_ve.mdx
│ │ │ ├── text_to_video.mdx
│ │ │ ├── unclip.mdx
│ │ │ ├── versatile_diffusion.mdx
│ │ │ └── vq_diffusion.mdx
│ │ └── schedulers
│ │ │ ├── ddim.mdx
│ │ │ ├── ddim_inverse.mdx
│ │ │ ├── ddpm.mdx
│ │ │ ├── deis.mdx
│ │ │ ├── dpm_discrete.mdx
│ │ │ ├── dpm_discrete_ancestral.mdx
│ │ │ ├── euler.mdx
│ │ │ ├── euler_ancestral.mdx
│ │ │ ├── heun.mdx
│ │ │ ├── ipndm.mdx
│ │ │ ├── lms_discrete.mdx
│ │ │ ├── multistep_dpm_solver.mdx
│ │ │ ├── overview.mdx
│ │ │ ├── pndm.mdx
│ │ │ ├── repaint.mdx
│ │ │ ├── score_sde_ve.mdx
│ │ │ ├── score_sde_vp.mdx
│ │ │ ├── singlestep_dpm_solver.mdx
│ │ │ ├── stochastic_karras_ve.mdx
│ │ │ ├── unipc.mdx
│ │ │ └── vq_diffusion.mdx
│ ├── conceptual
│ │ ├── contribution.mdx
│ │ ├── ethical_guidelines.mdx
│ │ ├── evaluation.mdx
│ │ └── philosophy.mdx
│ ├── imgs
│ │ ├── access_request.png
│ │ └── diffusers_library.jpg
│ ├── index.mdx
│ ├── installation.mdx
│ ├── optimization
│ │ ├── fp16.mdx
│ │ ├── habana.mdx
│ │ ├── mps.mdx
│ │ ├── onnx.mdx
│ │ ├── open_vino.mdx
│ │ ├── opt_overview.mdx
│ │ ├── torch2.0.mdx
│ │ └── xformers.mdx
│ ├── quicktour.mdx
│ ├── stable_diffusion.mdx
│ ├── training
│ │ ├── controlnet.mdx
│ │ ├── dreambooth.mdx
│ │ ├── instructpix2pix.mdx
│ │ ├── lora.mdx
│ │ ├── overview.mdx
│ │ ├── text2image.mdx
│ │ ├── text_inversion.mdx
│ │ └── unconditional_training.mdx
│ ├── tutorials
│ │ ├── basic_training.mdx
│ │ └── tutorial_overview.mdx
│ └── using-diffusers
│ │ ├── audio.mdx
│ │ ├── conditional_image_generation.mdx
│ │ ├── contribute_pipeline.mdx
│ │ ├── controlling_generation.mdx
│ │ ├── custom_pipeline_examples.mdx
│ │ ├── custom_pipeline_overview.mdx
│ │ ├── depth2img.mdx
│ │ ├── img2img.mdx
│ │ ├── inpaint.mdx
│ │ ├── kerascv.mdx
│ │ ├── loading.mdx
│ │ ├── loading_overview.mdx
│ │ ├── other-modalities.mdx
│ │ ├── pipeline_overview.mdx
│ │ ├── reproducibility.mdx
│ │ ├── reusing_seeds.mdx
│ │ ├── rl.mdx
│ │ ├── schedulers.mdx
│ │ ├── stable_diffusion_jax_how_to.mdx
│ │ ├── unconditional_image_generation.mdx
│ │ ├── using_safetensors
│ │ ├── using_safetensors.mdx
│ │ ├── weighted_prompts.mdx
│ │ └── write_own_pipeline.mdx
│ ├── ko
│ ├── _toctree.yml
│ ├── in_translation.mdx
│ ├── index.mdx
│ ├── installation.mdx
│ └── quicktour.mdx
│ └── zh
│ ├── _toctree.yml
│ ├── index.mdx
│ ├── installation.mdx
│ └── quicktour.mdx
├── examples
├── README.md
├── community
│ ├── README.md
│ ├── bit_diffusion.py
│ ├── checkpoint_merger.py
│ ├── clip_guided_stable_diffusion.py
│ ├── clip_guided_stable_diffusion_img2img.py
│ ├── composable_stable_diffusion.py
│ ├── ddim_noise_comparative_analysis.py
│ ├── imagic_stable_diffusion.py
│ ├── img2img_inpainting.py
│ ├── interpolate_stable_diffusion.py
│ ├── lpw_stable_diffusion.py
│ ├── lpw_stable_diffusion_onnx.py
│ ├── magic_mix.py
│ ├── multilingual_stable_diffusion.py
│ ├── one_step_unet.py
│ ├── sd_text2img_k_diffusion.py
│ ├── seed_resize_stable_diffusion.py
│ ├── speech_to_image_diffusion.py
│ ├── stable_diffusion_comparison.py
│ ├── stable_diffusion_controlnet_img2img.py
│ ├── stable_diffusion_controlnet_inpaint.py
│ ├── stable_diffusion_controlnet_inpaint_img2img.py
│ ├── stable_diffusion_mega.py
│ ├── stable_unclip.py
│ ├── text_inpainting.py
│ ├── tiled_upscaling.py
│ ├── unclip_image_interpolation.py
│ ├── unclip_text_interpolation.py
│ └── wildcard_stable_diffusion.py
├── conftest.py
├── controlnet
│ ├── README.md
│ ├── requirements.txt
│ ├── requirements_flax.txt
│ ├── train_controlnet.py
│ └── train_controlnet_flax.py
├── dreambooth
│ ├── README.md
│ ├── requirements.txt
│ ├── requirements_flax.txt
│ ├── train_dreambooth.py
│ ├── train_dreambooth_flax.py
│ └── train_dreambooth_lora.py
├── inference
│ ├── README.md
│ ├── image_to_image.py
│ └── inpainting.py
├── instruct_pix2pix
│ ├── README.md
│ ├── requirements.txt
│ └── train_instruct_pix2pix.py
├── research_projects
│ ├── README.md
│ ├── colossalai
│ │ ├── README.md
│ │ ├── inference.py
│ │ ├── requirement.txt
│ │ └── train_dreambooth_colossalai.py
│ ├── dreambooth_inpaint
│ │ ├── README.md
│ │ ├── requirements.txt
│ │ ├── train_dreambooth_inpaint.py
│ │ └── train_dreambooth_inpaint_lora.py
│ ├── intel_opts
│ │ ├── README.md
│ │ ├── inference_bf16.py
│ │ └── textual_inversion
│ │ │ ├── README.md
│ │ │ ├── requirements.txt
│ │ │ └── textual_inversion_bf16.py
│ ├── lora
│ │ ├── README.md
│ │ ├── requirements.txt
│ │ └── train_text_to_image_lora.py
│ ├── mulit_token_textual_inversion
│ │ ├── README.md
│ │ ├── multi_token_clip.py
│ │ ├── requirements.txt
│ │ ├── requirements_flax.txt
│ │ ├── textual_inversion.py
│ │ └── textual_inversion_flax.py
│ ├── multi_subject_dreambooth
│ │ ├── README.md
│ │ ├── requirements.txt
│ │ └── train_multi_subject_dreambooth.py
│ └── onnxruntime
│ │ ├── README.md
│ │ ├── text_to_image
│ │ ├── README.md
│ │ ├── requirements.txt
│ │ └── train_text_to_image.py
│ │ ├── textual_inversion
│ │ ├── README.md
│ │ ├── requirements.txt
│ │ └── textual_inversion.py
│ │ └── unconditional_image_generation
│ │ ├── README.md
│ │ ├── requirements.txt
│ │ └── train_unconditional.py
├── rl
│ ├── README.md
│ └── run_diffuser_locomotion.py
├── test_examples.py
├── text_to_image
│ ├── README.md
│ ├── requirements.txt
│ ├── requirements_flax.txt
│ ├── train_text_to_image.py
│ ├── train_text_to_image_flax.py
│ └── train_text_to_image_lora.py
├── textual_inversion
│ ├── README.md
│ ├── requirements.txt
│ ├── requirements_flax.txt
│ ├── textual_inversion.py
│ └── textual_inversion_flax.py
└── unconditional_image_generation
│ ├── README.md
│ ├── requirements.txt
│ └── train_unconditional.py
├── my_inpainting
├── label_list.json
├── my_build_synth_data_baseline.py
├── my_build_synth_data_baseline_large_num.py
├── my_test_inpainting_baseline.py
├── my_test_inpainting_baseline_batch_test.py
├── my_test_inpainting_char.py
├── my_test_inpainting_char_multi.py
├── my_test_inpainting_only_pre_prompt_word_multi.py
├── my_test_inpainting_pure_word_prefix_prompt_batch_test.py
├── my_test_inpainting_with_adapter_char_multi.py
├── my_test_inpainting_with_adapter_with_fussion_te_word_multi.py
├── my_test_inpainting_with_adapter_with_pre_prompt_word_multi.py
├── my_test_inpainting_with_adapter_word_multi.py
├── my_test_inpainting_with_adapter_zero_prompt_char_multi.py
├── my_test_inpainting_with_char_adapter_char_multi.py
├── my_test_inpainting_with_controlnet_batch_test.py
├── my_test_inpainting_with_full_controlnet_batch_test.py
├── my_test_inpainting_with_full_controlnet_dual_text_batch_test.py
├── my_train_only_pre_prompt.sh
├── my_train_prefix_prompt.sh
├── my_train_with_adapter_with_pre_prompt.sh
├── my_train_with_char_adapter.sh
├── my_train_with_controlnet.sh
├── my_train_with_full_controlnet.sh
├── my_train_with_full_controlnet_with_dual_text.sh
├── my_train_with_single_adapter.sh
├── my_train_with_single_adapter_with_fussion_te.sh
├── new_paradigm_any_demo.py
├── new_paradigm_build_baseline.py
├── new_paradigm_build_baseline_no_crop.py
├── new_paradigm_build_demo.py
├── new_paradigm_build_dual_text.py
├── new_paradigm_build_with_text_vae.py
├── new_paradigm_dual_text_encoder.sh
├── new_paradigm_train.sh
├── src
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-310.pyc
│ │ └── __init__.cpython-39.pyc
│ ├── build_synth_data
│ │ ├── __pycache__
│ │ │ ├── batch_utils.cpython-310.pyc
│ │ │ ├── crop_tools.cpython-310.pyc
│ │ │ ├── glyph_utils.cpython-310.pyc
│ │ │ └── rec_inferencer.cpython-310.pyc
│ │ ├── batch_utils.py
│ │ ├── crop_tools.py
│ │ ├── glyph_utils.py
│ │ └── rec_inferencer.py
│ ├── dataset
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-310.pyc
│ │ │ ├── __init__.cpython-38.pyc
│ │ │ ├── __init__.cpython-39.pyc
│ │ │ ├── base_text_dataset.cpython-310.pyc
│ │ │ ├── batch_utils.cpython-310.pyc
│ │ │ ├── crop_image_for_test.cpython-310.pyc
│ │ │ ├── new_paradigm_text_dataset.cpython-310.pyc
│ │ │ ├── text_dataset.cpython-310.pyc
│ │ │ ├── text_dataset.cpython-39.pyc
│ │ │ ├── text_mapper.cpython-310.pyc
│ │ │ ├── text_mapper.cpython-38.pyc
│ │ │ ├── text_mapper.cpython-39.pyc
│ │ │ ├── utils.cpython-310.pyc
│ │ │ ├── utils.cpython-39.pyc
│ │ │ ├── zoom_up_pure_text_dataset.cpython-310.pyc
│ │ │ ├── zoom_up_text_dataset.cpython-310.pyc
│ │ │ ├── zoom_up_text_dataset.cpython-38.pyc
│ │ │ ├── zoom_up_text_dataset.cpython-39.pyc
│ │ │ └── zoom_up_with_blank_text_dataset.cpython-310.pyc
│ │ ├── base_text_dataset.py
│ │ ├── batch_utils.py
│ │ ├── crop_image_for_test.py
│ │ ├── new_paradigm_text_dataset.py
│ │ ├── text_dataset.py
│ │ ├── text_mapper.py
│ │ ├── utils.py
│ │ ├── zoom_up_pure_text_dataset.py
│ │ ├── zoom_up_text_dataset.py
│ │ └── zoom_up_with_blank_text_dataset.py
│ ├── engines
│ │ ├── __init__.py
│ │ ├── finetune_text_to_image.py
│ │ ├── finetune_text_to_image_inpainting.py
│ │ ├── finetune_text_to_image_inpainting_with_adapter_with_pre_prompt.py
│ │ ├── finetune_text_to_image_inpainting_with_char_adapter.py
│ │ ├── finetune_text_to_image_inpainting_with_controlnet.py
│ │ ├── finetune_text_to_image_inpainting_with_full_controlnet.py
│ │ ├── finetune_text_to_image_inpainting_with_full_controlnet_with_dual_text.py
│ │ ├── finetune_text_to_image_inpainting_with_pre_prompt.py
│ │ ├── finetune_text_to_image_inpainting_with_prefix_prompt.py
│ │ ├── finetune_text_to_image_inpainting_with_single_adapter.py
│ │ ├── finetune_text_to_image_inpainting_with_single_adapter_with_fussion_te.py
│ │ ├── new_paradigm_inpainting.py
│ │ └── new_paradigm_inpainting_dual_text.py
│ ├── models
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-310.pyc
│ │ │ ├── __init__.cpython-38.pyc
│ │ │ ├── __init__.cpython-39.pyc
│ │ │ ├── adapter.cpython-310.pyc
│ │ │ ├── adapter.cpython-39.pyc
│ │ │ ├── adapter_with_char_embedding.cpython-39.pyc
│ │ │ ├── adapter_with_fussion_TE.cpython-310.pyc
│ │ │ ├── adapter_with_fussion_TE.cpython-38.pyc
│ │ │ ├── adapter_with_fussion_TE.cpython-39.pyc
│ │ │ ├── adapter_with_pre_prompt.cpython-310.pyc
│ │ │ ├── attention.cpython-310.pyc
│ │ │ ├── char_encoder.cpython-310.pyc
│ │ │ ├── controlnet.cpython-310.pyc
│ │ │ ├── dual_controlnet.cpython-310.pyc
│ │ │ ├── fussion_text_embedding.cpython-39.pyc
│ │ │ ├── modules.cpython-310.pyc
│ │ │ ├── only_pre_prompt.cpython-310.pyc
│ │ │ ├── only_prefix_prompt.cpython-310.pyc
│ │ │ ├── openaimodel.cpython-310.pyc
│ │ │ ├── transformer_2d_with_controlnet.cpython-310.pyc
│ │ │ ├── transformer_2d_with_dual_text_controlnet.cpython-310.pyc
│ │ │ ├── unet_2d_blocks_with_adapter.cpython-310.pyc
│ │ │ ├── unet_2d_blocks_with_adapter.cpython-38.pyc
│ │ │ ├── unet_2d_blocks_with_adapter.cpython-39.pyc
│ │ │ ├── unet_2d_blocks_with_controlnet.cpython-310.pyc
│ │ │ ├── unet_2d_blocks_with_dual_text_controlnet.cpython-310.pyc
│ │ │ ├── unet_2d_with_adapter.cpython-310.pyc
│ │ │ ├── unet_2d_with_adapter.cpython-38.pyc
│ │ │ ├── unet_2d_with_adapter.cpython-39.pyc
│ │ │ ├── unet_2d_with_controlnet.cpython-310.pyc
│ │ │ ├── unet_2d_with_dual_text.cpython-310.pyc
│ │ │ ├── unet_2d_with_dual_text_controlnet.cpython-310.pyc
│ │ │ └── union_net.cpython-310.pyc
│ │ ├── adapter.py
│ │ ├── adapter_with_char_embedding.py
│ │ ├── adapter_with_fussion_TE.py
│ │ ├── adapter_with_pre_prompt.py
│ │ ├── attention.py
│ │ ├── char_encoder.py
│ │ ├── controlnet.py
│ │ ├── dual_controlnet.py
│ │ ├── fussion_text_embedding.py
│ │ ├── modules.py
│ │ ├── only_pre_prompt.py
│ │ ├── only_prefix_prompt.py
│ │ ├── openaimodel.py
│ │ ├── ori_controlnet.py
│ │ ├── transformer_2d_with_dual_text_controlnet.py
│ │ ├── unet_2d_blocks_with_adapter.py
│ │ ├── unet_2d_blocks_with_controlnet.py
│ │ ├── unet_2d_blocks_with_dual_text_controlnet.py
│ │ ├── unet_2d_with_adapter.py
│ │ ├── unet_2d_with_controlnet.py
│ │ ├── unet_2d_with_dual_text.py
│ │ ├── unet_2d_with_dual_text_controlnet.py
│ │ └── union_net.py
│ ├── pipelines
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-310.pyc
│ │ │ ├── __init__.cpython-39.pyc
│ │ │ ├── new_paradigm_inpainting_dual_text_encoder.cpython-310.pyc
│ │ │ ├── stable_diffusion_inpainting.cpython-310.pyc
│ │ │ ├── stable_diffusion_inpainting.cpython-39.pyc
│ │ │ ├── stable_diffusion_inpainting_dual_text_full_controlnet.cpython-310.pyc
│ │ │ ├── stable_diffusion_inpainting_full_controlnet.cpython-310.pyc
│ │ │ ├── stable_diffusion_inpainting_mask_controlnet.cpython-310.pyc
│ │ │ ├── stable_diffusion_inpainting_only_controlnet.cpython-310.pyc
│ │ │ ├── stable_diffusion_inpainting_only_pre_prompt.cpython-310.pyc
│ │ │ ├── stable_diffusion_inpainting_only_prefix_prompt.cpython-310.pyc
│ │ │ ├── stable_diffusion_inpainting_text_glyph.cpython-310.pyc
│ │ │ ├── stable_diffusion_inpainting_text_vae_text_glyph.cpython-310.pyc
│ │ │ ├── stable_diffusion_inpainting_with_adapter.cpython-39.pyc
│ │ │ ├── stable_diffusion_inpainting_with_adapter_with_fussion_te.cpython-310.pyc
│ │ │ ├── stable_diffusion_inpainting_with_adapter_with_fussion_te.cpython-39.pyc
│ │ │ ├── stable_diffusion_inpainting_with_adapter_zero_prompt.cpython-39.pyc
│ │ │ └── stable_diffusion_inpainting_with_char_adapter.cpython-39.pyc
│ │ ├── new_paradigm_inpainting_dual_text_encoder.py
│ │ ├── stable_diffusion_inpainting.py
│ │ ├── stable_diffusion_inpainting_dual_text_full_controlnet.py
│ │ ├── stable_diffusion_inpainting_full_controlnet.py
│ │ ├── stable_diffusion_inpainting_mask_controlnet.py
│ │ ├── stable_diffusion_inpainting_only_controlnet.py
│ │ ├── stable_diffusion_inpainting_only_pre_prompt.py
│ │ ├── stable_diffusion_inpainting_only_prefix_prompt.py
│ │ ├── stable_diffusion_inpainting_text_glyph.py
│ │ ├── stable_diffusion_inpainting_text_vae_text_glyph.py
│ │ ├── stable_diffusion_inpainting_with_adapter.py
│ │ ├── stable_diffusion_inpainting_with_adapter_with_fussion_te.py
│ │ ├── stable_diffusion_inpainting_with_adapter_zero_prompt.py
│ │ └── stable_diffusion_inpainting_with_char_adapter.py
│ └── utils
│ │ ├── ori.png
│ │ ├── output.png
│ │ ├── res.png
│ │ ├── res_area.png
│ │ ├── res_trilinear.png
│ │ └── vis_mask.py
├── train_vae.py
└── train_vae.sh
├── my_pipeline.py
├── pyproject.toml
├── requirements.txt
├── scripts
├── __init__.py
├── change_naming_configs_and_checkpoints.py
├── conversion_ldm_uncond.py
├── convert_dance_diffusion_to_diffusers.py
├── convert_ddpm_original_checkpoint_to_diffusers.py
├── convert_diffusers_to_original_stable_diffusion.py
├── convert_dit_to_diffusers.py
├── convert_k_upscaler_to_diffusers.py
├── convert_kakao_brain_unclip_to_diffusers.py
├── convert_ldm_original_checkpoint_to_diffusers.py
├── convert_lora_safetensor_to_diffusers.py
├── convert_models_diffuser_to_diffusers.py
├── convert_ms_text_to_video_to_diffusers.py
├── convert_music_spectrogram_to_diffusers.py
├── convert_ncsnpp_original_checkpoint_to_diffusers.py
├── convert_original_audioldm_to_diffusers.py
├── convert_original_controlnet_to_diffusers.py
├── convert_original_stable_diffusion_to_diffusers.py
├── convert_stable_diffusion_checkpoint_to_onnx.py
├── convert_unclip_txt2img_to_image_variation.py
├── convert_vae_diff_to_onnx.py
├── convert_vae_pt_to_diffusers.py
├── convert_versatile_diffusion_to_diffusers.py
├── convert_vq_diffusion_to_diffusers.py
└── generate_logits.py
├── setup.cfg
├── setup.py
├── src
├── diffusers.egg-info
│ ├── PKG-INFO
│ ├── SOURCES.txt
│ ├── dependency_links.txt
│ ├── entry_points.txt
│ ├── requires.txt
│ └── top_level.txt
└── diffusers
│ ├── __init__.py
│ ├── commands
│ ├── __init__.py
│ ├── diffusers_cli.py
│ └── env.py
│ ├── configuration_utils.py
│ ├── dependency_versions_check.py
│ ├── dependency_versions_table.py
│ ├── experimental
│ ├── README.md
│ ├── __init__.py
│ └── rl
│ │ ├── __init__.py
│ │ └── value_guided_sampling.py
│ ├── image_processor.py
│ ├── loaders.py
│ ├── models
│ ├── README.md
│ ├── __init__.py
│ ├── attention.py
│ ├── attention_flax.py
│ ├── attention_processor.py
│ ├── autoencoder_kl.py
│ ├── controlnet.py
│ ├── controlnet_flax.py
│ ├── cross_attention.py
│ ├── dual_transformer_2d.py
│ ├── embeddings.py
│ ├── embeddings_flax.py
│ ├── modeling_flax_pytorch_utils.py
│ ├── modeling_flax_utils.py
│ ├── modeling_pytorch_flax_utils.py
│ ├── modeling_utils.py
│ ├── prior_transformer.py
│ ├── resnet.py
│ ├── resnet_flax.py
│ ├── t5_film_transformer.py
│ ├── transformer_2d.py
│ ├── transformer_temporal.py
│ ├── unet_1d.py
│ ├── unet_1d_blocks.py
│ ├── unet_2d.py
│ ├── unet_2d_blocks.py
│ ├── unet_2d_blocks_flax.py
│ ├── unet_2d_condition.py
│ ├── unet_2d_condition_flax.py
│ ├── unet_3d_blocks.py
│ ├── unet_3d_condition.py
│ ├── vae.py
│ ├── vae_flax.py
│ └── vq_model.py
│ ├── optimization.py
│ ├── pipeline_utils.py
│ ├── pipelines
│ ├── README.md
│ ├── __init__.py
│ ├── alt_diffusion
│ │ ├── __init__.py
│ │ ├── modeling_roberta_series.py
│ │ ├── pipeline_alt_diffusion.py
│ │ └── pipeline_alt_diffusion_img2img.py
│ ├── audio_diffusion
│ │ ├── __init__.py
│ │ ├── mel.py
│ │ └── pipeline_audio_diffusion.py
│ ├── audioldm
│ │ ├── __init__.py
│ │ └── pipeline_audioldm.py
│ ├── dance_diffusion
│ │ ├── __init__.py
│ │ └── pipeline_dance_diffusion.py
│ ├── ddim
│ │ ├── __init__.py
│ │ └── pipeline_ddim.py
│ ├── ddpm
│ │ ├── __init__.py
│ │ └── pipeline_ddpm.py
│ ├── dit
│ │ ├── __init__.py
│ │ └── pipeline_dit.py
│ ├── latent_diffusion
│ │ ├── __init__.py
│ │ ├── pipeline_latent_diffusion.py
│ │ └── pipeline_latent_diffusion_superresolution.py
│ ├── latent_diffusion_uncond
│ │ ├── __init__.py
│ │ └── pipeline_latent_diffusion_uncond.py
│ ├── onnx_utils.py
│ ├── paint_by_example
│ │ ├── __init__.py
│ │ ├── image_encoder.py
│ │ └── pipeline_paint_by_example.py
│ ├── pipeline_flax_utils.py
│ ├── pipeline_utils.py
│ ├── pndm
│ │ ├── __init__.py
│ │ └── pipeline_pndm.py
│ ├── repaint
│ │ ├── __init__.py
│ │ └── pipeline_repaint.py
│ ├── score_sde_ve
│ │ ├── __init__.py
│ │ └── pipeline_score_sde_ve.py
│ ├── semantic_stable_diffusion
│ │ ├── __init__.py
│ │ └── pipeline_semantic_stable_diffusion.py
│ ├── spectrogram_diffusion
│ │ ├── __init__.py
│ │ ├── continous_encoder.py
│ │ ├── midi_utils.py
│ │ ├── notes_encoder.py
│ │ └── pipeline_spectrogram_diffusion.py
│ ├── stable_diffusion
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── convert_from_ckpt.py
│ │ ├── pipeline_cycle_diffusion.py
│ │ ├── pipeline_flax_stable_diffusion.py
│ │ ├── pipeline_flax_stable_diffusion_controlnet.py
│ │ ├── pipeline_flax_stable_diffusion_img2img.py
│ │ ├── pipeline_flax_stable_diffusion_inpaint.py
│ │ ├── pipeline_onnx_stable_diffusion.py
│ │ ├── pipeline_onnx_stable_diffusion_img2img.py
│ │ ├── pipeline_onnx_stable_diffusion_inpaint.py
│ │ ├── pipeline_onnx_stable_diffusion_inpaint_legacy.py
│ │ ├── pipeline_onnx_stable_diffusion_upscale.py
│ │ ├── pipeline_stable_diffusion.py
│ │ ├── pipeline_stable_diffusion_attend_and_excite.py
│ │ ├── pipeline_stable_diffusion_controlnet.py
│ │ ├── pipeline_stable_diffusion_depth2img.py
│ │ ├── pipeline_stable_diffusion_image_variation.py
│ │ ├── pipeline_stable_diffusion_img2img.py
│ │ ├── pipeline_stable_diffusion_inpaint.py
│ │ ├── pipeline_stable_diffusion_inpaint_legacy.py
│ │ ├── pipeline_stable_diffusion_instruct_pix2pix.py
│ │ ├── pipeline_stable_diffusion_k_diffusion.py
│ │ ├── pipeline_stable_diffusion_latent_upscale.py
│ │ ├── pipeline_stable_diffusion_model_editing.py
│ │ ├── pipeline_stable_diffusion_panorama.py
│ │ ├── pipeline_stable_diffusion_pix2pix_zero.py
│ │ ├── pipeline_stable_diffusion_sag.py
│ │ ├── pipeline_stable_diffusion_upscale.py
│ │ ├── pipeline_stable_unclip.py
│ │ ├── pipeline_stable_unclip_img2img.py
│ │ ├── safety_checker.py
│ │ ├── safety_checker_flax.py
│ │ └── stable_unclip_image_normalizer.py
│ ├── stable_diffusion_safe
│ │ ├── __init__.py
│ │ ├── pipeline_stable_diffusion_safe.py
│ │ └── safety_checker.py
│ ├── stochastic_karras_ve
│ │ ├── __init__.py
│ │ └── pipeline_stochastic_karras_ve.py
│ ├── text_to_video_synthesis
│ │ ├── __init__.py
│ │ └── pipeline_text_to_video_synth.py
│ ├── unclip
│ │ ├── __init__.py
│ │ ├── pipeline_unclip.py
│ │ ├── pipeline_unclip_image_variation.py
│ │ └── text_proj.py
│ ├── versatile_diffusion
│ │ ├── __init__.py
│ │ ├── modeling_text_unet.py
│ │ ├── pipeline_versatile_diffusion.py
│ │ ├── pipeline_versatile_diffusion_dual_guided.py
│ │ ├── pipeline_versatile_diffusion_image_variation.py
│ │ └── pipeline_versatile_diffusion_text_to_image.py
│ └── vq_diffusion
│ │ ├── __init__.py
│ │ └── pipeline_vq_diffusion.py
│ ├── schedulers
│ ├── README.md
│ ├── __init__.py
│ ├── scheduling_ddim.py
│ ├── scheduling_ddim_flax.py
│ ├── scheduling_ddim_inverse.py
│ ├── scheduling_ddpm.py
│ ├── scheduling_ddpm_flax.py
│ ├── scheduling_deis_multistep.py
│ ├── scheduling_dpmsolver_multistep.py
│ ├── scheduling_dpmsolver_multistep_flax.py
│ ├── scheduling_dpmsolver_singlestep.py
│ ├── scheduling_euler_ancestral_discrete.py
│ ├── scheduling_euler_discrete.py
│ ├── scheduling_heun_discrete.py
│ ├── scheduling_ipndm.py
│ ├── scheduling_k_dpm_2_ancestral_discrete.py
│ ├── scheduling_k_dpm_2_discrete.py
│ ├── scheduling_karras_ve.py
│ ├── scheduling_karras_ve_flax.py
│ ├── scheduling_lms_discrete.py
│ ├── scheduling_lms_discrete_flax.py
│ ├── scheduling_pndm.py
│ ├── scheduling_pndm_flax.py
│ ├── scheduling_repaint.py
│ ├── scheduling_sde_ve.py
│ ├── scheduling_sde_ve_flax.py
│ ├── scheduling_sde_vp.py
│ ├── scheduling_unclip.py
│ ├── scheduling_unipc_multistep.py
│ ├── scheduling_utils.py
│ ├── scheduling_utils_flax.py
│ └── scheduling_vq_diffusion.py
│ ├── training_utils.py
│ └── utils
│ ├── __init__.py
│ ├── accelerate_utils.py
│ ├── constants.py
│ ├── deprecation_utils.py
│ ├── doc_utils.py
│ ├── dummy_flax_and_transformers_objects.py
│ ├── dummy_flax_objects.py
│ ├── dummy_note_seq_objects.py
│ ├── dummy_onnx_objects.py
│ ├── dummy_pt_objects.py
│ ├── dummy_torch_and_librosa_objects.py
│ ├── dummy_torch_and_scipy_objects.py
│ ├── dummy_torch_and_transformers_and_k_diffusion_objects.py
│ ├── dummy_torch_and_transformers_and_onnx_objects.py
│ ├── dummy_torch_and_transformers_objects.py
│ ├── dummy_transformers_and_torch_and_note_seq_objects.py
│ ├── dynamic_modules_utils.py
│ ├── hub_utils.py
│ ├── import_utils.py
│ ├── logging.py
│ ├── model_card_template.md
│ ├── outputs.py
│ ├── pil_utils.py
│ ├── testing_utils.py
│ └── torch_utils.py
├── tests
└── utils
├── check_config_docstrings.py
├── check_copies.py
├── check_doc_toc.py
├── check_dummies.py
├── check_inits.py
├── check_repo.py
├── check_table.py
├── custom_init_isort.py
├── get_modified_files.py
├── overwrite_expected_slice.py
├── print_env.py
├── release.py
└── stale.py
/images/framwork.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/images/framwork.png
--------------------------------------------------------------------------------
/mmocr/.circleci/docker/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG PYTORCH="1.8.1"
2 | ARG CUDA="10.2"
3 | ARG CUDNN="7"
4 |
5 | FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
6 |
7 | # To fix GPG key error when running apt-get update
8 | RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
9 | RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
10 |
11 | RUN apt-get update && apt-get install -y ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libgl1-mesa-glx
12 |
--------------------------------------------------------------------------------
/mmocr/.codespellrc:
--------------------------------------------------------------------------------
1 | [codespell]
2 | skip = *.ipynb
3 | count =
4 | quiet-level = 3
5 | ignore-words-list = convertor,convertors,formating,nin,wan,datas,hist,ned
6 |
--------------------------------------------------------------------------------
/mmocr/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | omit =
3 | */__init__.py
4 |
--------------------------------------------------------------------------------
/mmocr/.dev_scripts/benchmark_options.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 |
3 | third_part_libs = [
4 | 'pip install -r ../requirements/albu.txt',
5 | ]
6 |
7 | default_floating_range = 0.5
8 |
--------------------------------------------------------------------------------
/mmocr/.dev_scripts/benchmark_train_models.txt:
--------------------------------------------------------------------------------
1 | textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py
2 | textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015.py
3 | textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2015.py
4 | textrecog/abinet/abinet-vision_20e_st-an_mj.py
5 | textrecog/crnn/crnn_mini-vgg_5e_mj.py
6 | textrecog/aster/aster_resnet45_6e_st_mj.py
7 | textrecog/nrtr/nrtr_resnet31-1by16-1by8_6e_st_mj.py
8 | textrecog/sar/sar_resnet31_parallel-decoder_5e_st-sub_mj-sub_sa_real.py
9 | textrecog/svtr/svtr-small_20e_st_mj.py
10 |
--------------------------------------------------------------------------------
/mmocr/.dev_scripts/covignore.cfg:
--------------------------------------------------------------------------------
1 | # Each line should be the relative path to the root directory
2 | # of this repo. Support regular expression as well.
3 | # For example:
4 | # mmocr/models/textdet/postprocess/utils.py
5 | # .*/utils.py
6 | .*/__init__.py
7 |
8 | # It will be removed after all models have been refactored
9 | mmocr/utils/bbox_utils.py
10 |
11 | # Major part is covered, however, it's hard to cover model's output.
12 | mmocr/models/textdet/detectors/mmdet_wrapper.py
13 |
14 | # It will be removed after KieVisualizer and TextSpotterVisualizer
15 | mmocr/visualization/visualize.py
16 |
17 | # Add tests for data preparers later
18 | mmocr/datasets/preparers
19 |
--------------------------------------------------------------------------------
/mmocr/.owners.yml:
--------------------------------------------------------------------------------
1 | assign:
2 | strategy:
3 | random
4 | # daily-shift-based
5 | schedule:
6 | '*/1 * * * *'
7 | assignees:
8 | - gaotongxiao
9 | - Harold-lkk
10 |
--------------------------------------------------------------------------------
/mmocr/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | formats: all
4 |
5 | python:
6 | version: 3.7
7 | install:
8 | - requirements: requirements/docs.txt
9 | - requirements: requirements/readthedocs.txt
10 |
--------------------------------------------------------------------------------
/mmocr/CITATION.cff:
--------------------------------------------------------------------------------
1 | cff-version: 1.2.0
2 | message: "If you use this software, please cite it as below."
3 | title: "OpenMMLab Text Detection, Recognition and Understanding Toolbox"
4 | authors:
5 | - name: "MMOCR Contributors"
6 | version: 0.3.0
7 | date-released: 2020-08-15
8 | repository-code: "https://github.com/open-mmlab/mmocr"
9 | license: Apache-2.0
10 |
--------------------------------------------------------------------------------
/mmocr/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include requirements/*.txt
2 | include mmocr/.mim/model-index.yml
3 | include mmocr/.mim/dicts/*.txt
4 | recursive-include mmocr/.mim/configs *.py *.yml
5 | recursive-include mmocr/.mim/tools *.sh *.py
6 |
--------------------------------------------------------------------------------
/mmocr/README.md:
--------------------------------------------------------------------------------
1 | 该项目基于mmocr框架,请依据requirements.txt搭建环境
2 |
3 | 在configs/textdet中配置好文本图像数据集的路径后,使用./my_train.sh脚本进行检测实验
4 |
--------------------------------------------------------------------------------
/mmocr/configs/backbone/oclip/metafile.yml:
--------------------------------------------------------------------------------
1 | Collections:
2 | - Name: oCLIP
3 | Metadata:
4 | Training Data: SynthText
5 | Architecture:
6 | - CLIPResNet
7 | Paper:
8 | URL: https://arxiv.org/abs/2203.03911
9 | Title: 'Language Matters: A Weakly Supervised Vision-Language Pre-training Approach for Scene Text Detection and Spotting'
10 | README: configs/backbone/oclip/README.md
11 |
12 | Models:
13 | Weights: https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth
14 |
--------------------------------------------------------------------------------
/mmocr/configs/kie/_base_/datasets/wildreceipt.py:
--------------------------------------------------------------------------------
1 | wildreceipt_data_root = 'data/wildreceipt/'
2 |
3 | wildreceipt_train = dict(
4 | type='WildReceiptDataset',
5 | data_root=wildreceipt_data_root,
6 | metainfo=wildreceipt_data_root + 'class_list.txt',
7 | ann_file='train.txt',
8 | pipeline=None)
9 |
10 | wildreceipt_test = dict(
11 | type='WildReceiptDataset',
12 | data_root=wildreceipt_data_root,
13 | metainfo=wildreceipt_data_root + 'class_list.txt',
14 | ann_file='test.txt',
15 | test_mode=True,
16 | pipeline=None)
17 |
--------------------------------------------------------------------------------
/mmocr/configs/kie/_base_/schedules/schedule_adam_60e.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | optim_wrapper = dict(
3 | type='OptimWrapper', optimizer=dict(type='Adam', weight_decay=0.0001))
4 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=60, val_interval=1)
5 | val_cfg = dict(type='ValLoop')
6 | test_cfg = dict(type='TestLoop')
7 | # learning rate
8 | param_scheduler = [
9 | dict(type='MultiStepLR', milestones=[40, 50], end=60),
10 | ]
11 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/_base_/datasets/ctw1500.py:
--------------------------------------------------------------------------------
1 | ctw1500_textdet_data_root = 'data/ctw1500'
2 |
3 | ctw1500_textdet_train = dict(
4 | type='OCRDataset',
5 | data_root=ctw1500_textdet_data_root,
6 | ann_file='textdet_train.json',
7 | filter_cfg=dict(filter_empty_gt=True, min_size=32),
8 | pipeline=None)
9 |
10 | ctw1500_textdet_test = dict(
11 | type='OCRDataset',
12 | data_root=ctw1500_textdet_data_root,
13 | ann_file='textdet_test.json',
14 | test_mode=True,
15 | pipeline=None)
16 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/_base_/datasets/icdar2013.py:
--------------------------------------------------------------------------------
1 | icdar2013_textdet_data_root = 'data/icdar2013'
2 |
3 | icdar2013_textdet_train = dict(
4 | type='OCRDataset',
5 | data_root=icdar2013_textdet_data_root,
6 | ann_file='textdet_train.json',
7 | filter_cfg=dict(filter_empty_gt=True, min_size=32),
8 | pipeline=None)
9 |
10 | icdar2013_textdet_test = dict(
11 | type='OCRDataset',
12 | data_root=icdar2013_textdet_data_root,
13 | ann_file='textdet_test.json',
14 | test_mode=True,
15 | pipeline=None)
16 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/_base_/datasets/icdar2017.py:
--------------------------------------------------------------------------------
1 | icdar2017_textdet_data_root = 'data/mlt2017'
2 |
3 | icdar2017_textdet_train = dict(
4 | type='OCRDataset',
5 | data_root=icdar2017_textdet_data_root,
6 | ann_file='textdet_test.json',
7 | filter_cfg=dict(filter_empty_gt=True, min_size=32),
8 | pipeline=None)
9 |
10 | icdar2017_textdet_test = dict(
11 | type='OCRDataset',
12 | data_root=icdar2017_textdet_data_root,
13 | ann_file='textdet_test.json',
14 | test_mode=True,
15 | pipeline=None)
16 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/_base_/datasets/synthtext.py:
--------------------------------------------------------------------------------
1 | synthtext_textdet_data_root = 'data/synthtext'
2 |
3 | synthtext_textdet_train = dict(
4 | type='OCRDataset',
5 | data_root=synthtext_textdet_data_root,
6 | ann_file='textdet_train.json',
7 | filter_cfg=dict(filter_empty_gt=True, min_size=32),
8 | pipeline=None)
9 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/_base_/datasets/totaltext.py:
--------------------------------------------------------------------------------
1 | totaltext_textdet_data_root = 'data/totaltext'
2 |
3 | totaltext_textdet_train = dict(
4 | type='OCRDataset',
5 | data_root=totaltext_textdet_data_root,
6 | ann_file='textdet_train.json',
7 | filter_cfg=dict(filter_empty_gt=True, min_size=32),
8 | pipeline=None)
9 |
10 | totaltext_textdet_test = dict(
11 | type='OCRDataset',
12 | data_root=totaltext_textdet_data_root,
13 | ann_file='textdet_test.json',
14 | test_mode=True,
15 | pipeline=None)
16 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/_base_/datasets/toy_data.py:
--------------------------------------------------------------------------------
1 | toy_det_data_root = 'tests/data/det_toy_dataset'
2 |
3 | toy_det_train = dict(
4 | type='OCRDataset',
5 | data_root=toy_det_data_root,
6 | ann_file='instances_training.json',
7 | data_prefix=dict(img_path='imgs/'),
8 | filter_cfg=dict(filter_empty_gt=True, min_size=32),
9 | pipeline=None)
10 |
11 | toy_det_test = dict(
12 | type='OCRDataset',
13 | data_root=toy_det_data_root,
14 | ann_file='instances_test.json',
15 | data_prefix=dict(img_path='imgs/'),
16 | test_mode=True,
17 | pipeline=None)
18 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/_base_/pretrain_runtime.py:
--------------------------------------------------------------------------------
1 | _base_ = 'default_runtime.py'
2 |
3 | default_hooks = dict(
4 | logger=dict(type='LoggerHook', interval=1000),
5 | checkpoint=dict(
6 | type='CheckpointHook',
7 | interval=10000,
8 | by_epoch=False,
9 | max_keep_ckpts=1),
10 | )
11 |
12 | # Evaluation
13 | val_evaluator = None
14 | test_evaluator = None
15 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/_base_/schedules/schedule_adam_600e.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | optim_wrapper = dict(type='OptimWrapper', optimizer=dict(type='Adam', lr=1e-3))
3 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=60, val_interval=10)
4 | val_cfg = dict(type='ValLoop')
5 | test_cfg = dict(type='TestLoop')
6 | # learning rate
7 | param_scheduler = [
8 | dict(type='PolyLR', power=0.9, end=60),
9 | ]
10 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/_base_/schedules/schedule_sgd_100k.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | optim_wrapper = dict(
3 | type='OptimWrapper',
4 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001))
5 |
6 | train_cfg = dict(type='IterBasedTrainLoop', max_iters=100000)
7 | # test_cfg = None
8 | # val_cfg = None
9 | val_cfg = dict(type='ValLoop')
10 | test_cfg = dict(type='TestLoop')
11 | # learning policy
12 | param_scheduler = [
13 | dict(type='PolyLR', power=0.9, eta_min=1e-7, by_epoch=False, end=100000),
14 | ]
15 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/_base_/schedules/schedule_sgd_1200e.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | optim_wrapper = dict(
3 | type='OptimWrapper',
4 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001))
5 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=1200, val_interval=100)
6 | val_cfg = dict(type='ValLoop')
7 | test_cfg = dict(type='TestLoop')
8 | # learning policy
9 | param_scheduler = [
10 | dict(type='PolyLR', power=0.9, eta_min=1e-7, end=1200),
11 | ]
12 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/_base_/schedules/schedule_sgd_base.py:
--------------------------------------------------------------------------------
1 | # Note: This schedule config serves as a base config for other schedules.
2 | # Users would have to at least fill in "max_epochs" and "val_interval"
3 | # in order to use this config in their experiments.
4 |
5 | # optimizer
6 | optim_wrapper = dict(
7 | type='OptimWrapper',
8 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001))
9 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=None, val_interval=20)
10 | val_cfg = dict(type='ValLoop')
11 | test_cfg = dict(type='TestLoop')
12 | # learning policy
13 | param_scheduler = [
14 | dict(type='ConstantLR', factor=1.0),
15 | ]
16 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/dbnet/dbnet_resnet50-oclip_1200e_icdar2015.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | 'dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py',
3 | ]
4 |
5 | load_from = None
6 |
7 | _base_.model.backbone = dict(
8 | type='CLIPResNet',
9 | init_cfg=dict(
10 | type='Pretrained',
11 | checkpoint='https://download.openmmlab.com/'
12 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth'))
13 |
14 | _base_.train_dataloader.num_workers = 24
15 | _base_.optim_wrapper.optimizer.lr = 0.002
16 |
17 | param_scheduler = [
18 | dict(type='LinearLR', end=100, start_factor=0.001),
19 | dict(type='PolyLR', power=0.9, eta_min=1e-7, begin=100, end=1200),
20 | ]
21 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/dbnet/dbnet_resnet50_1200e_icdar2015.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | 'dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py',
3 | ]
4 |
5 | load_from = None
6 |
7 | _base_.model.backbone = dict(
8 | type='mmdet.ResNet',
9 | depth=50,
10 | num_stages=4,
11 | out_indices=(0, 1, 2, 3),
12 | frozen_stages=-1,
13 | norm_cfg=dict(type='BN', requires_grad=True),
14 | norm_eval=True,
15 | style='pytorch',
16 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'))
17 |
18 | _base_.train_dataloader.num_workers = 24
19 | _base_.optim_wrapper.optimizer.lr = 0.002
20 |
21 | param_scheduler = [
22 | dict(type='LinearLR', end=100, start_factor=0.001),
23 | dict(type='PolyLR', power=0.9, eta_min=1e-7, begin=100, end=1200),
24 | ]
25 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | 'dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py',
3 | ]
4 |
5 | load_from = None
6 |
7 | _base_.model.backbone = dict(
8 | type='CLIPResNet',
9 | init_cfg=dict(
10 | type='Pretrained',
11 | checkpoint='https://download.openmmlab.com/'
12 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth'))
13 |
14 | _base_.train_dataloader.num_workers = 24
15 | _base_.optim_wrapper.optimizer.lr = 0.002
16 |
17 | param_scheduler = [
18 | dict(type='LinearLR', end=200, start_factor=0.001),
19 | dict(type='PolyLR', power=0.9, eta_min=1e-7, begin=200, end=1200),
20 | ]
21 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | 'dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py',
3 | ]
4 |
5 | load_from = None
6 |
7 | _base_.model.backbone = dict(
8 | type='mmdet.ResNet',
9 | depth=50,
10 | num_stages=4,
11 | out_indices=(0, 1, 2, 3),
12 | frozen_stages=-1,
13 | norm_cfg=dict(type='BN', requires_grad=True),
14 | norm_eval=True,
15 | style='pytorch',
16 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'))
17 |
18 | _base_.train_dataloader.num_workers = 24
19 | _base_.optim_wrapper.optimizer.lr = 0.003
20 |
21 | param_scheduler = [
22 | dict(type='LinearLR', end=200, start_factor=0.001),
23 | dict(type='PolyLR', power=0.9, eta_min=1e-7, begin=200, end=1200),
24 | ]
25 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/drrg/drrg_resnet50-oclip_fpn-unet_1200e_ctw1500.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | 'drrg_resnet50_fpn-unet_1200e_ctw1500.py',
3 | ]
4 |
5 | load_from = None
6 |
7 | _base_.model.backbone = dict(
8 | type='CLIPResNet',
9 | init_cfg=dict(
10 | type='Pretrained',
11 | checkpoint='https://download.openmmlab.com/'
12 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth'))
13 |
14 | param_scheduler = [
15 | dict(type='LinearLR', end=100, start_factor=0.001),
16 | dict(type='PolyLR', power=0.9, eta_min=1e-7, begin=100, end=1200),
17 | ]
18 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/fcenet/_base_fcenet_resnet50-dcnv2_fpn.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '_base_fcenet_resnet50_fpn.py',
3 | ]
4 |
5 | model = dict(
6 | backbone=dict(
7 | norm_eval=True,
8 | style='pytorch',
9 | dcn=dict(type='DCNv2', deform_groups=2, fallback_on_stride=False),
10 | stage_with_dcn=(False, True, True, True)),
11 | det_head=dict(
12 | module_loss=dict(
13 | type='FCEModuleLoss',
14 | num_sample=50,
15 | level_proportion_range=((0, 0.25), (0.2, 0.65), (0.55, 1.0))),
16 | postprocessor=dict(text_repr_type='poly', alpha=1.0, beta=2.0)))
17 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/fcenet/fcenet_resnet50-oclip_fpn_1500e_ctw1500.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | 'fcenet_resnet50-dcnv2_fpn_1500e_ctw1500.py',
3 | ]
4 |
5 | load_from = None
6 |
7 | _base_.model.backbone = dict(
8 | type='CLIPResNet',
9 | out_indices=(1, 2, 3),
10 | init_cfg=dict(
11 | type='Pretrained',
12 | checkpoint='https://download.openmmlab.com/'
13 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth'))
14 |
15 | _base_.train_dataloader.num_workers = 24
16 | _base_.optim_wrapper.optimizer.lr = 0.0005
17 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/fcenet/fcenet_resnet50-oclip_fpn_1500e_icdar2015.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | 'fcenet_resnet50_fpn_1500e_icdar2015.py',
3 | ]
4 | load_from = None
5 |
6 | _base_.model.backbone = dict(
7 | type='CLIPResNet',
8 | out_indices=(1, 2, 3),
9 | init_cfg=dict(
10 | type='Pretrained',
11 | checkpoint='https://download.openmmlab.com/'
12 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth'))
13 |
14 | _base_.train_dataloader.batch_size = 16
15 | _base_.train_dataloader.num_workers = 24
16 | _base_.optim_wrapper.optimizer.lr = 0.0005
17 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/maskrcnn/mask-rcnn_resnet50-oclip_fpn_160e_ctw1500.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | 'mask-rcnn_resnet50_fpn_160e_ctw1500.py',
3 | ]
4 |
5 | load_from = None
6 |
7 | _base_.model.cfg.backbone = dict(
8 | _scope_='mmocr',
9 | type='CLIPResNet',
10 | init_cfg=dict(
11 | type='Pretrained',
12 | checkpoint='https://download.openmmlab.com/'
13 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth'))
14 |
15 | _base_.optim_wrapper.optimizer.lr = 0.02
16 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/maskrcnn/mask-rcnn_resnet50-oclip_fpn_160e_icdar2015.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | 'mask-rcnn_resnet50_fpn_160e_icdar2015.py',
3 | ]
4 |
5 | load_from = None
6 |
7 | _base_.model.cfg.backbone = dict(
8 | _scope_='mmocr',
9 | type='CLIPResNet',
10 | init_cfg=dict(
11 | type='Pretrained',
12 | checkpoint='https://download.openmmlab.com/'
13 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth'))
14 |
15 | _base_.optim_wrapper.optimizer.lr = 0.02
16 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2017.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | 'mask-rcnn_resnet50_fpn_160e_icdar2015.py',
3 | '../_base_/datasets/icdar2017.py',
4 | ]
5 |
6 | icdar2017_textdet_train = _base_.icdar2017_textdet_train
7 | icdar2017_textdet_test = _base_.icdar2017_textdet_test
8 | # use the same pipeline as icdar2015
9 | icdar2017_textdet_train.pipeline = _base_.train_pipeline
10 | icdar2017_textdet_test.pipeline = _base_.test_pipeline
11 |
12 | train_dataloader = dict(dataset=icdar2017_textdet_train)
13 | val_dataloader = dict(dataset=icdar2017_textdet_test)
14 | test_dataloader = val_dataloader
15 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/panet/_base_panet_resnet50_fpem-ffm.py:
--------------------------------------------------------------------------------
1 | _base_ = '_base_panet_resnet18_fpem-ffm.py'
2 |
3 | model = dict(
4 | type='PANet',
5 | backbone=dict(
6 | _delete_=True,
7 | type='mmdet.ResNet',
8 | depth=50,
9 | num_stages=4,
10 | out_indices=(0, 1, 2, 3),
11 | frozen_stages=1,
12 | norm_cfg=dict(type='BN', requires_grad=True),
13 | norm_eval=True,
14 | style='caffe',
15 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
16 | ),
17 | neck=dict(in_channels=[256, 512, 1024, 2048]),
18 | det_head=dict(postprocessor=dict(text_repr_type='poly')))
19 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/psenet/psenet_resnet50-oclip_fpnf_600e_ctw1500.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | 'psenet_resnet50_fpnf_600e_ctw1500.py',
3 | ]
4 |
5 | _base_.model.backbone = dict(
6 | type='CLIPResNet',
7 | init_cfg=dict(
8 | type='Pretrained',
9 | checkpoint='https://download.openmmlab.com/'
10 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth'))
11 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/psenet/psenet_resnet50-oclip_fpnf_600e_icdar2015.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | 'psenet_resnet50_fpnf_600e_icdar2015.py',
3 | ]
4 |
5 | _base_.model.backbone = dict(
6 | type='CLIPResNet',
7 | init_cfg=dict(
8 | type='Pretrained',
9 | checkpoint='https://download.openmmlab.com/'
10 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth'))
11 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/psenet/psenet_resnet50_fpnf_600e_icdar2017.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | 'psenet_resnet50_fpnf_600e_icdar2015.py',
3 | '../_base_/datasets/icdar2017.py',
4 | ]
5 |
6 | icdar2017_textdet_train = _base_.icdar2017_textdet_train
7 | icdar2017_textdet_test = _base_.icdar2017_textdet_test
8 | # use the same pipeline as icdar2015
9 | icdar2017_textdet_train.pipeline = _base_.train_pipeline
10 | icdar2017_textdet_test.pipeline = _base_.test_pipeline
11 |
12 | train_dataloader = dict(dataset=icdar2017_textdet_train)
13 | val_dataloader = dict(dataset=icdar2017_textdet_test)
14 | test_dataloader = val_dataloader
15 |
16 | auto_scale_lr = dict(base_batch_size=64 * 4)
17 |
--------------------------------------------------------------------------------
/mmocr/configs/textdet/textsnake/textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | 'textsnake_resnet50_fpn-unet_1200e_ctw1500.py',
3 | ]
4 |
5 | _base_.model.backbone = dict(
6 | type='CLIPResNet',
7 | init_cfg=dict(
8 | type='Pretrained',
9 | checkpoint='https://download.openmmlab.com/'
10 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth'))
11 |
--------------------------------------------------------------------------------
/mmocr/configs/textrecog/_base_/datasets/coco_text_v1.py:
--------------------------------------------------------------------------------
1 | cocotextv1_textrecog_data_root = 'data/rec/coco_text_v1'
2 |
3 | cocotextv1_textrecog_train = dict(
4 | type='OCRDataset',
5 | data_root=cocotextv1_textrecog_data_root,
6 | ann_file='train_labels.json',
7 | test_mode=False,
8 | pipeline=None)
9 |
--------------------------------------------------------------------------------
/mmocr/configs/textrecog/_base_/datasets/cute80.py:
--------------------------------------------------------------------------------
1 | cute80_textrecog_data_root = 'data/cute80'
2 |
3 | cute80_textrecog_test = dict(
4 | type='OCRDataset',
5 | data_root=cute80_textrecog_data_root,
6 | ann_file='textrecog_test.json',
7 | test_mode=True,
8 | pipeline=None)
9 |
--------------------------------------------------------------------------------
/mmocr/configs/textrecog/_base_/datasets/icdar2011.py:
--------------------------------------------------------------------------------
1 | icdar2011_textrecog_data_root = 'data/rec/icdar_2011/'
2 |
3 | icdar2011_textrecog_train = dict(
4 | type='OCRDataset',
5 | data_root=icdar2011_textrecog_data_root,
6 | ann_file='train_labels.json',
7 | test_mode=False,
8 | pipeline=None)
9 |
--------------------------------------------------------------------------------
/mmocr/configs/textrecog/_base_/datasets/icdar2013.py:
--------------------------------------------------------------------------------
1 | icdar2013_textrecog_data_root = 'data/icdar2013'
2 |
3 | icdar2013_textrecog_train = dict(
4 | type='OCRDataset',
5 | data_root=icdar2013_textrecog_data_root,
6 | ann_file='textrecog_train.json',
7 | pipeline=None)
8 |
9 | icdar2013_textrecog_test = dict(
10 | type='OCRDataset',
11 | data_root=icdar2013_textrecog_data_root,
12 | ann_file='textrecog_test.json',
13 | test_mode=True,
14 | pipeline=None)
15 |
16 | icdar2013_857_textrecog_test = dict(
17 | type='OCRDataset',
18 | data_root=icdar2013_textrecog_data_root,
19 | ann_file='textrecog_test_857.json',
20 | test_mode=True,
21 | pipeline=None)
22 |
--------------------------------------------------------------------------------
/mmocr/configs/textrecog/_base_/datasets/icdar2015.py:
--------------------------------------------------------------------------------
1 | icdar2015_textrecog_data_root = 'data/icdar2015'
2 |
3 | icdar2015_textrecog_train = dict(
4 | type='OCRDataset',
5 | data_root=icdar2015_textrecog_data_root,
6 | ann_file='textrecog_train.json',
7 | pipeline=None)
8 |
9 | icdar2015_textrecog_test = dict(
10 | type='OCRDataset',
11 | data_root=icdar2015_textrecog_data_root,
12 | ann_file='textrecog_test.json',
13 | test_mode=True,
14 | pipeline=None)
15 |
16 | icdar2015_1811_textrecog_test = dict(
17 | type='OCRDataset',
18 | data_root=icdar2015_textrecog_data_root,
19 | ann_file='textrecog_test_1811.json',
20 | test_mode=True,
21 | pipeline=None)
22 |
--------------------------------------------------------------------------------
/mmocr/configs/textrecog/_base_/datasets/iiit5k.py:
--------------------------------------------------------------------------------
1 | iiit5k_textrecog_data_root = 'data/iiit5k'
2 |
3 | iiit5k_textrecog_train = dict(
4 | type='OCRDataset',
5 | data_root=iiit5k_textrecog_data_root,
6 | ann_file='textrecog_train.json',
7 | pipeline=None)
8 |
9 | iiit5k_textrecog_test = dict(
10 | type='OCRDataset',
11 | data_root=iiit5k_textrecog_data_root,
12 | ann_file='textrecog_test.json',
13 | test_mode=True,
14 | pipeline=None)
15 |
--------------------------------------------------------------------------------
/mmocr/configs/textrecog/_base_/datasets/mjsynth.py:
--------------------------------------------------------------------------------
1 | mjsynth_textrecog_data_root = 'data/mjsynth'
2 |
3 | mjsynth_textrecog_train = dict(
4 | type='OCRDataset',
5 | data_root=mjsynth_textrecog_data_root,
6 | ann_file='textrecog_train.json',
7 | pipeline=None)
8 |
9 | mjsynth_sub_textrecog_train = dict(
10 | type='OCRDataset',
11 | data_root=mjsynth_textrecog_data_root,
12 | ann_file='subset_textrecog_train.json',
13 | pipeline=None)
14 |
--------------------------------------------------------------------------------
/mmocr/configs/textrecog/_base_/datasets/svt.py:
--------------------------------------------------------------------------------
1 | svt_textrecog_data_root = 'data/svt'
2 |
3 | svt_textrecog_train = dict(
4 | type='OCRDataset',
5 | data_root=svt_textrecog_data_root,
6 | ann_file='textrecog_train.json',
7 | pipeline=None)
8 |
9 | svt_textrecog_test = dict(
10 | type='OCRDataset',
11 | data_root=svt_textrecog_data_root,
12 | ann_file='textrecog_test.json',
13 | test_mode=True,
14 | pipeline=None)
15 |
--------------------------------------------------------------------------------
/mmocr/configs/textrecog/_base_/datasets/svtp.py:
--------------------------------------------------------------------------------
1 | svtp_textrecog_data_root = 'data/svtp'
2 |
3 | svtp_textrecog_train = dict(
4 | type='OCRDataset',
5 | data_root=svtp_textrecog_data_root,
6 | ann_file='textrecog_train.json',
7 | pipeline=None)
8 |
9 | svtp_textrecog_test = dict(
10 | type='OCRDataset',
11 | data_root=svtp_textrecog_data_root,
12 | ann_file='textrecog_test.json',
13 | test_mode=True,
14 | pipeline=None)
15 |
--------------------------------------------------------------------------------
/mmocr/configs/textrecog/_base_/datasets/synthtext.py:
--------------------------------------------------------------------------------
1 | synthtext_textrecog_data_root = 'data/synthtext'
2 |
3 | synthtext_textrecog_train = dict(
4 | type='OCRDataset',
5 | data_root=synthtext_textrecog_data_root,
6 | ann_file='textrecog_train.json',
7 | pipeline=None)
8 |
9 | synthtext_sub_textrecog_train = dict(
10 | type='OCRDataset',
11 | data_root=synthtext_textrecog_data_root,
12 | ann_file='subset_textrecog_train.json',
13 | pipeline=None)
14 |
15 | synthtext_an_textrecog_train = dict(
16 | type='OCRDataset',
17 | data_root=synthtext_textrecog_data_root,
18 | ann_file='alphanumeric_textrecog_train.json',
19 | pipeline=None)
20 |
--------------------------------------------------------------------------------
/mmocr/configs/textrecog/_base_/datasets/synthtext_add.py:
--------------------------------------------------------------------------------
1 | synthtext_add_textrecog_data_root = 'data/rec/synthtext_add/'
2 |
3 | synthtext_add_textrecog_train = dict(
4 | type='OCRDataset',
5 | data_root=synthtext_add_textrecog_data_root,
6 | ann_file='train_labels.json',
7 | test_mode=False,
8 | pipeline=None)
9 |
--------------------------------------------------------------------------------
/mmocr/configs/textrecog/_base_/datasets/totaltext.py:
--------------------------------------------------------------------------------
1 | totaltext_textrecog_data_root = 'data/totaltext/'
2 |
3 | totaltext_textrecog_train = dict(
4 | type='OCRDataset',
5 | data_root=totaltext_textrecog_data_root,
6 | ann_file='textrecog_train.json',
7 | test_mode=False,
8 | pipeline=None)
9 |
10 | totaltext_textrecog_test = dict(
11 | type='OCRDataset',
12 | data_root=totaltext_textrecog_data_root,
13 | ann_file='textrecog_test.json',
14 | test_mode=True,
15 | pipeline=None)
16 |
--------------------------------------------------------------------------------
/mmocr/configs/textrecog/_base_/datasets/toy_data.py:
--------------------------------------------------------------------------------
1 | toy_data_root = 'tests/data/rec_toy_dataset/'
2 |
3 | toy_rec_train = dict(
4 | type='OCRDataset',
5 | data_root=toy_data_root,
6 | data_prefix=dict(img_path='imgs/'),
7 | ann_file='labels.json',
8 | pipeline=None,
9 | test_mode=False)
10 |
11 | toy_rec_test = dict(
12 | type='OCRDataset',
13 | data_root=toy_data_root,
14 | data_prefix=dict(img_path='imgs/'),
15 | ann_file='labels.json',
16 | pipeline=None,
17 | test_mode=True)
18 |
--------------------------------------------------------------------------------
/mmocr/configs/textrecog/_base_/schedules/schedule_adadelta_5e.py:
--------------------------------------------------------------------------------
1 | optim_wrapper = dict(
2 | type='OptimWrapper', optimizer=dict(type='Adadelta', lr=1.0))
3 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=5, val_interval=1)
4 | val_cfg = dict(type='ValLoop')
5 | test_cfg = dict(type='TestLoop')
6 | # learning rate
7 | param_scheduler = [
8 | dict(type='ConstantLR', factor=1.0),
9 | ]
10 |
--------------------------------------------------------------------------------
/mmocr/configs/textrecog/_base_/schedules/schedule_adam_base.py:
--------------------------------------------------------------------------------
1 | # Note: This schedule config serves as a base config for other schedules.
2 | # Users would have to at least fill in "max_epochs" and "val_interval"
3 | # in order to use this config in their experiments.
4 |
5 | # optimizer
6 | optim_wrapper = dict(type='OptimWrapper', optimizer=dict(type='Adam', lr=3e-4))
7 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=None, val_interval=1)
8 | val_cfg = dict(type='ValLoop')
9 | test_cfg = dict(type='TestLoop')
10 | # learning policy
11 | param_scheduler = [
12 | dict(type='ConstantLR', factor=1.0),
13 | ]
14 |
--------------------------------------------------------------------------------
/mmocr/configs/textrecog/_base_/schedules/schedule_adam_step_5e.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | optim_wrapper = dict(type='OptimWrapper', optimizer=dict(type='Adam', lr=1e-3))
3 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=5, val_interval=1)
4 | val_cfg = dict(type='ValLoop')
5 | test_cfg = dict(type='TestLoop')
6 | # learning policy
7 | param_scheduler = [
8 | dict(type='MultiStepLR', milestones=[3, 4], end=5),
9 | ]
10 |
--------------------------------------------------------------------------------
/mmocr/configs/textrecog/_base_/schedules/schedule_adamw_cos_6e.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | optim_wrapper = dict(
3 | type='OptimWrapper',
4 | optimizer=dict(
5 | type='AdamW',
6 | lr=4e-4,
7 | betas=(0.9, 0.999),
8 | eps=1e-08,
9 | weight_decay=0.05))
10 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=6, val_interval=1)
11 | val_cfg = dict(type='ValLoop')
12 | test_cfg = dict(type='TestLoop')
13 |
14 | # learning policy
15 | param_scheduler = [
16 | dict(
17 | type='CosineAnnealingLR',
18 | T_max=6,
19 | eta_min=4e-6,
20 | convert_to_iter_based=True)
21 | ]
22 |
--------------------------------------------------------------------------------
/mmocr/configs/textrecog/abinet/_base_abinet.py:
--------------------------------------------------------------------------------
1 | _base_ = '_base_abinet-vision.py'
2 |
3 | model = dict(
4 | decoder=dict(
5 | d_model=512,
6 | num_iters=3,
7 | language_decoder=dict(
8 | type='ABILanguageDecoder',
9 | d_model=512,
10 | n_head=8,
11 | d_inner=2048,
12 | n_layers=4,
13 | dropout=0.1,
14 | detach_tokens=True,
15 | use_self_attn=False,
16 | )), )
17 |
--------------------------------------------------------------------------------
/mmocr/configs/textrecog/nrtr/nrtr_resnet31-1by8-1by4_6e_st_mj.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | 'nrtr_resnet31-1by16-1by8_6e_st_mj.py',
3 | ]
4 |
5 | model = dict(backbone=dict(last_stage_pool=False))
6 |
--------------------------------------------------------------------------------
/mmocr/configs/textrecog/sar/sar_resnet31_sequential-decoder_5e_st-sub_mj-sub_sa_real.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | 'sar_resnet31_parallel-decoder_5e_st-sub_mj-sub_sa_real.py',
3 | ]
4 |
5 | model = dict(decoder=dict(type='SequentialSARDecoder'))
6 |
--------------------------------------------------------------------------------
/mmocr/configs/textrecog/satrn/satrn_shallow-small_5e_st_mj.py:
--------------------------------------------------------------------------------
1 | _base_ = ['satrn_shallow_5e_st_mj.py']
2 |
3 | model = dict(
4 | backbone=dict(type='ShallowCNN', input_channels=3, hidden_dim=256),
5 | encoder=dict(
6 | type='SATRNEncoder',
7 | n_layers=6,
8 | n_head=8,
9 | d_k=256 // 8,
10 | d_v=256 // 8,
11 | d_model=256,
12 | n_position=100,
13 | d_inner=256 * 4,
14 | dropout=0.1),
15 | decoder=dict(
16 | type='NRTRDecoder',
17 | n_layers=6,
18 | d_embedding=256,
19 | n_head=8,
20 | d_model=256,
21 | d_inner=256 * 4,
22 | d_k=256 // 8,
23 | d_v=256 // 8))
24 |
--------------------------------------------------------------------------------
/mmocr/configs/textrecog/svtr/svtr-base_20e_st_mj.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | 'svtr-tiny_20e_st_mj.py',
3 | ]
4 |
5 | model = dict(
6 | preprocessor=dict(output_image_size=(48, 160), ),
7 | encoder=dict(
8 | img_size=[48, 160],
9 | max_seq_len=40,
10 | out_channels=256,
11 | embed_dims=[128, 256, 384],
12 | depth=[3, 6, 9],
13 | num_heads=[4, 8, 12],
14 | mixer_types=['Local'] * 8 + ['Global'] * 10),
15 | decoder=dict(in_channels=256))
16 |
17 | train_dataloader = dict(batch_size=256, )
18 |
--------------------------------------------------------------------------------
/mmocr/configs/textrecog/svtr/svtr-large_20e_st_mj.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | 'svtr-tiny_20e_st_mj.py',
3 | ]
4 |
5 | model = dict(
6 | preprocessor=dict(output_image_size=(48, 160), ),
7 | encoder=dict(
8 | img_size=[48, 160],
9 | max_seq_len=40,
10 | out_channels=384,
11 | embed_dims=[192, 256, 512],
12 | depth=[3, 9, 9],
13 | num_heads=[6, 8, 16],
14 | mixer_types=['Local'] * 10 + ['Global'] * 11),
15 | decoder=dict(in_channels=384))
16 |
17 | train_dataloader = dict(batch_size=128, )
18 |
19 | optim_wrapper = dict(optimizer=dict(lr=2.5 / (10**4)))
20 |
--------------------------------------------------------------------------------
/mmocr/configs/textrecog/svtr/svtr-small_20e_st_mj.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | 'svtr-tiny_20e_st_mj.py',
3 | ]
4 |
5 | model = dict(
6 | encoder=dict(
7 | embed_dims=[96, 192, 256],
8 | depth=[3, 6, 6],
9 | num_heads=[3, 6, 8],
10 | mixer_types=['Local'] * 8 + ['Global'] * 7))
11 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/cocotextv2/textrecog.py:
--------------------------------------------------------------------------------
1 | _base_ = ['textdet.py']
2 |
3 | _base_.train_preparer.packer.type = 'TextRecogCropPacker'
4 | _base_.val_preparer.packer.type = 'TextRecogCropPacker'
5 |
6 | config_generator = dict(type='TextRecogConfigGenerator')
7 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/cocotextv2/textspotting.py:
--------------------------------------------------------------------------------
1 | _base_ = ['textdet.py']
2 |
3 | _base_.train_preparer.packer.type = 'TextSpottingPacker'
4 | _base_.test_preparer.packer.type = 'TextSpottingPacker'
5 |
6 | config_generator = dict(type='TextSpottingConfigGenerator')
7 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/ctw1500/textrecog.py:
--------------------------------------------------------------------------------
1 | _base_ = ['textdet.py']
2 |
3 | _base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train'
4 | _base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test'
5 |
6 | _base_.train_preparer.packer.type = 'TextRecogCropPacker'
7 | _base_.test_preparer.packer.type = 'TextRecogCropPacker'
8 |
9 | config_generator = dict(type='TextRecogConfigGenerator')
10 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/ctw1500/textspotting.py:
--------------------------------------------------------------------------------
1 | _base_ = ['textdet.py']
2 |
3 | _base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train'
4 | _base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test'
5 |
6 | _base_.train_preparer.packer.type = 'TextSpottingPacker'
7 | _base_.test_preparer.packer.type = 'TextSpottingPacker'
8 |
9 | _base_.test_preparer.obtainer.files.append(
10 | dict(
11 | url='https://download.openmmlab.com/mmocr/data/1.x/textspotting/'
12 | 'ctw1500/lexicons.zip',
13 | save_name='ctw1500_lexicons.zip',
14 | md5='168150ca45da161917bf35a20e45b8d6',
15 | content=['lexicons'],
16 | mapping=[['ctw1500_lexicons/lexicons', 'lexicons']]))
17 |
18 | _base_.delete.append('ctw1500_lexicons')
19 | config_generator = dict(type='TextSpottingConfigGenerator')
20 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/cute80/sample_anno.md:
--------------------------------------------------------------------------------
1 | **Text Recognition**
2 |
3 | ```text
4 | # timage/img_name text 1 text
5 |
6 | timage/001.jpg RONALDO 1 RONALDO
7 | timage/002.jpg 7 1 7
8 | timage/003.jpg SEACREST 1 SEACREST
9 | timage/004.jpg BEACH 1 BEACH
10 | ```
11 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/funsd/textrecog.py:
--------------------------------------------------------------------------------
1 | _base_ = ['textdet.py']
2 |
3 | _base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train'
4 | _base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test'
5 |
6 | _base_.train_preparer.packer.type = 'TextRecogCropPacker'
7 | _base_.test_preparer.packer.type = 'TextRecogCropPacker'
8 |
9 | config_generator = dict(type='TextRecogConfigGenerator')
10 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/funsd/textspotting.py:
--------------------------------------------------------------------------------
1 | _base_ = ['textdet.py']
2 | _base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train'
3 | _base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test'
4 |
5 | _base_.train_preparer.packer.type = 'TextSpottingPacker'
6 | _base_.test_preparer.packer.type = 'TextSpottingPacker'
7 |
8 | config_generator = dict(type='TextSpottingConfigGenerator')
9 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/icdar2013/sample_anno.md:
--------------------------------------------------------------------------------
1 | **Text Detection**
2 |
3 | ```text
4 | # train split
5 | # x1 y1 x2 y2 "transcript"
6 |
7 | 158 128 411 181 "Footpath"
8 | 443 128 501 169 "To"
9 | 64 200 363 243 "Colchester"
10 |
11 | # test split
12 | # x1, y1, x2, y2, "transcript"
13 |
14 | 38, 43, 920, 215, "Tiredness"
15 | 275, 264, 665, 450, "kills"
16 | 0, 699, 77, 830, "A"
17 | ```
18 |
19 | **Text Recognition**
20 |
21 | ```text
22 | # img_name, "text"
23 |
24 | word_1.png, "PROPER"
25 | word_2.png, "FOOD"
26 | word_3.png, "PRONTO"
27 | ```
28 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/icdar2015/sample_anno.md:
--------------------------------------------------------------------------------
1 | **Text Detection**
2 |
3 | ```text
4 | # x1,y1,x2,y2,x3,y3,x4,y4,trans
5 |
6 | 377,117,463,117,465,130,378,130,Genaxis Theatre
7 | 493,115,519,115,519,131,493,131,[06]
8 | 374,155,409,155,409,170,374,170,###
9 | ```
10 |
11 | **Text Recognition**
12 |
13 | ```text
14 | # img_name, "text"
15 |
16 | word_1.png, "Genaxis Theatre"
17 | word_2.png, "[06]"
18 | word_3.png, "62-03"
19 | ```
20 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/iiit5k/metafile.yml:
--------------------------------------------------------------------------------
1 | Name: 'IIIT5K'
2 | Paper:
3 | Title: Scene Text Recognition using Higher Order Language Priors
4 | URL: http://cvit.iiit.ac.in/projects/SceneTextUnderstanding/Home/mishraBMVC12.pdf
5 | Venue: BMVC
6 | Year: '2012'
7 | BibTeX: '@InProceedings{MishraBMVC12,
8 | author = "Mishra, A. and Alahari, K. and Jawahar, C.~V.",
9 | title = "Scene Text Recognition using Higher Order Language Priors",
10 | booktitle = "BMVC",
11 | year = "2012"}'
12 | Data:
13 | Website: http://cvit.iiit.ac.in/projects/SceneTextUnderstanding/IIIT5K.html
14 | Language:
15 | - English
16 | Scene:
17 | - Natural Scene
18 | Granularity:
19 | - Word
20 | Tasks:
21 | - textrecog
22 | License:
23 | Type: N/A
24 | Link: N/A
25 | Format: .txt
26 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/iiit5k/sample_anno.md:
--------------------------------------------------------------------------------
1 | **Text Recognition**
2 |
3 | ```text
4 | # img_name text
5 |
6 | train/1009_2.png You
7 | train/1017_1.png Rescue
8 | train/1017_2.png mission
9 | ```
10 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/mjsynth/sample_anno.md:
--------------------------------------------------------------------------------
1 | **Text Recognition**
2 |
3 | ```txt
4 | ./3000/7/182_slinking_71711.jpg 71711
5 | ./3000/7/182_REMODELERS_64541.jpg 64541
6 | ```
7 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/sroie/sample_anno.md:
--------------------------------------------------------------------------------
1 | **Text Detection, Text Recognition and Text Spotting**
2 |
3 | ```text
4 | # x1,y1,x2,y2,x3,y3,x4,y4,trans
5 |
6 | 72,25,326,25,326,64,72,64,TAN WOON YANN
7 | 50,82,440,82,440,121,50,121,BOOK TA .K(TAMAN DAYA) SDN BND
8 | 205,121,285,121,285,139,205,139,789417-W
9 | ```
10 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/sroie/textrecog.py:
--------------------------------------------------------------------------------
1 | _base_ = ['textdet.py']
2 |
3 | _base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train'
4 | _base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test'
5 | _base_.train_preparer.packer.type = 'TextRecogCropPacker'
6 | _base_.test_preparer.packer.type = 'TextRecogCropPacker'
7 |
8 | config_generator = dict(type='TextRecogConfigGenerator')
9 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/sroie/textspotting.py:
--------------------------------------------------------------------------------
1 | _base_ = ['textdet.py']
2 |
3 | _base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train'
4 | _base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test'
5 | _base_.train_preparer.packer.type = 'TextSpottingPacker'
6 | _base_.test_preparer.packer.type = 'TextSpottingPacker'
7 |
8 | config_generator = dict(type='TextSpottingConfigGenerator')
9 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/svt/metafile.yml:
--------------------------------------------------------------------------------
1 | Name: 'Street View Text Dataset (SVT)'
2 | Paper:
3 | Title: Word Spotting in the Wild
4 | URL: https://link.springer.com/content/pdf/10.1007/978-3-642-15549-9_43.pdf
5 | Venue: ECCV
6 | Year: '2010'
7 | BibTeX: '@inproceedings{wang2010word,
8 | title={Word spotting in the wild},
9 | author={Wang, Kai and Belongie, Serge},
10 | booktitle={European conference on computer vision},
11 | pages={591--604},
12 | year={2010},
13 | organization={Springer}}'
14 | Data:
15 | Website: http://www.iapr-tc11.org/mediawiki/index.php/The_Street_View_Text_Dataset
16 | Language:
17 | - English
18 | Scene:
19 | - Natural Scene
20 | Granularity:
21 | - Word
22 | Tasks:
23 | - textdet
24 | - textrecog
25 | - textspotting
26 | License:
27 | Type: N/A
28 | Link: N/A
29 | Format: .xml
30 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/svt/textrecog.py:
--------------------------------------------------------------------------------
1 | _base_ = ['textdet.py']
2 |
3 | _base_.train_preparer.packer.type = 'TextRecogCropPacker'
4 | _base_.test_preparer.packer.type = 'TextRecogCropPacker'
5 |
6 | config_generator = dict(type='TextRecogConfigGenerator')
7 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/svt/textspotting.py:
--------------------------------------------------------------------------------
1 | _base_ = ['textdet.py']
2 |
3 | _base_.train_preparer.packer.type = 'TextSpottingPacker'
4 | _base_.test_preparer.packer.type = 'TextSpottingPacker'
5 |
6 | config_generator = dict(type='TextSpottingConfigGenerator')
7 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/svtp/sample_anno.md:
--------------------------------------------------------------------------------
1 | **Text Recognition**
2 |
3 | ```txt
4 | 13_15_0_par.jpg WYNDHAM
5 | 13_15_1_par.jpg HOTEL
6 | 12_16_0_par.jpg UNITED
7 | ```
8 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/synthtext/textspotting.py:
--------------------------------------------------------------------------------
1 | _base_ = ['textdet.py']
2 |
3 | _base_.train_preparer.packer.type = 'TextSpottingPacker'
4 | _base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train'
5 |
6 | config_generator = dict(type='TextSpottingConfigGenerator')
7 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/textocr/textrecog.py:
--------------------------------------------------------------------------------
1 | _base_ = ['textdet.py']
2 |
3 | _base_.train_preparer.packer.type = 'TextRecogCropPacker'
4 | _base_.val_preparer.packer.type = 'TextRecogCropPacker'
5 |
6 | config_generator = dict(type='TextRecogConfigGenerator')
7 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/textocr/textspotting.py:
--------------------------------------------------------------------------------
1 | _base_ = ['textdet.py']
2 |
3 | _base_.train_preparer.packer.type = 'TextSpottingPacker'
4 | _base_.val_preparer.packer.type = 'TextSpottingPacker'
5 |
6 | config_generator = dict(type='TextSpottingConfigGenerator')
7 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/totaltext/sample_anno.md:
--------------------------------------------------------------------------------
1 | **Text Detection/Spotting**
2 |
3 | ```text
4 | x: [[259 313 389 427 354 302]], y: [[542 462 417 459 507 582]], ornt: [u'c'], transcriptions: [u'PAUL']
5 | x: [[400 478 494 436]], y: [[398 380 448 465]], ornt: [u'#'], transcriptions: [u'#']
6 | ```
7 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/totaltext/textrecog.py:
--------------------------------------------------------------------------------
1 | _base_ = ['textdet.py']
2 |
3 | _base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train'
4 | _base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test'
5 | _base_.train_preparer.packer.type = 'TextRecogCropPacker'
6 | _base_.test_preparer.packer.type = 'TextRecogCropPacker'
7 |
8 | config_generator = dict(type='TextRecogConfigGenerator')
9 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/wildreceipt/textdet.py:
--------------------------------------------------------------------------------
1 | _base_ = ['kie.py']
2 |
3 | _base_.train_preparer.update(
4 | dict(
5 | parser=dict(type='WildreceiptTextDetAnnParser'),
6 | packer=dict(type='TextDetPacker'),
7 | dumper=dict(type='JsonDumper')))
8 | _base_.test_preparer.update(
9 | dict(
10 | parser=dict(type='WildreceiptTextDetAnnParser'),
11 | packer=dict(type='TextDetPacker'),
12 | dumper=dict(type='JsonDumper')))
13 |
14 | config_generator = dict(type='TextDetConfigGenerator')
15 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/wildreceipt/textrecog.py:
--------------------------------------------------------------------------------
1 | _base_ = ['textdet.py']
2 |
3 | _base_.train_preparer.update(
4 | dict(
5 | parser=dict(type='WildreceiptTextDetAnnParser'),
6 | packer=dict(type='TextRecogCropPacker'),
7 | dumper=dict(type='JsonDumper')))
8 |
9 | _base_.test_preparer.update(
10 | dict(
11 | parser=dict(type='WildreceiptTextDetAnnParser'),
12 | packer=dict(type='TextRecogCropPacker'),
13 | dumper=dict(type='JsonDumper')))
14 |
15 | config_generator = dict(type='TextRecogConfigGenerator')
16 |
--------------------------------------------------------------------------------
/mmocr/dataset_zoo/wildreceipt/textspotting.py:
--------------------------------------------------------------------------------
1 | _base_ = ['textdet.py']
2 |
3 | _base_.train_preparer.packer.type = 'TextSpottingPacker'
4 | _base_.test_preparer.packer.type = 'TextSpottingPacker'
5 |
6 | config_generator = dict(type='TextSpottingConfigGenerator')
7 |
--------------------------------------------------------------------------------
/mmocr/demo/demo_densetext_det.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/demo_densetext_det.jpg
--------------------------------------------------------------------------------
/mmocr/demo/demo_kie.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/demo_kie.jpeg
--------------------------------------------------------------------------------
/mmocr/demo/demo_text_det.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/demo_text_det.jpg
--------------------------------------------------------------------------------
/mmocr/demo/demo_text_ocr.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/demo_text_ocr.jpg
--------------------------------------------------------------------------------
/mmocr/demo/demo_text_recog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/demo_text_recog.jpg
--------------------------------------------------------------------------------
/mmocr/demo/resources/demo_kie_pred.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/resources/demo_kie_pred.png
--------------------------------------------------------------------------------
/mmocr/demo/resources/det_vis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/resources/det_vis.png
--------------------------------------------------------------------------------
/mmocr/demo/resources/kie_vis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/resources/kie_vis.png
--------------------------------------------------------------------------------
/mmocr/demo/resources/log_analysis_demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/resources/log_analysis_demo.png
--------------------------------------------------------------------------------
/mmocr/demo/resources/rec_vis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/resources/rec_vis.png
--------------------------------------------------------------------------------
/mmocr/dicts/english_digits_symbols.txt:
--------------------------------------------------------------------------------
1 | 0
2 | 1
3 | 2
4 | 3
5 | 4
6 | 5
7 | 6
8 | 7
9 | 8
10 | 9
11 | a
12 | b
13 | c
14 | d
15 | e
16 | f
17 | g
18 | h
19 | i
20 | j
21 | k
22 | l
23 | m
24 | n
25 | o
26 | p
27 | q
28 | r
29 | s
30 | t
31 | u
32 | v
33 | w
34 | x
35 | y
36 | z
37 | A
38 | B
39 | C
40 | D
41 | E
42 | F
43 | G
44 | H
45 | I
46 | J
47 | K
48 | L
49 | M
50 | N
51 | O
52 | P
53 | Q
54 | R
55 | S
56 | T
57 | U
58 | V
59 | W
60 | X
61 | Y
62 | Z
63 | !
64 | "
65 | #
66 | $
67 | %
68 | &
69 | '
70 | (
71 | )
72 | *
73 | +
74 | ,
75 | -
76 | .
77 | /
78 | :
79 | ;
80 | <
81 | =
82 | >
83 | ?
84 | @
85 | [
86 | \
87 | ]
88 | _
89 | `
90 | ~
--------------------------------------------------------------------------------
/mmocr/dicts/english_digits_symbols_space.txt:
--------------------------------------------------------------------------------
1 | 0
2 | 1
3 | 2
4 | 3
5 | 4
6 | 5
7 | 6
8 | 7
9 | 8
10 | 9
11 | a
12 | b
13 | c
14 | d
15 | e
16 | f
17 | g
18 | h
19 | i
20 | j
21 | k
22 | l
23 | m
24 | n
25 | o
26 | p
27 | q
28 | r
29 | s
30 | t
31 | u
32 | v
33 | w
34 | x
35 | y
36 | z
37 | A
38 | B
39 | C
40 | D
41 | E
42 | F
43 | G
44 | H
45 | I
46 | J
47 | K
48 | L
49 | M
50 | N
51 | O
52 | P
53 | Q
54 | R
55 | S
56 | T
57 | U
58 | V
59 | W
60 | X
61 | Y
62 | Z
63 | !
64 | "
65 | #
66 | $
67 | %
68 | &
69 | '
70 | (
71 | )
72 | *
73 | +
74 | ,
75 | -
76 | .
77 | /
78 | :
79 | ;
80 | <
81 | =
82 | >
83 | ?
84 | @
85 | [
86 | \
87 | ]
88 | _
89 | `
90 | ~
91 |
--------------------------------------------------------------------------------
/mmocr/dicts/lower_english_digits.txt:
--------------------------------------------------------------------------------
1 | 0
2 | 1
3 | 2
4 | 3
5 | 4
6 | 5
7 | 6
8 | 7
9 | 8
10 | 9
11 | a
12 | b
13 | c
14 | d
15 | e
16 | f
17 | g
18 | h
19 | i
20 | j
21 | k
22 | l
23 | m
24 | n
25 | o
26 | p
27 | q
28 | r
29 | s
30 | t
31 | u
32 | v
33 | w
34 | x
35 | y
36 | z
--------------------------------------------------------------------------------
/mmocr/dicts/lower_english_digits_space.txt:
--------------------------------------------------------------------------------
1 | 0
2 | 1
3 | 2
4 | 3
5 | 4
6 | 5
7 | 6
8 | 7
9 | 8
10 | 9
11 | a
12 | b
13 | c
14 | d
15 | e
16 | f
17 | g
18 | h
19 | i
20 | j
21 | k
22 | l
23 | m
24 | n
25 | o
26 | p
27 | q
28 | r
29 | s
30 | t
31 | u
32 | v
33 | w
34 | x
35 | y
36 | z
37 |
--------------------------------------------------------------------------------
/mmocr/dicts/sdmgr_dict.txt:
--------------------------------------------------------------------------------
1 | /
2 | \
3 | .
4 | $
5 | £
6 | €
7 | ¥
8 | :
9 | -
10 | ,
11 | *
12 | #
13 | (
14 | )
15 | %
16 | @
17 | !
18 | '
19 | &
20 | =
21 | >
22 | +
23 | "
24 | ×
25 | ?
26 | <
27 | [
28 | ]
29 | _
30 | 0
31 | 1
32 | 2
33 | 3
34 | 4
35 | 5
36 | 6
37 | 7
38 | 8
39 | 9
40 | a
41 | b
42 | c
43 | d
44 | e
45 | f
46 | g
47 | h
48 | i
49 | j
50 | k
51 | l
52 | m
53 | n
54 | o
55 | p
56 | q
57 | r
58 | s
59 | t
60 | u
61 | v
62 | w
63 | x
64 | y
65 | z
66 | A
67 | B
68 | C
69 | D
70 | E
71 | F
72 | G
73 | H
74 | I
75 | J
76 | K
77 | L
78 | M
79 | N
80 | O
81 | P
82 | Q
83 | R
84 | S
85 | T
86 | U
87 | V
88 | W
89 | X
90 | Y
91 | Z
--------------------------------------------------------------------------------
/mmocr/docker/serve/config.properties:
--------------------------------------------------------------------------------
1 | inference_address=http://0.0.0.0:8080
2 | management_address=http://0.0.0.0:8081
3 | metrics_address=http://0.0.0.0:8082
4 | model_store=/home/model-server/model-store
5 | load_models=all
6 |
--------------------------------------------------------------------------------
/mmocr/docker/serve/entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 |
4 | if [[ "$1" = "serve" ]]; then
5 | shift 1
6 | torchserve --start --ts-config /home/model-server/config.properties
7 | else
8 | eval "$@"
9 | fi
10 |
11 | # prevent docker exit
12 | tail -f /dev/null
13 |
--------------------------------------------------------------------------------
/mmocr/docs/en/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/mmocr/docs/en/_static/css/readthedocs.css:
--------------------------------------------------------------------------------
1 | .header-logo {
2 | background-image: url("../images/mmocr.png");
3 | background-size: 110px 40px;
4 | height: 40px;
5 | width: 110px;
6 | }
7 |
--------------------------------------------------------------------------------
/mmocr/docs/en/_static/images/mmocr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/docs/en/_static/images/mmocr.png
--------------------------------------------------------------------------------
/mmocr/docs/en/_static/js/collapsed.js:
--------------------------------------------------------------------------------
1 | var collapsedSections = ['Migration Guides', 'API Reference']
2 |
--------------------------------------------------------------------------------
/mmocr/docs/en/_templates/classtemplate.rst:
--------------------------------------------------------------------------------
1 | .. role:: hidden
2 | :class: hidden-section
3 | .. currentmodule:: {{ module }}
4 |
5 |
6 | {{ name | underline}}
7 |
8 | .. autoclass:: {{ name }}
9 | :members:
10 |
11 |
12 | ..
13 | autogenerated from source/_templates/classtemplate.rst
14 | note it does not have :inherited-members:
15 |
--------------------------------------------------------------------------------
/mmocr/docs/en/api/apis.rst:
--------------------------------------------------------------------------------
1 | .. role:: hidden
2 | :class: hidden-section
3 |
4 | mmocr.apis
5 | ===================================
6 |
7 | .. contents:: mmocr.apis
8 | :depth: 2
9 | :local:
10 | :backlinks: top
11 |
12 | .. currentmodule:: mmocr.apis.inferencers
13 |
14 | Inferencers
15 | ---------------------------------------------
16 |
17 | .. autosummary::
18 | :toctree: generated
19 | :nosignatures:
20 | :template: classtemplate.rst
21 |
22 | MMOCRInferencer
23 | TextDetInferencer
24 | TextRecInferencer
25 | TextSpotInferencer
26 | KIEInferencer
27 |
--------------------------------------------------------------------------------
/mmocr/docs/en/api/engine.rst:
--------------------------------------------------------------------------------
1 | .. role:: hidden
2 | :class: hidden-section
3 |
4 | mmocr.engine
5 | ===================================
6 |
7 | .. contents:: mmocr.engine
8 | :depth: 2
9 | :local:
10 | :backlinks: top
11 |
12 | .. currentmodule:: mmocr.engine.hooks
13 |
14 | Hooks
15 | ---------------------------------------------
16 |
17 | .. autosummary::
18 | :toctree: generated
19 | :nosignatures:
20 | :template: classtemplate.rst
21 |
22 | VisualizationHook
23 |
--------------------------------------------------------------------------------
/mmocr/docs/en/api/structures.rst:
--------------------------------------------------------------------------------
1 | .. role:: hidden
2 | :class: hidden-section
3 |
4 | mmocr.structures
5 | ===================================
6 |
7 | .. currentmodule:: mmocr.structures
8 | .. autosummary::
9 | :toctree: generated
10 | :nosignatures:
11 | :template: classtemplate.rst
12 |
13 | TextDetDataSample
14 | TextRecogDataSample
15 | KIEDataSample
16 |
--------------------------------------------------------------------------------
/mmocr/docs/en/api/visualization.rst:
--------------------------------------------------------------------------------
1 | .. role:: hidden
2 | :class: hidden-section
3 |
4 | mmocr.visualization
5 | ===================================
6 |
7 | .. currentmodule:: mmocr.visualization
8 |
9 | .. autosummary::
10 | :toctree: generated
11 | :nosignatures:
12 | :template: classtemplate.rst
13 |
14 | BaseLocalVisualizer
15 | TextDetLocalVisualizer
16 | TextRecogLocalVisualizer
17 | TextSpottingLocalVisualizer
18 | KIELocalVisualizer
19 |
--------------------------------------------------------------------------------
/mmocr/docs/en/basic_concepts/convention.md:
--------------------------------------------------------------------------------
1 | # Convention\[coming soon\]
2 |
3 | Coming Soon!
4 |
--------------------------------------------------------------------------------
/mmocr/docs/en/basic_concepts/data_flow.md:
--------------------------------------------------------------------------------
1 | # Data Flow\[coming soon\]
2 |
3 | Coming Soon!
4 |
--------------------------------------------------------------------------------
/mmocr/docs/en/basic_concepts/engine.md:
--------------------------------------------------------------------------------
1 | # Engine\[coming soon\]
2 |
3 | Coming Soon!
4 |
--------------------------------------------------------------------------------
/mmocr/docs/en/basic_concepts/models.md:
--------------------------------------------------------------------------------
1 | # Models\[coming soon\]
2 |
3 | Coming Soon!
4 |
--------------------------------------------------------------------------------
/mmocr/docs/en/basic_concepts/overview.md:
--------------------------------------------------------------------------------
1 | # Overview & Features\[coming soon\]
2 |
3 | Coming Soon!
4 |
--------------------------------------------------------------------------------
/mmocr/docs/en/basic_concepts/visualizers.md:
--------------------------------------------------------------------------------
1 | # Visualizers\[coming soon\]
2 |
3 | Coming Soon!
4 |
--------------------------------------------------------------------------------
/mmocr/docs/en/docutils.conf:
--------------------------------------------------------------------------------
1 | [html writers]
2 | table_style: colwidths-auto
3 |
--------------------------------------------------------------------------------
/mmocr/docs/en/migration/model.md:
--------------------------------------------------------------------------------
1 | # Pretrained Model Migration
2 |
 3 | Due to the extensive refactoring and fixing of the model structure in the new version, MMOCR 1.x does not support loading weights trained by the old version. We have updated the pre-trained weights and logs of all models on our website.
4 |
 5 | In addition, we are working on the development of a weight migration tool for text detection tasks and plan to release it in the near future. Since the text recognition and key information extraction models have been modified too extensively and the migration would be lossy, we do not plan to support them for the time being. If you have specific requirements, please feel free to raise an [Issue](https://github.com/open-mmlab/mmocr/issues).
6 |
--------------------------------------------------------------------------------
/mmocr/docs/en/requirements.txt:
--------------------------------------------------------------------------------
1 | recommonmark
2 | sphinx
3 | sphinx_markdown_tables
4 | sphinx_rtd_theme
5 |
--------------------------------------------------------------------------------
/mmocr/docs/en/switch_language.md:
--------------------------------------------------------------------------------
1 | ## English
2 |
3 | ## 简体中文
4 |
--------------------------------------------------------------------------------
/mmocr/docs/zh_cn/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | # SOURCEDIR is where conf.py lives; BUILDDIR receives the rendered output.
 7 | SPHINXOPTS ?=
 8 | SPHINXBUILD ?= sphinx-build
 9 | SOURCEDIR = .
10 | BUILDDIR = _build
11 | 
12 | # Put it first so that "make" without argument is like "make help".
13 | help:
14 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
15 | 
16 | .PHONY: help Makefile
17 | 
18 | # Catch-all target: route all unknown targets to Sphinx using the new
19 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
20 | %: Makefile
21 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/mmocr/docs/zh_cn/_static/css/readthedocs.css:
--------------------------------------------------------------------------------
 1 | /* Logo image shown in the docs theme header; sized to match mmocr.png. */
 2 | .header-logo {
 3 |   background-image: url("../images/mmocr.png");
 4 |   background-size: 110px 40px;
 5 |   height: 40px;
 6 |   width: 110px;
 7 | }
7 |
--------------------------------------------------------------------------------
/mmocr/docs/zh_cn/_static/images/mmocr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/docs/zh_cn/_static/images/mmocr.png
--------------------------------------------------------------------------------
/mmocr/docs/zh_cn/_static/js/collapsed.js:
--------------------------------------------------------------------------------
 1 | // Sidebar section titles that should start collapsed in the rendered docs.
 2 | var collapsedSections = ['MMOCR 0.x 迁移指南', 'API 文档']
2 |
--------------------------------------------------------------------------------
/mmocr/docs/zh_cn/_templates/classtemplate.rst:
--------------------------------------------------------------------------------
1 | .. role:: hidden
2 | :class: hidden-section
3 | .. currentmodule:: {{ module }}
4 |
5 |
6 | {{ name | underline}}
7 |
8 | .. autoclass:: {{ name }}
9 | :members:
10 |
11 |
12 | ..
13 | autogenerated from source/_templates/classtemplate.rst
14 | note it does not have :inherited-members:
15 |
--------------------------------------------------------------------------------
/mmocr/docs/zh_cn/api/apis.rst:
--------------------------------------------------------------------------------
1 | .. role:: hidden
2 | :class: hidden-section
3 |
4 | mmocr.apis
5 | ===================================
6 |
7 | .. contents:: mmocr.apis
8 | :depth: 2
9 | :local:
10 | :backlinks: top
11 |
12 | .. currentmodule:: mmocr.apis.inferencers
13 |
14 | Inferencers
15 | ---------------------------------------------
16 |
17 | .. autosummary::
18 | :toctree: generated
19 | :nosignatures:
20 | :template: classtemplate.rst
21 |
22 | MMOCRInferencer
23 | TextDetInferencer
24 | TextRecInferencer
25 | TextSpotInferencer
26 | KIEInferencer
27 |
--------------------------------------------------------------------------------
/mmocr/docs/zh_cn/api/engine.rst:
--------------------------------------------------------------------------------
1 | .. role:: hidden
2 | :class: hidden-section
3 |
4 | mmocr.engine
5 | ===================================
6 |
7 | .. contents:: mmocr.engine
8 | :depth: 2
9 | :local:
10 | :backlinks: top
11 |
12 | .. currentmodule:: mmocr.engine.hooks
13 |
14 | Hooks
15 | ---------------------------------------------
16 |
17 | .. autosummary::
18 | :toctree: generated
19 | :nosignatures:
20 | :template: classtemplate.rst
21 |
22 | VisualizationHook
23 |
--------------------------------------------------------------------------------
/mmocr/docs/zh_cn/api/structures.rst:
--------------------------------------------------------------------------------
1 | .. role:: hidden
2 | :class: hidden-section
3 |
4 | mmocr.structures
5 | ===================================
6 |
7 | .. currentmodule:: mmocr.structures
8 | .. autosummary::
9 | :toctree: generated
10 | :nosignatures:
11 | :template: classtemplate.rst
12 |
13 | TextDetDataSample
14 | TextRecogDataSample
15 | KIEDataSample
16 |
--------------------------------------------------------------------------------
/mmocr/docs/zh_cn/api/visualization.rst:
--------------------------------------------------------------------------------
1 | .. role:: hidden
2 | :class: hidden-section
3 |
4 | mmocr.visualization
5 | ===================================
6 |
7 | .. currentmodule:: mmocr.visualization
8 |
9 | .. autosummary::
10 | :toctree: generated
11 | :nosignatures:
12 | :template: classtemplate.rst
13 |
14 | BaseLocalVisualizer
15 | TextDetLocalVisualizer
16 | TextRecogLocalVisualizer
17 | TextSpottingLocalVisualizer
18 | KIELocalVisualizer
19 |
--------------------------------------------------------------------------------
/mmocr/docs/zh_cn/basic_concepts/convention.md:
--------------------------------------------------------------------------------
1 | # 开发默认约定\[待更新\]
2 |
3 | 待更新
4 |
--------------------------------------------------------------------------------
/mmocr/docs/zh_cn/basic_concepts/data_flow.md:
--------------------------------------------------------------------------------
1 | # 数据流\[待更新\]
2 |
3 | 待更新
4 |
--------------------------------------------------------------------------------
/mmocr/docs/zh_cn/basic_concepts/engine.md:
--------------------------------------------------------------------------------
1 | # 引擎\[待更新\]
2 |
3 | 待更新
4 |
--------------------------------------------------------------------------------
/mmocr/docs/zh_cn/basic_concepts/models.md:
--------------------------------------------------------------------------------
1 | # 模型\[待更新\]
2 |
3 | 待更新
4 |
--------------------------------------------------------------------------------
/mmocr/docs/zh_cn/basic_concepts/overview.md:
--------------------------------------------------------------------------------
1 | # 设计理念与特性\[待更新\]
2 |
3 | 待更新
4 |
--------------------------------------------------------------------------------
/mmocr/docs/zh_cn/basic_concepts/visualizers.md:
--------------------------------------------------------------------------------
1 | # 可视化组件\[待更新\]
2 |
3 | 待更新
4 |
--------------------------------------------------------------------------------
/mmocr/docs/zh_cn/cp_origin_docs.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Copy *.md files from docs/ if it doesn't have a Chinese translation
 4 | 
 5 | # NOTE(review): `find -printf` is a GNU findutils extension; `%P\n` prints
 6 | # each path relative to ../en/. This script assumes a GNU/Linux environment.
 7 | for filename in $(find ../en/ -name '*.md' -printf "%P\n");
 8 | do
 9 |   # Recreate the English docs' directory layout, then copy without
10 |   # overwriting (-n) so existing Chinese translations are kept.
11 |   mkdir -p $(dirname $filename)
12 |   cp -n ../en/$filename ./$filename
13 | done
10 |
--------------------------------------------------------------------------------
/mmocr/docs/zh_cn/docutils.conf:
--------------------------------------------------------------------------------
1 | [html writers]
2 | table_style: colwidths-auto
3 |
--------------------------------------------------------------------------------
/mmocr/docs/zh_cn/migration/model.md:
--------------------------------------------------------------------------------
1 | # 预训练模型迁移指南
2 |
3 | 由于在新版本中我们对模型的结构进行了大量的重构和修复,MMOCR 1.x 并不能直接读入旧版的预训练权重。我们在网站上同步更新了所有模型的预训练权重和log,供有需要的用户使用。
4 |
5 | 此外,我们正在进行针对文本检测任务的权重迁移工具的开发,并计划于近期版本内发布。由于文本识别和关键信息提取模型改动过大,且迁移是有损的,我们暂时不计划作相应支持。如果您有具体的需求,欢迎通过 [Issue](https://github.com/open-mmlab/mmocr/issues) 向我们提问。
6 |
--------------------------------------------------------------------------------
/mmocr/docs/zh_cn/notes/branches.md:
--------------------------------------------------------------------------------
1 | # 分支
2 |
3 | 本文档旨在全面解释 MMOCR 中每个分支的目的和功能。
4 |
5 | ## 分支概述
6 |
7 | ### 1. `main`
8 |
9 | `main` 分支是 MMOCR 项目的默认分支。它包含了 MMOCR 的最新稳定版本,目前包含了 MMOCR 1.x(例如 v1.0.0)的代码。`main` 分支确保用户能够使用最新和最可靠的软件版本。
10 |
11 | ### 2. `dev-1.x`
12 |
13 | `dev-1.x` 分支用于开发 MMOCR 的下一个版本。此分支将在发版前进行依赖性测试,通过的提交将会合成到新版本中,并被发布到 `main` 分支。通过设置单独的开发分支,项目可以在不影响 `main` 分支稳定性的情况下继续发展。**所有 PR 应合并到 `dev-1.x` 分支。**
14 |
15 | ### 3. `0.x`
16 |
17 | `0.x` 分支用作 MMOCR 0.x(例如 v0.6.3)的存档。此分支将不再积极接受更新或改进,但它仍可作为历史参考,或供尚未升级到 MMOCR 1.x 的用户使用。
18 |
19 | ### 4. `1.x`
20 |
21 | 它是 `main` 分支的别名,旨在实现兼容性过渡时期的平稳切换。它将在 2023 年的年中删除。
22 |
23 | ```{note}
24 | 分支映射在 2023.04.06 发生了变化。有关旧分支映射和迁移指南,请参阅[分支迁移指南](../migration/branches.md)。
25 | ```
26 |
--------------------------------------------------------------------------------
/mmocr/docs/zh_cn/switch_language.md:
--------------------------------------------------------------------------------
1 | ## English
2 |
3 | ## 简体中文
4 |
--------------------------------------------------------------------------------
/mmocr/mmocr/apis/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .inferencers import * # NOQA
3 |
--------------------------------------------------------------------------------
/mmocr/mmocr/apis/inferencers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .kie_inferencer import KIEInferencer
3 | from .mmocr_inferencer import MMOCRInferencer
4 | from .textdet_inferencer import TextDetInferencer
5 | from .textrec_inferencer import TextRecInferencer
6 | from .textspot_inferencer import TextSpotInferencer
7 |
8 | __all__ = [
9 | 'TextDetInferencer', 'TextRecInferencer', 'KIEInferencer',
10 | 'MMOCRInferencer', 'TextSpotInferencer'
11 | ]
12 |
--------------------------------------------------------------------------------
/mmocr/mmocr/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .dataset_wrapper import ConcatDataset
3 | from .icdar_dataset import IcdarDataset
4 | from .ocr_dataset import OCRDataset
5 | from .recog_lmdb_dataset import RecogLMDBDataset
6 | from .recog_text_dataset import RecogTextDataset
7 | from .samplers import * # NOQA
8 | from .transforms import * # NOQA
9 | from .wildreceipt_dataset import WildReceiptDataset
10 |
11 | __all__ = [
12 | 'IcdarDataset', 'OCRDataset', 'RecogLMDBDataset', 'RecogTextDataset',
13 | 'WildReceiptDataset', 'ConcatDataset'
14 | ]
15 |
--------------------------------------------------------------------------------
/mmocr/mmocr/datasets/preparers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .config_generators import * # noqa
3 | from .data_preparer import DatasetPreparer
4 | from .dumpers import * # noqa
5 | from .gatherers import * # noqa
6 | from .obtainers import * # noqa
7 | from .packers import * # noqa
8 | from .parsers import * # noqa
9 |
10 | __all__ = ['DatasetPreparer']
11 |
--------------------------------------------------------------------------------
/mmocr/mmocr/datasets/preparers/config_generators/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .base import BaseDatasetConfigGenerator
3 | from .textdet_config_generator import TextDetConfigGenerator
4 | from .textrecog_config_generator import TextRecogConfigGenerator
5 | from .textspotting_config_generator import TextSpottingConfigGenerator
6 |
7 | __all__ = [
8 | 'BaseDatasetConfigGenerator', 'TextDetConfigGenerator',
9 | 'TextRecogConfigGenerator', 'TextSpottingConfigGenerator'
10 | ]
11 |
--------------------------------------------------------------------------------
/mmocr/mmocr/datasets/preparers/dumpers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .base import BaseDumper
3 | from .json_dumper import JsonDumper
4 | from .lmdb_dumper import TextRecogLMDBDumper
5 | from .wild_receipt_openset_dumper import WildreceiptOpensetDumper
6 |
7 | __all__ = [
8 | 'BaseDumper', 'JsonDumper', 'WildreceiptOpensetDumper',
9 | 'TextRecogLMDBDumper'
10 | ]
11 |
--------------------------------------------------------------------------------
/mmocr/mmocr/datasets/preparers/dumpers/json_dumper.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import os.path as osp
3 | from typing import Dict
4 |
5 | import mmengine
6 |
7 | from mmocr.registry import DATA_DUMPERS
8 | from .base import BaseDumper
9 |
10 |
11 | @DATA_DUMPERS.register_module()
12 | class JsonDumper(BaseDumper):
13 | """Dumper for json file."""
14 |
15 | def dump(self, data: Dict) -> None:
16 | """Dump data to json file.
17 |
18 | Args:
19 | data (Dict): Data to be dumped.
20 | """
21 |
22 | filename = f'{self.task}_{self.split}.json'
23 | dst_file = osp.join(self.data_root, filename)
24 | mmengine.dump(data, dst_file)
25 |
--------------------------------------------------------------------------------
/mmocr/mmocr/datasets/preparers/dumpers/wild_receipt_openset_dumper.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import os.path as osp
3 | from typing import List
4 |
5 | from mmocr.registry import DATA_DUMPERS
6 | from mmocr.utils import list_to_file
7 | from .base import BaseDumper
8 |
9 |
10 | @DATA_DUMPERS.register_module()
11 | class WildreceiptOpensetDumper(BaseDumper):
12 |
13 | def dump(self, data: List):
14 | """Dump data to txt file.
15 |
16 | Args:
17 | data (List): Data to be dumped.
18 | """
19 |
20 | filename = f'openset_{self.split}.txt'
21 | dst_file = osp.join(self.data_root, filename)
22 | list_to_file(dst_file, data)
23 |
--------------------------------------------------------------------------------
/mmocr/mmocr/datasets/preparers/gatherers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 |
3 | from .base import BaseGatherer
4 | from .mono_gatherer import MonoGatherer
5 | from .naf_gatherer import NAFGatherer
6 | from .pair_gatherer import PairGatherer
7 |
8 | __all__ = ['BaseGatherer', 'MonoGatherer', 'PairGatherer', 'NAFGatherer']
9 |
--------------------------------------------------------------------------------
/mmocr/mmocr/datasets/preparers/obtainers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .naive_data_obtainer import NaiveDataObtainer
3 |
4 | __all__ = ['NaiveDataObtainer']
5 |
--------------------------------------------------------------------------------
/mmocr/mmocr/datasets/preparers/packers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .base import BasePacker
3 | from .textdet_packer import TextDetPacker
4 | from .textrecog_packer import TextRecogCropPacker, TextRecogPacker
5 | from .textspotting_packer import TextSpottingPacker
6 | from .wildreceipt_packer import WildReceiptPacker
7 |
8 | __all__ = [
9 | 'BasePacker', 'TextDetPacker', 'TextRecogPacker', 'TextRecogCropPacker',
10 | 'TextSpottingPacker', 'WildReceiptPacker'
11 | ]
12 |
--------------------------------------------------------------------------------
/mmocr/mmocr/datasets/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .batch_aug import BatchAugSampler
3 |
4 | __all__ = ['BatchAugSampler']
5 |
--------------------------------------------------------------------------------
/mmocr/mmocr/engine/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .hooks import * # NOQA
3 |
--------------------------------------------------------------------------------
/mmocr/mmocr/engine/hooks/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .visualization_hook import VisualizationHook
3 |
4 | __all__ = ['VisualizationHook']
5 |
--------------------------------------------------------------------------------
/mmocr/mmocr/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .evaluator import * # NOQA
3 | from .metrics import * # NOQA
4 |
--------------------------------------------------------------------------------
/mmocr/mmocr/evaluation/evaluator/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .multi_datasets_evaluator import MultiDatasetsEvaluator
3 |
4 | __all__ = ['MultiDatasetsEvaluator']
5 |
--------------------------------------------------------------------------------
/mmocr/mmocr/evaluation/functional/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .hmean import compute_hmean
3 |
4 | __all__ = ['compute_hmean']
5 |
--------------------------------------------------------------------------------
/mmocr/mmocr/evaluation/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .f_metric import F1Metric
3 | from .hmean_iou_metric import HmeanIOUMetric
4 | from .recog_metric import CharMetric, OneMinusNEDMetric, WordMetric
5 |
6 | __all__ = [
7 | 'WordMetric', 'CharMetric', 'OneMinusNEDMetric', 'HmeanIOUMetric',
8 | 'F1Metric'
9 | ]
10 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .common import * # NOQA
3 | from .kie import * # NOQA
4 | from .textdet import * # NOQA
5 | from .textrecog import * # NOQA
6 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/common/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .backbones import * # NOQA
3 | from .dictionary import * # NOQA
4 | from .layers import * # NOQA
5 | from .losses import * # NOQA
6 | from .modules import * # NOQA
7 | from .plugins import * # NOQA
8 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/common/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .clip_resnet import CLIPResNet
3 | from .unet import UNet
4 |
5 | __all__ = ['UNet', 'CLIPResNet']
6 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/common/dictionary/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 |
3 | from .dictionary import Dictionary
4 |
5 | __all__ = ['Dictionary']
6 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/common/layers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .transformer_layers import TFDecoderLayer, TFEncoderLayer
3 |
4 | __all__ = ['TFEncoderLayer', 'TFDecoderLayer']
5 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/common/losses/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .bce_loss import (MaskedBalancedBCELoss, MaskedBalancedBCEWithLogitsLoss,
3 | MaskedBCELoss, MaskedBCEWithLogitsLoss)
4 | from .ce_loss import CrossEntropyLoss
5 | from .dice_loss import MaskedDiceLoss, MaskedSquareDiceLoss
6 | from .l1_loss import MaskedSmoothL1Loss, SmoothL1Loss
7 |
8 | __all__ = [
9 | 'MaskedBalancedBCEWithLogitsLoss', 'MaskedDiceLoss', 'MaskedSmoothL1Loss',
10 | 'MaskedSquareDiceLoss', 'MaskedBCEWithLogitsLoss', 'SmoothL1Loss',
11 | 'CrossEntropyLoss', 'MaskedBalancedBCELoss', 'MaskedBCELoss'
12 | ]
13 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/common/losses/ce_loss.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import torch.nn as nn
3 |
4 | from mmocr.registry import MODELS
5 |
6 |
 7 | @MODELS.register_module()
 8 | class CrossEntropyLoss(nn.CrossEntropyLoss):
 9 |     """Cross entropy loss.
10 | 
11 |     A thin wrapper around :class:`torch.nn.CrossEntropyLoss` whose only
12 |     purpose is to register the loss in MMOCR's ``MODELS`` registry so it
13 |     can be referenced from configs; no behavior is overridden.
14 |     """
10 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/common/modules/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .transformer_module import (MultiHeadAttention, PositionalEncoding,
3 | PositionwiseFeedForward,
4 | ScaledDotProductAttention)
5 |
6 | __all__ = [
7 | 'ScaledDotProductAttention', 'MultiHeadAttention',
8 | 'PositionwiseFeedForward', 'PositionalEncoding'
9 | ]
10 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/common/plugins/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .common import AvgPool2d
3 |
4 | __all__ = ['AvgPool2d']
5 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/kie/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .extractors import * # NOQA
3 | from .heads import * # NOQA
4 | from .module_losses import * # NOQA
5 | from .postprocessors import * # NOQA
6 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/kie/extractors/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .sdmgr import SDMGR
3 |
4 | __all__ = ['SDMGR']
5 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/kie/heads/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .sdmgr_head import SDMGRHead
3 |
4 | __all__ = ['SDMGRHead']
5 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/kie/module_losses/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .sdmgr_module_loss import SDMGRModuleLoss
3 |
4 | __all__ = ['SDMGRModuleLoss']
5 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/kie/postprocessors/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .sdmgr_postprocessor import SDMGRPostProcessor
3 |
4 | __all__ = ['SDMGRPostProcessor']
5 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textdet/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .data_preprocessors import * # NOQA
3 | from .detectors import * # NOQA
4 | from .heads import * # NOQA
5 | from .module_losses import * # NOQA
6 | from .necks import * # NOQA
7 | from .postprocessors import * # NOQA
8 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textdet/data_preprocessors/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .data_preprocessor import TextDetDataPreprocessor
3 |
4 | __all__ = ['TextDetDataPreprocessor']
5 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textdet/detectors/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .dbnet import DBNet
3 | from .drrg import DRRG
4 | from .fcenet import FCENet
5 | from .mmdet_wrapper import MMDetWrapper
6 | from .panet import PANet
7 | from .psenet import PSENet
8 | from .single_stage_text_detector import SingleStageTextDetector
9 | from .textsnake import TextSnake
10 |
11 | __all__ = [
12 | 'SingleStageTextDetector', 'DBNet', 'PANet', 'PSENet', 'TextSnake',
13 | 'FCENet', 'DRRG', 'MMDetWrapper'
14 | ]
15 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textdet/detectors/dbnet.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmocr.registry import MODELS
3 | from .single_stage_text_detector import SingleStageTextDetector
4 |
5 |
 6 | @MODELS.register_module()
 7 | class DBNet(SingleStageTextDetector):
 8 |     """The class for implementing DBNet text detector: Real-time Scene Text
 9 |     Detection with Differentiable Binarization.
10 | 
11 |     [https://arxiv.org/abs/1911.08947].
12 | 
13 |     The class body is empty: all behavior comes from
14 |     :class:`SingleStageTextDetector`; this subclass only provides a
15 |     registry name for configs.
16 |     """
13 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textdet/detectors/drrg.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmocr.registry import MODELS
3 | from .single_stage_text_detector import SingleStageTextDetector
4 |
5 |
 6 | @MODELS.register_module()
 7 | class DRRG(SingleStageTextDetector):
 8 |     """The class for implementing DRRG text detector. Deep Relational Reasoning
 9 |     Graph Network for Arbitrary Shape Text Detection.
10 | 
11 |     [https://arxiv.org/abs/2003.07493]
12 | 
13 |     The class body is empty: all behavior comes from
14 |     :class:`SingleStageTextDetector`; this subclass only provides a
15 |     registry name for configs.
16 |     """
13 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textdet/detectors/fcenet.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmocr.registry import MODELS
3 | from .single_stage_text_detector import SingleStageTextDetector
4 |
5 |
 6 | @MODELS.register_module()
 7 | class FCENet(SingleStageTextDetector):
 8 |     """The class for implementing FCENet text detector
 9 |     FCENet(CVPR2021): Fourier Contour Embedding for Arbitrary-shaped Text
10 |     Detection
11 | 
12 |     [https://arxiv.org/abs/2104.10442]
13 | 
14 |     The class body is empty: all behavior comes from
15 |     :class:`SingleStageTextDetector`; this subclass only provides a
16 |     registry name for configs.
17 |     """
14 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textdet/detectors/panet.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmocr.registry import MODELS
3 | from .single_stage_text_detector import SingleStageTextDetector
4 |
5 |
 6 | @MODELS.register_module()
 7 | class PANet(SingleStageTextDetector):
 8 |     """The class for implementing PANet text detector:
 9 | 
10 |     Efficient and Accurate Arbitrary-Shaped Text Detection with Pixel
11 |     Aggregation Network [https://arxiv.org/abs/1908.05900].
12 | 
13 |     The class body is empty: all behavior comes from
14 |     :class:`SingleStageTextDetector`; this subclass only provides a
15 |     registry name for configs.
16 |     """
13 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textdet/detectors/psenet.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmocr.registry import MODELS
3 | from .single_stage_text_detector import SingleStageTextDetector
4 |
5 |
@MODELS.register_module()
class PSENet(SingleStageTextDetector):
    """The class for implementing PSENet text detector: `Shape Robust Text
    Detection with Progressive Scale Expansion Network
    <https://arxiv.org/abs/1806.02559>`_.
    """
13 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textdet/detectors/textsnake.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmocr.registry import MODELS
3 | from .single_stage_text_detector import SingleStageTextDetector
4 |
5 |
@MODELS.register_module()
class TextSnake(SingleStageTextDetector):
    """The class for implementing TextSnake text detector: `TextSnake: A
    Flexible Representation for Detecting Text of Arbitrary Shapes
    <https://arxiv.org/abs/1807.01544>`_.
    """
13 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textdet/heads/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .base import BaseTextDetHead
3 | from .db_head import DBHead
4 | from .drrg_head import DRRGHead
5 | from .fce_head import FCEHead
6 | from .pan_head import PANHead
7 | from .pse_head import PSEHead
8 | from .textsnake_head import TextSnakeHead
9 |
10 | __all__ = [
11 | 'PSEHead', 'PANHead', 'DBHead', 'FCEHead', 'TextSnakeHead', 'DRRGHead',
12 | 'BaseTextDetHead'
13 | ]
14 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textdet/module_losses/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .db_module_loss import DBModuleLoss
3 | from .drrg_module_loss import DRRGModuleLoss
4 | from .fce_module_loss import FCEModuleLoss
5 | from .pan_module_loss import PANModuleLoss
6 | from .pse_module_loss import PSEModuleLoss
7 | from .seg_based_module_loss import SegBasedModuleLoss
8 | from .textsnake_module_loss import TextSnakeModuleLoss
9 |
10 | __all__ = [
11 | 'PANModuleLoss', 'PSEModuleLoss', 'DBModuleLoss', 'TextSnakeModuleLoss',
12 | 'FCEModuleLoss', 'DRRGModuleLoss', 'SegBasedModuleLoss'
13 | ]
14 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textdet/necks/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .fpem_ffm import FPEM_FFM
3 | from .fpn_cat import FPNC
4 | from .fpn_unet import FPN_UNet
5 | from .fpnf import FPNF
6 |
7 | __all__ = ['FPEM_FFM', 'FPNF', 'FPNC', 'FPN_UNet']
8 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textdet/postprocessors/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .base import BaseTextDetPostProcessor
3 | from .db_postprocessor import DBPostprocessor
4 | from .drrg_postprocessor import DRRGPostprocessor
5 | from .fce_postprocessor import FCEPostprocessor
6 | from .pan_postprocessor import PANPostprocessor
7 | from .pse_postprocessor import PSEPostprocessor
8 | from .textsnake_postprocessor import TextSnakePostprocessor
9 |
10 | __all__ = [
11 | 'PSEPostprocessor', 'PANPostprocessor', 'DBPostprocessor',
12 | 'DRRGPostprocessor', 'FCEPostprocessor', 'TextSnakePostprocessor',
13 | 'BaseTextDetPostProcessor'
14 | ]
15 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textrecog/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .backbones import * # NOQA
3 | from .data_preprocessors import * # NOQA
4 | from .decoders import * # NOQA
5 | from .encoders import * # NOQA
6 | from .layers import * # NOQA
7 | from .module_losses import * # NOQA
8 | from .plugins import * # NOQA
9 | from .postprocessors import * # NOQA
10 | from .preprocessors import * # NOQA
11 | from .recognizers import * # NOQA
12 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textrecog/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .mini_vgg import MiniVGG
3 | from .mobilenet_v2 import MobileNetV2
4 | from .nrtr_modality_transformer import NRTRModalityTransform
5 | from .resnet import ResNet
6 | from .resnet31_ocr import ResNet31OCR
7 | from .resnet_abi import ResNetABI
8 | from .shallow_cnn import ShallowCNN
9 |
10 | __all__ = [
11 | 'ResNet31OCR', 'MiniVGG', 'NRTRModalityTransform', 'ShallowCNN',
12 | 'ResNetABI', 'ResNet', 'MobileNetV2'
13 | ]
14 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textrecog/data_preprocessors/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .data_preprocessor import TextRecogDataPreprocessor
3 |
4 | __all__ = ['TextRecogDataPreprocessor']
5 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textrecog/encoders/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .abi_encoder import ABIEncoder
3 | from .aster_encoder import ASTEREncoder
4 | from .base import BaseEncoder
5 | from .channel_reduction_encoder import ChannelReductionEncoder
6 | from .nrtr_encoder import NRTREncoder
7 | from .sar_encoder import SAREncoder
8 | from .satrn_encoder import SATRNEncoder
9 | from .svtr_encoder import SVTREncoder
10 |
11 | __all__ = [
12 | 'SAREncoder', 'NRTREncoder', 'BaseEncoder', 'ChannelReductionEncoder',
13 | 'SATRNEncoder', 'ABIEncoder', 'SVTREncoder', 'ASTEREncoder'
14 | ]
15 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textrecog/encoders/base.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmengine.model import BaseModule
3 |
4 | from mmocr.registry import MODELS
5 |
6 |
@MODELS.register_module()
class BaseEncoder(BaseModule):
    """Base Encoder class for text recognition.

    The base implementation is an identity mapping; subclasses are
    expected to override :meth:`forward`.
    """

    def forward(self, feat, **kwargs):
        """Return ``feat`` unchanged (identity encoding)."""
        return feat
13 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textrecog/layers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .conv_layer import BasicBlock, Bottleneck
3 | from .dot_product_attention_layer import DotProductAttentionLayer
4 | from .lstm_layer import BidirectionalLSTM
5 | from .position_aware_layer import PositionAwareLayer
6 | from .robust_scanner_fusion_layer import RobustScannerFusionLayer
7 | from .satrn_layers import Adaptive2DPositionalEncoding, SATRNEncoderLayer
8 |
9 | __all__ = [
10 | 'BidirectionalLSTM', 'Adaptive2DPositionalEncoding', 'BasicBlock',
11 | 'Bottleneck', 'RobustScannerFusionLayer', 'DotProductAttentionLayer',
12 | 'PositionAwareLayer', 'SATRNEncoderLayer'
13 | ]
14 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textrecog/layers/lstm_layer.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import torch.nn as nn
3 |
4 |
class BidirectionalLSTM(nn.Module):
    """A bidirectional LSTM followed by a per-timestep linear projection.

    Args:
        nIn (int): Size of each input feature vector.
        nHidden (int): Hidden size of the LSTM (per direction).
        nOut (int): Size of each output feature vector.
    """

    def __init__(self, nIn, nHidden, nOut):
        super().__init__()

        # Bidirectional, so the LSTM yields 2 * nHidden features per step.
        self.rnn = nn.LSTM(nIn, nHidden, bidirectional=True)
        self.embedding = nn.Linear(nHidden * 2, nOut)

    def forward(self, input):
        """Map a ``(T, b, nIn)`` sequence to a ``(T, b, nOut)`` sequence."""
        recurrent, _ = self.rnn(input)
        seq_len, batch, hidden = recurrent.size()
        # Flatten time and batch so one Linear call projects every step.
        flattened = recurrent.view(seq_len * batch, hidden)
        projected = self.embedding(flattened)  # [seq_len * batch, nOut]
        return projected.view(seq_len, batch, -1)
22 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textrecog/layers/robust_scanner_fusion_layer.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import torch
3 | import torch.nn as nn
4 | from mmengine.model import BaseModule
5 |
6 |
class RobustScannerFusionLayer(BaseModule):
    """Fuse two same-shaped feature tensors with a gated linear unit.

    The inputs are concatenated along ``dim``, passed through a linear
    layer, then gated by a GLU, which halves the concatenated size back
    to ``dim_model`` along that dimension.

    Args:
        dim_model (int): Size of each input along the fusion dimension.
        dim (int): Dimension along which the inputs are concatenated.
            Defaults to -1.
        init_cfg (dict, optional): Initialization config. Defaults to None.
    """

    def __init__(self, dim_model, dim=-1, init_cfg=None):
        super().__init__(init_cfg=init_cfg)

        self.dim_model = dim_model
        self.dim = dim
        self.linear_layer = nn.Linear(dim_model * 2, dim_model * 2)
        self.glu_layer = nn.GLU(dim=dim)

    def forward(self, x0, x1):
        """Fuse ``x0`` and ``x1``; both must have identical shapes."""
        assert x0.size() == x1.size()
        concatenated = torch.cat([x0, x1], self.dim)
        # GLU splits its input in half along `dim` and gates one half
        # with the sigmoid of the other.
        return self.glu_layer(self.linear_layer(concatenated))
25 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textrecog/module_losses/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .abi_module_loss import ABIModuleLoss
3 | from .base import BaseTextRecogModuleLoss
4 | from .ce_module_loss import CEModuleLoss
5 | from .ctc_module_loss import CTCModuleLoss
6 |
7 | __all__ = [
8 | 'BaseTextRecogModuleLoss', 'CEModuleLoss', 'CTCModuleLoss', 'ABIModuleLoss'
9 | ]
10 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textrecog/plugins/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .common import GCAModule, Maxpool2d
3 |
4 | __all__ = ['Maxpool2d', 'GCAModule']
5 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textrecog/postprocessors/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .attn_postprocessor import AttentionPostprocessor
3 | from .base import BaseTextRecogPostprocessor
4 | from .ctc_postprocessor import CTCPostProcessor
5 |
6 | __all__ = [
7 | 'BaseTextRecogPostprocessor', 'AttentionPostprocessor', 'CTCPostProcessor'
8 | ]
9 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textrecog/preprocessors/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .tps_preprocessor import STN, TPStransform
3 |
4 | __all__ = ['TPStransform', 'STN']
5 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textrecog/preprocessors/base.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmengine.model import BaseModule
3 |
4 | from mmocr.registry import MODELS
5 |
6 |
@MODELS.register_module()
class BasePreprocessor(BaseModule):
    """Base Preprocessor class for text recognition.

    The base implementation is an identity mapping; subclasses are
    expected to override :meth:`forward`.
    """

    def forward(self, x, **kwargs):
        """Return ``x`` unchanged."""
        return x
13 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textrecog/recognizers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .abinet import ABINet
3 | from .aster import ASTER
4 | from .base import BaseRecognizer
5 | from .crnn import CRNN
6 | from .encoder_decoder_recognizer import EncoderDecoderRecognizer
7 | from .encoder_decoder_recognizer_tta import EncoderDecoderRecognizerTTAModel
8 | from .master import MASTER
9 | from .nrtr import NRTR
10 | from .robust_scanner import RobustScanner
11 | from .sar import SARNet
12 | from .satrn import SATRN
13 | from .svtr import SVTR
14 |
15 | __all__ = [
16 | 'BaseRecognizer', 'EncoderDecoderRecognizer', 'CRNN', 'SARNet', 'NRTR',
17 | 'RobustScanner', 'SATRN', 'ABINet', 'MASTER', 'SVTR', 'ASTER',
18 | 'EncoderDecoderRecognizerTTAModel'
19 | ]
20 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textrecog/recognizers/abinet.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmocr.registry import MODELS
3 | from .encoder_decoder_recognizer import EncoderDecoderRecognizer
4 |
5 |
@MODELS.register_module()
class ABINet(EncoderDecoderRecognizer):
    """Implementation of `Read Like Humans: Autonomous, Bidirectional and
    Iterative Language Modeling for Scene Text Recognition
    <https://arxiv.org/abs/2103.06495>`_
    """
13 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textrecog/recognizers/aster.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmocr.registry import MODELS
3 | from .encoder_decoder_recognizer import EncoderDecoderRecognizer
4 |
5 |
@MODELS.register_module()
class ASTER(EncoderDecoderRecognizer):
    """Implement `ASTER: An Attentional Scene Text Recognizer with Flexible
    Rectification <https://ieeexplore.ieee.org/abstract/document/8395027>`_"""
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textrecog/recognizers/nrtr.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmocr.registry import MODELS
3 | from .encoder_decoder_recognizer import EncoderDecoderRecognizer
4 |
5 |
@MODELS.register_module()
class NRTR(EncoderDecoderRecognizer):
    """Implementation of `NRTR: A No-Recurrence Sequence-to-Sequence Model
    for Scene Text Recognition <https://arxiv.org/abs/1806.00926>`_"""
9 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textrecog/recognizers/robust_scanner.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmocr.registry import MODELS
3 | from .encoder_decoder_recognizer import EncoderDecoderRecognizer
4 |
5 |
@MODELS.register_module()
class RobustScanner(EncoderDecoderRecognizer):
    """Implementation of `RobustScanner: Dynamically Enhancing Positional
    Clues for Robust Text Recognition <https://arxiv.org/abs/2007.07542>`_
    """
12 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textrecog/recognizers/sar.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmocr.registry import MODELS
3 | from .encoder_decoder_recognizer import EncoderDecoderRecognizer
4 |
5 |
@MODELS.register_module()
class SARNet(EncoderDecoderRecognizer):
    """Implementation of `SAR: Show, Attend and Read: A Simple and Strong
    Baseline for Irregular Text Recognition
    <https://arxiv.org/abs/1811.00751>`_"""
9 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textrecog/recognizers/satrn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmocr.registry import MODELS
3 | from .encoder_decoder_recognizer import EncoderDecoderRecognizer
4 |
5 |
@MODELS.register_module()
class SATRN(EncoderDecoderRecognizer):
    """Implementation of `SATRN: On Recognizing Texts of Arbitrary Shapes
    with 2D Self-Attention <https://arxiv.org/abs/1910.04396>`_"""
9 |
--------------------------------------------------------------------------------
/mmocr/mmocr/models/textrecog/recognizers/svtr.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmocr.registry import MODELS
3 | from .encoder_decoder_recognizer import EncoderDecoderRecognizer
4 |
5 |
@MODELS.register_module()
class SVTR(EncoderDecoderRecognizer):
    """A PyTorch implementation of : `SVTR: Scene Text Recognition with a
    Single Visual Model <https://arxiv.org/abs/2205.00159>`_"""
10 |
--------------------------------------------------------------------------------
/mmocr/mmocr/structures/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .kie_data_sample import KIEDataSample
3 | from .textdet_data_sample import TextDetDataSample
4 | from .textrecog_data_sample import TextRecogDataSample
5 | from .textspotting_data_sample import TextSpottingDataSample
6 |
7 | __all__ = [
8 | 'TextDetDataSample', 'TextRecogDataSample', 'KIEDataSample',
9 | 'TextSpottingDataSample'
10 | ]
11 |
--------------------------------------------------------------------------------
/mmocr/mmocr/testing/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .data import create_dummy_dict_file, create_dummy_textdet_inputs
3 |
4 | __all__ = ['create_dummy_dict_file', 'create_dummy_textdet_inputs']
5 |
--------------------------------------------------------------------------------
/mmocr/mmocr/utils/collect_env.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmengine.utils import get_git_hash
3 | from mmengine.utils.dl_utils import collect_env as collect_base_env
4 |
5 | import mmocr
6 |
7 |
def collect_env():
    """Collect the information of the running environments.

    Returns:
        dict: Environment info from mmengine's base collector plus an
        ``'MMOCR'`` entry of the form ``'<version>+<short git hash>'``.
    """
    env_info = collect_base_env()
    env_info['MMOCR'] = f'{mmocr.__version__}+{get_git_hash()[:7]}'
    return env_info


if __name__ == '__main__':
    env = collect_env()
    for name in env:
        print(f'{name}: {env[name]}')
18 |
--------------------------------------------------------------------------------
/mmocr/mmocr/version.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Open-MMLab. All rights reserved.
2 |
3 | __version__ = '1.0.0'
4 | short_version = __version__
5 |
--------------------------------------------------------------------------------
/mmocr/mmocr/visualization/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .base_visualizer import BaseLocalVisualizer
3 | from .kie_visualizer import KIELocalVisualizer
4 | from .textdet_visualizer import TextDetLocalVisualizer
5 | from .textrecog_visualizer import TextRecogLocalVisualizer
6 | from .textspotting_visualizer import TextSpottingLocalVisualizer
7 |
8 | __all__ = [
9 | 'BaseLocalVisualizer', 'KIELocalVisualizer', 'TextDetLocalVisualizer',
10 | 'TextRecogLocalVisualizer', 'TextSpottingLocalVisualizer'
11 | ]
12 |
--------------------------------------------------------------------------------
/mmocr/model-index.yml:
--------------------------------------------------------------------------------
1 | Import:
2 | - configs/textdet/dbnet/metafile.yml
3 | - configs/textdet/dbnetpp/metafile.yml
4 | - configs/textdet/maskrcnn/metafile.yml
5 | - configs/textdet/drrg/metafile.yml
6 | - configs/textdet/fcenet/metafile.yml
7 | - configs/textdet/panet/metafile.yml
8 | - configs/textdet/psenet/metafile.yml
9 | - configs/textdet/textsnake/metafile.yml
10 | - configs/textrecog/abinet/metafile.yml
11 | - configs/textrecog/aster/metafile.yml
12 | - configs/textrecog/crnn/metafile.yml
13 | - configs/textrecog/master/metafile.yml
14 | - configs/textrecog/nrtr/metafile.yml
15 | - configs/textrecog/svtr/metafile.yml
16 | - configs/textrecog/robust_scanner/metafile.yml
17 | - configs/textrecog/sar/metafile.yml
18 | - configs/textrecog/satrn/metafile.yml
19 | - configs/kie/sdmgr/metafile.yml
20 |
--------------------------------------------------------------------------------
/mmocr/my_test.sh:
--------------------------------------------------------------------------------
1 | CUDA_VISIBLE_DEVICES=0 python tools/test.py configs/textdet/dbnet/synth_data_train_100k_ic15_test.py output/new_10k_synthtext/epoch_1.pth --save-preds
2 |
--------------------------------------------------------------------------------
/mmocr/my_train.sh:
--------------------------------------------------------------------------------
1 | CUDA_VISIBLE_DEVICES=0 python tools/train.py configs/textdet/dbnet/synth_data_train_100k_ic15_test.py --work-dir output/new_SD_base_10000_curve --amp
2 |
--------------------------------------------------------------------------------
/mmocr/projects/ABCNet/abcnet/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
3 | from .metric import * # NOQA
4 | from .model import * # NOQA
5 | from .utils import * # NOQA
6 |
--------------------------------------------------------------------------------
/mmocr/projects/ABCNet/abcnet/metric/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .e2e_hmean_iou_metric import E2EHmeanIOUMetric
3 |
4 | __all__ = ['E2EHmeanIOUMetric']
5 |
--------------------------------------------------------------------------------
/mmocr/projects/ABCNet/abcnet/model/abcnet.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmocr.registry import MODELS
3 | from .two_stage_text_spotting import TwoStageTextSpotter
4 |
5 |
@MODELS.register_module()
class ABCNet(TwoStageTextSpotter):
    """Implementation of `ABCNet: Real-time Scene Text Spotting with
    Adaptive Bezier-Curve Network <https://arxiv.org/abs/2002.10200>`_

    NOTE(review): the previous docstring ("CTC-loss based recognizer")
    looked copy-pasted from ``ABCNetRec`` -- confirm intended description.
    """
9 |
--------------------------------------------------------------------------------
/mmocr/projects/ABCNet/abcnet/model/abcnet_rec.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmocr.models.textrecog import EncoderDecoderRecognizer
3 | from mmocr.registry import MODELS
4 |
5 |
@MODELS.register_module()
class ABCNetRec(EncoderDecoderRecognizer):
    """CTC-loss based recognizer for the ABCNet project."""
9 |
--------------------------------------------------------------------------------
/mmocr/projects/ABCNet/abcnet/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .bezier_utils import bezier2poly, poly2bezier
3 |
4 | __all__ = ['poly2bezier', 'bezier2poly']
5 |
--------------------------------------------------------------------------------
/mmocr/projects/ABCNet/config/_base_/datasets/icdar2015.py:
--------------------------------------------------------------------------------
1 | icdar2015_textspotting_data_root = 'data/icdar2015'
2 |
3 | icdar2015_textspotting_train = dict(
4 | type='OCRDataset',
5 | data_root=icdar2015_textspotting_data_root,
6 | ann_file='textspotting_train.json',
7 | pipeline=None)
8 |
9 | icdar2015_textspotting_test = dict(
10 | type='OCRDataset',
11 | data_root=icdar2015_textspotting_data_root,
12 | ann_file='textspotting_test.json',
13 | test_mode=True,
14 | # indices=50,
15 | pipeline=None)
16 |
--------------------------------------------------------------------------------
/mmocr/projects/ABCNet/config/_base_/schedules/schedule_sgd_500e.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | optim_wrapper = dict(
3 | type='OptimWrapper',
4 | optimizer=dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001),
5 | clip_grad=dict(type='value', clip_value=1))
6 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=500, val_interval=20)
7 | val_cfg = dict(type='ValLoop')
8 | test_cfg = dict(type='TestLoop')
9 | # learning policy
10 | param_scheduler = [
11 | dict(type='LinearLR', end=1000, start_factor=0.001, by_epoch=False),
12 | ]
13 |
--------------------------------------------------------------------------------
/mmocr/projects/ABCNet/config/abcnet_v2/abcnet-v2_resnet50_bifpn_500e_icdar2015.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '_base_abcnet-v2_resnet50_bifpn.py',
3 | '../_base_/datasets/icdar2015.py',
4 | '../_base_/default_runtime.py',
5 | ]
6 |
7 | # dataset settings
8 | icdar2015_textspotting_test = _base_.icdar2015_textspotting_test
9 | icdar2015_textspotting_test.pipeline = _base_.test_pipeline
10 |
11 | val_dataloader = dict(
12 | batch_size=1,
13 | num_workers=4,
14 | persistent_workers=True,
15 | sampler=dict(type='DefaultSampler', shuffle=False),
16 | dataset=icdar2015_textspotting_test)
17 |
18 | test_dataloader = val_dataloader
19 |
20 | val_cfg = dict(type='ValLoop')
21 | test_cfg = dict(type='TestLoop')
22 |
23 | custom_imports = dict(imports=['abcnet'], allow_failed_imports=False)
24 |
--------------------------------------------------------------------------------
/mmocr/projects/ABCNet/dicts/abcnet.txt:
--------------------------------------------------------------------------------
1 |
2 | !
3 | "
4 | #
5 | $
6 | %
7 | &
8 | '
9 | (
10 | )
11 | *
12 | +
13 | ,
14 | -
15 | .
16 | /
17 | 0
18 | 1
19 | 2
20 | 3
21 | 4
22 | 5
23 | 6
24 | 7
25 | 8
26 | 9
27 | :
28 | ;
29 | <
30 | =
31 | >
32 | ?
33 | @
34 | A
35 | B
36 | C
37 | D
38 | E
39 | F
40 | G
41 | H
42 | I
43 | J
44 | K
45 | L
46 | M
47 | N
48 | O
49 | P
50 | Q
51 | R
52 | S
53 | T
54 | U
55 | V
56 | W
57 | X
58 | Y
59 | Z
60 | [
61 | \
62 | ]
63 | ^
64 | _
65 | `
66 | a
67 | b
68 | c
69 | d
70 | e
71 | f
72 | g
73 | h
74 | i
75 | j
76 | k
77 | l
78 | m
79 | n
80 | o
81 | p
82 | q
83 | r
84 | s
85 | t
86 | u
87 | v
88 | w
89 | x
90 | y
91 | z
92 | {
93 | |
94 | }
95 | ~
--------------------------------------------------------------------------------
/mmocr/projects/SPTS/config/_base_/datasets/ctw1500-spts.py:
--------------------------------------------------------------------------------
1 | ctw1500_textspotting_data_root = 'data/CTW1500'
2 |
3 | ctw1500_textspotting_train = dict(
4 | type='AdelDataset',
5 | data_root=ctw1500_textspotting_data_root,
6 | ann_file='annotations/train_ctw1500_maxlen25_v2.json',
7 | data_prefix=dict(img_path='ctwtrain_text_image/'),
8 | filter_cfg=dict(filter_empty_gt=True, min_size=32),
9 | pipeline=None)
10 |
11 | ctw1500_textspotting_test = dict(
12 | type='AdelDataset',
13 | data_root=ctw1500_textspotting_data_root,
14 | ann_file='annotations/test_ctw1500_maxlen25.json',
15 | data_prefix=dict(img_path='ctwtest_text_image/'),
16 | test_mode=True,
17 | pipeline=None)
18 |
--------------------------------------------------------------------------------
/mmocr/projects/SPTS/config/_base_/datasets/icdar2013-spts.py:
--------------------------------------------------------------------------------
1 | icdar2013_textspotting_data_root = 'spts-data/icdar2013'
2 |
3 | icdar2013_textspotting_train = dict(
4 | type='AdelDataset',
5 | data_root=icdar2013_textspotting_data_root,
6 | ann_file='ic13_train.json',
7 | data_prefix=dict(img_path='train_images/'),
8 | filter_cfg=dict(filter_empty_gt=True, min_size=32),
9 | pipeline=None)
10 |
11 | icdar2013_textspotting_test = dict(
12 | type='AdelDataset',
13 | data_root=icdar2013_textspotting_data_root,
14 | data_prefix=dict(img_path='test_images/'),
15 | ann_file='ic13_test.json',
16 | test_mode=True,
17 | pipeline=None)
18 |
--------------------------------------------------------------------------------
/mmocr/projects/SPTS/config/_base_/datasets/icdar2013.py:
--------------------------------------------------------------------------------
1 | icdar2013_textspotting_data_root = 'data/icdar2013'
2 |
3 | icdar2013_textspotting_train = dict(
4 | type='OCRDataset',
5 | data_root=icdar2013_textspotting_data_root,
6 | ann_file='textspotting_train.json',
7 | filter_cfg=dict(filter_empty_gt=True, min_size=32),
8 | pipeline=None)
9 |
10 | icdar2013_textspotting_test = dict(
11 | type='OCRDataset',
12 | data_root=icdar2013_textspotting_data_root,
13 | ann_file='textspotting_test.json',
14 | test_mode=True,
15 | pipeline=None)
16 |
--------------------------------------------------------------------------------
/mmocr/projects/SPTS/config/_base_/datasets/icdar2015-spts.py:
--------------------------------------------------------------------------------
1 | icdar2015_textspotting_data_root = 'spts-data/icdar2015'
2 |
3 | icdar2015_textspotting_train = dict(
4 | type='AdelDataset',
5 | data_root=icdar2015_textspotting_data_root,
6 | ann_file='ic15_train.json',
7 | data_prefix=dict(img_path='train_images/'),
8 | filter_cfg=dict(filter_empty_gt=True, min_size=32),
9 | pipeline=None)
10 |
11 | icdar2015_textspotting_test = dict(
12 | type='AdelDataset',
13 | data_root=icdar2015_textspotting_data_root,
14 | data_prefix=dict(img_path='test_images/'),
15 | ann_file='ic15_test.json',
16 | test_mode=True,
17 | pipeline=None)
18 |
--------------------------------------------------------------------------------
/mmocr/projects/SPTS/config/_base_/datasets/icdar2015.py:
--------------------------------------------------------------------------------
1 | icdar2015_textspotting_data_root = 'data/icdar2015'
2 |
3 | icdar2015_textspotting_train = dict(
4 | type='OCRDataset',
5 | data_root=icdar2015_textspotting_data_root,
6 | ann_file='textspotting_train.json',
7 | pipeline=None)
8 |
9 | icdar2015_textspotting_test = dict(
10 | type='OCRDataset',
11 | data_root=icdar2015_textspotting_data_root,
12 | ann_file='textspotting_test.json',
13 | test_mode=True,
14 | pipeline=None)
15 |
--------------------------------------------------------------------------------
/mmocr/projects/SPTS/config/_base_/datasets/mlt-spts.py:
--------------------------------------------------------------------------------
1 | mlt_textspotting_data_root = 'spts-data/mlt2017'
2 |
3 | mlt_textspotting_train = dict(
4 | type='AdelDataset',
5 | data_root=mlt_textspotting_data_root,
6 | ann_file='train.json',
7 | data_prefix=dict(img_path='MLT_train_images/'),
8 | filter_cfg=dict(filter_empty_gt=True, min_size=32),
9 | pipeline=None)
10 |
--------------------------------------------------------------------------------
/mmocr/projects/SPTS/config/_base_/datasets/syntext1-spts.py:
--------------------------------------------------------------------------------
1 | syntext1_textspotting_data_root = 'spts-data/syntext1'
2 |
3 | syntext1_textspotting_train = dict(
4 | type='AdelDataset',
5 | data_root=syntext1_textspotting_data_root,
6 | ann_file='train.json',
7 | data_prefix=dict(img_path='syntext_word_eng/'),
8 | filter_cfg=dict(filter_empty_gt=True, min_size=32),
9 | pipeline=None)
10 |
--------------------------------------------------------------------------------
/mmocr/projects/SPTS/config/_base_/datasets/syntext2-spts.py:
--------------------------------------------------------------------------------
1 | syntext2_textspotting_data_root = 'spts-data/syntext2'
2 |
3 | syntext2_textspotting_train = dict(
4 | type='AdelDataset',
5 | data_root=syntext2_textspotting_data_root,
6 | ann_file='train.json',
7 | data_prefix=dict(img_path='emcs_imgs/'),
8 | filter_cfg=dict(filter_empty_gt=True, min_size=32),
9 | pipeline=None)
10 |
--------------------------------------------------------------------------------
/mmocr/projects/SPTS/config/_base_/datasets/totaltext-spts.py:
--------------------------------------------------------------------------------
1 | totaltext_textspotting_data_root = 'spts-data/totaltext'
2 |
3 | totaltext_textspotting_train = dict(
4 | type='AdelDataset',
5 | data_root=totaltext_textspotting_data_root,
6 | ann_file='train.json',
7 | data_prefix=dict(img_path='train_images/'),
8 | filter_cfg=dict(filter_empty_gt=True, min_size=32),
9 | pipeline=None)
10 |
11 | totaltext_textspotting_test = dict(
12 | type='AdelDataset',
13 | data_root=totaltext_textspotting_data_root,
14 | ann_file='test.json',
15 | data_prefix=dict(img_path='test_images/'),
16 | test_mode=True,
17 | pipeline=None)
18 |
--------------------------------------------------------------------------------
/mmocr/projects/SPTS/config/_base_/datasets/totaltext.py:
--------------------------------------------------------------------------------
1 | totaltext_textspotting_data_root = 'data/totaltext'
2 |
3 | totaltext_textspotting_train = dict(
4 | type='OCRDataset',
5 | data_root=totaltext_textspotting_data_root,
6 | ann_file='textspotting_train.json',
7 | filter_cfg=dict(filter_empty_gt=True, min_size=32),
8 | pipeline=None)
9 |
10 | totaltext_textspotting_test = dict(
11 | type='OCRDataset',
12 | data_root=totaltext_textspotting_data_root,
13 | ann_file='textspotting_test.json',
14 | test_mode=True,
15 | pipeline=None)
16 |
--------------------------------------------------------------------------------
/mmocr/projects/SPTS/dicts/spts.txt:
--------------------------------------------------------------------------------
1 |
2 | !
3 | "
4 | #
5 | $
6 | %
7 | &
8 | '
9 | (
10 | )
11 | *
12 | +
13 | ,
14 | -
15 | .
16 | /
17 | 0
18 | 1
19 | 2
20 | 3
21 | 4
22 | 5
23 | 6
24 | 7
25 | 8
26 | 9
27 | :
28 | ;
29 | <
30 | =
31 | >
32 | ?
33 | @
34 | A
35 | B
36 | C
37 | D
38 | E
39 | F
40 | G
41 | H
42 | I
43 | J
44 | K
45 | L
46 | M
47 | N
48 | O
49 | P
50 | Q
51 | R
52 | S
53 | T
54 | U
55 | V
56 | W
57 | X
58 | Y
59 | Z
60 | [
61 | \
62 | ]
63 | ^
64 | _
65 | `
66 | a
67 | b
68 | c
69 | d
70 | e
71 | f
72 | g
73 | h
74 | i
75 | j
76 | k
77 | l
78 | m
79 | n
80 | o
81 | p
82 | q
83 | r
84 | s
85 | t
86 | u
87 | v
88 | w
89 | x
90 | y
91 | z
92 | {
93 | |
94 | }
95 | ~
--------------------------------------------------------------------------------
/mmocr/projects/SPTS/spts/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
3 | from .datasets import * # NOQA
4 | from .metric import * # NOQA
5 | from .model import * # NOQA
6 |
--------------------------------------------------------------------------------
/mmocr/projects/SPTS/spts/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .adel_dataset import AdelDataset
3 | from .transforms.spts_transforms import (Bezier2Polygon, ConvertText,
4 | LoadOCRAnnotationsWithBezier,
5 | Polygon2Bezier, RescaleToShortSide)
6 |
7 | __all__ = [
8 | 'AdelDataset', 'LoadOCRAnnotationsWithBezier', 'Bezier2Polygon',
9 | 'Polygon2Bezier', 'ConvertText', 'RescaleToShortSide'
10 | ]
11 |
--------------------------------------------------------------------------------
/mmocr/projects/SPTS/spts/metric/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .e2e_point_metric import E2EPointMetric
3 |
4 | __all__ = ['E2EPointMetric']
5 |
--------------------------------------------------------------------------------
/mmocr/projects/SPTS/spts/model/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .spts import SPTS
3 | from .spts_decoder import SPTSDecoder
4 | from .spts_dictionary import SPTSDictionary
5 | from .spts_encoder import SPTSEncoder
6 | from .spts_module_loss import SPTSModuleLoss
7 | from .spts_postprocessor import SPTSPostprocessor
8 |
9 | __all__ = [
10 | 'SPTSEncoder', 'SPTSDecoder', 'SPTSPostprocessor', 'SPTS',
11 | 'SPTSDictionary', 'SPTSModuleLoss'
12 | ]
13 |
--------------------------------------------------------------------------------
/mmocr/projects/SPTS/spts/model/spts.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmocr.registry import MODELS
3 | from .encoder_decoder_text_spotter import EncoderDecoderTextSpotter
4 |
5 |
6 | @MODELS.register_module()
7 | class SPTS(EncoderDecoderTextSpotter):
8 | """SPTS."""
9 |
--------------------------------------------------------------------------------
/mmocr/projects/example_project/configs/dbnet_dummy-resnet_fpnc_1200e_icdar2015.py:
--------------------------------------------------------------------------------
1 | _base_ = ['mmocr::textdet/dbnet/dbnet_resnet18_fpnc_1200e_icdar2015.py']
2 |
3 | custom_imports = dict(imports=['dummy'])
4 |
5 | _base_.model.backbone.type = 'DummyResNet'
6 |
--------------------------------------------------------------------------------
/mmocr/projects/example_project/dummy/__init__.py:
--------------------------------------------------------------------------------
1 | from .dummy_resnet import DummyResNet
2 |
3 | __all__ = ['DummyResNet']
4 |
--------------------------------------------------------------------------------
/mmocr/projects/example_project/dummy/dummy_resnet.py:
--------------------------------------------------------------------------------
1 | from mmdet.models.backbones import ResNet
2 |
3 | from mmocr.registry import MODELS
4 |
5 |
6 | @MODELS.register_module()
7 | class DummyResNet(ResNet):
8 | """Implements a dummy ResNet wrapper for demonstration purpose.
9 |
10 | Args:
11 | **kwargs: All the arguments are passed to the parent class.
12 | """
13 |
14 | def __init__(self, **kwargs) -> None:
15 | print('Hello world!')
16 | super().__init__(**kwargs)
17 |
--------------------------------------------------------------------------------
/mmocr/projects/selected.txt:
--------------------------------------------------------------------------------
1 | projects/ABCNet/README.md
2 | projects/ABCNet/README_V2.md
3 | projects/SPTS/README.md
4 |
--------------------------------------------------------------------------------
/mmocr/requirements/albu.txt:
--------------------------------------------------------------------------------
1 | albumentations>=1.1.0 --no-binary qudida,albumentations
2 |
--------------------------------------------------------------------------------
/mmocr/requirements/build.txt:
--------------------------------------------------------------------------------
1 | # These must be installed before building mmocr
2 | numpy
3 | pyclipper
4 | torch>=1.1
5 |
--------------------------------------------------------------------------------
/mmocr/requirements/docs.txt:
--------------------------------------------------------------------------------
1 | docutils==0.16.0
2 | markdown>=3.4.0
3 | myst-parser
4 | -e git+https://github.com/open-mmlab/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
5 | sphinx==4.0.2
6 | sphinx-tabs
7 | sphinx_copybutton
8 | sphinx_markdown_tables>=0.0.16
9 | tabulate
10 |
--------------------------------------------------------------------------------
/mmocr/requirements/mminstall.txt:
--------------------------------------------------------------------------------
1 | mmcv>=2.0.0rc4,<2.1.0
2 | mmdet>=3.0.0rc5,<3.1.0
3 | mmengine>=0.7.0, <1.0.0
4 |
--------------------------------------------------------------------------------
/mmocr/requirements/optional.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/requirements/optional.txt
--------------------------------------------------------------------------------
/mmocr/requirements/readthedocs.txt:
--------------------------------------------------------------------------------
1 | imgaug
2 | kwarray
3 | lmdb
4 | matplotlib
5 | mmcv>=2.0.0rc1
6 | mmdet>=3.0.0rc0
7 | mmengine>=0.1.0
8 | pyclipper
9 | rapidfuzz>=2.0.0
10 | regex
11 | scikit-image
12 | scipy
13 | shapely
14 | titlecase
15 | torch
16 | torchvision
17 |
--------------------------------------------------------------------------------
/mmocr/requirements/runtime.txt:
--------------------------------------------------------------------------------
1 | imgaug
2 | lmdb
3 | matplotlib
4 | numpy
5 | opencv-python >=4.2.0.32, != 4.5.5.* # avoid Github security alert
6 | pyclipper
7 | pycocotools
8 | rapidfuzz>=2.0.0
9 | scikit-image
10 |
--------------------------------------------------------------------------------
/mmocr/requirements/tests.txt:
--------------------------------------------------------------------------------
1 | asynctest
2 | codecov
3 | flake8
4 | interrogate
5 | isort
6 | # Note: used for kwarray.group_items, this may be ported to mmcv in the future.
7 | kwarray
8 | lanms-neo==1.0.2
9 | parameterized
10 | pytest
11 | pytest-cov
12 | pytest-runner
13 | ubelt
14 | xdoctest >= 0.10.0
15 | yapf
16 |
--------------------------------------------------------------------------------
/mmocr/resources/illustration.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/resources/illustration.jpg
--------------------------------------------------------------------------------
/mmocr/resources/kie.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/resources/kie.jpg
--------------------------------------------------------------------------------
/mmocr/resources/mmocr-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/resources/mmocr-logo.png
--------------------------------------------------------------------------------
/mmocr/resources/textdet.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/resources/textdet.jpg
--------------------------------------------------------------------------------
/mmocr/resources/textrecog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/resources/textrecog.jpg
--------------------------------------------------------------------------------
/mmocr/resources/verification.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/resources/verification.png
--------------------------------------------------------------------------------
/mmocr/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal=1
3 |
4 | [yapf]
5 | based_on_style = pep8
6 | blank_line_before_nested_class_or_def = true
7 | split_before_expression_after_opening_paren = true
8 | split_penalty_import_names=0
9 | SPLIT_PENALTY_AFTER_OPENING_BRACKET=800
10 |
11 | [isort]
12 | line_length = 79
13 | multi_line_output = 0
14 | extra_standard_library = setuptools
15 | known_first_party = mmocr
16 | known_third_party = PIL,cv2,imgaug,lanms,lmdb,matplotlib,mmcv,mmdet,numpy,packaging,pyclipper,pytest,pytorch_sphinx_theme,rapidfuzz,requests,scipy,shapely,skimage,titlecase,torch,torchvision,ts,yaml,mmengine
17 | no_lines_before = STDLIB,LOCALFOLDER
18 | default_section = THIRDPARTY
19 |
20 | [style]
21 | BASED_ON_STYLE = pep8
22 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true
23 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true
24 |
--------------------------------------------------------------------------------
/mmocr/tests/test_evaluation/test_functional/test_hmean.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from unittest import TestCase
3 |
4 | from mmocr.evaluation.functional import compute_hmean
5 |
6 |
7 | class TestHmean(TestCase):
8 |
9 | def test_compute_hmean(self):
10 | with self.assertRaises(AssertionError):
11 | compute_hmean(0, 0, 0.0, 0)
12 | with self.assertRaises(AssertionError):
13 | compute_hmean(0, 0, 0, 0.0)
14 | with self.assertRaises(AssertionError):
15 | compute_hmean([1], 0, 0, 0)
16 | with self.assertRaises(AssertionError):
17 | compute_hmean(0, [1], 0, 0)
18 |
19 | _, _, hmean = compute_hmean(2, 2, 2, 2)
20 | self.assertEqual(hmean, 1)
21 |
22 | _, _, hmean = compute_hmean(0, 0, 2, 2)
23 | self.assertEqual(hmean, 0)
24 |
--------------------------------------------------------------------------------
/mmocr/tests/test_models/test_common/test_modules/test_transformer_module.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from unittest import TestCase
3 |
4 | import torch
5 |
6 | from mmocr.models.common.modules import PositionalEncoding
7 |
8 |
9 | class TestPositionalEncoding(TestCase):
10 |
11 | def test_forward(self):
12 | pos_encoder = PositionalEncoding()
13 | x = torch.rand(1, 30, 512)
14 | out = pos_encoder(x)
15 | assert out.size() == x.size()
16 |
--------------------------------------------------------------------------------
/mmocr/tests/test_models/test_common/test_plugins/test_avgpool.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from unittest import TestCase
3 |
4 | import torch
5 |
6 | from mmocr.models.common.plugins import AvgPool2d
7 |
8 |
9 | class TestAvgPool2d(TestCase):
10 |
11 | def setUp(self) -> None:
12 | self.img = torch.rand(1, 3, 32, 100)
13 |
14 | def test_avgpool2d(self):
15 | avgpool2d = AvgPool2d(kernel_size=2, stride=2)
16 | self.assertEqual(avgpool2d(self.img).shape, torch.Size([1, 3, 16, 50]))
17 |
--------------------------------------------------------------------------------
/mmocr/tests/test_models/test_textdet/test_heads/test_pse_head.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from unittest import TestCase
3 |
4 | import torch
5 |
6 | from mmocr.models.textdet.heads import PSEHead
7 |
8 |
9 | class TestPSEHead(TestCase):
10 |
11 | def setUp(self):
12 | self.feature = torch.randn((2, 10, 40, 50))
13 |
14 | def test_init(self):
15 | with self.assertRaises(TypeError):
16 | PSEHead(in_channels=1)
17 |
18 | with self.assertRaises(TypeError):
19 | PSEHead(out_channels='out')
20 |
21 | def test_forward(self):
22 | pse_head = PSEHead(in_channels=[10], hidden_dim=128, out_channel=7)
23 | results = pse_head(self.feature)
24 | self.assertEqual(results.shape, (2, 7, 40, 50))
25 |
--------------------------------------------------------------------------------
/mmocr/tests/test_models/test_textdet/test_heads/test_textsnake_head.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from unittest import TestCase
3 |
4 | import torch
5 |
6 | from mmocr.models.textdet.heads import TextSnakeHead
7 |
8 |
9 | class TestTextSnakeHead(TestCase):
10 |
11 | def test_init(self):
12 | with self.assertRaises(AssertionError):
13 | TextSnakeHead(in_channels='test')
14 |
15 | def test_forward(self):
16 | ts_head = TextSnakeHead(in_channels=10)
17 | data = torch.randn((2, 10, 40, 50))
18 | results = ts_head(data, None)
19 | self.assertEqual(results.shape, (2, 5, 40, 50))
20 |
--------------------------------------------------------------------------------
/mmocr/tests/test_models/test_textdet/test_necks/test_fpnf.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import unittest
3 |
4 | import torch
5 | from parameterized import parameterized
6 |
7 | from mmocr.models.textdet.necks import FPNF
8 |
9 |
10 | class TestFPNF(unittest.TestCase):
11 |
12 | def setUp(self):
13 | in_channels = [256, 512, 1024, 2048]
14 | size = [112, 56, 28, 14]
15 | inputs = []
16 | for i in range(4):
17 | inputs.append(torch.rand(1, in_channels[i], size[i], size[i]))
18 | self.inputs = inputs
19 |
20 | @parameterized.expand([('concat'), ('add')])
21 | def test_forward(self, fusion_type):
22 | fpnf = FPNF(fusion_type=fusion_type)
23 | outputs = fpnf.forward(self.inputs)
24 | self.assertListEqual(list(outputs.size()), [1, 256, 112, 112])
25 |
--------------------------------------------------------------------------------
/mmocr/tests/test_models/test_textrecog/test_backbones/test_mini_vgg.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from unittest import TestCase
3 |
4 | import torch
5 |
6 | from mmocr.models.textrecog.backbones import MiniVGG
7 |
8 |
9 | class TestMiniVGG(TestCase):
10 |
11 | def test_forward(self):
12 |
13 | model = MiniVGG()
14 | model.init_weights()
15 | model.train()
16 |
17 | imgs = torch.randn(1, 3, 32, 160)
18 | feats = model(imgs)
19 | self.assertEqual(feats.shape, torch.Size([1, 512, 1, 41]))
20 |
--------------------------------------------------------------------------------
/mmocr/tests/test_models/test_textrecog/test_backbones/test_mobilenet_v2.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from unittest import TestCase
3 |
4 | import torch
5 |
6 | from mmocr.models.textrecog.backbones import MobileNetV2
7 |
8 |
9 | class TestMobileNetV2(TestCase):
10 |
11 | def setUp(self) -> None:
12 | self.img = torch.rand(1, 3, 32, 160)
13 |
14 | def test_mobilenetv2(self):
15 | mobilenet_v2 = MobileNetV2()
16 | self.assertEqual(
17 | mobilenet_v2(self.img).shape, torch.Size([1, 1280, 1, 43]))
18 |
--------------------------------------------------------------------------------
/mmocr/tests/test_models/test_textrecog/test_backbones/test_nrtr_modality_transformer.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import unittest
3 |
4 | import torch
5 |
6 | from mmocr.models.textrecog.backbones import NRTRModalityTransform
7 |
8 |
9 | class TestNRTRBackbone(unittest.TestCase):
10 |
11 | def setUp(self):
12 | self.img = torch.randn(2, 3, 32, 100)
13 |
14 | def test_encoder(self):
15 | nrtr_backbone = NRTRModalityTransform()
16 | nrtr_backbone.init_weights()
17 | nrtr_backbone.train()
18 | out_enc = nrtr_backbone(self.img)
19 | self.assertEqual(out_enc.shape, torch.Size([2, 512, 1, 25]))
20 |
--------------------------------------------------------------------------------
/mmocr/tests/test_models/test_textrecog/test_backbones/test_shallow_cnn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import unittest
3 |
4 | import torch
5 |
6 | from mmocr.models.textrecog.backbones import ShallowCNN
7 |
8 |
9 | class TestShallowCNN(unittest.TestCase):
10 |
11 | def setUp(self):
12 | self.imgs = torch.randn(1, 1, 32, 100)
13 |
14 | def test_shallow_cnn(self):
15 |
16 | model = ShallowCNN()
17 | model.init_weights()
18 | model.train()
19 |
20 | feat = model(self.imgs)
21 | self.assertEqual(feat.shape, torch.Size([1, 512, 8, 25]))
22 |
--------------------------------------------------------------------------------
/mmocr/tests/test_models/test_textrecog/test_encoders/test_abi_encoder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from unittest import TestCase
3 |
4 | import torch
5 |
6 | from mmocr.models.textrecog.encoders.abi_encoder import ABIEncoder
7 |
8 |
9 | class TestABIEncoder(TestCase):
10 |
11 | def test_init(self):
12 | with self.assertRaises(AssertionError):
13 | ABIEncoder(d_model=512, n_head=10)
14 |
15 | def test_forward(self):
16 | model = ABIEncoder()
17 | x = torch.randn(10, 512, 8, 32)
18 | self.assertEqual(model(x, None).shape, torch.Size([10, 512, 8, 32]))
19 |
--------------------------------------------------------------------------------
/mmocr/tests/test_models/test_textrecog/test_encoders/test_aster_encoder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import unittest
3 |
4 | import torch
5 |
6 | from mmocr.models.textrecog.encoders import ASTEREncoder
7 |
8 |
9 | class TestASTEREncoder(unittest.TestCase):
10 |
11 | def test_encoder(self):
12 | encoder = ASTEREncoder(10)
13 | feat = torch.randn(2, 10, 1, 25)
14 | out = encoder(feat)
15 | self.assertEqual(out.shape, torch.Size([2, 25, 10]))
16 |
--------------------------------------------------------------------------------
/mmocr/tests/test_models/test_textrecog/test_plugins/test_maxpool.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from unittest import TestCase
3 |
4 | import torch
5 |
6 | from mmocr.models.textrecog.plugins import Maxpool2d
7 |
8 |
9 | class TestMaxpool2d(TestCase):
10 |
11 | def setUp(self) -> None:
12 | self.img = torch.rand(1, 3, 32, 100)
13 |
14 | def test_maxpool2d(self):
15 | maxpool2d = Maxpool2d(kernel_size=2, stride=2)
16 | self.assertEqual(maxpool2d(self.img).shape, torch.Size([1, 3, 16, 50]))
17 |
--------------------------------------------------------------------------------
/mmocr/tools/dist_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | CONFIG=$1
4 | CHECKPOINT=$2
5 | GPUS=$3
6 | NNODES=${NNODES:-1}
7 | NODE_RANK=${NODE_RANK:-0}
8 | PORT=${PORT:-29500}
9 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
10 |
11 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
12 | python -m torch.distributed.launch \
13 | --nnodes=$NNODES \
14 | --node_rank=$NODE_RANK \
15 | --master_addr=$MASTER_ADDR \
16 | --nproc_per_node=$GPUS \
17 | --master_port=$PORT \
18 | $(dirname "$0")/test.py \
19 | $CONFIG \
20 | $CHECKPOINT \
21 | --launcher pytorch \
22 | ${@:4}
23 |
--------------------------------------------------------------------------------
/mmocr/tools/dist_train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
3 |
4 | CONFIG=$1
5 | GPUS=$2
6 | NNODES=${NNODES:-1}
7 | NODE_RANK=${NODE_RANK:-0}
8 | PORT=${PORT:-29500}
9 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
10 |
11 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
12 | python -m torch.distributed.launch \
13 | --nnodes=$NNODES \
14 | --node_rank=$NODE_RANK \
15 | --master_addr=$MASTER_ADDR \
16 | --nproc_per_node=$GPUS \
17 | --master_port=$PORT \
18 | $(dirname "$0")/train.py \
19 | $CONFIG \
20 | --launcher pytorch ${@:3}
21 |
--------------------------------------------------------------------------------
/mmocr/tools/slurm_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -x
4 | export PYTHONPATH=`pwd`:$PYTHONPATH
5 |
6 | PARTITION=$1
7 | JOB_NAME=$2
8 | CONFIG=$3
9 | CHECKPOINT=$4
10 | GPUS=${GPUS:-8}
11 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
12 | PY_ARGS=${@:5}
13 | SRUN_ARGS=${SRUN_ARGS:-""}
14 |
15 | srun -p ${PARTITION} \
16 | --job-name=${JOB_NAME} \
17 | --gres=gpu:${GPUS_PER_NODE} \
18 | --ntasks=${GPUS} \
19 | --ntasks-per-node=${GPUS_PER_NODE} \
20 | --kill-on-bad-exit=1 \
21 | ${SRUN_ARGS} \
22 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
23 |
--------------------------------------------------------------------------------
/mmocr/tools/slurm_train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | export MASTER_PORT=$((12000 + $RANDOM % 20000))
3 |
4 | set -x
5 |
6 | PARTITION=$1
7 | JOB_NAME=$2
8 | CONFIG=$3
9 | WORK_DIR=$4
10 | GPUS=${GPUS:-8}
11 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
12 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
13 | PY_ARGS=${@:5}
14 | SRUN_ARGS=${SRUN_ARGS:-""}
15 |
16 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
17 | srun -p ${PARTITION} \
18 | --job-name=${JOB_NAME} \
19 | --gres=gpu:${GPUS_PER_NODE} \
20 | --ntasks=${GPUS} \
21 | --ntasks-per-node=${GPUS_PER_NODE} \
22 | --cpus-per-task=${CPUS_PER_TASK} \
23 | --kill-on-bad-exit=1 \
24 | ${SRUN_ARGS} \
25 | python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS}
26 |
--------------------------------------------------------------------------------
/textfussion/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE
2 | include src/diffusers/utils/model_card_template.md
3 |
--------------------------------------------------------------------------------
/textfussion/README.md:
--------------------------------------------------------------------------------
1 | 该项目主要基于diffusers==0.15.0.dev0框架,请依照requirements.txt进行环境的搭建
2 |
3 | 准备好文本图像训练集后,通过./my_inpainting/new_paradigm_train.sh脚本进行生成模型的训练
4 |
5 | 完成模型训练后,通过./my_inpainting/my_build_synth_data_baseline.py脚本,制作合成数据集
6 |
--------------------------------------------------------------------------------
/textfussion/_typos.toml:
--------------------------------------------------------------------------------
1 | # Files for typos
2 | # Instruction: https://github.com/marketplace/actions/typos-action#getting-started
3 |
4 | [default.extend-identifiers]
5 |
6 | [default.extend-words]
7 | NIN="NIN" # NIN is used in scripts/convert_ncsnpp_original_checkpoint_to_diffusers.py
8 | nd="np" # nd may be np (numpy)
9 | parms="parms" # parms is used in scripts/convert_original_stable_diffusion_to_diffusers.py
10 |
11 |
12 | [files]
13 | extend-exclude = ["_typos.toml"]
14 |
--------------------------------------------------------------------------------
/textfussion/build/lib/diffusers/experimental/__init__.py:
--------------------------------------------------------------------------------
1 | from .rl import ValueGuidedRLPipeline
2 |
--------------------------------------------------------------------------------
/textfussion/build/lib/diffusers/experimental/rl/__init__.py:
--------------------------------------------------------------------------------
1 | from .value_guided_sampling import ValueGuidedRLPipeline
2 |
--------------------------------------------------------------------------------
/textfussion/build/lib/diffusers/pipelines/audio_diffusion/__init__.py:
--------------------------------------------------------------------------------
1 | from .mel import Mel
2 | from .pipeline_audio_diffusion import AudioDiffusionPipeline
3 |
--------------------------------------------------------------------------------
/textfussion/build/lib/diffusers/pipelines/audioldm/__init__.py:
--------------------------------------------------------------------------------
1 | from ...utils import (
2 | OptionalDependencyNotAvailable,
3 | is_torch_available,
4 | is_transformers_available,
5 | is_transformers_version,
6 | )
7 |
8 |
9 | try:
10 | if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.27.0")):
11 | raise OptionalDependencyNotAvailable()
12 | except OptionalDependencyNotAvailable:
13 | from ...utils.dummy_torch_and_transformers_objects import (
14 | AudioLDMPipeline,
15 | )
16 | else:
17 | from .pipeline_audioldm import AudioLDMPipeline
18 |
--------------------------------------------------------------------------------
/textfussion/build/lib/diffusers/pipelines/dance_diffusion/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_dance_diffusion import DanceDiffusionPipeline
2 |
--------------------------------------------------------------------------------
/textfussion/build/lib/diffusers/pipelines/ddim/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_ddim import DDIMPipeline
2 |
--------------------------------------------------------------------------------
/textfussion/build/lib/diffusers/pipelines/ddpm/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_ddpm import DDPMPipeline
2 |
--------------------------------------------------------------------------------
/textfussion/build/lib/diffusers/pipelines/dit/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_dit import DiTPipeline
2 |
--------------------------------------------------------------------------------
/textfussion/build/lib/diffusers/pipelines/latent_diffusion/__init__.py:
--------------------------------------------------------------------------------
1 | from ...utils import is_transformers_available
2 | from .pipeline_latent_diffusion_superresolution import LDMSuperResolutionPipeline
3 |
4 |
5 | if is_transformers_available():
6 | from .pipeline_latent_diffusion import LDMBertModel, LDMTextToImagePipeline
7 |
--------------------------------------------------------------------------------
/textfussion/build/lib/diffusers/pipelines/latent_diffusion_uncond/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_latent_diffusion_uncond import LDMPipeline
2 |
--------------------------------------------------------------------------------
/textfussion/build/lib/diffusers/pipelines/paint_by_example/__init__.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from typing import List, Optional, Union
3 |
4 | import numpy as np
5 | import PIL
6 | from PIL import Image
7 |
8 | from ...utils import is_torch_available, is_transformers_available
9 |
10 |
11 | if is_transformers_available() and is_torch_available():
12 | from .image_encoder import PaintByExampleImageEncoder
13 | from .pipeline_paint_by_example import PaintByExamplePipeline
14 |
--------------------------------------------------------------------------------
/textfussion/build/lib/diffusers/pipelines/pndm/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_pndm import PNDMPipeline
2 |
--------------------------------------------------------------------------------
/textfussion/build/lib/diffusers/pipelines/repaint/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_repaint import RePaintPipeline
2 |
--------------------------------------------------------------------------------
/textfussion/build/lib/diffusers/pipelines/score_sde_ve/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_score_sde_ve import ScoreSdeVePipeline
2 |
--------------------------------------------------------------------------------
/textfussion/build/lib/diffusers/pipelines/stochastic_karras_ve/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_stochastic_karras_ve import KarrasVePipeline
2 |
--------------------------------------------------------------------------------
/textfussion/build/lib/diffusers/pipelines/unclip/__init__.py:
--------------------------------------------------------------------------------
1 | from ...utils import (
2 | OptionalDependencyNotAvailable,
3 | is_torch_available,
4 | is_transformers_available,
5 | is_transformers_version,
6 | )
7 |
8 |
9 | try:
10 | if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.25.0")):
11 | raise OptionalDependencyNotAvailable()
12 | except OptionalDependencyNotAvailable:
13 | from ...utils.dummy_torch_and_transformers_objects import UnCLIPImageVariationPipeline, UnCLIPPipeline
14 | else:
15 | from .pipeline_unclip import UnCLIPPipeline
16 | from .pipeline_unclip_image_variation import UnCLIPImageVariationPipeline
17 | from .text_proj import UnCLIPTextProjModel
18 |
--------------------------------------------------------------------------------
/textfussion/build/lib/diffusers/pipelines/vq_diffusion/__init__.py:
--------------------------------------------------------------------------------
1 | from ...utils import is_torch_available, is_transformers_available
2 |
3 |
4 | if is_transformers_available() and is_torch_available():
5 | from .pipeline_vq_diffusion import LearnedClassifierFreeSamplingEmbeddings, VQDiffusionPipeline
6 |
--------------------------------------------------------------------------------
/textfussion/build/lib/diffusers/utils/dummy_note_seq_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | from ..utils import DummyObject, requires_backends
3 |
4 |
5 | class MidiProcessor(metaclass=DummyObject):
6 | _backends = ["note_seq"]
7 |
8 | def __init__(self, *args, **kwargs):
9 | requires_backends(self, ["note_seq"])
10 |
11 | @classmethod
12 | def from_config(cls, *args, **kwargs):
13 | requires_backends(cls, ["note_seq"])
14 |
15 | @classmethod
16 | def from_pretrained(cls, *args, **kwargs):
17 | requires_backends(cls, ["note_seq"])
18 |
--------------------------------------------------------------------------------
/textfussion/build/lib/diffusers/utils/dummy_onnx_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | from ..utils import DummyObject, requires_backends
3 |
4 |
5 | class OnnxRuntimeModel(metaclass=DummyObject):
6 | _backends = ["onnx"]
7 |
8 | def __init__(self, *args, **kwargs):
9 | requires_backends(self, ["onnx"])
10 |
11 | @classmethod
12 | def from_config(cls, *args, **kwargs):
13 | requires_backends(cls, ["onnx"])
14 |
15 | @classmethod
16 | def from_pretrained(cls, *args, **kwargs):
17 | requires_backends(cls, ["onnx"])
18 |
--------------------------------------------------------------------------------
/textfussion/build/lib/diffusers/utils/dummy_torch_and_scipy_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | from ..utils import DummyObject, requires_backends
3 |
4 |
5 | class LMSDiscreteScheduler(metaclass=DummyObject):
6 | _backends = ["torch", "scipy"]
7 |
8 | def __init__(self, *args, **kwargs):
9 | requires_backends(self, ["torch", "scipy"])
10 |
11 | @classmethod
12 | def from_config(cls, *args, **kwargs):
13 | requires_backends(cls, ["torch", "scipy"])
14 |
15 | @classmethod
16 | def from_pretrained(cls, *args, **kwargs):
17 | requires_backends(cls, ["torch", "scipy"])
18 |
--------------------------------------------------------------------------------
/textfussion/build/lib/diffusers/utils/dummy_torch_and_transformers_and_k_diffusion_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | from ..utils import DummyObject, requires_backends
3 |
4 |
5 | class StableDiffusionKDiffusionPipeline(metaclass=DummyObject):
6 | _backends = ["torch", "transformers", "k_diffusion"]
7 |
8 | def __init__(self, *args, **kwargs):
9 | requires_backends(self, ["torch", "transformers", "k_diffusion"])
10 |
11 | @classmethod
12 | def from_config(cls, *args, **kwargs):
13 | requires_backends(cls, ["torch", "transformers", "k_diffusion"])
14 |
15 | @classmethod
16 | def from_pretrained(cls, *args, **kwargs):
17 | requires_backends(cls, ["torch", "transformers", "k_diffusion"])
18 |
--------------------------------------------------------------------------------
/textfussion/build/lib/diffusers/utils/dummy_transformers_and_torch_and_note_seq_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | from ..utils import DummyObject, requires_backends
3 |
4 |
5 | class SpectrogramDiffusionPipeline(metaclass=DummyObject):
6 | _backends = ["transformers", "torch", "note_seq"]
7 |
8 | def __init__(self, *args, **kwargs):
9 | requires_backends(self, ["transformers", "torch", "note_seq"])
10 |
11 | @classmethod
12 | def from_config(cls, *args, **kwargs):
13 | requires_backends(cls, ["transformers", "torch", "note_seq"])
14 |
15 | @classmethod
16 | def from_pretrained(cls, *args, **kwargs):
17 | requires_backends(cls, ["transformers", "torch", "note_seq"])
18 |
--------------------------------------------------------------------------------
/textfussion/build/lib/diffusers/utils/pil_utils.py:
--------------------------------------------------------------------------------
1 | import PIL.Image
2 | import PIL.ImageOps
3 | from packaging import version
4 |
5 |
6 | if version.parse(version.parse(PIL.__version__).base_version) >= version.parse("9.1.0"):
7 | PIL_INTERPOLATION = {
8 | "linear": PIL.Image.Resampling.BILINEAR,
9 | "bilinear": PIL.Image.Resampling.BILINEAR,
10 | "bicubic": PIL.Image.Resampling.BICUBIC,
11 | "lanczos": PIL.Image.Resampling.LANCZOS,
12 | "nearest": PIL.Image.Resampling.NEAREST,
13 | }
14 | else:
15 | PIL_INTERPOLATION = {
16 | "linear": PIL.Image.LINEAR,
17 | "bilinear": PIL.Image.BILINEAR,
18 | "bicubic": PIL.Image.BICUBIC,
19 | "lanczos": PIL.Image.LANCZOS,
20 | "nearest": PIL.Image.NEAREST,
21 | }
22 |
--------------------------------------------------------------------------------
/textfussion/docs/source/_config.py:
--------------------------------------------------------------------------------
1 | # docstyle-ignore
2 | INSTALL_CONTENT = """
3 | # Diffusers installation
4 | ! pip install diffusers transformers datasets accelerate
5 | # To install from source instead of the last release, comment the command above and uncomment the following one.
6 | # ! pip install git+https://github.com/huggingface/diffusers.git
7 | """
8 |
9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}]
--------------------------------------------------------------------------------
/textfussion/docs/source/en/api/experimental/rl.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # TODO
14 |
15 | Coming soon!
--------------------------------------------------------------------------------
/textfussion/docs/source/en/imgs/access_request.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/docs/source/en/imgs/access_request.png
--------------------------------------------------------------------------------
/textfussion/docs/source/en/imgs/diffusers_library.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/docs/source/en/imgs/diffusers_library.jpg
--------------------------------------------------------------------------------
/textfussion/docs/source/en/using-diffusers/audio.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Using Diffusers for audio
14 |
15 | [`DanceDiffusionPipeline`] and [`AudioDiffusionPipeline`] can be used to generate
16 | audio rapidly! More coming soon!
--------------------------------------------------------------------------------
/textfussion/docs/source/ko/in_translation.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # 번역중
14 |
15 | 열심히 번역을 진행중입니다. 조금만 기다려주세요.
16 | 감사합니다!
--------------------------------------------------------------------------------
/textfussion/examples/community/one_step_unet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import torch
3 |
4 | from diffusers import DiffusionPipeline
5 |
6 |
7 | class UnetSchedulerOneForwardPipeline(DiffusionPipeline):
8 | def __init__(self, unet, scheduler):
9 | super().__init__()
10 |
11 | self.register_modules(unet=unet, scheduler=scheduler)
12 |
13 | def __call__(self):
14 | image = torch.randn(
15 | (1, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size),
16 | )
17 | timestep = 1
18 |
19 | model_output = self.unet(image, timestep).sample
20 | scheduler_output = self.scheduler.step(model_output, timestep, image).prev_sample
21 |
22 | result = scheduler_output - scheduler_output + torch.ones_like(scheduler_output)
23 |
24 | return result
25 |
--------------------------------------------------------------------------------
/textfussion/examples/controlnet/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate
2 | torchvision
3 | transformers>=4.25.1
4 | ftfy
5 | tensorboard
6 | datasets
7 |
--------------------------------------------------------------------------------
/textfussion/examples/controlnet/requirements_flax.txt:
--------------------------------------------------------------------------------
1 | transformers>=4.25.1
2 | datasets
3 | flax
4 | optax
5 | torch
6 | torchvision
7 | ftfy
8 | tensorboard
9 | Jinja2
10 |
--------------------------------------------------------------------------------
/textfussion/examples/dreambooth/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate
2 | torchvision
3 | transformers>=4.25.1
4 | ftfy
5 | tensorboard
6 | Jinja2
7 |
--------------------------------------------------------------------------------
/textfussion/examples/dreambooth/requirements_flax.txt:
--------------------------------------------------------------------------------
1 | transformers>=4.25.1
2 | flax
3 | optax
4 | torch
5 | torchvision
6 | ftfy
7 | tensorboard
8 | Jinja2
9 |
--------------------------------------------------------------------------------
/textfussion/examples/inference/image_to_image.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | from diffusers import StableDiffusionImg2ImgPipeline # noqa F401
4 |
5 |
6 | warnings.warn(
7 | "The `image_to_image.py` script is outdated. Please use directly `from diffusers import"
8 | " StableDiffusionImg2ImgPipeline` instead."
9 | )
10 |
--------------------------------------------------------------------------------
/textfussion/examples/inference/inpainting.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | from diffusers import StableDiffusionInpaintPipeline as StableDiffusionInpaintPipeline # noqa F401
4 |
5 |
6 | warnings.warn(
7 | "The `inpainting.py` script is outdated. Please use directly `from diffusers import"
8 | " StableDiffusionInpaintPipeline` instead."
9 | )
10 |
--------------------------------------------------------------------------------
/textfussion/examples/instruct_pix2pix/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate
2 | torchvision
3 | transformers>=4.25.1
4 | datasets
5 | ftfy
6 | tensorboard
--------------------------------------------------------------------------------
/textfussion/examples/research_projects/README.md:
--------------------------------------------------------------------------------
1 | # Research projects
2 |
3 | This folder contains various research projects using 🧨 Diffusers.
4 | They are not really maintained by the core maintainers of this library and often require a specific version of Diffusers that is indicated in the requirements file of each folder.
5 | Updating them to the most recent version of the library will require some work.
6 |
7 | To use any of them, just run the command
8 |
9 | ```
10 | pip install -r requirements.txt
11 | ```
12 | inside the folder of your choice.
13 |
14 | If you need help with any of those, please open an issue where you directly ping the author(s), as indicated at the top of the README of each folder.
15 |
--------------------------------------------------------------------------------
/textfussion/examples/research_projects/colossalai/inference.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from diffusers import StableDiffusionPipeline
4 |
5 |
6 | model_id = "path-to-your-trained-model"
7 | pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")
8 |
9 | prompt = "A photo of sks dog in a bucket"
10 | image = pipe(prompt, num_inference_steps=50, guidance_scale=7.5).images[0]
11 |
12 | image.save("dog-bucket.png")
13 |
--------------------------------------------------------------------------------
/textfussion/examples/research_projects/colossalai/requirement.txt:
--------------------------------------------------------------------------------
1 | diffusers
2 | torch
3 | torchvision
4 | ftfy
5 | tensorboard
6 | Jinja2
7 | transformers
--------------------------------------------------------------------------------
/textfussion/examples/research_projects/dreambooth_inpaint/requirements.txt:
--------------------------------------------------------------------------------
1 | diffusers==0.9.0
2 | accelerate
3 | torchvision
4 | transformers>=4.21.0
5 | ftfy
6 | tensorboard
7 | Jinja2
8 |
--------------------------------------------------------------------------------
/textfussion/examples/research_projects/intel_opts/textual_inversion/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate
2 | torchvision
3 | transformers>=4.21.0
4 | ftfy
5 | tensorboard
6 | Jinja2
7 | intel_extension_for_pytorch>=1.13
8 |
--------------------------------------------------------------------------------
/textfussion/examples/research_projects/lora/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate
2 | torchvision
3 | transformers>=4.25.1
4 | datasets
5 | ftfy
6 | tensorboard
7 | Jinja2
8 | git+https://github.com/huggingface/peft.git
--------------------------------------------------------------------------------
/textfussion/examples/research_projects/mulit_token_textual_inversion/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate
2 | torchvision
3 | transformers>=4.25.1
4 | ftfy
5 | tensorboard
6 | Jinja2
7 |
--------------------------------------------------------------------------------
/textfussion/examples/research_projects/mulit_token_textual_inversion/requirements_flax.txt:
--------------------------------------------------------------------------------
1 | transformers>=4.25.1
2 | flax
3 | optax
4 | torch
5 | torchvision
6 | ftfy
7 | tensorboard
8 | Jinja2
9 |
--------------------------------------------------------------------------------
/textfussion/examples/research_projects/multi_subject_dreambooth/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate
2 | torchvision
3 | transformers>=4.25.1
4 | ftfy
5 | tensorboard
6 | Jinja2
--------------------------------------------------------------------------------
/textfussion/examples/research_projects/onnxruntime/README.md:
--------------------------------------------------------------------------------
1 | ## Diffusers examples with ONNXRuntime optimizations
2 |
3 | **This research project is not actively maintained by the diffusers team. For any questions or comments, please contact Prathik Rao (prathikr), Sunghoon Choi (hanbitmyths), Ashwini Khade (askhade), or Peng Wang (pengwa) on github with any questions.**
4 |
5 | This aims to provide diffusers examples with ONNXRuntime optimizations for training/fine-tuning unconditional image generation, text to image, and textual inversion. Please see individual directories for more details on how to run each task using ONNXRuntime.
--------------------------------------------------------------------------------
/textfussion/examples/research_projects/onnxruntime/text_to_image/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate
2 | torchvision
3 | transformers>=4.25.1
4 | datasets
5 | ftfy
6 | tensorboard
7 | modelcards
8 |
--------------------------------------------------------------------------------
/textfussion/examples/research_projects/onnxruntime/textual_inversion/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate
2 | torchvision
3 | transformers>=4.25.1
4 | ftfy
5 | tensorboard
6 | modelcards
7 |
--------------------------------------------------------------------------------
/textfussion/examples/research_projects/onnxruntime/unconditional_image_generation/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate
2 | torchvision
3 | datasets
4 |
--------------------------------------------------------------------------------
/textfussion/examples/text_to_image/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate
2 | transformers>=4.25.1
3 | datasets
4 | ftfy
5 | tensorboard
6 | Jinja2
7 |
--------------------------------------------------------------------------------
/textfussion/examples/text_to_image/requirements_flax.txt:
--------------------------------------------------------------------------------
1 | transformers>=4.25.1
2 | datasets
3 | flax
4 | optax
5 | torch
6 | torchvision
7 | ftfy
8 | tensorboard
9 | Jinja2
10 |
--------------------------------------------------------------------------------
/textfussion/examples/textual_inversion/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate
2 | torchvision
3 | transformers>=4.25.1
4 | ftfy
5 | tensorboard
6 | Jinja2
7 |
--------------------------------------------------------------------------------
/textfussion/examples/textual_inversion/requirements_flax.txt:
--------------------------------------------------------------------------------
1 | transformers>=4.25.1
2 | flax
3 | optax
4 | torch
5 | torchvision
6 | ftfy
7 | tensorboard
8 | Jinja2
9 |
--------------------------------------------------------------------------------
/textfussion/examples/unconditional_image_generation/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate
2 | torchvision
3 | datasets
4 |
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/__init__.py
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/__pycache__/__init__.cpython-39.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/build_synth_data/__pycache__/batch_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/build_synth_data/__pycache__/batch_utils.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/build_synth_data/__pycache__/crop_tools.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/build_synth_data/__pycache__/crop_tools.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/build_synth_data/__pycache__/glyph_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/build_synth_data/__pycache__/glyph_utils.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/build_synth_data/__pycache__/rec_inferencer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/build_synth_data/__pycache__/rec_inferencer.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/dataset/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__init__.py
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/dataset/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/dataset/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/dataset/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/__init__.cpython-39.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/dataset/__pycache__/base_text_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/base_text_dataset.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/dataset/__pycache__/batch_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/batch_utils.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/dataset/__pycache__/crop_image_for_test.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/crop_image_for_test.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/dataset/__pycache__/new_paradigm_text_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/new_paradigm_text_dataset.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/dataset/__pycache__/text_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/text_dataset.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/dataset/__pycache__/text_dataset.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/text_dataset.cpython-39.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/dataset/__pycache__/text_mapper.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/text_mapper.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/dataset/__pycache__/text_mapper.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/text_mapper.cpython-38.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/dataset/__pycache__/text_mapper.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/text_mapper.cpython-39.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/dataset/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/utils.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/dataset/__pycache__/utils.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/utils.cpython-39.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_pure_text_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_pure_text_dataset.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_text_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_text_dataset.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_text_dataset.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_text_dataset.cpython-38.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_text_dataset.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_text_dataset.cpython-39.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_with_blank_text_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_with_blank_text_dataset.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/engines/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/engines/__init__.py
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__init__.py
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/__init__.cpython-39.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/adapter.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/adapter.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/adapter.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/adapter.cpython-39.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/adapter_with_char_embedding.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/adapter_with_char_embedding.cpython-39.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/adapter_with_fussion_TE.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/adapter_with_fussion_TE.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/adapter_with_fussion_TE.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/adapter_with_fussion_TE.cpython-38.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/adapter_with_fussion_TE.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/adapter_with_fussion_TE.cpython-39.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/adapter_with_pre_prompt.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/adapter_with_pre_prompt.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/attention.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/attention.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/char_encoder.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/char_encoder.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/controlnet.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/controlnet.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/dual_controlnet.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/dual_controlnet.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/fussion_text_embedding.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/fussion_text_embedding.cpython-39.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/modules.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/modules.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/only_pre_prompt.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/only_pre_prompt.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/only_prefix_prompt.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/only_prefix_prompt.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/openaimodel.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/openaimodel.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/transformer_2d_with_controlnet.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/transformer_2d_with_controlnet.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/transformer_2d_with_dual_text_controlnet.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/transformer_2d_with_dual_text_controlnet.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_adapter.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_adapter.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_adapter.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_adapter.cpython-38.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_adapter.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_adapter.cpython-39.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_controlnet.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_controlnet.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_dual_text_controlnet.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_dual_text_controlnet.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_adapter.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_adapter.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_adapter.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_adapter.cpython-38.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_adapter.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_adapter.cpython-39.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_controlnet.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_controlnet.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_dual_text.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_dual_text.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_dual_text_controlnet.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_dual_text_controlnet.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/models/__pycache__/union_net.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/union_net.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/pipelines/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__init__.py
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/pipelines/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/pipelines/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/__init__.cpython-39.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/pipelines/__pycache__/new_paradigm_inpainting_dual_text_encoder.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/new_paradigm_inpainting_dual_text_encoder.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting.cpython-39.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_dual_text_full_controlnet.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_dual_text_full_controlnet.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_full_controlnet.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_full_controlnet.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_mask_controlnet.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_mask_controlnet.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_only_controlnet.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_only_controlnet.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_only_pre_prompt.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_only_pre_prompt.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_only_prefix_prompt.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_only_prefix_prompt.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_text_glyph.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_text_glyph.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_text_vae_text_glyph.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_text_vae_text_glyph.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_adapter.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_adapter.cpython-39.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_adapter_with_fussion_te.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_adapter_with_fussion_te.cpython-310.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_adapter_with_fussion_te.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_adapter_with_fussion_te.cpython-39.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_adapter_zero_prompt.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_adapter_zero_prompt.cpython-39.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_char_adapter.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_char_adapter.cpython-39.pyc
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/utils/ori.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/utils/ori.png
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/utils/output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/utils/output.png
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/utils/res.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/utils/res.png
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/utils/res_area.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/utils/res_area.png
--------------------------------------------------------------------------------
/textfussion/my_inpainting/src/utils/res_trilinear.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/utils/res_trilinear.png
--------------------------------------------------------------------------------
/textfussion/my_inpainting/train_vae.sh:
--------------------------------------------------------------------------------
1 | export MODEL_NAME="stabilityai/stable-diffusion-2-inpainting"
2 | export OUTPUT_DIR="output/pretrain_8702_text_vae"
3 |
4 | NCCL_P2P_DISABLE=1 accelerate launch train_vae.py \
5 | --pretrained_model_name_or_path=$MODEL_NAME \
6 | --output_dir=$OUTPUT_DIR \
7 | --resolution=512 \
8 | --train_batch_size=4 \
9 | --gradient_accumulation_steps=1 \
10 | --gradient_checkpointing \
11 | --learning_rate=5e-6 \
12 | --num_train_epochs=3 \
13 | --lr_scheduler="constant" \
14 | --lr_warmup_steps=3000 \
15 | --dataloader_num_workers=8 \
16 | --mixed_precision=fp16 \
17 |
--------------------------------------------------------------------------------
/textfussion/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.black]
2 | line-length = 119
3 | target-version = ['py37']
4 |
5 | [tool.ruff]
6 | # Never enforce `E501` (line length violations).
7 | ignore = ["C901", "E501", "E741", "W605"]
8 | select = ["C", "E", "F", "I", "W"]
9 | line-length = 119
10 |
11 | # Ignore import violations in all `__init__.py` files.
12 | [tool.ruff.per-file-ignores]
13 | "__init__.py" = ["E402", "F401", "F403", "F811"]
14 | "src/diffusers/utils/dummy_*.py" = ["F401"]
15 |
16 | [tool.ruff.isort]
17 | lines-after-imports = 2
18 | known-first-party = ["diffusers"]
19 |
--------------------------------------------------------------------------------
/textfussion/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/scripts/__init__.py
--------------------------------------------------------------------------------
/textfussion/setup.cfg:
--------------------------------------------------------------------------------
1 | [isort]
2 | default_section = FIRSTPARTY
3 | ensure_newline_before_comments = True
4 | force_grid_wrap = 0
5 | include_trailing_comma = True
6 | known_first_party = accelerate
7 | known_third_party =
8 | numpy
9 | torch
10 | torch_xla
11 |
12 | line_length = 119
13 | lines_after_imports = 2
14 | multi_line_output = 3
15 | use_parentheses = True
16 |
17 | [flake8]
18 | ignore = E203, E722, E501, E741, W503, W605
19 | max-line-length = 119
20 | per-file-ignores = __init__.py:F401
21 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers.egg-info/entry_points.txt:
--------------------------------------------------------------------------------
1 | [console_scripts]
2 | diffusers-cli = diffusers.commands.diffusers_cli:main
3 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 | diffusers
2 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers/experimental/README.md:
--------------------------------------------------------------------------------
1 | # 🧨 Diffusers Experimental
2 |
3 | We are adding experimental code to support novel applications and usages of the Diffusers library.
4 | Currently, the following experiments are supported:
5 | * Reinforcement learning via an implementation of the [Diffuser](https://arxiv.org/abs/2205.09991) model.
--------------------------------------------------------------------------------
/textfussion/src/diffusers/experimental/__init__.py:
--------------------------------------------------------------------------------
1 | from .rl import ValueGuidedRLPipeline
2 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers/experimental/rl/__init__.py:
--------------------------------------------------------------------------------
1 | from .value_guided_sampling import ValueGuidedRLPipeline
2 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers/models/README.md:
--------------------------------------------------------------------------------
1 | # Models
2 |
3 | For more detail on the models, please refer to the [docs](https://huggingface.co/docs/diffusers/api/models).
--------------------------------------------------------------------------------
/textfussion/src/diffusers/pipelines/audio_diffusion/__init__.py:
--------------------------------------------------------------------------------
1 | from .mel import Mel
2 | from .pipeline_audio_diffusion import AudioDiffusionPipeline
3 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers/pipelines/audioldm/__init__.py:
--------------------------------------------------------------------------------
1 | from ...utils import (
2 | OptionalDependencyNotAvailable,
3 | is_torch_available,
4 | is_transformers_available,
5 | is_transformers_version,
6 | )
7 |
8 |
9 | try:
10 | if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.27.0")):
11 | raise OptionalDependencyNotAvailable()
12 | except OptionalDependencyNotAvailable:
13 | from ...utils.dummy_torch_and_transformers_objects import (
14 | AudioLDMPipeline,
15 | )
16 | else:
17 | from .pipeline_audioldm import AudioLDMPipeline
18 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers/pipelines/dance_diffusion/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_dance_diffusion import DanceDiffusionPipeline
2 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers/pipelines/ddim/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_ddim import DDIMPipeline
2 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers/pipelines/ddpm/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_ddpm import DDPMPipeline
2 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers/pipelines/dit/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_dit import DiTPipeline
2 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers/pipelines/latent_diffusion/__init__.py:
--------------------------------------------------------------------------------
1 | from ...utils import is_transformers_available
2 | from .pipeline_latent_diffusion_superresolution import LDMSuperResolutionPipeline
3 |
4 |
5 | if is_transformers_available():
6 | from .pipeline_latent_diffusion import LDMBertModel, LDMTextToImagePipeline
7 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers/pipelines/latent_diffusion_uncond/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_latent_diffusion_uncond import LDMPipeline
2 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers/pipelines/paint_by_example/__init__.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from typing import List, Optional, Union
3 |
4 | import numpy as np
5 | import PIL
6 | from PIL import Image
7 |
8 | from ...utils import is_torch_available, is_transformers_available
9 |
10 |
# Expose the PaintByExample classes only when both optional backends are
# present; when either is missing the names are simply absent from this
# module rather than raising at import time.
# NOTE(review): the dataclass/typing/numpy/PIL imports at the top of this
# file are not used in the visible code — they look like leftovers from a
# removed output dataclass, but removing them could break `from ... import
# PIL`-style re-exports, so they are left in place; verify before cleanup.
if is_transformers_available() and is_torch_available():
    from .image_encoder import PaintByExampleImageEncoder
    from .pipeline_paint_by_example import PaintByExamplePipeline
14 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers/pipelines/pndm/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_pndm import PNDMPipeline
2 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers/pipelines/repaint/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_repaint import RePaintPipeline
2 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers/pipelines/score_sde_ve/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_score_sde_ve import ScoreSdeVePipeline
2 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers/pipelines/stochastic_karras_ve/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_stochastic_karras_ve import KarrasVePipeline
2 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers/pipelines/unclip/__init__.py:
--------------------------------------------------------------------------------
1 | from ...utils import (
2 | OptionalDependencyNotAvailable,
3 | is_torch_available,
4 | is_transformers_available,
5 | is_transformers_version,
6 | )
7 |
8 |
try:
    # UnCLIP needs torch plus transformers >= 4.25.0; probe for both before
    # importing the real pipelines so a partial install does not hard-fail.
    if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.25.0")):
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    # Fall back to dummy placeholder classes; presumably these raise an
    # informative "missing backend" error on use — see
    # dummy_torch_and_transformers_objects (TODO confirm).
    from ...utils.dummy_torch_and_transformers_objects import UnCLIPImageVariationPipeline, UnCLIPPipeline
else:
    # Dependencies satisfied: import the real implementations. Note that
    # UnCLIPTextProjModel is only available in this (non-dummy) branch.
    from .pipeline_unclip import UnCLIPPipeline
    from .pipeline_unclip_image_variation import UnCLIPImageVariationPipeline
    from .text_proj import UnCLIPTextProjModel
18 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers/pipelines/vq_diffusion/__init__.py:
--------------------------------------------------------------------------------
1 | from ...utils import is_torch_available, is_transformers_available
2 |
3 |
# VQDiffusion requires both transformers and torch; when either backend is
# missing the pipeline names are simply not defined on this module (no
# dummy fallback here, unlike e.g. the unclip package).
if is_transformers_available() and is_torch_available():
    from .pipeline_vq_diffusion import LearnedClassifierFreeSamplingEmbeddings, VQDiffusionPipeline
6 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers/schedulers/README.md:
--------------------------------------------------------------------------------
1 | # Schedulers
2 |
3 | For more information on the schedulers, please refer to the [docs](https://huggingface.co/docs/diffusers/api/schedulers/overview).
--------------------------------------------------------------------------------
/textfussion/src/diffusers/utils/dummy_note_seq_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | from ..utils import DummyObject, requires_backends
3 |
4 |
class MidiProcessor(metaclass=DummyObject):
    """Placeholder for ``MidiProcessor`` used when ``note_seq`` is not installed.

    Autogenerated by ``make fix-copies`` (see file header) — do not edit by
    hand. Instantiation and the common factory classmethods all route through
    ``requires_backends``, which presumably raises an informative
    missing-dependency error (TODO confirm in utils).
    """

    # Backend(s) that must be importable for the real class to exist.
    _backends = ["note_seq"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["note_seq"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        requires_backends(cls, ["note_seq"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["note_seq"])
18 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers/utils/dummy_onnx_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | from ..utils import DummyObject, requires_backends
3 |
4 |
class OnnxRuntimeModel(metaclass=DummyObject):
    """Placeholder for ``OnnxRuntimeModel`` used when ``onnx`` is not installed.

    Autogenerated by ``make fix-copies`` (see file header) — do not edit by
    hand. ``__init__`` and the factory classmethods delegate to
    ``requires_backends`` so the user gets a missing-dependency error instead
    of the real model.
    """

    # Backend(s) that must be importable for the real class to exist.
    _backends = ["onnx"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["onnx"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        requires_backends(cls, ["onnx"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["onnx"])
18 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers/utils/dummy_torch_and_scipy_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | from ..utils import DummyObject, requires_backends
3 |
4 |
class LMSDiscreteScheduler(metaclass=DummyObject):
    """Placeholder for ``LMSDiscreteScheduler`` when ``torch``/``scipy`` are missing.

    Autogenerated by ``make fix-copies`` (see file header) — do not edit by
    hand. All entry points delegate to ``requires_backends`` with the full
    backend list so the error names every missing package.
    """

    # Both backends are required; the real scheduler is unavailable if
    # either one is missing.
    _backends = ["torch", "scipy"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["torch", "scipy"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "scipy"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "scipy"])
18 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers/utils/dummy_torch_and_transformers_and_k_diffusion_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | from ..utils import DummyObject, requires_backends
3 |
4 |
class StableDiffusionKDiffusionPipeline(metaclass=DummyObject):
    """Placeholder for ``StableDiffusionKDiffusionPipeline`` when backends are missing.

    Autogenerated by ``make fix-copies`` (see file header) — do not edit by
    hand. Requires ``torch``, ``transformers`` and ``k_diffusion``; every
    entry point delegates to ``requires_backends`` with that full list.
    """

    # All three backends must be importable for the real pipeline to exist.
    _backends = ["torch", "transformers", "k_diffusion"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["torch", "transformers", "k_diffusion"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "transformers", "k_diffusion"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "transformers", "k_diffusion"])
18 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers/utils/dummy_transformers_and_torch_and_note_seq_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | from ..utils import DummyObject, requires_backends
3 |
4 |
class SpectrogramDiffusionPipeline(metaclass=DummyObject):
    """Placeholder for ``SpectrogramDiffusionPipeline`` when backends are missing.

    Autogenerated by ``make fix-copies`` (see file header) — do not edit by
    hand. Requires ``transformers``, ``torch`` and ``note_seq``; each entry
    point delegates to ``requires_backends`` with the full backend list.
    """

    # All three backends must be importable for the real pipeline to exist.
    _backends = ["transformers", "torch", "note_seq"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["transformers", "torch", "note_seq"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        requires_backends(cls, ["transformers", "torch", "note_seq"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["transformers", "torch", "note_seq"])
18 |
--------------------------------------------------------------------------------
/textfussion/src/diffusers/utils/pil_utils.py:
--------------------------------------------------------------------------------
1 | import PIL.Image
2 | import PIL.ImageOps
3 | from packaging import version
4 |
5 |
# Build a single version-independent lookup table mapping filter names to
# Pillow resampling constants. Pillow 9.1.0 introduced the
# `Image.Resampling` enum; pick the spelling that matches the installed
# version so callers never have to branch themselves.
# `base_version` strips any pre/post/dev suffix before the comparison so
# e.g. "9.1.0.dev0" compares cleanly.
if version.parse(version.parse(PIL.__version__).base_version) >= version.parse("9.1.0"):
    PIL_INTERPOLATION = {
        # "linear" is deliberately an alias of bilinear (no 1-D filter in PIL).
        "linear": PIL.Image.Resampling.BILINEAR,
        "bilinear": PIL.Image.Resampling.BILINEAR,
        "bicubic": PIL.Image.Resampling.BICUBIC,
        "lanczos": PIL.Image.Resampling.LANCZOS,
        "nearest": PIL.Image.Resampling.NEAREST,
    }
else:
    # Pre-9.1 Pillow: constants live directly on the Image module
    # (PIL.Image.LINEAR is the legacy alias of BILINEAR).
    PIL_INTERPOLATION = {
        "linear": PIL.Image.LINEAR,
        "bilinear": PIL.Image.BILINEAR,
        "bicubic": PIL.Image.BICUBIC,
        "lanczos": PIL.Image.LANCZOS,
        "nearest": PIL.Image.NEAREST,
    }
22 |
--------------------------------------------------------------------------------
/textfussion/tests:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------