├── README.md ├── images └── framwork.png ├── mmocr ├── .circleci │ ├── config.yml │ ├── docker │ │ └── Dockerfile │ └── test.yml ├── .codespellrc ├── .coveragerc ├── .dev_scripts │ ├── benchmark_full_models.txt │ ├── benchmark_options.py │ ├── benchmark_train_models.txt │ ├── covignore.cfg │ └── diff_coverage_test.sh ├── .gitignore ├── .owners.yml ├── .pre-commit-config.yaml ├── .pylintrc ├── .readthedocs.yml ├── CITATION.cff ├── LICENSE ├── MANIFEST.in ├── README.md ├── configs │ ├── backbone │ │ └── oclip │ │ │ ├── README.md │ │ │ └── metafile.yml │ ├── kie │ │ ├── _base_ │ │ │ ├── datasets │ │ │ │ ├── wildreceipt-openset.py │ │ │ │ └── wildreceipt.py │ │ │ ├── default_runtime.py │ │ │ └── schedules │ │ │ │ └── schedule_adam_60e.py │ │ └── sdmgr │ │ │ ├── README.md │ │ │ ├── _base_sdmgr_novisual.py │ │ │ ├── _base_sdmgr_unet16.py │ │ │ ├── metafile.yml │ │ │ ├── sdmgr_novisual_60e_wildreceipt-openset.py │ │ │ ├── sdmgr_novisual_60e_wildreceipt.py │ │ │ └── sdmgr_unet16_60e_wildreceipt.py │ ├── textdet │ │ ├── _base_ │ │ │ ├── datasets │ │ │ │ ├── ctw1500.py │ │ │ │ ├── icdar2013.py │ │ │ │ ├── icdar2015.py │ │ │ │ ├── icdar2017.py │ │ │ │ ├── synthtext.py │ │ │ │ ├── totaltext.py │ │ │ │ └── toy_data.py │ │ │ ├── default_runtime.py │ │ │ ├── pretrain_runtime.py │ │ │ └── schedules │ │ │ │ ├── schedule_adam_600e.py │ │ │ │ ├── schedule_sgd_100k.py │ │ │ │ ├── schedule_sgd_1200e.py │ │ │ │ └── schedule_sgd_base.py │ │ ├── dbnet │ │ │ ├── README.md │ │ │ ├── _base_dbnet_resnet18_fpnc.py │ │ │ ├── _base_dbnet_resnet50-dcnv2_fpnc.py │ │ │ ├── dbnet_resnet18_fpnc_100k_synthtext.py │ │ │ ├── dbnet_resnet18_fpnc_1200e_icdar2015.py │ │ │ ├── dbnet_resnet18_fpnc_1200e_totaltext.py │ │ │ ├── dbnet_resnet50-dcnv2_fpnc_100k_synthtext.py │ │ │ ├── dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py │ │ │ ├── dbnet_resnet50-oclip_1200e_icdar2015.py │ │ │ ├── dbnet_resnet50_1200e_icdar2015.py │ │ │ ├── dbnet_resnet50_120e_synth_train_ic15_test.py │ │ │ ├── metafile.yml │ │ │ ├── 
synth_data_train_100k_ic15_test.py │ │ │ ├── synth_finetune_from_pretrain_ctw1500.py │ │ │ └── synth_finetune_from_pretrain_ic15_test.py │ │ ├── dbnetpp │ │ │ ├── README.md │ │ │ ├── _base_dbnetpp_resnet50-dcnv2_fpnc.py │ │ │ ├── dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py │ │ │ ├── dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py │ │ │ ├── dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015.py │ │ │ ├── dbnetpp_resnet50_fpnc_1200e_icdar2015.py │ │ │ └── metafile.yml │ │ ├── drrg │ │ │ ├── README.md │ │ │ ├── _base_drrg_resnet50_fpn-unet.py │ │ │ ├── drrg_resnet50-oclip_fpn-unet_1200e_ctw1500.py │ │ │ ├── drrg_resnet50_fpn-unet_1200e_ctw1500.py │ │ │ └── metafile.yml │ │ ├── fcenet │ │ │ ├── README.md │ │ │ ├── _base_fcenet_resnet50-dcnv2_fpn.py │ │ │ ├── _base_fcenet_resnet50_fpn.py │ │ │ ├── fcenet_resnet50-dcnv2_fpn_1500e_ctw1500.py │ │ │ ├── fcenet_resnet50-oclip_fpn_1500e_ctw1500.py │ │ │ ├── fcenet_resnet50-oclip_fpn_1500e_icdar2015.py │ │ │ ├── fcenet_resnet50_fpn_1500e_icdar2015.py │ │ │ ├── fcenet_resnet50_fpn_1500e_totaltext.py │ │ │ ├── finetune.py │ │ │ ├── metafile.yml │ │ │ └── synth_data_train_real_data_test.py │ │ ├── maskrcnn │ │ │ ├── README.md │ │ │ ├── _base_mask-rcnn_resnet50_fpn.py │ │ │ ├── mask-rcnn_resnet50-oclip_fpn_160e_ctw1500.py │ │ │ ├── mask-rcnn_resnet50-oclip_fpn_160e_icdar2015.py │ │ │ ├── mask-rcnn_resnet50_fpn_160e_ctw1500.py │ │ │ ├── mask-rcnn_resnet50_fpn_160e_icdar2015.py │ │ │ ├── mask-rcnn_resnet50_fpn_160e_icdar2017.py │ │ │ └── metafile.yml │ │ ├── panet │ │ │ ├── README.md │ │ │ ├── _base_panet_resnet18_fpem-ffm.py │ │ │ ├── _base_panet_resnet50_fpem-ffm.py │ │ │ ├── metafile.yml │ │ │ ├── panet_resnet18_fpem-ffm_600e_ctw1500.py │ │ │ ├── panet_resnet18_fpem-ffm_600e_icdar2015.py │ │ │ ├── panet_resnet50_fpem-ffm_600e_icdar2017.py │ │ │ └── synth_data_train_real_data_test.py │ │ ├── psenet │ │ │ ├── README.md │ │ │ ├── _base_psenet_resnet50_fpnf.py │ │ │ ├── metafile.yml │ │ │ ├── psenet_resnet50-oclip_fpnf_600e_ctw1500.py │ │ │ 
├── psenet_resnet50-oclip_fpnf_600e_icdar2015.py │ │ │ ├── psenet_resnet50_fpnf_600e_ctw1500.py │ │ │ ├── psenet_resnet50_fpnf_600e_icdar2015.py │ │ │ ├── psenet_resnet50_fpnf_600e_icdar2017.py │ │ │ └── psenet_resnet50_synth_train_ic15_test.py │ │ └── textsnake │ │ │ ├── README.md │ │ │ ├── _base_textsnake_resnet50_fpn-unet.py │ │ │ ├── metafile.yml │ │ │ ├── textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500.py │ │ │ └── textsnake_resnet50_fpn-unet_1200e_ctw1500.py │ └── textrecog │ │ ├── _base_ │ │ ├── datasets │ │ │ ├── coco_text_v1.py │ │ │ ├── cute80.py │ │ │ ├── icdar2011.py │ │ │ ├── icdar2013.py │ │ │ ├── icdar2015.py │ │ │ ├── iiit5k.py │ │ │ ├── mjsynth.py │ │ │ ├── svt.py │ │ │ ├── svtp.py │ │ │ ├── synthtext.py │ │ │ ├── synthtext_add.py │ │ │ ├── totaltext.py │ │ │ └── toy_data.py │ │ ├── default_runtime.py │ │ └── schedules │ │ │ ├── schedule_adadelta_5e.py │ │ │ ├── schedule_adam_base.py │ │ │ ├── schedule_adam_step_5e.py │ │ │ └── schedule_adamw_cos_6e.py │ │ ├── abinet │ │ ├── README.md │ │ ├── _base_abinet-vision.py │ │ ├── _base_abinet.py │ │ ├── abinet-vision_20e_st-an_mj.py │ │ ├── abinet_20e_st-an_mj.py │ │ └── metafile.yml │ │ ├── aster │ │ ├── README.md │ │ ├── _base_aster.py │ │ ├── aster_resnet45_6e_st_mj.py │ │ └── metafile.yml │ │ ├── crnn │ │ ├── README.md │ │ ├── _base_crnn_mini-vgg.py │ │ ├── crnn_mini-vgg_5e_mj.py │ │ ├── crnn_mini-vgg_5e_toy.py │ │ └── metafile.yml │ │ ├── master │ │ ├── README.md │ │ ├── _base_master_resnet31.py │ │ ├── master_resnet31_12e_st_mj_sa.py │ │ ├── master_resnet31_12e_toy.py │ │ └── metafile.yml │ │ ├── nrtr │ │ ├── README.md │ │ ├── _base_nrtr_modality-transform.py │ │ ├── _base_nrtr_resnet31.py │ │ ├── metafile.yml │ │ ├── nrtr_modality-transform_6e_st_mj.py │ │ ├── nrtr_modality-transform_6e_toy.py │ │ ├── nrtr_resnet31-1by16-1by8_6e_st_mj.py │ │ └── nrtr_resnet31-1by8-1by4_6e_st_mj.py │ │ ├── robust_scanner │ │ ├── README.md │ │ ├── _base_robustscanner_resnet31.py │ │ ├── metafile.yml │ │ ├── 
robustscanner_resnet31_5e_st-sub_mj-sub_sa_real.py │ │ └── robustscanner_resnet31_5e_toy.py │ │ ├── sar │ │ ├── README.md │ │ ├── _base_sar_resnet31_parallel-decoder.py │ │ ├── metafile.yml │ │ ├── sar_resnet31_parallel-decoder_5e_st-sub_mj-sub_sa_real.py │ │ ├── sar_resnet31_parallel-decoder_5e_toy.py │ │ └── sar_resnet31_sequential-decoder_5e_st-sub_mj-sub_sa_real.py │ │ ├── satrn │ │ ├── README.md │ │ ├── _base_satrn_shallow.py │ │ ├── metafile.yml │ │ ├── satrn_shallow-small_5e_st_mj.py │ │ └── satrn_shallow_5e_st_mj.py │ │ └── svtr │ │ ├── README.md │ │ ├── _base_svtr-tiny.py │ │ ├── metafile.yml │ │ ├── svtr-base_20e_st_mj.py │ │ ├── svtr-large_20e_st_mj.py │ │ ├── svtr-small_20e_st_mj.py │ │ └── svtr-tiny_20e_st_mj.py ├── dataset_zoo │ ├── cocotextv2 │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ ├── textdet.py │ │ ├── textrecog.py │ │ └── textspotting.py │ ├── ctw1500 │ │ ├── metafile.yml │ │ ├── textdet.py │ │ ├── textrecog.py │ │ └── textspotting.py │ ├── cute80 │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ └── textrecog.py │ ├── funsd │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ ├── textdet.py │ │ ├── textrecog.py │ │ └── textspotting.py │ ├── icdar2013 │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ ├── textdet.py │ │ ├── textrecog.py │ │ └── textspotting.py │ ├── icdar2015 │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ ├── textdet.py │ │ ├── textrecog.py │ │ └── textspotting.py │ ├── iiit5k │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ └── textrecog.py │ ├── mjsynth │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ └── textrecog.py │ ├── naf │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ ├── textdet.py │ │ ├── textrecog.py │ │ └── textspotting.py │ ├── sroie │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ ├── textdet.py │ │ ├── textrecog.py │ │ └── textspotting.py │ ├── svt │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ ├── textdet.py │ │ ├── textrecog.py │ │ └── textspotting.py │ ├── svtp │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ └── 
textrecog.py │ ├── synthtext │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ ├── textdet.py │ │ ├── textrecog.py │ │ └── textspotting.py │ ├── textocr │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ ├── textdet.py │ │ ├── textrecog.py │ │ └── textspotting.py │ ├── totaltext │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ ├── textdet.py │ │ ├── textrecog.py │ │ └── textspotting.py │ └── wildreceipt │ │ ├── kie.py │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ ├── textdet.py │ │ ├── textrecog.py │ │ └── textspotting.py ├── demo │ ├── demo_densetext_det.jpg │ ├── demo_kie.jpeg │ ├── demo_text_det.jpg │ ├── demo_text_ocr.jpg │ ├── demo_text_recog.jpg │ └── resources │ │ ├── demo_kie_pred.png │ │ ├── det_vis.png │ │ ├── kie_vis.png │ │ ├── log_analysis_demo.png │ │ └── rec_vis.png ├── dicts │ ├── chinese_english_digits.txt │ ├── english_digits_symbols.txt │ ├── english_digits_symbols_space.txt │ ├── lower_english_digits.txt │ ├── lower_english_digits_space.txt │ └── sdmgr_dict.txt ├── docker │ ├── Dockerfile │ └── serve │ │ ├── Dockerfile │ │ ├── config.properties │ │ └── entrypoint.sh ├── docs │ ├── en │ │ ├── Makefile │ │ ├── _static │ │ │ ├── css │ │ │ │ └── readthedocs.css │ │ │ ├── images │ │ │ │ └── mmocr.png │ │ │ └── js │ │ │ │ ├── collapsed.js │ │ │ │ └── table.js │ │ ├── _templates │ │ │ └── classtemplate.rst │ │ ├── api │ │ │ ├── apis.rst │ │ │ ├── datasets.rst │ │ │ ├── engine.rst │ │ │ ├── evaluation.rst │ │ │ ├── models.rst │ │ │ ├── structures.rst │ │ │ ├── transforms.rst │ │ │ ├── utils.rst │ │ │ └── visualization.rst │ │ ├── basic_concepts │ │ │ ├── convention.md │ │ │ ├── data_flow.md │ │ │ ├── datasets.md │ │ │ ├── engine.md │ │ │ ├── evaluation.md │ │ │ ├── models.md │ │ │ ├── overview.md │ │ │ ├── structures.md │ │ │ ├── transforms.md │ │ │ └── visualizers.md │ │ ├── conf.py │ │ ├── contact.md │ │ ├── dataset_zoo.py │ │ ├── docutils.conf │ │ ├── get_started │ │ │ ├── faq.md │ │ │ ├── install.md │ │ │ ├── overview.md │ │ │ └── quick_run.md │ │ ├── 
index.rst │ │ ├── make.bat │ │ ├── merge_docs.sh │ │ ├── migration │ │ │ ├── branches.md │ │ │ ├── code.md │ │ │ ├── dataset.md │ │ │ ├── model.md │ │ │ ├── news.md │ │ │ ├── overview.md │ │ │ └── transforms.md │ │ ├── notes │ │ │ ├── branches.md │ │ │ ├── changelog.md │ │ │ ├── changelog_v0.x.md │ │ │ └── contribution_guide.md │ │ ├── project_zoo.py │ │ ├── requirements.txt │ │ ├── stats.py │ │ ├── switch_language.md │ │ ├── user_guides │ │ │ ├── config.md │ │ │ ├── data_prepare │ │ │ │ ├── dataset_preparer.md │ │ │ │ ├── det.md │ │ │ │ ├── kie.md │ │ │ │ └── recog.md │ │ │ ├── dataset_prepare.md │ │ │ ├── inference.md │ │ │ ├── train_test.md │ │ │ ├── useful_tools.md │ │ │ └── visualization.md │ │ └── weight_list.py │ └── zh_cn │ │ ├── Makefile │ │ ├── _static │ │ ├── css │ │ │ └── readthedocs.css │ │ ├── images │ │ │ └── mmocr.png │ │ └── js │ │ │ ├── collapsed.js │ │ │ └── table.js │ │ ├── _templates │ │ └── classtemplate.rst │ │ ├── api │ │ ├── apis.rst │ │ ├── datasets.rst │ │ ├── engine.rst │ │ ├── evaluation.rst │ │ ├── models.rst │ │ ├── structures.rst │ │ ├── transforms.rst │ │ ├── utils.rst │ │ └── visualization.rst │ │ ├── basic_concepts │ │ ├── convention.md │ │ ├── data_flow.md │ │ ├── datasets.md │ │ ├── engine.md │ │ ├── evaluation.md │ │ ├── models.md │ │ ├── overview.md │ │ ├── structures.md │ │ ├── transforms.md │ │ └── visualizers.md │ │ ├── conf.py │ │ ├── contact.md │ │ ├── cp_origin_docs.sh │ │ ├── dataset_zoo.py │ │ ├── docutils.conf │ │ ├── get_started │ │ ├── install.md │ │ ├── overview.md │ │ └── quick_run.md │ │ ├── index.rst │ │ ├── make.bat │ │ ├── merge_docs.sh │ │ ├── migration │ │ ├── branches.md │ │ ├── code.md │ │ ├── dataset.md │ │ ├── model.md │ │ ├── news.md │ │ ├── overview.md │ │ └── transforms.md │ │ ├── notes │ │ ├── branches.md │ │ └── contribution_guide.md │ │ ├── project_zoo.py │ │ ├── stats.py │ │ ├── switch_language.md │ │ ├── user_guides │ │ ├── config.md │ │ ├── data_prepare │ │ │ ├── dataset_preparer.md │ │ │ └── 
kie.md │ │ ├── dataset_prepare.md │ │ ├── inference.md │ │ ├── train_test.md │ │ ├── useful_tools.md │ │ └── visualization.md │ │ └── weight_list.py ├── mmocr │ ├── __init__.py │ ├── apis │ │ ├── __init__.py │ │ └── inferencers │ │ │ ├── __init__.py │ │ │ ├── base_mmocr_inferencer.py │ │ │ ├── kie_inferencer.py │ │ │ ├── mmocr_inferencer.py │ │ │ ├── textdet_inferencer.py │ │ │ ├── textrec_inferencer.py │ │ │ └── textspot_inferencer.py │ ├── datasets │ │ ├── __init__.py │ │ ├── dataset_wrapper.py │ │ ├── icdar_dataset.py │ │ ├── ocr_dataset.py │ │ ├── preparers │ │ │ ├── __init__.py │ │ │ ├── config_generators │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── textdet_config_generator.py │ │ │ │ ├── textrecog_config_generator.py │ │ │ │ └── textspotting_config_generator.py │ │ │ ├── data_preparer.py │ │ │ ├── dumpers │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── json_dumper.py │ │ │ │ ├── lmdb_dumper.py │ │ │ │ └── wild_receipt_openset_dumper.py │ │ │ ├── gatherers │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── mono_gatherer.py │ │ │ │ ├── naf_gatherer.py │ │ │ │ └── pair_gatherer.py │ │ │ ├── obtainers │ │ │ │ ├── __init__.py │ │ │ │ └── naive_data_obtainer.py │ │ │ ├── packers │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── textdet_packer.py │ │ │ │ ├── textrecog_packer.py │ │ │ │ ├── textspotting_packer.py │ │ │ │ └── wildreceipt_packer.py │ │ │ └── parsers │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── coco_parser.py │ │ │ │ ├── ctw1500_parser.py │ │ │ │ ├── funsd_parser.py │ │ │ │ ├── icdar_txt_parser.py │ │ │ │ ├── mjsynth_parser.py │ │ │ │ ├── naf_parser.py │ │ │ │ ├── sroie_parser.py │ │ │ │ ├── svt_parser.py │ │ │ │ ├── synthtext_parser.py │ │ │ │ ├── totaltext_parser.py │ │ │ │ └── wildreceipt_parser.py │ │ ├── recog_lmdb_dataset.py │ │ ├── recog_text_dataset.py │ │ ├── samplers │ │ │ ├── __init__.py │ │ │ └── batch_aug.py │ │ ├── transforms │ │ │ ├── __init__.py │ │ │ ├── adapters.py │ │ │ ├── formatting.py │ │ │ 
├── loading.py │ │ │ ├── ocr_transforms.py │ │ │ ├── textdet_transforms.py │ │ │ ├── textrecog_transforms.py │ │ │ └── wrappers.py │ │ └── wildreceipt_dataset.py │ ├── engine │ │ ├── __init__.py │ │ └── hooks │ │ │ ├── __init__.py │ │ │ └── visualization_hook.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── evaluator │ │ │ ├── __init__.py │ │ │ └── multi_datasets_evaluator.py │ │ ├── functional │ │ │ ├── __init__.py │ │ │ └── hmean.py │ │ └── metrics │ │ │ ├── __init__.py │ │ │ ├── f_metric.py │ │ │ ├── hmean_iou_metric.py │ │ │ └── recog_metric.py │ ├── models │ │ ├── __init__.py │ │ ├── common │ │ │ ├── __init__.py │ │ │ ├── backbones │ │ │ │ ├── __init__.py │ │ │ │ ├── clip_resnet.py │ │ │ │ └── unet.py │ │ │ ├── dictionary │ │ │ │ ├── __init__.py │ │ │ │ └── dictionary.py │ │ │ ├── layers │ │ │ │ ├── __init__.py │ │ │ │ └── transformer_layers.py │ │ │ ├── losses │ │ │ │ ├── __init__.py │ │ │ │ ├── bce_loss.py │ │ │ │ ├── ce_loss.py │ │ │ │ ├── dice_loss.py │ │ │ │ └── l1_loss.py │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ └── transformer_module.py │ │ │ └── plugins │ │ │ │ ├── __init__.py │ │ │ │ └── common.py │ │ ├── kie │ │ │ ├── __init__.py │ │ │ ├── extractors │ │ │ │ ├── __init__.py │ │ │ │ └── sdmgr.py │ │ │ ├── heads │ │ │ │ ├── __init__.py │ │ │ │ └── sdmgr_head.py │ │ │ ├── module_losses │ │ │ │ ├── __init__.py │ │ │ │ └── sdmgr_module_loss.py │ │ │ └── postprocessors │ │ │ │ ├── __init__.py │ │ │ │ └── sdmgr_postprocessor.py │ │ ├── textdet │ │ │ ├── __init__.py │ │ │ ├── data_preprocessors │ │ │ │ ├── __init__.py │ │ │ │ └── data_preprocessor.py │ │ │ ├── detectors │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── dbnet.py │ │ │ │ ├── drrg.py │ │ │ │ ├── fcenet.py │ │ │ │ ├── mmdet_wrapper.py │ │ │ │ ├── panet.py │ │ │ │ ├── psenet.py │ │ │ │ ├── single_stage_text_detector.py │ │ │ │ └── textsnake.py │ │ │ ├── heads │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── db_head.py │ │ │ │ ├── drrg_head.py │ │ │ │ ├── fce_head.py │ │ │ │ 
├── pan_head.py │ │ │ │ ├── pse_head.py │ │ │ │ └── textsnake_head.py │ │ │ ├── module_losses │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── db_module_loss.py │ │ │ │ ├── drrg_module_loss.py │ │ │ │ ├── fce_module_loss.py │ │ │ │ ├── pan_module_loss.py │ │ │ │ ├── pse_module_loss.py │ │ │ │ ├── seg_based_module_loss.py │ │ │ │ └── textsnake_module_loss.py │ │ │ ├── necks │ │ │ │ ├── __init__.py │ │ │ │ ├── fpem_ffm.py │ │ │ │ ├── fpn_cat.py │ │ │ │ ├── fpn_unet.py │ │ │ │ └── fpnf.py │ │ │ └── postprocessors │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── db_postprocessor.py │ │ │ │ ├── drrg_postprocessor.py │ │ │ │ ├── fce_postprocessor.py │ │ │ │ ├── pan_postprocessor.py │ │ │ │ ├── pse_postprocessor.py │ │ │ │ └── textsnake_postprocessor.py │ │ └── textrecog │ │ │ ├── __init__.py │ │ │ ├── backbones │ │ │ ├── __init__.py │ │ │ ├── mini_vgg.py │ │ │ ├── mobilenet_v2.py │ │ │ ├── nrtr_modality_transformer.py │ │ │ ├── resnet.py │ │ │ ├── resnet31_ocr.py │ │ │ ├── resnet_abi.py │ │ │ └── shallow_cnn.py │ │ │ ├── data_preprocessors │ │ │ ├── __init__.py │ │ │ └── data_preprocessor.py │ │ │ ├── decoders │ │ │ ├── __init__.py │ │ │ ├── abi_fuser.py │ │ │ ├── abi_language_decoder.py │ │ │ ├── abi_vision_decoder.py │ │ │ ├── aster_decoder.py │ │ │ ├── base.py │ │ │ ├── crnn_decoder.py │ │ │ ├── master_decoder.py │ │ │ ├── nrtr_decoder.py │ │ │ ├── position_attention_decoder.py │ │ │ ├── robust_scanner_fuser.py │ │ │ ├── sar_decoder.py │ │ │ ├── sar_decoder_with_bs.py │ │ │ ├── sequence_attention_decoder.py │ │ │ └── svtr_decoder.py │ │ │ ├── encoders │ │ │ ├── __init__.py │ │ │ ├── abi_encoder.py │ │ │ ├── aster_encoder.py │ │ │ ├── base.py │ │ │ ├── channel_reduction_encoder.py │ │ │ ├── nrtr_encoder.py │ │ │ ├── sar_encoder.py │ │ │ ├── satrn_encoder.py │ │ │ └── svtr_encoder.py │ │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── conv_layer.py │ │ │ ├── dot_product_attention_layer.py │ │ │ ├── lstm_layer.py │ │ │ ├── position_aware_layer.py │ │ │ ├── 
robust_scanner_fusion_layer.py │ │ │ └── satrn_layers.py │ │ │ ├── module_losses │ │ │ ├── __init__.py │ │ │ ├── abi_module_loss.py │ │ │ ├── base.py │ │ │ ├── ce_module_loss.py │ │ │ └── ctc_module_loss.py │ │ │ ├── plugins │ │ │ ├── __init__.py │ │ │ └── common.py │ │ │ ├── postprocessors │ │ │ ├── __init__.py │ │ │ ├── attn_postprocessor.py │ │ │ ├── base.py │ │ │ └── ctc_postprocessor.py │ │ │ ├── preprocessors │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ └── tps_preprocessor.py │ │ │ └── recognizers │ │ │ ├── __init__.py │ │ │ ├── abinet.py │ │ │ ├── aster.py │ │ │ ├── base.py │ │ │ ├── crnn.py │ │ │ ├── encoder_decoder_recognizer.py │ │ │ ├── encoder_decoder_recognizer_tta.py │ │ │ ├── master.py │ │ │ ├── nrtr.py │ │ │ ├── robust_scanner.py │ │ │ ├── sar.py │ │ │ ├── satrn.py │ │ │ └── svtr.py │ ├── registry.py │ ├── structures │ │ ├── __init__.py │ │ ├── kie_data_sample.py │ │ ├── textdet_data_sample.py │ │ ├── textrecog_data_sample.py │ │ └── textspotting_data_sample.py │ ├── testing │ │ ├── __init__.py │ │ └── data.py │ ├── utils │ │ ├── __init__.py │ │ ├── bbox_utils.py │ │ ├── bezier_utils.py │ │ ├── check_argument.py │ │ ├── collect_env.py │ │ ├── data_converter_utils.py │ │ ├── fileio.py │ │ ├── img_utils.py │ │ ├── mask_utils.py │ │ ├── parsers.py │ │ ├── point_utils.py │ │ ├── polygon_utils.py │ │ ├── processing.py │ │ ├── setup_env.py │ │ ├── string_utils.py │ │ ├── transform_utils.py │ │ └── typing_utils.py │ ├── version.py │ └── visualization │ │ ├── __init__.py │ │ ├── base_visualizer.py │ │ ├── kie_visualizer.py │ │ ├── textdet_visualizer.py │ │ ├── textrecog_visualizer.py │ │ └── textspotting_visualizer.py ├── model-index.yml ├── my_test.sh ├── my_train.sh ├── projects │ ├── ABCNet │ │ ├── README.md │ │ ├── README_V2.md │ │ ├── abcnet │ │ │ ├── __init__.py │ │ │ ├── metric │ │ │ │ ├── __init__.py │ │ │ │ └── e2e_hmean_iou_metric.py │ │ │ ├── model │ │ │ │ ├── __init__.py │ │ │ │ ├── abcnet.py │ │ │ │ ├── abcnet_det_head.py │ │ │ │ ├── 
abcnet_det_module_loss.py │ │ │ │ ├── abcnet_det_postprocessor.py │ │ │ │ ├── abcnet_postprocessor.py │ │ │ │ ├── abcnet_rec.py │ │ │ │ ├── abcnet_rec_backbone.py │ │ │ │ ├── abcnet_rec_decoder.py │ │ │ │ ├── abcnet_rec_encoder.py │ │ │ │ ├── base_roi_extractor.py │ │ │ │ ├── base_roi_head.py │ │ │ │ ├── bezier_roi_extractor.py │ │ │ │ ├── bifpn.py │ │ │ │ ├── coordinate_head.py │ │ │ │ ├── rec_roi_head.py │ │ │ │ └── two_stage_text_spotting.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ └── bezier_utils.py │ │ ├── config │ │ │ ├── _base_ │ │ │ │ ├── datasets │ │ │ │ │ └── icdar2015.py │ │ │ │ ├── default_runtime.py │ │ │ │ └── schedules │ │ │ │ │ └── schedule_sgd_500e.py │ │ │ ├── abcnet │ │ │ │ ├── _base_abcnet_resnet50_fpn.py │ │ │ │ └── abcnet_resnet50_fpn_500e_icdar2015.py │ │ │ └── abcnet_v2 │ │ │ │ ├── _base_abcnet-v2_resnet50_bifpn.py │ │ │ │ └── abcnet-v2_resnet50_bifpn_500e_icdar2015.py │ │ └── dicts │ │ │ └── abcnet.txt │ ├── README.md │ ├── SPTS │ │ ├── README.md │ │ ├── config │ │ │ ├── _base_ │ │ │ │ ├── datasets │ │ │ │ │ ├── ctw1500-spts.py │ │ │ │ │ ├── icdar2013-spts.py │ │ │ │ │ ├── icdar2013.py │ │ │ │ │ ├── icdar2015-spts.py │ │ │ │ │ ├── icdar2015.py │ │ │ │ │ ├── mlt-spts.py │ │ │ │ │ ├── syntext1-spts.py │ │ │ │ │ ├── syntext2-spts.py │ │ │ │ │ ├── totaltext-spts.py │ │ │ │ │ └── totaltext.py │ │ │ │ └── default_runtime.py │ │ │ └── spts │ │ │ │ ├── _base_spts_resnet50.py │ │ │ │ ├── _base_spts_resnet50_mmocr.py │ │ │ │ ├── spts_resnet50_8xb8-150e_pretrain-spts.py │ │ │ │ ├── spts_resnet50_8xb8-200e_icdar2013.py │ │ │ │ ├── spts_resnet50_8xb8-200e_icdar2015.py │ │ │ │ └── spts_resnet50_8xb8-200e_totaltext.py │ │ ├── dicts │ │ │ └── spts.txt │ │ ├── spts │ │ │ ├── __init__.py │ │ │ ├── datasets │ │ │ │ ├── __init__.py │ │ │ │ ├── adel_dataset.py │ │ │ │ └── transforms │ │ │ │ │ └── spts_transforms.py │ │ │ ├── metric │ │ │ │ ├── __init__.py │ │ │ │ └── e2e_point_metric.py │ │ │ └── model │ │ │ │ ├── __init__.py │ │ │ │ ├── 
base_text_spotter.py │ │ │ │ ├── encoder_decoder_text_spotter.py │ │ │ │ ├── position_embedding.py │ │ │ │ ├── spts.py │ │ │ │ ├── spts_decoder.py │ │ │ │ ├── spts_dictionary.py │ │ │ │ ├── spts_encoder.py │ │ │ │ ├── spts_module_loss.py │ │ │ │ └── spts_postprocessor.py │ │ └── tools │ │ │ └── ckpt_adapter.py │ ├── example_project │ │ ├── README.md │ │ ├── configs │ │ │ └── dbnet_dummy-resnet_fpnc_1200e_icdar2015.py │ │ └── dummy │ │ │ ├── __init__.py │ │ │ └── dummy_resnet.py │ ├── faq.md │ └── selected.txt ├── requirements.txt ├── requirements │ ├── albu.txt │ ├── build.txt │ ├── docs.txt │ ├── mminstall.txt │ ├── optional.txt │ ├── readthedocs.txt │ ├── runtime.txt │ └── tests.txt ├── resources │ ├── illustration.jpg │ ├── kie.jpg │ ├── mmocr-logo.png │ ├── textdet.jpg │ ├── textrecog.jpg │ └── verification.png ├── setup.cfg ├── setup.py ├── tests │ ├── models │ │ └── textrecog │ │ │ └── test_preprocessors │ │ │ └── test_tps_preprocessor.py │ ├── test_apis │ │ └── test_inferencers │ │ │ ├── test_kie_inferencer.py │ │ │ ├── test_mmocr_inferencer.py │ │ │ ├── test_textdet_inferencer.py │ │ │ └── test_textrec_inferencer.py │ ├── test_datasets │ │ ├── test_dataset_wrapper.py │ │ ├── test_icdar_dataset.py │ │ ├── test_preparers │ │ │ ├── test_config_generators │ │ │ │ ├── test_textdet_config_generator.py │ │ │ │ ├── test_textrecog_config_generator.py │ │ │ │ └── test_textspotting_config_generator.py │ │ │ ├── test_data_preparer.py │ │ │ ├── test_dumpers │ │ │ │ └── test_dumpers.py │ │ │ ├── test_gatherers │ │ │ │ ├── test_mono_gatherer.py │ │ │ │ └── test_pair_gatherer.py │ │ │ ├── test_packers │ │ │ │ ├── test_textdet_packer.py │ │ │ │ ├── test_textrecog_packer.py │ │ │ │ └── test_textspotting_packer.py │ │ │ └── test_parsers │ │ │ │ ├── test_ctw1500_parser.py │ │ │ │ ├── test_funsd_parser.py │ │ │ │ ├── test_icdar_txt_parsers.py │ │ │ │ ├── test_naf_parser.py │ │ │ │ ├── test_sroie_parser.py │ │ │ │ ├── test_svt_parsers.py │ │ │ │ ├── test_tt_parsers.py │ │ │ │ 
└── test_wildreceipt_parsers.py │ │ ├── test_recog_lmdb_dataset.py │ │ ├── test_recog_text_dataset.py │ │ ├── test_samplers │ │ │ └── test_batch_aug.py │ │ ├── test_transforms │ │ │ ├── test_adapters.py │ │ │ ├── test_formatting.py │ │ │ ├── test_loading.py │ │ │ ├── test_ocr_transforms.py │ │ │ ├── test_textdet_transforms.py │ │ │ ├── test_textrecog_transforms.py │ │ │ └── test_wrappers.py │ │ └── test_wildreceipt_dataset.py │ ├── test_engine │ │ └── test_hooks │ │ │ └── test_visualization_hook.py │ ├── test_evaluation │ │ ├── test_evaluator │ │ │ └── test_multi_datasets_evaluator.py │ │ ├── test_functional │ │ │ └── test_hmean.py │ │ └── test_metrics │ │ │ ├── test_f_metric.py │ │ │ ├── test_hmean_iou_metric.py │ │ │ └── test_recog_metric.py │ ├── test_init.py │ ├── test_models │ │ ├── test_common │ │ │ ├── test_backbones │ │ │ │ └── test_clip_resnet.py │ │ │ ├── test_layers │ │ │ │ └── test_transformer_layers.py │ │ │ ├── test_losses │ │ │ │ ├── test_bce_loss.py │ │ │ │ ├── test_dice_loss.py │ │ │ │ └── test_l1_loss.py │ │ │ ├── test_modules │ │ │ │ └── test_transformer_module.py │ │ │ └── test_plugins │ │ │ │ └── test_avgpool.py │ │ ├── test_kie │ │ │ ├── test_extractors │ │ │ │ └── test_sdmgr.py │ │ │ ├── test_heads │ │ │ │ └── test_sdmgr_head.py │ │ │ ├── test_module_losses │ │ │ │ └── test_sdmgr_module_loss.py │ │ │ └── test_postprocessors │ │ │ │ └── test_sdmgr_postprocessor.py │ │ ├── test_textdet │ │ │ ├── test_data_preprocessors │ │ │ │ └── test_textdet_data_preprocessor.py │ │ │ ├── test_detectors │ │ │ │ └── test_drrg.py │ │ │ ├── test_heads │ │ │ │ ├── test_base_head.py │ │ │ │ ├── test_db_head.py │ │ │ │ ├── test_drrg_head.py │ │ │ │ ├── test_fce_head.py │ │ │ │ ├── test_pan_head.py │ │ │ │ ├── test_pse_head.py │ │ │ │ └── test_textsnake_head.py │ │ │ ├── test_module_losses │ │ │ │ ├── test_db_module_loss.py │ │ │ │ ├── test_drrg_module_loss.py │ │ │ │ ├── test_fce_module_loss.py │ │ │ │ ├── test_pan_module_loss.py │ │ │ │ ├── test_pse_module_loss.py 
│ │ │ │ └── test_textsnake_module_loss.py │ │ │ ├── test_necks │ │ │ │ ├── test_fpem_ffm.py │ │ │ │ ├── test_fpn_cat.py │ │ │ │ ├── test_fpn_unet.py │ │ │ │ └── test_fpnf.py │ │ │ ├── test_postprocessors │ │ │ │ ├── test_base_postprocessor.py │ │ │ │ ├── test_db_postprocessor.py │ │ │ │ ├── test_drrg_postprocessor.py │ │ │ │ ├── test_fce_postprocessor.py │ │ │ │ ├── test_pan_postprocessor.py │ │ │ │ ├── test_pse_postprocessor.py │ │ │ │ └── test_textsnake_postprocessor.py │ │ │ └── test_wrappers │ │ │ │ └── test_mmdet_wrapper.py │ │ └── test_textrecog │ │ │ ├── test_backbones │ │ │ ├── test_mini_vgg.py │ │ │ ├── test_mobilenet_v2.py │ │ │ ├── test_nrtr_modality_transformer.py │ │ │ ├── test_resnet.py │ │ │ ├── test_resnet31_ocr.py │ │ │ ├── test_resnet_abi.py │ │ │ └── test_shallow_cnn.py │ │ │ ├── test_data_preprocessors │ │ │ └── test_data_preprocessor.py │ │ │ ├── test_decoders │ │ │ ├── test_abi_fuser.py │ │ │ ├── test_abi_language_decoder.py │ │ │ ├── test_abi_vision_decoder.py │ │ │ ├── test_aster_decoder.py │ │ │ ├── test_base_decoder.py │ │ │ ├── test_crnn_decoder.py │ │ │ ├── test_master_decoder.py │ │ │ ├── test_nrtr_decoder.py │ │ │ ├── test_position_attention_decoder.py │ │ │ ├── test_robust_scanner_fuser.py │ │ │ ├── test_sar_decoder.py │ │ │ ├── test_sequence_attention_decoder.py │ │ │ └── test_svtr_decoder.py │ │ │ ├── test_dictionary │ │ │ └── test_dictionary.py │ │ │ ├── test_encoders │ │ │ ├── test_abi_encoder.py │ │ │ ├── test_aster_encoder.py │ │ │ ├── test_channel_reduction_encoder.py │ │ │ ├── test_nrtr_encoder.py │ │ │ ├── test_sar_encoder.py │ │ │ ├── test_satrn_decoder.py │ │ │ └── test_svtr_encoder.py │ │ │ ├── test_layers │ │ │ └── test_conv_layer.py │ │ │ ├── test_module_losses │ │ │ ├── test_abi_module_loss.py │ │ │ ├── test_base_recog_module_loss.py │ │ │ ├── test_ce_module_loss.py │ │ │ └── test_ctc_module_loss.py │ │ │ ├── test_plugins │ │ │ ├── test_gcamodule.py │ │ │ └── test_maxpool.py │ │ │ ├── test_postprocessors │ │ │ ├── 
test_attn_postprocessor.py │ │ │ ├── test_base_textrecog_postprocessor.py │ │ │ └── test_ctc_postprocessor.py │ │ │ └── test_recognizers │ │ │ ├── test_encoder_decoder_recognizer.py │ │ │ └── test_encoder_decoder_recognizer_tta.py │ ├── test_structures │ │ ├── test_kie_data_sample.py │ │ ├── test_textdet_data_sample.py │ │ ├── test_textrecog_data_sample.py │ │ └── test_textspotting_data_sample.py │ ├── test_utils │ │ ├── test_bbox_utils.py │ │ ├── test_check_argument.py │ │ ├── test_data_converter_utils.py │ │ ├── test_fileio.py │ │ ├── test_img_utils.py │ │ ├── test_mask_utils.py │ │ ├── test_parsers.py │ │ ├── test_point_utils.py │ │ ├── test_polygon_utils.py │ │ ├── test_processing.py │ │ ├── test_string_utils.py │ │ └── test_transform_utils.py │ └── test_visualization │ │ ├── test_base_visualizer.py │ │ ├── test_kie_visualizer.py │ │ ├── test_textdet_visualizer.py │ │ ├── test_textrecog_visualizer.py │ │ └── test_textspotting_visualizer.py └── tools │ ├── analysis_tools │ ├── browse_dataset.py │ ├── get_flops.py │ ├── offline_eval.py │ └── print_config.py │ ├── dataset_converters │ ├── common │ │ ├── curvedsyntext_converter.py │ │ └── extract_kaist.py │ ├── kie │ │ └── closeset_to_openset.py │ ├── prepare_dataset.py │ ├── textdet │ │ ├── art_converter.py │ │ ├── bid_converter.py │ │ ├── coco_to_line_dict.py │ │ ├── cocotext_converter.py │ │ ├── data_migrator.py │ │ ├── detext_converter.py │ │ ├── funsd_converter.py │ │ ├── hiertext_converter.py │ │ ├── ic11_converter.py │ │ ├── ilst_converter.py │ │ ├── imgur_converter.py │ │ ├── kaist_converter.py │ │ ├── lsvt_converter.py │ │ ├── lv_converter.py │ │ ├── mtwi_converter.py │ │ ├── naf_converter.py │ │ ├── rctw_converter.py │ │ ├── rects_converter.py │ │ ├── sroie_converter.py │ │ └── vintext_converter.py │ └── textrecog │ │ ├── art_converter.py │ │ ├── bid_converter.py │ │ ├── cocotext_converter.py │ │ ├── data_migrator.py │ │ ├── detext_converter.py │ │ ├── funsd_converter.py │ │ ├── hiertext_converter.py │ │ 
├── ic11_converter.py │ │ ├── ilst_converter.py │ │ ├── imgur_converter.py │ │ ├── kaist_converter.py │ │ ├── lmdb_converter.py │ │ ├── lsvt_converter.py │ │ ├── lv_converter.py │ │ ├── mtwi_converter.py │ │ ├── naf_converter.py │ │ ├── openvino_converter.py │ │ ├── rctw_converter.py │ │ ├── rects_converter.py │ │ ├── sroie_converter.py │ │ └── vintext_converter.py │ ├── dist_test.sh │ ├── dist_train.sh │ ├── infer.py │ ├── model_converters │ └── publish_model.py │ ├── slurm_test.sh │ ├── slurm_train.sh │ ├── test.py │ └── train.py └── textfussion ├── CITATION.cff ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── PHILOSOPHY.md ├── README.md ├── _typos.toml ├── build └── lib │ └── diffusers │ ├── __init__.py │ ├── commands │ ├── __init__.py │ ├── diffusers_cli.py │ └── env.py │ ├── configuration_utils.py │ ├── dependency_versions_check.py │ ├── dependency_versions_table.py │ ├── experimental │ ├── __init__.py │ └── rl │ │ ├── __init__.py │ │ └── value_guided_sampling.py │ ├── image_processor.py │ ├── loaders.py │ ├── models │ ├── __init__.py │ ├── attention.py │ ├── attention_flax.py │ ├── attention_processor.py │ ├── autoencoder_kl.py │ ├── controlnet.py │ ├── controlnet_flax.py │ ├── cross_attention.py │ ├── dual_transformer_2d.py │ ├── embeddings.py │ ├── embeddings_flax.py │ ├── modeling_flax_pytorch_utils.py │ ├── modeling_flax_utils.py │ ├── modeling_pytorch_flax_utils.py │ ├── modeling_utils.py │ ├── prior_transformer.py │ ├── resnet.py │ ├── resnet_flax.py │ ├── t5_film_transformer.py │ ├── transformer_2d.py │ ├── transformer_temporal.py │ ├── unet_1d.py │ ├── unet_1d_blocks.py │ ├── unet_2d.py │ ├── unet_2d_blocks.py │ ├── unet_2d_blocks_flax.py │ ├── unet_2d_condition.py │ ├── unet_2d_condition_flax.py │ ├── unet_3d_blocks.py │ ├── unet_3d_condition.py │ ├── vae.py │ ├── vae_flax.py │ └── vq_model.py │ ├── optimization.py │ ├── pipeline_utils.py │ ├── pipelines │ ├── __init__.py │ ├── alt_diffusion │ │ ├── __init__.py 
│ │ ├── modeling_roberta_series.py │ │ ├── pipeline_alt_diffusion.py │ │ └── pipeline_alt_diffusion_img2img.py │ ├── audio_diffusion │ │ ├── __init__.py │ │ ├── mel.py │ │ └── pipeline_audio_diffusion.py │ ├── audioldm │ │ ├── __init__.py │ │ └── pipeline_audioldm.py │ ├── dance_diffusion │ │ ├── __init__.py │ │ └── pipeline_dance_diffusion.py │ ├── ddim │ │ ├── __init__.py │ │ └── pipeline_ddim.py │ ├── ddpm │ │ ├── __init__.py │ │ └── pipeline_ddpm.py │ ├── dit │ │ ├── __init__.py │ │ └── pipeline_dit.py │ ├── latent_diffusion │ │ ├── __init__.py │ │ ├── pipeline_latent_diffusion.py │ │ └── pipeline_latent_diffusion_superresolution.py │ ├── latent_diffusion_uncond │ │ ├── __init__.py │ │ └── pipeline_latent_diffusion_uncond.py │ ├── onnx_utils.py │ ├── paint_by_example │ │ ├── __init__.py │ │ ├── image_encoder.py │ │ └── pipeline_paint_by_example.py │ ├── pipeline_flax_utils.py │ ├── pipeline_utils.py │ ├── pndm │ │ ├── __init__.py │ │ └── pipeline_pndm.py │ ├── repaint │ │ ├── __init__.py │ │ └── pipeline_repaint.py │ ├── score_sde_ve │ │ ├── __init__.py │ │ └── pipeline_score_sde_ve.py │ ├── semantic_stable_diffusion │ │ ├── __init__.py │ │ └── pipeline_semantic_stable_diffusion.py │ ├── spectrogram_diffusion │ │ ├── __init__.py │ │ ├── continous_encoder.py │ │ ├── midi_utils.py │ │ ├── notes_encoder.py │ │ └── pipeline_spectrogram_diffusion.py │ ├── stable_diffusion │ │ ├── __init__.py │ │ ├── convert_from_ckpt.py │ │ ├── pipeline_cycle_diffusion.py │ │ ├── pipeline_flax_stable_diffusion.py │ │ ├── pipeline_flax_stable_diffusion_controlnet.py │ │ ├── pipeline_flax_stable_diffusion_img2img.py │ │ ├── pipeline_flax_stable_diffusion_inpaint.py │ │ ├── pipeline_onnx_stable_diffusion.py │ │ ├── pipeline_onnx_stable_diffusion_img2img.py │ │ ├── pipeline_onnx_stable_diffusion_inpaint.py │ │ ├── pipeline_onnx_stable_diffusion_inpaint_legacy.py │ │ ├── pipeline_onnx_stable_diffusion_upscale.py │ │ ├── pipeline_stable_diffusion.py │ │ ├── 
pipeline_stable_diffusion_attend_and_excite.py │ │ ├── pipeline_stable_diffusion_controlnet.py │ │ ├── pipeline_stable_diffusion_depth2img.py │ │ ├── pipeline_stable_diffusion_image_variation.py │ │ ├── pipeline_stable_diffusion_img2img.py │ │ ├── pipeline_stable_diffusion_inpaint.py │ │ ├── pipeline_stable_diffusion_inpaint_legacy.py │ │ ├── pipeline_stable_diffusion_instruct_pix2pix.py │ │ ├── pipeline_stable_diffusion_k_diffusion.py │ │ ├── pipeline_stable_diffusion_latent_upscale.py │ │ ├── pipeline_stable_diffusion_model_editing.py │ │ ├── pipeline_stable_diffusion_panorama.py │ │ ├── pipeline_stable_diffusion_pix2pix_zero.py │ │ ├── pipeline_stable_diffusion_sag.py │ │ ├── pipeline_stable_diffusion_upscale.py │ │ ├── pipeline_stable_unclip.py │ │ ├── pipeline_stable_unclip_img2img.py │ │ ├── safety_checker.py │ │ ├── safety_checker_flax.py │ │ └── stable_unclip_image_normalizer.py │ ├── stable_diffusion_safe │ │ ├── __init__.py │ │ ├── pipeline_stable_diffusion_safe.py │ │ └── safety_checker.py │ ├── stochastic_karras_ve │ │ ├── __init__.py │ │ └── pipeline_stochastic_karras_ve.py │ ├── text_to_video_synthesis │ │ ├── __init__.py │ │ └── pipeline_text_to_video_synth.py │ ├── unclip │ │ ├── __init__.py │ │ ├── pipeline_unclip.py │ │ ├── pipeline_unclip_image_variation.py │ │ └── text_proj.py │ ├── versatile_diffusion │ │ ├── __init__.py │ │ ├── modeling_text_unet.py │ │ ├── pipeline_versatile_diffusion.py │ │ ├── pipeline_versatile_diffusion_dual_guided.py │ │ ├── pipeline_versatile_diffusion_image_variation.py │ │ └── pipeline_versatile_diffusion_text_to_image.py │ └── vq_diffusion │ │ ├── __init__.py │ │ └── pipeline_vq_diffusion.py │ ├── schedulers │ ├── __init__.py │ ├── scheduling_ddim.py │ ├── scheduling_ddim_flax.py │ ├── scheduling_ddim_inverse.py │ ├── scheduling_ddpm.py │ ├── scheduling_ddpm_flax.py │ ├── scheduling_deis_multistep.py │ ├── scheduling_dpmsolver_multistep.py │ ├── scheduling_dpmsolver_multistep_flax.py │ ├── 
scheduling_dpmsolver_singlestep.py │ ├── scheduling_euler_ancestral_discrete.py │ ├── scheduling_euler_discrete.py │ ├── scheduling_heun_discrete.py │ ├── scheduling_ipndm.py │ ├── scheduling_k_dpm_2_ancestral_discrete.py │ ├── scheduling_k_dpm_2_discrete.py │ ├── scheduling_karras_ve.py │ ├── scheduling_karras_ve_flax.py │ ├── scheduling_lms_discrete.py │ ├── scheduling_lms_discrete_flax.py │ ├── scheduling_pndm.py │ ├── scheduling_pndm_flax.py │ ├── scheduling_repaint.py │ ├── scheduling_sde_ve.py │ ├── scheduling_sde_ve_flax.py │ ├── scheduling_sde_vp.py │ ├── scheduling_unclip.py │ ├── scheduling_unipc_multistep.py │ ├── scheduling_utils.py │ ├── scheduling_utils_flax.py │ └── scheduling_vq_diffusion.py │ ├── training_utils.py │ └── utils │ ├── __init__.py │ ├── accelerate_utils.py │ ├── constants.py │ ├── deprecation_utils.py │ ├── doc_utils.py │ ├── dummy_flax_and_transformers_objects.py │ ├── dummy_flax_objects.py │ ├── dummy_note_seq_objects.py │ ├── dummy_onnx_objects.py │ ├── dummy_pt_objects.py │ ├── dummy_torch_and_librosa_objects.py │ ├── dummy_torch_and_scipy_objects.py │ ├── dummy_torch_and_transformers_and_k_diffusion_objects.py │ ├── dummy_torch_and_transformers_and_onnx_objects.py │ ├── dummy_torch_and_transformers_objects.py │ ├── dummy_transformers_and_torch_and_note_seq_objects.py │ ├── dynamic_modules_utils.py │ ├── hub_utils.py │ ├── import_utils.py │ ├── logging.py │ ├── model_card_template.md │ ├── outputs.py │ ├── pil_utils.py │ ├── testing_utils.py │ └── torch_utils.py ├── docker ├── diffusers-flax-cpu │ └── Dockerfile ├── diffusers-flax-tpu │ └── Dockerfile ├── diffusers-onnxruntime-cpu │ └── Dockerfile ├── diffusers-onnxruntime-cuda │ └── Dockerfile ├── diffusers-pytorch-cpu │ └── Dockerfile └── diffusers-pytorch-cuda │ └── Dockerfile ├── docs ├── README.md ├── TRANSLATING.md └── source │ ├── _config.py │ ├── en │ ├── _toctree.yml │ ├── api │ │ ├── configuration.mdx │ │ ├── diffusion_pipeline.mdx │ │ ├── experimental │ │ │ └── rl.mdx │ 
│ ├── loaders.mdx │ │ ├── logging.mdx │ │ ├── models.mdx │ │ ├── outputs.mdx │ │ ├── pipelines │ │ │ ├── alt_diffusion.mdx │ │ │ ├── audio_diffusion.mdx │ │ │ ├── audioldm.mdx │ │ │ ├── cycle_diffusion.mdx │ │ │ ├── dance_diffusion.mdx │ │ │ ├── ddim.mdx │ │ │ ├── ddpm.mdx │ │ │ ├── dit.mdx │ │ │ ├── latent_diffusion.mdx │ │ │ ├── latent_diffusion_uncond.mdx │ │ │ ├── overview.mdx │ │ │ ├── paint_by_example.mdx │ │ │ ├── pndm.mdx │ │ │ ├── repaint.mdx │ │ │ ├── score_sde_ve.mdx │ │ │ ├── semantic_stable_diffusion.mdx │ │ │ ├── spectrogram_diffusion.mdx │ │ │ ├── stable_diffusion │ │ │ │ ├── attend_and_excite.mdx │ │ │ │ ├── controlnet.mdx │ │ │ │ ├── depth2img.mdx │ │ │ │ ├── image_variation.mdx │ │ │ │ ├── img2img.mdx │ │ │ │ ├── inpaint.mdx │ │ │ │ ├── latent_upscale.mdx │ │ │ │ ├── model_editing.mdx │ │ │ │ ├── overview.mdx │ │ │ │ ├── panorama.mdx │ │ │ │ ├── pix2pix.mdx │ │ │ │ ├── pix2pix_zero.mdx │ │ │ │ ├── self_attention_guidance.mdx │ │ │ │ ├── text2img.mdx │ │ │ │ └── upscale.mdx │ │ │ ├── stable_diffusion_2.mdx │ │ │ ├── stable_diffusion_safe.mdx │ │ │ ├── stable_unclip.mdx │ │ │ ├── stochastic_karras_ve.mdx │ │ │ ├── text_to_video.mdx │ │ │ ├── unclip.mdx │ │ │ ├── versatile_diffusion.mdx │ │ │ └── vq_diffusion.mdx │ │ └── schedulers │ │ │ ├── ddim.mdx │ │ │ ├── ddim_inverse.mdx │ │ │ ├── ddpm.mdx │ │ │ ├── deis.mdx │ │ │ ├── dpm_discrete.mdx │ │ │ ├── dpm_discrete_ancestral.mdx │ │ │ ├── euler.mdx │ │ │ ├── euler_ancestral.mdx │ │ │ ├── heun.mdx │ │ │ ├── ipndm.mdx │ │ │ ├── lms_discrete.mdx │ │ │ ├── multistep_dpm_solver.mdx │ │ │ ├── overview.mdx │ │ │ ├── pndm.mdx │ │ │ ├── repaint.mdx │ │ │ ├── score_sde_ve.mdx │ │ │ ├── score_sde_vp.mdx │ │ │ ├── singlestep_dpm_solver.mdx │ │ │ ├── stochastic_karras_ve.mdx │ │ │ ├── unipc.mdx │ │ │ └── vq_diffusion.mdx │ ├── conceptual │ │ ├── contribution.mdx │ │ ├── ethical_guidelines.mdx │ │ ├── evaluation.mdx │ │ └── philosophy.mdx │ ├── imgs │ │ ├── access_request.png │ │ └── diffusers_library.jpg │ ├── 
index.mdx │ ├── installation.mdx │ ├── optimization │ │ ├── fp16.mdx │ │ ├── habana.mdx │ │ ├── mps.mdx │ │ ├── onnx.mdx │ │ ├── open_vino.mdx │ │ ├── opt_overview.mdx │ │ ├── torch2.0.mdx │ │ └── xformers.mdx │ ├── quicktour.mdx │ ├── stable_diffusion.mdx │ ├── training │ │ ├── controlnet.mdx │ │ ├── dreambooth.mdx │ │ ├── instructpix2pix.mdx │ │ ├── lora.mdx │ │ ├── overview.mdx │ │ ├── text2image.mdx │ │ ├── text_inversion.mdx │ │ └── unconditional_training.mdx │ ├── tutorials │ │ ├── basic_training.mdx │ │ └── tutorial_overview.mdx │ └── using-diffusers │ │ ├── audio.mdx │ │ ├── conditional_image_generation.mdx │ │ ├── contribute_pipeline.mdx │ │ ├── controlling_generation.mdx │ │ ├── custom_pipeline_examples.mdx │ │ ├── custom_pipeline_overview.mdx │ │ ├── depth2img.mdx │ │ ├── img2img.mdx │ │ ├── inpaint.mdx │ │ ├── kerascv.mdx │ │ ├── loading.mdx │ │ ├── loading_overview.mdx │ │ ├── other-modalities.mdx │ │ ├── pipeline_overview.mdx │ │ ├── reproducibility.mdx │ │ ├── reusing_seeds.mdx │ │ ├── rl.mdx │ │ ├── schedulers.mdx │ │ ├── stable_diffusion_jax_how_to.mdx │ │ ├── unconditional_image_generation.mdx │ │ ├── using_safetensors │ │ ├── using_safetensors.mdx │ │ ├── weighted_prompts.mdx │ │ └── write_own_pipeline.mdx │ ├── ko │ ├── _toctree.yml │ ├── in_translation.mdx │ ├── index.mdx │ ├── installation.mdx │ └── quicktour.mdx │ └── zh │ ├── _toctree.yml │ ├── index.mdx │ ├── installation.mdx │ └── quicktour.mdx ├── examples ├── README.md ├── community │ ├── README.md │ ├── bit_diffusion.py │ ├── checkpoint_merger.py │ ├── clip_guided_stable_diffusion.py │ ├── clip_guided_stable_diffusion_img2img.py │ ├── composable_stable_diffusion.py │ ├── ddim_noise_comparative_analysis.py │ ├── imagic_stable_diffusion.py │ ├── img2img_inpainting.py │ ├── interpolate_stable_diffusion.py │ ├── lpw_stable_diffusion.py │ ├── lpw_stable_diffusion_onnx.py │ ├── magic_mix.py │ ├── multilingual_stable_diffusion.py │ ├── one_step_unet.py │ ├── sd_text2img_k_diffusion.py │ ├── 
seed_resize_stable_diffusion.py │ ├── speech_to_image_diffusion.py │ ├── stable_diffusion_comparison.py │ ├── stable_diffusion_controlnet_img2img.py │ ├── stable_diffusion_controlnet_inpaint.py │ ├── stable_diffusion_controlnet_inpaint_img2img.py │ ├── stable_diffusion_mega.py │ ├── stable_unclip.py │ ├── text_inpainting.py │ ├── tiled_upscaling.py │ ├── unclip_image_interpolation.py │ ├── unclip_text_interpolation.py │ └── wildcard_stable_diffusion.py ├── conftest.py ├── controlnet │ ├── README.md │ ├── requirements.txt │ ├── requirements_flax.txt │ ├── train_controlnet.py │ └── train_controlnet_flax.py ├── dreambooth │ ├── README.md │ ├── requirements.txt │ ├── requirements_flax.txt │ ├── train_dreambooth.py │ ├── train_dreambooth_flax.py │ └── train_dreambooth_lora.py ├── inference │ ├── README.md │ ├── image_to_image.py │ └── inpainting.py ├── instruct_pix2pix │ ├── README.md │ ├── requirements.txt │ └── train_instruct_pix2pix.py ├── research_projects │ ├── README.md │ ├── colossalai │ │ ├── README.md │ │ ├── inference.py │ │ ├── requirement.txt │ │ └── train_dreambooth_colossalai.py │ ├── dreambooth_inpaint │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── train_dreambooth_inpaint.py │ │ └── train_dreambooth_inpaint_lora.py │ ├── intel_opts │ │ ├── README.md │ │ ├── inference_bf16.py │ │ └── textual_inversion │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ └── textual_inversion_bf16.py │ ├── lora │ │ ├── README.md │ │ ├── requirements.txt │ │ └── train_text_to_image_lora.py │ ├── mulit_token_textual_inversion │ │ ├── README.md │ │ ├── multi_token_clip.py │ │ ├── requirements.txt │ │ ├── requirements_flax.txt │ │ ├── textual_inversion.py │ │ └── textual_inversion_flax.py │ ├── multi_subject_dreambooth │ │ ├── README.md │ │ ├── requirements.txt │ │ └── train_multi_subject_dreambooth.py │ └── onnxruntime │ │ ├── README.md │ │ ├── text_to_image │ │ ├── README.md │ │ ├── requirements.txt │ │ └── train_text_to_image.py │ │ ├── textual_inversion │ │ ├── 
README.md │ │ ├── requirements.txt │ │ └── textual_inversion.py │ │ └── unconditional_image_generation │ │ ├── README.md │ │ ├── requirements.txt │ │ └── train_unconditional.py ├── rl │ ├── README.md │ └── run_diffuser_locomotion.py ├── test_examples.py ├── text_to_image │ ├── README.md │ ├── requirements.txt │ ├── requirements_flax.txt │ ├── train_text_to_image.py │ ├── train_text_to_image_flax.py │ └── train_text_to_image_lora.py ├── textual_inversion │ ├── README.md │ ├── requirements.txt │ ├── requirements_flax.txt │ ├── textual_inversion.py │ └── textual_inversion_flax.py └── unconditional_image_generation │ ├── README.md │ ├── requirements.txt │ └── train_unconditional.py ├── my_inpainting ├── label_list.json ├── my_build_synth_data_baseline.py ├── my_build_synth_data_baseline_large_num.py ├── my_test_inpainting_baseline.py ├── my_test_inpainting_baseline_batch_test.py ├── my_test_inpainting_char.py ├── my_test_inpainting_char_multi.py ├── my_test_inpainting_only_pre_prompt_word_multi.py ├── my_test_inpainting_pure_word_prefix_prompt_batch_test.py ├── my_test_inpainting_with_adapter_char_multi.py ├── my_test_inpainting_with_adapter_with_fussion_te_word_multi.py ├── my_test_inpainting_with_adapter_with_pre_prompt_word_multi.py ├── my_test_inpainting_with_adapter_word_multi.py ├── my_test_inpainting_with_adapter_zero_prompt_char_multi.py ├── my_test_inpainting_with_char_adapter_char_multi.py ├── my_test_inpainting_with_controlnet_batch_test.py ├── my_test_inpainting_with_full_controlnet_batch_test.py ├── my_test_inpainting_with_full_controlnet_dual_text_batch_test.py ├── my_train_only_pre_prompt.sh ├── my_train_prefix_prompt.sh ├── my_train_with_adapter_with_pre_prompt.sh ├── my_train_with_char_adapter.sh ├── my_train_with_controlnet.sh ├── my_train_with_full_controlnet.sh ├── my_train_with_full_controlnet_with_dual_text.sh ├── my_train_with_single_adapter.sh ├── my_train_with_single_adapter_with_fussion_te.sh ├── new_paradigm_any_demo.py ├── 
new_paradigm_build_baseline.py ├── new_paradigm_build_baseline_no_crop.py ├── new_paradigm_build_demo.py ├── new_paradigm_build_dual_text.py ├── new_paradigm_build_with_text_vae.py ├── new_paradigm_dual_text_encoder.sh ├── new_paradigm_train.sh ├── src │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── __init__.cpython-39.pyc │ ├── build_synth_data │ │ ├── __pycache__ │ │ │ ├── batch_utils.cpython-310.pyc │ │ │ ├── crop_tools.cpython-310.pyc │ │ │ ├── glyph_utils.cpython-310.pyc │ │ │ └── rec_inferencer.cpython-310.pyc │ │ ├── batch_utils.py │ │ ├── crop_tools.py │ │ ├── glyph_utils.py │ │ └── rec_inferencer.py │ ├── dataset │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── base_text_dataset.cpython-310.pyc │ │ │ ├── batch_utils.cpython-310.pyc │ │ │ ├── crop_image_for_test.cpython-310.pyc │ │ │ ├── new_paradigm_text_dataset.cpython-310.pyc │ │ │ ├── text_dataset.cpython-310.pyc │ │ │ ├── text_dataset.cpython-39.pyc │ │ │ ├── text_mapper.cpython-310.pyc │ │ │ ├── text_mapper.cpython-38.pyc │ │ │ ├── text_mapper.cpython-39.pyc │ │ │ ├── utils.cpython-310.pyc │ │ │ ├── utils.cpython-39.pyc │ │ │ ├── zoom_up_pure_text_dataset.cpython-310.pyc │ │ │ ├── zoom_up_text_dataset.cpython-310.pyc │ │ │ ├── zoom_up_text_dataset.cpython-38.pyc │ │ │ ├── zoom_up_text_dataset.cpython-39.pyc │ │ │ └── zoom_up_with_blank_text_dataset.cpython-310.pyc │ │ ├── base_text_dataset.py │ │ ├── batch_utils.py │ │ ├── crop_image_for_test.py │ │ ├── new_paradigm_text_dataset.py │ │ ├── text_dataset.py │ │ ├── text_mapper.py │ │ ├── utils.py │ │ ├── zoom_up_pure_text_dataset.py │ │ ├── zoom_up_text_dataset.py │ │ └── zoom_up_with_blank_text_dataset.py │ ├── engines │ │ ├── __init__.py │ │ ├── finetune_text_to_image.py │ │ ├── finetune_text_to_image_inpainting.py │ │ ├── finetune_text_to_image_inpainting_with_adapter_with_pre_prompt.py │ │ ├── 
finetune_text_to_image_inpainting_with_char_adapter.py │ │ ├── finetune_text_to_image_inpainting_with_controlnet.py │ │ ├── finetune_text_to_image_inpainting_with_full_controlnet.py │ │ ├── finetune_text_to_image_inpainting_with_full_controlnet_with_dual_text.py │ │ ├── finetune_text_to_image_inpainting_with_pre_prompt.py │ │ ├── finetune_text_to_image_inpainting_with_prefix_prompt.py │ │ ├── finetune_text_to_image_inpainting_with_single_adapter.py │ │ ├── finetune_text_to_image_inpainting_with_single_adapter_with_fussion_te.py │ │ ├── new_paradigm_inpainting.py │ │ └── new_paradigm_inpainting_dual_text.py │ ├── models │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── adapter.cpython-310.pyc │ │ │ ├── adapter.cpython-39.pyc │ │ │ ├── adapter_with_char_embedding.cpython-39.pyc │ │ │ ├── adapter_with_fussion_TE.cpython-310.pyc │ │ │ ├── adapter_with_fussion_TE.cpython-38.pyc │ │ │ ├── adapter_with_fussion_TE.cpython-39.pyc │ │ │ ├── adapter_with_pre_prompt.cpython-310.pyc │ │ │ ├── attention.cpython-310.pyc │ │ │ ├── char_encoder.cpython-310.pyc │ │ │ ├── controlnet.cpython-310.pyc │ │ │ ├── dual_controlnet.cpython-310.pyc │ │ │ ├── fussion_text_embedding.cpython-39.pyc │ │ │ ├── modules.cpython-310.pyc │ │ │ ├── only_pre_prompt.cpython-310.pyc │ │ │ ├── only_prefix_prompt.cpython-310.pyc │ │ │ ├── openaimodel.cpython-310.pyc │ │ │ ├── transformer_2d_with_controlnet.cpython-310.pyc │ │ │ ├── transformer_2d_with_dual_text_controlnet.cpython-310.pyc │ │ │ ├── unet_2d_blocks_with_adapter.cpython-310.pyc │ │ │ ├── unet_2d_blocks_with_adapter.cpython-38.pyc │ │ │ ├── unet_2d_blocks_with_adapter.cpython-39.pyc │ │ │ ├── unet_2d_blocks_with_controlnet.cpython-310.pyc │ │ │ ├── unet_2d_blocks_with_dual_text_controlnet.cpython-310.pyc │ │ │ ├── unet_2d_with_adapter.cpython-310.pyc │ │ │ ├── unet_2d_with_adapter.cpython-38.pyc │ │ │ ├── unet_2d_with_adapter.cpython-39.pyc │ │ 
│ ├── unet_2d_with_controlnet.cpython-310.pyc │ │ │ ├── unet_2d_with_dual_text.cpython-310.pyc │ │ │ ├── unet_2d_with_dual_text_controlnet.cpython-310.pyc │ │ │ └── union_net.cpython-310.pyc │ │ ├── adapter.py │ │ ├── adapter_with_char_embedding.py │ │ ├── adapter_with_fussion_TE.py │ │ ├── adapter_with_pre_prompt.py │ │ ├── attention.py │ │ ├── char_encoder.py │ │ ├── controlnet.py │ │ ├── dual_controlnet.py │ │ ├── fussion_text_embedding.py │ │ ├── modules.py │ │ ├── only_pre_prompt.py │ │ ├── only_prefix_prompt.py │ │ ├── openaimodel.py │ │ ├── ori_controlnet.py │ │ ├── transformer_2d_with_dual_text_controlnet.py │ │ ├── unet_2d_blocks_with_adapter.py │ │ ├── unet_2d_blocks_with_controlnet.py │ │ ├── unet_2d_blocks_with_dual_text_controlnet.py │ │ ├── unet_2d_with_adapter.py │ │ ├── unet_2d_with_controlnet.py │ │ ├── unet_2d_with_dual_text.py │ │ ├── unet_2d_with_dual_text_controlnet.py │ │ └── union_net.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── new_paradigm_inpainting_dual_text_encoder.cpython-310.pyc │ │ │ ├── stable_diffusion_inpainting.cpython-310.pyc │ │ │ ├── stable_diffusion_inpainting.cpython-39.pyc │ │ │ ├── stable_diffusion_inpainting_dual_text_full_controlnet.cpython-310.pyc │ │ │ ├── stable_diffusion_inpainting_full_controlnet.cpython-310.pyc │ │ │ ├── stable_diffusion_inpainting_mask_controlnet.cpython-310.pyc │ │ │ ├── stable_diffusion_inpainting_only_controlnet.cpython-310.pyc │ │ │ ├── stable_diffusion_inpainting_only_pre_prompt.cpython-310.pyc │ │ │ ├── stable_diffusion_inpainting_only_prefix_prompt.cpython-310.pyc │ │ │ ├── stable_diffusion_inpainting_text_glyph.cpython-310.pyc │ │ │ ├── stable_diffusion_inpainting_text_vae_text_glyph.cpython-310.pyc │ │ │ ├── stable_diffusion_inpainting_with_adapter.cpython-39.pyc │ │ │ ├── stable_diffusion_inpainting_with_adapter_with_fussion_te.cpython-310.pyc │ │ │ ├── 
stable_diffusion_inpainting_with_adapter_with_fussion_te.cpython-39.pyc │ │ │ ├── stable_diffusion_inpainting_with_adapter_zero_prompt.cpython-39.pyc │ │ │ └── stable_diffusion_inpainting_with_char_adapter.cpython-39.pyc │ │ ├── new_paradigm_inpainting_dual_text_encoder.py │ │ ├── stable_diffusion_inpainting.py │ │ ├── stable_diffusion_inpainting_dual_text_full_controlnet.py │ │ ├── stable_diffusion_inpainting_full_controlnet.py │ │ ├── stable_diffusion_inpainting_mask_controlnet.py │ │ ├── stable_diffusion_inpainting_only_controlnet.py │ │ ├── stable_diffusion_inpainting_only_pre_prompt.py │ │ ├── stable_diffusion_inpainting_only_prefix_prompt.py │ │ ├── stable_diffusion_inpainting_text_glyph.py │ │ ├── stable_diffusion_inpainting_text_vae_text_glyph.py │ │ ├── stable_diffusion_inpainting_with_adapter.py │ │ ├── stable_diffusion_inpainting_with_adapter_with_fussion_te.py │ │ ├── stable_diffusion_inpainting_with_adapter_zero_prompt.py │ │ └── stable_diffusion_inpainting_with_char_adapter.py │ └── utils │ │ ├── ori.png │ │ ├── output.png │ │ ├── res.png │ │ ├── res_area.png │ │ ├── res_trilinear.png │ │ └── vis_mask.py ├── train_vae.py └── train_vae.sh ├── my_pipeline.py ├── pyproject.toml ├── requirements.txt ├── scripts ├── __init__.py ├── change_naming_configs_and_checkpoints.py ├── conversion_ldm_uncond.py ├── convert_dance_diffusion_to_diffusers.py ├── convert_ddpm_original_checkpoint_to_diffusers.py ├── convert_diffusers_to_original_stable_diffusion.py ├── convert_dit_to_diffusers.py ├── convert_k_upscaler_to_diffusers.py ├── convert_kakao_brain_unclip_to_diffusers.py ├── convert_ldm_original_checkpoint_to_diffusers.py ├── convert_lora_safetensor_to_diffusers.py ├── convert_models_diffuser_to_diffusers.py ├── convert_ms_text_to_video_to_diffusers.py ├── convert_music_spectrogram_to_diffusers.py ├── convert_ncsnpp_original_checkpoint_to_diffusers.py ├── convert_original_audioldm_to_diffusers.py ├── convert_original_controlnet_to_diffusers.py ├── 
convert_original_stable_diffusion_to_diffusers.py ├── convert_stable_diffusion_checkpoint_to_onnx.py ├── convert_unclip_txt2img_to_image_variation.py ├── convert_vae_diff_to_onnx.py ├── convert_vae_pt_to_diffusers.py ├── convert_versatile_diffusion_to_diffusers.py ├── convert_vq_diffusion_to_diffusers.py └── generate_logits.py ├── setup.cfg ├── setup.py ├── src ├── diffusers.egg-info │ ├── PKG-INFO │ ├── SOURCES.txt │ ├── dependency_links.txt │ ├── entry_points.txt │ ├── requires.txt │ └── top_level.txt └── diffusers │ ├── __init__.py │ ├── commands │ ├── __init__.py │ ├── diffusers_cli.py │ └── env.py │ ├── configuration_utils.py │ ├── dependency_versions_check.py │ ├── dependency_versions_table.py │ ├── experimental │ ├── README.md │ ├── __init__.py │ └── rl │ │ ├── __init__.py │ │ └── value_guided_sampling.py │ ├── image_processor.py │ ├── loaders.py │ ├── models │ ├── README.md │ ├── __init__.py │ ├── attention.py │ ├── attention_flax.py │ ├── attention_processor.py │ ├── autoencoder_kl.py │ ├── controlnet.py │ ├── controlnet_flax.py │ ├── cross_attention.py │ ├── dual_transformer_2d.py │ ├── embeddings.py │ ├── embeddings_flax.py │ ├── modeling_flax_pytorch_utils.py │ ├── modeling_flax_utils.py │ ├── modeling_pytorch_flax_utils.py │ ├── modeling_utils.py │ ├── prior_transformer.py │ ├── resnet.py │ ├── resnet_flax.py │ ├── t5_film_transformer.py │ ├── transformer_2d.py │ ├── transformer_temporal.py │ ├── unet_1d.py │ ├── unet_1d_blocks.py │ ├── unet_2d.py │ ├── unet_2d_blocks.py │ ├── unet_2d_blocks_flax.py │ ├── unet_2d_condition.py │ ├── unet_2d_condition_flax.py │ ├── unet_3d_blocks.py │ ├── unet_3d_condition.py │ ├── vae.py │ ├── vae_flax.py │ └── vq_model.py │ ├── optimization.py │ ├── pipeline_utils.py │ ├── pipelines │ ├── README.md │ ├── __init__.py │ ├── alt_diffusion │ │ ├── __init__.py │ │ ├── modeling_roberta_series.py │ │ ├── pipeline_alt_diffusion.py │ │ └── pipeline_alt_diffusion_img2img.py │ ├── audio_diffusion │ │ ├── __init__.py │ │ ├── 
mel.py │ │ └── pipeline_audio_diffusion.py │ ├── audioldm │ │ ├── __init__.py │ │ └── pipeline_audioldm.py │ ├── dance_diffusion │ │ ├── __init__.py │ │ └── pipeline_dance_diffusion.py │ ├── ddim │ │ ├── __init__.py │ │ └── pipeline_ddim.py │ ├── ddpm │ │ ├── __init__.py │ │ └── pipeline_ddpm.py │ ├── dit │ │ ├── __init__.py │ │ └── pipeline_dit.py │ ├── latent_diffusion │ │ ├── __init__.py │ │ ├── pipeline_latent_diffusion.py │ │ └── pipeline_latent_diffusion_superresolution.py │ ├── latent_diffusion_uncond │ │ ├── __init__.py │ │ └── pipeline_latent_diffusion_uncond.py │ ├── onnx_utils.py │ ├── paint_by_example │ │ ├── __init__.py │ │ ├── image_encoder.py │ │ └── pipeline_paint_by_example.py │ ├── pipeline_flax_utils.py │ ├── pipeline_utils.py │ ├── pndm │ │ ├── __init__.py │ │ └── pipeline_pndm.py │ ├── repaint │ │ ├── __init__.py │ │ └── pipeline_repaint.py │ ├── score_sde_ve │ │ ├── __init__.py │ │ └── pipeline_score_sde_ve.py │ ├── semantic_stable_diffusion │ │ ├── __init__.py │ │ └── pipeline_semantic_stable_diffusion.py │ ├── spectrogram_diffusion │ │ ├── __init__.py │ │ ├── continous_encoder.py │ │ ├── midi_utils.py │ │ ├── notes_encoder.py │ │ └── pipeline_spectrogram_diffusion.py │ ├── stable_diffusion │ │ ├── README.md │ │ ├── __init__.py │ │ ├── convert_from_ckpt.py │ │ ├── pipeline_cycle_diffusion.py │ │ ├── pipeline_flax_stable_diffusion.py │ │ ├── pipeline_flax_stable_diffusion_controlnet.py │ │ ├── pipeline_flax_stable_diffusion_img2img.py │ │ ├── pipeline_flax_stable_diffusion_inpaint.py │ │ ├── pipeline_onnx_stable_diffusion.py │ │ ├── pipeline_onnx_stable_diffusion_img2img.py │ │ ├── pipeline_onnx_stable_diffusion_inpaint.py │ │ ├── pipeline_onnx_stable_diffusion_inpaint_legacy.py │ │ ├── pipeline_onnx_stable_diffusion_upscale.py │ │ ├── pipeline_stable_diffusion.py │ │ ├── pipeline_stable_diffusion_attend_and_excite.py │ │ ├── pipeline_stable_diffusion_controlnet.py │ │ ├── pipeline_stable_diffusion_depth2img.py │ │ ├── 
pipeline_stable_diffusion_image_variation.py │ │ ├── pipeline_stable_diffusion_img2img.py │ │ ├── pipeline_stable_diffusion_inpaint.py │ │ ├── pipeline_stable_diffusion_inpaint_legacy.py │ │ ├── pipeline_stable_diffusion_instruct_pix2pix.py │ │ ├── pipeline_stable_diffusion_k_diffusion.py │ │ ├── pipeline_stable_diffusion_latent_upscale.py │ │ ├── pipeline_stable_diffusion_model_editing.py │ │ ├── pipeline_stable_diffusion_panorama.py │ │ ├── pipeline_stable_diffusion_pix2pix_zero.py │ │ ├── pipeline_stable_diffusion_sag.py │ │ ├── pipeline_stable_diffusion_upscale.py │ │ ├── pipeline_stable_unclip.py │ │ ├── pipeline_stable_unclip_img2img.py │ │ ├── safety_checker.py │ │ ├── safety_checker_flax.py │ │ └── stable_unclip_image_normalizer.py │ ├── stable_diffusion_safe │ │ ├── __init__.py │ │ ├── pipeline_stable_diffusion_safe.py │ │ └── safety_checker.py │ ├── stochastic_karras_ve │ │ ├── __init__.py │ │ └── pipeline_stochastic_karras_ve.py │ ├── text_to_video_synthesis │ │ ├── __init__.py │ │ └── pipeline_text_to_video_synth.py │ ├── unclip │ │ ├── __init__.py │ │ ├── pipeline_unclip.py │ │ ├── pipeline_unclip_image_variation.py │ │ └── text_proj.py │ ├── versatile_diffusion │ │ ├── __init__.py │ │ ├── modeling_text_unet.py │ │ ├── pipeline_versatile_diffusion.py │ │ ├── pipeline_versatile_diffusion_dual_guided.py │ │ ├── pipeline_versatile_diffusion_image_variation.py │ │ └── pipeline_versatile_diffusion_text_to_image.py │ └── vq_diffusion │ │ ├── __init__.py │ │ └── pipeline_vq_diffusion.py │ ├── schedulers │ ├── README.md │ ├── __init__.py │ ├── scheduling_ddim.py │ ├── scheduling_ddim_flax.py │ ├── scheduling_ddim_inverse.py │ ├── scheduling_ddpm.py │ ├── scheduling_ddpm_flax.py │ ├── scheduling_deis_multistep.py │ ├── scheduling_dpmsolver_multistep.py │ ├── scheduling_dpmsolver_multistep_flax.py │ ├── scheduling_dpmsolver_singlestep.py │ ├── scheduling_euler_ancestral_discrete.py │ ├── scheduling_euler_discrete.py │ ├── scheduling_heun_discrete.py │ ├── 
scheduling_ipndm.py │ ├── scheduling_k_dpm_2_ancestral_discrete.py │ ├── scheduling_k_dpm_2_discrete.py │ ├── scheduling_karras_ve.py │ ├── scheduling_karras_ve_flax.py │ ├── scheduling_lms_discrete.py │ ├── scheduling_lms_discrete_flax.py │ ├── scheduling_pndm.py │ ├── scheduling_pndm_flax.py │ ├── scheduling_repaint.py │ ├── scheduling_sde_ve.py │ ├── scheduling_sde_ve_flax.py │ ├── scheduling_sde_vp.py │ ├── scheduling_unclip.py │ ├── scheduling_unipc_multistep.py │ ├── scheduling_utils.py │ ├── scheduling_utils_flax.py │ └── scheduling_vq_diffusion.py │ ├── training_utils.py │ └── utils │ ├── __init__.py │ ├── accelerate_utils.py │ ├── constants.py │ ├── deprecation_utils.py │ ├── doc_utils.py │ ├── dummy_flax_and_transformers_objects.py │ ├── dummy_flax_objects.py │ ├── dummy_note_seq_objects.py │ ├── dummy_onnx_objects.py │ ├── dummy_pt_objects.py │ ├── dummy_torch_and_librosa_objects.py │ ├── dummy_torch_and_scipy_objects.py │ ├── dummy_torch_and_transformers_and_k_diffusion_objects.py │ ├── dummy_torch_and_transformers_and_onnx_objects.py │ ├── dummy_torch_and_transformers_objects.py │ ├── dummy_transformers_and_torch_and_note_seq_objects.py │ ├── dynamic_modules_utils.py │ ├── hub_utils.py │ ├── import_utils.py │ ├── logging.py │ ├── model_card_template.md │ ├── outputs.py │ ├── pil_utils.py │ ├── testing_utils.py │ └── torch_utils.py ├── tests └── utils ├── check_config_docstrings.py ├── check_copies.py ├── check_doc_toc.py ├── check_dummies.py ├── check_inits.py ├── check_repo.py ├── check_table.py ├── custom_init_isort.py ├── get_modified_files.py ├── overwrite_expected_slice.py ├── print_env.py ├── release.py └── stale.py /images/framwork.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/images/framwork.png -------------------------------------------------------------------------------- /mmocr/.circleci/docker/Dockerfile: 
-------------------------------------------------------------------------------- 1 | ARG PYTORCH="1.8.1" 2 | ARG CUDA="10.2" 3 | ARG CUDNN="7" 4 | 5 | FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel 6 | 7 | # To fix GPG key error when running apt-get update 8 | RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub 9 | RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub 10 | 11 | RUN apt-get update && apt-get install -y ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libgl1-mesa-glx 12 | -------------------------------------------------------------------------------- /mmocr/.codespellrc: -------------------------------------------------------------------------------- 1 | [codespell] 2 | skip = *.ipynb 3 | count = 4 | quiet-level = 3 5 | ignore-words-list = convertor,convertors,formating,nin,wan,datas,hist,ned 6 | -------------------------------------------------------------------------------- /mmocr/.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | */__init__.py 4 | -------------------------------------------------------------------------------- /mmocr/.dev_scripts/benchmark_options.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | 3 | third_part_libs = [ 4 | 'pip install -r ../requirements/albu.txt', 5 | ] 6 | 7 | default_floating_range = 0.5 8 | -------------------------------------------------------------------------------- /mmocr/.dev_scripts/benchmark_train_models.txt: -------------------------------------------------------------------------------- 1 | textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py 2 | textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015.py 3 | textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2015.py 4 | textrecog/abinet/abinet-vision_20e_st-an_mj.py 5 | textrecog/crnn/crnn_mini-vgg_5e_mj.py 6 | textrecog/aster/aster_resnet45_6e_st_mj.py 7 | textrecog/nrtr/nrtr_resnet31-1by16-1by8_6e_st_mj.py 8 | textrecog/sar/sar_resnet31_parallel-decoder_5e_st-sub_mj-sub_sa_real.py 9 | textrecog/svtr/svtr-small_20e_st_mj.py 10 | -------------------------------------------------------------------------------- /mmocr/.dev_scripts/covignore.cfg: -------------------------------------------------------------------------------- 1 | # Each line should be the relative path to the root directory 2 | # of this repo. Support regular expression as well. 3 | # For example: 4 | # mmocr/models/textdet/postprocess/utils.py 5 | # .*/utils.py 6 | .*/__init__.py 7 | 8 | # It will be removed after all models have been refactored 9 | mmocr/utils/bbox_utils.py 10 | 11 | # Major part is covered, however, it's hard to cover model's output. 
12 | mmocr/models/textdet/detectors/mmdet_wrapper.py 13 | 14 | # It will be removed after KieVisualizer and TextSpotterVisualizer 15 | mmocr/visualization/visualize.py 16 | 17 | # Add tests for data preparers later 18 | mmocr/datasets/preparers 19 | -------------------------------------------------------------------------------- /mmocr/.owners.yml: -------------------------------------------------------------------------------- 1 | assign: 2 | strategy: 3 | random 4 | # daily-shift-based 5 | scedule: 6 | '*/1 * * * *' 7 | assignees: 8 | - gaotongxiao 9 | - Harold-lkk 10 | -------------------------------------------------------------------------------- /mmocr/.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | formats: all 4 | 5 | python: 6 | version: 3.7 7 | install: 8 | - requirements: requirements/docs.txt 9 | - requirements: requirements/readthedocs.txt 10 | -------------------------------------------------------------------------------- /mmocr/CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 
3 | title: "OpenMMLab Text Detection, Recognition and Understanding Toolbox" 4 | authors: 5 | - name: "MMOCR Contributors" 6 | version: 0.3.0 7 | date-released: 2020-08-15 8 | repository-code: "https://github.com/open-mmlab/mmocr" 9 | license: Apache-2.0 10 | -------------------------------------------------------------------------------- /mmocr/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements/*.txt 2 | include mmocr/.mim/model-index.yml 3 | include mmocr/.mim/dicts/*.txt 4 | recursive-include mmocr/.mim/configs *.py *.yml 5 | recursive-include mmocr/.mim/tools *.sh *.py 6 | -------------------------------------------------------------------------------- /mmocr/README.md: -------------------------------------------------------------------------------- 1 | 该项目基于mmocr框架,请依据requirements.txt搭建环境 2 | 3 | 在configs/textdet中配置好文本图像数据集的路径后,使用./my_train.sh脚本进行检测实验 4 | -------------------------------------------------------------------------------- /mmocr/configs/backbone/oclip/metafile.yml: -------------------------------------------------------------------------------- 1 | Collections: 2 | - Name: oCLIP 3 | Metadata: 4 | Training Data: SynthText 5 | Architecture: 6 | - CLIPResNet 7 | Paper: 8 | URL: https://arxiv.org/abs/2203.03911 9 | Title: 'Language Matters: A Weakly Supervised Vision-Language Pre-training Approach for Scene Text Detection and Spotting' 10 | README: configs/backbone/oclip/README.md 11 | 12 | Models: 13 | Weights: https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth 14 | -------------------------------------------------------------------------------- /mmocr/configs/kie/_base_/datasets/wildreceipt.py: -------------------------------------------------------------------------------- 1 | wildreceipt_data_root = 'data/wildreceipt/' 2 | 3 | wildreceipt_train = dict( 4 | type='WildReceiptDataset', 5 | data_root=wildreceipt_data_root, 6 | metainfo=wildreceipt_data_root + 
'class_list.txt', 7 | ann_file='train.txt', 8 | pipeline=None) 9 | 10 | wildreceipt_test = dict( 11 | type='WildReceiptDataset', 12 | data_root=wildreceipt_data_root, 13 | metainfo=wildreceipt_data_root + 'class_list.txt', 14 | ann_file='test.txt', 15 | test_mode=True, 16 | pipeline=None) 17 | -------------------------------------------------------------------------------- /mmocr/configs/kie/_base_/schedules/schedule_adam_60e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optim_wrapper = dict( 3 | type='OptimWrapper', optimizer=dict(type='Adam', weight_decay=0.0001)) 4 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=60, val_interval=1) 5 | val_cfg = dict(type='ValLoop') 6 | test_cfg = dict(type='TestLoop') 7 | # learning rate 8 | param_scheduler = [ 9 | dict(type='MultiStepLR', milestones=[40, 50], end=60), 10 | ] 11 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/_base_/datasets/ctw1500.py: -------------------------------------------------------------------------------- 1 | ctw1500_textdet_data_root = 'data/ctw1500' 2 | 3 | ctw1500_textdet_train = dict( 4 | type='OCRDataset', 5 | data_root=ctw1500_textdet_data_root, 6 | ann_file='textdet_train.json', 7 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 8 | pipeline=None) 9 | 10 | ctw1500_textdet_test = dict( 11 | type='OCRDataset', 12 | data_root=ctw1500_textdet_data_root, 13 | ann_file='textdet_test.json', 14 | test_mode=True, 15 | pipeline=None) 16 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/_base_/datasets/icdar2013.py: -------------------------------------------------------------------------------- 1 | icdar2013_textdet_data_root = 'data/icdar2013' 2 | 3 | icdar2013_textdet_train = dict( 4 | type='OCRDataset', 5 | data_root=icdar2013_textdet_data_root, 6 | ann_file='textdet_train.json', 7 | 
filter_cfg=dict(filter_empty_gt=True, min_size=32), 8 | pipeline=None) 9 | 10 | icdar2013_textdet_test = dict( 11 | type='OCRDataset', 12 | data_root=icdar2013_textdet_data_root, 13 | ann_file='textdet_test.json', 14 | test_mode=True, 15 | pipeline=None) 16 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/_base_/datasets/icdar2017.py: -------------------------------------------------------------------------------- 1 | icdar2017_textdet_data_root = 'data/mlt2017' 2 | 3 | icdar2017_textdet_train = dict( 4 | type='OCRDataset', 5 | data_root=icdar2017_textdet_data_root, 6 | ann_file='textdet_train.json', 7 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 8 | pipeline=None) 9 | 10 | icdar2017_textdet_test = dict( 11 | type='OCRDataset', 12 | data_root=icdar2017_textdet_data_root, 13 | ann_file='textdet_test.json', 14 | test_mode=True, 15 | pipeline=None) 16 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/_base_/datasets/synthtext.py: -------------------------------------------------------------------------------- 1 | synthtext_textdet_data_root = 'data/synthtext' 2 | 3 | synthtext_textdet_train = dict( 4 | type='OCRDataset', 5 | data_root=synthtext_textdet_data_root, 6 | ann_file='textdet_train.json', 7 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 8 | pipeline=None) 9 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/_base_/datasets/totaltext.py: -------------------------------------------------------------------------------- 1 | totaltext_textdet_data_root = 'data/totaltext' 2 | 3 | totaltext_textdet_train = dict( 4 | type='OCRDataset', 5 | data_root=totaltext_textdet_data_root, 6 | ann_file='textdet_train.json', 7 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 8 | pipeline=None) 9 | 10 | totaltext_textdet_test = dict( 11 | type='OCRDataset', 12 | 
data_root=totaltext_textdet_data_root, 13 | ann_file='textdet_test.json', 14 | test_mode=True, 15 | pipeline=None) 16 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/_base_/datasets/toy_data.py: -------------------------------------------------------------------------------- 1 | toy_det_data_root = 'tests/data/det_toy_dataset' 2 | 3 | toy_det_train = dict( 4 | type='OCRDataset', 5 | data_root=toy_det_data_root, 6 | ann_file='instances_training.json', 7 | data_prefix=dict(img_path='imgs/'), 8 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 9 | pipeline=None) 10 | 11 | toy_det_test = dict( 12 | type='OCRDataset', 13 | data_root=toy_det_data_root, 14 | ann_file='instances_test.json', 15 | data_prefix=dict(img_path='imgs/'), 16 | test_mode=True, 17 | pipeline=None) 18 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/_base_/pretrain_runtime.py: -------------------------------------------------------------------------------- 1 | _base_ = 'default_runtime.py' 2 | 3 | default_hooks = dict( 4 | logger=dict(type='LoggerHook', interval=1000), 5 | checkpoint=dict( 6 | type='CheckpointHook', 7 | interval=10000, 8 | by_epoch=False, 9 | max_keep_ckpts=1), 10 | ) 11 | 12 | # Evaluation 13 | val_evaluator = None 14 | test_evaluator = None 15 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/_base_/schedules/schedule_adam_600e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optim_wrapper = dict(type='OptimWrapper', optimizer=dict(type='Adam', lr=1e-3)) 3 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=60, val_interval=10) 4 | val_cfg = dict(type='ValLoop') 5 | test_cfg = dict(type='TestLoop') 6 | # learning rate 7 | param_scheduler = [ 8 | dict(type='PolyLR', power=0.9, end=60), 9 | ] 10 | 
-------------------------------------------------------------------------------- /mmocr/configs/textdet/_base_/schedules/schedule_sgd_100k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optim_wrapper = dict( 3 | type='OptimWrapper', 4 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001)) 5 | 6 | train_cfg = dict(type='IterBasedTrainLoop', max_iters=100000) 7 | # test_cfg = None 8 | # val_cfg = None 9 | val_cfg = dict(type='ValLoop') 10 | test_cfg = dict(type='TestLoop') 11 | # learning policy 12 | param_scheduler = [ 13 | dict(type='PolyLR', power=0.9, eta_min=1e-7, by_epoch=False, end=100000), 14 | ] 15 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/_base_/schedules/schedule_sgd_1200e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optim_wrapper = dict( 3 | type='OptimWrapper', 4 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001)) 5 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=1200, val_interval=100) 6 | val_cfg = dict(type='ValLoop') 7 | test_cfg = dict(type='TestLoop') 8 | # learning policy 9 | param_scheduler = [ 10 | dict(type='PolyLR', power=0.9, eta_min=1e-7, end=1200), 11 | ] 12 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/_base_/schedules/schedule_sgd_base.py: -------------------------------------------------------------------------------- 1 | # Note: This schedule config serves as a base config for other schedules. 2 | # Users would have to at least fill in "max_epochs" and "val_interval" 3 | # in order to use this config in their experiments. 
4 | 5 | # optimizer 6 | optim_wrapper = dict( 7 | type='OptimWrapper', 8 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001)) 9 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=None, val_interval=20) 10 | val_cfg = dict(type='ValLoop') 11 | test_cfg = dict(type='TestLoop') 12 | # learning policy 13 | param_scheduler = [ 14 | dict(type='ConstantLR', factor=1.0), 15 | ] 16 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/dbnet/dbnet_resnet50-oclip_1200e_icdar2015.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py', 3 | ] 4 | 5 | load_from = None 6 | 7 | _base_.model.backbone = dict( 8 | type='CLIPResNet', 9 | init_cfg=dict( 10 | type='Pretrained', 11 | checkpoint='https://download.openmmlab.com/' 12 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth')) 13 | 14 | _base_.train_dataloader.num_workers = 24 15 | _base_.optim_wrapper.optimizer.lr = 0.002 16 | 17 | param_scheduler = [ 18 | dict(type='LinearLR', end=100, start_factor=0.001), 19 | dict(type='PolyLR', power=0.9, eta_min=1e-7, begin=100, end=1200), 20 | ] 21 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/dbnet/dbnet_resnet50_1200e_icdar2015.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py', 3 | ] 4 | 5 | load_from = None 6 | 7 | _base_.model.backbone = dict( 8 | type='mmdet.ResNet', 9 | depth=50, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=-1, 13 | norm_cfg=dict(type='BN', requires_grad=True), 14 | norm_eval=True, 15 | style='pytorch', 16 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')) 17 | 18 | _base_.train_dataloader.num_workers = 24 19 | _base_.optim_wrapper.optimizer.lr = 0.002 20 | 21 | param_scheduler = [ 22 | 
dict(type='LinearLR', end=100, start_factor=0.001), 23 | dict(type='PolyLR', power=0.9, eta_min=1e-7, begin=100, end=1200), 24 | ] 25 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py', 3 | ] 4 | 5 | load_from = None 6 | 7 | _base_.model.backbone = dict( 8 | type='CLIPResNet', 9 | init_cfg=dict( 10 | type='Pretrained', 11 | checkpoint='https://download.openmmlab.com/' 12 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth')) 13 | 14 | _base_.train_dataloader.num_workers = 24 15 | _base_.optim_wrapper.optimizer.lr = 0.002 16 | 17 | param_scheduler = [ 18 | dict(type='LinearLR', end=200, start_factor=0.001), 19 | dict(type='PolyLR', power=0.9, eta_min=1e-7, begin=200, end=1200), 20 | ] 21 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py', 3 | ] 4 | 5 | load_from = None 6 | 7 | _base_.model.backbone = dict( 8 | type='mmdet.ResNet', 9 | depth=50, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=-1, 13 | norm_cfg=dict(type='BN', requires_grad=True), 14 | norm_eval=True, 15 | style='pytorch', 16 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')) 17 | 18 | _base_.train_dataloader.num_workers = 24 19 | _base_.optim_wrapper.optimizer.lr = 0.003 20 | 21 | param_scheduler = [ 22 | dict(type='LinearLR', end=200, start_factor=0.001), 23 | dict(type='PolyLR', power=0.9, eta_min=1e-7, begin=200, end=1200), 24 | ] 25 | -------------------------------------------------------------------------------- 
/mmocr/configs/textdet/drrg/drrg_resnet50-oclip_fpn-unet_1200e_ctw1500.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'drrg_resnet50_fpn-unet_1200e_ctw1500.py', 3 | ] 4 | 5 | load_from = None 6 | 7 | _base_.model.backbone = dict( 8 | type='CLIPResNet', 9 | init_cfg=dict( 10 | type='Pretrained', 11 | checkpoint='https://download.openmmlab.com/' 12 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth')) 13 | 14 | param_scheduler = [ 15 | dict(type='LinearLR', end=100, start_factor=0.001), 16 | dict(type='PolyLR', power=0.9, eta_min=1e-7, begin=100, end=1200), 17 | ] 18 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/fcenet/_base_fcenet_resnet50-dcnv2_fpn.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '_base_fcenet_resnet50_fpn.py', 3 | ] 4 | 5 | model = dict( 6 | backbone=dict( 7 | norm_eval=True, 8 | style='pytorch', 9 | dcn=dict(type='DCNv2', deform_groups=2, fallback_on_stride=False), 10 | stage_with_dcn=(False, True, True, True)), 11 | det_head=dict( 12 | module_loss=dict( 13 | type='FCEModuleLoss', 14 | num_sample=50, 15 | level_proportion_range=((0, 0.25), (0.2, 0.65), (0.55, 1.0))), 16 | postprocessor=dict(text_repr_type='poly', alpha=1.0, beta=2.0))) 17 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/fcenet/fcenet_resnet50-oclip_fpn_1500e_ctw1500.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'fcenet_resnet50-dcnv2_fpn_1500e_ctw1500.py', 3 | ] 4 | 5 | load_from = None 6 | 7 | _base_.model.backbone = dict( 8 | type='CLIPResNet', 9 | out_indices=(1, 2, 3), 10 | init_cfg=dict( 11 | type='Pretrained', 12 | checkpoint='https://download.openmmlab.com/' 13 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth')) 14 | 15 | _base_.train_dataloader.num_workers = 24 16 | 
_base_.optim_wrapper.optimizer.lr = 0.0005 17 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/fcenet/fcenet_resnet50-oclip_fpn_1500e_icdar2015.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'fcenet_resnet50_fpn_1500e_icdar2015.py', 3 | ] 4 | load_from = None 5 | 6 | _base_.model.backbone = dict( 7 | type='CLIPResNet', 8 | out_indices=(1, 2, 3), 9 | init_cfg=dict( 10 | type='Pretrained', 11 | checkpoint='https://download.openmmlab.com/' 12 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth')) 13 | 14 | _base_.train_dataloader.batch_size = 16 15 | _base_.train_dataloader.num_workers = 24 16 | _base_.optim_wrapper.optimizer.lr = 0.0005 17 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/maskrcnn/mask-rcnn_resnet50-oclip_fpn_160e_ctw1500.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'mask-rcnn_resnet50_fpn_160e_ctw1500.py', 3 | ] 4 | 5 | load_from = None 6 | 7 | _base_.model.cfg.backbone = dict( 8 | _scope_='mmocr', 9 | type='CLIPResNet', 10 | init_cfg=dict( 11 | type='Pretrained', 12 | checkpoint='https://download.openmmlab.com/' 13 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth')) 14 | 15 | _base_.optim_wrapper.optimizer.lr = 0.02 16 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/maskrcnn/mask-rcnn_resnet50-oclip_fpn_160e_icdar2015.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'mask-rcnn_resnet50_fpn_160e_icdar2015.py', 3 | ] 4 | 5 | load_from = None 6 | 7 | _base_.model.cfg.backbone = dict( 8 | _scope_='mmocr', 9 | type='CLIPResNet', 10 | init_cfg=dict( 11 | type='Pretrained', 12 | checkpoint='https://download.openmmlab.com/' 13 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth')) 14 | 15 | _base_.optim_wrapper.optimizer.lr = 0.02 16 | 
-------------------------------------------------------------------------------- /mmocr/configs/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2017.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'mask-rcnn_resnet50_fpn_160e_icdar2015.py', 3 | '../_base_/datasets/icdar2017.py', 4 | ] 5 | 6 | icdar2017_textdet_train = _base_.icdar2017_textdet_train 7 | icdar2017_textdet_test = _base_.icdar2017_textdet_test 8 | # use the same pipeline as icdar2015 9 | icdar2017_textdet_train.pipeline = _base_.train_pipeline 10 | icdar2017_textdet_test.pipeline = _base_.test_pipeline 11 | 12 | train_dataloader = dict(dataset=icdar2017_textdet_train) 13 | val_dataloader = dict(dataset=icdar2017_textdet_test) 14 | test_dataloader = val_dataloader 15 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/panet/_base_panet_resnet50_fpem-ffm.py: -------------------------------------------------------------------------------- 1 | _base_ = '_base_panet_resnet18_fpem-ffm.py' 2 | 3 | model = dict( 4 | type='PANet', 5 | backbone=dict( 6 | _delete_=True, 7 | type='mmdet.ResNet', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | norm_eval=True, 14 | style='caffe', 15 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), 16 | ), 17 | neck=dict(in_channels=[256, 512, 1024, 2048]), 18 | det_head=dict(postprocessor=dict(text_repr_type='poly'))) 19 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/psenet/psenet_resnet50-oclip_fpnf_600e_ctw1500.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'psenet_resnet50_fpnf_600e_ctw1500.py', 3 | ] 4 | 5 | _base_.model.backbone = dict( 6 | type='CLIPResNet', 7 | init_cfg=dict( 8 | type='Pretrained', 9 | 
checkpoint='https://download.openmmlab.com/' 10 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth')) 11 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/psenet/psenet_resnet50-oclip_fpnf_600e_icdar2015.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'psenet_resnet50_fpnf_600e_icdar2015.py', 3 | ] 4 | 5 | _base_.model.backbone = dict( 6 | type='CLIPResNet', 7 | init_cfg=dict( 8 | type='Pretrained', 9 | checkpoint='https://download.openmmlab.com/' 10 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth')) 11 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/psenet/psenet_resnet50_fpnf_600e_icdar2017.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'psenet_resnet50_fpnf_600e_icdar2015.py', 3 | '../_base_/datasets/icdar2017.py', 4 | ] 5 | 6 | icdar2017_textdet_train = _base_.icdar2017_textdet_train 7 | icdar2017_textdet_test = _base_.icdar2017_textdet_test 8 | # use the same pipeline as icdar2015 9 | icdar2017_textdet_train.pipeline = _base_.train_pipeline 10 | icdar2017_textdet_test.pipeline = _base_.test_pipeline 11 | 12 | train_dataloader = dict(dataset=icdar2017_textdet_train) 13 | val_dataloader = dict(dataset=icdar2017_textdet_test) 14 | test_dataloader = val_dataloader 15 | 16 | auto_scale_lr = dict(base_batch_size=64 * 4) 17 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/textsnake/textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'textsnake_resnet50_fpn-unet_1200e_ctw1500.py', 3 | ] 4 | 5 | _base_.model.backbone = dict( 6 | type='CLIPResNet', 7 | init_cfg=dict( 8 | type='Pretrained', 9 | checkpoint='https://download.openmmlab.com/' 10 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth')) 11 | 
-------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/coco_text_v1.py: -------------------------------------------------------------------------------- 1 | cocotextv1_textrecog_data_root = 'data/rec/coco_text_v1' 2 | 3 | cocotextv1_textrecog_train = dict( 4 | type='OCRDataset', 5 | data_root=cocotextv1_textrecog_data_root, 6 | ann_file='train_labels.json', 7 | test_mode=False, 8 | pipeline=None) 9 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/cute80.py: -------------------------------------------------------------------------------- 1 | cute80_textrecog_data_root = 'data/cute80' 2 | 3 | cute80_textrecog_test = dict( 4 | type='OCRDataset', 5 | data_root=cute80_textrecog_data_root, 6 | ann_file='textrecog_test.json', 7 | test_mode=True, 8 | pipeline=None) 9 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/icdar2011.py: -------------------------------------------------------------------------------- 1 | icdar2011_textrecog_data_root = 'data/rec/icdar_2011/' 2 | 3 | icdar2011_textrecog_train = dict( 4 | type='OCRDataset', 5 | data_root=icdar2011_textrecog_data_root, 6 | ann_file='train_labels.json', 7 | test_mode=False, 8 | pipeline=None) 9 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/icdar2013.py: -------------------------------------------------------------------------------- 1 | icdar2013_textrecog_data_root = 'data/icdar2013' 2 | 3 | icdar2013_textrecog_train = dict( 4 | type='OCRDataset', 5 | data_root=icdar2013_textrecog_data_root, 6 | ann_file='textrecog_train.json', 7 | pipeline=None) 8 | 9 | icdar2013_textrecog_test = dict( 10 | type='OCRDataset', 11 | data_root=icdar2013_textrecog_data_root, 12 | ann_file='textrecog_test.json', 13 | test_mode=True, 14 | 
pipeline=None) 15 | 16 | icdar2013_857_textrecog_test = dict( 17 | type='OCRDataset', 18 | data_root=icdar2013_textrecog_data_root, 19 | ann_file='textrecog_test_857.json', 20 | test_mode=True, 21 | pipeline=None) 22 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/icdar2015.py: -------------------------------------------------------------------------------- 1 | icdar2015_textrecog_data_root = 'data/icdar2015' 2 | 3 | icdar2015_textrecog_train = dict( 4 | type='OCRDataset', 5 | data_root=icdar2015_textrecog_data_root, 6 | ann_file='textrecog_train.json', 7 | pipeline=None) 8 | 9 | icdar2015_textrecog_test = dict( 10 | type='OCRDataset', 11 | data_root=icdar2015_textrecog_data_root, 12 | ann_file='textrecog_test.json', 13 | test_mode=True, 14 | pipeline=None) 15 | 16 | icdar2015_1811_textrecog_test = dict( 17 | type='OCRDataset', 18 | data_root=icdar2015_textrecog_data_root, 19 | ann_file='textrecog_test_1811.json', 20 | test_mode=True, 21 | pipeline=None) 22 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/iiit5k.py: -------------------------------------------------------------------------------- 1 | iiit5k_textrecog_data_root = 'data/iiit5k' 2 | 3 | iiit5k_textrecog_train = dict( 4 | type='OCRDataset', 5 | data_root=iiit5k_textrecog_data_root, 6 | ann_file='textrecog_train.json', 7 | pipeline=None) 8 | 9 | iiit5k_textrecog_test = dict( 10 | type='OCRDataset', 11 | data_root=iiit5k_textrecog_data_root, 12 | ann_file='textrecog_test.json', 13 | test_mode=True, 14 | pipeline=None) 15 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/mjsynth.py: -------------------------------------------------------------------------------- 1 | mjsynth_textrecog_data_root = 'data/mjsynth' 2 | 3 | mjsynth_textrecog_train = dict( 4 | type='OCRDataset', 5 | 
data_root=mjsynth_textrecog_data_root, 6 | ann_file='textrecog_train.json', 7 | pipeline=None) 8 | 9 | mjsynth_sub_textrecog_train = dict( 10 | type='OCRDataset', 11 | data_root=mjsynth_textrecog_data_root, 12 | ann_file='subset_textrecog_train.json', 13 | pipeline=None) 14 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/svt.py: -------------------------------------------------------------------------------- 1 | svt_textrecog_data_root = 'data/svt' 2 | 3 | svt_textrecog_train = dict( 4 | type='OCRDataset', 5 | data_root=svt_textrecog_data_root, 6 | ann_file='textrecog_train.json', 7 | pipeline=None) 8 | 9 | svt_textrecog_test = dict( 10 | type='OCRDataset', 11 | data_root=svt_textrecog_data_root, 12 | ann_file='textrecog_test.json', 13 | test_mode=True, 14 | pipeline=None) 15 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/svtp.py: -------------------------------------------------------------------------------- 1 | svtp_textrecog_data_root = 'data/svtp' 2 | 3 | svtp_textrecog_train = dict( 4 | type='OCRDataset', 5 | data_root=svtp_textrecog_data_root, 6 | ann_file='textrecog_train.json', 7 | pipeline=None) 8 | 9 | svtp_textrecog_test = dict( 10 | type='OCRDataset', 11 | data_root=svtp_textrecog_data_root, 12 | ann_file='textrecog_test.json', 13 | test_mode=True, 14 | pipeline=None) 15 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/synthtext.py: -------------------------------------------------------------------------------- 1 | synthtext_textrecog_data_root = 'data/synthtext' 2 | 3 | synthtext_textrecog_train = dict( 4 | type='OCRDataset', 5 | data_root=synthtext_textrecog_data_root, 6 | ann_file='textrecog_train.json', 7 | pipeline=None) 8 | 9 | synthtext_sub_textrecog_train = dict( 10 | type='OCRDataset', 11 | 
data_root=synthtext_textrecog_data_root, 12 | ann_file='subset_textrecog_train.json', 13 | pipeline=None) 14 | 15 | synthtext_an_textrecog_train = dict( 16 | type='OCRDataset', 17 | data_root=synthtext_textrecog_data_root, 18 | ann_file='alphanumeric_textrecog_train.json', 19 | pipeline=None) 20 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/synthtext_add.py: -------------------------------------------------------------------------------- 1 | synthtext_add_textrecog_data_root = 'data/rec/synthtext_add/' 2 | 3 | synthtext_add_textrecog_train = dict( 4 | type='OCRDataset', 5 | data_root=synthtext_add_textrecog_data_root, 6 | ann_file='train_labels.json', 7 | test_mode=False, 8 | pipeline=None) 9 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/totaltext.py: -------------------------------------------------------------------------------- 1 | totaltext_textrecog_data_root = 'data/totaltext/' 2 | 3 | totaltext_textrecog_train = dict( 4 | type='OCRDataset', 5 | data_root=totaltext_textrecog_data_root, 6 | ann_file='textrecog_train.json', 7 | test_mode=False, 8 | pipeline=None) 9 | 10 | totaltext_textrecog_test = dict( 11 | type='OCRDataset', 12 | data_root=totaltext_textrecog_data_root, 13 | ann_file='textrecog_test.json', 14 | test_mode=True, 15 | pipeline=None) 16 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/toy_data.py: -------------------------------------------------------------------------------- 1 | toy_data_root = 'tests/data/rec_toy_dataset/' 2 | 3 | toy_rec_train = dict( 4 | type='OCRDataset', 5 | data_root=toy_data_root, 6 | data_prefix=dict(img_path='imgs/'), 7 | ann_file='labels.json', 8 | pipeline=None, 9 | test_mode=False) 10 | 11 | toy_rec_test = dict( 12 | type='OCRDataset', 13 | data_root=toy_data_root, 14 | 
data_prefix=dict(img_path='imgs/'), 15 | ann_file='labels.json', 16 | pipeline=None, 17 | test_mode=True) 18 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/schedules/schedule_adadelta_5e.py: -------------------------------------------------------------------------------- 1 | optim_wrapper = dict( 2 | type='OptimWrapper', optimizer=dict(type='Adadelta', lr=1.0)) 3 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=5, val_interval=1) 4 | val_cfg = dict(type='ValLoop') 5 | test_cfg = dict(type='TestLoop') 6 | # learning rate 7 | param_scheduler = [ 8 | dict(type='ConstantLR', factor=1.0), 9 | ] 10 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/schedules/schedule_adam_base.py: -------------------------------------------------------------------------------- 1 | # Note: This schedule config serves as a base config for other schedules. 2 | # Users would have to at least fill in "max_epochs" and "val_interval" 3 | # in order to use this config in their experiments. 
4 | 5 | # optimizer 6 | optim_wrapper = dict(type='OptimWrapper', optimizer=dict(type='Adam', lr=3e-4)) 7 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=None, val_interval=1) 8 | val_cfg = dict(type='ValLoop') 9 | test_cfg = dict(type='TestLoop') 10 | # learning policy 11 | param_scheduler = [ 12 | dict(type='ConstantLR', factor=1.0), 13 | ] 14 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/schedules/schedule_adam_step_5e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optim_wrapper = dict(type='OptimWrapper', optimizer=dict(type='Adam', lr=1e-3)) 3 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=5, val_interval=1) 4 | val_cfg = dict(type='ValLoop') 5 | test_cfg = dict(type='TestLoop') 6 | # learning policy 7 | param_scheduler = [ 8 | dict(type='MultiStepLR', milestones=[3, 4], end=5), 9 | ] 10 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/schedules/schedule_adamw_cos_6e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optim_wrapper = dict( 3 | type='OptimWrapper', 4 | optimizer=dict( 5 | type='AdamW', 6 | lr=4e-4, 7 | betas=(0.9, 0.999), 8 | eps=1e-08, 9 | weight_decay=0.05)) 10 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=6, val_interval=1) 11 | val_cfg = dict(type='ValLoop') 12 | test_cfg = dict(type='TestLoop') 13 | 14 | # learning policy 15 | param_scheduler = [ 16 | dict( 17 | type='CosineAnnealingLR', 18 | T_max=6, 19 | eta_min=4e-6, 20 | convert_to_iter_based=True) 21 | ] 22 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/abinet/_base_abinet.py: -------------------------------------------------------------------------------- 1 | _base_ = '_base_abinet-vision.py' 2 | 3 | model = dict( 4 | decoder=dict( 5 | d_model=512, 6 
| num_iters=3, 7 | language_decoder=dict( 8 | type='ABILanguageDecoder', 9 | d_model=512, 10 | n_head=8, 11 | d_inner=2048, 12 | n_layers=4, 13 | dropout=0.1, 14 | detach_tokens=True, 15 | use_self_attn=False, 16 | )), ) 17 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/nrtr/nrtr_resnet31-1by8-1by4_6e_st_mj.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'nrtr_resnet31-1by16-1by8_6e_st_mj.py', 3 | ] 4 | 5 | model = dict(backbone=dict(last_stage_pool=False)) 6 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/sar/sar_resnet31_sequential-decoder_5e_st-sub_mj-sub_sa_real.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'sar_resnet31_parallel-decoder_5e_st-sub_mj-sub_sa_real.py', 3 | ] 4 | 5 | model = dict(decoder=dict(type='SequentialSARDecoder')) 6 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/satrn/satrn_shallow-small_5e_st_mj.py: -------------------------------------------------------------------------------- 1 | _base_ = ['satrn_shallow_5e_st_mj.py'] 2 | 3 | model = dict( 4 | backbone=dict(type='ShallowCNN', input_channels=3, hidden_dim=256), 5 | encoder=dict( 6 | type='SATRNEncoder', 7 | n_layers=6, 8 | n_head=8, 9 | d_k=256 // 8, 10 | d_v=256 // 8, 11 | d_model=256, 12 | n_position=100, 13 | d_inner=256 * 4, 14 | dropout=0.1), 15 | decoder=dict( 16 | type='NRTRDecoder', 17 | n_layers=6, 18 | d_embedding=256, 19 | n_head=8, 20 | d_model=256, 21 | d_inner=256 * 4, 22 | d_k=256 // 8, 23 | d_v=256 // 8)) 24 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/svtr/svtr-base_20e_st_mj.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'svtr-tiny_20e_st_mj.py', 3 | ] 4 | 5 | 
model = dict( 6 | preprocessor=dict(output_image_size=(48, 160), ), 7 | encoder=dict( 8 | img_size=[48, 160], 9 | max_seq_len=40, 10 | out_channels=256, 11 | embed_dims=[128, 256, 384], 12 | depth=[3, 6, 9], 13 | num_heads=[4, 8, 12], 14 | mixer_types=['Local'] * 8 + ['Global'] * 10), 15 | decoder=dict(in_channels=256)) 16 | 17 | train_dataloader = dict(batch_size=256, ) 18 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/svtr/svtr-large_20e_st_mj.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'svtr-tiny_20e_st_mj.py', 3 | ] 4 | 5 | model = dict( 6 | preprocessor=dict(output_image_size=(48, 160), ), 7 | encoder=dict( 8 | img_size=[48, 160], 9 | max_seq_len=40, 10 | out_channels=384, 11 | embed_dims=[192, 256, 512], 12 | depth=[3, 9, 9], 13 | num_heads=[6, 8, 16], 14 | mixer_types=['Local'] * 10 + ['Global'] * 11), 15 | decoder=dict(in_channels=384)) 16 | 17 | train_dataloader = dict(batch_size=128, ) 18 | 19 | optim_wrapper = dict(optimizer=dict(lr=2.5 / (10**4))) 20 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/svtr/svtr-small_20e_st_mj.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'svtr-tiny_20e_st_mj.py', 3 | ] 4 | 5 | model = dict( 6 | encoder=dict( 7 | embed_dims=[96, 192, 256], 8 | depth=[3, 6, 6], 9 | num_heads=[3, 6, 8], 10 | mixer_types=['Local'] * 8 + ['Global'] * 7)) 11 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/cocotextv2/textrecog.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.packer.type = 'TextRecogCropPacker' 4 | _base_.val_preparer.packer.type = 'TextRecogCropPacker' 5 | 6 | config_generator = dict(type='TextRecogConfigGenerator') 7 | 
-------------------------------------------------------------------------------- /mmocr/dataset_zoo/cocotextv2/textspotting.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.packer.type = 'TextSpottingPacker' 4 | _base_.test_preparer.packer.type = 'TextSpottingPacker' 5 | 6 | config_generator = dict(type='TextSpottingConfigGenerator') 7 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/ctw1500/textrecog.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train' 4 | _base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test' 5 | 6 | _base_.train_preparer.packer.type = 'TextRecogCropPacker' 7 | _base_.test_preparer.packer.type = 'TextRecogCropPacker' 8 | 9 | config_generator = dict(type='TextRecogConfigGenerator') 10 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/ctw1500/textspotting.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train' 4 | _base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test' 5 | 6 | _base_.train_preparer.packer.type = 'TextSpottingPacker' 7 | _base_.test_preparer.packer.type = 'TextSpottingPacker' 8 | 9 | _base_.test_preparer.obtainer.files.append( 10 | dict( 11 | url='https://download.openmmlab.com/mmocr/data/1.x/textspotting/' 12 | 'ctw1500/lexicons.zip', 13 | save_name='ctw1500_lexicons.zip', 14 | md5='168150ca45da161917bf35a20e45b8d6', 15 | content=['lexicons'], 16 | mapping=[['ctw1500_lexicons/lexicons', 'lexicons']])) 17 | 18 | _base_.delete.append('ctw1500_lexicons') 19 | config_generator = dict(type='TextSpottingConfigGenerator') 20 | 
-------------------------------------------------------------------------------- /mmocr/dataset_zoo/cute80/sample_anno.md: -------------------------------------------------------------------------------- 1 | **Text Recognition** 2 | 3 | ```text 4 | # timage/img_name text 1 text 5 | 6 | timage/001.jpg RONALDO 1 RONALDO 7 | timage/002.jpg 7 1 7 8 | timage/003.jpg SEACREST 1 SEACREST 9 | timage/004.jpg BEACH 1 BEACH 10 | ``` 11 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/funsd/textrecog.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train' 4 | _base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test' 5 | 6 | _base_.train_preparer.packer.type = 'TextRecogCropPacker' 7 | _base_.test_preparer.packer.type = 'TextRecogCropPacker' 8 | 9 | config_generator = dict(type='TextRecogConfigGenerator') 10 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/funsd/textspotting.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | _base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train' 3 | _base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test' 4 | 5 | _base_.train_preparer.packer.type = 'TextSpottingPacker' 6 | _base_.test_preparer.packer.type = 'TextSpottingPacker' 7 | 8 | config_generator = dict(type='TextSpottingConfigGenerator') 9 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/icdar2013/sample_anno.md: -------------------------------------------------------------------------------- 1 | **Text Detection** 2 | 3 | ```text 4 | # train split 5 | # x1 y1 x2 y2 "transcript" 6 | 7 | 158 128 411 181 "Footpath" 8 | 443 128 501 169 "To" 9 | 64 200 363 243 "Colchester" 10 | 11 | # test split 12 | # x1, y1, x2, y2, "transcript" 13 
| 14 | 38, 43, 920, 215, "Tiredness" 15 | 275, 264, 665, 450, "kills" 16 | 0, 699, 77, 830, "A" 17 | ``` 18 | 19 | **Text Recognition** 20 | 21 | ```text 22 | # img_name, "text" 23 | 24 | word_1.png, "PROPER" 25 | word_2.png, "FOOD" 26 | word_3.png, "PRONTO" 27 | ``` 28 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/icdar2015/sample_anno.md: -------------------------------------------------------------------------------- 1 | **Text Detection** 2 | 3 | ```text 4 | # x1,y1,x2,y2,x3,y3,x4,y4,trans 5 | 6 | 377,117,463,117,465,130,378,130,Genaxis Theatre 7 | 493,115,519,115,519,131,493,131,[06] 8 | 374,155,409,155,409,170,374,170,### 9 | ``` 10 | 11 | **Text Recognition** 12 | 13 | ```text 14 | # img_name, "text" 15 | 16 | word_1.png, "Genaxis Theatre" 17 | word_2.png, "[06]" 18 | word_3.png, "62-03" 19 | ``` 20 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/iiit5k/metafile.yml: -------------------------------------------------------------------------------- 1 | Name: 'IIIT5K' 2 | Paper: 3 | Title: Scene Text Recognition using Higher Order Language Priors 4 | URL: http://cvit.iiit.ac.in/projects/SceneTextUnderstanding/Home/mishraBMVC12.pdf 5 | Venue: BMVC 6 | Year: '2012' 7 | BibTeX: '@InProceedings{MishraBMVC12, 8 | author = "Mishra, A. and Alahari, K. 
and Jawahar, C.~V.", 9 | title = "Scene Text Recognition using Higher Order Language Priors", 10 | booktitle = "BMVC", 11 | year = "2012"}' 12 | Data: 13 | Website: http://cvit.iiit.ac.in/projects/SceneTextUnderstanding/IIIT5K.html 14 | Language: 15 | - English 16 | Scene: 17 | - Natural Scene 18 | Granularity: 19 | - Word 20 | Tasks: 21 | - textrecog 22 | License: 23 | Type: N/A 24 | Link: N/A 25 | Format: .txt 26 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/iiit5k/sample_anno.md: -------------------------------------------------------------------------------- 1 | **Text Recognition** 2 | 3 | ```text 4 | # img_name, "text" 5 | 6 | train/1009_2.png You 7 | train/1017_1.png Rescue 8 | train/1017_2.png mission 9 | ``` 10 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/mjsynth/sample_anno.md: -------------------------------------------------------------------------------- 1 | **Text Recognition** 2 | 3 | ```txt 4 | ./3000/7/182_slinking_71711.jpg 71711 5 | ./3000/7/182_REMODELERS_64541.jpg 64541 6 | ``` 7 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/sroie/sample_anno.md: -------------------------------------------------------------------------------- 1 | **Text Detection, Text Recognition and Text Spotting** 2 | 3 | ```text 4 | # x1,y1,x2,y2,x3,y3,x4,y4,trans 5 | 6 | 72,25,326,25,326,64,72,64,TAN WOON YANN 7 | 50,82,440,82,440,121,50,121,BOOK TA .K(TAMAN DAYA) SDN BND 8 | 205,121,285,121,285,139,205,139,789417-W 9 | ``` 10 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/sroie/textrecog.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train' 4 | _base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test' 5 | 
_base_.train_preparer.packer.type = 'TextRecogCropPacker' 6 | _base_.test_preparer.packer.type = 'TextRecogCropPacker' 7 | 8 | config_generator = dict(type='TextRecogConfigGenerator') 9 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/sroie/textspotting.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train' 4 | _base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test' 5 | _base_.train_preparer.packer.type = 'TextSpottingPacker' 6 | _base_.test_preparer.packer.type = 'TextSpottingPacker' 7 | 8 | config_generator = dict(type='TextSpottingConfigGenerator') 9 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/svt/metafile.yml: -------------------------------------------------------------------------------- 1 | Name: 'Street View Text Dataset (SVT)' 2 | Paper: 3 | Title: Word Spotting in the Wild 4 | URL: https://link.springer.com/content/pdf/10.1007/978-3-642-15549-9_43.pdf 5 | Venue: ECCV 6 | Year: '2010' 7 | BibTeX: '@inproceedings{wang2010word, 8 | title={Word spotting in the wild}, 9 | author={Wang, Kai and Belongie, Serge}, 10 | booktitle={European conference on computer vision}, 11 | pages={591--604}, 12 | year={2010}, 13 | organization={Springer}}' 14 | Data: 15 | Website: http://www.iapr-tc11.org/mediawiki/index.php/The_Street_View_Text_Dataset 16 | Language: 17 | - English 18 | Scene: 19 | - Natural Scene 20 | Granularity: 21 | - Word 22 | Tasks: 23 | - textdet 24 | - textrecog 25 | - textspotting 26 | License: 27 | Type: N/A 28 | Link: N/A 29 | Format: .xml 30 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/svt/textrecog.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.packer.type = 
'TextRecogCropPacker' 4 | _base_.test_preparer.packer.type = 'TextRecogCropPacker' 5 | 6 | config_generator = dict(type='TextRecogConfigGenerator') 7 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/svt/textspotting.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.packer.type = 'TextSpottingPacker' 4 | _base_.test_preparer.packer.type = 'TextSpottingPacker' 5 | 6 | config_generator = dict(type='TextSpottingConfigGenerator') 7 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/svtp/sample_anno.md: -------------------------------------------------------------------------------- 1 | **Text Recognition** 2 | 3 | ```txt 4 | 13_15_0_par.jpg WYNDHAM 5 | 13_15_1_par.jpg HOTEL 6 | 12_16_0_par.jpg UNITED 7 | ``` 8 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/synthtext/textspotting.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.packer.type = 'TextSpottingPacker' 4 | _base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train' 5 | 6 | config_generator = dict(type='TextSpottingConfigGenerator') 7 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/textocr/textrecog.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.packer.type = 'TextRecogCropPacker' 4 | _base_.val_preparer.packer.type = 'TextRecogCropPacker' 5 | 6 | config_generator = dict(type='TextRecogConfigGenerator') 7 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/textocr/textspotting.py: -------------------------------------------------------------------------------- 1 | _base_ = 
['textdet.py'] 2 | 3 | _base_.train_preparer.packer.type = 'TextSpottingPacker' 4 | _base_.val_preparer.packer.type = 'TextSpottingPacker' 5 | 6 | config_generator = dict(type='TextSpottingConfigGenerator') 7 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/totaltext/sample_anno.md: -------------------------------------------------------------------------------- 1 | **Text Detection/Spotting** 2 | 3 | ```text 4 | x: [[259 313 389 427 354 302]], y: [[542 462 417 459 507 582]], ornt: [u'c'], transcriptions: [u'PAUL'] 5 | x: [[400 478 494 436]], y: [[398 380 448 465]], ornt: [u'#'], transcriptions: [u'#'] 6 | ``` 7 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/totaltext/textrecog.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train' 4 | _base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test' 5 | _base_.train_preparer.packer.type = 'TextRecogCropPacker' 6 | _base_.test_preparer.packer.type = 'TextRecogCropPacker' 7 | 8 | config_generator = dict(type='TextRecogConfigGenerator') 9 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/wildreceipt/textdet.py: -------------------------------------------------------------------------------- 1 | _base_ = ['kie.py'] 2 | 3 | _base_.train_preparer.update( 4 | dict( 5 | parser=dict(type='WildreceiptTextDetAnnParser'), 6 | packer=dict(type='TextDetPacker'), 7 | dumper=dict(type='JsonDumper'))) 8 | _base_.test_preparer.update( 9 | dict( 10 | parser=dict(type='WildreceiptTextDetAnnParser'), 11 | packer=dict(type='TextDetPacker'), 12 | dumper=dict(type='JsonDumper'))) 13 | 14 | config_generator = dict(type='TextDetConfigGenerator') 15 | -------------------------------------------------------------------------------- 
/mmocr/dataset_zoo/wildreceipt/textrecog.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.update( 4 | dict( 5 | parser=dict(type='WildreceiptTextDetAnnParser'), 6 | packer=dict(type='TextRecogCropPacker'), 7 | dumper=dict(type='JsonDumper'))) 8 | 9 | _base_.test_preparer.update( 10 | dict( 11 | parser=dict(type='WildreceiptTextDetAnnParser'), 12 | packer=dict(type='TextRecogCropPacker'), 13 | dumper=dict(type='JsonDumper'))) 14 | 15 | config_generator = dict(type='TextRecogConfigGenerator') 16 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/wildreceipt/textspotting.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.packer.type = 'TextSpottingPacker' 4 | _base_.test_preparer.packer.type = 'TextSpottingPacker' 5 | 6 | config_generator = dict(type='TextSpottingConfigGenerator') 7 | -------------------------------------------------------------------------------- /mmocr/demo/demo_densetext_det.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/demo_densetext_det.jpg -------------------------------------------------------------------------------- /mmocr/demo/demo_kie.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/demo_kie.jpeg -------------------------------------------------------------------------------- /mmocr/demo/demo_text_det.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/demo_text_det.jpg 
-------------------------------------------------------------------------------- /mmocr/demo/demo_text_ocr.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/demo_text_ocr.jpg -------------------------------------------------------------------------------- /mmocr/demo/demo_text_recog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/demo_text_recog.jpg -------------------------------------------------------------------------------- /mmocr/demo/resources/demo_kie_pred.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/resources/demo_kie_pred.png -------------------------------------------------------------------------------- /mmocr/demo/resources/det_vis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/resources/det_vis.png -------------------------------------------------------------------------------- /mmocr/demo/resources/kie_vis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/resources/kie_vis.png -------------------------------------------------------------------------------- /mmocr/demo/resources/log_analysis_demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/resources/log_analysis_demo.png 
-------------------------------------------------------------------------------- /mmocr/demo/resources/rec_vis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/resources/rec_vis.png -------------------------------------------------------------------------------- /mmocr/dicts/english_digits_symbols.txt: -------------------------------------------------------------------------------- 1 | 0 2 | 1 3 | 2 4 | 3 5 | 4 6 | 5 7 | 6 8 | 7 9 | 8 10 | 9 11 | a 12 | b 13 | c 14 | d 15 | e 16 | f 17 | g 18 | h 19 | i 20 | j 21 | k 22 | l 23 | m 24 | n 25 | o 26 | p 27 | q 28 | r 29 | s 30 | t 31 | u 32 | v 33 | w 34 | x 35 | y 36 | z 37 | A 38 | B 39 | C 40 | D 41 | E 42 | F 43 | G 44 | H 45 | I 46 | J 47 | K 48 | L 49 | M 50 | N 51 | O 52 | P 53 | Q 54 | R 55 | S 56 | T 57 | U 58 | V 59 | W 60 | X 61 | Y 62 | Z 63 | ! 64 | " 65 | # 66 | $ 67 | % 68 | & 69 | ' 70 | ( 71 | ) 72 | * 73 | + 74 | , 75 | - 76 | . 77 | / 78 | : 79 | ; 80 | < 81 | = 82 | > 83 | ? 84 | @ 85 | [ 86 | \ 87 | ] 88 | _ 89 | ` 90 | ~ -------------------------------------------------------------------------------- /mmocr/dicts/english_digits_symbols_space.txt: -------------------------------------------------------------------------------- 1 | 0 2 | 1 3 | 2 4 | 3 5 | 4 6 | 5 7 | 6 8 | 7 9 | 8 10 | 9 11 | a 12 | b 13 | c 14 | d 15 | e 16 | f 17 | g 18 | h 19 | i 20 | j 21 | k 22 | l 23 | m 24 | n 25 | o 26 | p 27 | q 28 | r 29 | s 30 | t 31 | u 32 | v 33 | w 34 | x 35 | y 36 | z 37 | A 38 | B 39 | C 40 | D 41 | E 42 | F 43 | G 44 | H 45 | I 46 | J 47 | K 48 | L 49 | M 50 | N 51 | O 52 | P 53 | Q 54 | R 55 | S 56 | T 57 | U 58 | V 59 | W 60 | X 61 | Y 62 | Z 63 | ! 64 | " 65 | # 66 | $ 67 | % 68 | & 69 | ' 70 | ( 71 | ) 72 | * 73 | + 74 | , 75 | - 76 | . 77 | / 78 | : 79 | ; 80 | < 81 | = 82 | > 83 | ? 
84 | @ 85 | [ 86 | \ 87 | ] 88 | _ 89 | ` 90 | ~ 91 | -------------------------------------------------------------------------------- /mmocr/dicts/lower_english_digits.txt: -------------------------------------------------------------------------------- 1 | 0 2 | 1 3 | 2 4 | 3 5 | 4 6 | 5 7 | 6 8 | 7 9 | 8 10 | 9 11 | a 12 | b 13 | c 14 | d 15 | e 16 | f 17 | g 18 | h 19 | i 20 | j 21 | k 22 | l 23 | m 24 | n 25 | o 26 | p 27 | q 28 | r 29 | s 30 | t 31 | u 32 | v 33 | w 34 | x 35 | y 36 | z -------------------------------------------------------------------------------- /mmocr/dicts/lower_english_digits_space.txt: -------------------------------------------------------------------------------- 1 | 0 2 | 1 3 | 2 4 | 3 5 | 4 6 | 5 7 | 6 8 | 7 9 | 8 10 | 9 11 | a 12 | b 13 | c 14 | d 15 | e 16 | f 17 | g 18 | h 19 | i 20 | j 21 | k 22 | l 23 | m 24 | n 25 | o 26 | p 27 | q 28 | r 29 | s 30 | t 31 | u 32 | v 33 | w 34 | x 35 | y 36 | z 37 | -------------------------------------------------------------------------------- /mmocr/dicts/sdmgr_dict.txt: -------------------------------------------------------------------------------- 1 | / 2 | \ 3 | . 4 | $ 5 | £ 6 | € 7 | ¥ 8 | : 9 | - 10 | , 11 | * 12 | # 13 | ( 14 | ) 15 | % 16 | @ 17 | ! 18 | ' 19 | & 20 | = 21 | > 22 | + 23 | " 24 | × 25 | ? 
26 | < 27 | [ 28 | ] 29 | _ 30 | 0 31 | 1 32 | 2 33 | 3 34 | 4 35 | 5 36 | 6 37 | 7 38 | 8 39 | 9 40 | a 41 | b 42 | c 43 | d 44 | e 45 | f 46 | g 47 | h 48 | i 49 | j 50 | k 51 | l 52 | m 53 | n 54 | o 55 | p 56 | q 57 | r 58 | s 59 | t 60 | u 61 | v 62 | w 63 | x 64 | y 65 | z 66 | A 67 | B 68 | C 69 | D 70 | E 71 | F 72 | G 73 | H 74 | I 75 | J 76 | K 77 | L 78 | M 79 | N 80 | O 81 | P 82 | Q 83 | R 84 | S 85 | T 86 | U 87 | V 88 | W 89 | X 90 | Y 91 | Z -------------------------------------------------------------------------------- /mmocr/docker/serve/config.properties: -------------------------------------------------------------------------------- 1 | inference_address=http://0.0.0.0:8080 2 | management_address=http://0.0.0.0:8081 3 | metrics_address=http://0.0.0.0:8082 4 | model_store=/home/model-server/model-store 5 | load_models=all 6 | -------------------------------------------------------------------------------- /mmocr/docker/serve/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | if [[ "$1" = "serve" ]]; then 5 | shift 1 6 | torchserve --start --ts-config /home/model-server/config.properties 7 | else 8 | eval "$@" 9 | fi 10 | 11 | # prevent docker exit 12 | tail -f /dev/null 13 | -------------------------------------------------------------------------------- /mmocr/docs/en/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /mmocr/docs/en/_static/css/readthedocs.css: -------------------------------------------------------------------------------- 1 | .header-logo { 2 | background-image: url("../images/mmocr.png"); 3 | background-size: 110px 40px; 4 | height: 40px; 5 | width: 110px; 6 | } 7 | -------------------------------------------------------------------------------- /mmocr/docs/en/_static/images/mmocr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/docs/en/_static/images/mmocr.png -------------------------------------------------------------------------------- /mmocr/docs/en/_static/js/collapsed.js: -------------------------------------------------------------------------------- 1 | var collapsedSections = ['Migration Guides', 'API Reference'] 2 | -------------------------------------------------------------------------------- /mmocr/docs/en/_templates/classtemplate.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | .. currentmodule:: {{ module }} 4 | 5 | 6 | {{ name | underline}} 7 | 8 | .. autoclass:: {{ name }} 9 | :members: 10 | 11 | 12 | .. 
13 | autogenerated from source/_templates/classtemplate.rst 14 | note it does not have :inherited-members: 15 | -------------------------------------------------------------------------------- /mmocr/docs/en/api/apis.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | mmocr.apis 5 | =================================== 6 | 7 | .. contents:: mmocr.apis 8 | :depth: 2 9 | :local: 10 | :backlinks: top 11 | 12 | .. currentmodule:: mmocr.apis.inferencers 13 | 14 | Inferencers 15 | --------------------------------------------- 16 | 17 | .. autosummary:: 18 | :toctree: generated 19 | :nosignatures: 20 | :template: classtemplate.rst 21 | 22 | MMOCRInferencer 23 | TextDetInferencer 24 | TextRecInferencer 25 | TextSpotInferencer 26 | KIEInferencer 27 | -------------------------------------------------------------------------------- /mmocr/docs/en/api/engine.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | mmocr.engine 5 | =================================== 6 | 7 | .. contents:: mmocr.engine 8 | :depth: 2 9 | :local: 10 | :backlinks: top 11 | 12 | .. currentmodule:: mmocr.engine.hooks 13 | 14 | Hooks 15 | --------------------------------------------- 16 | 17 | .. autosummary:: 18 | :toctree: generated 19 | :nosignatures: 20 | :template: classtemplate.rst 21 | 22 | VisualizationHook 23 | -------------------------------------------------------------------------------- /mmocr/docs/en/api/structures.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | mmocr.structures 5 | =================================== 6 | 7 | .. currentmodule:: mmocr.structures 8 | .. 
autosummary:: 9 | :toctree: generated 10 | :nosignatures: 11 | :template: classtemplate.rst 12 | 13 | TextDetDataSample 14 | TextRecogDataSample 15 | KIEDataSample 16 | -------------------------------------------------------------------------------- /mmocr/docs/en/api/visualization.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | mmocr.visualization 5 | =================================== 6 | 7 | .. currentmodule:: mmocr.visualization 8 | 9 | .. autosummary:: 10 | :toctree: generated 11 | :nosignatures: 12 | :template: classtemplate.rst 13 | 14 | BaseLocalVisualizer 15 | TextDetLocalVisualizer 16 | TextRecogLocalVisualizer 17 | TextSpottingLocalVisualizer 18 | KIELocalVisualizer 19 | -------------------------------------------------------------------------------- /mmocr/docs/en/basic_concepts/convention.md: -------------------------------------------------------------------------------- 1 | # Convention\[coming soon\] 2 | 3 | Coming Soon! 4 | -------------------------------------------------------------------------------- /mmocr/docs/en/basic_concepts/data_flow.md: -------------------------------------------------------------------------------- 1 | # Data Flow\[coming soon\] 2 | 3 | Coming Soon! 4 | -------------------------------------------------------------------------------- /mmocr/docs/en/basic_concepts/engine.md: -------------------------------------------------------------------------------- 1 | # Engine\[coming soon\] 2 | 3 | Coming Soon! 4 | -------------------------------------------------------------------------------- /mmocr/docs/en/basic_concepts/models.md: -------------------------------------------------------------------------------- 1 | # Models\[coming soon\] 2 | 3 | Coming Soon! 
4 | -------------------------------------------------------------------------------- /mmocr/docs/en/basic_concepts/overview.md: -------------------------------------------------------------------------------- 1 | # Overview & Features\[coming soon\] 2 | 3 | Coming Soon! 4 | -------------------------------------------------------------------------------- /mmocr/docs/en/basic_concepts/visualizers.md: -------------------------------------------------------------------------------- 1 | # Visualizers\[coming soon\] 2 | 3 | Coming Soon! 4 | -------------------------------------------------------------------------------- /mmocr/docs/en/docutils.conf: -------------------------------------------------------------------------------- 1 | [html writers] 2 | table_style: colwidths-auto 3 | -------------------------------------------------------------------------------- /mmocr/docs/en/migration/model.md: -------------------------------------------------------------------------------- 1 | # Pretrained Model Migration 2 | 3 | Due to the extensive refactoring and fixing of the model structure in the new version, MMOCR 1.x does not support load weights trained by the old version. We have updated the pre-training weights and logs of all models on our website. 4 | 5 | In addition, we are working on the development of a weight migration tool for text detection tasks and plan to release it in the near future. Since the text recognition and key information extraction models are too much modified and the migration is lossy, we do not plan to support them accordingly for the time being. If you have specific requirements, please feel free to raise an [Issue](https://github.com/open-mmlab/mmocr/issues). 
6 | -------------------------------------------------------------------------------- /mmocr/docs/en/requirements.txt: -------------------------------------------------------------------------------- 1 | recommonmark 2 | sphinx 3 | sphinx_markdown_tables 4 | sphinx_rtd_theme 5 | -------------------------------------------------------------------------------- /mmocr/docs/en/switch_language.md: -------------------------------------------------------------------------------- 1 | ## English 2 | 3 | ## 简体中文 4 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/_static/css/readthedocs.css: -------------------------------------------------------------------------------- 1 | .header-logo { 2 | background-image: url("../images/mmocr.png"); 3 | background-size: 110px 40px; 4 | height: 40px; 5 | width: 110px; 6 | } 7 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/_static/images/mmocr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/docs/zh_cn/_static/images/mmocr.png -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/_static/js/collapsed.js: -------------------------------------------------------------------------------- 1 | var collapsedSections = ['MMOCR 0.x 迁移指南', 'API 文档'] 2 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/_templates/classtemplate.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | .. currentmodule:: {{ module }} 4 | 5 | 6 | {{ name | underline}} 7 | 8 | .. autoclass:: {{ name }} 9 | :members: 10 | 11 | 12 | .. 13 | autogenerated from source/_templates/classtemplate.rst 14 | note it does not have :inherited-members: 15 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/api/apis.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | mmocr.apis 5 | =================================== 6 | 7 | .. contents:: mmocr.apis 8 | :depth: 2 9 | :local: 10 | :backlinks: top 11 | 12 | .. 
currentmodule:: mmocr.apis.inferencers 13 | 14 | Inferencers 15 | --------------------------------------------- 16 | 17 | .. autosummary:: 18 | :toctree: generated 19 | :nosignatures: 20 | :template: classtemplate.rst 21 | 22 | MMOCRInferencer 23 | TextDetInferencer 24 | TextRecInferencer 25 | TextSpotInferencer 26 | KIEInferencer 27 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/api/engine.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | mmocr.engine 5 | =================================== 6 | 7 | .. contents:: mmocr.engine 8 | :depth: 2 9 | :local: 10 | :backlinks: top 11 | 12 | .. currentmodule:: mmocr.engine.hooks 13 | 14 | Hooks 15 | --------------------------------------------- 16 | 17 | .. autosummary:: 18 | :toctree: generated 19 | :nosignatures: 20 | :template: classtemplate.rst 21 | 22 | VisualizationHook 23 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/api/structures.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | mmocr.structures 5 | =================================== 6 | 7 | .. currentmodule:: mmocr.structures 8 | .. autosummary:: 9 | :toctree: generated 10 | :nosignatures: 11 | :template: classtemplate.rst 12 | 13 | TextDetDataSample 14 | TextRecogDataSample 15 | KIEDataSample 16 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/api/visualization.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | mmocr.visualization 5 | =================================== 6 | 7 | .. currentmodule:: mmocr.visualization 8 | 9 | .. 
autosummary:: 10 | :toctree: generated 11 | :nosignatures: 12 | :template: classtemplate.rst 13 | 14 | BaseLocalVisualizer 15 | TextDetLocalVisualizer 16 | TextRecogLocalVisualizer 17 | TextSpottingLocalVisualizer 18 | KIELocalVisualizer 19 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/basic_concepts/convention.md: -------------------------------------------------------------------------------- 1 | # 开发默认约定\[待更新\] 2 | 3 | 待更新 4 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/basic_concepts/data_flow.md: -------------------------------------------------------------------------------- 1 | # 数据流\[待更新\] 2 | 3 | 待更新 4 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/basic_concepts/engine.md: -------------------------------------------------------------------------------- 1 | # 引擎\[待更新\] 2 | 3 | 待更新 4 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/basic_concepts/models.md: -------------------------------------------------------------------------------- 1 | # 模型\[待更新\] 2 | 3 | 待更新 4 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/basic_concepts/overview.md: -------------------------------------------------------------------------------- 1 | # 设计理念与特性\[待更新\] 2 | 3 | 待更新 4 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/basic_concepts/visualizers.md: -------------------------------------------------------------------------------- 1 | # 可视化组件\[待更新\] 2 | 3 | 待更新 4 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/cp_origin_docs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copy *.md files from docs/ if it doesn't have a Chinese translation 
4 | 5 | for filename in $(find ../en/ -name '*.md' -printf "%P\n"); 6 | do 7 | mkdir -p $(dirname $filename) 8 | cp -n ../en/$filename ./$filename 9 | done 10 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/docutils.conf: -------------------------------------------------------------------------------- 1 | [html writers] 2 | table_style: colwidths-auto 3 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/migration/model.md: -------------------------------------------------------------------------------- 1 | # 预训练模型迁移指南 2 | 3 | 由于在新版本中我们对模型的结构进行了大量的重构和修复,MMOCR 1.x 并不能直接读入旧版的预训练权重。我们在网站上同步更新了所有模型的预训练权重和log,供有需要的用户使用。 4 | 5 | 此外,我们正在进行针对文本检测任务的权重迁移工具的开发,并计划于近期版本内发布。由于文本识别和关键信息提取模型改动过大,且迁移是有损的,我们暂时不计划作相应支持。如果您有具体的需求,欢迎通过 [Issue](https://github.com/open-mmlab/mmocr/issues) 向我们提问。 6 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/notes/branches.md: -------------------------------------------------------------------------------- 1 | # 分支 2 | 3 | 本文档旨在全面解释 MMOCR 中每个分支的目的和功能。 4 | 5 | ## 分支概述 6 | 7 | ### 1. `main` 8 | 9 | `main` 分支是 MMOCR 项目的默认分支。它包含了 MMOCR 的最新稳定版本,目前包含了 MMOCR 1.x(例如 v1.0.0)的代码。`main` 分支确保用户能够使用最新和最可靠的软件版本。 10 | 11 | ### 2. `dev-1.x` 12 | 13 | `dev-1.x` 分支用于开发 MMOCR 的下一个版本。此分支将在发版前进行依赖性测试,通过的提交将会合成到新版本中,并被发布到 `main` 分支。通过设置单独的开发分支,项目可以在不影响 `main` 分支稳定性的情况下继续发展。**所有 PR 应合并到 `dev-1.x` 分支。** 14 | 15 | ### 3. `0.x` 16 | 17 | `0.x` 分支用作 MMOCR 0.x(例如 v0.6.3)的存档。此分支将不再积极接受更新或改进,但它仍可作为历史参考,或供尚未升级到 MMOCR 1.x 的用户使用。 18 | 19 | ### 4. 
`1.x` 20 | 21 | 它是 `main` 分支的别名,旨在实现从兼容性时期平稳过渡。它将在 2023 年的年中删除。 22 | 23 | ```{note} 24 | 分支映射在 2023.04.06 发生了变化。有关旧分支映射和迁移指南,请参阅[分支迁移指南](../migration/branches.md)。 25 | ``` 26 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/switch_language.md: -------------------------------------------------------------------------------- 1 | ## English 2 | 3 | ## 简体中文 4 | -------------------------------------------------------------------------------- /mmocr/mmocr/apis/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .inferencers import * # NOQA 3 | -------------------------------------------------------------------------------- /mmocr/mmocr/apis/inferencers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .kie_inferencer import KIEInferencer 3 | from .mmocr_inferencer import MMOCRInferencer 4 | from .textdet_inferencer import TextDetInferencer 5 | from .textrec_inferencer import TextRecInferencer 6 | from .textspot_inferencer import TextSpotInferencer 7 | 8 | __all__ = [ 9 | 'TextDetInferencer', 'TextRecInferencer', 'KIEInferencer', 10 | 'MMOCRInferencer', 'TextSpotInferencer' 11 | ] 12 | -------------------------------------------------------------------------------- /mmocr/mmocr/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .dataset_wrapper import ConcatDataset 3 | from .icdar_dataset import IcdarDataset 4 | from .ocr_dataset import OCRDataset 5 | from .recog_lmdb_dataset import RecogLMDBDataset 6 | from .recog_text_dataset import RecogTextDataset 7 | from .samplers import * # NOQA 8 | from .transforms import * # NOQA 9 | from .wildreceipt_dataset import WildReceiptDataset 10 | 11 | __all__ = [ 12 | 'IcdarDataset', 'OCRDataset', 'RecogLMDBDataset', 'RecogTextDataset', 13 | 'WildReceiptDataset', 'ConcatDataset' 14 | ] 15 | -------------------------------------------------------------------------------- /mmocr/mmocr/datasets/preparers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .config_generators import * # noqa 3 | from .data_preparer import DatasetPreparer 4 | from .dumpers import * # noqa 5 | from .gatherers import * # noqa 6 | from .obtainers import * # noqa 7 | from .packers import * # noqa 8 | from .parsers import * # noqa 9 | 10 | __all__ = ['DatasetPreparer'] 11 | -------------------------------------------------------------------------------- /mmocr/mmocr/datasets/preparers/config_generators/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .base import BaseDatasetConfigGenerator 3 | from .textdet_config_generator import TextDetConfigGenerator 4 | from .textrecog_config_generator import TextRecogConfigGenerator 5 | from .textspotting_config_generator import TextSpottingConfigGenerator 6 | 7 | __all__ = [ 8 | 'BaseDatasetConfigGenerator', 'TextDetConfigGenerator', 9 | 'TextRecogConfigGenerator', 'TextSpottingConfigGenerator' 10 | ] 11 | -------------------------------------------------------------------------------- /mmocr/mmocr/datasets/preparers/dumpers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base import BaseDumper 3 | from .json_dumper import JsonDumper 4 | from .lmdb_dumper import TextRecogLMDBDumper 5 | from .wild_receipt_openset_dumper import WildreceiptOpensetDumper 6 | 7 | __all__ = [ 8 | 'BaseDumper', 'JsonDumper', 'WildreceiptOpensetDumper', 9 | 'TextRecogLMDBDumper' 10 | ] 11 | -------------------------------------------------------------------------------- /mmocr/mmocr/datasets/preparers/dumpers/json_dumper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os.path as osp 3 | from typing import Dict 4 | 5 | import mmengine 6 | 7 | from mmocr.registry import DATA_DUMPERS 8 | from .base import BaseDumper 9 | 10 | 11 | @DATA_DUMPERS.register_module() 12 | class JsonDumper(BaseDumper): 13 | """Dumper for json file.""" 14 | 15 | def dump(self, data: Dict) -> None: 16 | """Dump data to json file. 17 | 18 | Args: 19 | data (Dict): Data to be dumped. 
20 | """ 21 | 22 | filename = f'{self.task}_{self.split}.json' 23 | dst_file = osp.join(self.data_root, filename) 24 | mmengine.dump(data, dst_file) 25 | -------------------------------------------------------------------------------- /mmocr/mmocr/datasets/preparers/dumpers/wild_receipt_openset_dumper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os.path as osp 3 | from typing import List 4 | 5 | from mmocr.registry import DATA_DUMPERS 6 | from mmocr.utils import list_to_file 7 | from .base import BaseDumper 8 | 9 | 10 | @DATA_DUMPERS.register_module() 11 | class WildreceiptOpensetDumper(BaseDumper): 12 | 13 | def dump(self, data: List): 14 | """Dump data to txt file. 15 | 16 | Args: 17 | data (List): Data to be dumped. 18 | """ 19 | 20 | filename = f'openset_{self.split}.txt' 21 | dst_file = osp.join(self.data_root, filename) 22 | list_to_file(dst_file, data) 23 | -------------------------------------------------------------------------------- /mmocr/mmocr/datasets/preparers/gatherers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | 3 | from .base import BaseGatherer 4 | from .mono_gatherer import MonoGatherer 5 | from .naf_gatherer import NAFGatherer 6 | from .pair_gatherer import PairGatherer 7 | 8 | __all__ = ['BaseGatherer', 'MonoGatherer', 'PairGatherer', 'NAFGatherer'] 9 | -------------------------------------------------------------------------------- /mmocr/mmocr/datasets/preparers/obtainers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .naive_data_obtainer import NaiveDataObtainer 3 | 4 | __all__ = ['NaiveDataObtainer'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/datasets/preparers/packers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base import BasePacker 3 | from .textdet_packer import TextDetPacker 4 | from .textrecog_packer import TextRecogCropPacker, TextRecogPacker 5 | from .textspotting_packer import TextSpottingPacker 6 | from .wildreceipt_packer import WildReceiptPacker 7 | 8 | __all__ = [ 9 | 'BasePacker', 'TextDetPacker', 'TextRecogPacker', 'TextRecogCropPacker', 10 | 'TextSpottingPacker', 'WildReceiptPacker' 11 | ] 12 | -------------------------------------------------------------------------------- /mmocr/mmocr/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .batch_aug import BatchAugSampler 3 | 4 | __all__ = ['BatchAugSampler'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .hooks import * # NOQA 3 | -------------------------------------------------------------------------------- /mmocr/mmocr/engine/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .visualization_hook import VisualizationHook 3 | 4 | __all__ = ['VisualizationHook'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. 
All rights reserved. 2 | from .evaluator import * # NOQA 3 | from .metrics import * # NOQA 4 | -------------------------------------------------------------------------------- /mmocr/mmocr/evaluation/evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .multi_datasets_evaluator import MultiDatasetsEvaluator 3 | 4 | __all__ = ['MultiDatasetsEvaluator'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/evaluation/functional/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .hmean import compute_hmean 3 | 4 | __all__ = ['compute_hmean'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/evaluation/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .f_metric import F1Metric 3 | from .hmean_iou_metric import HmeanIOUMetric 4 | from .recog_metric import CharMetric, OneMinusNEDMetric, WordMetric 5 | 6 | __all__ = [ 7 | 'WordMetric', 'CharMetric', 'OneMinusNEDMetric', 'HmeanIOUMetric', 8 | 'F1Metric' 9 | ] 10 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .common import * # NOQA 3 | from .kie import * # NOQA 4 | from .textdet import * # NOQA 5 | from .textrecog import * # NOQA 6 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/common/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .backbones import * # NOQA 3 | from .dictionary import * # NOQA 4 | from .layers import * # NOQA 5 | from .losses import * # NOQA 6 | from .modules import * # NOQA 7 | from .plugins import * # NOQA 8 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/common/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .clip_resnet import CLIPResNet 3 | from .unet import UNet 4 | 5 | __all__ = ['UNet', 'CLIPResNet'] 6 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/common/dictionary/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | 3 | from .dictionary import Dictionary 4 | 5 | __all__ = ['Dictionary'] 6 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/common/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .transformer_layers import TFDecoderLayer, TFEncoderLayer 3 | 4 | __all__ = ['TFEncoderLayer', 'TFDecoderLayer'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/common/losses/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .bce_loss import (MaskedBalancedBCELoss, MaskedBalancedBCEWithLogitsLoss, 3 | MaskedBCELoss, MaskedBCEWithLogitsLoss) 4 | from .ce_loss import CrossEntropyLoss 5 | from .dice_loss import MaskedDiceLoss, MaskedSquareDiceLoss 6 | from .l1_loss import MaskedSmoothL1Loss, SmoothL1Loss 7 | 8 | __all__ = [ 9 | 'MaskedBalancedBCEWithLogitsLoss', 'MaskedDiceLoss', 'MaskedSmoothL1Loss', 10 | 'MaskedSquareDiceLoss', 'MaskedBCEWithLogitsLoss', 'SmoothL1Loss', 11 | 'CrossEntropyLoss', 'MaskedBalancedBCELoss', 'MaskedBCELoss' 12 | ] 13 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/common/losses/ce_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch.nn as nn 3 | 4 | from mmocr.registry import MODELS 5 | 6 | 7 | @MODELS.register_module() 8 | class CrossEntropyLoss(nn.CrossEntropyLoss): 9 | """Cross entropy loss.""" 10 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/common/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .transformer_module import (MultiHeadAttention, PositionalEncoding, 3 | PositionwiseFeedForward, 4 | ScaledDotProductAttention) 5 | 6 | __all__ = [ 7 | 'ScaledDotProductAttention', 'MultiHeadAttention', 8 | 'PositionwiseFeedForward', 'PositionalEncoding' 9 | ] 10 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/common/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .common import AvgPool2d 3 | 4 | __all__ = ['AvgPool2d'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/kie/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .extractors import * # NOQA 3 | from .heads import * # NOQA 4 | from .module_losses import * # NOQA 5 | from .postprocessors import * # NOQA 6 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/kie/extractors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .sdmgr import SDMGR 3 | 4 | __all__ = ['SDMGR'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/kie/heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .sdmgr_head import SDMGRHead 3 | 4 | __all__ = ['SDMGRHead'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/kie/module_losses/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .sdmgr_module_loss import SDMGRModuleLoss 3 | 4 | __all__ = ['SDMGRModuleLoss'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/kie/postprocessors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .sdmgr_postprocessor import SDMGRPostProcessor 3 | 4 | __all__ = ['SDMGRPostProcessor'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .data_preprocessors import * # NOQA 3 | from .detectors import * # NOQA 4 | from .heads import * # NOQA 5 | from .module_losses import * # NOQA 6 | from .necks import * # NOQA 7 | from .postprocessors import * # NOQA 8 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/data_preprocessors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .data_preprocessor import TextDetDataPreprocessor 3 | 4 | __all__ = ['TextDetDataPreprocessor'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .dbnet import DBNet 3 | from .drrg import DRRG 4 | from .fcenet import FCENet 5 | from .mmdet_wrapper import MMDetWrapper 6 | from .panet import PANet 7 | from .psenet import PSENet 8 | from .single_stage_text_detector import SingleStageTextDetector 9 | from .textsnake import TextSnake 10 | 11 | __all__ = [ 12 | 'SingleStageTextDetector', 'DBNet', 'PANet', 'PSENet', 'TextSnake', 13 | 'FCENet', 'DRRG', 'MMDetWrapper' 14 | ] 15 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/detectors/dbnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from mmocr.registry import MODELS 3 | from .single_stage_text_detector import SingleStageTextDetector 4 | 5 | 6 | @MODELS.register_module() 7 | class DBNet(SingleStageTextDetector): 8 | """The class for implementing DBNet text detector: Real-time Scene Text 9 | Detection with Differentiable Binarization. 10 | 11 | [https://arxiv.org/abs/1911.08947]. 12 | """ 13 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/detectors/drrg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmocr.registry import MODELS 3 | from .single_stage_text_detector import SingleStageTextDetector 4 | 5 | 6 | @MODELS.register_module() 7 | class DRRG(SingleStageTextDetector): 8 | """The class for implementing DRRG text detector. Deep Relational Reasoning 9 | Graph Network for Arbitrary Shape Text Detection. 10 | 11 | [https://arxiv.org/abs/2003.07493] 12 | """ 13 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/detectors/fcenet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmocr.registry import MODELS 3 | from .single_stage_text_detector import SingleStageTextDetector 4 | 5 | 6 | @MODELS.register_module() 7 | class FCENet(SingleStageTextDetector): 8 | """The class for implementing FCENet text detector 9 | FCENet(CVPR2021): Fourier Contour Embedding for Arbitrary-shaped Text 10 | Detection 11 | 12 | [https://arxiv.org/abs/2104.10442] 13 | """ 14 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/detectors/panet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from mmocr.registry import MODELS 3 | from .single_stage_text_detector import SingleStageTextDetector 4 | 5 | 6 | @MODELS.register_module() 7 | class PANet(SingleStageTextDetector): 8 | """The class for implementing PANet text detector: 9 | 10 | Efficient and Accurate Arbitrary-Shaped Text Detection with Pixel 11 | Aggregation Network [https://arxiv.org/abs/1908.05900]. 12 | """ 13 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/detectors/psenet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmocr.registry import MODELS 3 | from .single_stage_text_detector import SingleStageTextDetector 4 | 5 | 6 | @MODELS.register_module() 7 | class PSENet(SingleStageTextDetector): 8 | """The class for implementing PSENet text detector: Shape Robust Text 9 | Detection with Progressive Scale Expansion Network. 10 | 11 | [https://arxiv.org/abs/1806.02559]. 12 | """ 13 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/detectors/textsnake.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmocr.registry import MODELS 3 | from .single_stage_text_detector import SingleStageTextDetector 4 | 5 | 6 | @MODELS.register_module() 7 | class TextSnake(SingleStageTextDetector): 8 | """The class for implementing TextSnake text detector: TextSnake: A 9 | Flexible Representation for Detecting Text of Arbitrary Shapes. 10 | 11 | [https://arxiv.org/abs/1807.01544] 12 | """ 13 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .base import BaseTextDetHead 3 | from .db_head import DBHead 4 | from .drrg_head import DRRGHead 5 | from .fce_head import FCEHead 6 | from .pan_head import PANHead 7 | from .pse_head import PSEHead 8 | from .textsnake_head import TextSnakeHead 9 | 10 | __all__ = [ 11 | 'PSEHead', 'PANHead', 'DBHead', 'FCEHead', 'TextSnakeHead', 'DRRGHead', 12 | 'BaseTextDetHead' 13 | ] 14 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/module_losses/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .db_module_loss import DBModuleLoss 3 | from .drrg_module_loss import DRRGModuleLoss 4 | from .fce_module_loss import FCEModuleLoss 5 | from .pan_module_loss import PANModuleLoss 6 | from .pse_module_loss import PSEModuleLoss 7 | from .seg_based_module_loss import SegBasedModuleLoss 8 | from .textsnake_module_loss import TextSnakeModuleLoss 9 | 10 | __all__ = [ 11 | 'PANModuleLoss', 'PSEModuleLoss', 'DBModuleLoss', 'TextSnakeModuleLoss', 12 | 'FCEModuleLoss', 'DRRGModuleLoss', 'SegBasedModuleLoss' 13 | ] 14 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/necks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .fpem_ffm import FPEM_FFM 3 | from .fpn_cat import FPNC 4 | from .fpn_unet import FPN_UNet 5 | from .fpnf import FPNF 6 | 7 | __all__ = ['FPEM_FFM', 'FPNF', 'FPNC', 'FPN_UNet'] 8 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/postprocessors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .base import BaseTextDetPostProcessor 3 | from .db_postprocessor import DBPostprocessor 4 | from .drrg_postprocessor import DRRGPostprocessor 5 | from .fce_postprocessor import FCEPostprocessor 6 | from .pan_postprocessor import PANPostprocessor 7 | from .pse_postprocessor import PSEPostprocessor 8 | from .textsnake_postprocessor import TextSnakePostprocessor 9 | 10 | __all__ = [ 11 | 'PSEPostprocessor', 'PANPostprocessor', 'DBPostprocessor', 12 | 'DRRGPostprocessor', 'FCEPostprocessor', 'TextSnakePostprocessor', 13 | 'BaseTextDetPostProcessor' 14 | ] 15 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .backbones import * # NOQA 3 | from .data_preprocessors import * # NOQA 4 | from .decoders import * # NOQA 5 | from .encoders import * # NOQA 6 | from .layers import * # NOQA 7 | from .module_losses import * # NOQA 8 | from .plugins import * # NOQA 9 | from .postprocessors import * # NOQA 10 | from .preprocessors import * # NOQA 11 | from .recognizers import * # NOQA 12 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .mini_vgg import MiniVGG 3 | from .mobilenet_v2 import MobileNetV2 4 | from .nrtr_modality_transformer import NRTRModalityTransform 5 | from .resnet import ResNet 6 | from .resnet31_ocr import ResNet31OCR 7 | from .resnet_abi import ResNetABI 8 | from .shallow_cnn import ShallowCNN 9 | 10 | __all__ = [ 11 | 'ResNet31OCR', 'MiniVGG', 'NRTRModalityTransform', 'ShallowCNN', 12 | 'ResNetABI', 'ResNet', 'MobileNetV2' 13 | ] 14 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/data_preprocessors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .data_preprocessor import TextRecogDataPreprocessor 3 | 4 | __all__ = ['TextRecogDataPreprocessor'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/encoders/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .abi_encoder import ABIEncoder 3 | from .aster_encoder import ASTEREncoder 4 | from .base import BaseEncoder 5 | from .channel_reduction_encoder import ChannelReductionEncoder 6 | from .nrtr_encoder import NRTREncoder 7 | from .sar_encoder import SAREncoder 8 | from .satrn_encoder import SATRNEncoder 9 | from .svtr_encoder import SVTREncoder 10 | 11 | __all__ = [ 12 | 'SAREncoder', 'NRTREncoder', 'BaseEncoder', 'ChannelReductionEncoder', 13 | 'SATRNEncoder', 'ABIEncoder', 'SVTREncoder', 'ASTEREncoder' 14 | ] 15 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/encoders/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from mmengine.model import BaseModule 3 | 4 | from mmocr.registry import MODELS 5 | 6 | 7 | @MODELS.register_module() 8 | class BaseEncoder(BaseModule): 9 | """Base Encoder class for text recognition.""" 10 | 11 | def forward(self, feat, **kwargs): 12 | return feat 13 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .conv_layer import BasicBlock, Bottleneck 3 | from .dot_product_attention_layer import DotProductAttentionLayer 4 | from .lstm_layer import BidirectionalLSTM 5 | from .position_aware_layer import PositionAwareLayer 6 | from .robust_scanner_fusion_layer import RobustScannerFusionLayer 7 | from .satrn_layers import Adaptive2DPositionalEncoding, SATRNEncoderLayer 8 | 9 | __all__ = [ 10 | 'BidirectionalLSTM', 'Adaptive2DPositionalEncoding', 'BasicBlock', 11 | 'Bottleneck', 'RobustScannerFusionLayer', 'DotProductAttentionLayer', 12 | 'PositionAwareLayer', 'SATRNEncoderLayer' 13 | ] 14 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/layers/lstm_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import torch.nn as nn 3 | 4 | 5 | class BidirectionalLSTM(nn.Module): 6 | 7 | def __init__(self, nIn, nHidden, nOut): 8 | super().__init__() 9 | 10 | self.rnn = nn.LSTM(nIn, nHidden, bidirectional=True) 11 | self.embedding = nn.Linear(nHidden * 2, nOut) 12 | 13 | def forward(self, input): 14 | recurrent, _ = self.rnn(input) 15 | T, b, h = recurrent.size() 16 | t_rec = recurrent.view(T * b, h) 17 | 18 | output = self.embedding(t_rec) # [T * b, nOut] 19 | output = output.view(T, b, -1) 20 | 21 | return output 22 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/layers/robust_scanner_fusion_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | from mmengine.model import BaseModule 5 | 6 | 7 | class RobustScannerFusionLayer(BaseModule): 8 | 9 | def __init__(self, dim_model, dim=-1, init_cfg=None): 10 | super().__init__(init_cfg=init_cfg) 11 | 12 | self.dim_model = dim_model 13 | self.dim = dim 14 | 15 | self.linear_layer = nn.Linear(dim_model * 2, dim_model * 2) 16 | self.glu_layer = nn.GLU(dim=dim) 17 | 18 | def forward(self, x0, x1): 19 | assert x0.size() == x1.size() 20 | fusion_input = torch.cat([x0, x1], self.dim) 21 | output = self.linear_layer(fusion_input) 22 | output = self.glu_layer(output) 23 | 24 | return output 25 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/module_losses/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .abi_module_loss import ABIModuleLoss 3 | from .base import BaseTextRecogModuleLoss 4 | from .ce_module_loss import CEModuleLoss 5 | from .ctc_module_loss import CTCModuleLoss 6 | 7 | __all__ = [ 8 | 'BaseTextRecogModuleLoss', 'CEModuleLoss', 'CTCModuleLoss', 'ABIModuleLoss' 9 | ] 10 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .common import GCAModule, Maxpool2d 3 | 4 | __all__ = ['Maxpool2d', 'GCAModule'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/postprocessors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .attn_postprocessor import AttentionPostprocessor 3 | from .base import BaseTextRecogPostprocessor 4 | from .ctc_postprocessor import CTCPostProcessor 5 | 6 | __all__ = [ 7 | 'BaseTextRecogPostprocessor', 'AttentionPostprocessor', 'CTCPostProcessor' 8 | ] 9 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/preprocessors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .tps_preprocessor import STN, TPStransform 3 | 4 | __all__ = ['TPStransform', 'STN'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/preprocessors/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from mmengine.model import BaseModule 3 | 4 | from mmocr.registry import MODELS 5 | 6 | 7 | @MODELS.register_module() 8 | class BasePreprocessor(BaseModule): 9 | """Base Preprocessor class for text recognition.""" 10 | 11 | def forward(self, x, **kwargs): 12 | return x 13 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/recognizers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .abinet import ABINet 3 | from .aster import ASTER 4 | from .base import BaseRecognizer 5 | from .crnn import CRNN 6 | from .encoder_decoder_recognizer import EncoderDecoderRecognizer 7 | from .encoder_decoder_recognizer_tta import EncoderDecoderRecognizerTTAModel 8 | from .master import MASTER 9 | from .nrtr import NRTR 10 | from .robust_scanner import RobustScanner 11 | from .sar import SARNet 12 | from .satrn import SATRN 13 | from .svtr import SVTR 14 | 15 | __all__ = [ 16 | 'BaseRecognizer', 'EncoderDecoderRecognizer', 'CRNN', 'SARNet', 'NRTR', 17 | 'RobustScanner', 'SATRN', 'ABINet', 'MASTER', 'SVTR', 'ASTER', 18 | 'EncoderDecoderRecognizerTTAModel' 19 | ] 20 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/recognizers/abinet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmocr.registry import MODELS 3 | from .encoder_decoder_recognizer import EncoderDecoderRecognizer 4 | 5 | 6 | @MODELS.register_module() 7 | class ABINet(EncoderDecoderRecognizer): 8 | """Implementation of `Read Like Humans: Autonomous, Bidirectional and 9 | Iterative LanguageModeling for Scene Text Recognition. 
10 | 11 | `_ 12 | """ 13 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/recognizers/aster.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmocr.registry import MODELS 3 | from .encoder_decoder_recognizer import EncoderDecoderRecognizer 4 | 5 | 6 | @MODELS.register_module() 7 | class ASTER(EncoderDecoderRecognizer): 8 | """Implement `ASTER: An Attentional Scene Text Recognizer with Flexible 9 | Rectification. 10 | 11 | `_""" 9 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/recognizers/nrtr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmocr.registry import MODELS 3 | from .encoder_decoder_recognizer import EncoderDecoderRecognizer 4 | 5 | 6 | @MODELS.register_module() 7 | class NRTR(EncoderDecoderRecognizer): 8 | """Implementation of `NRTR `_""" 9 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/recognizers/robust_scanner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmocr.registry import MODELS 3 | from .encoder_decoder_recognizer import EncoderDecoderRecognizer 4 | 5 | 6 | @MODELS.register_module() 7 | class RobustScanner(EncoderDecoderRecognizer): 8 | """Implementation of `RobustScanner. 9 | 10 | 11 | """ 12 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/recognizers/sar.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from mmocr.registry import MODELS 3 | from .encoder_decoder_recognizer import EncoderDecoderRecognizer 4 | 5 | 6 | @MODELS.register_module() 7 | class SARNet(EncoderDecoderRecognizer): 8 | """Implementation of `SAR `_""" 9 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/recognizers/satrn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmocr.registry import MODELS 3 | from .encoder_decoder_recognizer import EncoderDecoderRecognizer 4 | 5 | 6 | @MODELS.register_module() 7 | class SATRN(EncoderDecoderRecognizer): 8 | """Implementation of `SATRN `_""" 9 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/recognizers/svtr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmocr.registry import MODELS 3 | from .encoder_decoder_recognizer import EncoderDecoderRecognizer 4 | 5 | 6 | @MODELS.register_module() 7 | class SVTR(EncoderDecoderRecognizer): 8 | """A PyTorch implementation of : `SVTR: Scene Text Recognition with a 9 | Single Visual Model `_""" 10 | -------------------------------------------------------------------------------- /mmocr/mmocr/structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .kie_data_sample import KIEDataSample 3 | from .textdet_data_sample import TextDetDataSample 4 | from .textrecog_data_sample import TextRecogDataSample 5 | from .textspotting_data_sample import TextSpottingDataSample 6 | 7 | __all__ = [ 8 | 'TextDetDataSample', 'TextRecogDataSample', 'KIEDataSample', 9 | 'TextSpottingDataSample' 10 | ] 11 | -------------------------------------------------------------------------------- /mmocr/mmocr/testing/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .data import create_dummy_dict_file, create_dummy_textdet_inputs 3 | 4 | __all__ = ['create_dummy_dict_file', 'create_dummy_textdet_inputs'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmengine.utils import get_git_hash 3 | from mmengine.utils.dl_utils import collect_env as collect_base_env 4 | 5 | import mmocr 6 | 7 | 8 | def collect_env(): 9 | """Collect the information of the running environments.""" 10 | env_info = collect_base_env() 11 | env_info['MMOCR'] = mmocr.__version__ + '+' + get_git_hash()[:7] 12 | return env_info 13 | 14 | 15 | if __name__ == '__main__': 16 | for name, val in collect_env().items(): 17 | print(f'{name}: {val}') 18 | -------------------------------------------------------------------------------- /mmocr/mmocr/version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 
2 | 3 | __version__ = '1.0.0' 4 | short_version = __version__ 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_visualizer import BaseLocalVisualizer 3 | from .kie_visualizer import KIELocalVisualizer 4 | from .textdet_visualizer import TextDetLocalVisualizer 5 | from .textrecog_visualizer import TextRecogLocalVisualizer 6 | from .textspotting_visualizer import TextSpottingLocalVisualizer 7 | 8 | __all__ = [ 9 | 'BaseLocalVisualizer', 'KIELocalVisualizer', 'TextDetLocalVisualizer', 10 | 'TextRecogLocalVisualizer', 'TextSpottingLocalVisualizer' 11 | ] 12 | -------------------------------------------------------------------------------- /mmocr/model-index.yml: -------------------------------------------------------------------------------- 1 | Import: 2 | - configs/textdet/dbnet/metafile.yml 3 | - configs/textdet/dbnetpp/metafile.yml 4 | - configs/textdet/maskrcnn/metafile.yml 5 | - configs/textdet/drrg/metafile.yml 6 | - configs/textdet/fcenet/metafile.yml 7 | - configs/textdet/panet/metafile.yml 8 | - configs/textdet/psenet/metafile.yml 9 | - configs/textdet/textsnake/metafile.yml 10 | - configs/textrecog/abinet/metafile.yml 11 | - configs/textrecog/aster/metafile.yml 12 | - configs/textrecog/crnn/metafile.yml 13 | - configs/textrecog/master/metafile.yml 14 | - configs/textrecog/nrtr/metafile.yml 15 | - configs/textrecog/svtr/metafile.yml 16 | - configs/textrecog/robust_scanner/metafile.yml 17 | - configs/textrecog/sar/metafile.yml 18 | - configs/textrecog/satrn/metafile.yml 19 | - configs/kie/sdmgr/metafile.yml 20 | -------------------------------------------------------------------------------- /mmocr/my_test.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python tools/test.py 
configs/textdet/dbnet/synth_data_train_100k_ic15_test.py output/new_10k_synthtext/epoch_1.pth --save-preds 2 | -------------------------------------------------------------------------------- /mmocr/my_train.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python tools/train.py configs/textdet/dbnet/synth_data_train_100k_ic15_test.py --work-dir output/new_SD_base_10000_curve --amp 2 | -------------------------------------------------------------------------------- /mmocr/projects/ABCNet/abcnet/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | from .metric import * # NOQA 4 | from .model import * # NOQA 5 | from .utils import * # NOQA 6 | -------------------------------------------------------------------------------- /mmocr/projects/ABCNet/abcnet/metric/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .e2e_hmean_iou_metric import E2EHmeanIOUMetric 3 | 4 | __all__ = ['E2EHmeanIOUMetric'] 5 | -------------------------------------------------------------------------------- /mmocr/projects/ABCNet/abcnet/model/abcnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmocr.registry import MODELS 3 | from .two_stage_text_spotting import TwoStageTextSpotter 4 | 5 | 6 | @MODELS.register_module() 7 | class ABCNet(TwoStageTextSpotter): 8 | """CTC-loss based recognizer.""" 9 | -------------------------------------------------------------------------------- /mmocr/projects/ABCNet/abcnet/model/abcnet_rec.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from mmocr.models.textrecog import EncoderDecoderRecognizer 3 | from mmocr.registry import MODELS 4 | 5 | 6 | @MODELS.register_module() 7 | class ABCNetRec(EncoderDecoderRecognizer): 8 | """CTC-loss based recognizer.""" 9 | -------------------------------------------------------------------------------- /mmocr/projects/ABCNet/abcnet/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .bezier_utils import bezier2poly, poly2bezier 3 | 4 | __all__ = ['poly2bezier', 'bezier2poly'] 5 | -------------------------------------------------------------------------------- /mmocr/projects/ABCNet/config/_base_/datasets/icdar2015.py: -------------------------------------------------------------------------------- 1 | icdar2015_textspotting_data_root = 'data/icdar2015' 2 | 3 | icdar2015_textspotting_train = dict( 4 | type='OCRDataset', 5 | data_root=icdar2015_textspotting_data_root, 6 | ann_file='textspotting_train.json', 7 | pipeline=None) 8 | 9 | icdar2015_textspotting_test = dict( 10 | type='OCRDataset', 11 | data_root=icdar2015_textspotting_data_root, 12 | ann_file='textspotting_test.json', 13 | test_mode=True, 14 | # indices=50, 15 | pipeline=None) 16 | -------------------------------------------------------------------------------- /mmocr/projects/ABCNet/config/_base_/schedules/schedule_sgd_500e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optim_wrapper = dict( 3 | type='OptimWrapper', 4 | optimizer=dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001), 5 | clip_grad=dict(type='value', clip_value=1)) 6 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=500, val_interval=20) 7 | val_cfg = dict(type='ValLoop') 8 | test_cfg = dict(type='TestLoop') 9 | # learning policy 10 | param_scheduler = [ 11 | dict(type='LinearLR', end=1000, start_factor=0.001, by_epoch=False), 12 | ] 13 | 
-------------------------------------------------------------------------------- /mmocr/projects/ABCNet/config/abcnet_v2/abcnet-v2_resnet50_bifpn_500e_icdar2015.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '_base_abcnet-v2_resnet50_bifpn.py', 3 | '../_base_/datasets/icdar2015.py', 4 | '../_base_/default_runtime.py', 5 | ] 6 | 7 | # dataset settings 8 | icdar2015_textspotting_test = _base_.icdar2015_textspotting_test 9 | icdar2015_textspotting_test.pipeline = _base_.test_pipeline 10 | 11 | val_dataloader = dict( 12 | batch_size=1, 13 | num_workers=4, 14 | persistent_workers=True, 15 | sampler=dict(type='DefaultSampler', shuffle=False), 16 | dataset=icdar2015_textspotting_test) 17 | 18 | test_dataloader = val_dataloader 19 | 20 | val_cfg = dict(type='ValLoop') 21 | test_cfg = dict(type='TestLoop') 22 | 23 | custom_imports = dict(imports=['abcnet'], allow_failed_imports=False) 24 | -------------------------------------------------------------------------------- /mmocr/projects/ABCNet/dicts/abcnet.txt: -------------------------------------------------------------------------------- 1 | 2 | ! 3 | " 4 | # 5 | $ 6 | % 7 | & 8 | ' 9 | ( 10 | ) 11 | * 12 | + 13 | , 14 | - 15 | . 16 | / 17 | 0 18 | 1 19 | 2 20 | 3 21 | 4 22 | 5 23 | 6 24 | 7 25 | 8 26 | 9 27 | : 28 | ; 29 | < 30 | = 31 | > 32 | ? 
33 | @ 34 | A 35 | B 36 | C 37 | D 38 | E 39 | F 40 | G 41 | H 42 | I 43 | J 44 | K 45 | L 46 | M 47 | N 48 | O 49 | P 50 | Q 51 | R 52 | S 53 | T 54 | U 55 | V 56 | W 57 | X 58 | Y 59 | Z 60 | [ 61 | \ 62 | ] 63 | ^ 64 | _ 65 | ` 66 | a 67 | b 68 | c 69 | d 70 | e 71 | f 72 | g 73 | h 74 | i 75 | j 76 | k 77 | l 78 | m 79 | n 80 | o 81 | p 82 | q 83 | r 84 | s 85 | t 86 | u 87 | v 88 | w 89 | x 90 | y 91 | z 92 | { 93 | | 94 | } 95 | ~ -------------------------------------------------------------------------------- /mmocr/projects/SPTS/config/_base_/datasets/ctw1500-spts.py: -------------------------------------------------------------------------------- 1 | ctw1500_textspotting_data_root = 'data/CTW1500' 2 | 3 | ctw1500_textspotting_train = dict( 4 | type='AdelDataset', 5 | data_root=ctw1500_textspotting_data_root, 6 | ann_file='annotations/train_ctw1500_maxlen25_v2.json', 7 | data_prefix=dict(img_path='ctwtrain_text_image/'), 8 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 9 | pipeline=None) 10 | 11 | ctw1500_textspotting_test = dict( 12 | type='AdelDataset', 13 | data_root=ctw1500_textspotting_data_root, 14 | ann_file='annotations/test_ctw1500_maxlen25.json', 15 | data_prefix=dict(img_path='ctwtest_text_image/'), 16 | test_mode=True, 17 | pipeline=None) 18 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/config/_base_/datasets/icdar2013-spts.py: -------------------------------------------------------------------------------- 1 | icdar2013_textspotting_data_root = 'spts-data/icdar2013' 2 | 3 | icdar2013_textspotting_train = dict( 4 | type='AdelDataset', 5 | data_root=icdar2013_textspotting_data_root, 6 | ann_file='ic13_train.json', 7 | data_prefix=dict(img_path='train_images/'), 8 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 9 | pipeline=None) 10 | 11 | icdar2013_textspotting_test = dict( 12 | type='AdelDataset', 13 | data_root=icdar2013_textspotting_data_root, 14 | 
data_prefix=dict(img_path='test_images/'), 15 | ann_file='ic13_test.json', 16 | test_mode=True, 17 | pipeline=None) 18 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/config/_base_/datasets/icdar2013.py: -------------------------------------------------------------------------------- 1 | icdar2013_textspotting_data_root = 'data/icdar2013' 2 | 3 | icdar2013_textspotting_train = dict( 4 | type='OCRDataset', 5 | data_root=icdar2013_textspotting_data_root, 6 | ann_file='textspotting_train.json', 7 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 8 | pipeline=None) 9 | 10 | icdar2013_textspotting_test = dict( 11 | type='OCRDataset', 12 | data_root=icdar2013_textspotting_data_root, 13 | ann_file='textspotting_test.json', 14 | test_mode=True, 15 | pipeline=None) 16 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/config/_base_/datasets/icdar2015-spts.py: -------------------------------------------------------------------------------- 1 | icdar2015_textspotting_data_root = 'spts-data/icdar2015' 2 | 3 | icdar2015_textspotting_train = dict( 4 | type='AdelDataset', 5 | data_root=icdar2015_textspotting_data_root, 6 | ann_file='ic15_train.json', 7 | data_prefix=dict(img_path='train_images/'), 8 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 9 | pipeline=None) 10 | 11 | icdar2015_textspotting_test = dict( 12 | type='AdelDataset', 13 | data_root=icdar2015_textspotting_data_root, 14 | data_prefix=dict(img_path='test_images/'), 15 | ann_file='ic15_test.json', 16 | test_mode=True, 17 | pipeline=None) 18 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/config/_base_/datasets/icdar2015.py: -------------------------------------------------------------------------------- 1 | icdar2015_textspotting_data_root = 'data/icdar2015' 2 | 3 | icdar2015_textspotting_train = dict( 4 | type='OCRDataset', 5 | 
data_root=icdar2015_textspotting_data_root, 6 | ann_file='textspotting_train.json', 7 | pipeline=None) 8 | 9 | icdar2015_textspotting_test = dict( 10 | type='OCRDataset', 11 | data_root=icdar2015_textspotting_data_root, 12 | ann_file='textspotting_test.json', 13 | test_mode=True, 14 | pipeline=None) 15 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/config/_base_/datasets/mlt-spts.py: -------------------------------------------------------------------------------- 1 | mlt_textspotting_data_root = 'spts-data/mlt2017' 2 | 3 | mlt_textspotting_train = dict( 4 | type='AdelDataset', 5 | data_root=mlt_textspotting_data_root, 6 | ann_file='train.json', 7 | data_prefix=dict(img_path='MLT_train_images/'), 8 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 9 | pipeline=None) 10 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/config/_base_/datasets/syntext1-spts.py: -------------------------------------------------------------------------------- 1 | syntext1_textspotting_data_root = 'spts-data/syntext1' 2 | 3 | syntext1_textspotting_train = dict( 4 | type='AdelDataset', 5 | data_root=syntext1_textspotting_data_root, 6 | ann_file='train.json', 7 | data_prefix=dict(img_path='syntext_word_eng/'), 8 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 9 | pipeline=None) 10 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/config/_base_/datasets/syntext2-spts.py: -------------------------------------------------------------------------------- 1 | syntext2_textspotting_data_root = 'spts-data/syntext2' 2 | 3 | syntext2_textspotting_train = dict( 4 | type='AdelDataset', 5 | data_root=syntext2_textspotting_data_root, 6 | ann_file='train.json', 7 | data_prefix=dict(img_path='emcs_imgs/'), 8 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 9 | pipeline=None) 10 | 
-------------------------------------------------------------------------------- /mmocr/projects/SPTS/config/_base_/datasets/totaltext-spts.py: -------------------------------------------------------------------------------- 1 | totaltext_textspotting_data_root = 'spts-data/totaltext' 2 | 3 | totaltext_textspotting_train = dict( 4 | type='AdelDataset', 5 | data_root=totaltext_textspotting_data_root, 6 | ann_file='train.json', 7 | data_prefix=dict(img_path='train_images/'), 8 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 9 | pipeline=None) 10 | 11 | totaltext_textspotting_test = dict( 12 | type='AdelDataset', 13 | data_root=totaltext_textspotting_data_root, 14 | ann_file='test.json', 15 | data_prefix=dict(img_path='test_images/'), 16 | test_mode=True, 17 | pipeline=None) 18 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/config/_base_/datasets/totaltext.py: -------------------------------------------------------------------------------- 1 | totaltext_textspotting_data_root = 'data/totaltext' 2 | 3 | totaltext_textspotting_train = dict( 4 | type='OCRDataset', 5 | data_root=totaltext_textspotting_data_root, 6 | ann_file='textspotting_train.json', 7 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 8 | pipeline=None) 9 | 10 | totaltext_textspotting_test = dict( 11 | type='OCRDataset', 12 | data_root=totaltext_textspotting_data_root, 13 | ann_file='textspotting_test.json', 14 | test_mode=True, 15 | pipeline=None) 16 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/dicts/spts.txt: -------------------------------------------------------------------------------- 1 | 2 | ! 3 | " 4 | # 5 | $ 6 | % 7 | & 8 | ' 9 | ( 10 | ) 11 | * 12 | + 13 | , 14 | - 15 | . 16 | / 17 | 0 18 | 1 19 | 2 20 | 3 21 | 4 22 | 5 23 | 6 24 | 7 25 | 8 26 | 9 27 | : 28 | ; 29 | < 30 | = 31 | > 32 | ? 
33 | @ 34 | A 35 | B 36 | C 37 | D 38 | E 39 | F 40 | G 41 | H 42 | I 43 | J 44 | K 45 | L 46 | M 47 | N 48 | O 49 | P 50 | Q 51 | R 52 | S 53 | T 54 | U 55 | V 56 | W 57 | X 58 | Y 59 | Z 60 | [ 61 | \ 62 | ] 63 | ^ 64 | _ 65 | ` 66 | a 67 | b 68 | c 69 | d 70 | e 71 | f 72 | g 73 | h 74 | i 75 | j 76 | k 77 | l 78 | m 79 | n 80 | o 81 | p 82 | q 83 | r 84 | s 85 | t 86 | u 87 | v 88 | w 89 | x 90 | y 91 | z 92 | { 93 | | 94 | } 95 | ~ -------------------------------------------------------------------------------- /mmocr/projects/SPTS/spts/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | from .datasets import * # NOQA 4 | from .metric import * # NOQA 5 | from .model import * # NOQA 6 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/spts/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .adel_dataset import AdelDataset 3 | from .transforms.spts_transforms import (Bezier2Polygon, ConvertText, 4 | LoadOCRAnnotationsWithBezier, 5 | Polygon2Bezier, RescaleToShortSide) 6 | 7 | __all__ = [ 8 | 'AdelDataset', 'LoadOCRAnnotationsWithBezier', 'Bezier2Polygon', 9 | 'Polygon2Bezier', 'ConvertText', 'RescaleToShortSide' 10 | ] 11 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/spts/metric/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .e2e_point_metric import E2EPointMetric 3 | 4 | __all__ = ['E2EPointMetric'] 5 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/spts/model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .spts import SPTS 3 | from .spts_decoder import SPTSDecoder 4 | from .spts_dictionary import SPTSDictionary 5 | from .spts_encoder import SPTSEncoder 6 | from .spts_module_loss import SPTSModuleLoss 7 | from .spts_postprocessor import SPTSPostprocessor 8 | 9 | __all__ = [ 10 | 'SPTSEncoder', 'SPTSDecoder', 'SPTSPostprocessor', 'SPTS', 11 | 'SPTSDictionary', 'SPTSModuleLoss' 12 | ] 13 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/spts/model/spts.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from mmocr.registry import MODELS 3 | from .encoder_decoder_text_spotter import EncoderDecoderTextSpotter 4 | 5 | 6 | @MODELS.register_module() 7 | class SPTS(EncoderDecoderTextSpotter): 8 | """SPTS.""" 9 | -------------------------------------------------------------------------------- /mmocr/projects/example_project/configs/dbnet_dummy-resnet_fpnc_1200e_icdar2015.py: -------------------------------------------------------------------------------- 1 | _base_ = ['mmocr::textdet/dbnet/dbnet_resnet18_fpnc_1200e_icdar2015.py'] 2 | 3 | custom_imports = dict(imports=['dummy']) 4 | 5 | _base_.model.backbone.type = 'DummyResNet' 6 | -------------------------------------------------------------------------------- /mmocr/projects/example_project/dummy/__init__.py: -------------------------------------------------------------------------------- 1 | from .dummy_resnet import DummyResNet 2 | 3 | __all__ = ['DummyResNet'] 4 | -------------------------------------------------------------------------------- /mmocr/projects/example_project/dummy/dummy_resnet.py: -------------------------------------------------------------------------------- 1 | from mmdet.models.backbones import ResNet 2 | 3 | from mmocr.registry import MODELS 4 | 5 | 6 | @MODELS.register_module() 7 | class DummyResNet(ResNet): 8 | """Implements a dummy ResNet wrapper for demonstration purpose. 9 | 10 | Args: 11 | **kwargs: All the arguments are passed to the parent class. 
12 | """ 13 | 14 | def __init__(self, **kwargs) -> None: 15 | print('Hello world!') 16 | super().__init__(**kwargs) 17 | -------------------------------------------------------------------------------- /mmocr/projects/selected.txt: -------------------------------------------------------------------------------- 1 | projects/ABCNet/README.md 2 | projects/ABCNet/README_V2.md 3 | projects/SPTS/README.md 4 | -------------------------------------------------------------------------------- /mmocr/requirements/albu.txt: -------------------------------------------------------------------------------- 1 | albumentations>=1.1.0 --no-binary qudida,albumentations 2 | -------------------------------------------------------------------------------- /mmocr/requirements/build.txt: -------------------------------------------------------------------------------- 1 | # These must be installed before building mmocr 2 | numpy 3 | pyclipper 4 | torch>=1.1 5 | -------------------------------------------------------------------------------- /mmocr/requirements/docs.txt: -------------------------------------------------------------------------------- 1 | docutils==0.16.0 2 | markdown>=3.4.0 3 | myst-parser 4 | -e git+https://github.com/open-mmlab/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme 5 | sphinx==4.0.2 6 | sphinx-tabs 7 | sphinx_copybutton 8 | sphinx_markdown_tables>=0.0.16 9 | tabulate 10 | -------------------------------------------------------------------------------- /mmocr/requirements/mminstall.txt: -------------------------------------------------------------------------------- 1 | mmcv>=2.0.0rc4,<2.1.0 2 | mmdet>=3.0.0rc5,<3.1.0 3 | mmengine>=0.7.0, <1.0.0 4 | -------------------------------------------------------------------------------- /mmocr/requirements/optional.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/requirements/optional.txt -------------------------------------------------------------------------------- /mmocr/requirements/readthedocs.txt: -------------------------------------------------------------------------------- 1 | imgaug 2 | kwarray 3 | lmdb 4 | matplotlib 5 | mmcv>=2.0.0rc1 6 | mmdet>=3.0.0rc0 7 | mmengine>=0.1.0 8 | pyclipper 9 | rapidfuzz>=2.0.0 10 | regex 11 | scikit-image 12 | scipy 13 | shapely 14 | titlecase 15 | torch 16 | torchvision 17 | -------------------------------------------------------------------------------- /mmocr/requirements/runtime.txt: -------------------------------------------------------------------------------- 1 | imgaug 2 | lmdb 3 | matplotlib 4 | numpy 5 | opencv-python >=4.2.0.32, != 4.5.5.* # avoid Github security alert 6 | pyclipper 7 | pycocotools 8 | rapidfuzz>=2.0.0 9 | scikit-image 10 | -------------------------------------------------------------------------------- /mmocr/requirements/tests.txt: -------------------------------------------------------------------------------- 1 | asynctest 2 | codecov 3 | flake8 4 | interrogate 5 | isort 6 | # Note: used for kwarray.group_items, this may be ported to mmcv in the future. 
7 | kwarray 8 | lanms-neo==1.0.2 9 | parameterized 10 | pytest 11 | pytest-cov 12 | pytest-runner 13 | ubelt 14 | xdoctest >= 0.10.0 15 | yapf 16 | -------------------------------------------------------------------------------- /mmocr/resources/illustration.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/resources/illustration.jpg -------------------------------------------------------------------------------- /mmocr/resources/kie.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/resources/kie.jpg -------------------------------------------------------------------------------- /mmocr/resources/mmocr-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/resources/mmocr-logo.png -------------------------------------------------------------------------------- /mmocr/resources/textdet.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/resources/textdet.jpg -------------------------------------------------------------------------------- /mmocr/resources/textrecog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/resources/textrecog.jpg -------------------------------------------------------------------------------- /mmocr/resources/verification.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/resources/verification.png -------------------------------------------------------------------------------- /mmocr/setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | 4 | [yapf] 5 | based_on_style = pep8 6 | blank_line_before_nested_class_or_def = true 7 | split_before_expression_after_opening_paren = true 8 | split_penalty_import_names=0 9 | SPLIT_PENALTY_AFTER_OPENING_BRACKET=800 10 | 11 | [isort] 12 | line_length = 79 13 | multi_line_output = 0 14 | extra_standard_library = setuptools 15 | known_first_party = mmocr 16 | known_third_party = PIL,cv2,imgaug,lanms,lmdb,matplotlib,mmcv,mmdet,numpy,packaging,pyclipper,pytest,pytorch_sphinx_theme,rapidfuzz,requests,scipy,shapely,skimage,titlecase,torch,torchvision,ts,yaml,mmengine 17 | no_lines_before = STDLIB,LOCALFOLDER 18 | default_section = THIRDPARTY 19 | 20 | [style] 21 | BASED_ON_STYLE = pep8 22 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 23 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 24 | -------------------------------------------------------------------------------- /mmocr/tests/test_evaluation/test_functional/test_hmean.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from unittest import TestCase 3 | 4 | from mmocr.evaluation.functional import compute_hmean 5 | 6 | 7 | class TestHmean(TestCase): 8 | 9 | def test_compute_hmean(self): 10 | with self.assertRaises(AssertionError): 11 | compute_hmean(0, 0, 0.0, 0) 12 | with self.assertRaises(AssertionError): 13 | compute_hmean(0, 0, 0, 0.0) 14 | with self.assertRaises(AssertionError): 15 | compute_hmean([1], 0, 0, 0) 16 | with self.assertRaises(AssertionError): 17 | compute_hmean(0, [1], 0, 0) 18 | 19 | _, _, hmean = compute_hmean(2, 2, 2, 2) 20 | self.assertEqual(hmean, 1) 21 | 22 | _, _, hmean = compute_hmean(0, 0, 2, 2) 23 | self.assertEqual(hmean, 0) 24 | -------------------------------------------------------------------------------- /mmocr/tests/test_models/test_common/test_modules/test_transformer_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from unittest import TestCase 3 | 4 | import torch 5 | 6 | from mmocr.models.common.modules import PositionalEncoding 7 | 8 | 9 | class TestPositionalEncoding(TestCase): 10 | 11 | def test_forward(self): 12 | pos_encoder = PositionalEncoding() 13 | x = torch.rand(1, 30, 512) 14 | out = pos_encoder(x) 15 | assert out.size() == x.size() 16 | -------------------------------------------------------------------------------- /mmocr/tests/test_models/test_common/test_plugins/test_avgpool.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from unittest import TestCase 3 | 4 | import torch 5 | 6 | from mmocr.models.common.plugins import AvgPool2d 7 | 8 | 9 | class TestAvgPool2d(TestCase): 10 | 11 | def setUp(self) -> None: 12 | self.img = torch.rand(1, 3, 32, 100) 13 | 14 | def test_avgpool2d(self): 15 | avgpool2d = AvgPool2d(kernel_size=2, stride=2) 16 | self.assertEqual(avgpool2d(self.img).shape, torch.Size([1, 3, 16, 50])) 17 | -------------------------------------------------------------------------------- /mmocr/tests/test_models/test_textdet/test_heads/test_pse_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from unittest import TestCase 3 | 4 | import torch 5 | 6 | from mmocr.models.textdet.heads import PSEHead 7 | 8 | 9 | class TestPSEHead(TestCase): 10 | 11 | def setUp(self): 12 | self.feature = torch.randn((2, 10, 40, 50)) 13 | 14 | def test_init(self): 15 | with self.assertRaises(TypeError): 16 | PSEHead(in_channels=1) 17 | 18 | with self.assertRaises(TypeError): 19 | PSEHead(out_channels='out') 20 | 21 | def test_forward(self): 22 | pse_head = PSEHead(in_channels=[10], hidden_dim=128, out_channel=7) 23 | results = pse_head(self.feature) 24 | self.assertEqual(results.shape, (2, 7, 40, 50)) 25 | -------------------------------------------------------------------------------- /mmocr/tests/test_models/test_textdet/test_heads/test_textsnake_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from unittest import TestCase 3 | 4 | import torch 5 | 6 | from mmocr.models.textdet.heads import TextSnakeHead 7 | 8 | 9 | class TestTextSnakeHead(TestCase): 10 | 11 | def test_init(self): 12 | with self.assertRaises(AssertionError): 13 | TextSnakeHead(in_channels='test') 14 | 15 | def test_forward(self): 16 | ts_head = TextSnakeHead(in_channels=10) 17 | data = torch.randn((2, 10, 40, 50)) 18 | results = ts_head(data, None) 19 | self.assertEqual(results.shape, (2, 5, 40, 50)) 20 | -------------------------------------------------------------------------------- /mmocr/tests/test_models/test_textdet/test_necks/test_fpnf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import unittest 3 | 4 | import torch 5 | from parameterized import parameterized 6 | 7 | from mmocr.models.textdet.necks import FPNF 8 | 9 | 10 | class TestFPNF(unittest.TestCase): 11 | 12 | def setUp(self): 13 | in_channels = [256, 512, 1024, 2048] 14 | size = [112, 56, 28, 14] 15 | inputs = [] 16 | for i in range(4): 17 | inputs.append(torch.rand(1, in_channels[i], size[i], size[i])) 18 | self.inputs = inputs 19 | 20 | @parameterized.expand([('concat'), ('add')]) 21 | def test_forward(self, fusion_type): 22 | fpnf = FPNF(fusion_type=fusion_type) 23 | outputs = fpnf.forward(self.inputs) 24 | self.assertListEqual(list(outputs.size()), [1, 256, 112, 112]) 25 | -------------------------------------------------------------------------------- /mmocr/tests/test_models/test_textrecog/test_backbones/test_mini_vgg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from unittest import TestCase 3 | 4 | import torch 5 | 6 | from mmocr.models.textrecog.backbones import MiniVGG 7 | 8 | 9 | class TestMiniVGG(TestCase): 10 | 11 | def test_forward(self): 12 | 13 | model = MiniVGG() 14 | model.init_weights() 15 | model.train() 16 | 17 | imgs = torch.randn(1, 3, 32, 160) 18 | feats = model(imgs) 19 | self.assertEqual(feats.shape, torch.Size([1, 512, 1, 41])) 20 | -------------------------------------------------------------------------------- /mmocr/tests/test_models/test_textrecog/test_backbones/test_mobilenet_v2.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from unittest import TestCase 3 | 4 | import torch 5 | 6 | from mmocr.models.textrecog.backbones import MobileNetV2 7 | 8 | 9 | class TestMobileNetV2(TestCase): 10 | 11 | def setUp(self) -> None: 12 | self.img = torch.rand(1, 3, 32, 160) 13 | 14 | def test_mobilenetv2(self): 15 | mobilenet_v2 = MobileNetV2() 16 | self.assertEqual( 17 | mobilenet_v2(self.img).shape, torch.Size([1, 1280, 1, 43])) 18 | -------------------------------------------------------------------------------- /mmocr/tests/test_models/test_textrecog/test_backbones/test_nrtr_modality_transformer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import unittest 3 | 4 | import torch 5 | 6 | from mmocr.models.textrecog.backbones import NRTRModalityTransform 7 | 8 | 9 | class TestNRTRBackbone(unittest.TestCase): 10 | 11 | def setUp(self): 12 | self.img = torch.randn(2, 3, 32, 100) 13 | 14 | def test_encoder(self): 15 | nrtr_backbone = NRTRModalityTransform() 16 | nrtr_backbone.init_weights() 17 | nrtr_backbone.train() 18 | out_enc = nrtr_backbone(self.img) 19 | self.assertEqual(out_enc.shape, torch.Size([2, 512, 1, 25])) 20 | -------------------------------------------------------------------------------- /mmocr/tests/test_models/test_textrecog/test_backbones/test_shallow_cnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import unittest 3 | 4 | import torch 5 | 6 | from mmocr.models.textrecog.backbones import ShallowCNN 7 | 8 | 9 | class TestShallowCNN(unittest.TestCase): 10 | 11 | def setUp(self): 12 | self.imgs = torch.randn(1, 1, 32, 100) 13 | 14 | def test_shallow_cnn(self): 15 | 16 | model = ShallowCNN() 17 | model.init_weights() 18 | model.train() 19 | 20 | feat = model(self.imgs) 21 | self.assertEqual(feat.shape, torch.Size([1, 512, 8, 25])) 22 | -------------------------------------------------------------------------------- /mmocr/tests/test_models/test_textrecog/test_encoders/test_abi_encoder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from unittest import TestCase 3 | 4 | import torch 5 | 6 | from mmocr.models.textrecog.encoders.abi_encoder import ABIEncoder 7 | 8 | 9 | class TestABIEncoder(TestCase): 10 | 11 | def test_init(self): 12 | with self.assertRaises(AssertionError): 13 | ABIEncoder(d_model=512, n_head=10) 14 | 15 | def test_forward(self): 16 | model = ABIEncoder() 17 | x = torch.randn(10, 512, 8, 32) 18 | self.assertEqual(model(x, None).shape, torch.Size([10, 512, 8, 32])) 19 | -------------------------------------------------------------------------------- /mmocr/tests/test_models/test_textrecog/test_encoders/test_aster_encoder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import unittest 3 | 4 | import torch 5 | 6 | from mmocr.models.textrecog.encoders import ASTEREncoder 7 | 8 | 9 | class TestASTEREncoder(unittest.TestCase): 10 | 11 | def test_encoder(self): 12 | encoder = ASTEREncoder(10) 13 | feat = torch.randn(2, 10, 1, 25) 14 | out = encoder(feat) 15 | self.assertEqual(out.shape, torch.Size([2, 25, 10])) 16 | -------------------------------------------------------------------------------- /mmocr/tests/test_models/test_textrecog/test_plugins/test_maxpool.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from unittest import TestCase 3 | 4 | import torch 5 | 6 | from mmocr.models.textrecog.plugins import Maxpool2d 7 | 8 | 9 | class TestMaxpool2d(TestCase): 10 | 11 | def setUp(self) -> None: 12 | self.img = torch.rand(1, 3, 32, 100) 13 | 14 | def test_maxpool2d(self): 15 | maxpool2d = Maxpool2d(kernel_size=2, stride=2) 16 | self.assertEqual(maxpool2d(self.img).shape, torch.Size([1, 3, 16, 50])) 17 | -------------------------------------------------------------------------------- /mmocr/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | NNODES=${NNODES:-1} 7 | NODE_RANK=${NODE_RANK:-0} 8 | PORT=${PORT:-29500} 9 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 10 | 11 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 12 | python -m torch.distributed.launch \ 13 | --nnodes=$NNODES \ 14 | --node_rank=$NODE_RANK \ 15 | --master_addr=$MASTER_ADDR \ 16 | --nproc_per_node=$GPUS \ 17 | --master_port=$PORT \ 18 | $(dirname "$0")/test.py \ 19 | $CONFIG \ 20 | $CHECKPOINT \ 21 | --launcher pytorch \ 22 | ${@:4} 23 | -------------------------------------------------------------------------------- /mmocr/tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | 2 | #!/usr/bin/env bash 3 | 4 | CONFIG=$1 5 | GPUS=$2 6 | NNODES=${NNODES:-1} 7 | NODE_RANK=${NODE_RANK:-0} 8 | PORT=${PORT:-29500} 9 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 10 | 11 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 12 | python -m torch.distributed.launch \ 13 | --nnodes=$NNODES \ 14 | --node_rank=$NODE_RANK \ 15 | --master_addr=$MASTER_ADDR \ 16 | --nproc_per_node=$GPUS \ 17 | --master_port=$PORT \ 18 | $(dirname "$0")/train.py \ 19 | $CONFIG \ 20 | --launcher pytorch ${@:3} 21 | -------------------------------------------------------------------------------- /mmocr/tools/slurm_test.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | export PYTHONPATH=`pwd`:$PYTHONPATH 5 | 6 | PARTITION=$1 7 | JOB_NAME=$2 8 | CONFIG=$3 9 | CHECKPOINT=$4 10 | GPUS=${GPUS:-8} 11 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --kill-on-bad-exit=1 \ 21 | ${SRUN_ARGS} \ 22 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 23 | -------------------------------------------------------------------------------- /mmocr/tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export MASTER_PORT=$((12000 + $RANDOM % 20000)) 3 | 4 | set -x 5 | 6 | PARTITION=$1 7 | JOB_NAME=$2 8 | CONFIG=$3 9 | WORK_DIR=$4 10 | GPUS=${GPUS:-8} 11 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 12 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 13 | PY_ARGS=${@:5} 14 | SRUN_ARGS=${SRUN_ARGS:-""} 15 | 16 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 17 | srun -p ${PARTITION} \ 18 | --job-name=${JOB_NAME} \ 19 | --gres=gpu:${GPUS_PER_NODE} \ 20 | --ntasks=${GPUS} \ 21 | --ntasks-per-node=${GPUS_PER_NODE} \ 22 | --cpus-per-task=${CPUS_PER_TASK} \ 23 | --kill-on-bad-exit=1 \ 24 | ${SRUN_ARGS} \ 25 | python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 26 | -------------------------------------------------------------------------------- /textfussion/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include src/diffusers/utils/model_card_template.md 3 | -------------------------------------------------------------------------------- /textfussion/README.md: -------------------------------------------------------------------------------- 1 | 
该项目主要基于diffusers==0.15.0.dev0框架,请依照requirements.txt进行环境的搭建 2 | 3 | 准备好文本图像训练集后,通过./my_inpainting/new_paradigm_train.sh脚本进行生成模型的训练 4 | 5 | 完成模型训练后,通过./my_inpainting/my_build_synth_data_baseline.py脚本,制作合成数据集 6 | -------------------------------------------------------------------------------- /textfussion/_typos.toml: -------------------------------------------------------------------------------- 1 | # Files for typos 2 | # Instruction: https://github.com/marketplace/actions/typos-action#getting-started 3 | 4 | [default.extend-identifiers] 5 | 6 | [default.extend-words] 7 | NIN="NIN" # NIN is used in scripts/convert_ncsnpp_original_checkpoint_to_diffusers.py 8 | nd="np" # nd may be np (numpy) 9 | parms="parms" # parms is used in scripts/convert_original_stable_diffusion_to_diffusers.py 10 | 11 | 12 | [files] 13 | extend-exclude = ["_typos.toml"] 14 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | from .rl import ValueGuidedRLPipeline 2 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/experimental/rl/__init__.py: -------------------------------------------------------------------------------- 1 | from .value_guided_sampling import ValueGuidedRLPipeline 2 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/audio_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from .mel import Mel 2 | from .pipeline_audio_diffusion import AudioDiffusionPipeline 3 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/audioldm/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import ( 
2 | OptionalDependencyNotAvailable, 3 | is_torch_available, 4 | is_transformers_available, 5 | is_transformers_version, 6 | ) 7 | 8 | 9 | try: 10 | if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.27.0")): 11 | raise OptionalDependencyNotAvailable() 12 | except OptionalDependencyNotAvailable: 13 | from ...utils.dummy_torch_and_transformers_objects import ( 14 | AudioLDMPipeline, 15 | ) 16 | else: 17 | from .pipeline_audioldm import AudioLDMPipeline 18 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/dance_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_dance_diffusion import DanceDiffusionPipeline 2 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/ddim/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_ddim import DDIMPipeline 2 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/ddpm/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_ddpm import DDPMPipeline 2 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/dit/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_dit import DiTPipeline 2 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/latent_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import is_transformers_available 2 | from .pipeline_latent_diffusion_superresolution import LDMSuperResolutionPipeline 3 | 4 | 
5 | if is_transformers_available(): 6 | from .pipeline_latent_diffusion import LDMBertModel, LDMTextToImagePipeline 7 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/latent_diffusion_uncond/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_latent_diffusion_uncond import LDMPipeline 2 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/paint_by_example/__init__.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List, Optional, Union 3 | 4 | import numpy as np 5 | import PIL 6 | from PIL import Image 7 | 8 | from ...utils import is_torch_available, is_transformers_available 9 | 10 | 11 | if is_transformers_available() and is_torch_available(): 12 | from .image_encoder import PaintByExampleImageEncoder 13 | from .pipeline_paint_by_example import PaintByExamplePipeline 14 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/pndm/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_pndm import PNDMPipeline 2 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/repaint/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_repaint import RePaintPipeline 2 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/score_sde_ve/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_score_sde_ve import ScoreSdeVePipeline 2 | 
-------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/stochastic_karras_ve/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_stochastic_karras_ve import KarrasVePipeline 2 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/unclip/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import ( 2 | OptionalDependencyNotAvailable, 3 | is_torch_available, 4 | is_transformers_available, 5 | is_transformers_version, 6 | ) 7 | 8 | 9 | try: 10 | if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.25.0")): 11 | raise OptionalDependencyNotAvailable() 12 | except OptionalDependencyNotAvailable: 13 | from ...utils.dummy_torch_and_transformers_objects import UnCLIPImageVariationPipeline, UnCLIPPipeline 14 | else: 15 | from .pipeline_unclip import UnCLIPPipeline 16 | from .pipeline_unclip_image_variation import UnCLIPImageVariationPipeline 17 | from .text_proj import UnCLIPTextProjModel 18 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/vq_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import is_torch_available, is_transformers_available 2 | 3 | 4 | if is_transformers_available() and is_torch_available(): 5 | from .pipeline_vq_diffusion import LearnedClassifierFreeSamplingEmbeddings, VQDiffusionPipeline 6 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/utils/dummy_note_seq_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class MidiProcessor(metaclass=DummyObject): 6 | _backends = ["note_seq"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["note_seq"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["note_seq"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["note_seq"]) 18 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/utils/dummy_onnx_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class OnnxRuntimeModel(metaclass=DummyObject): 6 | _backends = ["onnx"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["onnx"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["onnx"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["onnx"]) 18 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/utils/dummy_torch_and_scipy_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class LMSDiscreteScheduler(metaclass=DummyObject): 6 | _backends = ["torch", "scipy"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["torch", "scipy"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["torch", "scipy"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["torch", "scipy"]) 18 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/utils/dummy_torch_and_transformers_and_k_diffusion_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class StableDiffusionKDiffusionPipeline(metaclass=DummyObject): 6 | _backends = ["torch", "transformers", "k_diffusion"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["torch", "transformers", "k_diffusion"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["torch", "transformers", "k_diffusion"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["torch", "transformers", "k_diffusion"]) 18 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/utils/dummy_transformers_and_torch_and_note_seq_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class SpectrogramDiffusionPipeline(metaclass=DummyObject): 6 | _backends = ["transformers", "torch", "note_seq"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["transformers", "torch", "note_seq"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["transformers", "torch", "note_seq"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["transformers", "torch", "note_seq"]) 18 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/utils/pil_utils.py: -------------------------------------------------------------------------------- 1 | import PIL.Image 2 | import PIL.ImageOps 3 | from packaging import version 4 | 5 | 6 | if version.parse(version.parse(PIL.__version__).base_version) >= version.parse("9.1.0"): 7 | PIL_INTERPOLATION = { 8 | "linear": PIL.Image.Resampling.BILINEAR, 9 | "bilinear": PIL.Image.Resampling.BILINEAR, 10 | "bicubic": PIL.Image.Resampling.BICUBIC, 11 | "lanczos": PIL.Image.Resampling.LANCZOS, 12 | "nearest": PIL.Image.Resampling.NEAREST, 13 | } 14 | else: 15 | PIL_INTERPOLATION = { 16 | "linear": PIL.Image.LINEAR, 17 | "bilinear": PIL.Image.BILINEAR, 18 | "bicubic": PIL.Image.BICUBIC, 19 | "lanczos": PIL.Image.LANCZOS, 20 | "nearest": PIL.Image.NEAREST, 21 | } 22 | -------------------------------------------------------------------------------- /textfussion/docs/source/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Diffusers installation 4 | ! pip install diffusers transformers datasets accelerate 5 | # To install from source instead of the last release, comment the command above and uncomment the following one. 6 | # ! 
pip install git+https://github.com/huggingface/diffusers.git 7 | """ 8 | 9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] -------------------------------------------------------------------------------- /textfussion/docs/source/en/api/experimental/rl.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # TODO 14 | 15 | Coming soon! -------------------------------------------------------------------------------- /textfussion/docs/source/en/imgs/access_request.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/docs/source/en/imgs/access_request.png -------------------------------------------------------------------------------- /textfussion/docs/source/en/imgs/diffusers_library.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/docs/source/en/imgs/diffusers_library.jpg -------------------------------------------------------------------------------- /textfussion/docs/source/en/using-diffusers/audio.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Using Diffusers for audio 14 | 15 | [`DanceDiffusionPipeline`] and [`AudioDiffusionPipeline`] can be used to generate 16 | audio rapidly! More coming soon! -------------------------------------------------------------------------------- /textfussion/docs/source/ko/in_translation.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # 번역중 14 | 15 | 열심히 번역을 진행중입니다. 조금만 기다려주세요. 16 | 감사합니다! 
-------------------------------------------------------------------------------- /textfussion/examples/community/one_step_unet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import torch 3 | 4 | from diffusers import DiffusionPipeline 5 | 6 | 7 | class UnetSchedulerOneForwardPipeline(DiffusionPipeline): 8 | def __init__(self, unet, scheduler): 9 | super().__init__() 10 | 11 | self.register_modules(unet=unet, scheduler=scheduler) 12 | 13 | def __call__(self): 14 | image = torch.randn( 15 | (1, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size), 16 | ) 17 | timestep = 1 18 | 19 | model_output = self.unet(image, timestep).sample 20 | scheduler_output = self.scheduler.step(model_output, timestep, image).prev_sample 21 | 22 | result = scheduler_output - scheduler_output + torch.ones_like(scheduler_output) 23 | 24 | return result 25 | -------------------------------------------------------------------------------- /textfussion/examples/controlnet/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.25.1 4 | ftfy 5 | tensorboard 6 | datasets 7 | -------------------------------------------------------------------------------- /textfussion/examples/controlnet/requirements_flax.txt: -------------------------------------------------------------------------------- 1 | transformers>=4.25.1 2 | datasets 3 | flax 4 | optax 5 | torch 6 | torchvision 7 | ftfy 8 | tensorboard 9 | Jinja2 10 | -------------------------------------------------------------------------------- /textfussion/examples/dreambooth/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.25.1 4 | ftfy 5 | tensorboard 6 | Jinja2 7 | -------------------------------------------------------------------------------- 
/textfussion/examples/dreambooth/requirements_flax.txt: -------------------------------------------------------------------------------- 1 | transformers>=4.25.1 2 | flax 3 | optax 4 | torch 5 | torchvision 6 | ftfy 7 | tensorboard 8 | Jinja2 9 | -------------------------------------------------------------------------------- /textfussion/examples/inference/image_to_image.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from diffusers import StableDiffusionImg2ImgPipeline # noqa F401 4 | 5 | 6 | warnings.warn( 7 | "The `image_to_image.py` script is outdated. Please use directly `from diffusers import" 8 | " StableDiffusionImg2ImgPipeline` instead." 9 | ) 10 | -------------------------------------------------------------------------------- /textfussion/examples/inference/inpainting.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from diffusers import StableDiffusionInpaintPipeline as StableDiffusionInpaintPipeline # noqa F401 4 | 5 | 6 | warnings.warn( 7 | "The `inpainting.py` script is outdated. Please use directly `from diffusers import" 8 | " StableDiffusionInpaintPipeline` instead." 9 | ) 10 | -------------------------------------------------------------------------------- /textfussion/examples/instruct_pix2pix/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.25.1 4 | datasets 5 | ftfy 6 | tensorboard -------------------------------------------------------------------------------- /textfussion/examples/research_projects/README.md: -------------------------------------------------------------------------------- 1 | # Research projects 2 | 3 | This folder contains various research projects using 🧨 Diffusers. 
4 | They are not really maintained by the core maintainers of this library and often require a specific version of Diffusers that is indicated in the requirements file of each folder. 5 | Updating them to the most recent version of the library will require some work. 6 | 7 | To use any of them, just run the command 8 | 9 | ``` 10 | pip install -r requirements.txt 11 | ``` 12 | inside the folder of your choice. 13 | 14 | If you need help with any of those, please open an issue where you directly ping the author(s), as indicated at the top of the README of each folder. 15 | -------------------------------------------------------------------------------- /textfussion/examples/research_projects/colossalai/inference.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from diffusers import StableDiffusionPipeline 4 | 5 | 6 | model_id = "path-to-your-trained-model" 7 | pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda") 8 | 9 | prompt = "A photo of sks dog in a bucket" 10 | image = pipe(prompt, num_inference_steps=50, guidance_scale=7.5).images[0] 11 | 12 | image.save("dog-bucket.png") 13 | -------------------------------------------------------------------------------- /textfussion/examples/research_projects/colossalai/requirement.txt: -------------------------------------------------------------------------------- 1 | diffusers 2 | torch 3 | torchvision 4 | ftfy 5 | tensorboard 6 | Jinja2 7 | transformers -------------------------------------------------------------------------------- /textfussion/examples/research_projects/dreambooth_inpaint/requirements.txt: -------------------------------------------------------------------------------- 1 | diffusers==0.9.0 2 | accelerate 3 | torchvision 4 | transformers>=4.21.0 5 | ftfy 6 | tensorboard 7 | Jinja2 8 | -------------------------------------------------------------------------------- 
/textfussion/examples/research_projects/intel_opts/textual_inversion/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.21.0 4 | ftfy 5 | tensorboard 6 | Jinja2 7 | intel_extension_for_pytorch>=1.13 8 | -------------------------------------------------------------------------------- /textfussion/examples/research_projects/lora/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.25.1 4 | datasets 5 | ftfy 6 | tensorboard 7 | Jinja2 8 | git+https://github.com/huggingface/peft.git -------------------------------------------------------------------------------- /textfussion/examples/research_projects/mulit_token_textual_inversion/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.25.1 4 | ftfy 5 | tensorboard 6 | Jinja2 7 | -------------------------------------------------------------------------------- /textfussion/examples/research_projects/mulit_token_textual_inversion/requirements_flax.txt: -------------------------------------------------------------------------------- 1 | transformers>=4.25.1 2 | flax 3 | optax 4 | torch 5 | torchvision 6 | ftfy 7 | tensorboard 8 | Jinja2 9 | -------------------------------------------------------------------------------- /textfussion/examples/research_projects/multi_subject_dreambooth/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.25.1 4 | ftfy 5 | tensorboard 6 | Jinja2 -------------------------------------------------------------------------------- /textfussion/examples/research_projects/onnxruntime/README.md: -------------------------------------------------------------------------------- 1 | ## Diffusers examples with ONNXRuntime 
optimizations 2 | 3 | **This research project is not actively maintained by the diffusers team. For any questions or comments, please contact Prathik Rao (prathikr), Sunghoon Choi (hanbitmyths), Ashwini Khade (askhade), or Peng Wang (pengwa) on github with any questions.** 4 | 5 | This aims to provide diffusers examples with ONNXRuntime optimizations for training/fine-tuning unconditional image generation, text to image, and textual inversion. Please see individual directories for more details on how to run each task using ONNXRuntime. -------------------------------------------------------------------------------- /textfussion/examples/research_projects/onnxruntime/text_to_image/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.25.1 4 | datasets 5 | ftfy 6 | tensorboard 7 | modelcards 8 | -------------------------------------------------------------------------------- /textfussion/examples/research_projects/onnxruntime/textual_inversion/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.25.1 4 | ftfy 5 | tensorboard 6 | modelcards 7 | -------------------------------------------------------------------------------- /textfussion/examples/research_projects/onnxruntime/unconditional_image_generation/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | datasets 4 | -------------------------------------------------------------------------------- /textfussion/examples/text_to_image/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | transformers>=4.25.1 3 | datasets 4 | ftfy 5 | tensorboard 6 | Jinja2 7 | -------------------------------------------------------------------------------- 
/textfussion/examples/text_to_image/requirements_flax.txt: -------------------------------------------------------------------------------- 1 | transformers>=4.25.1 2 | datasets 3 | flax 4 | optax 5 | torch 6 | torchvision 7 | ftfy 8 | tensorboard 9 | Jinja2 10 | -------------------------------------------------------------------------------- /textfussion/examples/textual_inversion/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.25.1 4 | ftfy 5 | tensorboard 6 | Jinja2 7 | -------------------------------------------------------------------------------- /textfussion/examples/textual_inversion/requirements_flax.txt: -------------------------------------------------------------------------------- 1 | transformers>=4.25.1 2 | flax 3 | optax 4 | torch 5 | torchvision 6 | ftfy 7 | tensorboard 8 | Jinja2 9 | -------------------------------------------------------------------------------- /textfussion/examples/unconditional_image_generation/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | datasets 4 | -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/__init__.py -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/__pycache__/__init__.cpython-310.pyc 
-------------------------------------------------------------------------------- /textfussion/my_inpainting/src/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/build_synth_data/__pycache__/batch_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/build_synth_data/__pycache__/batch_utils.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/build_synth_data/__pycache__/crop_tools.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/build_synth_data/__pycache__/crop_tools.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/build_synth_data/__pycache__/glyph_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/build_synth_data/__pycache__/glyph_utils.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/build_synth_data/__pycache__/rec_inferencer.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/build_synth_data/__pycache__/rec_inferencer.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__init__.py -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/base_text_dataset.cpython-310.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/base_text_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/batch_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/batch_utils.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/crop_image_for_test.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/crop_image_for_test.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/new_paradigm_text_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/new_paradigm_text_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/text_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/text_dataset.cpython-310.pyc 
-------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/text_dataset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/text_dataset.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/text_mapper.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/text_mapper.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/text_mapper.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/text_mapper.cpython-38.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/text_mapper.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/text_mapper.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/utils.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/utils.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_pure_text_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_pure_text_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_text_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_text_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_text_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_text_dataset.cpython-38.pyc -------------------------------------------------------------------------------- 
/textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_text_dataset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_text_dataset.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_with_blank_text_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_with_blank_text_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/engines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/engines/__init__.py -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__init__.py -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/__init__.cpython-310.pyc 
-------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/adapter.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/adapter.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/adapter.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/adapter.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/adapter_with_char_embedding.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/adapter_with_char_embedding.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/adapter_with_fussion_TE.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/adapter_with_fussion_TE.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/adapter_with_fussion_TE.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/adapter_with_fussion_TE.cpython-38.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/adapter_with_fussion_TE.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/adapter_with_fussion_TE.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/adapter_with_pre_prompt.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/adapter_with_pre_prompt.cpython-310.pyc 
-------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/attention.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/attention.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/char_encoder.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/char_encoder.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/controlnet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/controlnet.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/dual_controlnet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/dual_controlnet.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/fussion_text_embedding.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/fussion_text_embedding.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/modules.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/modules.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/only_pre_prompt.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/only_pre_prompt.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/only_prefix_prompt.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/only_prefix_prompt.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/openaimodel.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/openaimodel.cpython-310.pyc -------------------------------------------------------------------------------- 
/textfussion/my_inpainting/src/models/__pycache__/transformer_2d_with_controlnet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/transformer_2d_with_controlnet.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/transformer_2d_with_dual_text_controlnet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/transformer_2d_with_dual_text_controlnet.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_adapter.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_adapter.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_adapter.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_adapter.cpython-38.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_adapter.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_adapter.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_controlnet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_controlnet.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_dual_text_controlnet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_dual_text_controlnet.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_adapter.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_adapter.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_adapter.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_adapter.cpython-38.pyc 
-------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_adapter.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_adapter.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_controlnet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_controlnet.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_dual_text.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_dual_text.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_dual_text_controlnet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_dual_text_controlnet.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/union_net.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/union_net.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__init__.py -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/new_paradigm_inpainting_dual_text_encoder.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/new_paradigm_inpainting_dual_text_encoder.cpython-310.pyc -------------------------------------------------------------------------------- 
/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_dual_text_full_controlnet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_dual_text_full_controlnet.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_full_controlnet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_full_controlnet.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_mask_controlnet.cpython-310.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_mask_controlnet.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_only_controlnet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_only_controlnet.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_only_pre_prompt.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_only_pre_prompt.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_only_prefix_prompt.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_only_prefix_prompt.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_text_glyph.cpython-310.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_text_glyph.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_text_vae_text_glyph.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_text_vae_text_glyph.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_adapter.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_adapter.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_adapter_with_fussion_te.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_adapter_with_fussion_te.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_adapter_with_fussion_te.cpython-39.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_adapter_with_fussion_te.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_adapter_zero_prompt.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_adapter_zero_prompt.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_char_adapter.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_char_adapter.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/utils/ori.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/utils/ori.png -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/utils/output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/utils/output.png 
-------------------------------------------------------------------------------- /textfussion/my_inpainting/src/utils/res.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/utils/res.png -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/utils/res_area.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/utils/res_area.png -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/utils/res_trilinear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/utils/res_trilinear.png -------------------------------------------------------------------------------- /textfussion/my_inpainting/train_vae.sh: -------------------------------------------------------------------------------- 1 | export MODEL_NAME="stabilityai/stable-diffusion-2-inpainting" 2 | export OUTPUT_DIR="output/pretrain_8702_text_vae" 3 | 4 | NCCL_P2P_DISABLE=1 accelerate launch train_vae.py \ 5 | --pretrained_model_name_or_path=$MODEL_NAME \ 6 | --output_dir=$OUTPUT_DIR \ 7 | --resolution=512 \ 8 | --train_batch_size=4 \ 9 | --gradient_accumulation_steps=1 \ 10 | --gradient_checkpointing \ 11 | --learning_rate=5e-6 \ 12 | --num_train_epochs=3 \ 13 | --lr_scheduler="constant" \ 14 | --lr_warmup_steps=3000 \ 15 | --dataloader_num_workers=8 \ 16 | --mixed_precision=fp16 \ 17 | -------------------------------------------------------------------------------- /textfussion/pyproject.toml: 
-------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 119 3 | target-version = ['py37'] 4 | 5 | [tool.ruff] 6 | # Never enforce `E501` (line length violations). 7 | ignore = ["C901", "E501", "E741", "W605"] 8 | select = ["C", "E", "F", "I", "W"] 9 | line-length = 119 10 | 11 | # Ignore import violations in all `__init__.py` files. 12 | [tool.ruff.per-file-ignores] 13 | "__init__.py" = ["E402", "F401", "F403", "F811"] 14 | "src/diffusers/utils/dummy_*.py" = ["F401"] 15 | 16 | [tool.ruff.isort] 17 | lines-after-imports = 2 18 | known-first-party = ["diffusers"] 19 | -------------------------------------------------------------------------------- /textfussion/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/scripts/__init__.py -------------------------------------------------------------------------------- /textfussion/setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | default_section = FIRSTPARTY 3 | ensure_newline_before_comments = True 4 | force_grid_wrap = 0 5 | include_trailing_comma = True 6 | known_first_party = accelerate 7 | known_third_party = 8 | numpy 9 | torch 10 | torch_xla 11 | 12 | line_length = 119 13 | lines_after_imports = 2 14 | multi_line_output = 3 15 | use_parentheses = True 16 | 17 | [flake8] 18 | ignore = E203, E722, E501, E741, W503, W605 19 | max-line-length = 119 20 | per-file-ignores = __init__.py:F401 21 | -------------------------------------------------------------------------------- /textfussion/src/diffusers.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /textfussion/src/diffusers.egg-info/entry_points.txt: 
-------------------------------------------------------------------------------- 1 | [console_scripts] 2 | diffusers-cli = diffusers.commands.diffusers_cli:main 3 | -------------------------------------------------------------------------------- /textfussion/src/diffusers.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | diffusers 2 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/experimental/README.md: -------------------------------------------------------------------------------- 1 | # 🧨 Diffusers Experimental 2 | 3 | We are adding experimental code to support novel applications and usages of the Diffusers library. 4 | Currently, the following experiments are supported: 5 | * Reinforcement learning via an implementation of the [Diffuser](https://arxiv.org/abs/2205.09991) model. -------------------------------------------------------------------------------- /textfussion/src/diffusers/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | from .rl import ValueGuidedRLPipeline 2 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/experimental/rl/__init__.py: -------------------------------------------------------------------------------- 1 | from .value_guided_sampling import ValueGuidedRLPipeline 2 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/models/README.md: -------------------------------------------------------------------------------- 1 | # Models 2 | 3 | For more detail on the models, please refer to the [docs](https://huggingface.co/docs/diffusers/api/models). 
-------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/audio_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from .mel import Mel 2 | from .pipeline_audio_diffusion import AudioDiffusionPipeline 3 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/audioldm/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import ( 2 | OptionalDependencyNotAvailable, 3 | is_torch_available, 4 | is_transformers_available, 5 | is_transformers_version, 6 | ) 7 | 8 | 9 | try: 10 | if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.27.0")): 11 | raise OptionalDependencyNotAvailable() 12 | except OptionalDependencyNotAvailable: 13 | from ...utils.dummy_torch_and_transformers_objects import ( 14 | AudioLDMPipeline, 15 | ) 16 | else: 17 | from .pipeline_audioldm import AudioLDMPipeline 18 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/dance_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_dance_diffusion import DanceDiffusionPipeline 2 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/ddim/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_ddim import DDIMPipeline 2 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/ddpm/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_ddpm import DDPMPipeline 2 | -------------------------------------------------------------------------------- 
/textfussion/src/diffusers/pipelines/dit/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_dit import DiTPipeline 2 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/latent_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import is_transformers_available 2 | from .pipeline_latent_diffusion_superresolution import LDMSuperResolutionPipeline 3 | 4 | 5 | if is_transformers_available(): 6 | from .pipeline_latent_diffusion import LDMBertModel, LDMTextToImagePipeline 7 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/latent_diffusion_uncond/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_latent_diffusion_uncond import LDMPipeline 2 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/paint_by_example/__init__.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List, Optional, Union 3 | 4 | import numpy as np 5 | import PIL 6 | from PIL import Image 7 | 8 | from ...utils import is_torch_available, is_transformers_available 9 | 10 | 11 | if is_transformers_available() and is_torch_available(): 12 | from .image_encoder import PaintByExampleImageEncoder 13 | from .pipeline_paint_by_example import PaintByExamplePipeline 14 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/pndm/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_pndm import PNDMPipeline 2 | -------------------------------------------------------------------------------- 
/textfussion/src/diffusers/pipelines/repaint/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_repaint import RePaintPipeline 2 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/score_sde_ve/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_score_sde_ve import ScoreSdeVePipeline 2 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/stochastic_karras_ve/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_stochastic_karras_ve import KarrasVePipeline 2 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/unclip/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import ( 2 | OptionalDependencyNotAvailable, 3 | is_torch_available, 4 | is_transformers_available, 5 | is_transformers_version, 6 | ) 7 | 8 | 9 | try: 10 | if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.25.0")): 11 | raise OptionalDependencyNotAvailable() 12 | except OptionalDependencyNotAvailable: 13 | from ...utils.dummy_torch_and_transformers_objects import UnCLIPImageVariationPipeline, UnCLIPPipeline 14 | else: 15 | from .pipeline_unclip import UnCLIPPipeline 16 | from .pipeline_unclip_image_variation import UnCLIPImageVariationPipeline 17 | from .text_proj import UnCLIPTextProjModel 18 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/vq_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import is_torch_available, is_transformers_available 2 | 3 | 4 | if 
is_transformers_available() and is_torch_available(): 5 | from .pipeline_vq_diffusion import LearnedClassifierFreeSamplingEmbeddings, VQDiffusionPipeline 6 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/schedulers/README.md: -------------------------------------------------------------------------------- 1 | # Schedulers 2 | 3 | For more information on the schedulers, please refer to the [docs](https://huggingface.co/docs/diffusers/api/schedulers/overview). -------------------------------------------------------------------------------- /textfussion/src/diffusers/utils/dummy_note_seq_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class MidiProcessor(metaclass=DummyObject): 6 | _backends = ["note_seq"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["note_seq"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["note_seq"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["note_seq"]) 18 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/utils/dummy_onnx_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class OnnxRuntimeModel(metaclass=DummyObject): 6 | _backends = ["onnx"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["onnx"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["onnx"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["onnx"]) 18 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/utils/dummy_torch_and_scipy_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class LMSDiscreteScheduler(metaclass=DummyObject): 6 | _backends = ["torch", "scipy"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["torch", "scipy"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["torch", "scipy"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["torch", "scipy"]) 18 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/utils/dummy_torch_and_transformers_and_k_diffusion_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class StableDiffusionKDiffusionPipeline(metaclass=DummyObject): 6 | _backends = ["torch", "transformers", "k_diffusion"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["torch", "transformers", "k_diffusion"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["torch", "transformers", "k_diffusion"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["torch", "transformers", "k_diffusion"]) 18 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/utils/dummy_transformers_and_torch_and_note_seq_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class SpectrogramDiffusionPipeline(metaclass=DummyObject): 6 | _backends = ["transformers", "torch", "note_seq"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["transformers", "torch", "note_seq"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["transformers", "torch", "note_seq"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["transformers", "torch", "note_seq"]) 18 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/utils/pil_utils.py: -------------------------------------------------------------------------------- 1 | import PIL.Image 2 | import PIL.ImageOps 3 | from packaging import version 4 | 5 | 6 | if version.parse(version.parse(PIL.__version__).base_version) >= version.parse("9.1.0"): 7 | PIL_INTERPOLATION = { 8 | "linear": PIL.Image.Resampling.BILINEAR, 9 | "bilinear": PIL.Image.Resampling.BILINEAR, 10 | 
"bicubic": PIL.Image.Resampling.BICUBIC, 11 | "lanczos": PIL.Image.Resampling.LANCZOS, 12 | "nearest": PIL.Image.Resampling.NEAREST, 13 | } 14 | else: 15 | PIL_INTERPOLATION = { 16 | "linear": PIL.Image.LINEAR, 17 | "bilinear": PIL.Image.BILINEAR, 18 | "bicubic": PIL.Image.BICUBIC, 19 | "lanczos": PIL.Image.LANCZOS, 20 | "nearest": PIL.Image.NEAREST, 21 | } 22 | -------------------------------------------------------------------------------- /textfussion/tests: -------------------------------------------------------------------------------- 1 | 2 | --------------------------------------------------------------------------------