├── README.md ├── images └── framwork.png ├── mmocr ├── .circleci │ ├── config.yml │ ├── docker │ │ └── Dockerfile │ └── test.yml ├── .codespellrc ├── .coveragerc ├── .dev_scripts │ ├── benchmark_full_models.txt │ ├── benchmark_options.py │ ├── benchmark_train_models.txt │ ├── covignore.cfg │ └── diff_coverage_test.sh ├── .gitignore ├── .owners.yml ├── .pre-commit-config.yaml ├── .pylintrc ├── .readthedocs.yml ├── CITATION.cff ├── LICENSE ├── MANIFEST.in ├── README.md ├── configs │ ├── backbone │ │ └── oclip │ │ │ ├── README.md │ │ │ └── metafile.yml │ ├── kie │ │ ├── _base_ │ │ │ ├── datasets │ │ │ │ ├── wildreceipt-openset.py │ │ │ │ └── wildreceipt.py │ │ │ ├── default_runtime.py │ │ │ └── schedules │ │ │ │ └── schedule_adam_60e.py │ │ └── sdmgr │ │ │ ├── README.md │ │ │ ├── _base_sdmgr_novisual.py │ │ │ ├── _base_sdmgr_unet16.py │ │ │ ├── metafile.yml │ │ │ ├── sdmgr_novisual_60e_wildreceipt-openset.py │ │ │ ├── sdmgr_novisual_60e_wildreceipt.py │ │ │ └── sdmgr_unet16_60e_wildreceipt.py │ ├── textdet │ │ ├── _base_ │ │ │ ├── datasets │ │ │ │ ├── ctw1500.py │ │ │ │ ├── icdar2013.py │ │ │ │ ├── icdar2015.py │ │ │ │ ├── icdar2017.py │ │ │ │ ├── synthtext.py │ │ │ │ ├── totaltext.py │ │ │ │ └── toy_data.py │ │ │ ├── default_runtime.py │ │ │ ├── pretrain_runtime.py │ │ │ └── schedules │ │ │ │ ├── schedule_adam_600e.py │ │ │ │ ├── schedule_sgd_100k.py │ │ │ │ ├── schedule_sgd_1200e.py │ │ │ │ └── schedule_sgd_base.py │ │ ├── dbnet │ │ │ ├── README.md │ │ │ ├── _base_dbnet_resnet18_fpnc.py │ │ │ ├── _base_dbnet_resnet50-dcnv2_fpnc.py │ │ │ ├── dbnet_resnet18_fpnc_100k_synthtext.py │ │ │ ├── dbnet_resnet18_fpnc_1200e_icdar2015.py │ │ │ ├── dbnet_resnet18_fpnc_1200e_totaltext.py │ │ │ ├── dbnet_resnet50-dcnv2_fpnc_100k_synthtext.py │ │ │ ├── dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py │ │ │ ├── dbnet_resnet50-oclip_1200e_icdar2015.py │ │ │ ├── dbnet_resnet50_1200e_icdar2015.py │ │ │ ├── dbnet_resnet50_120e_synth_train_ic15_test.py │ │ │ ├── metafile.yml │ │ │ ├── 
synth_data_train_100k_ic15_test.py │ │ │ ├── synth_finetune_from_pretrain_ctw1500.py │ │ │ └── synth_finetune_from_pretrain_ic15_test.py │ │ ├── dbnetpp │ │ │ ├── README.md │ │ │ ├── _base_dbnetpp_resnet50-dcnv2_fpnc.py │ │ │ ├── dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py │ │ │ ├── dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py │ │ │ ├── dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015.py │ │ │ ├── dbnetpp_resnet50_fpnc_1200e_icdar2015.py │ │ │ └── metafile.yml │ │ ├── drrg │ │ │ ├── README.md │ │ │ ├── _base_drrg_resnet50_fpn-unet.py │ │ │ ├── drrg_resnet50-oclip_fpn-unet_1200e_ctw1500.py │ │ │ ├── drrg_resnet50_fpn-unet_1200e_ctw1500.py │ │ │ └── metafile.yml │ │ ├── fcenet │ │ │ ├── README.md │ │ │ ├── _base_fcenet_resnet50-dcnv2_fpn.py │ │ │ ├── _base_fcenet_resnet50_fpn.py │ │ │ ├── fcenet_resnet50-dcnv2_fpn_1500e_ctw1500.py │ │ │ ├── fcenet_resnet50-oclip_fpn_1500e_ctw1500.py │ │ │ ├── fcenet_resnet50-oclip_fpn_1500e_icdar2015.py │ │ │ ├── fcenet_resnet50_fpn_1500e_icdar2015.py │ │ │ ├── fcenet_resnet50_fpn_1500e_totaltext.py │ │ │ ├── finetune.py │ │ │ ├── metafile.yml │ │ │ └── synth_data_train_real_data_test.py │ │ ├── maskrcnn │ │ │ ├── README.md │ │ │ ├── _base_mask-rcnn_resnet50_fpn.py │ │ │ ├── mask-rcnn_resnet50-oclip_fpn_160e_ctw1500.py │ │ │ ├── mask-rcnn_resnet50-oclip_fpn_160e_icdar2015.py │ │ │ ├── mask-rcnn_resnet50_fpn_160e_ctw1500.py │ │ │ ├── mask-rcnn_resnet50_fpn_160e_icdar2015.py │ │ │ ├── mask-rcnn_resnet50_fpn_160e_icdar2017.py │ │ │ └── metafile.yml │ │ ├── panet │ │ │ ├── README.md │ │ │ ├── _base_panet_resnet18_fpem-ffm.py │ │ │ ├── _base_panet_resnet50_fpem-ffm.py │ │ │ ├── metafile.yml │ │ │ ├── panet_resnet18_fpem-ffm_600e_ctw1500.py │ │ │ ├── panet_resnet18_fpem-ffm_600e_icdar2015.py │ │ │ ├── panet_resnet50_fpem-ffm_600e_icdar2017.py │ │ │ └── synth_data_train_real_data_test.py │ │ ├── psenet │ │ │ ├── README.md │ │ │ ├── _base_psenet_resnet50_fpnf.py │ │ │ ├── metafile.yml │ │ │ ├── psenet_resnet50-oclip_fpnf_600e_ctw1500.py │ │ │ 
├── psenet_resnet50-oclip_fpnf_600e_icdar2015.py │ │ │ ├── psenet_resnet50_fpnf_600e_ctw1500.py │ │ │ ├── psenet_resnet50_fpnf_600e_icdar2015.py │ │ │ ├── psenet_resnet50_fpnf_600e_icdar2017.py │ │ │ └── psenet_resnet50_synth_train_ic15_test.py │ │ └── textsnake │ │ │ ├── README.md │ │ │ ├── _base_textsnake_resnet50_fpn-unet.py │ │ │ ├── metafile.yml │ │ │ ├── textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500.py │ │ │ └── textsnake_resnet50_fpn-unet_1200e_ctw1500.py │ └── textrecog │ │ ├── _base_ │ │ ├── datasets │ │ │ ├── coco_text_v1.py │ │ │ ├── cute80.py │ │ │ ├── icdar2011.py │ │ │ ├── icdar2013.py │ │ │ ├── icdar2015.py │ │ │ ├── iiit5k.py │ │ │ ├── mjsynth.py │ │ │ ├── svt.py │ │ │ ├── svtp.py │ │ │ ├── synthtext.py │ │ │ ├── synthtext_add.py │ │ │ ├── totaltext.py │ │ │ └── toy_data.py │ │ ├── default_runtime.py │ │ └── schedules │ │ │ ├── schedule_adadelta_5e.py │ │ │ ├── schedule_adam_base.py │ │ │ ├── schedule_adam_step_5e.py │ │ │ └── schedule_adamw_cos_6e.py │ │ ├── abinet │ │ ├── README.md │ │ ├── _base_abinet-vision.py │ │ ├── _base_abinet.py │ │ ├── abinet-vision_20e_st-an_mj.py │ │ ├── abinet_20e_st-an_mj.py │ │ └── metafile.yml │ │ ├── aster │ │ ├── README.md │ │ ├── _base_aster.py │ │ ├── aster_resnet45_6e_st_mj.py │ │ └── metafile.yml │ │ ├── crnn │ │ ├── README.md │ │ ├── _base_crnn_mini-vgg.py │ │ ├── crnn_mini-vgg_5e_mj.py │ │ ├── crnn_mini-vgg_5e_toy.py │ │ └── metafile.yml │ │ ├── master │ │ ├── README.md │ │ ├── _base_master_resnet31.py │ │ ├── master_resnet31_12e_st_mj_sa.py │ │ ├── master_resnet31_12e_toy.py │ │ └── metafile.yml │ │ ├── nrtr │ │ ├── README.md │ │ ├── _base_nrtr_modality-transform.py │ │ ├── _base_nrtr_resnet31.py │ │ ├── metafile.yml │ │ ├── nrtr_modality-transform_6e_st_mj.py │ │ ├── nrtr_modality-transform_6e_toy.py │ │ ├── nrtr_resnet31-1by16-1by8_6e_st_mj.py │ │ └── nrtr_resnet31-1by8-1by4_6e_st_mj.py │ │ ├── robust_scanner │ │ ├── README.md │ │ ├── _base_robustscanner_resnet31.py │ │ ├── metafile.yml │ │ ├── 
robustscanner_resnet31_5e_st-sub_mj-sub_sa_real.py │ │ └── robustscanner_resnet31_5e_toy.py │ │ ├── sar │ │ ├── README.md │ │ ├── _base_sar_resnet31_parallel-decoder.py │ │ ├── metafile.yml │ │ ├── sar_resnet31_parallel-decoder_5e_st-sub_mj-sub_sa_real.py │ │ ├── sar_resnet31_parallel-decoder_5e_toy.py │ │ └── sar_resnet31_sequential-decoder_5e_st-sub_mj-sub_sa_real.py │ │ ├── satrn │ │ ├── README.md │ │ ├── _base_satrn_shallow.py │ │ ├── metafile.yml │ │ ├── satrn_shallow-small_5e_st_mj.py │ │ └── satrn_shallow_5e_st_mj.py │ │ └── svtr │ │ ├── README.md │ │ ├── _base_svtr-tiny.py │ │ ├── metafile.yml │ │ ├── svtr-base_20e_st_mj.py │ │ ├── svtr-large_20e_st_mj.py │ │ ├── svtr-small_20e_st_mj.py │ │ └── svtr-tiny_20e_st_mj.py ├── dataset_zoo │ ├── cocotextv2 │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ ├── textdet.py │ │ ├── textrecog.py │ │ └── textspotting.py │ ├── ctw1500 │ │ ├── metafile.yml │ │ ├── textdet.py │ │ ├── textrecog.py │ │ └── textspotting.py │ ├── cute80 │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ └── textrecog.py │ ├── funsd │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ ├── textdet.py │ │ ├── textrecog.py │ │ └── textspotting.py │ ├── icdar2013 │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ ├── textdet.py │ │ ├── textrecog.py │ │ └── textspotting.py │ ├── icdar2015 │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ ├── textdet.py │ │ ├── textrecog.py │ │ └── textspotting.py │ ├── iiit5k │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ └── textrecog.py │ ├── mjsynth │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ └── textrecog.py │ ├── naf │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ ├── textdet.py │ │ ├── textrecog.py │ │ └── textspotting.py │ ├── sroie │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ ├── textdet.py │ │ ├── textrecog.py │ │ └── textspotting.py │ ├── svt │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ ├── textdet.py │ │ ├── textrecog.py │ │ └── textspotting.py │ ├── svtp │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ └── 
textrecog.py │ ├── synthtext │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ ├── textdet.py │ │ ├── textrecog.py │ │ └── textspotting.py │ ├── textocr │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ ├── textdet.py │ │ ├── textrecog.py │ │ └── textspotting.py │ ├── totaltext │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ ├── textdet.py │ │ ├── textrecog.py │ │ └── textspotting.py │ └── wildreceipt │ │ ├── kie.py │ │ ├── metafile.yml │ │ ├── sample_anno.md │ │ ├── textdet.py │ │ ├── textrecog.py │ │ └── textspotting.py ├── demo │ ├── demo_densetext_det.jpg │ ├── demo_kie.jpeg │ ├── demo_text_det.jpg │ ├── demo_text_ocr.jpg │ ├── demo_text_recog.jpg │ └── resources │ │ ├── demo_kie_pred.png │ │ ├── det_vis.png │ │ ├── kie_vis.png │ │ ├── log_analysis_demo.png │ │ └── rec_vis.png ├── dicts │ ├── chinese_english_digits.txt │ ├── english_digits_symbols.txt │ ├── english_digits_symbols_space.txt │ ├── lower_english_digits.txt │ ├── lower_english_digits_space.txt │ └── sdmgr_dict.txt ├── docker │ ├── Dockerfile │ └── serve │ │ ├── Dockerfile │ │ ├── config.properties │ │ └── entrypoint.sh ├── docs │ ├── en │ │ ├── Makefile │ │ ├── _static │ │ │ ├── css │ │ │ │ └── readthedocs.css │ │ │ ├── images │ │ │ │ └── mmocr.png │ │ │ └── js │ │ │ │ ├── collapsed.js │ │ │ │ └── table.js │ │ ├── _templates │ │ │ └── classtemplate.rst │ │ ├── api │ │ │ ├── apis.rst │ │ │ ├── datasets.rst │ │ │ ├── engine.rst │ │ │ ├── evaluation.rst │ │ │ ├── models.rst │ │ │ ├── structures.rst │ │ │ ├── transforms.rst │ │ │ ├── utils.rst │ │ │ └── visualization.rst │ │ ├── basic_concepts │ │ │ ├── convention.md │ │ │ ├── data_flow.md │ │ │ ├── datasets.md │ │ │ ├── engine.md │ │ │ ├── evaluation.md │ │ │ ├── models.md │ │ │ ├── overview.md │ │ │ ├── structures.md │ │ │ ├── transforms.md │ │ │ └── visualizers.md │ │ ├── conf.py │ │ ├── contact.md │ │ ├── dataset_zoo.py │ │ ├── docutils.conf │ │ ├── get_started │ │ │ ├── faq.md │ │ │ ├── install.md │ │ │ ├── overview.md │ │ │ └── quick_run.md │ │ ├── 
index.rst │ │ ├── make.bat │ │ ├── merge_docs.sh │ │ ├── migration │ │ │ ├── branches.md │ │ │ ├── code.md │ │ │ ├── dataset.md │ │ │ ├── model.md │ │ │ ├── news.md │ │ │ ├── overview.md │ │ │ └── transforms.md │ │ ├── notes │ │ │ ├── branches.md │ │ │ ├── changelog.md │ │ │ ├── changelog_v0.x.md │ │ │ └── contribution_guide.md │ │ ├── project_zoo.py │ │ ├── requirements.txt │ │ ├── stats.py │ │ ├── switch_language.md │ │ ├── user_guides │ │ │ ├── config.md │ │ │ ├── data_prepare │ │ │ │ ├── dataset_preparer.md │ │ │ │ ├── det.md │ │ │ │ ├── kie.md │ │ │ │ └── recog.md │ │ │ ├── dataset_prepare.md │ │ │ ├── inference.md │ │ │ ├── train_test.md │ │ │ ├── useful_tools.md │ │ │ └── visualization.md │ │ └── weight_list.py │ └── zh_cn │ │ ├── Makefile │ │ ├── _static │ │ ├── css │ │ │ └── readthedocs.css │ │ ├── images │ │ │ └── mmocr.png │ │ └── js │ │ │ ├── collapsed.js │ │ │ └── table.js │ │ ├── _templates │ │ └── classtemplate.rst │ │ ├── api │ │ ├── apis.rst │ │ ├── datasets.rst │ │ ├── engine.rst │ │ ├── evaluation.rst │ │ ├── models.rst │ │ ├── structures.rst │ │ ├── transforms.rst │ │ ├── utils.rst │ │ └── visualization.rst │ │ ├── basic_concepts │ │ ├── convention.md │ │ ├── data_flow.md │ │ ├── datasets.md │ │ ├── engine.md │ │ ├── evaluation.md │ │ ├── models.md │ │ ├── overview.md │ │ ├── structures.md │ │ ├── transforms.md │ │ └── visualizers.md │ │ ├── conf.py │ │ ├── contact.md │ │ ├── cp_origin_docs.sh │ │ ├── dataset_zoo.py │ │ ├── docutils.conf │ │ ├── get_started │ │ ├── install.md │ │ ├── overview.md │ │ └── quick_run.md │ │ ├── index.rst │ │ ├── make.bat │ │ ├── merge_docs.sh │ │ ├── migration │ │ ├── branches.md │ │ ├── code.md │ │ ├── dataset.md │ │ ├── model.md │ │ ├── news.md │ │ ├── overview.md │ │ └── transforms.md │ │ ├── notes │ │ ├── branches.md │ │ └── contribution_guide.md │ │ ├── project_zoo.py │ │ ├── stats.py │ │ ├── switch_language.md │ │ ├── user_guides │ │ ├── config.md │ │ ├── data_prepare │ │ │ ├── dataset_preparer.md │ │ │ └── 
kie.md │ │ ├── dataset_prepare.md │ │ ├── inference.md │ │ ├── train_test.md │ │ ├── useful_tools.md │ │ └── visualization.md │ │ └── weight_list.py ├── mmocr │ ├── __init__.py │ ├── apis │ │ ├── __init__.py │ │ └── inferencers │ │ │ ├── __init__.py │ │ │ ├── base_mmocr_inferencer.py │ │ │ ├── kie_inferencer.py │ │ │ ├── mmocr_inferencer.py │ │ │ ├── textdet_inferencer.py │ │ │ ├── textrec_inferencer.py │ │ │ └── textspot_inferencer.py │ ├── datasets │ │ ├── __init__.py │ │ ├── dataset_wrapper.py │ │ ├── icdar_dataset.py │ │ ├── ocr_dataset.py │ │ ├── preparers │ │ │ ├── __init__.py │ │ │ ├── config_generators │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── textdet_config_generator.py │ │ │ │ ├── textrecog_config_generator.py │ │ │ │ └── textspotting_config_generator.py │ │ │ ├── data_preparer.py │ │ │ ├── dumpers │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── json_dumper.py │ │ │ │ ├── lmdb_dumper.py │ │ │ │ └── wild_receipt_openset_dumper.py │ │ │ ├── gatherers │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── mono_gatherer.py │ │ │ │ ├── naf_gatherer.py │ │ │ │ └── pair_gatherer.py │ │ │ ├── obtainers │ │ │ │ ├── __init__.py │ │ │ │ └── naive_data_obtainer.py │ │ │ ├── packers │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── textdet_packer.py │ │ │ │ ├── textrecog_packer.py │ │ │ │ ├── textspotting_packer.py │ │ │ │ └── wildreceipt_packer.py │ │ │ └── parsers │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── coco_parser.py │ │ │ │ ├── ctw1500_parser.py │ │ │ │ ├── funsd_parser.py │ │ │ │ ├── icdar_txt_parser.py │ │ │ │ ├── mjsynth_parser.py │ │ │ │ ├── naf_parser.py │ │ │ │ ├── sroie_parser.py │ │ │ │ ├── svt_parser.py │ │ │ │ ├── synthtext_parser.py │ │ │ │ ├── totaltext_parser.py │ │ │ │ └── wildreceipt_parser.py │ │ ├── recog_lmdb_dataset.py │ │ ├── recog_text_dataset.py │ │ ├── samplers │ │ │ ├── __init__.py │ │ │ └── batch_aug.py │ │ ├── transforms │ │ │ ├── __init__.py │ │ │ ├── adapters.py │ │ │ ├── formatting.py │ │ │ 
├── loading.py │ │ │ ├── ocr_transforms.py │ │ │ ├── textdet_transforms.py │ │ │ ├── textrecog_transforms.py │ │ │ └── wrappers.py │ │ └── wildreceipt_dataset.py │ ├── engine │ │ ├── __init__.py │ │ └── hooks │ │ │ ├── __init__.py │ │ │ └── visualization_hook.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── evaluator │ │ │ ├── __init__.py │ │ │ └── multi_datasets_evaluator.py │ │ ├── functional │ │ │ ├── __init__.py │ │ │ └── hmean.py │ │ └── metrics │ │ │ ├── __init__.py │ │ │ ├── f_metric.py │ │ │ ├── hmean_iou_metric.py │ │ │ └── recog_metric.py │ ├── models │ │ ├── __init__.py │ │ ├── common │ │ │ ├── __init__.py │ │ │ ├── backbones │ │ │ │ ├── __init__.py │ │ │ │ ├── clip_resnet.py │ │ │ │ └── unet.py │ │ │ ├── dictionary │ │ │ │ ├── __init__.py │ │ │ │ └── dictionary.py │ │ │ ├── layers │ │ │ │ ├── __init__.py │ │ │ │ └── transformer_layers.py │ │ │ ├── losses │ │ │ │ ├── __init__.py │ │ │ │ ├── bce_loss.py │ │ │ │ ├── ce_loss.py │ │ │ │ ├── dice_loss.py │ │ │ │ └── l1_loss.py │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ └── transformer_module.py │ │ │ └── plugins │ │ │ │ ├── __init__.py │ │ │ │ └── common.py │ │ ├── kie │ │ │ ├── __init__.py │ │ │ ├── extractors │ │ │ │ ├── __init__.py │ │ │ │ └── sdmgr.py │ │ │ ├── heads │ │ │ │ ├── __init__.py │ │ │ │ └── sdmgr_head.py │ │ │ ├── module_losses │ │ │ │ ├── __init__.py │ │ │ │ └── sdmgr_module_loss.py │ │ │ └── postprocessors │ │ │ │ ├── __init__.py │ │ │ │ └── sdmgr_postprocessor.py │ │ ├── textdet │ │ │ ├── __init__.py │ │ │ ├── data_preprocessors │ │ │ │ ├── __init__.py │ │ │ │ └── data_preprocessor.py │ │ │ ├── detectors │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── dbnet.py │ │ │ │ ├── drrg.py │ │ │ │ ├── fcenet.py │ │ │ │ ├── mmdet_wrapper.py │ │ │ │ ├── panet.py │ │ │ │ ├── psenet.py │ │ │ │ ├── single_stage_text_detector.py │ │ │ │ └── textsnake.py │ │ │ ├── heads │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── db_head.py │ │ │ │ ├── drrg_head.py │ │ │ │ ├── fce_head.py │ │ │ │ 
├── pan_head.py │ │ │ │ ├── pse_head.py │ │ │ │ └── textsnake_head.py │ │ │ ├── module_losses │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── db_module_loss.py │ │ │ │ ├── drrg_module_loss.py │ │ │ │ ├── fce_module_loss.py │ │ │ │ ├── pan_module_loss.py │ │ │ │ ├── pse_module_loss.py │ │ │ │ ├── seg_based_module_loss.py │ │ │ │ └── textsnake_module_loss.py │ │ │ ├── necks │ │ │ │ ├── __init__.py │ │ │ │ ├── fpem_ffm.py │ │ │ │ ├── fpn_cat.py │ │ │ │ ├── fpn_unet.py │ │ │ │ └── fpnf.py │ │ │ └── postprocessors │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── db_postprocessor.py │ │ │ │ ├── drrg_postprocessor.py │ │ │ │ ├── fce_postprocessor.py │ │ │ │ ├── pan_postprocessor.py │ │ │ │ ├── pse_postprocessor.py │ │ │ │ └── textsnake_postprocessor.py │ │ └── textrecog │ │ │ ├── __init__.py │ │ │ ├── backbones │ │ │ ├── __init__.py │ │ │ ├── mini_vgg.py │ │ │ ├── mobilenet_v2.py │ │ │ ├── nrtr_modality_transformer.py │ │ │ ├── resnet.py │ │ │ ├── resnet31_ocr.py │ │ │ ├── resnet_abi.py │ │ │ └── shallow_cnn.py │ │ │ ├── data_preprocessors │ │ │ ├── __init__.py │ │ │ └── data_preprocessor.py │ │ │ ├── decoders │ │ │ ├── __init__.py │ │ │ ├── abi_fuser.py │ │ │ ├── abi_language_decoder.py │ │ │ ├── abi_vision_decoder.py │ │ │ ├── aster_decoder.py │ │ │ ├── base.py │ │ │ ├── crnn_decoder.py │ │ │ ├── master_decoder.py │ │ │ ├── nrtr_decoder.py │ │ │ ├── position_attention_decoder.py │ │ │ ├── robust_scanner_fuser.py │ │ │ ├── sar_decoder.py │ │ │ ├── sar_decoder_with_bs.py │ │ │ ├── sequence_attention_decoder.py │ │ │ └── svtr_decoder.py │ │ │ ├── encoders │ │ │ ├── __init__.py │ │ │ ├── abi_encoder.py │ │ │ ├── aster_encoder.py │ │ │ ├── base.py │ │ │ ├── channel_reduction_encoder.py │ │ │ ├── nrtr_encoder.py │ │ │ ├── sar_encoder.py │ │ │ ├── satrn_encoder.py │ │ │ └── svtr_encoder.py │ │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── conv_layer.py │ │ │ ├── dot_product_attention_layer.py │ │ │ ├── lstm_layer.py │ │ │ ├── position_aware_layer.py │ │ │ ├── 
robust_scanner_fusion_layer.py │ │ │ └── satrn_layers.py │ │ │ ├── module_losses │ │ │ ├── __init__.py │ │ │ ├── abi_module_loss.py │ │ │ ├── base.py │ │ │ ├── ce_module_loss.py │ │ │ └── ctc_module_loss.py │ │ │ ├── plugins │ │ │ ├── __init__.py │ │ │ └── common.py │ │ │ ├── postprocessors │ │ │ ├── __init__.py │ │ │ ├── attn_postprocessor.py │ │ │ ├── base.py │ │ │ └── ctc_postprocessor.py │ │ │ ├── preprocessors │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ └── tps_preprocessor.py │ │ │ └── recognizers │ │ │ ├── __init__.py │ │ │ ├── abinet.py │ │ │ ├── aster.py │ │ │ ├── base.py │ │ │ ├── crnn.py │ │ │ ├── encoder_decoder_recognizer.py │ │ │ ├── encoder_decoder_recognizer_tta.py │ │ │ ├── master.py │ │ │ ├── nrtr.py │ │ │ ├── robust_scanner.py │ │ │ ├── sar.py │ │ │ ├── satrn.py │ │ │ └── svtr.py │ ├── registry.py │ ├── structures │ │ ├── __init__.py │ │ ├── kie_data_sample.py │ │ ├── textdet_data_sample.py │ │ ├── textrecog_data_sample.py │ │ └── textspotting_data_sample.py │ ├── testing │ │ ├── __init__.py │ │ └── data.py │ ├── utils │ │ ├── __init__.py │ │ ├── bbox_utils.py │ │ ├── bezier_utils.py │ │ ├── check_argument.py │ │ ├── collect_env.py │ │ ├── data_converter_utils.py │ │ ├── fileio.py │ │ ├── img_utils.py │ │ ├── mask_utils.py │ │ ├── parsers.py │ │ ├── point_utils.py │ │ ├── polygon_utils.py │ │ ├── processing.py │ │ ├── setup_env.py │ │ ├── string_utils.py │ │ ├── transform_utils.py │ │ └── typing_utils.py │ ├── version.py │ └── visualization │ │ ├── __init__.py │ │ ├── base_visualizer.py │ │ ├── kie_visualizer.py │ │ ├── textdet_visualizer.py │ │ ├── textrecog_visualizer.py │ │ └── textspotting_visualizer.py ├── model-index.yml ├── my_test.sh ├── my_train.sh ├── projects │ ├── ABCNet │ │ ├── README.md │ │ ├── README_V2.md │ │ ├── abcnet │ │ │ ├── __init__.py │ │ │ ├── metric │ │ │ │ ├── __init__.py │ │ │ │ └── e2e_hmean_iou_metric.py │ │ │ ├── model │ │ │ │ ├── __init__.py │ │ │ │ ├── abcnet.py │ │ │ │ ├── abcnet_det_head.py │ │ │ │ ├── 
abcnet_det_module_loss.py │ │ │ │ ├── abcnet_det_postprocessor.py │ │ │ │ ├── abcnet_postprocessor.py │ │ │ │ ├── abcnet_rec.py │ │ │ │ ├── abcnet_rec_backbone.py │ │ │ │ ├── abcnet_rec_decoder.py │ │ │ │ ├── abcnet_rec_encoder.py │ │ │ │ ├── base_roi_extractor.py │ │ │ │ ├── base_roi_head.py │ │ │ │ ├── bezier_roi_extractor.py │ │ │ │ ├── bifpn.py │ │ │ │ ├── coordinate_head.py │ │ │ │ ├── rec_roi_head.py │ │ │ │ └── two_stage_text_spotting.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ └── bezier_utils.py │ │ ├── config │ │ │ ├── _base_ │ │ │ │ ├── datasets │ │ │ │ │ └── icdar2015.py │ │ │ │ ├── default_runtime.py │ │ │ │ └── schedules │ │ │ │ │ └── schedule_sgd_500e.py │ │ │ ├── abcnet │ │ │ │ ├── _base_abcnet_resnet50_fpn.py │ │ │ │ └── abcnet_resnet50_fpn_500e_icdar2015.py │ │ │ └── abcnet_v2 │ │ │ │ ├── _base_abcnet-v2_resnet50_bifpn.py │ │ │ │ └── abcnet-v2_resnet50_bifpn_500e_icdar2015.py │ │ └── dicts │ │ │ └── abcnet.txt │ ├── README.md │ ├── SPTS │ │ ├── README.md │ │ ├── config │ │ │ ├── _base_ │ │ │ │ ├── datasets │ │ │ │ │ ├── ctw1500-spts.py │ │ │ │ │ ├── icdar2013-spts.py │ │ │ │ │ ├── icdar2013.py │ │ │ │ │ ├── icdar2015-spts.py │ │ │ │ │ ├── icdar2015.py │ │ │ │ │ ├── mlt-spts.py │ │ │ │ │ ├── syntext1-spts.py │ │ │ │ │ ├── syntext2-spts.py │ │ │ │ │ ├── totaltext-spts.py │ │ │ │ │ └── totaltext.py │ │ │ │ └── default_runtime.py │ │ │ └── spts │ │ │ │ ├── _base_spts_resnet50.py │ │ │ │ ├── _base_spts_resnet50_mmocr.py │ │ │ │ ├── spts_resnet50_8xb8-150e_pretrain-spts.py │ │ │ │ ├── spts_resnet50_8xb8-200e_icdar2013.py │ │ │ │ ├── spts_resnet50_8xb8-200e_icdar2015.py │ │ │ │ └── spts_resnet50_8xb8-200e_totaltext.py │ │ ├── dicts │ │ │ └── spts.txt │ │ ├── spts │ │ │ ├── __init__.py │ │ │ ├── datasets │ │ │ │ ├── __init__.py │ │ │ │ ├── adel_dataset.py │ │ │ │ └── transforms │ │ │ │ │ └── spts_transforms.py │ │ │ ├── metric │ │ │ │ ├── __init__.py │ │ │ │ └── e2e_point_metric.py │ │ │ └── model │ │ │ │ ├── __init__.py │ │ │ │ ├── 
base_text_spotter.py │ │ │ │ ├── encoder_decoder_text_spotter.py │ │ │ │ ├── position_embedding.py │ │ │ │ ├── spts.py │ │ │ │ ├── spts_decoder.py │ │ │ │ ├── spts_dictionary.py │ │ │ │ ├── spts_encoder.py │ │ │ │ ├── spts_module_loss.py │ │ │ │ └── spts_postprocessor.py │ │ └── tools │ │ │ └── ckpt_adapter.py │ ├── example_project │ │ ├── README.md │ │ ├── configs │ │ │ └── dbnet_dummy-resnet_fpnc_1200e_icdar2015.py │ │ └── dummy │ │ │ ├── __init__.py │ │ │ └── dummy_resnet.py │ ├── faq.md │ └── selected.txt ├── requirements.txt ├── requirements │ ├── albu.txt │ ├── build.txt │ ├── docs.txt │ ├── mminstall.txt │ ├── optional.txt │ ├── readthedocs.txt │ ├── runtime.txt │ └── tests.txt ├── resources │ ├── illustration.jpg │ ├── kie.jpg │ ├── mmocr-logo.png │ ├── textdet.jpg │ ├── textrecog.jpg │ └── verification.png ├── setup.cfg ├── setup.py ├── tests │ ├── models │ │ └── textrecog │ │ │ └── test_preprocessors │ │ │ └── test_tps_preprocessor.py │ ├── test_apis │ │ └── test_inferencers │ │ │ ├── test_kie_inferencer.py │ │ │ ├── test_mmocr_inferencer.py │ │ │ ├── test_textdet_inferencer.py │ │ │ └── test_textrec_inferencer.py │ ├── test_datasets │ │ ├── test_dataset_wrapper.py │ │ ├── test_icdar_dataset.py │ │ ├── test_preparers │ │ │ ├── test_config_generators │ │ │ │ ├── test_textdet_config_generator.py │ │ │ │ ├── test_textrecog_config_generator.py │ │ │ │ └── test_textspotting_config_generator.py │ │ │ ├── test_data_preparer.py │ │ │ ├── test_dumpers │ │ │ │ └── test_dumpers.py │ │ │ ├── test_gatherers │ │ │ │ ├── test_mono_gatherer.py │ │ │ │ └── test_pair_gatherer.py │ │ │ ├── test_packers │ │ │ │ ├── test_textdet_packer.py │ │ │ │ ├── test_textrecog_packer.py │ │ │ │ └── test_textspotting_packer.py │ │ │ └── test_parsers │ │ │ │ ├── test_ctw1500_parser.py │ │ │ │ ├── test_funsd_parser.py │ │ │ │ ├── test_icdar_txt_parsers.py │ │ │ │ ├── test_naf_parser.py │ │ │ │ ├── test_sroie_parser.py │ │ │ │ ├── test_svt_parsers.py │ │ │ │ ├── test_tt_parsers.py │ │ │ │ 
└── test_wildreceipt_parsers.py │ │ ├── test_recog_lmdb_dataset.py │ │ ├── test_recog_text_dataset.py │ │ ├── test_samplers │ │ │ └── test_batch_aug.py │ │ ├── test_transforms │ │ │ ├── test_adapters.py │ │ │ ├── test_formatting.py │ │ │ ├── test_loading.py │ │ │ ├── test_ocr_transforms.py │ │ │ ├── test_textdet_transforms.py │ │ │ ├── test_textrecog_transforms.py │ │ │ └── test_wrappers.py │ │ └── test_wildreceipt_dataset.py │ ├── test_engine │ │ └── test_hooks │ │ │ └── test_visualization_hook.py │ ├── test_evaluation │ │ ├── test_evaluator │ │ │ └── test_multi_datasets_evaluator.py │ │ ├── test_functional │ │ │ └── test_hmean.py │ │ └── test_metrics │ │ │ ├── test_f_metric.py │ │ │ ├── test_hmean_iou_metric.py │ │ │ └── test_recog_metric.py │ ├── test_init.py │ ├── test_models │ │ ├── test_common │ │ │ ├── test_backbones │ │ │ │ └── test_clip_resnet.py │ │ │ ├── test_layers │ │ │ │ └── test_transformer_layers.py │ │ │ ├── test_losses │ │ │ │ ├── test_bce_loss.py │ │ │ │ ├── test_dice_loss.py │ │ │ │ └── test_l1_loss.py │ │ │ ├── test_modules │ │ │ │ └── test_transformer_module.py │ │ │ └── test_plugins │ │ │ │ └── test_avgpool.py │ │ ├── test_kie │ │ │ ├── test_extractors │ │ │ │ └── test_sdmgr.py │ │ │ ├── test_heads │ │ │ │ └── test_sdmgr_head.py │ │ │ ├── test_module_losses │ │ │ │ └── test_sdmgr_module_loss.py │ │ │ └── test_postprocessors │ │ │ │ └── test_sdmgr_postprocessor.py │ │ ├── test_textdet │ │ │ ├── test_data_preprocessors │ │ │ │ └── test_textdet_data_preprocessor.py │ │ │ ├── test_detectors │ │ │ │ └── test_drrg.py │ │ │ ├── test_heads │ │ │ │ ├── test_base_head.py │ │ │ │ ├── test_db_head.py │ │ │ │ ├── test_drrg_head.py │ │ │ │ ├── test_fce_head.py │ │ │ │ ├── test_pan_head.py │ │ │ │ ├── test_pse_head.py │ │ │ │ └── test_textsnake_head.py │ │ │ ├── test_module_losses │ │ │ │ ├── test_db_module_loss.py │ │ │ │ ├── test_drrg_module_loss.py │ │ │ │ ├── test_fce_module_loss.py │ │ │ │ ├── test_pan_module_loss.py │ │ │ │ ├── test_pse_module_loss.py 
│ │ │ │ └── test_textsnake_module_loss.py │ │ │ ├── test_necks │ │ │ │ ├── test_fpem_ffm.py │ │ │ │ ├── test_fpn_cat.py │ │ │ │ ├── test_fpn_unet.py │ │ │ │ └── test_fpnf.py │ │ │ ├── test_postprocessors │ │ │ │ ├── test_base_postprocessor.py │ │ │ │ ├── test_db_postprocessor.py │ │ │ │ ├── test_drrg_postprocessor.py │ │ │ │ ├── test_fce_postprocessor.py │ │ │ │ ├── test_pan_postprocessor.py │ │ │ │ ├── test_pse_postprocessor.py │ │ │ │ └── test_textsnake_postprocessor.py │ │ │ └── test_wrappers │ │ │ │ └── test_mmdet_wrapper.py │ │ └── test_textrecog │ │ │ ├── test_backbones │ │ │ ├── test_mini_vgg.py │ │ │ ├── test_mobilenet_v2.py │ │ │ ├── test_nrtr_modality_transformer.py │ │ │ ├── test_resnet.py │ │ │ ├── test_resnet31_ocr.py │ │ │ ├── test_resnet_abi.py │ │ │ └── test_shallow_cnn.py │ │ │ ├── test_data_preprocessors │ │ │ └── test_data_preprocessor.py │ │ │ ├── test_decoders │ │ │ ├── test_abi_fuser.py │ │ │ ├── test_abi_language_decoder.py │ │ │ ├── test_abi_vision_decoder.py │ │ │ ├── test_aster_decoder.py │ │ │ ├── test_base_decoder.py │ │ │ ├── test_crnn_decoder.py │ │ │ ├── test_master_decoder.py │ │ │ ├── test_nrtr_decoder.py │ │ │ ├── test_position_attention_decoder.py │ │ │ ├── test_robust_scanner_fuser.py │ │ │ ├── test_sar_decoder.py │ │ │ ├── test_sequence_attention_decoder.py │ │ │ └── test_svtr_decoder.py │ │ │ ├── test_dictionary │ │ │ └── test_dictionary.py │ │ │ ├── test_encoders │ │ │ ├── test_abi_encoder.py │ │ │ ├── test_aster_encoder.py │ │ │ ├── test_channel_reduction_encoder.py │ │ │ ├── test_nrtr_encoder.py │ │ │ ├── test_sar_encoder.py │ │ │ ├── test_satrn_decoder.py │ │ │ └── test_svtr_encoder.py │ │ │ ├── test_layers │ │ │ └── test_conv_layer.py │ │ │ ├── test_module_losses │ │ │ ├── test_abi_module_loss.py │ │ │ ├── test_base_recog_module_loss.py │ │ │ ├── test_ce_module_loss.py │ │ │ └── test_ctc_module_loss.py │ │ │ ├── test_plugins │ │ │ ├── test_gcamodule.py │ │ │ └── test_maxpool.py │ │ │ ├── test_postprocessors │ │ │ ├── 
test_attn_postprocessor.py │ │ │ ├── test_base_textrecog_postprocessor.py │ │ │ └── test_ctc_postprocessor.py │ │ │ └── test_recognizers │ │ │ ├── test_encoder_decoder_recognizer.py │ │ │ └── test_encoder_decoder_recognizer_tta.py │ ├── test_structures │ │ ├── test_kie_data_sample.py │ │ ├── test_textdet_data_sample.py │ │ ├── test_textrecog_data_sample.py │ │ └── test_textspotting_data_sample.py │ ├── test_utils │ │ ├── test_bbox_utils.py │ │ ├── test_check_argument.py │ │ ├── test_data_converter_utils.py │ │ ├── test_fileio.py │ │ ├── test_img_utils.py │ │ ├── test_mask_utils.py │ │ ├── test_parsers.py │ │ ├── test_point_utils.py │ │ ├── test_polygon_utils.py │ │ ├── test_processing.py │ │ ├── test_string_utils.py │ │ └── test_transform_utils.py │ └── test_visualization │ │ ├── test_base_visualizer.py │ │ ├── test_kie_visualizer.py │ │ ├── test_textdet_visualizer.py │ │ ├── test_textrecog_visualizer.py │ │ └── test_textspotting_visualizer.py └── tools │ ├── analysis_tools │ ├── browse_dataset.py │ ├── get_flops.py │ ├── offline_eval.py │ └── print_config.py │ ├── dataset_converters │ ├── common │ │ ├── curvedsyntext_converter.py │ │ └── extract_kaist.py │ ├── kie │ │ └── closeset_to_openset.py │ ├── prepare_dataset.py │ ├── textdet │ │ ├── art_converter.py │ │ ├── bid_converter.py │ │ ├── coco_to_line_dict.py │ │ ├── cocotext_converter.py │ │ ├── data_migrator.py │ │ ├── detext_converter.py │ │ ├── funsd_converter.py │ │ ├── hiertext_converter.py │ │ ├── ic11_converter.py │ │ ├── ilst_converter.py │ │ ├── imgur_converter.py │ │ ├── kaist_converter.py │ │ ├── lsvt_converter.py │ │ ├── lv_converter.py │ │ ├── mtwi_converter.py │ │ ├── naf_converter.py │ │ ├── rctw_converter.py │ │ ├── rects_converter.py │ │ ├── sroie_converter.py │ │ └── vintext_converter.py │ └── textrecog │ │ ├── art_converter.py │ │ ├── bid_converter.py │ │ ├── cocotext_converter.py │ │ ├── data_migrator.py │ │ ├── detext_converter.py │ │ ├── funsd_converter.py │ │ ├── hiertext_converter.py │ │ 
├── ic11_converter.py │ │ ├── ilst_converter.py │ │ ├── imgur_converter.py │ │ ├── kaist_converter.py │ │ ├── lmdb_converter.py │ │ ├── lsvt_converter.py │ │ ├── lv_converter.py │ │ ├── mtwi_converter.py │ │ ├── naf_converter.py │ │ ├── openvino_converter.py │ │ ├── rctw_converter.py │ │ ├── rects_converter.py │ │ ├── sroie_converter.py │ │ └── vintext_converter.py │ ├── dist_test.sh │ ├── dist_train.sh │ ├── infer.py │ ├── model_converters │ └── publish_model.py │ ├── slurm_test.sh │ ├── slurm_train.sh │ ├── test.py │ └── train.py └── textfussion ├── CITATION.cff ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── PHILOSOPHY.md ├── README.md ├── _typos.toml ├── build └── lib │ └── diffusers │ ├── __init__.py │ ├── commands │ ├── __init__.py │ ├── diffusers_cli.py │ └── env.py │ ├── configuration_utils.py │ ├── dependency_versions_check.py │ ├── dependency_versions_table.py │ ├── experimental │ ├── __init__.py │ └── rl │ │ ├── __init__.py │ │ └── value_guided_sampling.py │ ├── image_processor.py │ ├── loaders.py │ ├── models │ ├── __init__.py │ ├── attention.py │ ├── attention_flax.py │ ├── attention_processor.py │ ├── autoencoder_kl.py │ ├── controlnet.py │ ├── controlnet_flax.py │ ├── cross_attention.py │ ├── dual_transformer_2d.py │ ├── embeddings.py │ ├── embeddings_flax.py │ ├── modeling_flax_pytorch_utils.py │ ├── modeling_flax_utils.py │ ├── modeling_pytorch_flax_utils.py │ ├── modeling_utils.py │ ├── prior_transformer.py │ ├── resnet.py │ ├── resnet_flax.py │ ├── t5_film_transformer.py │ ├── transformer_2d.py │ ├── transformer_temporal.py │ ├── unet_1d.py │ ├── unet_1d_blocks.py │ ├── unet_2d.py │ ├── unet_2d_blocks.py │ ├── unet_2d_blocks_flax.py │ ├── unet_2d_condition.py │ ├── unet_2d_condition_flax.py │ ├── unet_3d_blocks.py │ ├── unet_3d_condition.py │ ├── vae.py │ ├── vae_flax.py │ └── vq_model.py │ ├── optimization.py │ ├── pipeline_utils.py │ ├── pipelines │ ├── __init__.py │ ├── alt_diffusion │ │ ├── __init__.py 
│ │ ├── modeling_roberta_series.py │ │ ├── pipeline_alt_diffusion.py │ │ └── pipeline_alt_diffusion_img2img.py │ ├── audio_diffusion │ │ ├── __init__.py │ │ ├── mel.py │ │ └── pipeline_audio_diffusion.py │ ├── audioldm │ │ ├── __init__.py │ │ └── pipeline_audioldm.py │ ├── dance_diffusion │ │ ├── __init__.py │ │ └── pipeline_dance_diffusion.py │ ├── ddim │ │ ├── __init__.py │ │ └── pipeline_ddim.py │ ├── ddpm │ │ ├── __init__.py │ │ └── pipeline_ddpm.py │ ├── dit │ │ ├── __init__.py │ │ └── pipeline_dit.py │ ├── latent_diffusion │ │ ├── __init__.py │ │ ├── pipeline_latent_diffusion.py │ │ └── pipeline_latent_diffusion_superresolution.py │ ├── latent_diffusion_uncond │ │ ├── __init__.py │ │ └── pipeline_latent_diffusion_uncond.py │ ├── onnx_utils.py │ ├── paint_by_example │ │ ├── __init__.py │ │ ├── image_encoder.py │ │ └── pipeline_paint_by_example.py │ ├── pipeline_flax_utils.py │ ├── pipeline_utils.py │ ├── pndm │ │ ├── __init__.py │ │ └── pipeline_pndm.py │ ├── repaint │ │ ├── __init__.py │ │ └── pipeline_repaint.py │ ├── score_sde_ve │ │ ├── __init__.py │ │ └── pipeline_score_sde_ve.py │ ├── semantic_stable_diffusion │ │ ├── __init__.py │ │ └── pipeline_semantic_stable_diffusion.py │ ├── spectrogram_diffusion │ │ ├── __init__.py │ │ ├── continous_encoder.py │ │ ├── midi_utils.py │ │ ├── notes_encoder.py │ │ └── pipeline_spectrogram_diffusion.py │ ├── stable_diffusion │ │ ├── __init__.py │ │ ├── convert_from_ckpt.py │ │ ├── pipeline_cycle_diffusion.py │ │ ├── pipeline_flax_stable_diffusion.py │ │ ├── pipeline_flax_stable_diffusion_controlnet.py │ │ ├── pipeline_flax_stable_diffusion_img2img.py │ │ ├── pipeline_flax_stable_diffusion_inpaint.py │ │ ├── pipeline_onnx_stable_diffusion.py │ │ ├── pipeline_onnx_stable_diffusion_img2img.py │ │ ├── pipeline_onnx_stable_diffusion_inpaint.py │ │ ├── pipeline_onnx_stable_diffusion_inpaint_legacy.py │ │ ├── pipeline_onnx_stable_diffusion_upscale.py │ │ ├── pipeline_stable_diffusion.py │ │ ├── 
pipeline_stable_diffusion_attend_and_excite.py │ │ ├── pipeline_stable_diffusion_controlnet.py │ │ ├── pipeline_stable_diffusion_depth2img.py │ │ ├── pipeline_stable_diffusion_image_variation.py │ │ ├── pipeline_stable_diffusion_img2img.py │ │ ├── pipeline_stable_diffusion_inpaint.py │ │ ├── pipeline_stable_diffusion_inpaint_legacy.py │ │ ├── pipeline_stable_diffusion_instruct_pix2pix.py │ │ ├── pipeline_stable_diffusion_k_diffusion.py │ │ ├── pipeline_stable_diffusion_latent_upscale.py │ │ ├── pipeline_stable_diffusion_model_editing.py │ │ ├── pipeline_stable_diffusion_panorama.py │ │ ├── pipeline_stable_diffusion_pix2pix_zero.py │ │ ├── pipeline_stable_diffusion_sag.py │ │ ├── pipeline_stable_diffusion_upscale.py │ │ ├── pipeline_stable_unclip.py │ │ ├── pipeline_stable_unclip_img2img.py │ │ ├── safety_checker.py │ │ ├── safety_checker_flax.py │ │ └── stable_unclip_image_normalizer.py │ ├── stable_diffusion_safe │ │ ├── __init__.py │ │ ├── pipeline_stable_diffusion_safe.py │ │ └── safety_checker.py │ ├── stochastic_karras_ve │ │ ├── __init__.py │ │ └── pipeline_stochastic_karras_ve.py │ ├── text_to_video_synthesis │ │ ├── __init__.py │ │ └── pipeline_text_to_video_synth.py │ ├── unclip │ │ ├── __init__.py │ │ ├── pipeline_unclip.py │ │ ├── pipeline_unclip_image_variation.py │ │ └── text_proj.py │ ├── versatile_diffusion │ │ ├── __init__.py │ │ ├── modeling_text_unet.py │ │ ├── pipeline_versatile_diffusion.py │ │ ├── pipeline_versatile_diffusion_dual_guided.py │ │ ├── pipeline_versatile_diffusion_image_variation.py │ │ └── pipeline_versatile_diffusion_text_to_image.py │ └── vq_diffusion │ │ ├── __init__.py │ │ └── pipeline_vq_diffusion.py │ ├── schedulers │ ├── __init__.py │ ├── scheduling_ddim.py │ ├── scheduling_ddim_flax.py │ ├── scheduling_ddim_inverse.py │ ├── scheduling_ddpm.py │ ├── scheduling_ddpm_flax.py │ ├── scheduling_deis_multistep.py │ ├── scheduling_dpmsolver_multistep.py │ ├── scheduling_dpmsolver_multistep_flax.py │ ├── 
scheduling_dpmsolver_singlestep.py │ ├── scheduling_euler_ancestral_discrete.py │ ├── scheduling_euler_discrete.py │ ├── scheduling_heun_discrete.py │ ├── scheduling_ipndm.py │ ├── scheduling_k_dpm_2_ancestral_discrete.py │ ├── scheduling_k_dpm_2_discrete.py │ ├── scheduling_karras_ve.py │ ├── scheduling_karras_ve_flax.py │ ├── scheduling_lms_discrete.py │ ├── scheduling_lms_discrete_flax.py │ ├── scheduling_pndm.py │ ├── scheduling_pndm_flax.py │ ├── scheduling_repaint.py │ ├── scheduling_sde_ve.py │ ├── scheduling_sde_ve_flax.py │ ├── scheduling_sde_vp.py │ ├── scheduling_unclip.py │ ├── scheduling_unipc_multistep.py │ ├── scheduling_utils.py │ ├── scheduling_utils_flax.py │ └── scheduling_vq_diffusion.py │ ├── training_utils.py │ └── utils │ ├── __init__.py │ ├── accelerate_utils.py │ ├── constants.py │ ├── deprecation_utils.py │ ├── doc_utils.py │ ├── dummy_flax_and_transformers_objects.py │ ├── dummy_flax_objects.py │ ├── dummy_note_seq_objects.py │ ├── dummy_onnx_objects.py │ ├── dummy_pt_objects.py │ ├── dummy_torch_and_librosa_objects.py │ ├── dummy_torch_and_scipy_objects.py │ ├── dummy_torch_and_transformers_and_k_diffusion_objects.py │ ├── dummy_torch_and_transformers_and_onnx_objects.py │ ├── dummy_torch_and_transformers_objects.py │ ├── dummy_transformers_and_torch_and_note_seq_objects.py │ ├── dynamic_modules_utils.py │ ├── hub_utils.py │ ├── import_utils.py │ ├── logging.py │ ├── model_card_template.md │ ├── outputs.py │ ├── pil_utils.py │ ├── testing_utils.py │ └── torch_utils.py ├── docker ├── diffusers-flax-cpu │ └── Dockerfile ├── diffusers-flax-tpu │ └── Dockerfile ├── diffusers-onnxruntime-cpu │ └── Dockerfile ├── diffusers-onnxruntime-cuda │ └── Dockerfile ├── diffusers-pytorch-cpu │ └── Dockerfile └── diffusers-pytorch-cuda │ └── Dockerfile ├── docs ├── README.md ├── TRANSLATING.md └── source │ ├── _config.py │ ├── en │ ├── _toctree.yml │ ├── api │ │ ├── configuration.mdx │ │ ├── diffusion_pipeline.mdx │ │ ├── experimental │ │ │ └── rl.mdx │ 
│ ├── loaders.mdx │ │ ├── logging.mdx │ │ ├── models.mdx │ │ ├── outputs.mdx │ │ ├── pipelines │ │ │ ├── alt_diffusion.mdx │ │ │ ├── audio_diffusion.mdx │ │ │ ├── audioldm.mdx │ │ │ ├── cycle_diffusion.mdx │ │ │ ├── dance_diffusion.mdx │ │ │ ├── ddim.mdx │ │ │ ├── ddpm.mdx │ │ │ ├── dit.mdx │ │ │ ├── latent_diffusion.mdx │ │ │ ├── latent_diffusion_uncond.mdx │ │ │ ├── overview.mdx │ │ │ ├── paint_by_example.mdx │ │ │ ├── pndm.mdx │ │ │ ├── repaint.mdx │ │ │ ├── score_sde_ve.mdx │ │ │ ├── semantic_stable_diffusion.mdx │ │ │ ├── spectrogram_diffusion.mdx │ │ │ ├── stable_diffusion │ │ │ │ ├── attend_and_excite.mdx │ │ │ │ ├── controlnet.mdx │ │ │ │ ├── depth2img.mdx │ │ │ │ ├── image_variation.mdx │ │ │ │ ├── img2img.mdx │ │ │ │ ├── inpaint.mdx │ │ │ │ ├── latent_upscale.mdx │ │ │ │ ├── model_editing.mdx │ │ │ │ ├── overview.mdx │ │ │ │ ├── panorama.mdx │ │ │ │ ├── pix2pix.mdx │ │ │ │ ├── pix2pix_zero.mdx │ │ │ │ ├── self_attention_guidance.mdx │ │ │ │ ├── text2img.mdx │ │ │ │ └── upscale.mdx │ │ │ ├── stable_diffusion_2.mdx │ │ │ ├── stable_diffusion_safe.mdx │ │ │ ├── stable_unclip.mdx │ │ │ ├── stochastic_karras_ve.mdx │ │ │ ├── text_to_video.mdx │ │ │ ├── unclip.mdx │ │ │ ├── versatile_diffusion.mdx │ │ │ └── vq_diffusion.mdx │ │ └── schedulers │ │ │ ├── ddim.mdx │ │ │ ├── ddim_inverse.mdx │ │ │ ├── ddpm.mdx │ │ │ ├── deis.mdx │ │ │ ├── dpm_discrete.mdx │ │ │ ├── dpm_discrete_ancestral.mdx │ │ │ ├── euler.mdx │ │ │ ├── euler_ancestral.mdx │ │ │ ├── heun.mdx │ │ │ ├── ipndm.mdx │ │ │ ├── lms_discrete.mdx │ │ │ ├── multistep_dpm_solver.mdx │ │ │ ├── overview.mdx │ │ │ ├── pndm.mdx │ │ │ ├── repaint.mdx │ │ │ ├── score_sde_ve.mdx │ │ │ ├── score_sde_vp.mdx │ │ │ ├── singlestep_dpm_solver.mdx │ │ │ ├── stochastic_karras_ve.mdx │ │ │ ├── unipc.mdx │ │ │ └── vq_diffusion.mdx │ ├── conceptual │ │ ├── contribution.mdx │ │ ├── ethical_guidelines.mdx │ │ ├── evaluation.mdx │ │ └── philosophy.mdx │ ├── imgs │ │ ├── access_request.png │ │ └── diffusers_library.jpg │ ├── 
index.mdx │ ├── installation.mdx │ ├── optimization │ │ ├── fp16.mdx │ │ ├── habana.mdx │ │ ├── mps.mdx │ │ ├── onnx.mdx │ │ ├── open_vino.mdx │ │ ├── opt_overview.mdx │ │ ├── torch2.0.mdx │ │ └── xformers.mdx │ ├── quicktour.mdx │ ├── stable_diffusion.mdx │ ├── training │ │ ├── controlnet.mdx │ │ ├── dreambooth.mdx │ │ ├── instructpix2pix.mdx │ │ ├── lora.mdx │ │ ├── overview.mdx │ │ ├── text2image.mdx │ │ ├── text_inversion.mdx │ │ └── unconditional_training.mdx │ ├── tutorials │ │ ├── basic_training.mdx │ │ └── tutorial_overview.mdx │ └── using-diffusers │ │ ├── audio.mdx │ │ ├── conditional_image_generation.mdx │ │ ├── contribute_pipeline.mdx │ │ ├── controlling_generation.mdx │ │ ├── custom_pipeline_examples.mdx │ │ ├── custom_pipeline_overview.mdx │ │ ├── depth2img.mdx │ │ ├── img2img.mdx │ │ ├── inpaint.mdx │ │ ├── kerascv.mdx │ │ ├── loading.mdx │ │ ├── loading_overview.mdx │ │ ├── other-modalities.mdx │ │ ├── pipeline_overview.mdx │ │ ├── reproducibility.mdx │ │ ├── reusing_seeds.mdx │ │ ├── rl.mdx │ │ ├── schedulers.mdx │ │ ├── stable_diffusion_jax_how_to.mdx │ │ ├── unconditional_image_generation.mdx │ │ ├── using_safetensors │ │ ├── using_safetensors.mdx │ │ ├── weighted_prompts.mdx │ │ └── write_own_pipeline.mdx │ ├── ko │ ├── _toctree.yml │ ├── in_translation.mdx │ ├── index.mdx │ ├── installation.mdx │ └── quicktour.mdx │ └── zh │ ├── _toctree.yml │ ├── index.mdx │ ├── installation.mdx │ └── quicktour.mdx ├── examples ├── README.md ├── community │ ├── README.md │ ├── bit_diffusion.py │ ├── checkpoint_merger.py │ ├── clip_guided_stable_diffusion.py │ ├── clip_guided_stable_diffusion_img2img.py │ ├── composable_stable_diffusion.py │ ├── ddim_noise_comparative_analysis.py │ ├── imagic_stable_diffusion.py │ ├── img2img_inpainting.py │ ├── interpolate_stable_diffusion.py │ ├── lpw_stable_diffusion.py │ ├── lpw_stable_diffusion_onnx.py │ ├── magic_mix.py │ ├── multilingual_stable_diffusion.py │ ├── one_step_unet.py │ ├── sd_text2img_k_diffusion.py │ ├── 
seed_resize_stable_diffusion.py │ ├── speech_to_image_diffusion.py │ ├── stable_diffusion_comparison.py │ ├── stable_diffusion_controlnet_img2img.py │ ├── stable_diffusion_controlnet_inpaint.py │ ├── stable_diffusion_controlnet_inpaint_img2img.py │ ├── stable_diffusion_mega.py │ ├── stable_unclip.py │ ├── text_inpainting.py │ ├── tiled_upscaling.py │ ├── unclip_image_interpolation.py │ ├── unclip_text_interpolation.py │ └── wildcard_stable_diffusion.py ├── conftest.py ├── controlnet │ ├── README.md │ ├── requirements.txt │ ├── requirements_flax.txt │ ├── train_controlnet.py │ └── train_controlnet_flax.py ├── dreambooth │ ├── README.md │ ├── requirements.txt │ ├── requirements_flax.txt │ ├── train_dreambooth.py │ ├── train_dreambooth_flax.py │ └── train_dreambooth_lora.py ├── inference │ ├── README.md │ ├── image_to_image.py │ └── inpainting.py ├── instruct_pix2pix │ ├── README.md │ ├── requirements.txt │ └── train_instruct_pix2pix.py ├── research_projects │ ├── README.md │ ├── colossalai │ │ ├── README.md │ │ ├── inference.py │ │ ├── requirement.txt │ │ └── train_dreambooth_colossalai.py │ ├── dreambooth_inpaint │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── train_dreambooth_inpaint.py │ │ └── train_dreambooth_inpaint_lora.py │ ├── intel_opts │ │ ├── README.md │ │ ├── inference_bf16.py │ │ └── textual_inversion │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ └── textual_inversion_bf16.py │ ├── lora │ │ ├── README.md │ │ ├── requirements.txt │ │ └── train_text_to_image_lora.py │ ├── mulit_token_textual_inversion │ │ ├── README.md │ │ ├── multi_token_clip.py │ │ ├── requirements.txt │ │ ├── requirements_flax.txt │ │ ├── textual_inversion.py │ │ └── textual_inversion_flax.py │ ├── multi_subject_dreambooth │ │ ├── README.md │ │ ├── requirements.txt │ │ └── train_multi_subject_dreambooth.py │ └── onnxruntime │ │ ├── README.md │ │ ├── text_to_image │ │ ├── README.md │ │ ├── requirements.txt │ │ └── train_text_to_image.py │ │ ├── textual_inversion │ │ ├── 
README.md │ │ ├── requirements.txt │ │ └── textual_inversion.py │ │ └── unconditional_image_generation │ │ ├── README.md │ │ ├── requirements.txt │ │ └── train_unconditional.py ├── rl │ ├── README.md │ └── run_diffuser_locomotion.py ├── test_examples.py ├── text_to_image │ ├── README.md │ ├── requirements.txt │ ├── requirements_flax.txt │ ├── train_text_to_image.py │ ├── train_text_to_image_flax.py │ └── train_text_to_image_lora.py ├── textual_inversion │ ├── README.md │ ├── requirements.txt │ ├── requirements_flax.txt │ ├── textual_inversion.py │ └── textual_inversion_flax.py └── unconditional_image_generation │ ├── README.md │ ├── requirements.txt │ └── train_unconditional.py ├── my_inpainting ├── label_list.json ├── my_build_synth_data_baseline.py ├── my_build_synth_data_baseline_large_num.py ├── my_test_inpainting_baseline.py ├── my_test_inpainting_baseline_batch_test.py ├── my_test_inpainting_char.py ├── my_test_inpainting_char_multi.py ├── my_test_inpainting_only_pre_prompt_word_multi.py ├── my_test_inpainting_pure_word_prefix_prompt_batch_test.py ├── my_test_inpainting_with_adapter_char_multi.py ├── my_test_inpainting_with_adapter_with_fussion_te_word_multi.py ├── my_test_inpainting_with_adapter_with_pre_prompt_word_multi.py ├── my_test_inpainting_with_adapter_word_multi.py ├── my_test_inpainting_with_adapter_zero_prompt_char_multi.py ├── my_test_inpainting_with_char_adapter_char_multi.py ├── my_test_inpainting_with_controlnet_batch_test.py ├── my_test_inpainting_with_full_controlnet_batch_test.py ├── my_test_inpainting_with_full_controlnet_dual_text_batch_test.py ├── my_train_only_pre_prompt.sh ├── my_train_prefix_prompt.sh ├── my_train_with_adapter_with_pre_prompt.sh ├── my_train_with_char_adapter.sh ├── my_train_with_controlnet.sh ├── my_train_with_full_controlnet.sh ├── my_train_with_full_controlnet_with_dual_text.sh ├── my_train_with_single_adapter.sh ├── my_train_with_single_adapter_with_fussion_te.sh ├── new_paradigm_any_demo.py ├── 
new_paradigm_build_baseline.py ├── new_paradigm_build_baseline_no_crop.py ├── new_paradigm_build_demo.py ├── new_paradigm_build_dual_text.py ├── new_paradigm_build_with_text_vae.py ├── new_paradigm_dual_text_encoder.sh ├── new_paradigm_train.sh ├── src │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── __init__.cpython-39.pyc │ ├── build_synth_data │ │ ├── __pycache__ │ │ │ ├── batch_utils.cpython-310.pyc │ │ │ ├── crop_tools.cpython-310.pyc │ │ │ ├── glyph_utils.cpython-310.pyc │ │ │ └── rec_inferencer.cpython-310.pyc │ │ ├── batch_utils.py │ │ ├── crop_tools.py │ │ ├── glyph_utils.py │ │ └── rec_inferencer.py │ ├── dataset │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── base_text_dataset.cpython-310.pyc │ │ │ ├── batch_utils.cpython-310.pyc │ │ │ ├── crop_image_for_test.cpython-310.pyc │ │ │ ├── new_paradigm_text_dataset.cpython-310.pyc │ │ │ ├── text_dataset.cpython-310.pyc │ │ │ ├── text_dataset.cpython-39.pyc │ │ │ ├── text_mapper.cpython-310.pyc │ │ │ ├── text_mapper.cpython-38.pyc │ │ │ ├── text_mapper.cpython-39.pyc │ │ │ ├── utils.cpython-310.pyc │ │ │ ├── utils.cpython-39.pyc │ │ │ ├── zoom_up_pure_text_dataset.cpython-310.pyc │ │ │ ├── zoom_up_text_dataset.cpython-310.pyc │ │ │ ├── zoom_up_text_dataset.cpython-38.pyc │ │ │ ├── zoom_up_text_dataset.cpython-39.pyc │ │ │ └── zoom_up_with_blank_text_dataset.cpython-310.pyc │ │ ├── base_text_dataset.py │ │ ├── batch_utils.py │ │ ├── crop_image_for_test.py │ │ ├── new_paradigm_text_dataset.py │ │ ├── text_dataset.py │ │ ├── text_mapper.py │ │ ├── utils.py │ │ ├── zoom_up_pure_text_dataset.py │ │ ├── zoom_up_text_dataset.py │ │ └── zoom_up_with_blank_text_dataset.py │ ├── engines │ │ ├── __init__.py │ │ ├── finetune_text_to_image.py │ │ ├── finetune_text_to_image_inpainting.py │ │ ├── finetune_text_to_image_inpainting_with_adapter_with_pre_prompt.py │ │ ├── 
finetune_text_to_image_inpainting_with_char_adapter.py │ │ ├── finetune_text_to_image_inpainting_with_controlnet.py │ │ ├── finetune_text_to_image_inpainting_with_full_controlnet.py │ │ ├── finetune_text_to_image_inpainting_with_full_controlnet_with_dual_text.py │ │ ├── finetune_text_to_image_inpainting_with_pre_prompt.py │ │ ├── finetune_text_to_image_inpainting_with_prefix_prompt.py │ │ ├── finetune_text_to_image_inpainting_with_single_adapter.py │ │ ├── finetune_text_to_image_inpainting_with_single_adapter_with_fussion_te.py │ │ ├── new_paradigm_inpainting.py │ │ └── new_paradigm_inpainting_dual_text.py │ ├── models │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── adapter.cpython-310.pyc │ │ │ ├── adapter.cpython-39.pyc │ │ │ ├── adapter_with_char_embedding.cpython-39.pyc │ │ │ ├── adapter_with_fussion_TE.cpython-310.pyc │ │ │ ├── adapter_with_fussion_TE.cpython-38.pyc │ │ │ ├── adapter_with_fussion_TE.cpython-39.pyc │ │ │ ├── adapter_with_pre_prompt.cpython-310.pyc │ │ │ ├── attention.cpython-310.pyc │ │ │ ├── char_encoder.cpython-310.pyc │ │ │ ├── controlnet.cpython-310.pyc │ │ │ ├── dual_controlnet.cpython-310.pyc │ │ │ ├── fussion_text_embedding.cpython-39.pyc │ │ │ ├── modules.cpython-310.pyc │ │ │ ├── only_pre_prompt.cpython-310.pyc │ │ │ ├── only_prefix_prompt.cpython-310.pyc │ │ │ ├── openaimodel.cpython-310.pyc │ │ │ ├── transformer_2d_with_controlnet.cpython-310.pyc │ │ │ ├── transformer_2d_with_dual_text_controlnet.cpython-310.pyc │ │ │ ├── unet_2d_blocks_with_adapter.cpython-310.pyc │ │ │ ├── unet_2d_blocks_with_adapter.cpython-38.pyc │ │ │ ├── unet_2d_blocks_with_adapter.cpython-39.pyc │ │ │ ├── unet_2d_blocks_with_controlnet.cpython-310.pyc │ │ │ ├── unet_2d_blocks_with_dual_text_controlnet.cpython-310.pyc │ │ │ ├── unet_2d_with_adapter.cpython-310.pyc │ │ │ ├── unet_2d_with_adapter.cpython-38.pyc │ │ │ ├── unet_2d_with_adapter.cpython-39.pyc │ │ 
│ ├── unet_2d_with_controlnet.cpython-310.pyc │ │ │ ├── unet_2d_with_dual_text.cpython-310.pyc │ │ │ ├── unet_2d_with_dual_text_controlnet.cpython-310.pyc │ │ │ └── union_net.cpython-310.pyc │ │ ├── adapter.py │ │ ├── adapter_with_char_embedding.py │ │ ├── adapter_with_fussion_TE.py │ │ ├── adapter_with_pre_prompt.py │ │ ├── attention.py │ │ ├── char_encoder.py │ │ ├── controlnet.py │ │ ├── dual_controlnet.py │ │ ├── fussion_text_embedding.py │ │ ├── modules.py │ │ ├── only_pre_prompt.py │ │ ├── only_prefix_prompt.py │ │ ├── openaimodel.py │ │ ├── ori_controlnet.py │ │ ├── transformer_2d_with_dual_text_controlnet.py │ │ ├── unet_2d_blocks_with_adapter.py │ │ ├── unet_2d_blocks_with_controlnet.py │ │ ├── unet_2d_blocks_with_dual_text_controlnet.py │ │ ├── unet_2d_with_adapter.py │ │ ├── unet_2d_with_controlnet.py │ │ ├── unet_2d_with_dual_text.py │ │ ├── unet_2d_with_dual_text_controlnet.py │ │ └── union_net.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── new_paradigm_inpainting_dual_text_encoder.cpython-310.pyc │ │ │ ├── stable_diffusion_inpainting.cpython-310.pyc │ │ │ ├── stable_diffusion_inpainting.cpython-39.pyc │ │ │ ├── stable_diffusion_inpainting_dual_text_full_controlnet.cpython-310.pyc │ │ │ ├── stable_diffusion_inpainting_full_controlnet.cpython-310.pyc │ │ │ ├── stable_diffusion_inpainting_mask_controlnet.cpython-310.pyc │ │ │ ├── stable_diffusion_inpainting_only_controlnet.cpython-310.pyc │ │ │ ├── stable_diffusion_inpainting_only_pre_prompt.cpython-310.pyc │ │ │ ├── stable_diffusion_inpainting_only_prefix_prompt.cpython-310.pyc │ │ │ ├── stable_diffusion_inpainting_text_glyph.cpython-310.pyc │ │ │ ├── stable_diffusion_inpainting_text_vae_text_glyph.cpython-310.pyc │ │ │ ├── stable_diffusion_inpainting_with_adapter.cpython-39.pyc │ │ │ ├── stable_diffusion_inpainting_with_adapter_with_fussion_te.cpython-310.pyc │ │ │ ├── 
stable_diffusion_inpainting_with_adapter_with_fussion_te.cpython-39.pyc │ │ │ ├── stable_diffusion_inpainting_with_adapter_zero_prompt.cpython-39.pyc │ │ │ └── stable_diffusion_inpainting_with_char_adapter.cpython-39.pyc │ │ ├── new_paradigm_inpainting_dual_text_encoder.py │ │ ├── stable_diffusion_inpainting.py │ │ ├── stable_diffusion_inpainting_dual_text_full_controlnet.py │ │ ├── stable_diffusion_inpainting_full_controlnet.py │ │ ├── stable_diffusion_inpainting_mask_controlnet.py │ │ ├── stable_diffusion_inpainting_only_controlnet.py │ │ ├── stable_diffusion_inpainting_only_pre_prompt.py │ │ ├── stable_diffusion_inpainting_only_prefix_prompt.py │ │ ├── stable_diffusion_inpainting_text_glyph.py │ │ ├── stable_diffusion_inpainting_text_vae_text_glyph.py │ │ ├── stable_diffusion_inpainting_with_adapter.py │ │ ├── stable_diffusion_inpainting_with_adapter_with_fussion_te.py │ │ ├── stable_diffusion_inpainting_with_adapter_zero_prompt.py │ │ └── stable_diffusion_inpainting_with_char_adapter.py │ └── utils │ │ ├── ori.png │ │ ├── output.png │ │ ├── res.png │ │ ├── res_area.png │ │ ├── res_trilinear.png │ │ └── vis_mask.py ├── train_vae.py └── train_vae.sh ├── my_pipeline.py ├── pyproject.toml ├── requirements.txt ├── scripts ├── __init__.py ├── change_naming_configs_and_checkpoints.py ├── conversion_ldm_uncond.py ├── convert_dance_diffusion_to_diffusers.py ├── convert_ddpm_original_checkpoint_to_diffusers.py ├── convert_diffusers_to_original_stable_diffusion.py ├── convert_dit_to_diffusers.py ├── convert_k_upscaler_to_diffusers.py ├── convert_kakao_brain_unclip_to_diffusers.py ├── convert_ldm_original_checkpoint_to_diffusers.py ├── convert_lora_safetensor_to_diffusers.py ├── convert_models_diffuser_to_diffusers.py ├── convert_ms_text_to_video_to_diffusers.py ├── convert_music_spectrogram_to_diffusers.py ├── convert_ncsnpp_original_checkpoint_to_diffusers.py ├── convert_original_audioldm_to_diffusers.py ├── convert_original_controlnet_to_diffusers.py ├── 
convert_original_stable_diffusion_to_diffusers.py ├── convert_stable_diffusion_checkpoint_to_onnx.py ├── convert_unclip_txt2img_to_image_variation.py ├── convert_vae_diff_to_onnx.py ├── convert_vae_pt_to_diffusers.py ├── convert_versatile_diffusion_to_diffusers.py ├── convert_vq_diffusion_to_diffusers.py └── generate_logits.py ├── setup.cfg ├── setup.py ├── src ├── diffusers.egg-info │ ├── PKG-INFO │ ├── SOURCES.txt │ ├── dependency_links.txt │ ├── entry_points.txt │ ├── requires.txt │ └── top_level.txt └── diffusers │ ├── __init__.py │ ├── commands │ ├── __init__.py │ ├── diffusers_cli.py │ └── env.py │ ├── configuration_utils.py │ ├── dependency_versions_check.py │ ├── dependency_versions_table.py │ ├── experimental │ ├── README.md │ ├── __init__.py │ └── rl │ │ ├── __init__.py │ │ └── value_guided_sampling.py │ ├── image_processor.py │ ├── loaders.py │ ├── models │ ├── README.md │ ├── __init__.py │ ├── attention.py │ ├── attention_flax.py │ ├── attention_processor.py │ ├── autoencoder_kl.py │ ├── controlnet.py │ ├── controlnet_flax.py │ ├── cross_attention.py │ ├── dual_transformer_2d.py │ ├── embeddings.py │ ├── embeddings_flax.py │ ├── modeling_flax_pytorch_utils.py │ ├── modeling_flax_utils.py │ ├── modeling_pytorch_flax_utils.py │ ├── modeling_utils.py │ ├── prior_transformer.py │ ├── resnet.py │ ├── resnet_flax.py │ ├── t5_film_transformer.py │ ├── transformer_2d.py │ ├── transformer_temporal.py │ ├── unet_1d.py │ ├── unet_1d_blocks.py │ ├── unet_2d.py │ ├── unet_2d_blocks.py │ ├── unet_2d_blocks_flax.py │ ├── unet_2d_condition.py │ ├── unet_2d_condition_flax.py │ ├── unet_3d_blocks.py │ ├── unet_3d_condition.py │ ├── vae.py │ ├── vae_flax.py │ └── vq_model.py │ ├── optimization.py │ ├── pipeline_utils.py │ ├── pipelines │ ├── README.md │ ├── __init__.py │ ├── alt_diffusion │ │ ├── __init__.py │ │ ├── modeling_roberta_series.py │ │ ├── pipeline_alt_diffusion.py │ │ └── pipeline_alt_diffusion_img2img.py │ ├── audio_diffusion │ │ ├── __init__.py │ │ ├── 
mel.py │ │ └── pipeline_audio_diffusion.py │ ├── audioldm │ │ ├── __init__.py │ │ └── pipeline_audioldm.py │ ├── dance_diffusion │ │ ├── __init__.py │ │ └── pipeline_dance_diffusion.py │ ├── ddim │ │ ├── __init__.py │ │ └── pipeline_ddim.py │ ├── ddpm │ │ ├── __init__.py │ │ └── pipeline_ddpm.py │ ├── dit │ │ ├── __init__.py │ │ └── pipeline_dit.py │ ├── latent_diffusion │ │ ├── __init__.py │ │ ├── pipeline_latent_diffusion.py │ │ └── pipeline_latent_diffusion_superresolution.py │ ├── latent_diffusion_uncond │ │ ├── __init__.py │ │ └── pipeline_latent_diffusion_uncond.py │ ├── onnx_utils.py │ ├── paint_by_example │ │ ├── __init__.py │ │ ├── image_encoder.py │ │ └── pipeline_paint_by_example.py │ ├── pipeline_flax_utils.py │ ├── pipeline_utils.py │ ├── pndm │ │ ├── __init__.py │ │ └── pipeline_pndm.py │ ├── repaint │ │ ├── __init__.py │ │ └── pipeline_repaint.py │ ├── score_sde_ve │ │ ├── __init__.py │ │ └── pipeline_score_sde_ve.py │ ├── semantic_stable_diffusion │ │ ├── __init__.py │ │ └── pipeline_semantic_stable_diffusion.py │ ├── spectrogram_diffusion │ │ ├── __init__.py │ │ ├── continous_encoder.py │ │ ├── midi_utils.py │ │ ├── notes_encoder.py │ │ └── pipeline_spectrogram_diffusion.py │ ├── stable_diffusion │ │ ├── README.md │ │ ├── __init__.py │ │ ├── convert_from_ckpt.py │ │ ├── pipeline_cycle_diffusion.py │ │ ├── pipeline_flax_stable_diffusion.py │ │ ├── pipeline_flax_stable_diffusion_controlnet.py │ │ ├── pipeline_flax_stable_diffusion_img2img.py │ │ ├── pipeline_flax_stable_diffusion_inpaint.py │ │ ├── pipeline_onnx_stable_diffusion.py │ │ ├── pipeline_onnx_stable_diffusion_img2img.py │ │ ├── pipeline_onnx_stable_diffusion_inpaint.py │ │ ├── pipeline_onnx_stable_diffusion_inpaint_legacy.py │ │ ├── pipeline_onnx_stable_diffusion_upscale.py │ │ ├── pipeline_stable_diffusion.py │ │ ├── pipeline_stable_diffusion_attend_and_excite.py │ │ ├── pipeline_stable_diffusion_controlnet.py │ │ ├── pipeline_stable_diffusion_depth2img.py │ │ ├── 
pipeline_stable_diffusion_image_variation.py │ │ ├── pipeline_stable_diffusion_img2img.py │ │ ├── pipeline_stable_diffusion_inpaint.py │ │ ├── pipeline_stable_diffusion_inpaint_legacy.py │ │ ├── pipeline_stable_diffusion_instruct_pix2pix.py │ │ ├── pipeline_stable_diffusion_k_diffusion.py │ │ ├── pipeline_stable_diffusion_latent_upscale.py │ │ ├── pipeline_stable_diffusion_model_editing.py │ │ ├── pipeline_stable_diffusion_panorama.py │ │ ├── pipeline_stable_diffusion_pix2pix_zero.py │ │ ├── pipeline_stable_diffusion_sag.py │ │ ├── pipeline_stable_diffusion_upscale.py │ │ ├── pipeline_stable_unclip.py │ │ ├── pipeline_stable_unclip_img2img.py │ │ ├── safety_checker.py │ │ ├── safety_checker_flax.py │ │ └── stable_unclip_image_normalizer.py │ ├── stable_diffusion_safe │ │ ├── __init__.py │ │ ├── pipeline_stable_diffusion_safe.py │ │ └── safety_checker.py │ ├── stochastic_karras_ve │ │ ├── __init__.py │ │ └── pipeline_stochastic_karras_ve.py │ ├── text_to_video_synthesis │ │ ├── __init__.py │ │ └── pipeline_text_to_video_synth.py │ ├── unclip │ │ ├── __init__.py │ │ ├── pipeline_unclip.py │ │ ├── pipeline_unclip_image_variation.py │ │ └── text_proj.py │ ├── versatile_diffusion │ │ ├── __init__.py │ │ ├── modeling_text_unet.py │ │ ├── pipeline_versatile_diffusion.py │ │ ├── pipeline_versatile_diffusion_dual_guided.py │ │ ├── pipeline_versatile_diffusion_image_variation.py │ │ └── pipeline_versatile_diffusion_text_to_image.py │ └── vq_diffusion │ │ ├── __init__.py │ │ └── pipeline_vq_diffusion.py │ ├── schedulers │ ├── README.md │ ├── __init__.py │ ├── scheduling_ddim.py │ ├── scheduling_ddim_flax.py │ ├── scheduling_ddim_inverse.py │ ├── scheduling_ddpm.py │ ├── scheduling_ddpm_flax.py │ ├── scheduling_deis_multistep.py │ ├── scheduling_dpmsolver_multistep.py │ ├── scheduling_dpmsolver_multistep_flax.py │ ├── scheduling_dpmsolver_singlestep.py │ ├── scheduling_euler_ancestral_discrete.py │ ├── scheduling_euler_discrete.py │ ├── scheduling_heun_discrete.py │ ├── 
scheduling_ipndm.py │ ├── scheduling_k_dpm_2_ancestral_discrete.py │ ├── scheduling_k_dpm_2_discrete.py │ ├── scheduling_karras_ve.py │ ├── scheduling_karras_ve_flax.py │ ├── scheduling_lms_discrete.py │ ├── scheduling_lms_discrete_flax.py │ ├── scheduling_pndm.py │ ├── scheduling_pndm_flax.py │ ├── scheduling_repaint.py │ ├── scheduling_sde_ve.py │ ├── scheduling_sde_ve_flax.py │ ├── scheduling_sde_vp.py │ ├── scheduling_unclip.py │ ├── scheduling_unipc_multistep.py │ ├── scheduling_utils.py │ ├── scheduling_utils_flax.py │ └── scheduling_vq_diffusion.py │ ├── training_utils.py │ └── utils │ ├── __init__.py │ ├── accelerate_utils.py │ ├── constants.py │ ├── deprecation_utils.py │ ├── doc_utils.py │ ├── dummy_flax_and_transformers_objects.py │ ├── dummy_flax_objects.py │ ├── dummy_note_seq_objects.py │ ├── dummy_onnx_objects.py │ ├── dummy_pt_objects.py │ ├── dummy_torch_and_librosa_objects.py │ ├── dummy_torch_and_scipy_objects.py │ ├── dummy_torch_and_transformers_and_k_diffusion_objects.py │ ├── dummy_torch_and_transformers_and_onnx_objects.py │ ├── dummy_torch_and_transformers_objects.py │ ├── dummy_transformers_and_torch_and_note_seq_objects.py │ ├── dynamic_modules_utils.py │ ├── hub_utils.py │ ├── import_utils.py │ ├── logging.py │ ├── model_card_template.md │ ├── outputs.py │ ├── pil_utils.py │ ├── testing_utils.py │ └── torch_utils.py ├── tests └── utils ├── check_config_docstrings.py ├── check_copies.py ├── check_doc_toc.py ├── check_dummies.py ├── check_inits.py ├── check_repo.py ├── check_table.py ├── custom_init_isort.py ├── get_modified_files.py ├── overwrite_expected_slice.py ├── print_env.py ├── release.py └── stale.py /images/framwork.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/images/framwork.png -------------------------------------------------------------------------------- /mmocr/.circleci/docker/Dockerfile: 
-------------------------------------------------------------------------------- 1 | ARG PYTORCH="1.8.1" 2 | ARG CUDA="10.2" 3 | ARG CUDNN="7" 4 | 5 | FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel 6 | 7 | # To fix GPG key error when running apt-get update 8 | RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub 9 | RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub 10 | 11 | RUN apt-get update && apt-get install -y ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libgl1-mesa-glx 12 | -------------------------------------------------------------------------------- /mmocr/.codespellrc: -------------------------------------------------------------------------------- 1 | [codespell] 2 | skip = *.ipynb 3 | count = 4 | quiet-level = 3 5 | ignore-words-list = convertor,convertors,formating,nin,wan,datas,hist,ned 6 | -------------------------------------------------------------------------------- /mmocr/.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | */__init__.py 4 | -------------------------------------------------------------------------------- /mmocr/.dev_scripts/benchmark_options.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | 3 | third_part_libs = [ 4 | 'pip install -r ../requirements/albu.txt', 5 | ] 6 | 7 | default_floating_range = 0.5 8 | -------------------------------------------------------------------------------- /mmocr/.dev_scripts/benchmark_train_models.txt: -------------------------------------------------------------------------------- 1 | textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py 2 | textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015.py 3 | textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2015.py 4 | textrecog/abinet/abinet-vision_20e_st-an_mj.py 5 | textrecog/crnn/crnn_mini-vgg_5e_mj.py 6 | textrecog/aster/aster_resnet45_6e_st_mj.py 7 | textrecog/nrtr/nrtr_resnet31-1by16-1by8_6e_st_mj.py 8 | textrecog/sar/sar_resnet31_parallel-decoder_5e_st-sub_mj-sub_sa_real.py 9 | textrecog/svtr/svtr-small_20e_st_mj.py 10 | -------------------------------------------------------------------------------- /mmocr/.dev_scripts/covignore.cfg: -------------------------------------------------------------------------------- 1 | # Each line should be the relative path to the root directory 2 | # of this repo. Support regular expression as well. 3 | # For example: 4 | # mmocr/models/textdet/postprocess/utils.py 5 | # .*/utils.py 6 | .*/__init__.py 7 | 8 | # It will be removed after all models have been refactored 9 | mmocr/utils/bbox_utils.py 10 | 11 | # Major part is covered, however, it's hard to cover model's output. 
12 | mmocr/models/textdet/detectors/mmdet_wrapper.py 13 | 14 | # It will be removed after KieVisualizer and TextSpotterVisualizer 15 | mmocr/visualization/visualize.py 16 | 17 | # Add tests for data preparers later 18 | mmocr/datasets/preparers 19 | -------------------------------------------------------------------------------- /mmocr/.owners.yml: -------------------------------------------------------------------------------- 1 | assign: 2 | strategy: 3 | random 4 | # daily-shift-based 5 | scedule: 6 | '*/1 * * * *' 7 | assignees: 8 | - gaotongxiao 9 | - Harold-lkk 10 | -------------------------------------------------------------------------------- /mmocr/.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | formats: all 4 | 5 | python: 6 | version: 3.7 7 | install: 8 | - requirements: requirements/docs.txt 9 | - requirements: requirements/readthedocs.txt 10 | -------------------------------------------------------------------------------- /mmocr/CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 
3 | title: "OpenMMLab Text Detection, Recognition and Understanding Toolbox" 4 | authors: 5 | - name: "MMOCR Contributors" 6 | version: 0.3.0 7 | date-released: 2020-08-15 8 | repository-code: "https://github.com/open-mmlab/mmocr" 9 | license: Apache-2.0 10 | -------------------------------------------------------------------------------- /mmocr/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements/*.txt 2 | include mmocr/.mim/model-index.yml 3 | include mmocr/.mim/dicts/*.txt 4 | recursive-include mmocr/.mim/configs *.py *.yml 5 | recursive-include mmocr/.mim/tools *.sh *.py 6 | -------------------------------------------------------------------------------- /mmocr/README.md: -------------------------------------------------------------------------------- 1 | 该项目基于mmocr框架,请依据requirements.txt搭建环境 2 | 3 | 在configs/textdet中配置好文本图像数据集的路径后,使用./my_train.sh脚本进行检测实验 4 | -------------------------------------------------------------------------------- /mmocr/configs/backbone/oclip/metafile.yml: -------------------------------------------------------------------------------- 1 | Collections: 2 | - Name: oCLIP 3 | Metadata: 4 | Training Data: SynthText 5 | Architecture: 6 | - CLIPResNet 7 | Paper: 8 | URL: https://arxiv.org/abs/2203.03911 9 | Title: 'Language Matters: A Weakly Supervised Vision-Language Pre-training Approach for Scene Text Detection and Spotting' 10 | README: configs/backbone/oclip/README.md 11 | 12 | Models: 13 | Weights: https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth 14 | -------------------------------------------------------------------------------- /mmocr/configs/kie/_base_/datasets/wildreceipt.py: -------------------------------------------------------------------------------- 1 | wildreceipt_data_root = 'data/wildreceipt/' 2 | 3 | wildreceipt_train = dict( 4 | type='WildReceiptDataset', 5 | data_root=wildreceipt_data_root, 6 | metainfo=wildreceipt_data_root + 
'class_list.txt', 7 | ann_file='train.txt', 8 | pipeline=None) 9 | 10 | wildreceipt_test = dict( 11 | type='WildReceiptDataset', 12 | data_root=wildreceipt_data_root, 13 | metainfo=wildreceipt_data_root + 'class_list.txt', 14 | ann_file='test.txt', 15 | test_mode=True, 16 | pipeline=None) 17 | -------------------------------------------------------------------------------- /mmocr/configs/kie/_base_/schedules/schedule_adam_60e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optim_wrapper = dict( 3 | type='OptimWrapper', optimizer=dict(type='Adam', weight_decay=0.0001)) 4 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=60, val_interval=1) 5 | val_cfg = dict(type='ValLoop') 6 | test_cfg = dict(type='TestLoop') 7 | # learning rate 8 | param_scheduler = [ 9 | dict(type='MultiStepLR', milestones=[40, 50], end=60), 10 | ] 11 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/_base_/datasets/ctw1500.py: -------------------------------------------------------------------------------- 1 | ctw1500_textdet_data_root = 'data/ctw1500' 2 | 3 | ctw1500_textdet_train = dict( 4 | type='OCRDataset', 5 | data_root=ctw1500_textdet_data_root, 6 | ann_file='textdet_train.json', 7 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 8 | pipeline=None) 9 | 10 | ctw1500_textdet_test = dict( 11 | type='OCRDataset', 12 | data_root=ctw1500_textdet_data_root, 13 | ann_file='textdet_test.json', 14 | test_mode=True, 15 | pipeline=None) 16 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/_base_/datasets/icdar2013.py: -------------------------------------------------------------------------------- 1 | icdar2013_textdet_data_root = 'data/icdar2013' 2 | 3 | icdar2013_textdet_train = dict( 4 | type='OCRDataset', 5 | data_root=icdar2013_textdet_data_root, 6 | ann_file='textdet_train.json', 7 | 
filter_cfg=dict(filter_empty_gt=True, min_size=32), 8 | pipeline=None) 9 | 10 | icdar2013_textdet_test = dict( 11 | type='OCRDataset', 12 | data_root=icdar2013_textdet_data_root, 13 | ann_file='textdet_test.json', 14 | test_mode=True, 15 | pipeline=None) 16 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/_base_/datasets/icdar2017.py: -------------------------------------------------------------------------------- 1 | icdar2017_textdet_data_root = 'data/mlt2017' 2 | 3 | icdar2017_textdet_train = dict( 4 | type='OCRDataset', 5 | data_root=icdar2017_textdet_data_root, 6 | ann_file='textdet_train.json', 7 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 8 | pipeline=None) 9 | 10 | icdar2017_textdet_test = dict( 11 | type='OCRDataset', 12 | data_root=icdar2017_textdet_data_root, 13 | ann_file='textdet_test.json', 14 | test_mode=True, 15 | pipeline=None) 16 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/_base_/datasets/synthtext.py: -------------------------------------------------------------------------------- 1 | synthtext_textdet_data_root = 'data/synthtext' 2 | 3 | synthtext_textdet_train = dict( 4 | type='OCRDataset', 5 | data_root=synthtext_textdet_data_root, 6 | ann_file='textdet_train.json', 7 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 8 | pipeline=None) 9 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/_base_/datasets/totaltext.py: -------------------------------------------------------------------------------- 1 | totaltext_textdet_data_root = 'data/totaltext' 2 | 3 | totaltext_textdet_train = dict( 4 | type='OCRDataset', 5 | data_root=totaltext_textdet_data_root, 6 | ann_file='textdet_train.json', 7 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 8 | pipeline=None) 9 | 10 | totaltext_textdet_test = dict( 11 | type='OCRDataset', 12 | 
data_root=totaltext_textdet_data_root, 13 | ann_file='textdet_test.json', 14 | test_mode=True, 15 | pipeline=None) 16 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/_base_/datasets/toy_data.py: -------------------------------------------------------------------------------- 1 | toy_det_data_root = 'tests/data/det_toy_dataset' 2 | 3 | toy_det_train = dict( 4 | type='OCRDataset', 5 | data_root=toy_det_data_root, 6 | ann_file='instances_training.json', 7 | data_prefix=dict(img_path='imgs/'), 8 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 9 | pipeline=None) 10 | 11 | toy_det_test = dict( 12 | type='OCRDataset', 13 | data_root=toy_det_data_root, 14 | ann_file='instances_test.json', 15 | data_prefix=dict(img_path='imgs/'), 16 | test_mode=True, 17 | pipeline=None) 18 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/_base_/pretrain_runtime.py: -------------------------------------------------------------------------------- 1 | _base_ = 'default_runtime.py' 2 | 3 | default_hooks = dict( 4 | logger=dict(type='LoggerHook', interval=1000), 5 | checkpoint=dict( 6 | type='CheckpointHook', 7 | interval=10000, 8 | by_epoch=False, 9 | max_keep_ckpts=1), 10 | ) 11 | 12 | # Evaluation 13 | val_evaluator = None 14 | test_evaluator = None 15 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/_base_/schedules/schedule_adam_600e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optim_wrapper = dict(type='OptimWrapper', optimizer=dict(type='Adam', lr=1e-3)) 3 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=60, val_interval=10) 4 | val_cfg = dict(type='ValLoop') 5 | test_cfg = dict(type='TestLoop') 6 | # learning rate 7 | param_scheduler = [ 8 | dict(type='PolyLR', power=0.9, end=60), 9 | ] 10 | 
-------------------------------------------------------------------------------- /mmocr/configs/textdet/_base_/schedules/schedule_sgd_100k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optim_wrapper = dict( 3 | type='OptimWrapper', 4 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001)) 5 | 6 | train_cfg = dict(type='IterBasedTrainLoop', max_iters=100000) 7 | # test_cfg = None 8 | # val_cfg = None 9 | val_cfg = dict(type='ValLoop') 10 | test_cfg = dict(type='TestLoop') 11 | # learning policy 12 | param_scheduler = [ 13 | dict(type='PolyLR', power=0.9, eta_min=1e-7, by_epoch=False, end=100000), 14 | ] 15 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/_base_/schedules/schedule_sgd_1200e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optim_wrapper = dict( 3 | type='OptimWrapper', 4 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001)) 5 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=1200, val_interval=100) 6 | val_cfg = dict(type='ValLoop') 7 | test_cfg = dict(type='TestLoop') 8 | # learning policy 9 | param_scheduler = [ 10 | dict(type='PolyLR', power=0.9, eta_min=1e-7, end=1200), 11 | ] 12 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/_base_/schedules/schedule_sgd_base.py: -------------------------------------------------------------------------------- 1 | # Note: This schedule config serves as a base config for other schedules. 2 | # Users would have to at least fill in "max_epochs" and "val_interval" 3 | # in order to use this config in their experiments. 
4 | 5 | # optimizer 6 | optim_wrapper = dict( 7 | type='OptimWrapper', 8 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001)) 9 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=None, val_interval=20) 10 | val_cfg = dict(type='ValLoop') 11 | test_cfg = dict(type='TestLoop') 12 | # learning policy 13 | param_scheduler = [ 14 | dict(type='ConstantLR', factor=1.0), 15 | ] 16 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/dbnet/dbnet_resnet50-oclip_1200e_icdar2015.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py', 3 | ] 4 | 5 | load_from = None 6 | 7 | _base_.model.backbone = dict( 8 | type='CLIPResNet', 9 | init_cfg=dict( 10 | type='Pretrained', 11 | checkpoint='https://download.openmmlab.com/' 12 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth')) 13 | 14 | _base_.train_dataloader.num_workers = 24 15 | _base_.optim_wrapper.optimizer.lr = 0.002 16 | 17 | param_scheduler = [ 18 | dict(type='LinearLR', end=100, start_factor=0.001), 19 | dict(type='PolyLR', power=0.9, eta_min=1e-7, begin=100, end=1200), 20 | ] 21 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/dbnet/dbnet_resnet50_1200e_icdar2015.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py', 3 | ] 4 | 5 | load_from = None 6 | 7 | _base_.model.backbone = dict( 8 | type='mmdet.ResNet', 9 | depth=50, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=-1, 13 | norm_cfg=dict(type='BN', requires_grad=True), 14 | norm_eval=True, 15 | style='pytorch', 16 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')) 17 | 18 | _base_.train_dataloader.num_workers = 24 19 | _base_.optim_wrapper.optimizer.lr = 0.002 20 | 21 | param_scheduler = [ 22 | 
dict(type='LinearLR', end=100, start_factor=0.001), 23 | dict(type='PolyLR', power=0.9, eta_min=1e-7, begin=100, end=1200), 24 | ] 25 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py', 3 | ] 4 | 5 | load_from = None 6 | 7 | _base_.model.backbone = dict( 8 | type='CLIPResNet', 9 | init_cfg=dict( 10 | type='Pretrained', 11 | checkpoint='https://download.openmmlab.com/' 12 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth')) 13 | 14 | _base_.train_dataloader.num_workers = 24 15 | _base_.optim_wrapper.optimizer.lr = 0.002 16 | 17 | param_scheduler = [ 18 | dict(type='LinearLR', end=200, start_factor=0.001), 19 | dict(type='PolyLR', power=0.9, eta_min=1e-7, begin=200, end=1200), 20 | ] 21 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py', 3 | ] 4 | 5 | load_from = None 6 | 7 | _base_.model.backbone = dict( 8 | type='mmdet.ResNet', 9 | depth=50, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=-1, 13 | norm_cfg=dict(type='BN', requires_grad=True), 14 | norm_eval=True, 15 | style='pytorch', 16 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')) 17 | 18 | _base_.train_dataloader.num_workers = 24 19 | _base_.optim_wrapper.optimizer.lr = 0.003 20 | 21 | param_scheduler = [ 22 | dict(type='LinearLR', end=200, start_factor=0.001), 23 | dict(type='PolyLR', power=0.9, eta_min=1e-7, begin=200, end=1200), 24 | ] 25 | -------------------------------------------------------------------------------- 
/mmocr/configs/textdet/drrg/drrg_resnet50-oclip_fpn-unet_1200e_ctw1500.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'drrg_resnet50_fpn-unet_1200e_ctw1500.py', 3 | ] 4 | 5 | load_from = None 6 | 7 | _base_.model.backbone = dict( 8 | type='CLIPResNet', 9 | init_cfg=dict( 10 | type='Pretrained', 11 | checkpoint='https://download.openmmlab.com/' 12 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth')) 13 | 14 | param_scheduler = [ 15 | dict(type='LinearLR', end=100, start_factor=0.001), 16 | dict(type='PolyLR', power=0.9, eta_min=1e-7, begin=100, end=1200), 17 | ] 18 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/fcenet/_base_fcenet_resnet50-dcnv2_fpn.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '_base_fcenet_resnet50_fpn.py', 3 | ] 4 | 5 | model = dict( 6 | backbone=dict( 7 | norm_eval=True, 8 | style='pytorch', 9 | dcn=dict(type='DCNv2', deform_groups=2, fallback_on_stride=False), 10 | stage_with_dcn=(False, True, True, True)), 11 | det_head=dict( 12 | module_loss=dict( 13 | type='FCEModuleLoss', 14 | num_sample=50, 15 | level_proportion_range=((0, 0.25), (0.2, 0.65), (0.55, 1.0))), 16 | postprocessor=dict(text_repr_type='poly', alpha=1.0, beta=2.0))) 17 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/fcenet/fcenet_resnet50-oclip_fpn_1500e_ctw1500.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'fcenet_resnet50-dcnv2_fpn_1500e_ctw1500.py', 3 | ] 4 | 5 | load_from = None 6 | 7 | _base_.model.backbone = dict( 8 | type='CLIPResNet', 9 | out_indices=(1, 2, 3), 10 | init_cfg=dict( 11 | type='Pretrained', 12 | checkpoint='https://download.openmmlab.com/' 13 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth')) 14 | 15 | _base_.train_dataloader.num_workers = 24 16 | 
_base_.optim_wrapper.optimizer.lr = 0.0005 17 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/fcenet/fcenet_resnet50-oclip_fpn_1500e_icdar2015.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'fcenet_resnet50_fpn_1500e_icdar2015.py', 3 | ] 4 | load_from = None 5 | 6 | _base_.model.backbone = dict( 7 | type='CLIPResNet', 8 | out_indices=(1, 2, 3), 9 | init_cfg=dict( 10 | type='Pretrained', 11 | checkpoint='https://download.openmmlab.com/' 12 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth')) 13 | 14 | _base_.train_dataloader.batch_size = 16 15 | _base_.train_dataloader.num_workers = 24 16 | _base_.optim_wrapper.optimizer.lr = 0.0005 17 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/maskrcnn/mask-rcnn_resnet50-oclip_fpn_160e_ctw1500.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'mask-rcnn_resnet50_fpn_160e_ctw1500.py', 3 | ] 4 | 5 | load_from = None 6 | 7 | _base_.model.cfg.backbone = dict( 8 | _scope_='mmocr', 9 | type='CLIPResNet', 10 | init_cfg=dict( 11 | type='Pretrained', 12 | checkpoint='https://download.openmmlab.com/' 13 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth')) 14 | 15 | _base_.optim_wrapper.optimizer.lr = 0.02 16 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/maskrcnn/mask-rcnn_resnet50-oclip_fpn_160e_icdar2015.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'mask-rcnn_resnet50_fpn_160e_icdar2015.py', 3 | ] 4 | 5 | load_from = None 6 | 7 | _base_.model.cfg.backbone = dict( 8 | _scope_='mmocr', 9 | type='CLIPResNet', 10 | init_cfg=dict( 11 | type='Pretrained', 12 | checkpoint='https://download.openmmlab.com/' 13 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth')) 14 | 15 | _base_.optim_wrapper.optimizer.lr = 0.02 16 | 
-------------------------------------------------------------------------------- /mmocr/configs/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2017.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'mask-rcnn_resnet50_fpn_160e_icdar2015.py', 3 | '../_base_/datasets/icdar2017.py', 4 | ] 5 | 6 | icdar2017_textdet_train = _base_.icdar2017_textdet_train 7 | icdar2017_textdet_test = _base_.icdar2017_textdet_test 8 | # use the same pipeline as icdar2015 9 | icdar2017_textdet_train.pipeline = _base_.train_pipeline 10 | icdar2017_textdet_test.pipeline = _base_.test_pipeline 11 | 12 | train_dataloader = dict(dataset=icdar2017_textdet_train) 13 | val_dataloader = dict(dataset=icdar2017_textdet_test) 14 | test_dataloader = val_dataloader 15 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/panet/_base_panet_resnet50_fpem-ffm.py: -------------------------------------------------------------------------------- 1 | _base_ = '_base_panet_resnet18_fpem-ffm.py' 2 | 3 | model = dict( 4 | type='PANet', 5 | backbone=dict( 6 | _delete_=True, 7 | type='mmdet.ResNet', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | norm_eval=True, 14 | style='caffe', 15 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), 16 | ), 17 | neck=dict(in_channels=[256, 512, 1024, 2048]), 18 | det_head=dict(postprocessor=dict(text_repr_type='poly'))) 19 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/psenet/psenet_resnet50-oclip_fpnf_600e_ctw1500.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'psenet_resnet50_fpnf_600e_ctw1500.py', 3 | ] 4 | 5 | _base_.model.backbone = dict( 6 | type='CLIPResNet', 7 | init_cfg=dict( 8 | type='Pretrained', 9 | 
checkpoint='https://download.openmmlab.com/' 10 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth')) 11 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/psenet/psenet_resnet50-oclip_fpnf_600e_icdar2015.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'psenet_resnet50_fpnf_600e_icdar2015.py', 3 | ] 4 | 5 | _base_.model.backbone = dict( 6 | type='CLIPResNet', 7 | init_cfg=dict( 8 | type='Pretrained', 9 | checkpoint='https://download.openmmlab.com/' 10 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth')) 11 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/psenet/psenet_resnet50_fpnf_600e_icdar2017.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'psenet_resnet50_fpnf_600e_icdar2015.py', 3 | '../_base_/datasets/icdar2017.py', 4 | ] 5 | 6 | icdar2017_textdet_train = _base_.icdar2017_textdet_train 7 | icdar2017_textdet_test = _base_.icdar2017_textdet_test 8 | # use the same pipeline as icdar2015 9 | icdar2017_textdet_train.pipeline = _base_.train_pipeline 10 | icdar2017_textdet_test.pipeline = _base_.test_pipeline 11 | 12 | train_dataloader = dict(dataset=icdar2017_textdet_train) 13 | val_dataloader = dict(dataset=icdar2017_textdet_test) 14 | test_dataloader = val_dataloader 15 | 16 | auto_scale_lr = dict(base_batch_size=64 * 4) 17 | -------------------------------------------------------------------------------- /mmocr/configs/textdet/textsnake/textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'textsnake_resnet50_fpn-unet_1200e_ctw1500.py', 3 | ] 4 | 5 | _base_.model.backbone = dict( 6 | type='CLIPResNet', 7 | init_cfg=dict( 8 | type='Pretrained', 9 | checkpoint='https://download.openmmlab.com/' 10 | 'mmocr/backbone/resnet50-oclip-7ba0c533.pth')) 11 | 
-------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/coco_text_v1.py: -------------------------------------------------------------------------------- 1 | cocotextv1_textrecog_data_root = 'data/rec/coco_text_v1' 2 | 3 | cocotextv1_textrecog_train = dict( 4 | type='OCRDataset', 5 | data_root=cocotextv1_textrecog_data_root, 6 | ann_file='train_labels.json', 7 | test_mode=False, 8 | pipeline=None) 9 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/cute80.py: -------------------------------------------------------------------------------- 1 | cute80_textrecog_data_root = 'data/cute80' 2 | 3 | cute80_textrecog_test = dict( 4 | type='OCRDataset', 5 | data_root=cute80_textrecog_data_root, 6 | ann_file='textrecog_test.json', 7 | test_mode=True, 8 | pipeline=None) 9 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/icdar2011.py: -------------------------------------------------------------------------------- 1 | icdar2011_textrecog_data_root = 'data/rec/icdar_2011/' 2 | 3 | icdar2011_textrecog_train = dict( 4 | type='OCRDataset', 5 | data_root=icdar2011_textrecog_data_root, 6 | ann_file='train_labels.json', 7 | test_mode=False, 8 | pipeline=None) 9 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/icdar2013.py: -------------------------------------------------------------------------------- 1 | icdar2013_textrecog_data_root = 'data/icdar2013' 2 | 3 | icdar2013_textrecog_train = dict( 4 | type='OCRDataset', 5 | data_root=icdar2013_textrecog_data_root, 6 | ann_file='textrecog_train.json', 7 | pipeline=None) 8 | 9 | icdar2013_textrecog_test = dict( 10 | type='OCRDataset', 11 | data_root=icdar2013_textrecog_data_root, 12 | ann_file='textrecog_test.json', 13 | test_mode=True, 14 | 
pipeline=None) 15 | 16 | icdar2013_857_textrecog_test = dict( 17 | type='OCRDataset', 18 | data_root=icdar2013_textrecog_data_root, 19 | ann_file='textrecog_test_857.json', 20 | test_mode=True, 21 | pipeline=None) 22 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/icdar2015.py: -------------------------------------------------------------------------------- 1 | icdar2015_textrecog_data_root = 'data/icdar2015' 2 | 3 | icdar2015_textrecog_train = dict( 4 | type='OCRDataset', 5 | data_root=icdar2015_textrecog_data_root, 6 | ann_file='textrecog_train.json', 7 | pipeline=None) 8 | 9 | icdar2015_textrecog_test = dict( 10 | type='OCRDataset', 11 | data_root=icdar2015_textrecog_data_root, 12 | ann_file='textrecog_test.json', 13 | test_mode=True, 14 | pipeline=None) 15 | 16 | icdar2015_1811_textrecog_test = dict( 17 | type='OCRDataset', 18 | data_root=icdar2015_textrecog_data_root, 19 | ann_file='textrecog_test_1811.json', 20 | test_mode=True, 21 | pipeline=None) 22 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/iiit5k.py: -------------------------------------------------------------------------------- 1 | iiit5k_textrecog_data_root = 'data/iiit5k' 2 | 3 | iiit5k_textrecog_train = dict( 4 | type='OCRDataset', 5 | data_root=iiit5k_textrecog_data_root, 6 | ann_file='textrecog_train.json', 7 | pipeline=None) 8 | 9 | iiit5k_textrecog_test = dict( 10 | type='OCRDataset', 11 | data_root=iiit5k_textrecog_data_root, 12 | ann_file='textrecog_test.json', 13 | test_mode=True, 14 | pipeline=None) 15 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/mjsynth.py: -------------------------------------------------------------------------------- 1 | mjsynth_textrecog_data_root = 'data/mjsynth' 2 | 3 | mjsynth_textrecog_train = dict( 4 | type='OCRDataset', 5 | 
data_root=mjsynth_textrecog_data_root, 6 | ann_file='textrecog_train.json', 7 | pipeline=None) 8 | 9 | mjsynth_sub_textrecog_train = dict( 10 | type='OCRDataset', 11 | data_root=mjsynth_textrecog_data_root, 12 | ann_file='subset_textrecog_train.json', 13 | pipeline=None) 14 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/svt.py: -------------------------------------------------------------------------------- 1 | svt_textrecog_data_root = 'data/svt' 2 | 3 | svt_textrecog_train = dict( 4 | type='OCRDataset', 5 | data_root=svt_textrecog_data_root, 6 | ann_file='textrecog_train.json', 7 | pipeline=None) 8 | 9 | svt_textrecog_test = dict( 10 | type='OCRDataset', 11 | data_root=svt_textrecog_data_root, 12 | ann_file='textrecog_test.json', 13 | test_mode=True, 14 | pipeline=None) 15 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/svtp.py: -------------------------------------------------------------------------------- 1 | svtp_textrecog_data_root = 'data/svtp' 2 | 3 | svtp_textrecog_train = dict( 4 | type='OCRDataset', 5 | data_root=svtp_textrecog_data_root, 6 | ann_file='textrecog_train.json', 7 | pipeline=None) 8 | 9 | svtp_textrecog_test = dict( 10 | type='OCRDataset', 11 | data_root=svtp_textrecog_data_root, 12 | ann_file='textrecog_test.json', 13 | test_mode=True, 14 | pipeline=None) 15 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/synthtext.py: -------------------------------------------------------------------------------- 1 | synthtext_textrecog_data_root = 'data/synthtext' 2 | 3 | synthtext_textrecog_train = dict( 4 | type='OCRDataset', 5 | data_root=synthtext_textrecog_data_root, 6 | ann_file='textrecog_train.json', 7 | pipeline=None) 8 | 9 | synthtext_sub_textrecog_train = dict( 10 | type='OCRDataset', 11 | 
data_root=synthtext_textrecog_data_root, 12 | ann_file='subset_textrecog_train.json', 13 | pipeline=None) 14 | 15 | synthtext_an_textrecog_train = dict( 16 | type='OCRDataset', 17 | data_root=synthtext_textrecog_data_root, 18 | ann_file='alphanumeric_textrecog_train.json', 19 | pipeline=None) 20 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/synthtext_add.py: -------------------------------------------------------------------------------- 1 | synthtext_add_textrecog_data_root = 'data/rec/synthtext_add/' 2 | 3 | synthtext_add_textrecog_train = dict( 4 | type='OCRDataset', 5 | data_root=synthtext_add_textrecog_data_root, 6 | ann_file='train_labels.json', 7 | test_mode=False, 8 | pipeline=None) 9 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/totaltext.py: -------------------------------------------------------------------------------- 1 | totaltext_textrecog_data_root = 'data/totaltext/' 2 | 3 | totaltext_textrecog_train = dict( 4 | type='OCRDataset', 5 | data_root=totaltext_textrecog_data_root, 6 | ann_file='textrecog_train.json', 7 | test_mode=False, 8 | pipeline=None) 9 | 10 | totaltext_textrecog_test = dict( 11 | type='OCRDataset', 12 | data_root=totaltext_textrecog_data_root, 13 | ann_file='textrecog_test.json', 14 | test_mode=True, 15 | pipeline=None) 16 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/datasets/toy_data.py: -------------------------------------------------------------------------------- 1 | toy_data_root = 'tests/data/rec_toy_dataset/' 2 | 3 | toy_rec_train = dict( 4 | type='OCRDataset', 5 | data_root=toy_data_root, 6 | data_prefix=dict(img_path='imgs/'), 7 | ann_file='labels.json', 8 | pipeline=None, 9 | test_mode=False) 10 | 11 | toy_rec_test = dict( 12 | type='OCRDataset', 13 | data_root=toy_data_root, 14 | 
data_prefix=dict(img_path='imgs/'), 15 | ann_file='labels.json', 16 | pipeline=None, 17 | test_mode=True) 18 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/schedules/schedule_adadelta_5e.py: -------------------------------------------------------------------------------- 1 | optim_wrapper = dict( 2 | type='OptimWrapper', optimizer=dict(type='Adadelta', lr=1.0)) 3 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=5, val_interval=1) 4 | val_cfg = dict(type='ValLoop') 5 | test_cfg = dict(type='TestLoop') 6 | # learning rate 7 | param_scheduler = [ 8 | dict(type='ConstantLR', factor=1.0), 9 | ] 10 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/schedules/schedule_adam_base.py: -------------------------------------------------------------------------------- 1 | # Note: This schedule config serves as a base config for other schedules. 2 | # Users would have to at least fill in "max_epochs" and "val_interval" 3 | # in order to use this config in their experiments. 
4 | 5 | # optimizer 6 | optim_wrapper = dict(type='OptimWrapper', optimizer=dict(type='Adam', lr=3e-4)) 7 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=None, val_interval=1) 8 | val_cfg = dict(type='ValLoop') 9 | test_cfg = dict(type='TestLoop') 10 | # learning policy 11 | param_scheduler = [ 12 | dict(type='ConstantLR', factor=1.0), 13 | ] 14 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/schedules/schedule_adam_step_5e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optim_wrapper = dict(type='OptimWrapper', optimizer=dict(type='Adam', lr=1e-3)) 3 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=5, val_interval=1) 4 | val_cfg = dict(type='ValLoop') 5 | test_cfg = dict(type='TestLoop') 6 | # learning policy 7 | param_scheduler = [ 8 | dict(type='MultiStepLR', milestones=[3, 4], end=5), 9 | ] 10 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/_base_/schedules/schedule_adamw_cos_6e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optim_wrapper = dict( 3 | type='OptimWrapper', 4 | optimizer=dict( 5 | type='AdamW', 6 | lr=4e-4, 7 | betas=(0.9, 0.999), 8 | eps=1e-08, 9 | weight_decay=0.05)) 10 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=6, val_interval=1) 11 | val_cfg = dict(type='ValLoop') 12 | test_cfg = dict(type='TestLoop') 13 | 14 | # learning policy 15 | param_scheduler = [ 16 | dict( 17 | type='CosineAnnealingLR', 18 | T_max=6, 19 | eta_min=4e-6, 20 | convert_to_iter_based=True) 21 | ] 22 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/abinet/_base_abinet.py: -------------------------------------------------------------------------------- 1 | _base_ = '_base_abinet-vision.py' 2 | 3 | model = dict( 4 | decoder=dict( 5 | d_model=512, 6 
| num_iters=3, 7 | language_decoder=dict( 8 | type='ABILanguageDecoder', 9 | d_model=512, 10 | n_head=8, 11 | d_inner=2048, 12 | n_layers=4, 13 | dropout=0.1, 14 | detach_tokens=True, 15 | use_self_attn=False, 16 | )), ) 17 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/nrtr/nrtr_resnet31-1by8-1by4_6e_st_mj.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'nrtr_resnet31-1by16-1by8_6e_st_mj.py', 3 | ] 4 | 5 | model = dict(backbone=dict(last_stage_pool=False)) 6 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/sar/sar_resnet31_sequential-decoder_5e_st-sub_mj-sub_sa_real.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'sar_resnet31_parallel-decoder_5e_st-sub_mj-sub_sa_real.py', 3 | ] 4 | 5 | model = dict(decoder=dict(type='SequentialSARDecoder')) 6 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/satrn/satrn_shallow-small_5e_st_mj.py: -------------------------------------------------------------------------------- 1 | _base_ = ['satrn_shallow_5e_st_mj.py'] 2 | 3 | model = dict( 4 | backbone=dict(type='ShallowCNN', input_channels=3, hidden_dim=256), 5 | encoder=dict( 6 | type='SATRNEncoder', 7 | n_layers=6, 8 | n_head=8, 9 | d_k=256 // 8, 10 | d_v=256 // 8, 11 | d_model=256, 12 | n_position=100, 13 | d_inner=256 * 4, 14 | dropout=0.1), 15 | decoder=dict( 16 | type='NRTRDecoder', 17 | n_layers=6, 18 | d_embedding=256, 19 | n_head=8, 20 | d_model=256, 21 | d_inner=256 * 4, 22 | d_k=256 // 8, 23 | d_v=256 // 8)) 24 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/svtr/svtr-base_20e_st_mj.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'svtr-tiny_20e_st_mj.py', 3 | ] 4 | 5 | 
model = dict( 6 | preprocessor=dict(output_image_size=(48, 160), ), 7 | encoder=dict( 8 | img_size=[48, 160], 9 | max_seq_len=40, 10 | out_channels=256, 11 | embed_dims=[128, 256, 384], 12 | depth=[3, 6, 9], 13 | num_heads=[4, 8, 12], 14 | mixer_types=['Local'] * 8 + ['Global'] * 10), 15 | decoder=dict(in_channels=256)) 16 | 17 | train_dataloader = dict(batch_size=256, ) 18 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/svtr/svtr-large_20e_st_mj.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'svtr-tiny_20e_st_mj.py', 3 | ] 4 | 5 | model = dict( 6 | preprocessor=dict(output_image_size=(48, 160), ), 7 | encoder=dict( 8 | img_size=[48, 160], 9 | max_seq_len=40, 10 | out_channels=384, 11 | embed_dims=[192, 256, 512], 12 | depth=[3, 9, 9], 13 | num_heads=[6, 8, 16], 14 | mixer_types=['Local'] * 10 + ['Global'] * 11), 15 | decoder=dict(in_channels=384)) 16 | 17 | train_dataloader = dict(batch_size=128, ) 18 | 19 | optim_wrapper = dict(optimizer=dict(lr=2.5 / (10**4))) 20 | -------------------------------------------------------------------------------- /mmocr/configs/textrecog/svtr/svtr-small_20e_st_mj.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 'svtr-tiny_20e_st_mj.py', 3 | ] 4 | 5 | model = dict( 6 | encoder=dict( 7 | embed_dims=[96, 192, 256], 8 | depth=[3, 6, 6], 9 | num_heads=[3, 6, 8], 10 | mixer_types=['Local'] * 8 + ['Global'] * 7)) 11 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/cocotextv2/textrecog.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.packer.type = 'TextRecogCropPacker' 4 | _base_.val_preparer.packer.type = 'TextRecogCropPacker' 5 | 6 | config_generator = dict(type='TextRecogConfigGenerator') 7 | 
-------------------------------------------------------------------------------- /mmocr/dataset_zoo/cocotextv2/textspotting.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.packer.type = 'TextSpottingPacker' 4 | _base_.test_preparer.packer.type = 'TextSpottingPacker' 5 | 6 | config_generator = dict(type='TextSpottingConfigGenerator') 7 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/ctw1500/textrecog.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train' 4 | _base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test' 5 | 6 | _base_.train_preparer.packer.type = 'TextRecogCropPacker' 7 | _base_.test_preparer.packer.type = 'TextRecogCropPacker' 8 | 9 | config_generator = dict(type='TextRecogConfigGenerator') 10 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/ctw1500/textspotting.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train' 4 | _base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test' 5 | 6 | _base_.train_preparer.packer.type = 'TextSpottingPacker' 7 | _base_.test_preparer.packer.type = 'TextSpottingPacker' 8 | 9 | _base_.test_preparer.obtainer.files.append( 10 | dict( 11 | url='https://download.openmmlab.com/mmocr/data/1.x/textspotting/' 12 | 'ctw1500/lexicons.zip', 13 | save_name='ctw1500_lexicons.zip', 14 | md5='168150ca45da161917bf35a20e45b8d6', 15 | content=['lexicons'], 16 | mapping=[['ctw1500_lexicons/lexicons', 'lexicons']])) 17 | 18 | _base_.delete.append('ctw1500_lexicons') 19 | config_generator = dict(type='TextSpottingConfigGenerator') 20 | 
-------------------------------------------------------------------------------- /mmocr/dataset_zoo/cute80/sample_anno.md: -------------------------------------------------------------------------------- 1 | **Text Recognition** 2 | 3 | ```text 4 | # timage/img_name text 1 text 5 | 6 | timage/001.jpg RONALDO 1 RONALDO 7 | timage/002.jpg 7 1 7 8 | timage/003.jpg SEACREST 1 SEACREST 9 | timage/004.jpg BEACH 1 BEACH 10 | ``` 11 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/funsd/textrecog.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train' 4 | _base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test' 5 | 6 | _base_.train_preparer.packer.type = 'TextRecogCropPacker' 7 | _base_.test_preparer.packer.type = 'TextRecogCropPacker' 8 | 9 | config_generator = dict(type='TextRecogConfigGenerator') 10 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/funsd/textspotting.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | _base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train' 3 | _base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test' 4 | 5 | _base_.train_preparer.packer.type = 'TextSpottingPacker' 6 | _base_.test_preparer.packer.type = 'TextSpottingPacker' 7 | 8 | config_generator = dict(type='TextSpottingConfigGenerator') 9 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/icdar2013/sample_anno.md: -------------------------------------------------------------------------------- 1 | **Text Detection** 2 | 3 | ```text 4 | # train split 5 | # x1 y1 x2 y2 "transcript" 6 | 7 | 158 128 411 181 "Footpath" 8 | 443 128 501 169 "To" 9 | 64 200 363 243 "Colchester" 10 | 11 | # test split 12 | # x1, y1, x2, y2, "transcript" 13 
| 14 | 38, 43, 920, 215, "Tiredness" 15 | 275, 264, 665, 450, "kills" 16 | 0, 699, 77, 830, "A" 17 | ``` 18 | 19 | **Text Recognition** 20 | 21 | ```text 22 | # img_name, "text" 23 | 24 | word_1.png, "PROPER" 25 | word_2.png, "FOOD" 26 | word_3.png, "PRONTO" 27 | ``` 28 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/icdar2015/sample_anno.md: -------------------------------------------------------------------------------- 1 | **Text Detection** 2 | 3 | ```text 4 | # x1,y1,x2,y2,x3,y3,x4,y4,trans 5 | 6 | 377,117,463,117,465,130,378,130,Genaxis Theatre 7 | 493,115,519,115,519,131,493,131,[06] 8 | 374,155,409,155,409,170,374,170,### 9 | ``` 10 | 11 | **Text Recognition** 12 | 13 | ```text 14 | # img_name, "text" 15 | 16 | word_1.png, "Genaxis Theatre" 17 | word_2.png, "[06]" 18 | word_3.png, "62-03" 19 | ``` 20 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/iiit5k/metafile.yml: -------------------------------------------------------------------------------- 1 | Name: 'IIIT5K' 2 | Paper: 3 | Title: Scene Text Recognition using Higher Order Language Priors 4 | URL: http://cvit.iiit.ac.in/projects/SceneTextUnderstanding/Home/mishraBMVC12.pdf 5 | Venue: BMVC 6 | Year: '2012' 7 | BibTeX: '@InProceedings{MishraBMVC12, 8 | author = "Mishra, A. and Alahari, K. 
and Jawahar, C.~V.", 9 | title = "Scene Text Recognition using Higher Order Language Priors", 10 | booktitle = "BMVC", 11 | year = "2012"}' 12 | Data: 13 | Website: http://cvit.iiit.ac.in/projects/SceneTextUnderstanding/IIIT5K.html 14 | Language: 15 | - English 16 | Scene: 17 | - Natural Scene 18 | Granularity: 19 | - Word 20 | Tasks: 21 | - textrecog 22 | License: 23 | Type: N/A 24 | Link: N/A 25 | Format: .txt 26 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/iiit5k/sample_anno.md: -------------------------------------------------------------------------------- 1 | **Text Recognition** 2 | 3 | ```text 4 | # img_name, "text" 5 | 6 | train/1009_2.png You 7 | train/1017_1.png Rescue 8 | train/1017_2.png mission 9 | ``` 10 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/mjsynth/sample_anno.md: -------------------------------------------------------------------------------- 1 | **Text Recognition** 2 | 3 | ```txt 4 | ./3000/7/182_slinking_71711.jpg 71711 5 | ./3000/7/182_REMODELERS_64541.jpg 64541 6 | ``` 7 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/sroie/sample_anno.md: -------------------------------------------------------------------------------- 1 | **Text Detection, Text Recognition and Text Spotting** 2 | 3 | ```text 4 | # x1,y1,x2,y2,x3,y3,x4,y4,trans 5 | 6 | 72,25,326,25,326,64,72,64,TAN WOON YANN 7 | 50,82,440,82,440,121,50,121,BOOK TA .K(TAMAN DAYA) SDN BND 8 | 205,121,285,121,285,139,205,139,789417-W 9 | ``` 10 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/sroie/textrecog.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train' 4 | _base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test' 5 | 
_base_.train_preparer.packer.type = 'TextRecogCropPacker' 6 | _base_.test_preparer.packer.type = 'TextRecogCropPacker' 7 | 8 | config_generator = dict(type='TextRecogConfigGenerator') 9 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/sroie/textspotting.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train' 4 | _base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test' 5 | _base_.train_preparer.packer.type = 'TextSpottingPacker' 6 | _base_.test_preparer.packer.type = 'TextSpottingPacker' 7 | 8 | config_generator = dict(type='TextSpottingConfigGenerator') 9 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/svt/metafile.yml: -------------------------------------------------------------------------------- 1 | Name: 'Street View Text Dataset (SVT)' 2 | Paper: 3 | Title: Word Spotting in the Wild 4 | URL: https://link.springer.com/content/pdf/10.1007/978-3-642-15549-9_43.pdf 5 | Venue: ECCV 6 | Year: '2010' 7 | BibTeX: '@inproceedings{wang2010word, 8 | title={Word spotting in the wild}, 9 | author={Wang, Kai and Belongie, Serge}, 10 | booktitle={European conference on computer vision}, 11 | pages={591--604}, 12 | year={2010}, 13 | organization={Springer}}' 14 | Data: 15 | Website: http://www.iapr-tc11.org/mediawiki/index.php/The_Street_View_Text_Dataset 16 | Language: 17 | - English 18 | Scene: 19 | - Natural Scene 20 | Granularity: 21 | - Word 22 | Tasks: 23 | - textdet 24 | - textrecog 25 | - textspotting 26 | License: 27 | Type: N/A 28 | Link: N/A 29 | Format: .xml 30 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/svt/textrecog.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.packer.type = 
'TextRecogCropPacker' 4 | _base_.test_preparer.packer.type = 'TextRecogCropPacker' 5 | 6 | config_generator = dict(type='TextRecogConfigGenerator') 7 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/svt/textspotting.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.packer.type = 'TextSpottingPacker' 4 | _base_.test_preparer.packer.type = 'TextSpottingPacker' 5 | 6 | config_generator = dict(type='TextSpottingConfigGenerator') 7 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/svtp/sample_anno.md: -------------------------------------------------------------------------------- 1 | **Text Recognition** 2 | 3 | ```txt 4 | 13_15_0_par.jpg WYNDHAM 5 | 13_15_1_par.jpg HOTEL 6 | 12_16_0_par.jpg UNITED 7 | ``` 8 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/synthtext/textspotting.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.packer.type = 'TextSpottingPacker' 4 | _base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train' 5 | 6 | config_generator = dict(type='TextSpottingConfigGenerator') 7 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/textocr/textrecog.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.packer.type = 'TextRecogCropPacker' 4 | _base_.val_preparer.packer.type = 'TextRecogCropPacker' 5 | 6 | config_generator = dict(type='TextRecogConfigGenerator') 7 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/textocr/textspotting.py: -------------------------------------------------------------------------------- 1 | _base_ = 
['textdet.py'] 2 | 3 | _base_.train_preparer.packer.type = 'TextSpottingPacker' 4 | _base_.val_preparer.packer.type = 'TextSpottingPacker' 5 | 6 | config_generator = dict(type='TextSpottingConfigGenerator') 7 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/totaltext/sample_anno.md: -------------------------------------------------------------------------------- 1 | **Text Detection/Spotting** 2 | 3 | ```text 4 | x: [[259 313 389 427 354 302]], y: [[542 462 417 459 507 582]], ornt: [u'c'], transcriptions: [u'PAUL'] 5 | x: [[400 478 494 436]], y: [[398 380 448 465]], ornt: [u'#'], transcriptions: [u'#'] 6 | ``` 7 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/totaltext/textrecog.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train' 4 | _base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test' 5 | _base_.train_preparer.packer.type = 'TextRecogCropPacker' 6 | _base_.test_preparer.packer.type = 'TextRecogCropPacker' 7 | 8 | config_generator = dict(type='TextRecogConfigGenerator') 9 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/wildreceipt/textdet.py: -------------------------------------------------------------------------------- 1 | _base_ = ['kie.py'] 2 | 3 | _base_.train_preparer.update( 4 | dict( 5 | parser=dict(type='WildreceiptTextDetAnnParser'), 6 | packer=dict(type='TextDetPacker'), 7 | dumper=dict(type='JsonDumper'))) 8 | _base_.test_preparer.update( 9 | dict( 10 | parser=dict(type='WildreceiptTextDetAnnParser'), 11 | packer=dict(type='TextDetPacker'), 12 | dumper=dict(type='JsonDumper'))) 13 | 14 | config_generator = dict(type='TextDetConfigGenerator') 15 | -------------------------------------------------------------------------------- 
/mmocr/dataset_zoo/wildreceipt/textrecog.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.update( 4 | dict( 5 | parser=dict(type='WildreceiptTextDetAnnParser'), 6 | packer=dict(type='TextRecogCropPacker'), 7 | dumper=dict(type='JsonDumper'))) 8 | 9 | _base_.test_preparer.update( 10 | dict( 11 | parser=dict(type='WildreceiptTextDetAnnParser'), 12 | packer=dict(type='TextRecogCropPacker'), 13 | dumper=dict(type='JsonDumper'))) 14 | 15 | config_generator = dict(type='TextRecogConfigGenerator') 16 | -------------------------------------------------------------------------------- /mmocr/dataset_zoo/wildreceipt/textspotting.py: -------------------------------------------------------------------------------- 1 | _base_ = ['textdet.py'] 2 | 3 | _base_.train_preparer.packer.type = 'TextSpottingPacker' 4 | _base_.test_preparer.packer.type = 'TextSpottingPacker' 5 | 6 | config_generator = dict(type='TextSpottingConfigGenerator') 7 | -------------------------------------------------------------------------------- /mmocr/demo/demo_densetext_det.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/demo_densetext_det.jpg -------------------------------------------------------------------------------- /mmocr/demo/demo_kie.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/demo_kie.jpeg -------------------------------------------------------------------------------- /mmocr/demo/demo_text_det.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/demo_text_det.jpg 
-------------------------------------------------------------------------------- /mmocr/demo/demo_text_ocr.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/demo_text_ocr.jpg -------------------------------------------------------------------------------- /mmocr/demo/demo_text_recog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/demo_text_recog.jpg -------------------------------------------------------------------------------- /mmocr/demo/resources/demo_kie_pred.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/resources/demo_kie_pred.png -------------------------------------------------------------------------------- /mmocr/demo/resources/det_vis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/resources/det_vis.png -------------------------------------------------------------------------------- /mmocr/demo/resources/kie_vis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/resources/kie_vis.png -------------------------------------------------------------------------------- /mmocr/demo/resources/log_analysis_demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/resources/log_analysis_demo.png 
-------------------------------------------------------------------------------- /mmocr/demo/resources/rec_vis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/demo/resources/rec_vis.png -------------------------------------------------------------------------------- /mmocr/dicts/english_digits_symbols.txt: -------------------------------------------------------------------------------- 1 | 0 2 | 1 3 | 2 4 | 3 5 | 4 6 | 5 7 | 6 8 | 7 9 | 8 10 | 9 11 | a 12 | b 13 | c 14 | d 15 | e 16 | f 17 | g 18 | h 19 | i 20 | j 21 | k 22 | l 23 | m 24 | n 25 | o 26 | p 27 | q 28 | r 29 | s 30 | t 31 | u 32 | v 33 | w 34 | x 35 | y 36 | z 37 | A 38 | B 39 | C 40 | D 41 | E 42 | F 43 | G 44 | H 45 | I 46 | J 47 | K 48 | L 49 | M 50 | N 51 | O 52 | P 53 | Q 54 | R 55 | S 56 | T 57 | U 58 | V 59 | W 60 | X 61 | Y 62 | Z 63 | ! 64 | " 65 | # 66 | $ 67 | % 68 | & 69 | ' 70 | ( 71 | ) 72 | * 73 | + 74 | , 75 | - 76 | . 77 | / 78 | : 79 | ; 80 | < 81 | = 82 | > 83 | ? 84 | @ 85 | [ 86 | \ 87 | ] 88 | _ 89 | ` 90 | ~ -------------------------------------------------------------------------------- /mmocr/dicts/english_digits_symbols_space.txt: -------------------------------------------------------------------------------- 1 | 0 2 | 1 3 | 2 4 | 3 5 | 4 6 | 5 7 | 6 8 | 7 9 | 8 10 | 9 11 | a 12 | b 13 | c 14 | d 15 | e 16 | f 17 | g 18 | h 19 | i 20 | j 21 | k 22 | l 23 | m 24 | n 25 | o 26 | p 27 | q 28 | r 29 | s 30 | t 31 | u 32 | v 33 | w 34 | x 35 | y 36 | z 37 | A 38 | B 39 | C 40 | D 41 | E 42 | F 43 | G 44 | H 45 | I 46 | J 47 | K 48 | L 49 | M 50 | N 51 | O 52 | P 53 | Q 54 | R 55 | S 56 | T 57 | U 58 | V 59 | W 60 | X 61 | Y 62 | Z 63 | ! 64 | " 65 | # 66 | $ 67 | % 68 | & 69 | ' 70 | ( 71 | ) 72 | * 73 | + 74 | , 75 | - 76 | . 77 | / 78 | : 79 | ; 80 | < 81 | = 82 | > 83 | ? 
84 | @ 85 | [ 86 | \ 87 | ] 88 | _ 89 | ` 90 | ~ 91 | -------------------------------------------------------------------------------- /mmocr/dicts/lower_english_digits.txt: -------------------------------------------------------------------------------- 1 | 0 2 | 1 3 | 2 4 | 3 5 | 4 6 | 5 7 | 6 8 | 7 9 | 8 10 | 9 11 | a 12 | b 13 | c 14 | d 15 | e 16 | f 17 | g 18 | h 19 | i 20 | j 21 | k 22 | l 23 | m 24 | n 25 | o 26 | p 27 | q 28 | r 29 | s 30 | t 31 | u 32 | v 33 | w 34 | x 35 | y 36 | z -------------------------------------------------------------------------------- /mmocr/dicts/lower_english_digits_space.txt: -------------------------------------------------------------------------------- 1 | 0 2 | 1 3 | 2 4 | 3 5 | 4 6 | 5 7 | 6 8 | 7 9 | 8 10 | 9 11 | a 12 | b 13 | c 14 | d 15 | e 16 | f 17 | g 18 | h 19 | i 20 | j 21 | k 22 | l 23 | m 24 | n 25 | o 26 | p 27 | q 28 | r 29 | s 30 | t 31 | u 32 | v 33 | w 34 | x 35 | y 36 | z 37 | -------------------------------------------------------------------------------- /mmocr/dicts/sdmgr_dict.txt: -------------------------------------------------------------------------------- 1 | / 2 | \ 3 | . 4 | $ 5 | £ 6 | € 7 | ¥ 8 | : 9 | - 10 | , 11 | * 12 | # 13 | ( 14 | ) 15 | % 16 | @ 17 | ! 18 | ' 19 | & 20 | = 21 | > 22 | + 23 | " 24 | × 25 | ? 
26 | < 27 | [ 28 | ] 29 | _ 30 | 0 31 | 1 32 | 2 33 | 3 34 | 4 35 | 5 36 | 6 37 | 7 38 | 8 39 | 9 40 | a 41 | b 42 | c 43 | d 44 | e 45 | f 46 | g 47 | h 48 | i 49 | j 50 | k 51 | l 52 | m 53 | n 54 | o 55 | p 56 | q 57 | r 58 | s 59 | t 60 | u 61 | v 62 | w 63 | x 64 | y 65 | z 66 | A 67 | B 68 | C 69 | D 70 | E 71 | F 72 | G 73 | H 74 | I 75 | J 76 | K 77 | L 78 | M 79 | N 80 | O 81 | P 82 | Q 83 | R 84 | S 85 | T 86 | U 87 | V 88 | W 89 | X 90 | Y 91 | Z -------------------------------------------------------------------------------- /mmocr/docker/serve/config.properties: -------------------------------------------------------------------------------- 1 | inference_address=http://0.0.0.0:8080 2 | management_address=http://0.0.0.0:8081 3 | metrics_address=http://0.0.0.0:8082 4 | model_store=/home/model-server/model-store 5 | load_models=all 6 | -------------------------------------------------------------------------------- /mmocr/docker/serve/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | if [[ "$1" = "serve" ]]; then 5 | shift 1 6 | torchserve --start --ts-config /home/model-server/config.properties 7 | else 8 | eval "$@" 9 | fi 10 | 11 | # prevent docker exit 12 | tail -f /dev/null 13 | -------------------------------------------------------------------------------- /mmocr/docs/en/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /mmocr/docs/en/_static/css/readthedocs.css: -------------------------------------------------------------------------------- 1 | .header-logo { 2 | background-image: url("../images/mmocr.png"); 3 | background-size: 110px 40px; 4 | height: 40px; 5 | width: 110px; 6 | } 7 | -------------------------------------------------------------------------------- /mmocr/docs/en/_static/images/mmocr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/docs/en/_static/images/mmocr.png -------------------------------------------------------------------------------- /mmocr/docs/en/_static/js/collapsed.js: -------------------------------------------------------------------------------- 1 | var collapsedSections = ['Migration Guides', 'API Reference'] 2 | -------------------------------------------------------------------------------- /mmocr/docs/en/_templates/classtemplate.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | .. currentmodule:: {{ module }} 4 | 5 | 6 | {{ name | underline}} 7 | 8 | .. autoclass:: {{ name }} 9 | :members: 10 | 11 | 12 | .. 
13 | autogenerated from source/_templates/classtemplate.rst 14 | note it does not have :inherited-members: 15 | -------------------------------------------------------------------------------- /mmocr/docs/en/api/apis.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | mmocr.apis 5 | =================================== 6 | 7 | .. contents:: mmocr.apis 8 | :depth: 2 9 | :local: 10 | :backlinks: top 11 | 12 | .. currentmodule:: mmocr.apis.inferencers 13 | 14 | Inferencers 15 | --------------------------------------------- 16 | 17 | .. autosummary:: 18 | :toctree: generated 19 | :nosignatures: 20 | :template: classtemplate.rst 21 | 22 | MMOCRInferencer 23 | TextDetInferencer 24 | TextRecInferencer 25 | TextSpotInferencer 26 | KIEInferencer 27 | -------------------------------------------------------------------------------- /mmocr/docs/en/api/engine.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | mmocr.engine 5 | =================================== 6 | 7 | .. contents:: mmocr.engine 8 | :depth: 2 9 | :local: 10 | :backlinks: top 11 | 12 | .. currentmodule:: mmocr.engine.hooks 13 | 14 | Hooks 15 | --------------------------------------------- 16 | 17 | .. autosummary:: 18 | :toctree: generated 19 | :nosignatures: 20 | :template: classtemplate.rst 21 | 22 | VisualizationHook 23 | -------------------------------------------------------------------------------- /mmocr/docs/en/api/structures.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | mmocr.structures 5 | =================================== 6 | 7 | .. currentmodule:: mmocr.structures 8 | .. 
autosummary:: 9 | :toctree: generated 10 | :nosignatures: 11 | :template: classtemplate.rst 12 | 13 | TextDetDataSample 14 | TextRecogDataSample 15 | KIEDataSample 16 | -------------------------------------------------------------------------------- /mmocr/docs/en/api/visualization.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | mmocr.visualization 5 | =================================== 6 | 7 | .. currentmodule:: mmocr.visualization 8 | 9 | .. autosummary:: 10 | :toctree: generated 11 | :nosignatures: 12 | :template: classtemplate.rst 13 | 14 | BaseLocalVisualizer 15 | TextDetLocalVisualizer 16 | TextRecogLocalVisualizer 17 | TextSpottingLocalVisualizer 18 | KIELocalVisualizer 19 | -------------------------------------------------------------------------------- /mmocr/docs/en/basic_concepts/convention.md: -------------------------------------------------------------------------------- 1 | # Convention\[coming soon\] 2 | 3 | Coming Soon! 4 | -------------------------------------------------------------------------------- /mmocr/docs/en/basic_concepts/data_flow.md: -------------------------------------------------------------------------------- 1 | # Data Flow\[coming soon\] 2 | 3 | Coming Soon! 4 | -------------------------------------------------------------------------------- /mmocr/docs/en/basic_concepts/engine.md: -------------------------------------------------------------------------------- 1 | # Engine\[coming soon\] 2 | 3 | Coming Soon! 4 | -------------------------------------------------------------------------------- /mmocr/docs/en/basic_concepts/models.md: -------------------------------------------------------------------------------- 1 | # Models\[coming soon\] 2 | 3 | Coming Soon! 
4 | -------------------------------------------------------------------------------- /mmocr/docs/en/basic_concepts/overview.md: -------------------------------------------------------------------------------- 1 | # Overview & Features\[coming soon\] 2 | 3 | Coming Soon! 4 | -------------------------------------------------------------------------------- /mmocr/docs/en/basic_concepts/visualizers.md: -------------------------------------------------------------------------------- 1 | # Visualizers\[coming soon\] 2 | 3 | Coming Soon! 4 | -------------------------------------------------------------------------------- /mmocr/docs/en/docutils.conf: -------------------------------------------------------------------------------- 1 | [html writers] 2 | table_style: colwidths-auto 3 | -------------------------------------------------------------------------------- /mmocr/docs/en/migration/model.md: -------------------------------------------------------------------------------- 1 | # Pretrained Model Migration 2 | 3 | Due to the extensive refactoring and fixing of the model structure in the new version, MMOCR 1.x does not support load weights trained by the old version. We have updated the pre-training weights and logs of all models on our website. 4 | 5 | In addition, we are working on the development of a weight migration tool for text detection tasks and plan to release it in the near future. Since the text recognition and key information extraction models are too much modified and the migration is lossy, we do not plan to support them accordingly for the time being. If you have specific requirements, please feel free to raise an [Issue](https://github.com/open-mmlab/mmocr/issues). 
6 | -------------------------------------------------------------------------------- /mmocr/docs/en/requirements.txt: -------------------------------------------------------------------------------- 1 | recommonmark 2 | sphinx 3 | sphinx_markdown_tables 4 | sphinx_rtd_theme 5 | -------------------------------------------------------------------------------- /mmocr/docs/en/switch_language.md: -------------------------------------------------------------------------------- 1 | ## English 2 | 3 | ## 简体中文 4 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/_static/css/readthedocs.css: -------------------------------------------------------------------------------- 1 | .header-logo { 2 | background-image: url("../images/mmocr.png"); 3 | background-size: 110px 40px; 4 | height: 40px; 5 | width: 110px; 6 | } 7 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/_static/images/mmocr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/docs/zh_cn/_static/images/mmocr.png -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/_static/js/collapsed.js: -------------------------------------------------------------------------------- 1 | var collapsedSections = ['MMOCR 0.x 迁移指南', 'API 文档'] 2 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/_templates/classtemplate.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | .. currentmodule:: {{ module }} 4 | 5 | 6 | {{ name | underline}} 7 | 8 | .. autoclass:: {{ name }} 9 | :members: 10 | 11 | 12 | .. 13 | autogenerated from source/_templates/classtemplate.rst 14 | note it does not have :inherited-members: 15 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/api/apis.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | mmocr.apis 5 | =================================== 6 | 7 | .. contents:: mmocr.apis 8 | :depth: 2 9 | :local: 10 | :backlinks: top 11 | 12 | .. 
currentmodule:: mmocr.apis.inferencers 13 | 14 | Inferencers 15 | --------------------------------------------- 16 | 17 | .. autosummary:: 18 | :toctree: generated 19 | :nosignatures: 20 | :template: classtemplate.rst 21 | 22 | MMOCRInferencer 23 | TextDetInferencer 24 | TextRecInferencer 25 | TextSpotInferencer 26 | KIEInferencer 27 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/api/engine.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | mmocr.engine 5 | =================================== 6 | 7 | .. contents:: mmocr.engine 8 | :depth: 2 9 | :local: 10 | :backlinks: top 11 | 12 | .. currentmodule:: mmocr.engine.hooks 13 | 14 | Hooks 15 | --------------------------------------------- 16 | 17 | .. autosummary:: 18 | :toctree: generated 19 | :nosignatures: 20 | :template: classtemplate.rst 21 | 22 | VisualizationHook 23 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/api/structures.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | mmocr.structures 5 | =================================== 6 | 7 | .. currentmodule:: mmocr.structures 8 | .. autosummary:: 9 | :toctree: generated 10 | :nosignatures: 11 | :template: classtemplate.rst 12 | 13 | TextDetDataSample 14 | TextRecogDataSample 15 | KIEDataSample 16 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/api/visualization.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | mmocr.visualization 5 | =================================== 6 | 7 | .. currentmodule:: mmocr.visualization 8 | 9 | .. 
autosummary:: 10 | :toctree: generated 11 | :nosignatures: 12 | :template: classtemplate.rst 13 | 14 | BaseLocalVisualizer 15 | TextDetLocalVisualizer 16 | TextRecogLocalVisualizer 17 | TextSpottingLocalVisualizer 18 | KIELocalVisualizer 19 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/basic_concepts/convention.md: -------------------------------------------------------------------------------- 1 | # 开发默认约定\[待更新\] 2 | 3 | 待更新 4 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/basic_concepts/data_flow.md: -------------------------------------------------------------------------------- 1 | # 数据流\[待更新\] 2 | 3 | 待更新 4 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/basic_concepts/engine.md: -------------------------------------------------------------------------------- 1 | # 引擎\[待更新\] 2 | 3 | 待更新 4 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/basic_concepts/models.md: -------------------------------------------------------------------------------- 1 | # 模型\[待更新\] 2 | 3 | 待更新 4 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/basic_concepts/overview.md: -------------------------------------------------------------------------------- 1 | # 设计理念与特性\[待更新\] 2 | 3 | 待更新 4 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/basic_concepts/visualizers.md: -------------------------------------------------------------------------------- 1 | # 可视化组件\[待更新\] 2 | 3 | 待更新 4 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/cp_origin_docs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copy *.md files from docs/ if it doesn't have a Chinese translation 
4 | 5 | for filename in $(find ../en/ -name '*.md' -printf "%P\n"); 6 | do 7 | mkdir -p $(dirname $filename) 8 | cp -n ../en/$filename ./$filename 9 | done 10 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/docutils.conf: -------------------------------------------------------------------------------- 1 | [html writers] 2 | table_style: colwidths-auto 3 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/migration/model.md: -------------------------------------------------------------------------------- 1 | # 预训练模型迁移指南 2 | 3 | 由于在新版本中我们对模型的结构进行了大量的重构和修复,MMOCR 1.x 并不能直接读入旧版的预训练权重。我们在网站上同步更新了所有模型的预训练权重和log,供有需要的用户使用。 4 | 5 | 此外,我们正在进行针对文本检测任务的权重迁移工具的开发,并计划于近期版本内发布。由于文本识别和关键信息提取模型改动过大,且迁移是有损的,我们暂时不计划作相应支持。如果您有具体的需求,欢迎通过 [Issue](https://github.com/open-mmlab/mmocr/issues) 向我们提问。 6 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/notes/branches.md: -------------------------------------------------------------------------------- 1 | # 分支 2 | 3 | 本文档旨在全面解释 MMOCR 中每个分支的目的和功能。 4 | 5 | ## 分支概述 6 | 7 | ### 1. `main` 8 | 9 | `main` 分支是 MMOCR 项目的默认分支。它包含了 MMOCR 的最新稳定版本,目前包含了 MMOCR 1.x(例如 v1.0.0)的代码。`main` 分支确保用户能够使用最新和最可靠的软件版本。 10 | 11 | ### 2. `dev-1.x` 12 | 13 | `dev-1.x` 分支用于开发 MMOCR 的下一个版本。此分支将在发版前进行依赖性测试,通过的提交将会合成到新版本中,并被发布到 `main` 分支。通过设置单独的开发分支,项目可以在不影响 `main` 分支稳定性的情况下继续发展。**所有 PR 应合并到 `dev-1.x` 分支。** 14 | 15 | ### 3. `0.x` 16 | 17 | `0.x` 分支用作 MMOCR 0.x(例如 v0.6.3)的存档。此分支将不再积极接受更新或改进,但它仍可作为历史参考,或供尚未升级到 MMOCR 1.x 的用户使用。 18 | 19 | ### 4. 
`1.x` 20 | 21 | 它是 `main` 分支的别名,旨在实现从兼容性时期平稳过渡。它将在 2023 年的年中删除。 22 | 23 | ```{note} 24 | 分支映射在 2023.04.06 发生了变化。有关旧分支映射和迁移指南,请参阅[分支迁移指南](../migration/branches.md)。 25 | ``` 26 | -------------------------------------------------------------------------------- /mmocr/docs/zh_cn/switch_language.md: -------------------------------------------------------------------------------- 1 | ## English 2 | 3 | ## 简体中文 4 | -------------------------------------------------------------------------------- /mmocr/mmocr/apis/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .inferencers import * # NOQA 3 | -------------------------------------------------------------------------------- /mmocr/mmocr/apis/inferencers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .kie_inferencer import KIEInferencer 3 | from .mmocr_inferencer import MMOCRInferencer 4 | from .textdet_inferencer import TextDetInferencer 5 | from .textrec_inferencer import TextRecInferencer 6 | from .textspot_inferencer import TextSpotInferencer 7 | 8 | __all__ = [ 9 | 'TextDetInferencer', 'TextRecInferencer', 'KIEInferencer', 10 | 'MMOCRInferencer', 'TextSpotInferencer' 11 | ] 12 | -------------------------------------------------------------------------------- /mmocr/mmocr/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .dataset_wrapper import ConcatDataset 3 | from .icdar_dataset import IcdarDataset 4 | from .ocr_dataset import OCRDataset 5 | from .recog_lmdb_dataset import RecogLMDBDataset 6 | from .recog_text_dataset import RecogTextDataset 7 | from .samplers import * # NOQA 8 | from .transforms import * # NOQA 9 | from .wildreceipt_dataset import WildReceiptDataset 10 | 11 | __all__ = [ 12 | 'IcdarDataset', 'OCRDataset', 'RecogLMDBDataset', 'RecogTextDataset', 13 | 'WildReceiptDataset', 'ConcatDataset' 14 | ] 15 | -------------------------------------------------------------------------------- /mmocr/mmocr/datasets/preparers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .config_generators import * # noqa 3 | from .data_preparer import DatasetPreparer 4 | from .dumpers import * # noqa 5 | from .gatherers import * # noqa 6 | from .obtainers import * # noqa 7 | from .packers import * # noqa 8 | from .parsers import * # noqa 9 | 10 | __all__ = ['DatasetPreparer'] 11 | -------------------------------------------------------------------------------- /mmocr/mmocr/datasets/preparers/config_generators/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .base import BaseDatasetConfigGenerator 3 | from .textdet_config_generator import TextDetConfigGenerator 4 | from .textrecog_config_generator import TextRecogConfigGenerator 5 | from .textspotting_config_generator import TextSpottingConfigGenerator 6 | 7 | __all__ = [ 8 | 'BaseDatasetConfigGenerator', 'TextDetConfigGenerator', 9 | 'TextRecogConfigGenerator', 'TextSpottingConfigGenerator' 10 | ] 11 | -------------------------------------------------------------------------------- /mmocr/mmocr/datasets/preparers/dumpers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base import BaseDumper 3 | from .json_dumper import JsonDumper 4 | from .lmdb_dumper import TextRecogLMDBDumper 5 | from .wild_receipt_openset_dumper import WildreceiptOpensetDumper 6 | 7 | __all__ = [ 8 | 'BaseDumper', 'JsonDumper', 'WildreceiptOpensetDumper', 9 | 'TextRecogLMDBDumper' 10 | ] 11 | -------------------------------------------------------------------------------- /mmocr/mmocr/datasets/preparers/dumpers/json_dumper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os.path as osp 3 | from typing import Dict 4 | 5 | import mmengine 6 | 7 | from mmocr.registry import DATA_DUMPERS 8 | from .base import BaseDumper 9 | 10 | 11 | @DATA_DUMPERS.register_module() 12 | class JsonDumper(BaseDumper): 13 | """Dumper for json file.""" 14 | 15 | def dump(self, data: Dict) -> None: 16 | """Dump data to json file. 17 | 18 | Args: 19 | data (Dict): Data to be dumped. 
20 | """ 21 | 22 | filename = f'{self.task}_{self.split}.json' 23 | dst_file = osp.join(self.data_root, filename) 24 | mmengine.dump(data, dst_file) 25 | -------------------------------------------------------------------------------- /mmocr/mmocr/datasets/preparers/dumpers/wild_receipt_openset_dumper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os.path as osp 3 | from typing import List 4 | 5 | from mmocr.registry import DATA_DUMPERS 6 | from mmocr.utils import list_to_file 7 | from .base import BaseDumper 8 | 9 | 10 | @DATA_DUMPERS.register_module() 11 | class WildreceiptOpensetDumper(BaseDumper): 12 | 13 | def dump(self, data: List): 14 | """Dump data to txt file. 15 | 16 | Args: 17 | data (List): Data to be dumped. 18 | """ 19 | 20 | filename = f'openset_{self.split}.txt' 21 | dst_file = osp.join(self.data_root, filename) 22 | list_to_file(dst_file, data) 23 | -------------------------------------------------------------------------------- /mmocr/mmocr/datasets/preparers/gatherers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | 3 | from .base import BaseGatherer 4 | from .mono_gatherer import MonoGatherer 5 | from .naf_gatherer import NAFGatherer 6 | from .pair_gatherer import PairGatherer 7 | 8 | __all__ = ['BaseGatherer', 'MonoGatherer', 'PairGatherer', 'NAFGatherer'] 9 | -------------------------------------------------------------------------------- /mmocr/mmocr/datasets/preparers/obtainers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .naive_data_obtainer import NaiveDataObtainer 3 | 4 | __all__ = ['NaiveDataObtainer'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/datasets/preparers/packers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base import BasePacker 3 | from .textdet_packer import TextDetPacker 4 | from .textrecog_packer import TextRecogCropPacker, TextRecogPacker 5 | from .textspotting_packer import TextSpottingPacker 6 | from .wildreceipt_packer import WildReceiptPacker 7 | 8 | __all__ = [ 9 | 'BasePacker', 'TextDetPacker', 'TextRecogPacker', 'TextRecogCropPacker', 10 | 'TextSpottingPacker', 'WildReceiptPacker' 11 | ] 12 | -------------------------------------------------------------------------------- /mmocr/mmocr/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .batch_aug import BatchAugSampler 3 | 4 | __all__ = ['BatchAugSampler'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .hooks import * # NOQA 3 | -------------------------------------------------------------------------------- /mmocr/mmocr/engine/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .visualization_hook import VisualizationHook 3 | 4 | __all__ = ['VisualizationHook'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. 
All rights reserved. 2 | from .evaluator import * # NOQA 3 | from .metrics import * # NOQA 4 | -------------------------------------------------------------------------------- /mmocr/mmocr/evaluation/evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .multi_datasets_evaluator import MultiDatasetsEvaluator 3 | 4 | __all__ = ['MultiDatasetsEvaluator'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/evaluation/functional/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .hmean import compute_hmean 3 | 4 | __all__ = ['compute_hmean'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/evaluation/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .f_metric import F1Metric 3 | from .hmean_iou_metric import HmeanIOUMetric 4 | from .recog_metric import CharMetric, OneMinusNEDMetric, WordMetric 5 | 6 | __all__ = [ 7 | 'WordMetric', 'CharMetric', 'OneMinusNEDMetric', 'HmeanIOUMetric', 8 | 'F1Metric' 9 | ] 10 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .common import * # NOQA 3 | from .kie import * # NOQA 4 | from .textdet import * # NOQA 5 | from .textrecog import * # NOQA 6 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/common/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .backbones import * # NOQA 3 | from .dictionary import * # NOQA 4 | from .layers import * # NOQA 5 | from .losses import * # NOQA 6 | from .modules import * # NOQA 7 | from .plugins import * # NOQA 8 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/common/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .clip_resnet import CLIPResNet 3 | from .unet import UNet 4 | 5 | __all__ = ['UNet', 'CLIPResNet'] 6 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/common/dictionary/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | 3 | from .dictionary import Dictionary 4 | 5 | __all__ = ['Dictionary'] 6 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/common/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .transformer_layers import TFDecoderLayer, TFEncoderLayer 3 | 4 | __all__ = ['TFEncoderLayer', 'TFDecoderLayer'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/common/losses/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .bce_loss import (MaskedBalancedBCELoss, MaskedBalancedBCEWithLogitsLoss, 3 | MaskedBCELoss, MaskedBCEWithLogitsLoss) 4 | from .ce_loss import CrossEntropyLoss 5 | from .dice_loss import MaskedDiceLoss, MaskedSquareDiceLoss 6 | from .l1_loss import MaskedSmoothL1Loss, SmoothL1Loss 7 | 8 | __all__ = [ 9 | 'MaskedBalancedBCEWithLogitsLoss', 'MaskedDiceLoss', 'MaskedSmoothL1Loss', 10 | 'MaskedSquareDiceLoss', 'MaskedBCEWithLogitsLoss', 'SmoothL1Loss', 11 | 'CrossEntropyLoss', 'MaskedBalancedBCELoss', 'MaskedBCELoss' 12 | ] 13 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/common/losses/ce_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch.nn as nn 3 | 4 | from mmocr.registry import MODELS 5 | 6 | 7 | @MODELS.register_module() 8 | class CrossEntropyLoss(nn.CrossEntropyLoss): 9 | """Cross entropy loss.""" 10 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/common/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .transformer_module import (MultiHeadAttention, PositionalEncoding, 3 | PositionwiseFeedForward, 4 | ScaledDotProductAttention) 5 | 6 | __all__ = [ 7 | 'ScaledDotProductAttention', 'MultiHeadAttention', 8 | 'PositionwiseFeedForward', 'PositionalEncoding' 9 | ] 10 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/common/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .common import AvgPool2d 3 | 4 | __all__ = ['AvgPool2d'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/kie/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .extractors import * # NOQA 3 | from .heads import * # NOQA 4 | from .module_losses import * # NOQA 5 | from .postprocessors import * # NOQA 6 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/kie/extractors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .sdmgr import SDMGR 3 | 4 | __all__ = ['SDMGR'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/kie/heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .sdmgr_head import SDMGRHead 3 | 4 | __all__ = ['SDMGRHead'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/kie/module_losses/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .sdmgr_module_loss import SDMGRModuleLoss 3 | 4 | __all__ = ['SDMGRModuleLoss'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/kie/postprocessors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .sdmgr_postprocessor import SDMGRPostProcessor 3 | 4 | __all__ = ['SDMGRPostProcessor'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .data_preprocessors import * # NOQA 3 | from .detectors import * # NOQA 4 | from .heads import * # NOQA 5 | from .module_losses import * # NOQA 6 | from .necks import * # NOQA 7 | from .postprocessors import * # NOQA 8 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/data_preprocessors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .data_preprocessor import TextDetDataPreprocessor 3 | 4 | __all__ = ['TextDetDataPreprocessor'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .dbnet import DBNet 3 | from .drrg import DRRG 4 | from .fcenet import FCENet 5 | from .mmdet_wrapper import MMDetWrapper 6 | from .panet import PANet 7 | from .psenet import PSENet 8 | from .single_stage_text_detector import SingleStageTextDetector 9 | from .textsnake import TextSnake 10 | 11 | __all__ = [ 12 | 'SingleStageTextDetector', 'DBNet', 'PANet', 'PSENet', 'TextSnake', 13 | 'FCENet', 'DRRG', 'MMDetWrapper' 14 | ] 15 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/detectors/dbnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from mmocr.registry import MODELS 3 | from .single_stage_text_detector import SingleStageTextDetector 4 | 5 | 6 | @MODELS.register_module() 7 | class DBNet(SingleStageTextDetector): 8 | """The class for implementing DBNet text detector: Real-time Scene Text 9 | Detection with Differentiable Binarization. 10 | 11 | [https://arxiv.org/abs/1911.08947]. 12 | """ 13 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/detectors/drrg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmocr.registry import MODELS 3 | from .single_stage_text_detector import SingleStageTextDetector 4 | 5 | 6 | @MODELS.register_module() 7 | class DRRG(SingleStageTextDetector): 8 | """The class for implementing DRRG text detector. Deep Relational Reasoning 9 | Graph Network for Arbitrary Shape Text Detection. 10 | 11 | [https://arxiv.org/abs/2003.07493] 12 | """ 13 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/detectors/fcenet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmocr.registry import MODELS 3 | from .single_stage_text_detector import SingleStageTextDetector 4 | 5 | 6 | @MODELS.register_module() 7 | class FCENet(SingleStageTextDetector): 8 | """The class for implementing FCENet text detector 9 | FCENet(CVPR2021): Fourier Contour Embedding for Arbitrary-shaped Text 10 | Detection 11 | 12 | [https://arxiv.org/abs/2104.10442] 13 | """ 14 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/detectors/panet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from mmocr.registry import MODELS 3 | from .single_stage_text_detector import SingleStageTextDetector 4 | 5 | 6 | @MODELS.register_module() 7 | class PANet(SingleStageTextDetector): 8 | """The class for implementing PANet text detector: 9 | 10 | Efficient and Accurate Arbitrary-Shaped Text Detection with Pixel 11 | Aggregation Network [https://arxiv.org/abs/1908.05900]. 12 | """ 13 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/detectors/psenet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmocr.registry import MODELS 3 | from .single_stage_text_detector import SingleStageTextDetector 4 | 5 | 6 | @MODELS.register_module() 7 | class PSENet(SingleStageTextDetector): 8 | """The class for implementing PSENet text detector: Shape Robust Text 9 | Detection with Progressive Scale Expansion Network. 10 | 11 | [https://arxiv.org/abs/1806.02559]. 12 | """ 13 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/detectors/textsnake.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmocr.registry import MODELS 3 | from .single_stage_text_detector import SingleStageTextDetector 4 | 5 | 6 | @MODELS.register_module() 7 | class TextSnake(SingleStageTextDetector): 8 | """The class for implementing TextSnake text detector: TextSnake: A 9 | Flexible Representation for Detecting Text of Arbitrary Shapes. 10 | 11 | [https://arxiv.org/abs/1807.01544] 12 | """ 13 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .base import BaseTextDetHead 3 | from .db_head import DBHead 4 | from .drrg_head import DRRGHead 5 | from .fce_head import FCEHead 6 | from .pan_head import PANHead 7 | from .pse_head import PSEHead 8 | from .textsnake_head import TextSnakeHead 9 | 10 | __all__ = [ 11 | 'PSEHead', 'PANHead', 'DBHead', 'FCEHead', 'TextSnakeHead', 'DRRGHead', 12 | 'BaseTextDetHead' 13 | ] 14 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/module_losses/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .db_module_loss import DBModuleLoss 3 | from .drrg_module_loss import DRRGModuleLoss 4 | from .fce_module_loss import FCEModuleLoss 5 | from .pan_module_loss import PANModuleLoss 6 | from .pse_module_loss import PSEModuleLoss 7 | from .seg_based_module_loss import SegBasedModuleLoss 8 | from .textsnake_module_loss import TextSnakeModuleLoss 9 | 10 | __all__ = [ 11 | 'PANModuleLoss', 'PSEModuleLoss', 'DBModuleLoss', 'TextSnakeModuleLoss', 12 | 'FCEModuleLoss', 'DRRGModuleLoss', 'SegBasedModuleLoss' 13 | ] 14 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/necks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .fpem_ffm import FPEM_FFM 3 | from .fpn_cat import FPNC 4 | from .fpn_unet import FPN_UNet 5 | from .fpnf import FPNF 6 | 7 | __all__ = ['FPEM_FFM', 'FPNF', 'FPNC', 'FPN_UNet'] 8 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textdet/postprocessors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .base import BaseTextDetPostProcessor 3 | from .db_postprocessor import DBPostprocessor 4 | from .drrg_postprocessor import DRRGPostprocessor 5 | from .fce_postprocessor import FCEPostprocessor 6 | from .pan_postprocessor import PANPostprocessor 7 | from .pse_postprocessor import PSEPostprocessor 8 | from .textsnake_postprocessor import TextSnakePostprocessor 9 | 10 | __all__ = [ 11 | 'PSEPostprocessor', 'PANPostprocessor', 'DBPostprocessor', 12 | 'DRRGPostprocessor', 'FCEPostprocessor', 'TextSnakePostprocessor', 13 | 'BaseTextDetPostProcessor' 14 | ] 15 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .backbones import * # NOQA 3 | from .data_preprocessors import * # NOQA 4 | from .decoders import * # NOQA 5 | from .encoders import * # NOQA 6 | from .layers import * # NOQA 7 | from .module_losses import * # NOQA 8 | from .plugins import * # NOQA 9 | from .postprocessors import * # NOQA 10 | from .preprocessors import * # NOQA 11 | from .recognizers import * # NOQA 12 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .mini_vgg import MiniVGG 3 | from .mobilenet_v2 import MobileNetV2 4 | from .nrtr_modality_transformer import NRTRModalityTransform 5 | from .resnet import ResNet 6 | from .resnet31_ocr import ResNet31OCR 7 | from .resnet_abi import ResNetABI 8 | from .shallow_cnn import ShallowCNN 9 | 10 | __all__ = [ 11 | 'ResNet31OCR', 'MiniVGG', 'NRTRModalityTransform', 'ShallowCNN', 12 | 'ResNetABI', 'ResNet', 'MobileNetV2' 13 | ] 14 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/data_preprocessors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .data_preprocessor import TextRecogDataPreprocessor 3 | 4 | __all__ = ['TextRecogDataPreprocessor'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/encoders/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .abi_encoder import ABIEncoder 3 | from .aster_encoder import ASTEREncoder 4 | from .base import BaseEncoder 5 | from .channel_reduction_encoder import ChannelReductionEncoder 6 | from .nrtr_encoder import NRTREncoder 7 | from .sar_encoder import SAREncoder 8 | from .satrn_encoder import SATRNEncoder 9 | from .svtr_encoder import SVTREncoder 10 | 11 | __all__ = [ 12 | 'SAREncoder', 'NRTREncoder', 'BaseEncoder', 'ChannelReductionEncoder', 13 | 'SATRNEncoder', 'ABIEncoder', 'SVTREncoder', 'ASTEREncoder' 14 | ] 15 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/encoders/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from mmengine.model import BaseModule 3 | 4 | from mmocr.registry import MODELS 5 | 6 | 7 | @MODELS.register_module() 8 | class BaseEncoder(BaseModule): 9 | """Base Encoder class for text recognition.""" 10 | 11 | def forward(self, feat, **kwargs): 12 | return feat 13 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .conv_layer import BasicBlock, Bottleneck 3 | from .dot_product_attention_layer import DotProductAttentionLayer 4 | from .lstm_layer import BidirectionalLSTM 5 | from .position_aware_layer import PositionAwareLayer 6 | from .robust_scanner_fusion_layer import RobustScannerFusionLayer 7 | from .satrn_layers import Adaptive2DPositionalEncoding, SATRNEncoderLayer 8 | 9 | __all__ = [ 10 | 'BidirectionalLSTM', 'Adaptive2DPositionalEncoding', 'BasicBlock', 11 | 'Bottleneck', 'RobustScannerFusionLayer', 'DotProductAttentionLayer', 12 | 'PositionAwareLayer', 'SATRNEncoderLayer' 13 | ] 14 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/layers/lstm_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import torch.nn as nn 3 | 4 | 5 | class BidirectionalLSTM(nn.Module): 6 | 7 | def __init__(self, nIn, nHidden, nOut): 8 | super().__init__() 9 | 10 | self.rnn = nn.LSTM(nIn, nHidden, bidirectional=True) 11 | self.embedding = nn.Linear(nHidden * 2, nOut) 12 | 13 | def forward(self, input): 14 | recurrent, _ = self.rnn(input) 15 | T, b, h = recurrent.size() 16 | t_rec = recurrent.view(T * b, h) 17 | 18 | output = self.embedding(t_rec) # [T * b, nOut] 19 | output = output.view(T, b, -1) 20 | 21 | return output 22 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/layers/robust_scanner_fusion_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | from mmengine.model import BaseModule 5 | 6 | 7 | class RobustScannerFusionLayer(BaseModule): 8 | 9 | def __init__(self, dim_model, dim=-1, init_cfg=None): 10 | super().__init__(init_cfg=init_cfg) 11 | 12 | self.dim_model = dim_model 13 | self.dim = dim 14 | 15 | self.linear_layer = nn.Linear(dim_model * 2, dim_model * 2) 16 | self.glu_layer = nn.GLU(dim=dim) 17 | 18 | def forward(self, x0, x1): 19 | assert x0.size() == x1.size() 20 | fusion_input = torch.cat([x0, x1], self.dim) 21 | output = self.linear_layer(fusion_input) 22 | output = self.glu_layer(output) 23 | 24 | return output 25 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/module_losses/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .abi_module_loss import ABIModuleLoss 3 | from .base import BaseTextRecogModuleLoss 4 | from .ce_module_loss import CEModuleLoss 5 | from .ctc_module_loss import CTCModuleLoss 6 | 7 | __all__ = [ 8 | 'BaseTextRecogModuleLoss', 'CEModuleLoss', 'CTCModuleLoss', 'ABIModuleLoss' 9 | ] 10 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .common import GCAModule, Maxpool2d 3 | 4 | __all__ = ['Maxpool2d', 'GCAModule'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/postprocessors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .attn_postprocessor import AttentionPostprocessor 3 | from .base import BaseTextRecogPostprocessor 4 | from .ctc_postprocessor import CTCPostProcessor 5 | 6 | __all__ = [ 7 | 'BaseTextRecogPostprocessor', 'AttentionPostprocessor', 'CTCPostProcessor' 8 | ] 9 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/preprocessors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .tps_preprocessor import STN, TPStransform 3 | 4 | __all__ = ['TPStransform', 'STN'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/preprocessors/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from mmengine.model import BaseModule 3 | 4 | from mmocr.registry import MODELS 5 | 6 | 7 | @MODELS.register_module() 8 | class BasePreprocessor(BaseModule): 9 | """Base Preprocessor class for text recognition.""" 10 | 11 | def forward(self, x, **kwargs): 12 | return x 13 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/recognizers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .abinet import ABINet 3 | from .aster import ASTER 4 | from .base import BaseRecognizer 5 | from .crnn import CRNN 6 | from .encoder_decoder_recognizer import EncoderDecoderRecognizer 7 | from .encoder_decoder_recognizer_tta import EncoderDecoderRecognizerTTAModel 8 | from .master import MASTER 9 | from .nrtr import NRTR 10 | from .robust_scanner import RobustScanner 11 | from .sar import SARNet 12 | from .satrn import SATRN 13 | from .svtr import SVTR 14 | 15 | __all__ = [ 16 | 'BaseRecognizer', 'EncoderDecoderRecognizer', 'CRNN', 'SARNet', 'NRTR', 17 | 'RobustScanner', 'SATRN', 'ABINet', 'MASTER', 'SVTR', 'ASTER', 18 | 'EncoderDecoderRecognizerTTAModel' 19 | ] 20 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/recognizers/abinet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmocr.registry import MODELS 3 | from .encoder_decoder_recognizer import EncoderDecoderRecognizer 4 | 5 | 6 | @MODELS.register_module() 7 | class ABINet(EncoderDecoderRecognizer): 8 | """Implementation of `Read Like Humans: Autonomous, Bidirectional and 9 | Iterative LanguageModeling for Scene Text Recognition. 
10 | 11 | `_ 12 | """ 13 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/recognizers/aster.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmocr.registry import MODELS 3 | from .encoder_decoder_recognizer import EncoderDecoderRecognizer 4 | 5 | 6 | @MODELS.register_module() 7 | class ASTER(EncoderDecoderRecognizer): 8 | """Implement `ASTER: An Attentional Scene Text Recognizer with Flexible 9 | Rectification. 10 | 11 | `_""" 9 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/recognizers/nrtr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmocr.registry import MODELS 3 | from .encoder_decoder_recognizer import EncoderDecoderRecognizer 4 | 5 | 6 | @MODELS.register_module() 7 | class NRTR(EncoderDecoderRecognizer): 8 | """Implementation of `NRTR `_""" 9 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/recognizers/robust_scanner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmocr.registry import MODELS 3 | from .encoder_decoder_recognizer import EncoderDecoderRecognizer 4 | 5 | 6 | @MODELS.register_module() 7 | class RobustScanner(EncoderDecoderRecognizer): 8 | """Implementation of `RobustScanner. 9 | 10 | 11 | """ 12 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/recognizers/sar.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from mmocr.registry import MODELS 3 | from .encoder_decoder_recognizer import EncoderDecoderRecognizer 4 | 5 | 6 | @MODELS.register_module() 7 | class SARNet(EncoderDecoderRecognizer): 8 | """Implementation of `SAR `_""" 9 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/recognizers/satrn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmocr.registry import MODELS 3 | from .encoder_decoder_recognizer import EncoderDecoderRecognizer 4 | 5 | 6 | @MODELS.register_module() 7 | class SATRN(EncoderDecoderRecognizer): 8 | """Implementation of `SATRN `_""" 9 | -------------------------------------------------------------------------------- /mmocr/mmocr/models/textrecog/recognizers/svtr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmocr.registry import MODELS 3 | from .encoder_decoder_recognizer import EncoderDecoderRecognizer 4 | 5 | 6 | @MODELS.register_module() 7 | class SVTR(EncoderDecoderRecognizer): 8 | """A PyTorch implementation of : `SVTR: Scene Text Recognition with a 9 | Single Visual Model `_""" 10 | -------------------------------------------------------------------------------- /mmocr/mmocr/structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .kie_data_sample import KIEDataSample 3 | from .textdet_data_sample import TextDetDataSample 4 | from .textrecog_data_sample import TextRecogDataSample 5 | from .textspotting_data_sample import TextSpottingDataSample 6 | 7 | __all__ = [ 8 | 'TextDetDataSample', 'TextRecogDataSample', 'KIEDataSample', 9 | 'TextSpottingDataSample' 10 | ] 11 | -------------------------------------------------------------------------------- /mmocr/mmocr/testing/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .data import create_dummy_dict_file, create_dummy_textdet_inputs 3 | 4 | __all__ = ['create_dummy_dict_file', 'create_dummy_textdet_inputs'] 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmengine.utils import get_git_hash 3 | from mmengine.utils.dl_utils import collect_env as collect_base_env 4 | 5 | import mmocr 6 | 7 | 8 | def collect_env(): 9 | """Collect the information of the running environments.""" 10 | env_info = collect_base_env() 11 | env_info['MMOCR'] = mmocr.__version__ + '+' + get_git_hash()[:7] 12 | return env_info 13 | 14 | 15 | if __name__ == '__main__': 16 | for name, val in collect_env().items(): 17 | print(f'{name}: {val}') 18 | -------------------------------------------------------------------------------- /mmocr/mmocr/version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 
2 | 3 | __version__ = '1.0.0' 4 | short_version = __version__ 5 | -------------------------------------------------------------------------------- /mmocr/mmocr/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_visualizer import BaseLocalVisualizer 3 | from .kie_visualizer import KIELocalVisualizer 4 | from .textdet_visualizer import TextDetLocalVisualizer 5 | from .textrecog_visualizer import TextRecogLocalVisualizer 6 | from .textspotting_visualizer import TextSpottingLocalVisualizer 7 | 8 | __all__ = [ 9 | 'BaseLocalVisualizer', 'KIELocalVisualizer', 'TextDetLocalVisualizer', 10 | 'TextRecogLocalVisualizer', 'TextSpottingLocalVisualizer' 11 | ] 12 | -------------------------------------------------------------------------------- /mmocr/model-index.yml: -------------------------------------------------------------------------------- 1 | Import: 2 | - configs/textdet/dbnet/metafile.yml 3 | - configs/textdet/dbnetpp/metafile.yml 4 | - configs/textdet/maskrcnn/metafile.yml 5 | - configs/textdet/drrg/metafile.yml 6 | - configs/textdet/fcenet/metafile.yml 7 | - configs/textdet/panet/metafile.yml 8 | - configs/textdet/psenet/metafile.yml 9 | - configs/textdet/textsnake/metafile.yml 10 | - configs/textrecog/abinet/metafile.yml 11 | - configs/textrecog/aster/metafile.yml 12 | - configs/textrecog/crnn/metafile.yml 13 | - configs/textrecog/master/metafile.yml 14 | - configs/textrecog/nrtr/metafile.yml 15 | - configs/textrecog/svtr/metafile.yml 16 | - configs/textrecog/robust_scanner/metafile.yml 17 | - configs/textrecog/sar/metafile.yml 18 | - configs/textrecog/satrn/metafile.yml 19 | - configs/kie/sdmgr/metafile.yml 20 | -------------------------------------------------------------------------------- /mmocr/my_test.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python tools/test.py 
configs/textdet/dbnet/synth_data_train_100k_ic15_test.py output/new_10k_synthtext/epoch_1.pth --save-preds 2 | -------------------------------------------------------------------------------- /mmocr/my_train.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python tools/train.py configs/textdet/dbnet/synth_data_train_100k_ic15_test.py --work-dir output/new_SD_base_10000_curve --amp 2 | -------------------------------------------------------------------------------- /mmocr/projects/ABCNet/abcnet/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | from .metric import * # NOQA 4 | from .model import * # NOQA 5 | from .utils import * # NOQA 6 | -------------------------------------------------------------------------------- /mmocr/projects/ABCNet/abcnet/metric/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .e2e_hmean_iou_metric import E2EHmeanIOUMetric 3 | 4 | __all__ = ['E2EHmeanIOUMetric'] 5 | -------------------------------------------------------------------------------- /mmocr/projects/ABCNet/abcnet/model/abcnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmocr.registry import MODELS 3 | from .two_stage_text_spotting import TwoStageTextSpotter 4 | 5 | 6 | @MODELS.register_module() 7 | class ABCNet(TwoStageTextSpotter): 8 | """CTC-loss based recognizer.""" 9 | -------------------------------------------------------------------------------- /mmocr/projects/ABCNet/abcnet/model/abcnet_rec.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from mmocr.models.textrecog import EncoderDecoderRecognizer 3 | from mmocr.registry import MODELS 4 | 5 | 6 | @MODELS.register_module() 7 | class ABCNetRec(EncoderDecoderRecognizer): 8 | """CTC-loss based recognizer.""" 9 | -------------------------------------------------------------------------------- /mmocr/projects/ABCNet/abcnet/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .bezier_utils import bezier2poly, poly2bezier 3 | 4 | __all__ = ['poly2bezier', 'bezier2poly'] 5 | -------------------------------------------------------------------------------- /mmocr/projects/ABCNet/config/_base_/datasets/icdar2015.py: -------------------------------------------------------------------------------- 1 | icdar2015_textspotting_data_root = 'data/icdar2015' 2 | 3 | icdar2015_textspotting_train = dict( 4 | type='OCRDataset', 5 | data_root=icdar2015_textspotting_data_root, 6 | ann_file='textspotting_train.json', 7 | pipeline=None) 8 | 9 | icdar2015_textspotting_test = dict( 10 | type='OCRDataset', 11 | data_root=icdar2015_textspotting_data_root, 12 | ann_file='textspotting_test.json', 13 | test_mode=True, 14 | # indices=50, 15 | pipeline=None) 16 | -------------------------------------------------------------------------------- /mmocr/projects/ABCNet/config/_base_/schedules/schedule_sgd_500e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optim_wrapper = dict( 3 | type='OptimWrapper', 4 | optimizer=dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001), 5 | clip_grad=dict(type='value', clip_value=1)) 6 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=500, val_interval=20) 7 | val_cfg = dict(type='ValLoop') 8 | test_cfg = dict(type='TestLoop') 9 | # learning policy 10 | param_scheduler = [ 11 | dict(type='LinearLR', end=1000, start_factor=0.001, by_epoch=False), 12 | ] 13 | 
-------------------------------------------------------------------------------- /mmocr/projects/ABCNet/config/abcnet_v2/abcnet-v2_resnet50_bifpn_500e_icdar2015.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '_base_abcnet-v2_resnet50_bifpn.py', 3 | '../_base_/datasets/icdar2015.py', 4 | '../_base_/default_runtime.py', 5 | ] 6 | 7 | # dataset settings 8 | icdar2015_textspotting_test = _base_.icdar2015_textspotting_test 9 | icdar2015_textspotting_test.pipeline = _base_.test_pipeline 10 | 11 | val_dataloader = dict( 12 | batch_size=1, 13 | num_workers=4, 14 | persistent_workers=True, 15 | sampler=dict(type='DefaultSampler', shuffle=False), 16 | dataset=icdar2015_textspotting_test) 17 | 18 | test_dataloader = val_dataloader 19 | 20 | val_cfg = dict(type='ValLoop') 21 | test_cfg = dict(type='TestLoop') 22 | 23 | custom_imports = dict(imports=['abcnet'], allow_failed_imports=False) 24 | -------------------------------------------------------------------------------- /mmocr/projects/ABCNet/dicts/abcnet.txt: -------------------------------------------------------------------------------- 1 | 2 | ! 3 | " 4 | # 5 | $ 6 | % 7 | & 8 | ' 9 | ( 10 | ) 11 | * 12 | + 13 | , 14 | - 15 | . 16 | / 17 | 0 18 | 1 19 | 2 20 | 3 21 | 4 22 | 5 23 | 6 24 | 7 25 | 8 26 | 9 27 | : 28 | ; 29 | < 30 | = 31 | > 32 | ? 
33 | @ 34 | A 35 | B 36 | C 37 | D 38 | E 39 | F 40 | G 41 | H 42 | I 43 | J 44 | K 45 | L 46 | M 47 | N 48 | O 49 | P 50 | Q 51 | R 52 | S 53 | T 54 | U 55 | V 56 | W 57 | X 58 | Y 59 | Z 60 | [ 61 | \ 62 | ] 63 | ^ 64 | _ 65 | ` 66 | a 67 | b 68 | c 69 | d 70 | e 71 | f 72 | g 73 | h 74 | i 75 | j 76 | k 77 | l 78 | m 79 | n 80 | o 81 | p 82 | q 83 | r 84 | s 85 | t 86 | u 87 | v 88 | w 89 | x 90 | y 91 | z 92 | { 93 | | 94 | } 95 | ~ -------------------------------------------------------------------------------- /mmocr/projects/SPTS/config/_base_/datasets/ctw1500-spts.py: -------------------------------------------------------------------------------- 1 | ctw1500_textspotting_data_root = 'data/CTW1500' 2 | 3 | ctw1500_textspotting_train = dict( 4 | type='AdelDataset', 5 | data_root=ctw1500_textspotting_data_root, 6 | ann_file='annotations/train_ctw1500_maxlen25_v2.json', 7 | data_prefix=dict(img_path='ctwtrain_text_image/'), 8 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 9 | pipeline=None) 10 | 11 | ctw1500_textspotting_test = dict( 12 | type='AdelDataset', 13 | data_root=ctw1500_textspotting_data_root, 14 | ann_file='annotations/test_ctw1500_maxlen25.json', 15 | data_prefix=dict(img_path='ctwtest_text_image/'), 16 | test_mode=True, 17 | pipeline=None) 18 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/config/_base_/datasets/icdar2013-spts.py: -------------------------------------------------------------------------------- 1 | icdar2013_textspotting_data_root = 'spts-data/icdar2013' 2 | 3 | icdar2013_textspotting_train = dict( 4 | type='AdelDataset', 5 | data_root=icdar2013_textspotting_data_root, 6 | ann_file='ic13_train.json', 7 | data_prefix=dict(img_path='train_images/'), 8 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 9 | pipeline=None) 10 | 11 | icdar2013_textspotting_test = dict( 12 | type='AdelDataset', 13 | data_root=icdar2013_textspotting_data_root, 14 | 
data_prefix=dict(img_path='test_images/'), 15 | ann_file='ic13_test.json', 16 | test_mode=True, 17 | pipeline=None) 18 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/config/_base_/datasets/icdar2013.py: -------------------------------------------------------------------------------- 1 | icdar2013_textspotting_data_root = 'data/icdar2013' 2 | 3 | icdar2013_textspotting_train = dict( 4 | type='OCRDataset', 5 | data_root=icdar2013_textspotting_data_root, 6 | ann_file='textspotting_train.json', 7 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 8 | pipeline=None) 9 | 10 | icdar2013_textspotting_test = dict( 11 | type='OCRDataset', 12 | data_root=icdar2013_textspotting_data_root, 13 | ann_file='textspotting_test.json', 14 | test_mode=True, 15 | pipeline=None) 16 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/config/_base_/datasets/icdar2015-spts.py: -------------------------------------------------------------------------------- 1 | icdar2015_textspotting_data_root = 'spts-data/icdar2015' 2 | 3 | icdar2015_textspotting_train = dict( 4 | type='AdelDataset', 5 | data_root=icdar2015_textspotting_data_root, 6 | ann_file='ic15_train.json', 7 | data_prefix=dict(img_path='train_images/'), 8 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 9 | pipeline=None) 10 | 11 | icdar2015_textspotting_test = dict( 12 | type='AdelDataset', 13 | data_root=icdar2015_textspotting_data_root, 14 | data_prefix=dict(img_path='test_images/'), 15 | ann_file='ic15_test.json', 16 | test_mode=True, 17 | pipeline=None) 18 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/config/_base_/datasets/icdar2015.py: -------------------------------------------------------------------------------- 1 | icdar2015_textspotting_data_root = 'data/icdar2015' 2 | 3 | icdar2015_textspotting_train = dict( 4 | type='OCRDataset', 5 | 
data_root=icdar2015_textspotting_data_root, 6 | ann_file='textspotting_train.json', 7 | pipeline=None) 8 | 9 | icdar2015_textspotting_test = dict( 10 | type='OCRDataset', 11 | data_root=icdar2015_textspotting_data_root, 12 | ann_file='textspotting_test.json', 13 | test_mode=True, 14 | pipeline=None) 15 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/config/_base_/datasets/mlt-spts.py: -------------------------------------------------------------------------------- 1 | mlt_textspotting_data_root = 'spts-data/mlt2017' 2 | 3 | mlt_textspotting_train = dict( 4 | type='AdelDataset', 5 | data_root=mlt_textspotting_data_root, 6 | ann_file='train.json', 7 | data_prefix=dict(img_path='MLT_train_images/'), 8 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 9 | pipeline=None) 10 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/config/_base_/datasets/syntext1-spts.py: -------------------------------------------------------------------------------- 1 | syntext1_textspotting_data_root = 'spts-data/syntext1' 2 | 3 | syntext1_textspotting_train = dict( 4 | type='AdelDataset', 5 | data_root=syntext1_textspotting_data_root, 6 | ann_file='train.json', 7 | data_prefix=dict(img_path='syntext_word_eng/'), 8 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 9 | pipeline=None) 10 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/config/_base_/datasets/syntext2-spts.py: -------------------------------------------------------------------------------- 1 | syntext2_textspotting_data_root = 'spts-data/syntext2' 2 | 3 | syntext2_textspotting_train = dict( 4 | type='AdelDataset', 5 | data_root=syntext2_textspotting_data_root, 6 | ann_file='train.json', 7 | data_prefix=dict(img_path='emcs_imgs/'), 8 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 9 | pipeline=None) 10 | 
-------------------------------------------------------------------------------- /mmocr/projects/SPTS/config/_base_/datasets/totaltext-spts.py: -------------------------------------------------------------------------------- 1 | totaltext_textspotting_data_root = 'spts-data/totaltext' 2 | 3 | totaltext_textspotting_train = dict( 4 | type='AdelDataset', 5 | data_root=totaltext_textspotting_data_root, 6 | ann_file='train.json', 7 | data_prefix=dict(img_path='train_images/'), 8 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 9 | pipeline=None) 10 | 11 | totaltext_textspotting_test = dict( 12 | type='AdelDataset', 13 | data_root=totaltext_textspotting_data_root, 14 | ann_file='test.json', 15 | data_prefix=dict(img_path='test_images/'), 16 | test_mode=True, 17 | pipeline=None) 18 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/config/_base_/datasets/totaltext.py: -------------------------------------------------------------------------------- 1 | totaltext_textspotting_data_root = 'data/totaltext' 2 | 3 | totaltext_textspotting_train = dict( 4 | type='OCRDataset', 5 | data_root=totaltext_textspotting_data_root, 6 | ann_file='textspotting_train.json', 7 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 8 | pipeline=None) 9 | 10 | totaltext_textspotting_test = dict( 11 | type='OCRDataset', 12 | data_root=totaltext_textspotting_data_root, 13 | ann_file='textspotting_test.json', 14 | test_mode=True, 15 | pipeline=None) 16 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/dicts/spts.txt: -------------------------------------------------------------------------------- 1 | 2 | ! 3 | " 4 | # 5 | $ 6 | % 7 | & 8 | ' 9 | ( 10 | ) 11 | * 12 | + 13 | , 14 | - 15 | . 16 | / 17 | 0 18 | 1 19 | 2 20 | 3 21 | 4 22 | 5 23 | 6 24 | 7 25 | 8 26 | 9 27 | : 28 | ; 29 | < 30 | = 31 | > 32 | ? 
33 | @ 34 | A 35 | B 36 | C 37 | D 38 | E 39 | F 40 | G 41 | H 42 | I 43 | J 44 | K 45 | L 46 | M 47 | N 48 | O 49 | P 50 | Q 51 | R 52 | S 53 | T 54 | U 55 | V 56 | W 57 | X 58 | Y 59 | Z 60 | [ 61 | \ 62 | ] 63 | ^ 64 | _ 65 | ` 66 | a 67 | b 68 | c 69 | d 70 | e 71 | f 72 | g 73 | h 74 | i 75 | j 76 | k 77 | l 78 | m 79 | n 80 | o 81 | p 82 | q 83 | r 84 | s 85 | t 86 | u 87 | v 88 | w 89 | x 90 | y 91 | z 92 | { 93 | | 94 | } 95 | ~ -------------------------------------------------------------------------------- /mmocr/projects/SPTS/spts/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | from .datasets import * # NOQA 4 | from .metric import * # NOQA 5 | from .model import * # NOQA 6 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/spts/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .adel_dataset import AdelDataset 3 | from .transforms.spts_transforms import (Bezier2Polygon, ConvertText, 4 | LoadOCRAnnotationsWithBezier, 5 | Polygon2Bezier, RescaleToShortSide) 6 | 7 | __all__ = [ 8 | 'AdelDataset', 'LoadOCRAnnotationsWithBezier', 'Bezier2Polygon', 9 | 'Polygon2Bezier', 'ConvertText', 'RescaleToShortSide' 10 | ] 11 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/spts/metric/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .e2e_point_metric import E2EPointMetric 3 | 4 | __all__ = ['E2EPointMetric'] 5 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/spts/model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .spts import SPTS 3 | from .spts_decoder import SPTSDecoder 4 | from .spts_dictionary import SPTSDictionary 5 | from .spts_encoder import SPTSEncoder 6 | from .spts_module_loss import SPTSModuleLoss 7 | from .spts_postprocessor import SPTSPostprocessor 8 | 9 | __all__ = [ 10 | 'SPTSEncoder', 'SPTSDecoder', 'SPTSPostprocessor', 'SPTS', 11 | 'SPTSDictionary', 'SPTSModuleLoss' 12 | ] 13 | -------------------------------------------------------------------------------- /mmocr/projects/SPTS/spts/model/spts.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from mmocr.registry import MODELS 3 | from .encoder_decoder_text_spotter import EncoderDecoderTextSpotter 4 | 5 | 6 | @MODELS.register_module() 7 | class SPTS(EncoderDecoderTextSpotter): 8 | """SPTS.""" 9 | -------------------------------------------------------------------------------- /mmocr/projects/example_project/configs/dbnet_dummy-resnet_fpnc_1200e_icdar2015.py: -------------------------------------------------------------------------------- 1 | _base_ = ['mmocr::textdet/dbnet/dbnet_resnet18_fpnc_1200e_icdar2015.py'] 2 | 3 | custom_imports = dict(imports=['dummy']) 4 | 5 | _base_.model.backbone.type = 'DummyResNet' 6 | -------------------------------------------------------------------------------- /mmocr/projects/example_project/dummy/__init__.py: -------------------------------------------------------------------------------- 1 | from .dummy_resnet import DummyResNet 2 | 3 | __all__ = ['DummyResNet'] 4 | -------------------------------------------------------------------------------- /mmocr/projects/example_project/dummy/dummy_resnet.py: -------------------------------------------------------------------------------- 1 | from mmdet.models.backbones import ResNet 2 | 3 | from mmocr.registry import MODELS 4 | 5 | 6 | @MODELS.register_module() 7 | class DummyResNet(ResNet): 8 | """Implements a dummy ResNet wrapper for demonstration purpose. 9 | 10 | Args: 11 | **kwargs: All the arguments are passed to the parent class. 
12 | """ 13 | 14 | def __init__(self, **kwargs) -> None: 15 | print('Hello world!') 16 | super().__init__(**kwargs) 17 | -------------------------------------------------------------------------------- /mmocr/projects/selected.txt: -------------------------------------------------------------------------------- 1 | projects/ABCNet/README.md 2 | projects/ABCNet/README_V2.md 3 | projects/SPTS/README.md 4 | -------------------------------------------------------------------------------- /mmocr/requirements/albu.txt: -------------------------------------------------------------------------------- 1 | albumentations>=1.1.0 --no-binary qudida,albumentations 2 | -------------------------------------------------------------------------------- /mmocr/requirements/build.txt: -------------------------------------------------------------------------------- 1 | # These must be installed before building mmocr 2 | numpy 3 | pyclipper 4 | torch>=1.1 5 | -------------------------------------------------------------------------------- /mmocr/requirements/docs.txt: -------------------------------------------------------------------------------- 1 | docutils==0.16.0 2 | markdown>=3.4.0 3 | myst-parser 4 | -e git+https://github.com/open-mmlab/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme 5 | sphinx==4.0.2 6 | sphinx-tabs 7 | sphinx_copybutton 8 | sphinx_markdown_tables>=0.0.16 9 | tabulate 10 | -------------------------------------------------------------------------------- /mmocr/requirements/mminstall.txt: -------------------------------------------------------------------------------- 1 | mmcv>=2.0.0rc4,<2.1.0 2 | mmdet>=3.0.0rc5,<3.1.0 3 | mmengine>=0.7.0, <1.0.0 4 | -------------------------------------------------------------------------------- /mmocr/requirements/optional.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/requirements/optional.txt -------------------------------------------------------------------------------- /mmocr/requirements/readthedocs.txt: -------------------------------------------------------------------------------- 1 | imgaug 2 | kwarray 3 | lmdb 4 | matplotlib 5 | mmcv>=2.0.0rc1 6 | mmdet>=3.0.0rc0 7 | mmengine>=0.1.0 8 | pyclipper 9 | rapidfuzz>=2.0.0 10 | regex 11 | scikit-image 12 | scipy 13 | shapely 14 | titlecase 15 | torch 16 | torchvision 17 | -------------------------------------------------------------------------------- /mmocr/requirements/runtime.txt: -------------------------------------------------------------------------------- 1 | imgaug 2 | lmdb 3 | matplotlib 4 | numpy 5 | opencv-python >=4.2.0.32, != 4.5.5.* # avoid Github security alert 6 | pyclipper 7 | pycocotools 8 | rapidfuzz>=2.0.0 9 | scikit-image 10 | -------------------------------------------------------------------------------- /mmocr/requirements/tests.txt: -------------------------------------------------------------------------------- 1 | asynctest 2 | codecov 3 | flake8 4 | interrogate 5 | isort 6 | # Note: used for kwarray.group_items, this may be ported to mmcv in the future. 
7 | kwarray 8 | lanms-neo==1.0.2 9 | parameterized 10 | pytest 11 | pytest-cov 12 | pytest-runner 13 | ubelt 14 | xdoctest >= 0.10.0 15 | yapf 16 | -------------------------------------------------------------------------------- /mmocr/resources/illustration.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/resources/illustration.jpg -------------------------------------------------------------------------------- /mmocr/resources/kie.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/resources/kie.jpg -------------------------------------------------------------------------------- /mmocr/resources/mmocr-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/resources/mmocr-logo.png -------------------------------------------------------------------------------- /mmocr/resources/textdet.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/resources/textdet.jpg -------------------------------------------------------------------------------- /mmocr/resources/textrecog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/resources/textrecog.jpg -------------------------------------------------------------------------------- /mmocr/resources/verification.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/mmocr/resources/verification.png -------------------------------------------------------------------------------- /mmocr/setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | 4 | [yapf] 5 | based_on_style = pep8 6 | blank_line_before_nested_class_or_def = true 7 | split_before_expression_after_opening_paren = true 8 | split_penalty_import_names=0 9 | SPLIT_PENALTY_AFTER_OPENING_BRACKET=800 10 | 11 | [isort] 12 | line_length = 79 13 | multi_line_output = 0 14 | extra_standard_library = setuptools 15 | known_first_party = mmocr 16 | known_third_party = PIL,cv2,imgaug,lanms,lmdb,matplotlib,mmcv,mmdet,numpy,packaging,pyclipper,pytest,pytorch_sphinx_theme,rapidfuzz,requests,scipy,shapely,skimage,titlecase,torch,torchvision,ts,yaml,mmengine 17 | no_lines_before = STDLIB,LOCALFOLDER 18 | default_section = THIRDPARTY 19 | 20 | [style] 21 | BASED_ON_STYLE = pep8 22 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 23 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 24 | -------------------------------------------------------------------------------- /mmocr/tests/test_evaluation/test_functional/test_hmean.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from unittest import TestCase 3 | 4 | from mmocr.evaluation.functional import compute_hmean 5 | 6 | 7 | class TestHmean(TestCase): 8 | 9 | def test_compute_hmean(self): 10 | with self.assertRaises(AssertionError): 11 | compute_hmean(0, 0, 0.0, 0) 12 | with self.assertRaises(AssertionError): 13 | compute_hmean(0, 0, 0, 0.0) 14 | with self.assertRaises(AssertionError): 15 | compute_hmean([1], 0, 0, 0) 16 | with self.assertRaises(AssertionError): 17 | compute_hmean(0, [1], 0, 0) 18 | 19 | _, _, hmean = compute_hmean(2, 2, 2, 2) 20 | self.assertEqual(hmean, 1) 21 | 22 | _, _, hmean = compute_hmean(0, 0, 2, 2) 23 | self.assertEqual(hmean, 0) 24 | -------------------------------------------------------------------------------- /mmocr/tests/test_models/test_common/test_modules/test_transformer_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from unittest import TestCase 3 | 4 | import torch 5 | 6 | from mmocr.models.common.modules import PositionalEncoding 7 | 8 | 9 | class TestPositionalEncoding(TestCase): 10 | 11 | def test_forward(self): 12 | pos_encoder = PositionalEncoding() 13 | x = torch.rand(1, 30, 512) 14 | out = pos_encoder(x) 15 | assert out.size() == x.size() 16 | -------------------------------------------------------------------------------- /mmocr/tests/test_models/test_common/test_plugins/test_avgpool.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from unittest import TestCase 3 | 4 | import torch 5 | 6 | from mmocr.models.common.plugins import AvgPool2d 7 | 8 | 9 | class TestAvgPool2d(TestCase): 10 | 11 | def setUp(self) -> None: 12 | self.img = torch.rand(1, 3, 32, 100) 13 | 14 | def test_avgpool2d(self): 15 | avgpool2d = AvgPool2d(kernel_size=2, stride=2) 16 | self.assertEqual(avgpool2d(self.img).shape, torch.Size([1, 3, 16, 50])) 17 | -------------------------------------------------------------------------------- /mmocr/tests/test_models/test_textdet/test_heads/test_pse_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from unittest import TestCase 3 | 4 | import torch 5 | 6 | from mmocr.models.textdet.heads import PSEHead 7 | 8 | 9 | class TestPSEHead(TestCase): 10 | 11 | def setUp(self): 12 | self.feature = torch.randn((2, 10, 40, 50)) 13 | 14 | def test_init(self): 15 | with self.assertRaises(TypeError): 16 | PSEHead(in_channels=1) 17 | 18 | with self.assertRaises(TypeError): 19 | PSEHead(out_channels='out') 20 | 21 | def test_forward(self): 22 | pse_head = PSEHead(in_channels=[10], hidden_dim=128, out_channel=7) 23 | results = pse_head(self.feature) 24 | self.assertEqual(results.shape, (2, 7, 40, 50)) 25 | -------------------------------------------------------------------------------- /mmocr/tests/test_models/test_textdet/test_heads/test_textsnake_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from unittest import TestCase 3 | 4 | import torch 5 | 6 | from mmocr.models.textdet.heads import TextSnakeHead 7 | 8 | 9 | class TestTextSnakeHead(TestCase): 10 | 11 | def test_init(self): 12 | with self.assertRaises(AssertionError): 13 | TextSnakeHead(in_channels='test') 14 | 15 | def test_forward(self): 16 | ts_head = TextSnakeHead(in_channels=10) 17 | data = torch.randn((2, 10, 40, 50)) 18 | results = ts_head(data, None) 19 | self.assertEqual(results.shape, (2, 5, 40, 50)) 20 | -------------------------------------------------------------------------------- /mmocr/tests/test_models/test_textdet/test_necks/test_fpnf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import unittest 3 | 4 | import torch 5 | from parameterized import parameterized 6 | 7 | from mmocr.models.textdet.necks import FPNF 8 | 9 | 10 | class TestFPNF(unittest.TestCase): 11 | 12 | def setUp(self): 13 | in_channels = [256, 512, 1024, 2048] 14 | size = [112, 56, 28, 14] 15 | inputs = [] 16 | for i in range(4): 17 | inputs.append(torch.rand(1, in_channels[i], size[i], size[i])) 18 | self.inputs = inputs 19 | 20 | @parameterized.expand([('concat'), ('add')]) 21 | def test_forward(self, fusion_type): 22 | fpnf = FPNF(fusion_type=fusion_type) 23 | outputs = fpnf.forward(self.inputs) 24 | self.assertListEqual(list(outputs.size()), [1, 256, 112, 112]) 25 | -------------------------------------------------------------------------------- /mmocr/tests/test_models/test_textrecog/test_backbones/test_mini_vgg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from unittest import TestCase 3 | 4 | import torch 5 | 6 | from mmocr.models.textrecog.backbones import MiniVGG 7 | 8 | 9 | class TestMiniVGG(TestCase): 10 | 11 | def test_forward(self): 12 | 13 | model = MiniVGG() 14 | model.init_weights() 15 | model.train() 16 | 17 | imgs = torch.randn(1, 3, 32, 160) 18 | feats = model(imgs) 19 | self.assertEqual(feats.shape, torch.Size([1, 512, 1, 41])) 20 | -------------------------------------------------------------------------------- /mmocr/tests/test_models/test_textrecog/test_backbones/test_mobilenet_v2.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from unittest import TestCase 3 | 4 | import torch 5 | 6 | from mmocr.models.textrecog.backbones import MobileNetV2 7 | 8 | 9 | class TestMobileNetV2(TestCase): 10 | 11 | def setUp(self) -> None: 12 | self.img = torch.rand(1, 3, 32, 160) 13 | 14 | def test_mobilenetv2(self): 15 | mobilenet_v2 = MobileNetV2() 16 | self.assertEqual( 17 | mobilenet_v2(self.img).shape, torch.Size([1, 1280, 1, 43])) 18 | -------------------------------------------------------------------------------- /mmocr/tests/test_models/test_textrecog/test_backbones/test_nrtr_modality_transformer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import unittest 3 | 4 | import torch 5 | 6 | from mmocr.models.textrecog.backbones import NRTRModalityTransform 7 | 8 | 9 | class TestNRTRBackbone(unittest.TestCase): 10 | 11 | def setUp(self): 12 | self.img = torch.randn(2, 3, 32, 100) 13 | 14 | def test_encoder(self): 15 | nrtr_backbone = NRTRModalityTransform() 16 | nrtr_backbone.init_weights() 17 | nrtr_backbone.train() 18 | out_enc = nrtr_backbone(self.img) 19 | self.assertEqual(out_enc.shape, torch.Size([2, 512, 1, 25])) 20 | -------------------------------------------------------------------------------- /mmocr/tests/test_models/test_textrecog/test_backbones/test_shallow_cnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import unittest 3 | 4 | import torch 5 | 6 | from mmocr.models.textrecog.backbones import ShallowCNN 7 | 8 | 9 | class TestShallowCNN(unittest.TestCase): 10 | 11 | def setUp(self): 12 | self.imgs = torch.randn(1, 1, 32, 100) 13 | 14 | def test_shallow_cnn(self): 15 | 16 | model = ShallowCNN() 17 | model.init_weights() 18 | model.train() 19 | 20 | feat = model(self.imgs) 21 | self.assertEqual(feat.shape, torch.Size([1, 512, 8, 25])) 22 | -------------------------------------------------------------------------------- /mmocr/tests/test_models/test_textrecog/test_encoders/test_abi_encoder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from unittest import TestCase 3 | 4 | import torch 5 | 6 | from mmocr.models.textrecog.encoders.abi_encoder import ABIEncoder 7 | 8 | 9 | class TestABIEncoder(TestCase): 10 | 11 | def test_init(self): 12 | with self.assertRaises(AssertionError): 13 | ABIEncoder(d_model=512, n_head=10) 14 | 15 | def test_forward(self): 16 | model = ABIEncoder() 17 | x = torch.randn(10, 512, 8, 32) 18 | self.assertEqual(model(x, None).shape, torch.Size([10, 512, 8, 32])) 19 | -------------------------------------------------------------------------------- /mmocr/tests/test_models/test_textrecog/test_encoders/test_aster_encoder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import unittest 3 | 4 | import torch 5 | 6 | from mmocr.models.textrecog.encoders import ASTEREncoder 7 | 8 | 9 | class TestASTEREncoder(unittest.TestCase): 10 | 11 | def test_encoder(self): 12 | encoder = ASTEREncoder(10) 13 | feat = torch.randn(2, 10, 1, 25) 14 | out = encoder(feat) 15 | self.assertEqual(out.shape, torch.Size([2, 25, 10])) 16 | -------------------------------------------------------------------------------- /mmocr/tests/test_models/test_textrecog/test_plugins/test_maxpool.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from unittest import TestCase 3 | 4 | import torch 5 | 6 | from mmocr.models.textrecog.plugins import Maxpool2d 7 | 8 | 9 | class TestMaxpool2d(TestCase): 10 | 11 | def setUp(self) -> None: 12 | self.img = torch.rand(1, 3, 32, 100) 13 | 14 | def test_maxpool2d(self): 15 | maxpool2d = Maxpool2d(kernel_size=2, stride=2) 16 | self.assertEqual(maxpool2d(self.img).shape, torch.Size([1, 3, 16, 50])) 17 | -------------------------------------------------------------------------------- /mmocr/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | NNODES=${NNODES:-1} 7 | NODE_RANK=${NODE_RANK:-0} 8 | PORT=${PORT:-29500} 9 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 10 | 11 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 12 | python -m torch.distributed.launch \ 13 | --nnodes=$NNODES \ 14 | --node_rank=$NODE_RANK \ 15 | --master_addr=$MASTER_ADDR \ 16 | --nproc_per_node=$GPUS \ 17 | --master_port=$PORT \ 18 | $(dirname "$0")/test.py \ 19 | $CONFIG \ 20 | $CHECKPOINT \ 21 | --launcher pytorch \ 22 | ${@:4} 23 | -------------------------------------------------------------------------------- /mmocr/tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | 2 | #!/usr/bin/env bash 3 | 4 | CONFIG=$1 5 | GPUS=$2 6 | NNODES=${NNODES:-1} 7 | NODE_RANK=${NODE_RANK:-0} 8 | PORT=${PORT:-29500} 9 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 10 | 11 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 12 | python -m torch.distributed.launch \ 13 | --nnodes=$NNODES \ 14 | --node_rank=$NODE_RANK \ 15 | --master_addr=$MASTER_ADDR \ 16 | --nproc_per_node=$GPUS \ 17 | --master_port=$PORT \ 18 | $(dirname "$0")/train.py \ 19 | $CONFIG \ 20 | --launcher pytorch ${@:3} 21 | -------------------------------------------------------------------------------- /mmocr/tools/slurm_test.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | export PYTHONPATH=`pwd`:$PYTHONPATH 5 | 6 | PARTITION=$1 7 | JOB_NAME=$2 8 | CONFIG=$3 9 | CHECKPOINT=$4 10 | GPUS=${GPUS:-8} 11 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --kill-on-bad-exit=1 \ 21 | ${SRUN_ARGS} \ 22 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 23 | -------------------------------------------------------------------------------- /mmocr/tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export MASTER_PORT=$((12000 + $RANDOM % 20000)) 3 | 4 | set -x 5 | 6 | PARTITION=$1 7 | JOB_NAME=$2 8 | CONFIG=$3 9 | WORK_DIR=$4 10 | GPUS=${GPUS:-8} 11 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 12 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 13 | PY_ARGS=${@:5} 14 | SRUN_ARGS=${SRUN_ARGS:-""} 15 | 16 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 17 | srun -p ${PARTITION} \ 18 | --job-name=${JOB_NAME} \ 19 | --gres=gpu:${GPUS_PER_NODE} \ 20 | --ntasks=${GPUS} \ 21 | --ntasks-per-node=${GPUS_PER_NODE} \ 22 | --cpus-per-task=${CPUS_PER_TASK} \ 23 | --kill-on-bad-exit=1 \ 24 | ${SRUN_ARGS} \ 25 | python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 26 | -------------------------------------------------------------------------------- /textfussion/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include src/diffusers/utils/model_card_template.md 3 | -------------------------------------------------------------------------------- /textfussion/README.md: -------------------------------------------------------------------------------- 1 | 
该项目主要基于diffusers==0.15.0.dev0框架,请依照requirements.txt进行环境的搭建 2 | 3 | 准备好文本图像训练集后,通过./my_inpainting/new_paradigm_train.sh脚本进行生成模型的训练 4 | 5 | 完成模型训练后,通过./my_inpainting/my_build_synth_data_baseline.py脚本,制作合成数据集 6 | -------------------------------------------------------------------------------- /textfussion/_typos.toml: -------------------------------------------------------------------------------- 1 | # Files for typos 2 | # Instruction: https://github.com/marketplace/actions/typos-action#getting-started 3 | 4 | [default.extend-identifiers] 5 | 6 | [default.extend-words] 7 | NIN="NIN" # NIN is used in scripts/convert_ncsnpp_original_checkpoint_to_diffusers.py 8 | nd="np" # nd may be np (numpy) 9 | parms="parms" # parms is used in scripts/convert_original_stable_diffusion_to_diffusers.py 10 | 11 | 12 | [files] 13 | extend-exclude = ["_typos.toml"] 14 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | from .rl import ValueGuidedRLPipeline 2 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/experimental/rl/__init__.py: -------------------------------------------------------------------------------- 1 | from .value_guided_sampling import ValueGuidedRLPipeline 2 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/audio_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from .mel import Mel 2 | from .pipeline_audio_diffusion import AudioDiffusionPipeline 3 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/audioldm/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import ( 
2 | OptionalDependencyNotAvailable, 3 | is_torch_available, 4 | is_transformers_available, 5 | is_transformers_version, 6 | ) 7 | 8 | 9 | try: 10 | if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.27.0")): 11 | raise OptionalDependencyNotAvailable() 12 | except OptionalDependencyNotAvailable: 13 | from ...utils.dummy_torch_and_transformers_objects import ( 14 | AudioLDMPipeline, 15 | ) 16 | else: 17 | from .pipeline_audioldm import AudioLDMPipeline 18 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/dance_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_dance_diffusion import DanceDiffusionPipeline 2 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/ddim/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_ddim import DDIMPipeline 2 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/ddpm/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_ddpm import DDPMPipeline 2 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/dit/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_dit import DiTPipeline 2 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/latent_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import is_transformers_available 2 | from .pipeline_latent_diffusion_superresolution import LDMSuperResolutionPipeline 3 | 4 | 
5 | if is_transformers_available(): 6 | from .pipeline_latent_diffusion import LDMBertModel, LDMTextToImagePipeline 7 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/latent_diffusion_uncond/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_latent_diffusion_uncond import LDMPipeline 2 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/paint_by_example/__init__.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List, Optional, Union 3 | 4 | import numpy as np 5 | import PIL 6 | from PIL import Image 7 | 8 | from ...utils import is_torch_available, is_transformers_available 9 | 10 | 11 | if is_transformers_available() and is_torch_available(): 12 | from .image_encoder import PaintByExampleImageEncoder 13 | from .pipeline_paint_by_example import PaintByExamplePipeline 14 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/pndm/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_pndm import PNDMPipeline 2 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/repaint/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_repaint import RePaintPipeline 2 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/score_sde_ve/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_score_sde_ve import ScoreSdeVePipeline 2 | 
-------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/stochastic_karras_ve/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_stochastic_karras_ve import KarrasVePipeline 2 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/unclip/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import ( 2 | OptionalDependencyNotAvailable, 3 | is_torch_available, 4 | is_transformers_available, 5 | is_transformers_version, 6 | ) 7 | 8 | 9 | try: 10 | if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.25.0")): 11 | raise OptionalDependencyNotAvailable() 12 | except OptionalDependencyNotAvailable: 13 | from ...utils.dummy_torch_and_transformers_objects import UnCLIPImageVariationPipeline, UnCLIPPipeline 14 | else: 15 | from .pipeline_unclip import UnCLIPPipeline 16 | from .pipeline_unclip_image_variation import UnCLIPImageVariationPipeline 17 | from .text_proj import UnCLIPTextProjModel 18 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/pipelines/vq_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import is_torch_available, is_transformers_available 2 | 3 | 4 | if is_transformers_available() and is_torch_available(): 5 | from .pipeline_vq_diffusion import LearnedClassifierFreeSamplingEmbeddings, VQDiffusionPipeline 6 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/utils/dummy_note_seq_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class MidiProcessor(metaclass=DummyObject): 6 | _backends = ["note_seq"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["note_seq"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["note_seq"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["note_seq"]) 18 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/utils/dummy_onnx_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class OnnxRuntimeModel(metaclass=DummyObject): 6 | _backends = ["onnx"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["onnx"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["onnx"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["onnx"]) 18 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/utils/dummy_torch_and_scipy_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class LMSDiscreteScheduler(metaclass=DummyObject): 6 | _backends = ["torch", "scipy"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["torch", "scipy"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["torch", "scipy"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["torch", "scipy"]) 18 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/utils/dummy_torch_and_transformers_and_k_diffusion_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class StableDiffusionKDiffusionPipeline(metaclass=DummyObject): 6 | _backends = ["torch", "transformers", "k_diffusion"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["torch", "transformers", "k_diffusion"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["torch", "transformers", "k_diffusion"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["torch", "transformers", "k_diffusion"]) 18 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/utils/dummy_transformers_and_torch_and_note_seq_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class SpectrogramDiffusionPipeline(metaclass=DummyObject): 6 | _backends = ["transformers", "torch", "note_seq"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["transformers", "torch", "note_seq"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["transformers", "torch", "note_seq"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["transformers", "torch", "note_seq"]) 18 | -------------------------------------------------------------------------------- /textfussion/build/lib/diffusers/utils/pil_utils.py: -------------------------------------------------------------------------------- 1 | import PIL.Image 2 | import PIL.ImageOps 3 | from packaging import version 4 | 5 | 6 | if version.parse(version.parse(PIL.__version__).base_version) >= version.parse("9.1.0"): 7 | PIL_INTERPOLATION = { 8 | "linear": PIL.Image.Resampling.BILINEAR, 9 | "bilinear": PIL.Image.Resampling.BILINEAR, 10 | "bicubic": PIL.Image.Resampling.BICUBIC, 11 | "lanczos": PIL.Image.Resampling.LANCZOS, 12 | "nearest": PIL.Image.Resampling.NEAREST, 13 | } 14 | else: 15 | PIL_INTERPOLATION = { 16 | "linear": PIL.Image.LINEAR, 17 | "bilinear": PIL.Image.BILINEAR, 18 | "bicubic": PIL.Image.BICUBIC, 19 | "lanczos": PIL.Image.LANCZOS, 20 | "nearest": PIL.Image.NEAREST, 21 | } 22 | -------------------------------------------------------------------------------- /textfussion/docs/source/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Diffusers installation 4 | ! pip install diffusers transformers datasets accelerate 5 | # To install from source instead of the last release, comment the command above and uncomment the following one. 6 | # ! 
pip install git+https://github.com/huggingface/diffusers.git 7 | """ 8 | 9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] -------------------------------------------------------------------------------- /textfussion/docs/source/en/api/experimental/rl.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # TODO 14 | 15 | Coming soon! -------------------------------------------------------------------------------- /textfussion/docs/source/en/imgs/access_request.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/docs/source/en/imgs/access_request.png -------------------------------------------------------------------------------- /textfussion/docs/source/en/imgs/diffusers_library.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/docs/source/en/imgs/diffusers_library.jpg -------------------------------------------------------------------------------- /textfussion/docs/source/en/using-diffusers/audio.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Using Diffusers for audio 14 | 15 | [`DanceDiffusionPipeline`] and [`AudioDiffusionPipeline`] can be used to generate 16 | audio rapidly! More coming soon! -------------------------------------------------------------------------------- /textfussion/docs/source/ko/in_translation.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # 번역중 14 | 15 | 열심히 번역을 진행중입니다. 조금만 기다려주세요. 16 | 감사합니다! 
-------------------------------------------------------------------------------- /textfussion/examples/community/one_step_unet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import torch 3 | 4 | from diffusers import DiffusionPipeline 5 | 6 | 7 | class UnetSchedulerOneForwardPipeline(DiffusionPipeline): 8 | def __init__(self, unet, scheduler): 9 | super().__init__() 10 | 11 | self.register_modules(unet=unet, scheduler=scheduler) 12 | 13 | def __call__(self): 14 | image = torch.randn( 15 | (1, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size), 16 | ) 17 | timestep = 1 18 | 19 | model_output = self.unet(image, timestep).sample 20 | scheduler_output = self.scheduler.step(model_output, timestep, image).prev_sample 21 | 22 | result = scheduler_output - scheduler_output + torch.ones_like(scheduler_output) 23 | 24 | return result 25 | -------------------------------------------------------------------------------- /textfussion/examples/controlnet/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.25.1 4 | ftfy 5 | tensorboard 6 | datasets 7 | -------------------------------------------------------------------------------- /textfussion/examples/controlnet/requirements_flax.txt: -------------------------------------------------------------------------------- 1 | transformers>=4.25.1 2 | datasets 3 | flax 4 | optax 5 | torch 6 | torchvision 7 | ftfy 8 | tensorboard 9 | Jinja2 10 | -------------------------------------------------------------------------------- /textfussion/examples/dreambooth/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.25.1 4 | ftfy 5 | tensorboard 6 | Jinja2 7 | -------------------------------------------------------------------------------- 
/textfussion/examples/dreambooth/requirements_flax.txt: -------------------------------------------------------------------------------- 1 | transformers>=4.25.1 2 | flax 3 | optax 4 | torch 5 | torchvision 6 | ftfy 7 | tensorboard 8 | Jinja2 9 | -------------------------------------------------------------------------------- /textfussion/examples/inference/image_to_image.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from diffusers import StableDiffusionImg2ImgPipeline # noqa F401 4 | 5 | 6 | warnings.warn( 7 | "The `image_to_image.py` script is outdated. Please use directly `from diffusers import" 8 | " StableDiffusionImg2ImgPipeline` instead." 9 | ) 10 | -------------------------------------------------------------------------------- /textfussion/examples/inference/inpainting.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from diffusers import StableDiffusionInpaintPipeline as StableDiffusionInpaintPipeline # noqa F401 4 | 5 | 6 | warnings.warn( 7 | "The `inpainting.py` script is outdated. Please use directly `from diffusers import" 8 | " StableDiffusionInpaintPipeline` instead." 9 | ) 10 | -------------------------------------------------------------------------------- /textfussion/examples/instruct_pix2pix/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.25.1 4 | datasets 5 | ftfy 6 | tensorboard -------------------------------------------------------------------------------- /textfussion/examples/research_projects/README.md: -------------------------------------------------------------------------------- 1 | # Research projects 2 | 3 | This folder contains various research projects using 🧨 Diffusers. 
4 | They are not really maintained by the core maintainers of this library and often require a specific version of Diffusers that is indicated in the requirements file of each folder. 5 | Updating them to the most recent version of the library will require some work. 6 | 7 | To use any of them, just run the command 8 | 9 | ``` 10 | pip install -r requirements.txt 11 | ``` 12 | inside the folder of your choice. 13 | 14 | If you need help with any of those, please open an issue where you directly ping the author(s), as indicated at the top of the README of each folder. 15 | -------------------------------------------------------------------------------- /textfussion/examples/research_projects/colossalai/inference.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from diffusers import StableDiffusionPipeline 4 | 5 | 6 | model_id = "path-to-your-trained-model" 7 | pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda") 8 | 9 | prompt = "A photo of sks dog in a bucket" 10 | image = pipe(prompt, num_inference_steps=50, guidance_scale=7.5).images[0] 11 | 12 | image.save("dog-bucket.png") 13 | -------------------------------------------------------------------------------- /textfussion/examples/research_projects/colossalai/requirement.txt: -------------------------------------------------------------------------------- 1 | diffusers 2 | torch 3 | torchvision 4 | ftfy 5 | tensorboard 6 | Jinja2 7 | transformers -------------------------------------------------------------------------------- /textfussion/examples/research_projects/dreambooth_inpaint/requirements.txt: -------------------------------------------------------------------------------- 1 | diffusers==0.9.0 2 | accelerate 3 | torchvision 4 | transformers>=4.21.0 5 | ftfy 6 | tensorboard 7 | Jinja2 8 | -------------------------------------------------------------------------------- 
/textfussion/examples/research_projects/intel_opts/textual_inversion/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.21.0 4 | ftfy 5 | tensorboard 6 | Jinja2 7 | intel_extension_for_pytorch>=1.13 8 | -------------------------------------------------------------------------------- /textfussion/examples/research_projects/lora/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.25.1 4 | datasets 5 | ftfy 6 | tensorboard 7 | Jinja2 8 | git+https://github.com/huggingface/peft.git -------------------------------------------------------------------------------- /textfussion/examples/research_projects/mulit_token_textual_inversion/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.25.1 4 | ftfy 5 | tensorboard 6 | Jinja2 7 | -------------------------------------------------------------------------------- /textfussion/examples/research_projects/mulit_token_textual_inversion/requirements_flax.txt: -------------------------------------------------------------------------------- 1 | transformers>=4.25.1 2 | flax 3 | optax 4 | torch 5 | torchvision 6 | ftfy 7 | tensorboard 8 | Jinja2 9 | -------------------------------------------------------------------------------- /textfussion/examples/research_projects/multi_subject_dreambooth/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.25.1 4 | ftfy 5 | tensorboard 6 | Jinja2 -------------------------------------------------------------------------------- /textfussion/examples/research_projects/onnxruntime/README.md: -------------------------------------------------------------------------------- 1 | ## Diffusers examples with ONNXRuntime 
optimizations 2 | 3 | **This research project is not actively maintained by the diffusers team. For any questions or comments, please contact Prathik Rao (prathikr), Sunghoon Choi (hanbitmyths), Ashwini Khade (askhade), or Peng Wang (pengwa) on github with any questions.** 4 | 5 | This aims to provide diffusers examples with ONNXRuntime optimizations for training/fine-tuning unconditional image generation, text to image, and textual inversion. Please see individual directories for more details on how to run each task using ONNXRuntime. -------------------------------------------------------------------------------- /textfussion/examples/research_projects/onnxruntime/text_to_image/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.25.1 4 | datasets 5 | ftfy 6 | tensorboard 7 | modelcards 8 | -------------------------------------------------------------------------------- /textfussion/examples/research_projects/onnxruntime/textual_inversion/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.25.1 4 | ftfy 5 | tensorboard 6 | modelcards 7 | -------------------------------------------------------------------------------- /textfussion/examples/research_projects/onnxruntime/unconditional_image_generation/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | datasets 4 | -------------------------------------------------------------------------------- /textfussion/examples/text_to_image/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | transformers>=4.25.1 3 | datasets 4 | ftfy 5 | tensorboard 6 | Jinja2 7 | -------------------------------------------------------------------------------- 
/textfussion/examples/text_to_image/requirements_flax.txt: -------------------------------------------------------------------------------- 1 | transformers>=4.25.1 2 | datasets 3 | flax 4 | optax 5 | torch 6 | torchvision 7 | ftfy 8 | tensorboard 9 | Jinja2 10 | -------------------------------------------------------------------------------- /textfussion/examples/textual_inversion/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.25.1 4 | ftfy 5 | tensorboard 6 | Jinja2 7 | -------------------------------------------------------------------------------- /textfussion/examples/textual_inversion/requirements_flax.txt: -------------------------------------------------------------------------------- 1 | transformers>=4.25.1 2 | flax 3 | optax 4 | torch 5 | torchvision 6 | ftfy 7 | tensorboard 8 | Jinja2 9 | -------------------------------------------------------------------------------- /textfussion/examples/unconditional_image_generation/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | datasets 4 | -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/__init__.py -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/__pycache__/__init__.cpython-310.pyc 
-------------------------------------------------------------------------------- /textfussion/my_inpainting/src/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/build_synth_data/__pycache__/batch_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/build_synth_data/__pycache__/batch_utils.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/build_synth_data/__pycache__/crop_tools.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/build_synth_data/__pycache__/crop_tools.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/build_synth_data/__pycache__/glyph_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/build_synth_data/__pycache__/glyph_utils.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/build_synth_data/__pycache__/rec_inferencer.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/build_synth_data/__pycache__/rec_inferencer.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__init__.py -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/base_text_dataset.cpython-310.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/base_text_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/batch_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/batch_utils.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/crop_image_for_test.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/crop_image_for_test.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/new_paradigm_text_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/new_paradigm_text_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/text_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/text_dataset.cpython-310.pyc 
-------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/text_dataset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/text_dataset.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/text_mapper.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/text_mapper.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/text_mapper.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/text_mapper.cpython-38.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/text_mapper.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/text_mapper.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/utils.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/utils.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_pure_text_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_pure_text_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_text_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_text_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_text_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_text_dataset.cpython-38.pyc -------------------------------------------------------------------------------- 
/textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_text_dataset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_text_dataset.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_with_blank_text_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/dataset/__pycache__/zoom_up_with_blank_text_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/engines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/engines/__init__.py -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__init__.py -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/__init__.cpython-310.pyc 
-------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/adapter.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/adapter.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/adapter.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/adapter.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/adapter_with_char_embedding.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/adapter_with_char_embedding.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/adapter_with_fussion_TE.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/adapter_with_fussion_TE.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/adapter_with_fussion_TE.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/adapter_with_fussion_TE.cpython-38.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/adapter_with_fussion_TE.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/adapter_with_fussion_TE.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/adapter_with_pre_prompt.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/adapter_with_pre_prompt.cpython-310.pyc 
-------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/attention.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/attention.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/char_encoder.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/char_encoder.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/controlnet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/controlnet.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/dual_controlnet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/dual_controlnet.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/fussion_text_embedding.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/fussion_text_embedding.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/modules.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/modules.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/only_pre_prompt.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/only_pre_prompt.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/only_prefix_prompt.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/only_prefix_prompt.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/openaimodel.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/openaimodel.cpython-310.pyc -------------------------------------------------------------------------------- 
/textfussion/my_inpainting/src/models/__pycache__/transformer_2d_with_controlnet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/transformer_2d_with_controlnet.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/transformer_2d_with_dual_text_controlnet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/transformer_2d_with_dual_text_controlnet.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_adapter.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_adapter.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_adapter.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_adapter.cpython-38.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_adapter.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_adapter.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_controlnet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_controlnet.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_dual_text_controlnet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_blocks_with_dual_text_controlnet.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_adapter.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_adapter.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_adapter.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_adapter.cpython-38.pyc 
-------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_adapter.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_adapter.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_controlnet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_controlnet.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_dual_text.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_dual_text.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_dual_text_controlnet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/unet_2d_with_dual_text_controlnet.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/models/__pycache__/union_net.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/models/__pycache__/union_net.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__init__.py -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/new_paradigm_inpainting_dual_text_encoder.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/new_paradigm_inpainting_dual_text_encoder.cpython-310.pyc -------------------------------------------------------------------------------- 
/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_dual_text_full_controlnet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_dual_text_full_controlnet.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_full_controlnet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_full_controlnet.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_mask_controlnet.cpython-310.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_mask_controlnet.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_only_controlnet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_only_controlnet.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_only_pre_prompt.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_only_pre_prompt.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_only_prefix_prompt.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_only_prefix_prompt.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_text_glyph.cpython-310.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_text_glyph.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_text_vae_text_glyph.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_text_vae_text_glyph.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_adapter.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_adapter.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_adapter_with_fussion_te.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_adapter_with_fussion_te.cpython-310.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_adapter_with_fussion_te.cpython-39.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_adapter_with_fussion_te.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_adapter_zero_prompt.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_adapter_zero_prompt.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_char_adapter.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/pipelines/__pycache__/stable_diffusion_inpainting_with_char_adapter.cpython-39.pyc -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/utils/ori.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/utils/ori.png -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/utils/output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/utils/output.png 
-------------------------------------------------------------------------------- /textfussion/my_inpainting/src/utils/res.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/utils/res.png -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/utils/res_area.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/utils/res_area.png -------------------------------------------------------------------------------- /textfussion/my_inpainting/src/utils/res_trilinear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/my_inpainting/src/utils/res_trilinear.png -------------------------------------------------------------------------------- /textfussion/my_inpainting/train_vae.sh: -------------------------------------------------------------------------------- 1 | export MODEL_NAME="stabilityai/stable-diffusion-2-inpainting" 2 | export OUTPUT_DIR="output/pretrain_8702_text_vae" 3 | 4 | NCCL_P2P_DISABLE=1 accelerate launch train_vae.py \ 5 | --pretrained_model_name_or_path=$MODEL_NAME \ 6 | --output_dir=$OUTPUT_DIR \ 7 | --resolution=512 \ 8 | --train_batch_size=4 \ 9 | --gradient_accumulation_steps=1 \ 10 | --gradient_checkpointing \ 11 | --learning_rate=5e-6 \ 12 | --num_train_epochs=3 \ 13 | --lr_scheduler="constant" \ 14 | --lr_warmup_steps=3000 \ 15 | --dataloader_num_workers=8 \ 16 | --mixed_precision=fp16 \ 17 | -------------------------------------------------------------------------------- /textfussion/pyproject.toml: 
-------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 119 3 | target-version = ['py37'] 4 | 5 | [tool.ruff] 6 | # Never enforce `E501` (line length violations). 7 | ignore = ["C901", "E501", "E741", "W605"] 8 | select = ["C", "E", "F", "I", "W"] 9 | line-length = 119 10 | 11 | # Ignore import violations in all `__init__.py` files. 12 | [tool.ruff.per-file-ignores] 13 | "__init__.py" = ["E402", "F401", "F403", "F811"] 14 | "src/diffusers/utils/dummy_*.py" = ["F401"] 15 | 16 | [tool.ruff.isort] 17 | lines-after-imports = 2 18 | known-first-party = ["diffusers"] 19 | -------------------------------------------------------------------------------- /textfussion/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/99Franklin/DiffText/6c16119cf08b1d23e71da55e93efa69e4a61384d/textfussion/scripts/__init__.py -------------------------------------------------------------------------------- /textfussion/setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | default_section = FIRSTPARTY 3 | ensure_newline_before_comments = True 4 | force_grid_wrap = 0 5 | include_trailing_comma = True 6 | known_first_party = accelerate 7 | known_third_party = 8 | numpy 9 | torch 10 | torch_xla 11 | 12 | line_length = 119 13 | lines_after_imports = 2 14 | multi_line_output = 3 15 | use_parentheses = True 16 | 17 | [flake8] 18 | ignore = E203, E722, E501, E741, W503, W605 19 | max-line-length = 119 20 | per-file-ignores = __init__.py:F401 21 | -------------------------------------------------------------------------------- /textfussion/src/diffusers.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /textfussion/src/diffusers.egg-info/entry_points.txt: 
-------------------------------------------------------------------------------- 1 | [console_scripts] 2 | diffusers-cli = diffusers.commands.diffusers_cli:main 3 | -------------------------------------------------------------------------------- /textfussion/src/diffusers.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | diffusers 2 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/experimental/README.md: -------------------------------------------------------------------------------- 1 | # 🧨 Diffusers Experimental 2 | 3 | We are adding experimental code to support novel applications and usages of the Diffusers library. 4 | Currently, the following experiments are supported: 5 | * Reinforcement learning via an implementation of the [Diffuser](https://arxiv.org/abs/2205.09991) model. -------------------------------------------------------------------------------- /textfussion/src/diffusers/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | from .rl import ValueGuidedRLPipeline 2 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/experimental/rl/__init__.py: -------------------------------------------------------------------------------- 1 | from .value_guided_sampling import ValueGuidedRLPipeline 2 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/models/README.md: -------------------------------------------------------------------------------- 1 | # Models 2 | 3 | For more detail on the models, please refer to the [docs](https://huggingface.co/docs/diffusers/api/models). 
-------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/audio_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from .mel import Mel 2 | from .pipeline_audio_diffusion import AudioDiffusionPipeline 3 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/audioldm/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import ( 2 | OptionalDependencyNotAvailable, 3 | is_torch_available, 4 | is_transformers_available, 5 | is_transformers_version, 6 | ) 7 | 8 | 9 | try: 10 | if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.27.0")): 11 | raise OptionalDependencyNotAvailable() 12 | except OptionalDependencyNotAvailable: 13 | from ...utils.dummy_torch_and_transformers_objects import ( 14 | AudioLDMPipeline, 15 | ) 16 | else: 17 | from .pipeline_audioldm import AudioLDMPipeline 18 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/dance_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_dance_diffusion import DanceDiffusionPipeline 2 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/ddim/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_ddim import DDIMPipeline 2 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/ddpm/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_ddpm import DDPMPipeline 2 | -------------------------------------------------------------------------------- 
/textfussion/src/diffusers/pipelines/dit/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_dit import DiTPipeline 2 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/latent_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import is_transformers_available 2 | from .pipeline_latent_diffusion_superresolution import LDMSuperResolutionPipeline 3 | 4 | 5 | if is_transformers_available(): 6 | from .pipeline_latent_diffusion import LDMBertModel, LDMTextToImagePipeline 7 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/latent_diffusion_uncond/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_latent_diffusion_uncond import LDMPipeline 2 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/paint_by_example/__init__.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List, Optional, Union 3 | 4 | import numpy as np 5 | import PIL 6 | from PIL import Image 7 | 8 | from ...utils import is_torch_available, is_transformers_available 9 | 10 | 11 | if is_transformers_available() and is_torch_available(): 12 | from .image_encoder import PaintByExampleImageEncoder 13 | from .pipeline_paint_by_example import PaintByExamplePipeline 14 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/pndm/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_pndm import PNDMPipeline 2 | -------------------------------------------------------------------------------- 
/textfussion/src/diffusers/pipelines/repaint/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_repaint import RePaintPipeline 2 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/score_sde_ve/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_score_sde_ve import ScoreSdeVePipeline 2 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/stochastic_karras_ve/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_stochastic_karras_ve import KarrasVePipeline 2 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/unclip/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import ( 2 | OptionalDependencyNotAvailable, 3 | is_torch_available, 4 | is_transformers_available, 5 | is_transformers_version, 6 | ) 7 | 8 | 9 | try: 10 | if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.25.0")): 11 | raise OptionalDependencyNotAvailable() 12 | except OptionalDependencyNotAvailable: 13 | from ...utils.dummy_torch_and_transformers_objects import UnCLIPImageVariationPipeline, UnCLIPPipeline 14 | else: 15 | from .pipeline_unclip import UnCLIPPipeline 16 | from .pipeline_unclip_image_variation import UnCLIPImageVariationPipeline 17 | from .text_proj import UnCLIPTextProjModel 18 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/pipelines/vq_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import is_torch_available, is_transformers_available 2 | 3 | 4 | if 
is_transformers_available() and is_torch_available(): 5 | from .pipeline_vq_diffusion import LearnedClassifierFreeSamplingEmbeddings, VQDiffusionPipeline 6 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/schedulers/README.md: -------------------------------------------------------------------------------- 1 | # Schedulers 2 | 3 | For more information on the schedulers, please refer to the [docs](https://huggingface.co/docs/diffusers/api/schedulers/overview). -------------------------------------------------------------------------------- /textfussion/src/diffusers/utils/dummy_note_seq_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class MidiProcessor(metaclass=DummyObject): 6 | _backends = ["note_seq"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["note_seq"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["note_seq"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["note_seq"]) 18 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/utils/dummy_onnx_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class OnnxRuntimeModel(metaclass=DummyObject): 6 | _backends = ["onnx"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["onnx"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["onnx"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["onnx"]) 18 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/utils/dummy_torch_and_scipy_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class LMSDiscreteScheduler(metaclass=DummyObject): 6 | _backends = ["torch", "scipy"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["torch", "scipy"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["torch", "scipy"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["torch", "scipy"]) 18 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/utils/dummy_torch_and_transformers_and_k_diffusion_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class StableDiffusionKDiffusionPipeline(metaclass=DummyObject): 6 | _backends = ["torch", "transformers", "k_diffusion"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["torch", "transformers", "k_diffusion"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["torch", "transformers", "k_diffusion"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["torch", "transformers", "k_diffusion"]) 18 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/utils/dummy_transformers_and_torch_and_note_seq_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class SpectrogramDiffusionPipeline(metaclass=DummyObject): 6 | _backends = ["transformers", "torch", "note_seq"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["transformers", "torch", "note_seq"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["transformers", "torch", "note_seq"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["transformers", "torch", "note_seq"]) 18 | -------------------------------------------------------------------------------- /textfussion/src/diffusers/utils/pil_utils.py: -------------------------------------------------------------------------------- 1 | import PIL.Image 2 | import PIL.ImageOps 3 | from packaging import version 4 | 5 | 6 | if version.parse(version.parse(PIL.__version__).base_version) >= version.parse("9.1.0"): 7 | PIL_INTERPOLATION = { 8 | "linear": PIL.Image.Resampling.BILINEAR, 9 | "bilinear": PIL.Image.Resampling.BILINEAR, 10 | 
"bicubic": PIL.Image.Resampling.BICUBIC, 11 | "lanczos": PIL.Image.Resampling.LANCZOS, 12 | "nearest": PIL.Image.Resampling.NEAREST, 13 | } 14 | else: 15 | PIL_INTERPOLATION = { 16 | "linear": PIL.Image.LINEAR, 17 | "bilinear": PIL.Image.BILINEAR, 18 | "bicubic": PIL.Image.BICUBIC, 19 | "lanczos": PIL.Image.LANCZOS, 20 | "nearest": PIL.Image.NEAREST, 21 | } 22 | -------------------------------------------------------------------------------- /textfussion/tests: -------------------------------------------------------------------------------- 1 | 2 | --------------------------------------------------------------------------------