├── .gitignore ├── PDF_parsing ├── GOT │ ├── GOT.egg-info │ │ ├── PKG-INFO │ │ ├── SOURCES.txt │ │ ├── dependency_links.txt │ │ ├── requires.txt │ │ └── top_level.txt │ ├── GOT │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ └── __init__.cpython-310.pyc │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── base_dataset.py │ │ │ └── conversation_dataset_qwen.py │ │ ├── demo │ │ │ ├── __pycache__ │ │ │ │ └── process_results.cpython-310.pyc │ │ │ ├── process_results.py │ │ │ ├── run_ocr_2.0.py │ │ │ └── run_ocr_2.0_crop.py │ │ ├── eval │ │ │ ├── eval_GOT_ocr.py │ │ │ ├── evaluate_GOT.py │ │ │ ├── multi_hardware_eval_GOT.py │ │ │ └── pyevaltools │ │ │ │ ├── __init__.py │ │ │ │ ├── eval_ocr.py │ │ │ │ ├── eval_ocr_format.py │ │ │ │ ├── eval_ocr_scene.py │ │ │ │ └── merge_results.py │ │ ├── model │ │ │ ├── GOT_ocr_2_0.py │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── GOT_ocr_2_0.cpython-310.pyc │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ ├── modeling_GOT_YiJiang.cpython-310.pyc │ │ │ │ └── modeling_GOT_vllm.cpython-310.pyc │ │ │ ├── modeling_GOT_vllm.py │ │ │ ├── plug │ │ │ │ ├── __pycache__ │ │ │ │ │ └── blip_process.cpython-310.pyc │ │ │ │ └── blip_process.py │ │ │ └── vision_encoder │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ └── vary_b.cpython-310.pyc │ │ │ │ └── vary_b.py │ │ ├── train │ │ │ ├── train.py │ │ │ ├── train_GOT.py │ │ │ ├── train_flash_attn.py │ │ │ ├── train_lora.py │ │ │ ├── train_lora_flash_attn.py │ │ │ ├── trainer.py │ │ │ ├── trainer_llm_llrd.py │ │ │ ├── trainer_vit_fixlr.py │ │ │ └── trainer_vit_llrd.py │ │ └── utils │ │ │ ├── __pycache__ │ │ │ ├── constants.cpython-310.pyc │ │ │ ├── conversation.cpython-310.pyc │ │ │ └── utils.cpython-310.pyc │ │ │ ├── arguments.py │ │ │ ├── constants.py │ │ │ ├── conversation.py │ │ │ └── utils.py │ ├── pyproject.toml │ ├── pyvenv.cfg │ └── zero_config │ │ ├── zero2.json │ │ └── zero3.json ├── MinerU │ ├── Dockerfile │ ├── LICENSE.md │ ├── MinerU_CLA.md │ ├── README.md │ ├── README_ja-JP.md │ ├── README_zh-CN.md │ ├── app.py │ ├── build │ │ └── lib │ │ │ └── magic_pdf │ │ │ ├── __init__.py │ │ │ ├── dict2md │ │ │ ├── __init__.py │ │ │ ├── mkcontent.py │ │ │ └── ocr_mkcontent.py │ │ │ ├── filter │ │ │ ├── __init__.py │ │ │ ├── pdf_classify_by_type.py │ │ │ └── pdf_meta_scan.py │ │ │ ├── integrations │ │ │ ├── __init__.py │ │ │ └── rag │ │ │ │ ├── __init__.py │ │ │ │ ├── api.py │ │ │ │ ├── type.py │ │ │ │ └── utils.py │ │ │ ├── layout │ │ │ ├── __init__.py │ │ │ ├── bbox_sort.py │ │ │ ├── layout_det_utils.py │ │ │ ├── layout_sort.py │ │ │ ├── layout_spiler_recog.py │ │ │ └── mcol_sort.py │ │ │ ├── libs │ │ │ ├── Constants.py │ │ │ ├── MakeContentConfig.py │ │ │ ├── ModelBlockTypeEnum.py │ │ │ ├── __init__.py │ │ │ ├── boxbase.py │ │ │ ├── calc_span_stats.py │ │ │ ├── commons.py │ │ │ ├── config_reader.py │ │ │ ├── convert_utils.py │ │ │ ├── coordinate_transform.py │ │ │ ├── detect_language_from_model.py │ │ │ ├── draw_bbox.py │ │ │ ├── drop_reason.py │ │ │ ├── drop_tag.py │ │ │ ├── hash_utils.py │ │ │ ├── json_compressor.py │ │ │ ├── language.py │ │ │ ├── local_math.py │ │ │ ├── markdown_utils.py │ │ │ ├── nlp_utils.py │ │ │ ├── ocr_content_type.py │ │ │ ├── path_utils.py │ │ │ ├── pdf_check.py │ │ │ ├── pdf_image_tools.py │ │ │ ├── safe_filename.py │ │ │ ├── textbase.py │ │ │ ├── version.py │ │ │ └── vis_utils.py │ │ │ ├── model │ │ │ ├── __init__.py │ │ │ ├── doc_analyze_by_custom_model.py │ │ │ ├── magic_model.py │ │ │ ├── model_list.py │ │ │ ├── pdf_extract_kit.py │ │ │ ├── pek_sub_modules │ │ │ │ ├── __init__.py │ │ │ │ ├── layoutlmv3 │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── backbone.py │ │ │ │ │ ├── beit.py │ │ │ │ │ ├── deit.py │ │ │ │ │ ├── layoutlmft │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── data │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── cord.py │ │ │ │ │ │ │ ├── data_collator.py │ │ │ │ │ │ │ ├── funsd.py │ │ │ │ │ │ │ ├── image_utils.py │ │ │ │ │ │ │ └── xfund.py │ │ │ │ │ │ └── models │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ └── layoutlmv3 │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── configuration_layoutlmv3.py │ │ │ │ │ │ │ ├── modeling_layoutlmv3.py │ │ │ │ │ │ │ ├── tokenization_layoutlmv3.py │ │ │ │ │ │ │ └── tokenization_layoutlmv3_fast.py │ │ │ │ │ ├── model_init.py │ │ │ │ │ ├── rcnn_vl.py │ │ │ │ │ └── visualizer.py │ │ │ │ ├── post_process.py │ │ │ │ ├── self_modify.py │ │ │ │ └── structeqtable │ │ │ │ │ ├── StructTableModel.py │ │ │ │ │ └── __init__.py │ │ │ ├── ppTableModel.py │ │ │ └── pp_structure_v2.py │ │ │ ├── para │ │ │ ├── __init__.py │ │ │ ├── block_continuation_processor.py │ │ │ ├── block_termination_processor.py │ │ │ ├── commons.py │ │ │ ├── denoise.py │ │ │ ├── draw.py │ │ │ ├── exceptions.py │ │ │ ├── layout_match_processor.py │ │ │ ├── para_pipeline.py │ │ │ ├── para_split.py │ │ │ ├── para_split_v2.py │ │ │ ├── raw_processor.py │ │ │ ├── stats.py │ │ │ └── title_processor.py │ │ │ ├── pdf_parse_by_ocr.py │ │ │ ├── pdf_parse_by_txt.py │ │ │ ├── pdf_parse_union_core.py │ │ │ ├── pipe │ │ │ ├── AbsPipe.py │ │ │ ├── OCRPipe.py │ │ │ ├── TXTPipe.py │ │ │ ├── UNIPipe.py │ │ │ └── __init__.py │ │ │ ├── post_proc │ │ │ ├── __init__.py │ │ │ ├── detect_para.py │ │ │ ├── pdf_post_filter.py │ │ │ └── remove_footnote.py │ │ │ ├── pre_proc │ │ │ ├── __init__.py │ │ │ ├── citationmarker_remove.py │ │ │ ├── construct_page_dict.py │ │ │ ├── cut_image.py │ │ │ ├── detect_equation.py │ │ │ ├── detect_footer_by_model.py │ │ │ ├── detect_footer_header_by_statistics.py │ │ │ ├── detect_footnote.py │ │ │ ├── detect_header.py │ │ │ ├── detect_images.py │ │ │ ├── detect_page_number.py │ │ │ ├── detect_tables.py │ │ │ ├── equations_replace.py │ │ │ ├── fix_image.py │ │ │ ├── fix_table.py │ │ │ ├── main_text_font.py │ │ │ ├── ocr_detect_all_bboxes.py │ │ │ ├── ocr_detect_layout.py │ │ │ ├── ocr_dict_merge.py │ │ │ ├── ocr_span_list_modify.py │ │ │ ├── pdf_pre_filter.py │ │ │ ├── post_layout_split.py │ │ │ ├── remove_bbox_overlap.py │ │ │ ├── remove_colored_strip_bbox.py │ │ │ ├── remove_footer_header.py │ │ │ ├── remove_rotate_bbox.py │ │ │ ├── resolve_bbox_conflict.py │ │ │ ├── solve_line_alien.py │ │ │ └── statistics.py │ │ │ ├── resources │ │ │ ├── fasttext-langdetect │ │ │ │ └── lid.176.ftz │ │ │ └── model_config │ │ │ │ ├── UniMERNet │ │ │ │ └── demo.yaml │ │ │ │ ├── layoutlmv3 │ │ │ │ └── layoutlmv3_base_inference.yaml │ │ │ │ └── model_configs.yaml │ │ │ ├── rw │ │ │ ├── AbsReaderWriter.py │ │ │ ├── DiskReaderWriter.py │ │ │ ├── S3ReaderWriter.py │ │ │ └── __init__.py │ │ │ ├── spark │ │ │ ├── __init__.py │ │ │ └── spark_api.py │ │ │ ├── tools │ │ │ ├── __init__.py │ │ │ ├── cli.py │ │ │ ├── cli_dev.py │ │ │ └── common.py │ │ │ └── user_api.py │ ├── demo │ │ ├── demo.py │ │ ├── demo1.json │ │ ├── demo1.pdf │ │ ├── demo2.json │ │ ├── demo2.pdf │ │ ├── magic_pdf_parse_main.py │ │ └── small_ocr.pdf │ ├── docs │ │ ├── FAQ_en_us.md │ │ ├── FAQ_zh_cn.md │ │ ├── README_Ubuntu_CUDA_Acceleration_en_US.md │ │ ├── README_Ubuntu_CUDA_Acceleration_zh_CN.md │ │ ├── README_Windows_CUDA_Acceleration_en_US.md │ │ ├── README_Windows_CUDA_Acceleration_zh_CN.md │ │ ├── chemical_knowledge_introduction │ │ │ ├── introduction.pdf │ │ │ └── introduction.xmind │ │ ├── download_models.py │ │ ├── download_models_hf.py │ │ ├── how_to_download_models_en.md │ │ ├── how_to_download_models_zh_cn.md │ │ ├── images │ │ │ ├── MinerU-logo-hq.png │ │ │ ├── MinerU-logo.png │ │ │ ├── datalab_logo.png │ │ │ ├── flowchart_en.png │ │ │ ├── flowchart_zh_cn.png │ │ │ ├── layout_example.png │ │ │ ├── poly.png │ │ │ ├── project_panorama_en.png │ │ │ ├── project_panorama_zh_cn.png │ │ │ └── spans_example.png │ │ ├── output_file_en_us.md │ │ └── output_file_zh_cn.md │ ├── magic-pdf.template.json │ ├── magic_pdf.egg-info │ │ ├── PKG-INFO │ │ ├── SOURCES.txt │ │ ├── dependency_links.txt │ │ ├── entry_points.txt │ │ ├── not-zip-safe │ │ ├── requires.txt │ │ └── top_level.txt │ ├── magic_pdf │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── pdf_parse_by_ocr.cpython-310.pyc │ │ │ ├── pdf_parse_by_txt.cpython-310.pyc │ │ │ ├── pdf_parse_union_core.cpython-310.pyc │ │ │ └── user_api.cpython-310.pyc │ │ ├── dict2md │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ └── ocr_mkcontent.cpython-310.pyc │ │ │ ├── mkcontent.py │ │ │ └── ocr_mkcontent.py │ │ ├── filter │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ ├── pdf_classify_by_type.cpython-310.pyc │ │ │ │ └── pdf_meta_scan.cpython-310.pyc │ │ │ ├── pdf_classify_by_type.py │ │ │ └── pdf_meta_scan.py │ │ ├── integrations │ │ │ ├── __init__.py │ │ │ └── rag │ │ │ │ ├── __init__.py │ │ │ │ ├── api.py │ │ │ │ ├── type.py │ │ │ │ └── utils.py │ │ ├── layout │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ ├── bbox_sort.cpython-310.pyc │ │ │ │ ├── layout_det_utils.cpython-310.pyc │ │ │ │ ├── layout_sort.cpython-310.pyc │ │ │ │ └── layout_spiler_recog.cpython-310.pyc │ │ │ ├── bbox_sort.py │ │ │ ├── layout_det_utils.py │ │ │ ├── layout_sort.py │ │ │ ├── layout_spiler_recog.py │ │ │ └── mcol_sort.py │ │ ├── libs │ │ │ ├── Constants.py │ │ │ ├── MakeContentConfig.py │ │ │ ├── ModelBlockTypeEnum.py │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── Constants.cpython-310.pyc │ │ │ │ ├── MakeContentConfig.cpython-310.pyc │ │ │ │ ├── ModelBlockTypeEnum.cpython-310.pyc │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ ├── boxbase.cpython-310.pyc │ │ │ │ ├── commons.cpython-310.pyc │ │ │ │ ├── config_reader.cpython-310.pyc │ │ │ │ ├── convert_utils.cpython-310.pyc │ │ │ │ ├── coordinate_transform.cpython-310.pyc │ │ │ │ ├── drop_reason.cpython-310.pyc │ │ │ │ ├── drop_tag.cpython-310.pyc │ │ │ │ ├── hash_utils.cpython-310.pyc │ │ │ │ ├── json_compressor.cpython-310.pyc │ │ │ │ ├── language.cpython-310.pyc │ │ │ │ ├── local_math.cpython-310.pyc │ │ │ │ ├── markdown_utils.cpython-310.pyc │ │ │ │ ├── ocr_content_type.cpython-310.pyc │ │ │ │ ├── pdf_check.cpython-310.pyc │ │ │ │ ├── pdf_image_tools.cpython-310.pyc │ │ │ │ └── version.cpython-310.pyc │ │ │ ├── boxbase.py │ │ │ ├── calc_span_stats.py │ │ │ ├── commons.py │ │ │ ├── config_reader.py │ │ │ ├── convert_utils.py │ │ │ ├── coordinate_transform.py │ │ │ ├── detect_language_from_model.py │ │ │ ├── draw_bbox.py │ │ │ ├── drop_reason.py │ │ │ ├── drop_tag.py │ │ │ ├── hash_utils.py │ │ │ ├── json_compressor.py │ │ │ ├── language.py │ │ │ ├── local_math.py │ │ │ ├── markdown_utils.py │ │ │ ├── nlp_utils.py │ │ │ ├── ocr_content_type.py │ │ │ ├── path_utils.py │ │ │ ├── pdf_check.py │ │ │ ├── pdf_image_tools.py │ │ │ ├── safe_filename.py │ │ │ ├── textbase.py │ │ │ ├── version.py │ │ │ └── vis_utils.py │ │ ├── model │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ ├── doc_analyze_by_custom_model.cpython-310.pyc │ │ │ │ ├── magic_model.cpython-310.pyc │ │ │ │ ├── model_list.cpython-310.pyc │ │ │ │ ├── pdf_extract_kit.cpython-310.pyc │ │ │ │ └── ppTableModel.cpython-310.pyc │ │ │ ├── doc_analyze_by_custom_model.py │ │ │ ├── magic_model.py │ │ │ ├── model_list.py │ │ │ ├── pdf_extract_kit.py │ │ │ ├── pek_sub_modules │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ ├── post_process.cpython-310.pyc │ │ │ │ │ └── self_modify.cpython-310.pyc │ │ │ │ ├── layoutlmv3 │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __pycache__ │ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ │ ├── backbone.cpython-310.pyc │ │ │ │ │ │ ├── beit.cpython-310.pyc │ │ │ │ │ │ ├── deit.cpython-310.pyc │ │ │ │ │ │ ├── model_init.cpython-310.pyc │ │ │ │ │ │ ├── rcnn_vl.cpython-310.pyc │ │ │ │ │ │ └── visualizer.cpython-310.pyc │ │ │ │ │ ├── backbone.py │ │ │ │ │ ├── beit.py │ │ │ │ │ ├── deit.py │ │ │ │ │ ├── layoutlmft │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── __pycache__ │ │ │ │ │ │ │ └── __init__.cpython-310.pyc │ │ │ │ │ │ ├── data │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── cord.py │ │ │ │ │ │ │ ├── data_collator.py │ │ │ │ │ │ │ ├── funsd.py │ │ │ │ │ │ │ ├── image_utils.py │ │ │ │ │ │ │ └── xfund.py │ │ │ │ │ │ └── models │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── __pycache__ │ │ │ │ │ │ │ └── __init__.cpython-310.pyc │ │ │ │ │ │ │ └── layoutlmv3 │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── __pycache__ │ │ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ │ │ ├── configuration_layoutlmv3.cpython-310.pyc │ │ │ │ │ │ │ ├── modeling_layoutlmv3.cpython-310.pyc │ │ │ │ │ │ │ ├── tokenization_layoutlmv3.cpython-310.pyc │ │ │ │ │ │ │ └── tokenization_layoutlmv3_fast.cpython-310.pyc │ │ │ │ │ │ │ ├── configuration_layoutlmv3.py │ │ │ │ │ │ │ ├── modeling_layoutlmv3.py │ │ │ │ │ │ │ ├── tokenization_layoutlmv3.py │ │ │ │ │ │ │ └── tokenization_layoutlmv3_fast.py │ │ │ │ │ ├── model_init.py │ │ │ │ │ ├── rcnn_vl.py │ │ │ │ │ └── visualizer.py │ │ │ │ ├── post_process.py │ │ │ │ ├── self_modify.py │ │ │ │ └── structeqtable │ │ │ │ │ ├── StructTableModel.py │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── __pycache__ │ │ │ │ │ ├── StructTableModel.cpython-310.pyc │ │ │ │ │ └── __init__.cpython-310.pyc │ │ │ ├── ppTableModel.py │ │ │ └── pp_structure_v2.py │ │ ├── para │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ └── para_split_v2.cpython-310.pyc │ │ │ ├── block_continuation_processor.py │ │ │ ├── block_termination_processor.py │ │ │ ├── commons.py │ │ │ ├── denoise.py │ │ │ ├── draw.py │ │ │ ├── exceptions.py │ │ │ ├── layout_match_processor.py │ │ │ ├── para_pipeline.py │ │ │ ├── para_split.py │ │ │ ├── para_split_v2.py │ │ │ ├── raw_processor.py │ │ │ ├── stats.py │ │ │ └── title_processor.py │ │ ├── pdf_parse_by_ocr.py │ │ ├── pdf_parse_by_txt.py │ │ ├── pdf_parse_union_core.py │ │ ├── pipe │ │ │ ├── AbsPipe.py │ │ │ ├── OCRPipe.py │ │ │ ├── TXTPipe.py │ │ │ ├── UNIPipe.py │ │ │ ├── __init__.py │ │ │ └── __pycache__ │ │ │ │ ├── AbsPipe.cpython-310.pyc │ │ │ │ ├── TXTPipe.cpython-310.pyc │ │ │ │ └── __init__.cpython-310.pyc │ │ ├── post_proc │ │ │ ├── __init__.py │ │ │ ├── detect_para.py │ │ │ ├── pdf_post_filter.py │ │ │ └── remove_footnote.py │ │ ├── pre_proc │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ ├── citationmarker_remove.cpython-310.pyc │ │ │ │ ├── construct_page_dict.cpython-310.pyc │ │ │ │ ├── cut_image.cpython-310.pyc │ │ │ │ ├── equations_replace.cpython-310.pyc │ │ │ │ ├── ocr_detect_all_bboxes.cpython-310.pyc │ │ │ │ ├── ocr_dict_merge.cpython-310.pyc │ │ │ │ ├── ocr_span_list_modify.cpython-310.pyc │ │ │ │ ├── remove_bbox_overlap.cpython-310.pyc │ │ │ │ └── resolve_bbox_conflict.cpython-310.pyc │ │ │ ├── citationmarker_remove.py │ │ │ ├── construct_page_dict.py │ │ │ ├── cut_image.py │ │ │ ├── detect_equation.py │ │ │ ├── detect_footer_by_model.py │ │ │ ├── detect_footer_header_by_statistics.py │ │ │ ├── detect_footnote.py │ │ │ ├── detect_header.py │ │ │ ├── detect_images.py │ │ │ ├── detect_page_number.py │ │ │ ├── detect_tables.py │ │ │ ├── equations_replace.py │ │ │ ├── fix_image.py │ │ │ ├── fix_table.py │ │ │ ├── main_text_font.py │ │ │ ├── ocr_detect_all_bboxes.py │ │ │ ├── ocr_detect_layout.py │ │ │ ├── ocr_dict_merge.py │ │ │ ├── ocr_span_list_modify.py │ │ │ ├── pdf_pre_filter.py │ │ │ ├── post_layout_split.py │ │ │ ├── remove_bbox_overlap.py │ │ │ ├── remove_colored_strip_bbox.py │ │ │ ├── remove_footer_header.py │ │ │ ├── remove_rotate_bbox.py │ │ │ ├── resolve_bbox_conflict.py │ │ │ ├── solve_line_alien.py │ │ │ └── statistics.py │ │ ├── resources │ │ │ ├── fasttext-langdetect │ │ │ │ └── lid.176.ftz │ │ │ └── model_config │ │ │ │ ├── UniMERNet │ │ │ │ └── demo.yaml │ │ │ │ ├── layoutlmv3 │ │ │ │ └── layoutlmv3_base_inference.yaml │ │ │ │ └── model_configs.yaml │ │ ├── rw │ │ │ ├── AbsReaderWriter.py │ │ │ ├── DiskReaderWriter.py │ │ │ ├── S3ReaderWriter.py │ │ │ ├── __init__.py │ │ │ └── __pycache__ │ │ │ │ ├── AbsReaderWriter.cpython-310.pyc │ │ │ │ ├── DiskReaderWriter.cpython-310.pyc │ │ │ │ └── __init__.cpython-310.pyc │ │ ├── spark │ │ │ ├── __init__.py │ │ │ └── spark_api.py │ │ ├── tools │ │ │ ├── __init__.py │ │ │ ├── cli.py │ │ │ ├── cli_dev.py │ │ │ └── common.py │ │ └── user_api.py │ ├── projects │ │ ├── README.md │ │ ├── README_zh-CN.md │ │ └── llama_index_rag │ │ │ ├── README.md │ │ │ ├── README_zh-CN.md │ │ │ ├── data_ingestion.py │ │ │ ├── docker-compose.yml │ │ │ ├── example │ │ │ └── data │ │ │ │ └── declaration_of_the_rights_of_man_1789.pdf │ │ │ ├── query.py │ │ │ └── rag_data_api.png │ ├── requirements-docker.txt │ ├── requirements-qa.txt │ ├── requirements.txt │ ├── setup.py │ ├── signatures │ │ └── version1 │ │ │ └── cla.json │ ├── tests │ │ ├── assets │ │ │ ├── more_para_test_samples │ │ │ │ ├── gift_files.txt │ │ │ │ ├── scihub_files.txt │ │ │ │ └── zlib_files.txt │ │ │ ├── paper │ │ │ │ ├── images_tables_equations.json │ │ │ │ ├── paper.pdf │ │ │ │ ├── paper_recogPara.json │ │ │ │ ├── paper_recogPara.pdf │ │ │ │ └── pdf_dic.json │ │ │ ├── paras_test.json │ │ │ ├── pdf_text_example │ │ │ │ ├── vertical_blocks.json │ │ │ │ └── vertical_en_blocks.json │ │ │ └── pre_proc_results │ │ │ │ ├── 2列_ViLT_1_title.pdf │ │ │ │ └── preproc_out.json │ │ │ │ ├── arxiv_2011.13925 │ │ │ │ └── preproc_out.json │ │ │ │ ├── p3_图文混排_5.pdf │ │ │ │ └── preproc_out.json │ │ │ │ ├── p3_图文混排_6.pdf │ │ │ │ └── preproc_out.json │ │ │ │ ├── p3_图文混排_84.pdf │ │ │ │ └── preproc_out.json │ │ │ │ ├── scihub_10800000 │ │ │ │ └── preproc_out.json │ │ │ │ ├── scihub_46600000 │ │ │ │ └── preproc_out.json │ │ │ │ ├── scihub_60900000 │ │ │ │ └── preproc_out.json │ │ │ │ ├── scihub_76800000 │ │ │ │ └── preproc_out.json │ │ │ │ ├── the_eye_cdn_00412782 │ │ │ │ └── preproc_out.json │ │ │ │ ├── 中文单列_书籍_100247_4_装饰图片.pdf │ │ │ │ └── preproc_out.json │ │ │ │ ├── 书双列图文交错_p3_图文混排_91.pdf │ │ │ │ └── preproc_out.json │ │ │ │ ├── 双列_4图_10.1016_j.ijheatmasstransfer.2006.10.031_7.pdf │ │ │ │ └── preproc_out.json │ │ │ │ ├── 双列底部跨列表_10.1016_j.ijheatmasstransfer.2006.10.031_3.pdf │ │ │ │ └── preproc_out.json │ │ │ │ ├── 图注在侧边正文_10.1002_mrm.24141 9.pdf │ │ │ │ └── preproc_out.json │ │ │ │ ├── 图注在右侧栏_10.1002_mrm.24141 8.pdf │ │ │ │ └── preproc_out.json │ │ │ │ ├── 文字框重叠layout.pdf │ │ │ │ └── preproc_out.json │ │ │ │ ├── 春单列_论文-图、表、公式_5_带特殊table.pdf │ │ │ │ └── preproc_out.json │ │ │ │ ├── 纯2列_10.1002_mrm.24141_4.pdf │ │ │ │ └── preproc_out.json │ │ │ │ ├── 纯2列_ViLT_6_文字_表格.pdf │ │ │ │ └── preproc_out.json │ │ │ │ ├── 纯单列_中文_同花顺数据接口用户手册-windows-Python_17_段落_表格.pdf │ │ │ │ └── preproc_out.json │ │ │ │ ├── 纯单列_中文_同花顺数据接口用户手册-windows-Python_4_段落_图片.pdf │ │ │ │ └── preproc_out.json │ │ │ │ ├── 纯单列_论文-图、表、公式_14_少量文字_双列图片.pdf │ │ │ │ └── preproc_out.json │ │ │ │ ├── 纯单列_论文-图、表、公式_6_单列文字_双列图片.pdf │ │ │ │ └── preproc_out.json │ │ │ │ ├── 顶部2列文字 底部跨栏表格_10.1002_mrm.24141_10.pdf │ │ │ │ └── preproc_out.json │ │ │ │ ├── 顶部title 摘要,底部双列_10.1016_j.ijheatmasstransfer.2006.10.031_1.pdf │ │ │ │ └── preproc_out.json │ │ │ │ ├── 顶部表格_下2列_ViLT_7.pdf │ │ │ │ └── preproc_out.json │ │ │ │ ├── 顶部跨列表_底部2列文字_10.1016_j.ijheatmasstransfer.2006.10.031_2.pdf │ │ │ │ └── preproc_out.json │ │ │ │ └── 顶部金字塔形布局的图片_10.1002_mrm.24141_5.pdf │ │ │ │ └── preproc_out.json │ │ ├── get_coverage.py │ │ ├── magic-pdf.json │ │ ├── overall_indicator.py │ │ ├── preproc_2_parasplit_example.json │ │ ├── pymu_textblocks.json │ │ ├── retry_env.sh │ │ ├── test_bookname.txt │ │ ├── test_cli │ │ │ ├── conf │ │ │ │ ├── __init__py │ │ │ │ └── conf.py │ │ │ ├── lib │ │ │ │ ├── __init__.py │ │ │ │ ├── calculate_score.py │ │ │ │ ├── common.py │ │ │ │ ├── pre_clean.py │ │ │ │ └── scoring.py │ │ │ ├── magic-pdf.json │ │ │ ├── pdf_dev │ │ │ │ ├── annotations │ │ │ │ │ └── cleaned │ │ │ │ │ │ └── cleaned_research_report_1f978cd81fb7260c8f7644039ec2c054.md │ │ │ │ ├── pdf │ │ │ │ │ └── research_report_1f978cd81fb7260c8f7644039ec2c054.pdf │ │ │ │ └── result.json │ │ │ ├── test_bench.py │ │ │ ├── test_bench_gpu.py │ │ │ └── test_cli_sdk.py │ │ ├── test_commons.py │ │ ├── test_footnote │ │ │ └── footnote_bookid.txt │ │ ├── test_integrations │ │ │ └── test_rag │ │ │ │ ├── assets │ │ │ │ ├── middle.json │ │ │ │ ├── one_page_with_table_image.2.pdf │ │ │ │ └── one_page_with_table_image.pdf │ │ │ │ ├── test_api.py │ │ │ │ └── test_utils.py │ │ ├── test_metascan_classify │ │ │ ├── test_classify.py │ │ │ ├── test_meta_scan.py │ │ │ └── test_metascan_classify_data.json │ │ ├── test_para │ │ │ ├── para_test_pdf_ids.ini │ │ │ ├── test_hyphen_at_line_end.py │ │ │ ├── test_para_pipeline.py │ │ │ ├── test_pdf2text_recogPara_BlockContinuationProcessor.py │ │ │ ├── test_pdf2text_recogPara_BlockInnerParasProcessor.py │ │ │ ├── test_pdf2text_recogPara_Common.py │ │ │ ├── test_pdf2text_recogPara_TitleProcessor.py │ │ │ └── utils_for_test_para.py │ │ ├── test_table │ │ │ ├── assets │ │ │ │ └── table.jpg │ │ │ └── test_tablemaster.py │ │ ├── test_tools │ │ │ ├── __init__.py │ │ │ ├── assets │ │ │ │ ├── cli │ │ │ │ │ ├── path │ │ │ │ │ │ ├── cli_test_01.pdf │ │ │ │ │ │ └── cli_test_02.pdf │ │ │ │ │ └── pdf │ │ │ │ │ │ └── cli_test_01.pdf │ │ │ │ ├── cli_dev │ │ │ │ │ ├── cli_test_01.jsonl │ │ │ │ │ ├── cli_test_01.model.json │ │ │ │ │ └── cli_test_01.pdf │ │ │ │ └── common │ │ │ │ │ └── cli_test_01.pdf │ │ │ ├── test_cli.py │ │ │ ├── test_cli_dev.py │ │ │ └── test_common.py │ │ └── test_unit.py │ └── update_version.py ├── __init__.py ├── magic-pdf.json ├── pdf2md.py ├── pic2tab.py └── process_md.py ├── environment.yml ├── pdf2txt.py ├── readme.md └── setup.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/.gitignore -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT.egg-info/PKG-INFO: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT.egg-info/PKG-INFO -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT.egg-info/SOURCES.txt -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT.egg-info/requires.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT.egg-info/requires.txt -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | GOT 2 | zero_config 3 | -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/data/__init__.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/data/base_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/data/base_dataset.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/data/conversation_dataset_qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/data/conversation_dataset_qwen.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/demo/__pycache__/process_results.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/demo/__pycache__/process_results.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/demo/process_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/demo/process_results.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/demo/run_ocr_2.0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/demo/run_ocr_2.0.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/demo/run_ocr_2.0_crop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/demo/run_ocr_2.0_crop.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/eval/eval_GOT_ocr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/eval/eval_GOT_ocr.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/eval/evaluate_GOT.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/eval/evaluate_GOT.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/eval/multi_hardware_eval_GOT.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/eval/multi_hardware_eval_GOT.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/eval/pyevaltools/__init__.py: -------------------------------------------------------------------------------- 1 | author='aagrawal' 2 | -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/eval/pyevaltools/eval_ocr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/eval/pyevaltools/eval_ocr.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/eval/pyevaltools/eval_ocr_format.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/eval/pyevaltools/eval_ocr_format.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/eval/pyevaltools/eval_ocr_scene.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/eval/pyevaltools/eval_ocr_scene.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/eval/pyevaltools/merge_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/eval/pyevaltools/merge_results.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/model/GOT_ocr_2_0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/model/GOT_ocr_2_0.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/model/__init__.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/model/__pycache__/GOT_ocr_2_0.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/model/__pycache__/GOT_ocr_2_0.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/model/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/model/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/model/__pycache__/modeling_GOT_YiJiang.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/model/__pycache__/modeling_GOT_YiJiang.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/model/__pycache__/modeling_GOT_vllm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/model/__pycache__/modeling_GOT_vllm.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/model/modeling_GOT_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/model/modeling_GOT_vllm.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/model/plug/__pycache__/blip_process.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/model/plug/__pycache__/blip_process.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/model/plug/blip_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/model/plug/blip_process.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/model/vision_encoder/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/model/vision_encoder/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/model/vision_encoder/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/model/vision_encoder/__pycache__/vary_b.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/model/vision_encoder/__pycache__/vary_b.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/model/vision_encoder/vary_b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/model/vision_encoder/vary_b.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/train/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/train/train.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/train/train_GOT.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/train/train_GOT.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/train/train_flash_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/train/train_flash_attn.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/train/train_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/train/train_lora.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/train/train_lora_flash_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/train/train_lora_flash_attn.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/train/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/train/trainer.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/train/trainer_llm_llrd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/train/trainer_llm_llrd.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/train/trainer_vit_fixlr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/train/trainer_vit_fixlr.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/train/trainer_vit_llrd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/train/trainer_vit_llrd.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/utils/__pycache__/constants.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/utils/__pycache__/constants.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/utils/__pycache__/conversation.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/utils/__pycache__/conversation.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/utils/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/utils/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/utils/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/utils/arguments.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/utils/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/utils/constants.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/utils/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/utils/conversation.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/GOT/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/GOT/utils/utils.py -------------------------------------------------------------------------------- /PDF_parsing/GOT/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/pyproject.toml -------------------------------------------------------------------------------- /PDF_parsing/GOT/pyvenv.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/pyvenv.cfg -------------------------------------------------------------------------------- /PDF_parsing/GOT/zero_config/zero2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/zero_config/zero2.json -------------------------------------------------------------------------------- /PDF_parsing/GOT/zero_config/zero3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/GOT/zero_config/zero3.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/Dockerfile -------------------------------------------------------------------------------- /PDF_parsing/MinerU/LICENSE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/LICENSE.md -------------------------------------------------------------------------------- /PDF_parsing/MinerU/MinerU_CLA.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/MinerU_CLA.md -------------------------------------------------------------------------------- /PDF_parsing/MinerU/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/README.md -------------------------------------------------------------------------------- /PDF_parsing/MinerU/README_ja-JP.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/README_ja-JP.md -------------------------------------------------------------------------------- /PDF_parsing/MinerU/README_zh-CN.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/README_zh-CN.md -------------------------------------------------------------------------------- /PDF_parsing/MinerU/app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/app.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/dict2md/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/dict2md/mkcontent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/dict2md/mkcontent.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/dict2md/ocr_mkcontent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/dict2md/ocr_mkcontent.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/filter/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/filter/pdf_classify_by_type.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/filter/pdf_classify_by_type.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/filter/pdf_meta_scan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/filter/pdf_meta_scan.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/integrations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/integrations/rag/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/integrations/rag/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/integrations/rag/api.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/integrations/rag/type.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/integrations/rag/type.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/integrations/rag/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/integrations/rag/utils.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/layout/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/layout/bbox_sort.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/layout/bbox_sort.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/layout/layout_det_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/layout/layout_det_utils.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/layout/layout_sort.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/layout/layout_sort.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/layout/layout_spiler_recog.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/layout/layout_spiler_recog.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/layout/mcol_sort.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/layout/mcol_sort.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/Constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/Constants.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/MakeContentConfig.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/MakeContentConfig.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/ModelBlockTypeEnum.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/ModelBlockTypeEnum.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/boxbase.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/boxbase.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/calc_span_stats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/calc_span_stats.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/commons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/commons.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/config_reader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/config_reader.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/convert_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/convert_utils.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/coordinate_transform.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/coordinate_transform.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/detect_language_from_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/detect_language_from_model.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/draw_bbox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/draw_bbox.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/drop_reason.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/drop_reason.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/drop_tag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/drop_tag.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/hash_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/hash_utils.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/json_compressor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/json_compressor.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/language.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/language.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/local_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/local_math.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/markdown_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/markdown_utils.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/nlp_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/nlp_utils.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/ocr_content_type.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/ocr_content_type.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/path_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/path_utils.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/pdf_check.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/pdf_check.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/pdf_image_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/pdf_image_tools.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/safe_filename.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/safe_filename.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/textbase.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/textbase.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.8.1" 2 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/libs/vis_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/libs/vis_utils.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/__init__.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/doc_analyze_by_custom_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/doc_analyze_by_custom_model.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/magic_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/magic_model.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/model_list.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/model_list.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pdf_extract_kit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/pdf_extract_kit.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/backbone.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/beit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/beit.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/deit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/deit.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/__init__.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/__init__.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/cord.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/cord.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/data_collator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/data_collator.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/funsd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/funsd.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/image_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/image_utils.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/xfund.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/xfund.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/__init__.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/__init__.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/configuration_layoutlmv3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/configuration_layoutlmv3.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/modeling_layoutlmv3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/modeling_layoutlmv3.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3_fast.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3_fast.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/model_init.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/model_init.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/rcnn_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/rcnn_vl.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/visualizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/layoutlmv3/visualizer.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/post_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/post_process.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/self_modify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/self_modify.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/structeqtable/StructTableModel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/structeqtable/StructTableModel.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pek_sub_modules/structeqtable/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/ppTableModel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/ppTableModel.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/model/pp_structure_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/model/pp_structure_v2.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/para/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/para/block_continuation_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/para/block_continuation_processor.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/para/block_termination_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/para/block_termination_processor.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/para/commons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/para/commons.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/para/denoise.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/para/denoise.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/para/draw.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/para/draw.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/para/exceptions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/para/exceptions.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/para/layout_match_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/para/layout_match_processor.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/para/para_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/para/para_pipeline.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/para/para_split.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/para/para_split.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/para/para_split_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/para/para_split_v2.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/para/raw_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/para/raw_processor.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/para/stats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/para/stats.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/para/title_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/para/title_processor.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pdf_parse_by_ocr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pdf_parse_by_ocr.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pdf_parse_by_txt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pdf_parse_by_txt.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pdf_parse_union_core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pdf_parse_union_core.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pipe/AbsPipe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pipe/AbsPipe.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pipe/OCRPipe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pipe/OCRPipe.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pipe/TXTPipe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pipe/TXTPipe.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pipe/UNIPipe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pipe/UNIPipe.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pipe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/post_proc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/post_proc/detect_para.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/post_proc/detect_para.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/post_proc/pdf_post_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/post_proc/pdf_post_filter.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/post_proc/remove_footnote.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/post_proc/remove_footnote.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/citationmarker_remove.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/citationmarker_remove.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/construct_page_dict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/construct_page_dict.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/cut_image.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/cut_image.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/detect_equation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/detect_equation.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/detect_footer_by_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/detect_footer_by_model.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/detect_footer_header_by_statistics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/detect_footer_header_by_statistics.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/detect_footnote.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/detect_footnote.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/detect_header.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/detect_header.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/detect_images.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/detect_images.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/detect_page_number.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/detect_page_number.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/detect_tables.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/detect_tables.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/equations_replace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/equations_replace.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/fix_image.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/fix_image.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/fix_table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/fix_table.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/main_text_font.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/main_text_font.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/ocr_detect_all_bboxes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/ocr_detect_all_bboxes.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/ocr_detect_layout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/ocr_detect_layout.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/ocr_dict_merge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/ocr_dict_merge.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/ocr_span_list_modify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/ocr_span_list_modify.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/pdf_pre_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/pdf_pre_filter.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/post_layout_split.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/remove_bbox_overlap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/remove_bbox_overlap.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/remove_colored_strip_bbox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/remove_colored_strip_bbox.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/remove_footer_header.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/remove_footer_header.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/remove_rotate_bbox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/remove_rotate_bbox.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/resolve_bbox_conflict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/resolve_bbox_conflict.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/solve_line_alien.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/solve_line_alien.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/statistics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/pre_proc/statistics.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/resources/fasttext-langdetect/lid.176.ftz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/resources/fasttext-langdetect/lid.176.ftz -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/resources/model_config/UniMERNet/demo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/resources/model_config/UniMERNet/demo.yaml -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/resources/model_config/model_configs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/resources/model_config/model_configs.yaml -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/rw/AbsReaderWriter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/rw/AbsReaderWriter.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/rw/DiskReaderWriter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/rw/DiskReaderWriter.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/rw/S3ReaderWriter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/rw/S3ReaderWriter.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/rw/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/spark/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/spark/spark_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/spark/spark_api.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/tools/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/tools/cli.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/tools/cli_dev.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/tools/cli_dev.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/tools/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/tools/common.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/build/lib/magic_pdf/user_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/build/lib/magic_pdf/user_api.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/demo/demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/demo/demo.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/demo/demo1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/demo/demo1.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/demo/demo1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/demo/demo1.pdf -------------------------------------------------------------------------------- /PDF_parsing/MinerU/demo/demo2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/demo/demo2.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/demo/demo2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/demo/demo2.pdf -------------------------------------------------------------------------------- /PDF_parsing/MinerU/demo/magic_pdf_parse_main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/demo/magic_pdf_parse_main.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/demo/small_ocr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/demo/small_ocr.pdf -------------------------------------------------------------------------------- /PDF_parsing/MinerU/docs/FAQ_en_us.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/docs/FAQ_en_us.md -------------------------------------------------------------------------------- /PDF_parsing/MinerU/docs/FAQ_zh_cn.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/docs/FAQ_zh_cn.md -------------------------------------------------------------------------------- /PDF_parsing/MinerU/docs/README_Ubuntu_CUDA_Acceleration_en_US.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/docs/README_Ubuntu_CUDA_Acceleration_en_US.md -------------------------------------------------------------------------------- /PDF_parsing/MinerU/docs/README_Ubuntu_CUDA_Acceleration_zh_CN.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/docs/README_Ubuntu_CUDA_Acceleration_zh_CN.md -------------------------------------------------------------------------------- /PDF_parsing/MinerU/docs/README_Windows_CUDA_Acceleration_en_US.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/docs/README_Windows_CUDA_Acceleration_en_US.md -------------------------------------------------------------------------------- /PDF_parsing/MinerU/docs/README_Windows_CUDA_Acceleration_zh_CN.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/docs/README_Windows_CUDA_Acceleration_zh_CN.md -------------------------------------------------------------------------------- /PDF_parsing/MinerU/docs/chemical_knowledge_introduction/introduction.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/docs/chemical_knowledge_introduction/introduction.pdf -------------------------------------------------------------------------------- /PDF_parsing/MinerU/docs/chemical_knowledge_introduction/introduction.xmind: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/docs/chemical_knowledge_introduction/introduction.xmind -------------------------------------------------------------------------------- /PDF_parsing/MinerU/docs/download_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/docs/download_models.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/docs/download_models_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/docs/download_models_hf.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/docs/how_to_download_models_en.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/docs/how_to_download_models_en.md -------------------------------------------------------------------------------- /PDF_parsing/MinerU/docs/how_to_download_models_zh_cn.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/docs/how_to_download_models_zh_cn.md -------------------------------------------------------------------------------- /PDF_parsing/MinerU/docs/images/MinerU-logo-hq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/docs/images/MinerU-logo-hq.png -------------------------------------------------------------------------------- /PDF_parsing/MinerU/docs/images/MinerU-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/docs/images/MinerU-logo.png -------------------------------------------------------------------------------- /PDF_parsing/MinerU/docs/images/datalab_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/docs/images/datalab_logo.png -------------------------------------------------------------------------------- /PDF_parsing/MinerU/docs/images/flowchart_en.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/docs/images/flowchart_en.png -------------------------------------------------------------------------------- /PDF_parsing/MinerU/docs/images/flowchart_zh_cn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/docs/images/flowchart_zh_cn.png -------------------------------------------------------------------------------- /PDF_parsing/MinerU/docs/images/layout_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/docs/images/layout_example.png -------------------------------------------------------------------------------- /PDF_parsing/MinerU/docs/images/poly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/docs/images/poly.png -------------------------------------------------------------------------------- /PDF_parsing/MinerU/docs/images/project_panorama_en.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/docs/images/project_panorama_en.png -------------------------------------------------------------------------------- /PDF_parsing/MinerU/docs/images/project_panorama_zh_cn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/docs/images/project_panorama_zh_cn.png -------------------------------------------------------------------------------- /PDF_parsing/MinerU/docs/images/spans_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/docs/images/spans_example.png -------------------------------------------------------------------------------- /PDF_parsing/MinerU/docs/output_file_en_us.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/docs/output_file_en_us.md -------------------------------------------------------------------------------- /PDF_parsing/MinerU/docs/output_file_zh_cn.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/docs/output_file_zh_cn.md -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic-pdf.template.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic-pdf.template.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf.egg-info/PKG-INFO: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf.egg-info/PKG-INFO -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf.egg-info/SOURCES.txt -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf.egg-info/entry_points.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf.egg-info/entry_points.txt -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf.egg-info/not-zip-safe: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf.egg-info/requires.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf.egg-info/requires.txt -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | magic_pdf 2 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/__pycache__/pdf_parse_by_ocr.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/__pycache__/pdf_parse_by_ocr.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/__pycache__/pdf_parse_by_txt.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/__pycache__/pdf_parse_by_txt.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/__pycache__/pdf_parse_union_core.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/__pycache__/pdf_parse_union_core.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/__pycache__/user_api.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/__pycache__/user_api.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/dict2md/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/dict2md/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/dict2md/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/dict2md/__pycache__/ocr_mkcontent.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/dict2md/__pycache__/ocr_mkcontent.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/dict2md/mkcontent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/dict2md/mkcontent.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/dict2md/ocr_mkcontent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/dict2md/ocr_mkcontent.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/filter/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/filter/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/filter/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/filter/__pycache__/pdf_classify_by_type.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/filter/__pycache__/pdf_classify_by_type.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/filter/__pycache__/pdf_meta_scan.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/filter/__pycache__/pdf_meta_scan.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/filter/pdf_classify_by_type.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/filter/pdf_classify_by_type.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/filter/pdf_meta_scan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/filter/pdf_meta_scan.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/integrations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/integrations/rag/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/integrations/rag/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/integrations/rag/api.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/integrations/rag/type.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/integrations/rag/type.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/integrations/rag/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/integrations/rag/utils.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/layout/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/layout/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/layout/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/layout/__pycache__/bbox_sort.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/layout/__pycache__/bbox_sort.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/layout/__pycache__/layout_det_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/layout/__pycache__/layout_det_utils.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/layout/__pycache__/layout_sort.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/layout/__pycache__/layout_sort.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/layout/__pycache__/layout_spiler_recog.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/layout/__pycache__/layout_spiler_recog.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/layout/bbox_sort.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/layout/bbox_sort.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/layout/layout_det_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/layout/layout_det_utils.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/layout/layout_sort.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/layout/layout_sort.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/layout/layout_spiler_recog.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/layout/layout_spiler_recog.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/layout/mcol_sort.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/layout/mcol_sort.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/Constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/Constants.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/MakeContentConfig.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/MakeContentConfig.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/ModelBlockTypeEnum.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/ModelBlockTypeEnum.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/__pycache__/Constants.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/__pycache__/Constants.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/__pycache__/MakeContentConfig.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/__pycache__/MakeContentConfig.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/__pycache__/ModelBlockTypeEnum.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/__pycache__/ModelBlockTypeEnum.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/__pycache__/boxbase.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/__pycache__/boxbase.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/__pycache__/commons.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/__pycache__/commons.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/__pycache__/config_reader.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/__pycache__/config_reader.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/__pycache__/convert_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/__pycache__/convert_utils.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/__pycache__/coordinate_transform.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/__pycache__/coordinate_transform.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/__pycache__/drop_reason.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/__pycache__/drop_reason.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/__pycache__/drop_tag.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/__pycache__/drop_tag.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/__pycache__/hash_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/__pycache__/hash_utils.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/__pycache__/json_compressor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/__pycache__/json_compressor.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/__pycache__/language.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/__pycache__/language.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/__pycache__/local_math.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/__pycache__/local_math.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/__pycache__/markdown_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/__pycache__/markdown_utils.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/__pycache__/ocr_content_type.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/__pycache__/ocr_content_type.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/__pycache__/pdf_check.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/__pycache__/pdf_check.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/__pycache__/pdf_image_tools.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/__pycache__/pdf_image_tools.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/__pycache__/version.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/__pycache__/version.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/boxbase.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/boxbase.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/calc_span_stats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/calc_span_stats.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/commons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/commons.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/config_reader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/config_reader.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/convert_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/convert_utils.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/coordinate_transform.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/coordinate_transform.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/detect_language_from_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/detect_language_from_model.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/draw_bbox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/draw_bbox.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/drop_reason.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/drop_reason.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/drop_tag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/drop_tag.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/hash_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/hash_utils.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/json_compressor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/json_compressor.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/language.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/language.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/local_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/local_math.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/markdown_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/markdown_utils.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/nlp_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/nlp_utils.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/ocr_content_type.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/ocr_content_type.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/path_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/path_utils.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/pdf_check.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/pdf_check.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/pdf_image_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/pdf_image_tools.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/safe_filename.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/safe_filename.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/textbase.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/textbase.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.8.1" 2 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/libs/vis_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/libs/vis_utils.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/__init__.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/__pycache__/doc_analyze_by_custom_model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/__pycache__/doc_analyze_by_custom_model.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/__pycache__/magic_model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/__pycache__/magic_model.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/__pycache__/model_list.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/__pycache__/model_list.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/__pycache__/pdf_extract_kit.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/__pycache__/pdf_extract_kit.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/__pycache__/ppTableModel.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/__pycache__/ppTableModel.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/doc_analyze_by_custom_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/doc_analyze_by_custom_model.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/magic_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/magic_model.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/model_list.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/model_list.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pdf_extract_kit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pdf_extract_kit.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/__pycache__/post_process.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/__pycache__/post_process.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/__pycache__/self_modify.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/__pycache__/self_modify.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/__pycache__/backbone.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/__pycache__/backbone.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/__pycache__/beit.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/__pycache__/beit.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/__pycache__/deit.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/__pycache__/deit.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/__pycache__/model_init.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/__pycache__/model_init.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/__pycache__/rcnn_vl.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/__pycache__/rcnn_vl.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/__pycache__/visualizer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/__pycache__/visualizer.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/backbone.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/beit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/beit.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/deit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/deit.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/__init__.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/__init__.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/cord.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/cord.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/data_collator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/data_collator.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/funsd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/funsd.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/image_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/image_utils.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/xfund.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/xfund.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/__init__.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/__init__.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/__pycache__/configuration_layoutlmv3.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/__pycache__/configuration_layoutlmv3.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/__pycache__/modeling_layoutlmv3.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/__pycache__/modeling_layoutlmv3.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/__pycache__/tokenization_layoutlmv3.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/__pycache__/tokenization_layoutlmv3.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/__pycache__/tokenization_layoutlmv3_fast.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/__pycache__/tokenization_layoutlmv3_fast.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/configuration_layoutlmv3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/configuration_layoutlmv3.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/modeling_layoutlmv3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/modeling_layoutlmv3.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3_fast.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3_fast.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/model_init.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/model_init.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/rcnn_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/rcnn_vl.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/visualizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/layoutlmv3/visualizer.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/post_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/post_process.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/self_modify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/self_modify.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/structeqtable/StructTableModel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/structeqtable/StructTableModel.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/structeqtable/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/structeqtable/__pycache__/StructTableModel.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/structeqtable/__pycache__/StructTableModel.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/structeqtable/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pek_sub_modules/structeqtable/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/ppTableModel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/ppTableModel.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/model/pp_structure_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/model/pp_structure_v2.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/para/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/para/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/para/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/para/__pycache__/para_split_v2.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/para/__pycache__/para_split_v2.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/para/block_continuation_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/para/block_continuation_processor.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/para/block_termination_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/para/block_termination_processor.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/para/commons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/para/commons.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/para/denoise.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/para/denoise.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/para/draw.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/para/draw.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/para/exceptions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/para/exceptions.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/para/layout_match_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/para/layout_match_processor.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/para/para_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/para/para_pipeline.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/para/para_split.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/para/para_split.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/para/para_split_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/para/para_split_v2.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/para/raw_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/para/raw_processor.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/para/stats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/para/stats.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/para/title_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/para/title_processor.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pdf_parse_by_ocr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pdf_parse_by_ocr.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pdf_parse_by_txt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pdf_parse_by_txt.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pdf_parse_union_core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pdf_parse_union_core.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pipe/AbsPipe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pipe/AbsPipe.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pipe/OCRPipe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pipe/OCRPipe.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pipe/TXTPipe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pipe/TXTPipe.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pipe/UNIPipe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pipe/UNIPipe.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pipe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pipe/__pycache__/AbsPipe.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pipe/__pycache__/AbsPipe.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pipe/__pycache__/TXTPipe.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pipe/__pycache__/TXTPipe.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pipe/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pipe/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/post_proc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/post_proc/detect_para.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/post_proc/detect_para.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/post_proc/pdf_post_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/post_proc/pdf_post_filter.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/post_proc/remove_footnote.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/post_proc/remove_footnote.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/__pycache__/citationmarker_remove.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/__pycache__/citationmarker_remove.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/__pycache__/construct_page_dict.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/__pycache__/construct_page_dict.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/__pycache__/cut_image.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/__pycache__/cut_image.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/__pycache__/equations_replace.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/__pycache__/equations_replace.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/__pycache__/ocr_detect_all_bboxes.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/__pycache__/ocr_detect_all_bboxes.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/__pycache__/ocr_dict_merge.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/__pycache__/ocr_dict_merge.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/__pycache__/ocr_span_list_modify.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/__pycache__/ocr_span_list_modify.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/__pycache__/remove_bbox_overlap.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/__pycache__/remove_bbox_overlap.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/__pycache__/resolve_bbox_conflict.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/__pycache__/resolve_bbox_conflict.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/citationmarker_remove.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/citationmarker_remove.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/construct_page_dict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/construct_page_dict.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/cut_image.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/cut_image.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/detect_equation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/detect_equation.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/detect_footer_by_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/detect_footer_by_model.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/detect_footer_header_by_statistics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/detect_footer_header_by_statistics.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/detect_footnote.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/detect_footnote.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/detect_header.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/detect_header.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/detect_images.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/detect_images.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/detect_page_number.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/detect_page_number.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/detect_tables.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/detect_tables.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/equations_replace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/equations_replace.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/fix_image.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/fix_image.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/fix_table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/fix_table.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/main_text_font.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/main_text_font.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/ocr_detect_all_bboxes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/ocr_detect_all_bboxes.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/ocr_detect_layout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/ocr_detect_layout.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/ocr_dict_merge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/ocr_dict_merge.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/ocr_span_list_modify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/ocr_span_list_modify.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/pdf_pre_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/pdf_pre_filter.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/post_layout_split.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/remove_bbox_overlap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/remove_bbox_overlap.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/remove_colored_strip_bbox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/remove_colored_strip_bbox.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/remove_footer_header.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/remove_footer_header.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/remove_rotate_bbox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/remove_rotate_bbox.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/resolve_bbox_conflict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/resolve_bbox_conflict.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/solve_line_alien.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/solve_line_alien.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/pre_proc/statistics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/pre_proc/statistics.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/resources/fasttext-langdetect/lid.176.ftz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/resources/fasttext-langdetect/lid.176.ftz -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/resources/model_config/UniMERNet/demo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/resources/model_config/UniMERNet/demo.yaml -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/resources/model_config/model_configs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/resources/model_config/model_configs.yaml -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/rw/AbsReaderWriter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/rw/AbsReaderWriter.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/rw/DiskReaderWriter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/rw/DiskReaderWriter.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/rw/S3ReaderWriter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/rw/S3ReaderWriter.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/rw/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/rw/__pycache__/AbsReaderWriter.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/rw/__pycache__/AbsReaderWriter.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/rw/__pycache__/DiskReaderWriter.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/rw/__pycache__/DiskReaderWriter.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/rw/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/rw/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/spark/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/spark/spark_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/spark/spark_api.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/tools/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/tools/cli.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/tools/cli_dev.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/tools/cli_dev.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/tools/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/tools/common.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/magic_pdf/user_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/magic_pdf/user_api.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/projects/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/projects/README.md -------------------------------------------------------------------------------- /PDF_parsing/MinerU/projects/README_zh-CN.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/projects/README_zh-CN.md -------------------------------------------------------------------------------- /PDF_parsing/MinerU/projects/llama_index_rag/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/projects/llama_index_rag/README.md -------------------------------------------------------------------------------- /PDF_parsing/MinerU/projects/llama_index_rag/README_zh-CN.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/projects/llama_index_rag/README_zh-CN.md -------------------------------------------------------------------------------- /PDF_parsing/MinerU/projects/llama_index_rag/data_ingestion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/projects/llama_index_rag/data_ingestion.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/projects/llama_index_rag/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/projects/llama_index_rag/docker-compose.yml -------------------------------------------------------------------------------- /PDF_parsing/MinerU/projects/llama_index_rag/example/data/declaration_of_the_rights_of_man_1789.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/projects/llama_index_rag/example/data/declaration_of_the_rights_of_man_1789.pdf -------------------------------------------------------------------------------- /PDF_parsing/MinerU/projects/llama_index_rag/query.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/projects/llama_index_rag/query.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/projects/llama_index_rag/rag_data_api.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/projects/llama_index_rag/rag_data_api.png -------------------------------------------------------------------------------- /PDF_parsing/MinerU/requirements-docker.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/requirements-docker.txt -------------------------------------------------------------------------------- /PDF_parsing/MinerU/requirements-qa.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/requirements-qa.txt -------------------------------------------------------------------------------- /PDF_parsing/MinerU/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/requirements.txt -------------------------------------------------------------------------------- /PDF_parsing/MinerU/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/setup.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/signatures/version1/cla.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/signatures/version1/cla.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/more_para_test_samples/gift_files.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/more_para_test_samples/scihub_files.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/more_para_test_samples/scihub_files.txt -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/more_para_test_samples/zlib_files.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/paper/images_tables_equations.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/paper/images_tables_equations.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/paper/paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/paper/paper.pdf -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/paper/paper_recogPara.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/paper/paper_recogPara.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/paper/paper_recogPara.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/paper/paper_recogPara.pdf -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/paper/pdf_dic.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/paper/pdf_dic.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/paras_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/paras_test.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pdf_text_example/vertical_blocks.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pdf_text_example/vertical_blocks.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pdf_text_example/vertical_en_blocks.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pdf_text_example/vertical_en_blocks.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/2列_ViLT_1_title.pdf/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/2列_ViLT_1_title.pdf/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/arxiv_2011.13925/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/arxiv_2011.13925/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/p3_图文混排_5.pdf/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/p3_图文混排_5.pdf/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/p3_图文混排_6.pdf/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/p3_图文混排_6.pdf/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/p3_图文混排_84.pdf/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/p3_图文混排_84.pdf/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/scihub_10800000/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/scihub_10800000/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/scihub_46600000/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/scihub_46600000/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/scihub_60900000/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/scihub_60900000/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/scihub_76800000/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/scihub_76800000/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/the_eye_cdn_00412782/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/the_eye_cdn_00412782/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/中文单列_书籍_100247_4_装饰图片.pdf/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/中文单列_书籍_100247_4_装饰图片.pdf/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/书双列图文交错_p3_图文混排_91.pdf/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/书双列图文交错_p3_图文混排_91.pdf/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/双列_4图_10.1016_j.ijheatmasstransfer.2006.10.031_7.pdf/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/双列_4图_10.1016_j.ijheatmasstransfer.2006.10.031_7.pdf/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/双列底部跨列表_10.1016_j.ijheatmasstransfer.2006.10.031_3.pdf/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/双列底部跨列表_10.1016_j.ijheatmasstransfer.2006.10.031_3.pdf/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/图注在侧边正文_10.1002_mrm.24141 9.pdf/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/图注在侧边正文_10.1002_mrm.24141 9.pdf/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/图注在右侧栏_10.1002_mrm.24141 8.pdf/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/图注在右侧栏_10.1002_mrm.24141 8.pdf/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/文字框重叠layout.pdf/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/文字框重叠layout.pdf/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/春单列_论文-图、表、公式_5_带特殊table.pdf/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/春单列_论文-图、表、公式_5_带特殊table.pdf/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/纯2列_10.1002_mrm.24141_4.pdf/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/纯2列_10.1002_mrm.24141_4.pdf/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/纯2列_ViLT_6_文字_表格.pdf/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/纯2列_ViLT_6_文字_表格.pdf/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/纯单列_中文_同花顺数据接口用户手册-windows-Python_17_段落_表格.pdf/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/纯单列_中文_同花顺数据接口用户手册-windows-Python_17_段落_表格.pdf/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/纯单列_中文_同花顺数据接口用户手册-windows-Python_4_段落_图片.pdf/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/纯单列_中文_同花顺数据接口用户手册-windows-Python_4_段落_图片.pdf/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/纯单列_论文-图、表、公式_14_少量文字_双列图片.pdf/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/纯单列_论文-图、表、公式_14_少量文字_双列图片.pdf/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/纯单列_论文-图、表、公式_6_单列文字_双列图片.pdf/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/纯单列_论文-图、表、公式_6_单列文字_双列图片.pdf/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/顶部2列文字 底部跨栏表格_10.1002_mrm.24141_10.pdf/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/顶部2列文字 底部跨栏表格_10.1002_mrm.24141_10.pdf/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/顶部title 摘要,底部双列_10.1016_j.ijheatmasstransfer.2006.10.031_1.pdf/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/顶部title 摘要,底部双列_10.1016_j.ijheatmasstransfer.2006.10.031_1.pdf/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/顶部表格_下2列_ViLT_7.pdf/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/顶部表格_下2列_ViLT_7.pdf/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/顶部跨列表_底部2列文字_10.1016_j.ijheatmasstransfer.2006.10.031_2.pdf/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/顶部跨列表_底部2列文字_10.1016_j.ijheatmasstransfer.2006.10.031_2.pdf/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/assets/pre_proc_results/顶部金字塔形布局的图片_10.1002_mrm.24141_5.pdf/preproc_out.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/assets/pre_proc_results/顶部金字塔形布局的图片_10.1002_mrm.24141_5.pdf/preproc_out.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/get_coverage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/get_coverage.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/magic-pdf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/magic-pdf.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/overall_indicator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/overall_indicator.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/preproc_2_parasplit_example.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/preproc_2_parasplit_example.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/pymu_textblocks.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/pymu_textblocks.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/retry_env.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/retry_env.sh -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_bookname.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_bookname.txt -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_cli/conf/__init__py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_cli/conf/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_cli/conf/conf.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_cli/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_cli/lib/calculate_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_cli/lib/calculate_score.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_cli/lib/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_cli/lib/common.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_cli/lib/pre_clean.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_cli/lib/pre_clean.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_cli/lib/scoring.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_cli/lib/scoring.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_cli/magic-pdf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_cli/magic-pdf.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_cli/pdf_dev/annotations/cleaned/cleaned_research_report_1f978cd81fb7260c8f7644039ec2c054.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_cli/pdf_dev/annotations/cleaned/cleaned_research_report_1f978cd81fb7260c8f7644039ec2c054.md -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_cli/pdf_dev/pdf/research_report_1f978cd81fb7260c8f7644039ec2c054.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_cli/pdf_dev/pdf/research_report_1f978cd81fb7260c8f7644039ec2c054.pdf -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_cli/pdf_dev/result.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_cli/pdf_dev/result.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_cli/test_bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_cli/test_bench.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_cli/test_bench_gpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_cli/test_bench_gpu.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_cli/test_cli_sdk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_cli/test_cli_sdk.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_commons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_commons.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_footnote/footnote_bookid.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_footnote/footnote_bookid.txt -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_integrations/test_rag/assets/middle.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_integrations/test_rag/assets/middle.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_integrations/test_rag/assets/one_page_with_table_image.2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_integrations/test_rag/assets/one_page_with_table_image.2.pdf -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_integrations/test_rag/assets/one_page_with_table_image.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_integrations/test_rag/assets/one_page_with_table_image.pdf -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_integrations/test_rag/test_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_integrations/test_rag/test_api.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_integrations/test_rag/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_integrations/test_rag/test_utils.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_metascan_classify/test_classify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_metascan_classify/test_classify.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_metascan_classify/test_meta_scan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_metascan_classify/test_meta_scan.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_metascan_classify/test_metascan_classify_data.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_metascan_classify/test_metascan_classify_data.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_para/para_test_pdf_ids.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_para/para_test_pdf_ids.ini -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_para/test_hyphen_at_line_end.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_para/test_hyphen_at_line_end.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_para/test_para_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_para/test_para_pipeline.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_para/test_pdf2text_recogPara_BlockContinuationProcessor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_para/test_pdf2text_recogPara_BlockContinuationProcessor.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_para/test_pdf2text_recogPara_BlockInnerParasProcessor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_para/test_pdf2text_recogPara_BlockInnerParasProcessor.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_para/test_pdf2text_recogPara_Common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_para/test_pdf2text_recogPara_Common.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_para/test_pdf2text_recogPara_TitleProcessor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_para/test_pdf2text_recogPara_TitleProcessor.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_para/utils_for_test_para.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_para/utils_for_test_para.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_table/assets/table.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_table/assets/table.jpg -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_table/test_tablemaster.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_table/test_tablemaster.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_tools/assets/cli/path/cli_test_01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_tools/assets/cli/path/cli_test_01.pdf -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_tools/assets/cli/path/cli_test_02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_tools/assets/cli/path/cli_test_02.pdf -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_tools/assets/cli/pdf/cli_test_01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_tools/assets/cli/pdf/cli_test_01.pdf -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_tools/assets/cli_dev/cli_test_01.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_tools/assets/cli_dev/cli_test_01.jsonl -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_tools/assets/cli_dev/cli_test_01.model.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_tools/assets/cli_dev/cli_test_01.model.json -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_tools/assets/cli_dev/cli_test_01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_tools/assets/cli_dev/cli_test_01.pdf -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_tools/assets/common/cli_test_01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_tools/assets/common/cli_test_01.pdf -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_tools/test_cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_tools/test_cli.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_tools/test_cli_dev.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_tools/test_cli_dev.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_tools/test_common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_tools/test_common.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/tests/test_unit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/tests/test_unit.py -------------------------------------------------------------------------------- /PDF_parsing/MinerU/update_version.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/MinerU/update_version.py -------------------------------------------------------------------------------- /PDF_parsing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/__init__.py -------------------------------------------------------------------------------- /PDF_parsing/magic-pdf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/magic-pdf.json -------------------------------------------------------------------------------- /PDF_parsing/pdf2md.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/pdf2md.py -------------------------------------------------------------------------------- /PDF_parsing/pic2tab.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/pic2tab.py -------------------------------------------------------------------------------- /PDF_parsing/process_md.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/PDF_parsing/process_md.py -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/environment.yml -------------------------------------------------------------------------------- /pdf2txt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/pdf2txt.py -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/readme.md -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liunian-Jay/MU-GOT/HEAD/setup.py --------------------------------------------------------------------------------