├── .dockerignore ├── .github └── workflows │ └── mmda-ci.yml ├── .gitignore ├── LICENSE ├── README.md ├── examples ├── bibliography.ipynb ├── bibliography_extraction │ ├── README.md │ ├── main.py │ └── requirements.txt ├── grobid_augment_existing_document_parser │ ├── augment_doc_with_grobid_annos.ipynb │ └── e5910c027af0ee9c1901c57f6579d903aedee7f4.xml ├── mentions.ipynb ├── section_nesting_prediction │ ├── README.md │ ├── main.py │ ├── nesting.bin │ ├── requirements.txt │ └── sample.pdf ├── title_abstract.py ├── vila_for_scidoc_parsing │ ├── README.md │ ├── main.py │ └── preview.png └── vlue_evaluation │ ├── README.md │ ├── main.py │ └── requirements.txt ├── pyproject.toml ├── release ├── README.md ├── push-aliases.sh ├── push-to-pypi.sh └── pypi-aliases │ ├── papermage │ ├── README.md │ ├── pyproject.toml │ └── src │ │ └── papermage │ │ ├── __init__.py │ │ └── py.typed │ └── scipdf │ ├── README.md │ ├── pyproject.toml │ └── src │ └── scipdf │ ├── __init__.py │ └── py.typed ├── requirements.txt ├── setup.py ├── src ├── ai2_internal │ ├── README.txt │ ├── __init__.py │ ├── api.py │ ├── bibentry_detection_predictor │ │ ├── __init__.py │ │ ├── data │ │ │ ├── 000026bab3c52aa8ff37dc3e155ffbcb506aa1f6.pdf │ │ │ ├── no_bibs.pdf │ │ │ └── spanless_bibs_3cf45514384bbb7d083ae53e19bdc22300e648ab.pdf │ │ ├── integration_test.py │ │ └── interface.py │ ├── bibentry_predictor │ │ ├── __init__.py │ │ ├── integration_test.py │ │ └── interface.py │ ├── bibentry_predictor_mmda │ │ ├── README.txt │ │ ├── __init__.py │ │ ├── data │ │ │ ├── test_data.json.gz │ │ │ └── test_data_v2_first_2_bibs_have_empty_span_groups.json.gz │ │ ├── integration_test.py │ │ └── interface.py │ ├── citation_links │ │ ├── __init__.py │ │ ├── integration_test.py │ │ └── interface.py │ ├── citation_mentions │ │ ├── __init__.py │ │ ├── data │ │ │ ├── arxiv-1906.08632-page0.pdf │ │ │ ├── arxiv-1906.08632-pages1-2.pdf │ │ │ └── arxiv-2201.05673-page1.pdf │ │ ├── integration_test.py │ │ └── interface.py │ ├── config.yaml │ ├── dwp_heuristic │ │ ├── README.txt │ │ ├── __init__.py │ │ ├── integration_test.py │ │ ├── interface.py │ │ └── test_fixtures │ │ │ └── test_doc.json │ ├── evaluation_notebooks │ │ ├── end_to_end_eval_easier.ipynb │ │ └── end_to_end_eval_grobid_comparison.ipynb │ ├── figure_table_predictors │ │ ├── Create_fixtures_for_unit_test.ipynb │ │ ├── README.md │ │ ├── __init__.py │ │ ├── figure_table_timo_service_invocation.ipynb │ │ ├── figure_table_timo_service_invocation_profiling.ipynb │ │ ├── integration_test.py │ │ ├── interface.py │ │ ├── performance_metrics.png │ │ └── test_fixtures │ │ │ ├── test_doc.json │ │ │ ├── test_doc_sha_08f02e7888f140a76a00ed23fce2f2fc303a.json │ │ │ ├── test_doc_sha_08f02e7888f140a76a00ed23fce2f2fc303a.pdf │ │ │ ├── test_doc_sha_d0450478c38dda61f9943f417ab9fcdb2ebeae0a.json │ │ │ └── test_doc_sha_d0450478c38dda61f9943f417ab9fcdb2ebeae0a.pdf │ ├── layout_parser │ │ ├── __init__.py │ │ ├── integration_test.py │ │ └── interface.py │ ├── shared_test_fixtures │ │ └── page0.png │ ├── svm_word_predictor │ │ ├── README.txt │ │ ├── __init__.py │ │ ├── integration_test.py │ │ ├── interface.py │ │ └── test_fixtures │ │ │ └── test_doc.json │ └── vila │ │ ├── __init__.py │ │ ├── integration_test.py │ │ ├── interface.py │ │ └── test_fixtures │ │ └── test_doc.json └── mmda │ ├── __init__.py │ ├── eval │ ├── __init__.py │ ├── metrics.py │ ├── s2.py │ └── vlue.py │ ├── featurizers │ ├── __init__.py │ └── citation_link_featurizers.py │ ├── parsers │ ├── README.md │ ├── __init__.py │ ├── grobid.config │ ├── grobid_augment_existing_document_parser.py │ ├── grobid_parser.py │ ├── parser.py │ ├── pdfplumber_parser.py │ └── symbol_scraper_parser.py │ ├── predictors │ ├── __init__.py │ ├── base_predictors │ │ ├── __init__.py │ │ ├── base_heuristic_predictor.py │ │ └── base_predictor.py │ ├── d2_predictors │ │ ├── __init__.py │ │ └── bibentry_detection_predictor.py │ ├── heuristic_predictors │ │ ├── README.md │ │ ├── __init__.py │ │ ├── dictionary_word_predictor.py │ │ ├── figure_table_predictors.py │ │ ├── grobid_citation_predictor.py │ │ ├── section_header_predictor.py │ │ ├── sentence_boundary_predictor.py │ │ └── whitespace_predictor.py │ ├── hf_predictors │ │ ├── __init__.py │ │ ├── base_hf_predictor.py │ │ ├── bibentry_predictor │ │ │ ├── __init__.py │ │ │ ├── predictor.py │ │ │ ├── types.py │ │ │ └── utils.py │ │ ├── mention_predictor.py │ │ ├── span_group_classification_predictor.py │ │ ├── token_classification_predictor.py │ │ ├── utils.py │ │ └── vila_predictor.py │ ├── lp_predictors.py │ ├── sklearn_predictors │ │ ├── base_sklearn_predictor.py │ │ └── svm_word_predictor.py │ ├── tesseract_predictors.py │ └── xgb_predictors │ │ ├── __init__.py │ │ ├── citation_link_predictor.py │ │ └── section_nesting_predictor.py │ ├── rasterizers │ ├── __init__.py │ └── rasterizer.py │ ├── recipes │ ├── __init__.py │ ├── core_recipe.py │ └── recipe.py │ ├── types │ ├── __init__.py │ ├── annotation.py │ ├── box.py │ ├── document.py │ ├── image.py │ ├── indexers.py │ ├── metadata.py │ ├── names.py │ ├── old │ │ ├── annotations.old.py │ │ ├── boundingbox.old.py │ │ ├── document.old.py │ │ ├── document_elements.py │ │ ├── image.old.py │ │ └── span.old.py │ ├── span.py │ └── user_data.py │ └── utils │ ├── __init__.py │ ├── outline_metadata.py │ ├── stringify.py │ └── tools.py └── tests ├── __init__.py ├── fixtures ├── 1903.10676.pdf ├── 2107.07170.pdf ├── 4be952924cd565488b4a239dc6549095029ee578.pdf ├── 4be952924cd565488b4a239dc6549095029ee578__pdfplumber_doc.json ├── doc_fixture_2149e0c1106e6dfa36ea787167d6611cf88b69cb.json ├── doc_fixture_2149e0c1106e6dfa36ea787167d6611cf88b69cb.pdf ├── doc_fixture_e5910c027af0ee9c1901c57f6579d903aedee7f4.pkl ├── e5910c027af0ee9c1901c57f6579d903aedee7f4.pdf ├── example-dictionary.txt ├── figure_table_predictions.json ├── grobid-tei-maml-header.xml ├── grobid-tei-no-abstract.xml ├── grobid-tei-no-title.xml ├── grobid_augment_existing_document_parser │ ├── e5910c027af0ee9c1901c57f6579d903aedee7f4.pdf │ ├── e5910c027af0ee9c1901c57f6579d903aedee7f4.xml │ ├── e5910c027af0ee9c1901c57f6579d903aedee7f4__pdfplumber_doc.json │ ├── e5910c027af0ee9c1901c57f6579d903aedee7f4_no_authors.xml │ ├── grobid-no-authors.config │ └── grobid.config ├── nesting.bin ├── svm_word_predictor │ ├── hyphen_clf.joblib │ ├── neg_words.txt │ ├── ohencoder.joblib │ ├── pos_words.txt │ ├── scaler.joblib │ ├── svm_word_predictor.tar.gz │ └── unigram_probs.pkl ├── test-uu.pdf ├── types │ ├── 20fdafb68d0e69d193527a9a1cbe64e7e69a3798__bib_entry_span_groups_from_box_groups.json │ ├── 20fdafb68d0e69d193527a9a1cbe64e7e69a3798__pdfplumber_doc.json │ ├── c8b53e2d9cd247e2d42719e337bfb13784d22bd2.json │ ├── spp-dag-0-0-4-doc.json │ └── test_document_box_groups.json ├── unicode-test.json └── utils │ ├── 121e30c48546e671dc5e16c694c5e69b392cf8fb_0.0.23.json │ ├── 20fdafb68d0e69d193527a9a1cbe64e7e69a3798__bib_entries.json │ └── 20fdafb68d0e69d193527a9a1cbe64e7e69a3798__pdfplumber_doc.json ├── test_eval └── test_metrics.py ├── test_internal_ai2 └── test_api.py ├── test_parsers ├── test_grobid_augment_existing_document_parser.py ├── test_grobid_header_parser.py ├── test_override.py └── test_pdf_plumber_parser.py ├── test_predictors ├── test.json.py ├── test_bibentry_predictor.py ├── test_dictionary_word_predictor.py ├── test_figure_table_predictors.py ├── test_section_header_predictor.py ├── test_section_nesting_predictor.py ├── test_span_group_classification_predictor.py ├── test_svm_word_predictor.py ├── test_vila_predictors.py └── test_whitespace_predictor.py ├── test_recipes ├── __init__.py ├── core_recipe_fixtures.py └── test_core_recipe.py ├── test_types ├── test_annotation.py ├── test_box.py ├── test_document.py ├── test_indexers.py ├── test_json_conversion.py ├── test_metadata.py ├── test_span.py └── test_span_group.py └── test_utils ├── __init__.py ├── test_outline_metadata.py ├── test_stringify.py └── test_tools.py /.dockerignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/.dockerignore -------------------------------------------------------------------------------- /.github/workflows/mmda-ci.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/.github/workflows/mmda-ci.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/README.md -------------------------------------------------------------------------------- /examples/bibliography.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/examples/bibliography.ipynb -------------------------------------------------------------------------------- /examples/bibliography_extraction/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/examples/bibliography_extraction/README.md -------------------------------------------------------------------------------- /examples/bibliography_extraction/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/examples/bibliography_extraction/main.py -------------------------------------------------------------------------------- /examples/bibliography_extraction/requirements.txt: -------------------------------------------------------------------------------- 1 | vila 2 | layoutparser[effdet]>=0.3.0 3 | requests 4 | pytesseract 5 | -------------------------------------------------------------------------------- /examples/grobid_augment_existing_document_parser/augment_doc_with_grobid_annos.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/examples/grobid_augment_existing_document_parser/augment_doc_with_grobid_annos.ipynb -------------------------------------------------------------------------------- /examples/grobid_augment_existing_document_parser/e5910c027af0ee9c1901c57f6579d903aedee7f4.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/examples/grobid_augment_existing_document_parser/e5910c027af0ee9c1901c57f6579d903aedee7f4.xml -------------------------------------------------------------------------------- /examples/mentions.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/examples/mentions.ipynb -------------------------------------------------------------------------------- /examples/section_nesting_prediction/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/examples/section_nesting_prediction/README.md -------------------------------------------------------------------------------- /examples/section_nesting_prediction/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/examples/section_nesting_prediction/main.py -------------------------------------------------------------------------------- /examples/section_nesting_prediction/nesting.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/examples/section_nesting_prediction/nesting.bin -------------------------------------------------------------------------------- /examples/section_nesting_prediction/requirements.txt: -------------------------------------------------------------------------------- 1 | layoutparser[effdet]>=0.3.0 2 | transformers 3 | vila 4 | xgboost 5 | -------------------------------------------------------------------------------- /examples/section_nesting_prediction/sample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/examples/section_nesting_prediction/sample.pdf -------------------------------------------------------------------------------- /examples/title_abstract.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/examples/title_abstract.py -------------------------------------------------------------------------------- /examples/vila_for_scidoc_parsing/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/examples/vila_for_scidoc_parsing/README.md -------------------------------------------------------------------------------- /examples/vila_for_scidoc_parsing/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/examples/vila_for_scidoc_parsing/main.py -------------------------------------------------------------------------------- /examples/vila_for_scidoc_parsing/preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/examples/vila_for_scidoc_parsing/preview.png -------------------------------------------------------------------------------- /examples/vlue_evaluation/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/examples/vlue_evaluation/README.md -------------------------------------------------------------------------------- /examples/vlue_evaluation/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/examples/vlue_evaluation/main.py -------------------------------------------------------------------------------- /examples/vlue_evaluation/requirements.txt: -------------------------------------------------------------------------------- 1 | vila 2 | layoutparser[effdet]>=0.3.0 3 | requests 4 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/pyproject.toml -------------------------------------------------------------------------------- /release/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/release/README.md -------------------------------------------------------------------------------- /release/push-aliases.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/release/push-aliases.sh -------------------------------------------------------------------------------- /release/push-to-pypi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/release/push-to-pypi.sh -------------------------------------------------------------------------------- /release/pypi-aliases/papermage/README.md: -------------------------------------------------------------------------------- 1 | # PaperMage 2 | 3 | Alias for [mmda](https://pypi.org/project/mmda/). 4 | -------------------------------------------------------------------------------- /release/pypi-aliases/papermage/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/release/pypi-aliases/papermage/pyproject.toml -------------------------------------------------------------------------------- /release/pypi-aliases/papermage/src/papermage/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/release/pypi-aliases/papermage/src/papermage/__init__.py -------------------------------------------------------------------------------- /release/pypi-aliases/papermage/src/papermage/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /release/pypi-aliases/scipdf/README.md: -------------------------------------------------------------------------------- 1 | # SciPDF 2 | 3 | Alias for [mmda](https://pypi.org/project/mmda/). 4 | -------------------------------------------------------------------------------- /release/pypi-aliases/scipdf/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/release/pypi-aliases/scipdf/pyproject.toml -------------------------------------------------------------------------------- /release/pypi-aliases/scipdf/src/scipdf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/release/pypi-aliases/scipdf/src/scipdf/__init__.py -------------------------------------------------------------------------------- /release/pypi-aliases/scipdf/src/scipdf/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -e .[dev] 2 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/setup.py -------------------------------------------------------------------------------- /src/ai2_internal/README.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/README.txt -------------------------------------------------------------------------------- /src/ai2_internal/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ai2_internal/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/api.py -------------------------------------------------------------------------------- /src/ai2_internal/bibentry_detection_predictor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ai2_internal/bibentry_detection_predictor/data/000026bab3c52aa8ff37dc3e155ffbcb506aa1f6.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/bibentry_detection_predictor/data/000026bab3c52aa8ff37dc3e155ffbcb506aa1f6.pdf -------------------------------------------------------------------------------- /src/ai2_internal/bibentry_detection_predictor/data/no_bibs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/bibentry_detection_predictor/data/no_bibs.pdf -------------------------------------------------------------------------------- /src/ai2_internal/bibentry_detection_predictor/data/spanless_bibs_3cf45514384bbb7d083ae53e19bdc22300e648ab.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/bibentry_detection_predictor/data/spanless_bibs_3cf45514384bbb7d083ae53e19bdc22300e648ab.pdf -------------------------------------------------------------------------------- /src/ai2_internal/bibentry_detection_predictor/integration_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/bibentry_detection_predictor/integration_test.py -------------------------------------------------------------------------------- /src/ai2_internal/bibentry_detection_predictor/interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/bibentry_detection_predictor/interface.py -------------------------------------------------------------------------------- /src/ai2_internal/bibentry_predictor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ai2_internal/bibentry_predictor/integration_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/bibentry_predictor/integration_test.py -------------------------------------------------------------------------------- /src/ai2_internal/bibentry_predictor/interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/bibentry_predictor/interface.py -------------------------------------------------------------------------------- /src/ai2_internal/bibentry_predictor_mmda/README.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/bibentry_predictor_mmda/README.txt -------------------------------------------------------------------------------- /src/ai2_internal/bibentry_predictor_mmda/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ai2_internal/bibentry_predictor_mmda/data/test_data.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/bibentry_predictor_mmda/data/test_data.json.gz -------------------------------------------------------------------------------- /src/ai2_internal/bibentry_predictor_mmda/data/test_data_v2_first_2_bibs_have_empty_span_groups.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/bibentry_predictor_mmda/data/test_data_v2_first_2_bibs_have_empty_span_groups.json.gz -------------------------------------------------------------------------------- /src/ai2_internal/bibentry_predictor_mmda/integration_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/bibentry_predictor_mmda/integration_test.py -------------------------------------------------------------------------------- /src/ai2_internal/bibentry_predictor_mmda/interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/bibentry_predictor_mmda/interface.py -------------------------------------------------------------------------------- /src/ai2_internal/citation_links/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ai2_internal/citation_links/integration_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/citation_links/integration_test.py -------------------------------------------------------------------------------- /src/ai2_internal/citation_links/interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/citation_links/interface.py -------------------------------------------------------------------------------- /src/ai2_internal/citation_mentions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ai2_internal/citation_mentions/data/arxiv-1906.08632-page0.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/citation_mentions/data/arxiv-1906.08632-page0.pdf -------------------------------------------------------------------------------- /src/ai2_internal/citation_mentions/data/arxiv-1906.08632-pages1-2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/citation_mentions/data/arxiv-1906.08632-pages1-2.pdf -------------------------------------------------------------------------------- /src/ai2_internal/citation_mentions/data/arxiv-2201.05673-page1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/citation_mentions/data/arxiv-2201.05673-page1.pdf -------------------------------------------------------------------------------- /src/ai2_internal/citation_mentions/integration_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/citation_mentions/integration_test.py -------------------------------------------------------------------------------- /src/ai2_internal/citation_mentions/interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/citation_mentions/interface.py -------------------------------------------------------------------------------- /src/ai2_internal/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/config.yaml -------------------------------------------------------------------------------- /src/ai2_internal/dwp_heuristic/README.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/dwp_heuristic/README.txt -------------------------------------------------------------------------------- /src/ai2_internal/dwp_heuristic/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ai2_internal/dwp_heuristic/integration_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/dwp_heuristic/integration_test.py -------------------------------------------------------------------------------- /src/ai2_internal/dwp_heuristic/interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/dwp_heuristic/interface.py -------------------------------------------------------------------------------- /src/ai2_internal/dwp_heuristic/test_fixtures/test_doc.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/dwp_heuristic/test_fixtures/test_doc.json -------------------------------------------------------------------------------- /src/ai2_internal/evaluation_notebooks/end_to_end_eval_easier.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/evaluation_notebooks/end_to_end_eval_easier.ipynb -------------------------------------------------------------------------------- /src/ai2_internal/evaluation_notebooks/end_to_end_eval_grobid_comparison.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/evaluation_notebooks/end_to_end_eval_grobid_comparison.ipynb -------------------------------------------------------------------------------- /src/ai2_internal/figure_table_predictors/Create_fixtures_for_unit_test.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/figure_table_predictors/Create_fixtures_for_unit_test.ipynb -------------------------------------------------------------------------------- /src/ai2_internal/figure_table_predictors/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/figure_table_predictors/README.md -------------------------------------------------------------------------------- /src/ai2_internal/figure_table_predictors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ai2_internal/figure_table_predictors/figure_table_timo_service_invocation.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/figure_table_predictors/figure_table_timo_service_invocation.ipynb -------------------------------------------------------------------------------- /src/ai2_internal/figure_table_predictors/figure_table_timo_service_invocation_profiling.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/figure_table_predictors/figure_table_timo_service_invocation_profiling.ipynb -------------------------------------------------------------------------------- /src/ai2_internal/figure_table_predictors/integration_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/figure_table_predictors/integration_test.py -------------------------------------------------------------------------------- /src/ai2_internal/figure_table_predictors/interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/figure_table_predictors/interface.py -------------------------------------------------------------------------------- /src/ai2_internal/figure_table_predictors/performance_metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/figure_table_predictors/performance_metrics.png -------------------------------------------------------------------------------- /src/ai2_internal/figure_table_predictors/test_fixtures/test_doc.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/figure_table_predictors/test_fixtures/test_doc.json -------------------------------------------------------------------------------- /src/ai2_internal/figure_table_predictors/test_fixtures/test_doc_sha_08f02e7888f140a76a00ed23fce2f2fc303a.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/figure_table_predictors/test_fixtures/test_doc_sha_08f02e7888f140a76a00ed23fce2f2fc303a.json -------------------------------------------------------------------------------- /src/ai2_internal/figure_table_predictors/test_fixtures/test_doc_sha_08f02e7888f140a76a00ed23fce2f2fc303a.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/figure_table_predictors/test_fixtures/test_doc_sha_08f02e7888f140a76a00ed23fce2f2fc303a.pdf -------------------------------------------------------------------------------- /src/ai2_internal/figure_table_predictors/test_fixtures/test_doc_sha_d0450478c38dda61f9943f417ab9fcdb2ebeae0a.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/figure_table_predictors/test_fixtures/test_doc_sha_d0450478c38dda61f9943f417ab9fcdb2ebeae0a.json -------------------------------------------------------------------------------- /src/ai2_internal/figure_table_predictors/test_fixtures/test_doc_sha_d0450478c38dda61f9943f417ab9fcdb2ebeae0a.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/figure_table_predictors/test_fixtures/test_doc_sha_d0450478c38dda61f9943f417ab9fcdb2ebeae0a.pdf -------------------------------------------------------------------------------- /src/ai2_internal/layout_parser/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ai2_internal/layout_parser/integration_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/layout_parser/integration_test.py -------------------------------------------------------------------------------- /src/ai2_internal/layout_parser/interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/layout_parser/interface.py -------------------------------------------------------------------------------- /src/ai2_internal/shared_test_fixtures/page0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/shared_test_fixtures/page0.png -------------------------------------------------------------------------------- /src/ai2_internal/svm_word_predictor/README.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/svm_word_predictor/README.txt -------------------------------------------------------------------------------- /src/ai2_internal/svm_word_predictor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ai2_internal/svm_word_predictor/integration_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/svm_word_predictor/integration_test.py -------------------------------------------------------------------------------- /src/ai2_internal/svm_word_predictor/interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/svm_word_predictor/interface.py -------------------------------------------------------------------------------- /src/ai2_internal/svm_word_predictor/test_fixtures/test_doc.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/svm_word_predictor/test_fixtures/test_doc.json -------------------------------------------------------------------------------- /src/ai2_internal/vila/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ai2_internal/vila/integration_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/vila/integration_test.py -------------------------------------------------------------------------------- /src/ai2_internal/vila/interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/vila/interface.py -------------------------------------------------------------------------------- /src/ai2_internal/vila/test_fixtures/test_doc.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/ai2_internal/vila/test_fixtures/test_doc.json -------------------------------------------------------------------------------- /src/mmda/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/mmda/eval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/mmda/eval/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/eval/metrics.py -------------------------------------------------------------------------------- /src/mmda/eval/s2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/eval/s2.py -------------------------------------------------------------------------------- /src/mmda/eval/vlue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/eval/vlue.py -------------------------------------------------------------------------------- /src/mmda/featurizers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/mmda/featurizers/citation_link_featurizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/featurizers/citation_link_featurizers.py -------------------------------------------------------------------------------- /src/mmda/parsers/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/parsers/README.md -------------------------------------------------------------------------------- /src/mmda/parsers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/parsers/__init__.py -------------------------------------------------------------------------------- /src/mmda/parsers/grobid.config: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/parsers/grobid.config -------------------------------------------------------------------------------- /src/mmda/parsers/grobid_augment_existing_document_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/parsers/grobid_augment_existing_document_parser.py -------------------------------------------------------------------------------- /src/mmda/parsers/grobid_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/parsers/grobid_parser.py -------------------------------------------------------------------------------- /src/mmda/parsers/parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/parsers/parser.py -------------------------------------------------------------------------------- /src/mmda/parsers/pdfplumber_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/parsers/pdfplumber_parser.py -------------------------------------------------------------------------------- /src/mmda/parsers/symbol_scraper_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/parsers/symbol_scraper_parser.py -------------------------------------------------------------------------------- /src/mmda/predictors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/__init__.py -------------------------------------------------------------------------------- /src/mmda/predictors/base_predictors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/mmda/predictors/base_predictors/base_heuristic_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/base_predictors/base_heuristic_predictor.py -------------------------------------------------------------------------------- /src/mmda/predictors/base_predictors/base_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/base_predictors/base_predictor.py -------------------------------------------------------------------------------- /src/mmda/predictors/d2_predictors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/mmda/predictors/d2_predictors/bibentry_detection_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/d2_predictors/bibentry_detection_predictor.py -------------------------------------------------------------------------------- /src/mmda/predictors/heuristic_predictors/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/heuristic_predictors/README.md -------------------------------------------------------------------------------- /src/mmda/predictors/heuristic_predictors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/mmda/predictors/heuristic_predictors/dictionary_word_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/heuristic_predictors/dictionary_word_predictor.py -------------------------------------------------------------------------------- /src/mmda/predictors/heuristic_predictors/figure_table_predictors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/heuristic_predictors/figure_table_predictors.py -------------------------------------------------------------------------------- /src/mmda/predictors/heuristic_predictors/grobid_citation_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/heuristic_predictors/grobid_citation_predictor.py -------------------------------------------------------------------------------- /src/mmda/predictors/heuristic_predictors/section_header_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/heuristic_predictors/section_header_predictor.py -------------------------------------------------------------------------------- /src/mmda/predictors/heuristic_predictors/sentence_boundary_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/heuristic_predictors/sentence_boundary_predictor.py -------------------------------------------------------------------------------- /src/mmda/predictors/heuristic_predictors/whitespace_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/heuristic_predictors/whitespace_predictor.py -------------------------------------------------------------------------------- /src/mmda/predictors/hf_predictors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/mmda/predictors/hf_predictors/base_hf_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/hf_predictors/base_hf_predictor.py -------------------------------------------------------------------------------- /src/mmda/predictors/hf_predictors/bibentry_predictor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/mmda/predictors/hf_predictors/bibentry_predictor/predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/hf_predictors/bibentry_predictor/predictor.py -------------------------------------------------------------------------------- /src/mmda/predictors/hf_predictors/bibentry_predictor/types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/hf_predictors/bibentry_predictor/types.py -------------------------------------------------------------------------------- /src/mmda/predictors/hf_predictors/bibentry_predictor/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/hf_predictors/bibentry_predictor/utils.py -------------------------------------------------------------------------------- /src/mmda/predictors/hf_predictors/mention_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/hf_predictors/mention_predictor.py -------------------------------------------------------------------------------- /src/mmda/predictors/hf_predictors/span_group_classification_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/hf_predictors/span_group_classification_predictor.py -------------------------------------------------------------------------------- /src/mmda/predictors/hf_predictors/token_classification_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/hf_predictors/token_classification_predictor.py -------------------------------------------------------------------------------- /src/mmda/predictors/hf_predictors/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/hf_predictors/utils.py -------------------------------------------------------------------------------- /src/mmda/predictors/hf_predictors/vila_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/hf_predictors/vila_predictor.py -------------------------------------------------------------------------------- /src/mmda/predictors/lp_predictors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/lp_predictors.py -------------------------------------------------------------------------------- /src/mmda/predictors/sklearn_predictors/base_sklearn_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/sklearn_predictors/base_sklearn_predictor.py -------------------------------------------------------------------------------- /src/mmda/predictors/sklearn_predictors/svm_word_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/sklearn_predictors/svm_word_predictor.py -------------------------------------------------------------------------------- /src/mmda/predictors/tesseract_predictors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/tesseract_predictors.py -------------------------------------------------------------------------------- /src/mmda/predictors/xgb_predictors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/mmda/predictors/xgb_predictors/citation_link_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/xgb_predictors/citation_link_predictor.py -------------------------------------------------------------------------------- /src/mmda/predictors/xgb_predictors/section_nesting_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/predictors/xgb_predictors/section_nesting_predictor.py -------------------------------------------------------------------------------- /src/mmda/rasterizers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/rasterizers/__init__.py -------------------------------------------------------------------------------- /src/mmda/rasterizers/rasterizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/rasterizers/rasterizer.py -------------------------------------------------------------------------------- /src/mmda/recipes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/recipes/__init__.py -------------------------------------------------------------------------------- /src/mmda/recipes/core_recipe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/recipes/core_recipe.py -------------------------------------------------------------------------------- /src/mmda/recipes/recipe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/recipes/recipe.py -------------------------------------------------------------------------------- /src/mmda/types/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/types/__init__.py -------------------------------------------------------------------------------- /src/mmda/types/annotation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/types/annotation.py -------------------------------------------------------------------------------- /src/mmda/types/box.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/types/box.py -------------------------------------------------------------------------------- /src/mmda/types/document.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/types/document.py -------------------------------------------------------------------------------- /src/mmda/types/image.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/types/image.py -------------------------------------------------------------------------------- /src/mmda/types/indexers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/types/indexers.py -------------------------------------------------------------------------------- /src/mmda/types/metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/types/metadata.py -------------------------------------------------------------------------------- /src/mmda/types/names.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/types/names.py -------------------------------------------------------------------------------- /src/mmda/types/old/annotations.old.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/types/old/annotations.old.py -------------------------------------------------------------------------------- /src/mmda/types/old/boundingbox.old.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/types/old/boundingbox.old.py -------------------------------------------------------------------------------- /src/mmda/types/old/document.old.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/types/old/document.old.py -------------------------------------------------------------------------------- /src/mmda/types/old/document_elements.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/types/old/document_elements.py -------------------------------------------------------------------------------- /src/mmda/types/old/image.old.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/types/old/image.old.py -------------------------------------------------------------------------------- /src/mmda/types/old/span.old.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/types/old/span.old.py -------------------------------------------------------------------------------- /src/mmda/types/span.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/types/span.py -------------------------------------------------------------------------------- /src/mmda/types/user_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/types/user_data.py -------------------------------------------------------------------------------- /src/mmda/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/mmda/utils/outline_metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/utils/outline_metadata.py -------------------------------------------------------------------------------- /src/mmda/utils/stringify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/utils/stringify.py -------------------------------------------------------------------------------- /src/mmda/utils/tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/src/mmda/utils/tools.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/fixtures/1903.10676.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/1903.10676.pdf -------------------------------------------------------------------------------- /tests/fixtures/2107.07170.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/2107.07170.pdf -------------------------------------------------------------------------------- /tests/fixtures/4be952924cd565488b4a239dc6549095029ee578.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/4be952924cd565488b4a239dc6549095029ee578.pdf -------------------------------------------------------------------------------- /tests/fixtures/4be952924cd565488b4a239dc6549095029ee578__pdfplumber_doc.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/4be952924cd565488b4a239dc6549095029ee578__pdfplumber_doc.json -------------------------------------------------------------------------------- /tests/fixtures/doc_fixture_2149e0c1106e6dfa36ea787167d6611cf88b69cb.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/doc_fixture_2149e0c1106e6dfa36ea787167d6611cf88b69cb.json -------------------------------------------------------------------------------- /tests/fixtures/doc_fixture_2149e0c1106e6dfa36ea787167d6611cf88b69cb.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/doc_fixture_2149e0c1106e6dfa36ea787167d6611cf88b69cb.pdf -------------------------------------------------------------------------------- /tests/fixtures/doc_fixture_e5910c027af0ee9c1901c57f6579d903aedee7f4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/doc_fixture_e5910c027af0ee9c1901c57f6579d903aedee7f4.pkl -------------------------------------------------------------------------------- /tests/fixtures/e5910c027af0ee9c1901c57f6579d903aedee7f4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/e5910c027af0ee9c1901c57f6579d903aedee7f4.pdf -------------------------------------------------------------------------------- /tests/fixtures/example-dictionary.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/example-dictionary.txt -------------------------------------------------------------------------------- /tests/fixtures/figure_table_predictions.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/figure_table_predictions.json -------------------------------------------------------------------------------- /tests/fixtures/grobid-tei-maml-header.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/grobid-tei-maml-header.xml -------------------------------------------------------------------------------- /tests/fixtures/grobid-tei-no-abstract.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/grobid-tei-no-abstract.xml -------------------------------------------------------------------------------- /tests/fixtures/grobid-tei-no-title.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/grobid-tei-no-title.xml -------------------------------------------------------------------------------- /tests/fixtures/grobid_augment_existing_document_parser/e5910c027af0ee9c1901c57f6579d903aedee7f4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/grobid_augment_existing_document_parser/e5910c027af0ee9c1901c57f6579d903aedee7f4.pdf -------------------------------------------------------------------------------- /tests/fixtures/grobid_augment_existing_document_parser/e5910c027af0ee9c1901c57f6579d903aedee7f4.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/grobid_augment_existing_document_parser/e5910c027af0ee9c1901c57f6579d903aedee7f4.xml -------------------------------------------------------------------------------- /tests/fixtures/grobid_augment_existing_document_parser/e5910c027af0ee9c1901c57f6579d903aedee7f4__pdfplumber_doc.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/grobid_augment_existing_document_parser/e5910c027af0ee9c1901c57f6579d903aedee7f4__pdfplumber_doc.json -------------------------------------------------------------------------------- /tests/fixtures/grobid_augment_existing_document_parser/e5910c027af0ee9c1901c57f6579d903aedee7f4_no_authors.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/grobid_augment_existing_document_parser/e5910c027af0ee9c1901c57f6579d903aedee7f4_no_authors.xml -------------------------------------------------------------------------------- /tests/fixtures/grobid_augment_existing_document_parser/grobid-no-authors.config: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/grobid_augment_existing_document_parser/grobid-no-authors.config -------------------------------------------------------------------------------- /tests/fixtures/grobid_augment_existing_document_parser/grobid.config: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/grobid_augment_existing_document_parser/grobid.config -------------------------------------------------------------------------------- /tests/fixtures/nesting.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/nesting.bin -------------------------------------------------------------------------------- /tests/fixtures/svm_word_predictor/hyphen_clf.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/svm_word_predictor/hyphen_clf.joblib -------------------------------------------------------------------------------- /tests/fixtures/svm_word_predictor/neg_words.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/svm_word_predictor/neg_words.txt -------------------------------------------------------------------------------- /tests/fixtures/svm_word_predictor/ohencoder.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/svm_word_predictor/ohencoder.joblib -------------------------------------------------------------------------------- /tests/fixtures/svm_word_predictor/pos_words.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/svm_word_predictor/pos_words.txt -------------------------------------------------------------------------------- /tests/fixtures/svm_word_predictor/scaler.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/svm_word_predictor/scaler.joblib -------------------------------------------------------------------------------- /tests/fixtures/svm_word_predictor/svm_word_predictor.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/svm_word_predictor/svm_word_predictor.tar.gz -------------------------------------------------------------------------------- /tests/fixtures/svm_word_predictor/unigram_probs.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/svm_word_predictor/unigram_probs.pkl -------------------------------------------------------------------------------- /tests/fixtures/test-uu.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/test-uu.pdf -------------------------------------------------------------------------------- /tests/fixtures/types/20fdafb68d0e69d193527a9a1cbe64e7e69a3798__bib_entry_span_groups_from_box_groups.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/types/20fdafb68d0e69d193527a9a1cbe64e7e69a3798__bib_entry_span_groups_from_box_groups.json -------------------------------------------------------------------------------- /tests/fixtures/types/20fdafb68d0e69d193527a9a1cbe64e7e69a3798__pdfplumber_doc.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/types/20fdafb68d0e69d193527a9a1cbe64e7e69a3798__pdfplumber_doc.json -------------------------------------------------------------------------------- /tests/fixtures/types/c8b53e2d9cd247e2d42719e337bfb13784d22bd2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/types/c8b53e2d9cd247e2d42719e337bfb13784d22bd2.json -------------------------------------------------------------------------------- /tests/fixtures/types/spp-dag-0-0-4-doc.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/types/spp-dag-0-0-4-doc.json -------------------------------------------------------------------------------- /tests/fixtures/types/test_document_box_groups.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/types/test_document_box_groups.json -------------------------------------------------------------------------------- /tests/fixtures/unicode-test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/unicode-test.json -------------------------------------------------------------------------------- /tests/fixtures/utils/121e30c48546e671dc5e16c694c5e69b392cf8fb_0.0.23.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/utils/121e30c48546e671dc5e16c694c5e69b392cf8fb_0.0.23.json -------------------------------------------------------------------------------- /tests/fixtures/utils/20fdafb68d0e69d193527a9a1cbe64e7e69a3798__bib_entries.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/utils/20fdafb68d0e69d193527a9a1cbe64e7e69a3798__bib_entries.json -------------------------------------------------------------------------------- /tests/fixtures/utils/20fdafb68d0e69d193527a9a1cbe64e7e69a3798__pdfplumber_doc.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/fixtures/utils/20fdafb68d0e69d193527a9a1cbe64e7e69a3798__pdfplumber_doc.json -------------------------------------------------------------------------------- /tests/test_eval/test_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_eval/test_metrics.py -------------------------------------------------------------------------------- /tests/test_internal_ai2/test_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_internal_ai2/test_api.py -------------------------------------------------------------------------------- /tests/test_parsers/test_grobid_augment_existing_document_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_parsers/test_grobid_augment_existing_document_parser.py -------------------------------------------------------------------------------- /tests/test_parsers/test_grobid_header_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_parsers/test_grobid_header_parser.py -------------------------------------------------------------------------------- /tests/test_parsers/test_override.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_parsers/test_override.py -------------------------------------------------------------------------------- /tests/test_parsers/test_pdf_plumber_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_parsers/test_pdf_plumber_parser.py -------------------------------------------------------------------------------- /tests/test_predictors/test.json.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_predictors/test_bibentry_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_predictors/test_bibentry_predictor.py -------------------------------------------------------------------------------- /tests/test_predictors/test_dictionary_word_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_predictors/test_dictionary_word_predictor.py -------------------------------------------------------------------------------- /tests/test_predictors/test_figure_table_predictors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_predictors/test_figure_table_predictors.py -------------------------------------------------------------------------------- /tests/test_predictors/test_section_header_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_predictors/test_section_header_predictor.py -------------------------------------------------------------------------------- /tests/test_predictors/test_section_nesting_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_predictors/test_section_nesting_predictor.py -------------------------------------------------------------------------------- /tests/test_predictors/test_span_group_classification_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_predictors/test_span_group_classification_predictor.py -------------------------------------------------------------------------------- /tests/test_predictors/test_svm_word_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_predictors/test_svm_word_predictor.py -------------------------------------------------------------------------------- /tests/test_predictors/test_vila_predictors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_predictors/test_vila_predictors.py -------------------------------------------------------------------------------- /tests/test_predictors/test_whitespace_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_predictors/test_whitespace_predictor.py -------------------------------------------------------------------------------- /tests/test_recipes/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_recipes/core_recipe_fixtures.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_recipes/core_recipe_fixtures.py -------------------------------------------------------------------------------- /tests/test_recipes/test_core_recipe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_recipes/test_core_recipe.py -------------------------------------------------------------------------------- /tests/test_types/test_annotation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_types/test_annotation.py -------------------------------------------------------------------------------- /tests/test_types/test_box.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_types/test_box.py -------------------------------------------------------------------------------- /tests/test_types/test_document.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_types/test_document.py -------------------------------------------------------------------------------- /tests/test_types/test_indexers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_types/test_indexers.py -------------------------------------------------------------------------------- /tests/test_types/test_json_conversion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_types/test_json_conversion.py -------------------------------------------------------------------------------- /tests/test_types/test_metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_types/test_metadata.py -------------------------------------------------------------------------------- /tests/test_types/test_span.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_types/test_span.py -------------------------------------------------------------------------------- /tests/test_types/test_span_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_types/test_span_group.py -------------------------------------------------------------------------------- /tests/test_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_utils/test_outline_metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_utils/test_outline_metadata.py -------------------------------------------------------------------------------- /tests/test_utils/test_stringify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_utils/test_stringify.py -------------------------------------------------------------------------------- /tests/test_utils/test_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/mmda/HEAD/tests/test_utils/test_tools.py --------------------------------------------------------------------------------