├── .gitignore ├── LICENSE ├── README.md ├── benchmarker ├── __init__.py ├── __version__.py ├── cli │ ├── __init__.py │ └── l5 │ │ ├── __init__.py │ │ ├── baselines │ │ └── test_generations.txt.jsonl │ │ ├── common │ │ ├── __init__.py │ │ ├── callbacks │ │ │ ├── __init__.py │ │ │ ├── custom_progress_bar.py │ │ │ ├── save_prediction_callback.py │ │ │ └── save_transformer_checkpoint.py │ │ ├── data │ │ │ ├── __init__.py │ │ │ └── datamodule.py │ │ ├── loggers │ │ │ ├── __init__.py │ │ │ ├── better_mlflow_logger.py │ │ │ └── file_logger.py │ │ ├── metrics │ │ │ ├── __init__.py │ │ │ └── accuracy_metric.py │ │ ├── pl_modules │ │ │ ├── __init__.py │ │ │ ├── base_lightning_module.py │ │ │ └── l5_generation_module.py │ │ └── utils.py │ │ ├── create_memmaps.py │ │ └── train.py ├── config │ ├── __init__.py │ └── benchmarker_config.py ├── data │ ├── __init__.py │ ├── data_converter.py │ ├── document.py │ ├── model │ │ ├── __init__.py │ │ ├── example.py │ │ ├── feature.py │ │ └── span.py │ ├── reader │ │ ├── __init__.py │ │ ├── benchmark_dataset.py │ │ ├── common.py │ │ ├── corpus.py │ │ └── qa_strategies.py │ ├── slicer.py │ ├── t5.py │ └── utils.py ├── embedding │ ├── __init__.py │ ├── base.py │ ├── factory │ │ ├── __init__.py │ │ ├── common.py │ │ └── context.py │ ├── image │ │ ├── __init__.py │ │ └── discrete_vae │ │ │ ├── __init__.py │ │ │ └── discrete_embeddings.py │ ├── multiple.py │ ├── relative │ │ ├── __init__.py │ │ └── relative.py │ └── wrapper.py ├── input_loader │ ├── __init__.py │ ├── common_format.py │ └── data_loader.py ├── model │ ├── __init__.py │ └── t5.py └── utils │ ├── __init__.py │ ├── cmp_helpers.py │ ├── pregenerated.py │ └── training.py ├── create_memmaps.sh ├── downloaders └── industry_documents │ ├── download_pdfs.py │ ├── file_ids.txt.gz │ └── requirements.txt ├── examples └── due │ └── no_pretrain │ ├── train_1d_DeepForm.sh │ ├── train_1d_DocVQA.sh │ ├── train_1d_InfographicsVQA.sh │ ├── train_1d_KleisterCharity.sh │ ├── train_1d_PWC.sh │ ├── train_1d_TabFact.sh │ ├── train_1d_WikiTableQuestions.sh │ ├── train_2d_DeepForm.sh │ ├── train_2d_DocVQA.sh │ ├── train_2d_InfographicsVQA.sh │ ├── train_2d_KleisterCharity.sh │ ├── train_2d_PWC.sh │ ├── train_2d_TabFact.sh │ └── train_2d_WikiTableQuestions.sh ├── postprocessors ├── converter.py └── converter_pwc.py ├── requirements.txt ├── requirements └── core.txt ├── setup.cfg └── setup.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/README.md -------------------------------------------------------------------------------- /benchmarker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/__init__.py -------------------------------------------------------------------------------- /benchmarker/__version__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/__version__.py -------------------------------------------------------------------------------- /benchmarker/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarker/cli/l5/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarker/cli/l5/baselines/test_generations.txt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/cli/l5/baselines/test_generations.txt.jsonl -------------------------------------------------------------------------------- /benchmarker/cli/l5/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarker/cli/l5/common/callbacks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/cli/l5/common/callbacks/__init__.py -------------------------------------------------------------------------------- /benchmarker/cli/l5/common/callbacks/custom_progress_bar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/cli/l5/common/callbacks/custom_progress_bar.py -------------------------------------------------------------------------------- /benchmarker/cli/l5/common/callbacks/save_prediction_callback.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/cli/l5/common/callbacks/save_prediction_callback.py -------------------------------------------------------------------------------- /benchmarker/cli/l5/common/callbacks/save_transformer_checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/cli/l5/common/callbacks/save_transformer_checkpoint.py -------------------------------------------------------------------------------- /benchmarker/cli/l5/common/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/cli/l5/common/data/__init__.py -------------------------------------------------------------------------------- /benchmarker/cli/l5/common/data/datamodule.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/cli/l5/common/data/datamodule.py -------------------------------------------------------------------------------- /benchmarker/cli/l5/common/loggers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarker/cli/l5/common/loggers/better_mlflow_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/cli/l5/common/loggers/better_mlflow_logger.py -------------------------------------------------------------------------------- /benchmarker/cli/l5/common/loggers/file_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/cli/l5/common/loggers/file_logger.py -------------------------------------------------------------------------------- /benchmarker/cli/l5/common/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/cli/l5/common/metrics/__init__.py -------------------------------------------------------------------------------- /benchmarker/cli/l5/common/metrics/accuracy_metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/cli/l5/common/metrics/accuracy_metric.py -------------------------------------------------------------------------------- /benchmarker/cli/l5/common/pl_modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarker/cli/l5/common/pl_modules/base_lightning_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/cli/l5/common/pl_modules/base_lightning_module.py -------------------------------------------------------------------------------- /benchmarker/cli/l5/common/pl_modules/l5_generation_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/cli/l5/common/pl_modules/l5_generation_module.py -------------------------------------------------------------------------------- /benchmarker/cli/l5/common/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/cli/l5/common/utils.py -------------------------------------------------------------------------------- /benchmarker/cli/l5/create_memmaps.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/cli/l5/create_memmaps.py -------------------------------------------------------------------------------- /benchmarker/cli/l5/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/cli/l5/train.py -------------------------------------------------------------------------------- /benchmarker/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/config/__init__.py -------------------------------------------------------------------------------- /benchmarker/config/benchmarker_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/config/benchmarker_config.py -------------------------------------------------------------------------------- /benchmarker/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarker/data/data_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/data/data_converter.py -------------------------------------------------------------------------------- /benchmarker/data/document.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/data/document.py -------------------------------------------------------------------------------- /benchmarker/data/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/data/model/__init__.py -------------------------------------------------------------------------------- /benchmarker/data/model/example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/data/model/example.py -------------------------------------------------------------------------------- /benchmarker/data/model/feature.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/data/model/feature.py -------------------------------------------------------------------------------- /benchmarker/data/model/span.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/data/model/span.py -------------------------------------------------------------------------------- /benchmarker/data/reader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/data/reader/__init__.py -------------------------------------------------------------------------------- /benchmarker/data/reader/benchmark_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/data/reader/benchmark_dataset.py -------------------------------------------------------------------------------- /benchmarker/data/reader/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/data/reader/common.py -------------------------------------------------------------------------------- /benchmarker/data/reader/corpus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/data/reader/corpus.py -------------------------------------------------------------------------------- /benchmarker/data/reader/qa_strategies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/data/reader/qa_strategies.py -------------------------------------------------------------------------------- /benchmarker/data/slicer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/data/slicer.py -------------------------------------------------------------------------------- /benchmarker/data/t5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/data/t5.py -------------------------------------------------------------------------------- /benchmarker/data/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/data/utils.py -------------------------------------------------------------------------------- /benchmarker/embedding/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarker/embedding/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/embedding/base.py -------------------------------------------------------------------------------- /benchmarker/embedding/factory/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/embedding/factory/__init__.py -------------------------------------------------------------------------------- /benchmarker/embedding/factory/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/embedding/factory/common.py -------------------------------------------------------------------------------- /benchmarker/embedding/factory/context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/embedding/factory/context.py -------------------------------------------------------------------------------- /benchmarker/embedding/image/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarker/embedding/image/discrete_vae/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarker/embedding/image/discrete_vae/discrete_embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/embedding/image/discrete_vae/discrete_embeddings.py -------------------------------------------------------------------------------- /benchmarker/embedding/multiple.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/embedding/multiple.py -------------------------------------------------------------------------------- /benchmarker/embedding/relative/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarker/embedding/relative/relative.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/embedding/relative/relative.py -------------------------------------------------------------------------------- /benchmarker/embedding/wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/embedding/wrapper.py -------------------------------------------------------------------------------- /benchmarker/input_loader/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarker/input_loader/common_format.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/input_loader/common_format.py -------------------------------------------------------------------------------- /benchmarker/input_loader/data_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/input_loader/data_loader.py -------------------------------------------------------------------------------- /benchmarker/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/model/__init__.py -------------------------------------------------------------------------------- /benchmarker/model/t5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/model/t5.py -------------------------------------------------------------------------------- /benchmarker/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarker/utils/cmp_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/utils/cmp_helpers.py -------------------------------------------------------------------------------- /benchmarker/utils/pregenerated.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/utils/pregenerated.py -------------------------------------------------------------------------------- /benchmarker/utils/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/benchmarker/utils/training.py -------------------------------------------------------------------------------- /create_memmaps.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/create_memmaps.sh -------------------------------------------------------------------------------- /downloaders/industry_documents/download_pdfs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/downloaders/industry_documents/download_pdfs.py -------------------------------------------------------------------------------- /downloaders/industry_documents/file_ids.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/downloaders/industry_documents/file_ids.txt.gz -------------------------------------------------------------------------------- /downloaders/industry_documents/requirements.txt: -------------------------------------------------------------------------------- 1 | botocore 2 | boto3 3 | tqdm 4 | 5 | -------------------------------------------------------------------------------- /examples/due/no_pretrain/train_1d_DeepForm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/examples/due/no_pretrain/train_1d_DeepForm.sh -------------------------------------------------------------------------------- /examples/due/no_pretrain/train_1d_DocVQA.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/examples/due/no_pretrain/train_1d_DocVQA.sh -------------------------------------------------------------------------------- /examples/due/no_pretrain/train_1d_InfographicsVQA.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/examples/due/no_pretrain/train_1d_InfographicsVQA.sh -------------------------------------------------------------------------------- /examples/due/no_pretrain/train_1d_KleisterCharity.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/examples/due/no_pretrain/train_1d_KleisterCharity.sh -------------------------------------------------------------------------------- /examples/due/no_pretrain/train_1d_PWC.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/examples/due/no_pretrain/train_1d_PWC.sh -------------------------------------------------------------------------------- /examples/due/no_pretrain/train_1d_TabFact.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/examples/due/no_pretrain/train_1d_TabFact.sh -------------------------------------------------------------------------------- /examples/due/no_pretrain/train_1d_WikiTableQuestions.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/examples/due/no_pretrain/train_1d_WikiTableQuestions.sh -------------------------------------------------------------------------------- /examples/due/no_pretrain/train_2d_DeepForm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/examples/due/no_pretrain/train_2d_DeepForm.sh -------------------------------------------------------------------------------- /examples/due/no_pretrain/train_2d_DocVQA.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/examples/due/no_pretrain/train_2d_DocVQA.sh -------------------------------------------------------------------------------- /examples/due/no_pretrain/train_2d_InfographicsVQA.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/examples/due/no_pretrain/train_2d_InfographicsVQA.sh -------------------------------------------------------------------------------- /examples/due/no_pretrain/train_2d_KleisterCharity.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/examples/due/no_pretrain/train_2d_KleisterCharity.sh -------------------------------------------------------------------------------- /examples/due/no_pretrain/train_2d_PWC.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/examples/due/no_pretrain/train_2d_PWC.sh -------------------------------------------------------------------------------- /examples/due/no_pretrain/train_2d_TabFact.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/examples/due/no_pretrain/train_2d_TabFact.sh -------------------------------------------------------------------------------- /examples/due/no_pretrain/train_2d_WikiTableQuestions.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/examples/due/no_pretrain/train_2d_WikiTableQuestions.sh -------------------------------------------------------------------------------- /postprocessors/converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/postprocessors/converter.py -------------------------------------------------------------------------------- /postprocessors/converter_pwc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/postprocessors/converter_pwc.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -e . # Uses setup.py 2 | -------------------------------------------------------------------------------- /requirements/core.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/requirements/core.txt -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/setup.cfg -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/due-benchmark/baselines/HEAD/setup.py --------------------------------------------------------------------------------