├── .gitignore ├── examples └── __init__.py ├── pdfstructure ├── __init__.py ├── analysis │ ├── __init__.py │ ├── annotate.py │ ├── sizemapper.py │ └── styledistribution.py ├── hierarchy │ ├── __init__.py │ ├── detectheader.py │ ├── headercompare.py │ ├── parser.py │ └── traversal.py ├── model │ ├── __init__.py │ ├── document.py │ └── style.py ├── printer.py ├── source.py └── utils.py ├── readme.md ├── requirements.txt ├── setup.py └── tests ├── __init__.py ├── helper.py ├── resources ├── 5648.pdf ├── IE00BM67HT60-ATB-FS-DE-2020-2-28.pdf ├── SameSize_BoldTitle.pdf ├── SameSize_EnumeratedTitle.pdf ├── SameStyleOnly.pdf ├── interview_cheatsheet-excerpt.png ├── interview_cheatsheet.pdf ├── lorem.pdf ├── paper.pdf ├── parsed │ ├── interview_cheatsheet.json │ └── interview_cheatsheet_pretty.txt └── samplepptx.pdf ├── test_custom_use_cases.py ├── test_document.py ├── test_headercompare.py ├── test_hierarchy.py ├── test_printer.py ├── test_style_analyser.py ├── test_traversal.py └── test_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | .idea 3 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pdfstructure/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pdfstructure/analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pdfstructure/analysis/annotate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/pdfstructure/analysis/annotate.py -------------------------------------------------------------------------------- /pdfstructure/analysis/sizemapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/pdfstructure/analysis/sizemapper.py -------------------------------------------------------------------------------- /pdfstructure/analysis/styledistribution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/pdfstructure/analysis/styledistribution.py -------------------------------------------------------------------------------- /pdfstructure/hierarchy/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pdfstructure/hierarchy/detectheader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/pdfstructure/hierarchy/detectheader.py -------------------------------------------------------------------------------- /pdfstructure/hierarchy/headercompare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/pdfstructure/hierarchy/headercompare.py -------------------------------------------------------------------------------- /pdfstructure/hierarchy/parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/pdfstructure/hierarchy/parser.py -------------------------------------------------------------------------------- /pdfstructure/hierarchy/traversal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/pdfstructure/hierarchy/traversal.py -------------------------------------------------------------------------------- /pdfstructure/model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pdfstructure/model/document.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/pdfstructure/model/document.py -------------------------------------------------------------------------------- /pdfstructure/model/style.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/pdfstructure/model/style.py -------------------------------------------------------------------------------- /pdfstructure/printer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/pdfstructure/printer.py -------------------------------------------------------------------------------- /pdfstructure/source.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/pdfstructure/source.py -------------------------------------------------------------------------------- /pdfstructure/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/pdfstructure/utils.py -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/readme.md -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/requirements.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/setup.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/tests/helper.py -------------------------------------------------------------------------------- /tests/resources/5648.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/tests/resources/5648.pdf -------------------------------------------------------------------------------- /tests/resources/IE00BM67HT60-ATB-FS-DE-2020-2-28.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/tests/resources/IE00BM67HT60-ATB-FS-DE-2020-2-28.pdf -------------------------------------------------------------------------------- /tests/resources/SameSize_BoldTitle.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/tests/resources/SameSize_BoldTitle.pdf -------------------------------------------------------------------------------- /tests/resources/SameSize_EnumeratedTitle.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/tests/resources/SameSize_EnumeratedTitle.pdf -------------------------------------------------------------------------------- /tests/resources/SameStyleOnly.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/tests/resources/SameStyleOnly.pdf -------------------------------------------------------------------------------- /tests/resources/interview_cheatsheet-excerpt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/tests/resources/interview_cheatsheet-excerpt.png -------------------------------------------------------------------------------- /tests/resources/interview_cheatsheet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/tests/resources/interview_cheatsheet.pdf -------------------------------------------------------------------------------- /tests/resources/lorem.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/tests/resources/lorem.pdf -------------------------------------------------------------------------------- /tests/resources/paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/tests/resources/paper.pdf -------------------------------------------------------------------------------- /tests/resources/parsed/interview_cheatsheet.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/tests/resources/parsed/interview_cheatsheet.json -------------------------------------------------------------------------------- /tests/resources/parsed/interview_cheatsheet_pretty.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/tests/resources/parsed/interview_cheatsheet_pretty.txt -------------------------------------------------------------------------------- /tests/resources/samplepptx.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/tests/resources/samplepptx.pdf -------------------------------------------------------------------------------- /tests/test_custom_use_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/tests/test_custom_use_cases.py -------------------------------------------------------------------------------- /tests/test_document.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/tests/test_document.py -------------------------------------------------------------------------------- /tests/test_headercompare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/tests/test_headercompare.py -------------------------------------------------------------------------------- /tests/test_hierarchy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/tests/test_hierarchy.py -------------------------------------------------------------------------------- /tests/test_printer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/tests/test_printer.py -------------------------------------------------------------------------------- /tests/test_style_analyser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/tests/test_style_analyser.py -------------------------------------------------------------------------------- /tests/test_traversal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/tests/test_traversal.py -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrizH/pdfstructure/HEAD/tests/test_utils.py --------------------------------------------------------------------------------