├── .github
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── ISSUE_TEMPLATE
│ ├── bug_report.md
│ ├── config.yml
│ └── feature_request.md
├── example.png
├── layout-parser.png
├── lp.png
└── workflows
│ ├── ci.yml
│ └── release.yml
├── .gitignore
├── .readthedocs.yml
├── LICENSE
├── MANIFEST.in
├── README.md
├── dev-requirements.txt
├── docs
├── Makefile
├── api_doc
│ ├── elements.rst
│ ├── io.rst
│ ├── models.rst
│ ├── ocr.rst
│ └── visualization.rst
├── conf.py
├── example
│ ├── deep_layout_parsing
│ │ ├── index.rst
│ │ ├── output_21_0.png
│ │ └── output_7_0.png
│ ├── load_coco
│ │ ├── index.rst
│ │ ├── output_10_0.png
│ │ ├── output_15_0.png
│ │ └── output_8_0.png
│ └── parse_ocr
│ │ ├── index.rst
│ │ ├── output_14_0.png
│ │ ├── output_17_0.png
│ │ ├── output_19_0.png
│ │ ├── output_25_0.png
│ │ └── output_6_1.png
├── index.rst
├── make.bat
└── notes
│ ├── installation.md
│ ├── intersection.png
│ ├── modelzoo.md
│ ├── quickstart.rst
│ ├── shape_operations.md
│ └── union.png
├── examples
├── Customizing Layout Models with Label Studio Annotation
│ ├── Customizing Layout Models with Label Studio Annotation.ipynb
│ ├── README.md
│ ├── download_annotation.py
│ ├── pipeline-overview.jpg
│ └── task-overview.png
├── Deep Layout Parsing.ipynb
├── Load and visualize layout annotations in the COCO format.ipynb
├── OCR Tables and Parse the Output.ipynb
└── data
│ ├── example-table.jpeg
│ └── paper-image.jpg
├── installation.md
├── setup.cfg
├── setup.py
├── src
└── layoutparser
│ ├── __init__.py
│ ├── elements
│ ├── __init__.py
│ ├── base.py
│ ├── errors.py
│ ├── layout.py
│ ├── layout_elements.py
│ └── utils.py
│ ├── file_utils.py
│ ├── io
│ ├── __init__.py
│ ├── basic.py
│ └── pdf.py
│ ├── misc
│ └── NotoSerifCJKjp-Regular.otf
│ ├── models
│ ├── __init__.py
│ ├── auto_layoutmodel.py
│ ├── base_catalog.py
│ ├── base_layoutmodel.py
│ ├── detectron2
│ │ ├── __init__.py
│ │ ├── catalog.py
│ │ └── layoutmodel.py
│ ├── effdet
│ │ ├── __init__.py
│ │ ├── catalog.py
│ │ └── layoutmodel.py
│ ├── model_config.py
│ └── paddledetection
│ │ ├── __init__.py
│ │ ├── catalog.py
│ │ └── layoutmodel.py
│ ├── ocr
│ ├── __init__.py
│ ├── base.py
│ ├── gcv_agent.py
│ └── tesseract_agent.py
│ ├── tools
│ ├── __init__.py
│ └── shape_operations.py
│ └── visualization.py
├── tests
├── fixtures
│ ├── io
│ │ ├── empty.pdf
│ │ ├── example.pdf
│ │ ├── generate_test_jsons.py
│ │ ├── interval.json
│ │ ├── interval_textblock.json
│ │ ├── layout.csv
│ │ ├── layout.json
│ │ ├── layout_textblock.csv
│ │ ├── layout_textblock.json
│ │ ├── quadrilateral.json
│ │ ├── quadrilateral_textblock.json
│ │ ├── rectangle.json
│ │ └── rectangle_textblock.json
│ ├── model
│ │ ├── config.yml
│ │ ├── layout_detection_reference.jpg
│ │ ├── layout_detection_reference.json
│ │ └── test_model_image.jpg
│ └── ocr
│ │ ├── test_gcv_image.jpg
│ │ ├── test_gcv_response.json
│ │ └── test_tesseract_response.pickle
├── test_elements.py
├── test_io.py
├── test_model.py
├── test_ocr.py
├── test_tools.py
└── test_visualization.py
└── tests_deps
├── test_file_utils.py
├── test_only_detectron2.py
├── test_only_effdet.py
└── test_only_paddledetection.py
/.github/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 |
2 | # Contributor Covenant Code of Conduct
3 |
4 | ## Our Pledge
5 |
6 | In the interest of fostering an open and welcoming environment, we as
7 | contributors and maintainers pledge to make participation in our project and
8 | our community a harassment-free experience for everyone, regardless of age, body
9 | size, disability, ethnicity, sex characteristics, gender identity and expression,
10 | level of experience, education, socio-economic status, nationality, personal
11 | appearance, race, religion, or sexual identity and orientation.
12 |
13 | ## Our Standards
14 |
15 | Examples of behavior that contributes to creating a positive environment
16 | include:
17 |
18 | * Using welcoming and inclusive language
19 | * Being respectful of differing viewpoints and experiences
20 | * Gracefully accepting constructive criticism
21 | * Focusing on what is best for the community
22 | * Showing empathy towards other community members
23 |
24 | Examples of unacceptable behavior by participants include:
25 |
26 | * The use of sexualized language or imagery and unwelcome sexual attention or
27 | advances
28 | * Trolling, insulting/derogatory comments, and personal or political attacks
29 | * Public or private harassment
30 | * Publishing others' private information, such as a physical or electronic
31 | address, without explicit permission
32 | * Other conduct which could reasonably be considered inappropriate in a
33 | professional setting
34 |
35 | ## Our Responsibilities
36 |
37 | Project maintainers are responsible for clarifying the standards of acceptable
38 | behavior and are expected to take appropriate and fair corrective action in
39 | response to any instances of unacceptable behavior.
40 |
41 | Project maintainers have the right and responsibility to remove, edit, or
42 | reject comments, commits, code, wiki edits, issues, and other contributions
43 | that are not aligned to this Code of Conduct, or to ban temporarily or
44 | permanently any contributor for other behaviors that they deem inappropriate,
45 | threatening, offensive, or harmful.
46 |
47 | ## Scope
48 |
49 | This Code of Conduct applies within all project spaces, and it also applies when
50 | an individual is representing the project or its community in public spaces.
51 | Examples of representing a project or community include using an official
52 | project e-mail address, posting via an official social media account, or acting
53 | as an appointed representative at an online or offline event. Representation of
54 | a project may be further defined and clarified by project maintainers.
55 |
56 | ## Enforcement
57 |
58 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
59 | reported by contacting the project team at layoutparser@gmail.com. All
60 | complaints will be reviewed and investigated and will result in a response that
61 | is deemed necessary and appropriate to the circumstances. The project team is
62 | obligated to maintain confidentiality with regard to the reporter of an incident.
63 | Further details of specific enforcement policies may be posted separately.
64 |
65 | Project maintainers who do not follow or enforce the Code of Conduct in good
66 | faith may face temporary or permanent repercussions as determined by other
67 | members of the project's leadership.
68 |
69 | ## Attribution
70 |
71 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
72 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
73 |
74 | [homepage]: https://www.contributor-covenant.org
75 |
76 | For answers to common questions about this code of conduct, see
77 | https://www.contributor-covenant.org/faq
78 |
--------------------------------------------------------------------------------
/.github/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to Layout Parser
2 |
3 | 🙌 Thank you for reading this and plan to contribute! We hope you can join us and work on this exciting project that can transform document image analysis pipelines with the full power of Deep Learning.
4 |
5 | All kinds of contributions are welcome, including but not limited to:
6 |
7 | - Better documentation and examples for more use cases
8 | - New pre-trained layout detection models
9 | - New features
10 |
11 | ## Planned features
12 |
13 | We are planning to improve different aspects of Layout Parser, and any feedback and contributions would be great!
14 |
15 | ### Layout Modeling
16 |
17 | (Pre-trained) layout models are one of the most important components in Layout Parser, and we are planning to broaden the support for layout models:
18 |
19 | - Support framework other than Detectron2, e.g., [MMOCR](https://github.com/open-mmlab/mmocr). It may lead to easier installation and support for more application scenarios like receipt or invoice detection.
20 | - Support segmentation-based models, e.g., [dhSegment](https://github.com/dhlab-epfl/dhSegment)
21 | - Better customized training of layout detection models, see [layout-model-training](https://github.com/Layout-Parser/layout-model-training)
22 | - Reproducing novel layout models in the current framework, e.g., [CascadeTabNet](https://github.com/DevashishPrasad/CascadeTabNet)
23 |
24 | We are also working on the Layout Parser platform that can support users' sharing their own models. Please check [community-platform](https://github.com/Layout-Parser/community-platform) for more detail.
25 |
26 | ### Advanced Layout Pipeline
27 |
28 | - Support defining `Pipeline` that specifies an end-to-end layout processing pipeline for complex documents
29 |
30 | ### Command Line Tool and Layout Detection Service
31 |
32 | Layout Parser can be easily turned into a command line tool or service to process documents in bulk
33 |
34 | - Build a command line tool based on `Click` that supports commands like `layoutparser process --path <path/to/documents>`
35 | - Build a RESTful Layout Parser service based on tools like `FastAPI` with similar supports as the command line tool
36 | - Performance improvements for such services
37 |
38 | ### Easy Installation and Deployment
39 |
40 | - Better ways for installing Detectron2 and related components on Windows machines
41 | - A Docker configuration for installing the Layout Parser
42 |
43 | ## How to Contribute?
44 |
45 | This how-to-guide is abridged from the [MMOCR Repository](https://github.com/open-mmlab/mmocr/blob/main/.github/CONTRIBUTING.md).
46 |
47 | ### Main Steps
48 |
49 | 1. Fork and pull the latest Layout Parser Repository
50 | 2. Checkout a new branch (do not use main branch for PRs)
51 | 3. Commit your changes
52 | 4. Create a PR
53 |
54 | **Notes**:
55 | 1. If you plan to add some new features that involve big changes, please open an issue to discuss with us first
56 | 2. If you are the author of some papers and would like to include your method into Layout Parser, please let us know (open an issue or contact the maintainers). Your contribution would be much appreciated.
57 | 3. For new features and new modules, unit tests are required to improve the code robustness
58 | 4. You might want to run `pip install -r dev-requirements.txt` to install the dev-dependencies.
59 |
60 | ### Code Style
61 |
62 | 1. We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style.
63 | 2. We use the following tools for linting and formatting:
64 | - pylint: linter
65 | - black: formatter
66 | 3. We suggest adding [type hints](https://docs.python.org/3/library/typing.html) for all APIs.
67 |
68 | Sincere thanks,
69 |
70 | Zejiang (Shannon) Shen
71 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: 'bug'
6 | assignees: ''
7 | ---
8 |
9 | **Describe the bug**
10 | A clear and concise description of what the bug is.
11 |
12 | **Checklist**
13 |
14 | 1. I have searched related issues but cannot get the expected help.
15 | 2. The bug has not been fixed in the latest version, see the [Layout Parser Releases](https://github.com/Layout-Parser/layout-parser/releases/)
16 |
17 | **To Reproduce**
18 | Steps to reproduce the behavior:
19 | 1. What command or script did you run?
20 | ```none
21 | A placeholder for the command.
22 | ```
23 |
24 | **Environment**
25 | 1. Please describe your Platform [Windows/MacOS/Linux]
26 | 2. Please show the Layout Parser version
2 | 3. You may add additional information that may be helpful for locating the problem, such as
28 | - How you installed PyTorch [e.g., pip, conda, source]
29 | - Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)
30 |
31 | **Error traceback**
32 | If applicable, paste the error traceback here.
33 |
34 | **Screenshots**
35 | If applicable, add screenshots to help explain your problem.
36 |
37 | **Additional context**
38 | Add any other context about the problem here.
39 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: true
2 | contact_links:
3 | - name: Installation Guide
4 | url: https://layout-parser.readthedocs.io/en/latest/notes/installation.html
5 | about: |
6 | For any questions related to installation, especially installation on
7 | Windows platforms, please check the Installation Guide first.
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 | ---
8 |
9 | **Motivation**
10 | A clear and concise description of the motivation of the feature, and how it relates to making Layout Parser better.
11 | You can also find examples in [Layout Parser CONTRIBUTING guidelines](../CONTRIBUTING.md)
12 |
13 | **Related resources**
14 | If there is an official code release or third-party implementations, please also provide the information here, which would be very helpful.
15 |
16 | **Additional context**
17 | Add any other context or screenshots about the feature request here.
18 |
--------------------------------------------------------------------------------
/.github/example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/.github/example.png
--------------------------------------------------------------------------------
/.github/layout-parser.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/.github/layout-parser.png
--------------------------------------------------------------------------------
/.github/lp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/.github/lp.png
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on:
4 | push:
5 | branches:
6 | - 'main'
7 | paths:
8 | - '**.py'
9 | pull_request:
10 |
11 | jobs:
12 |
13 | test_only_effdet_backend:
14 |
15 | runs-on: ubuntu-latest
16 | steps:
17 | - uses: actions/checkout@v2
18 | - uses: actions/setup-python@v2
19 | with:
20 | python-version: '3.7'
21 |
22 | - name: Test Dependency Support
23 | run: |
24 | pip install pytest
25 | pip install -e . # The bare layoutparser module
26 | pytest tests_deps/test_file_utils.py
27 |
28 | - name: Install only effdet deps
29 | run: |
30 | pip install pytest
31 | pip install -e ".[effdet]"
32 | pytest tests_deps/test_only_effdet.py
33 |
34 | test_only_detectron2_backend:
35 |
36 | runs-on: ubuntu-latest
37 | steps:
38 | - uses: actions/checkout@v2
39 | - uses: actions/setup-python@v2
40 | with:
41 | python-version: '3.7'
42 |
43 | - name: Install only Detectron2 deps
44 | run: |
45 | pip install pytest
46 | pip install -e .
47 | pip install torchvision && pip install "git+https://github.com/facebookresearch/detectron2.git@v0.5#egg=detectron2"
48 | pytest tests_deps/test_only_detectron2.py
49 |
50 | test_only_paddledetection_backend:
51 |
52 | runs-on: ubuntu-latest
53 | steps:
54 | - uses: actions/checkout@v2
55 | - uses: actions/setup-python@v2
56 | with:
57 | python-version: '3.7'
58 |
59 | - name: Install only PaddleDetection deps
60 | run: |
61 | pip install pytest
62 | pip install -e ".[paddledetection]"
63 | pytest tests_deps/test_only_paddledetection.py
64 | env:
65 | PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION: python
66 |
67 | test_all_methods_all_backends:
68 | needs: [test_only_effdet_backend, test_only_detectron2_backend, test_only_paddledetection_backend]
69 | runs-on: ubuntu-latest
70 | strategy:
71 | matrix:
72 | python-version: [3.7, 3.8]
73 | steps:
74 | - uses: actions/checkout@v2
75 |
76 | - name: Set up Python ${{ matrix.python-version }}
77 | uses: actions/setup-python@v2
78 | with:
79 | python-version: ${{ matrix.python-version }}
80 |
81 | - name: Install library and dependencies
82 | run: |
83 | python -m pip install --upgrade pip
84 | pip install .
85 |
86 | - name: Lint with flake8
87 | run: |
88 | pip install flake8
89 | # stop the build if there are Python syntax errors or undefined names
90 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --ignore F821
91 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
92 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
93 |
94 | - name: Test with pytest
95 | run: |
96 | # Install additional requirements when running tests
97 | pip install ".[effdet]"
98 | pip install -r dev-requirements.txt
99 | pytest tests
100 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Upload Python Package
2 |
3 | on:
4 | release:
5 | types: [published]
6 |
7 | jobs:
8 | release-pypi:
9 | runs-on: ubuntu-latest
10 | steps:
11 | - uses: actions/checkout@v2
12 | - name: Set up Python
13 | uses: actions/setup-python@v2
14 | with:
15 | python-version: '3.x'
16 | - name: Install dependencies
17 | run: |
18 | python -m pip install --upgrade pip
19 | pip install setuptools wheel twine
20 | - name: Build and publish
21 | env:
22 | TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
23 | TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
24 | run: |
25 | python setup.py sdist bdist_wheel
26 | twine upload dist/*
27 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Examples files
2 | examples/Customizing Layout Models with Label Studio Annotation/downloaded-annotations
3 |
4 | *.bak
5 | .gitattributes
6 | .last_checked
7 | .gitconfig
8 | *.bak
9 | *.log
10 | *~
11 | ~*
12 | _tmp*
13 | tmp*
14 | tags
15 |
16 | # Byte-compiled / optimized / DLL files
17 | __pycache__/
18 | *.py[cod]
19 | *$py.class
20 |
21 | # C extensions
22 | *.so
23 |
24 | # Distribution / packaging
25 | .Python
26 | env/
27 | build/
28 | develop-eggs/
29 | dist/
30 | downloads/
31 | eggs/
32 | .eggs/
33 | lib/
34 | lib64/
35 | parts/
36 | sdist/
37 | var/
38 | wheels/
39 | *.egg-info/
40 | .installed.cfg
41 | *.egg
42 |
43 | # PyInstaller
44 | # Usually these files are written by a python script from a template
45 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
46 | *.manifest
47 | *.spec
48 |
49 | # Installer logs
50 | pip-log.txt
51 | pip-delete-this-directory.txt
52 |
53 | # Unit test / coverage reports
54 | htmlcov/
55 | .tox/
56 | .coverage
57 | .coverage.*
58 | .cache
59 | nosetests.xml
60 | coverage.xml
61 | *.cover
62 | .hypothesis/
63 |
64 | # Translations
65 | *.mo
66 | *.pot
67 |
68 | # Django stuff:
69 | *.log
70 | local_settings.py
71 |
72 | # Flask stuff:
73 | instance/
74 | .webassets-cache
75 |
76 | # Scrapy stuff:
77 | .scrapy
78 |
79 | # Sphinx documentation
80 | docs/_build/
81 |
82 | # PyBuilder
83 | target/
84 |
85 | # Jupyter Notebook
86 | .ipynb_checkpoints
87 |
88 | # pyenv
89 | .python-version
90 |
91 | # celery beat schedule file
92 | celerybeat-schedule
93 |
94 | # SageMath parsed files
95 | *.sage.py
96 |
97 | # dotenv
98 | .env
99 |
100 | # virtualenv
101 | .venv
102 | venv/
103 | ENV/
104 |
105 | # Spyder project settings
106 | .spyderproject
107 | .spyproject
108 |
109 | # Rope project settings
110 | .ropeproject
111 |
112 | # mkdocs documentation
113 | /site
114 |
115 | # mypy
116 | .mypy_cache/
117 |
118 | .vscode
119 | *.swp
120 |
121 | # osx generated files
122 | .DS_Store
123 | .DS_Store?
124 | .Trashes
125 | ehthumbs.db
126 | Thumbs.db
127 | .idea
128 |
129 | # pytest
130 | .pytest_cache
131 |
132 | # tools/trust-doc-nbs
133 | docs_src/.last_checked
134 |
135 | # symlinks to fastai
136 | docs_src/fastai
137 | tools/fastai
138 |
139 | # link checker
140 | checklink/cookies.txt
141 |
142 | # .gitconfig is now autogenerated
143 | .gitconfig
144 |
145 |
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | # .readthedocs.yml
2 | # Read the Docs configuration file
3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4 |
5 | # Required
6 | version: 2
7 |
8 | # Build documentation in the docs/ directory with Sphinx
9 | sphinx:
10 | configuration: docs/conf.py
11 |
12 | # Optionally build your docs in additional formats such as PDF
13 | formats: all
14 |
15 | # Optionally set the version of Python and requirements required to build your docs
16 | python:
17 | version: 3.7
18 | install:
19 | - method: pip
20 | path: .
21 | extra_requirements:
22 | - effdet
23 | - requirements: dev-requirements.txt
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE
2 | include README.md
3 | include src/layoutparser/misc/*.otf
4 | recursive-exclude * __pycache__
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | A unified toolkit for Deep Learning Based Document Image Analysis
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 | ---
21 |
22 | ## What is LayoutParser
23 |
24 | 
25 |
26 | LayoutParser aims to provide a wide range of tools that streamline Document Image Analysis (DIA) tasks. Please check the LayoutParser [demo video](https://youtu.be/8yA5xB4Dg8c) (1 min) or [full talk](https://www.youtube.com/watch?v=YG0qepPgyGY) (15 min) for details. And here are some key features:
27 |
28 | - LayoutParser provides a rich repository of deep learning models for layout detection as well as a set of unified APIs for using them. For example,
29 |
30 |
31 | Perform DL layout detection in 4 lines of code
32 |
33 | ```python
34 | import layoutparser as lp
35 | model = lp.AutoLayoutModel('lp://EfficientDet/PubLayNet')
36 | # image = Image.open("path/to/image")
37 | layout = model.detect(image)
38 | ```
39 |
40 |
41 |
42 | - LayoutParser comes with a set of layout data structures with carefully designed APIs that are optimized for document image analysis tasks. For example,
43 |
44 |
45 | Selecting layout/textual elements in the left column of a page
46 |
47 | ```python
48 | image_width = image.size[0]
49 | left_column = lp.Interval(0, image_width/2, axis='x')
50 | layout.filter_by(left_column, center=True) # select objects in the left column
51 | ```
52 |
53 |
54 |
55 |
56 | Performing OCR for each detected Layout Region
57 |
58 | ```python
59 | ocr_agent = lp.TesseractAgent()
60 | for layout_region in layout:
61 | image_segment = layout_region.crop(image)
62 | text = ocr_agent.detect(image_segment)
63 | ```
64 |
65 |
66 |
67 |
68 | Flexible APIs for visualizing the detected layouts
69 |
70 | ```python
71 | lp.draw_box(image, layout, box_width=1, show_element_id=True, box_alpha=0.25)
72 | ```
73 |
74 |
75 |
76 |
77 |
78 |
79 | Loading layout data stored in json, csv, and even PDFs
80 |
81 | ```python
82 | layout = lp.load_json("path/to/json")
83 | layout = lp.load_csv("path/to/csv")
84 | pdf_layout = lp.load_pdf("path/to/pdf")
85 | ```
86 |
87 |
88 |
89 | - LayoutParser is also an open platform that enables the sharing of layout detection models and DIA pipelines among the community.
90 |
91 | Check the LayoutParser open platform
92 |
93 |
94 |
95 | Submit your models/pipelines to LayoutParser
96 |
97 |
98 | ## Installation
99 |
100 | After several major updates, layoutparser provides various functionalities and deep learning models from different backends. But it is still easy to install layoutparser, and we designed the installation method in a way such that you can choose to install only the needed dependencies for your project:
101 |
102 | ```bash
103 | pip install layoutparser # Install the base layoutparser library
104 | pip install "layoutparser[layoutmodels]" # Install DL layout model toolkit
105 | pip install "layoutparser[ocr]" # Install OCR toolkit
106 | ```
107 |
108 | Extra steps are needed if you want to use Detectron2-based models. Please check [installation.md](installation.md) for additional details on layoutparser installation.
109 |
110 | ## Examples
111 |
112 | We provide a series of examples to help you start using the layout parser library:
113 |
114 | 1. [Table OCR and Results Parsing](https://github.com/Layout-Parser/layout-parser/blob/main/examples/OCR%20Tables%20and%20Parse%20the%20Output.ipynb): `layoutparser` can be used to conveniently OCR documents and convert the output into structured data.
115 |
116 | 2. [Deep Layout Parsing Example](https://github.com/Layout-Parser/layout-parser/blob/main/examples/Deep%20Layout%20Parsing.ipynb): With the help of Deep Learning, `layoutparser` supports the analysis of very complex documents and the processing of the hierarchical structure in the layouts.
117 |
118 | ## Contributing
119 |
120 | We encourage you to contribute to Layout Parser! Please check out the [Contributing guidelines](.github/CONTRIBUTING.md) for guidelines about how to proceed. Join us!
121 |
122 | ## Citing `layoutparser`
123 |
124 | If you find `layoutparser` helpful to your work, please consider citing our tool and [paper](https://arxiv.org/pdf/2103.15348.pdf) using the following BibTeX entry.
125 |
126 | ```
127 | @article{shen2021layoutparser,
128 | title={LayoutParser: A Unified Toolkit for Deep Learning Based Document Image Analysis},
129 | author={Shen, Zejiang and Zhang, Ruochen and Dell, Melissa and Lee, Benjamin Charles Germain and Carlson, Jacob and Li, Weining},
130 | journal={arXiv preprint arXiv:2103.15348},
131 | year={2021}
132 | }
133 | ```
--------------------------------------------------------------------------------
/dev-requirements.txt:
--------------------------------------------------------------------------------
1 | pytest
2 | torch
3 | numpy
4 | opencv-python
5 | pandas
6 | docutils==0.16
7 | Sphinx==3.0.0
8 | recommonmark==0.6.0
9 | sphinx-markdown-tables
10 | sphinx_rtd_theme
11 | google-cloud-vision==1
12 | pytesseract
13 | pycocotools
14 | git+https://github.com/facebookresearch/detectron2.git@v0.4#egg=detectron2
15 | paddlepaddle
16 | effdet
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/api_doc/elements.rst:
--------------------------------------------------------------------------------
1 | Layout Elements
2 | ================================
3 |
4 |
5 | Coordinate System
6 | --------------------------------
7 |
8 | .. autoclass:: layoutparser.elements.Interval
9 | :members:
10 | :undoc-members:
11 | :show-inheritance:
12 |
13 | .. autoclass:: layoutparser.elements.Rectangle
14 | :members:
15 | :undoc-members:
16 | :show-inheritance:
17 |
18 | .. autoclass:: layoutparser.elements.Quadrilateral
19 | :members:
20 | :undoc-members:
21 | :show-inheritance:
22 |
23 |
24 | TextBlock
25 | --------------------------------
26 |
27 | .. autoclass:: layoutparser.elements.TextBlock
28 | :members:
29 | :undoc-members:
30 | :show-inheritance:
31 |
32 | Layout
33 | --------------------------------
34 |
35 | .. autoclass:: layoutparser.elements.Layout
36 | :members:
37 | :undoc-members:
38 | :show-inheritance:
--------------------------------------------------------------------------------
/docs/api_doc/io.rst:
--------------------------------------------------------------------------------
1 | Load and Export Layout Data
2 | ================================
3 |
4 |
5 | `Dataframe` and CSV
6 | --------------------------------
7 |
8 | .. autofunction:: layoutparser.io.load_dataframe
9 |
10 | .. autofunction:: layoutparser.io.load_csv
11 |
12 |
13 | `Dict` and JSON
14 | --------------------------------
15 |
16 | .. autofunction:: layoutparser.io.load_dict
17 |
18 | .. autofunction:: layoutparser.io.load_json
19 |
20 |
21 | PDF
22 | --------------------------------
23 |
24 | .. autofunction:: layoutparser.io.load_pdf
25 |
26 |
27 | Other Formats
28 | --------------------------------
29 | Stay tuned! We are working on to support more formats.
--------------------------------------------------------------------------------
/docs/api_doc/models.rst:
--------------------------------------------------------------------------------
1 | Layout Detection Models
2 | ================================
3 |
4 |
5 | .. autoclass:: layoutparser.models.Detectron2LayoutModel
6 | :members:
7 | :undoc-members:
8 | :show-inheritance:
--------------------------------------------------------------------------------
/docs/api_doc/ocr.rst:
--------------------------------------------------------------------------------
1 | Text Recognition Tool
2 | ================================
3 |
4 |
5 | Google Cloud Vision API
6 | --------------------------------
7 |
8 | .. autoclass:: layoutparser.ocr.GCVFeatureType
9 | :members:
10 | :undoc-members:
11 | :show-inheritance:
12 |
13 | .. autoclass:: layoutparser.ocr.GCVAgent
14 | :members:
15 | :undoc-members:
16 | :show-inheritance:
17 |
18 |
19 | Tesseract OCR API
20 | --------------------------------
21 |
22 | .. autoclass:: layoutparser.ocr.TesseractFeatureType
23 | :members:
24 | :undoc-members:
25 | :show-inheritance:
26 |
27 | .. autoclass:: layoutparser.ocr.TesseractAgent
28 | :members:
29 | :undoc-members:
30 | :show-inheritance:
--------------------------------------------------------------------------------
/docs/api_doc/visualization.rst:
--------------------------------------------------------------------------------
1 | Layout and Text Visualization
2 | ================================
3 |
4 | .. automodule:: layoutparser.visualization
5 | :members:
6 | :undoc-members:
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
# Copyright 2021 The Layout Parser team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
sys.path.insert(0, os.path.abspath('../src'))
import layoutparser  # imported after the path tweak so __version__ is read from the local source tree

# -- Project information -----------------------------------------------------

project = 'Layout Parser'
copyright = '2020-2021, Layout Parser Contributors'
author = 'Layout Parser Contributors'

# The full version, including alpha/beta/rc tags
release = layoutparser.__version__


# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    "recommonmark",
    "sphinx.ext.autodoc",
    "sphinx.ext.napoleon",
    "sphinx.ext.intersphinx",
    "sphinx.ext.todo",
    "sphinx.ext.coverage",
    "sphinx.ext.mathjax",
    "sphinx.ext.viewcode",
    "sphinx.ext.githubpages",
    "sphinx_markdown_tables"
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# Build both reStructuredText and (via recommonmark) Markdown sources.
source_suffix = [".rst", ".md"]

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']


# Additional Configurations
# Resolve :py: cross-references against the Python standard library docs.
intersphinx_mapping = {'python': ('https://docs.python.org/3', None)}
# List autodoc members in source order rather than alphabetically.
autodoc_member_order = 'bysource'
# Concatenate the class docstring and the __init__ docstring.
autoclass_content = 'both'

# [TODO] Solve the issue for functools.wrappers cause **kwargs in function declarations
--------------------------------------------------------------------------------
/docs/example/deep_layout_parsing/index.rst:
--------------------------------------------------------------------------------
1 | Deep Layout Parsing
2 | ===================
3 |
4 | In this tutorial, we will show how to use the ``layoutparser`` API to
5 |
6 | 1. Load Deep Learning Layout Detection models and predict the layout of
7 | the paper image
8 | 2. Use the coordinate system to parse the output
9 |
10 | The ``paper-image`` is from https://arxiv.org/abs/2004.08686.
11 |
12 | .. code:: python
13 |
14 | import layoutparser as lp
15 | import cv2
16 |
17 | Use Layout Models to detect complex layout
18 | ------------------------------------------
19 |
20 | ``layoutparser`` can identify the layout of the given document with only
21 | 4 lines of code.
22 |
23 | .. code:: python
24 |
25 | image = cv2.imread("data/paper-image.jpg")
26 | image = image[..., ::-1]
27 | # Convert the image from BGR (cv2 default loading style)
28 | # to RGB
29 |
30 | .. code:: python
31 |
32 | model = lp.Detectron2LayoutModel('lp://PubLayNet/faster_rcnn_R_50_FPN_3x/config',
33 | extra_config=["MODEL.ROI_HEADS.SCORE_THRESH_TEST", 0.8],
34 | label_map={0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"})
35 | # Load the deep layout model from the layoutparser API
36 | # For all the supported model, please check the Model
37 | # Zoo Page: https://layout-parser.readthedocs.io/en/latest/notes/modelzoo.html
38 |
39 | .. code:: python
40 |
41 | layout = model.detect(image)
42 | # Detect the layout of the input image
43 |
44 | .. code:: python
45 |
46 | lp.draw_box(image, layout, box_width=3)
47 | # Show the detected layout of the input image
48 |
49 |
50 |
51 |
52 | .. image:: output_7_0.png
53 |
54 |
55 |
56 | Check the results from the model
57 | --------------------------------
58 |
59 | .. code:: python
60 |
61 | type(layout)
62 |
63 |
64 |
65 |
66 | .. parsed-literal::
67 |
68 | layoutparser.elements.Layout
69 |
70 |
71 |
72 | The ``layout`` variables is a ``Layout`` instance, which is inherited
73 | from list and supports handy methods for layout processing.
74 |
75 | .. code:: python
76 |
77 | layout[0]
78 |
79 |
80 |
81 |
82 | .. parsed-literal::
83 |
84 | TextBlock(block=Rectangle(x_1=646.4182739257812, y_1=1420.1715087890625, x_2=1132.8687744140625, y_2=1479.7222900390625), text=, id=None, type=Text, parent=None, next=None, score=0.9996440410614014)
85 |
86 |
87 |
88 | ``layout`` contains a series of ``TextBlock``\ s. They store the
89 | coordinates in the ``.block`` variable and other information of the
90 | blocks like block type in ``.type``, text in ``.text``, etc. More
91 | information can be found at the
 92 | `documentation <https://layout-parser.readthedocs.io/en/latest/api_doc/elements.html>`__.
93 |
94 | Use the coordinate system to process the detected layout
95 | --------------------------------------------------------
96 |
97 | Firstly we filter text region of specific type:
98 |
99 | .. code:: python
100 |
101 | text_blocks = lp.Layout([b for b in layout if b.type=='Text'])
102 | figure_blocks = lp.Layout([b for b in layout if b.type=='Figure'])
103 |
104 | As there could be text region detected inside the figure region, we just
105 | drop them:
106 |
107 | .. code:: python
108 |
109 | text_blocks = lp.Layout([b for b in text_blocks \
110 | if not any(b.is_in(b_fig) for b_fig in figure_blocks)])
111 |
112 | Finally sort the text regions and assign ids:
113 |
114 | .. code:: python
115 |
116 | h, w = image.shape[:2]
117 |
118 | left_interval = lp.Interval(0, w/2*1.05, axis='x').put_on_canvas(image)
119 |
120 | left_blocks = text_blocks.filter_by(left_interval, center=True)
121 | left_blocks.sort(key = lambda b:b.coordinates[1], inplace=True)
122 |
123 | right_blocks = [b for b in text_blocks if b not in left_blocks]
124 | right_blocks.sort(key = lambda b:b.coordinates[1], inplace=True)
125 |
126 | # And finally combine the two list and add the index
127 | # according to the order
128 | text_blocks = lp.Layout([b.set(id = idx) for idx, b in enumerate(left_blocks + right_blocks)])
129 |
130 | Visualize the cleaned text blocks:
131 |
132 | .. code:: python
133 |
134 | lp.draw_box(image, text_blocks,
135 | box_width=3,
136 | show_element_id=True)
137 |
138 |
139 |
140 |
141 | .. image:: output_21_0.png
142 |
143 |
144 |
145 | Fetch the text inside each text region
146 | ---------------------------------------
147 |
148 | We can also combine with the OCR functionality in ``layoutparser`` to
149 | fetch the text in the document.
150 |
151 | .. code:: python
152 |
153 | ocr_agent = lp.TesseractAgent(languages='eng')
154 | # Initialize the tesseract ocr engine. You might need
155 | # to install the OCR components in layoutparser:
156 | # pip install layoutparser[ocr]
157 |
158 | .. code:: python
159 |
160 | for block in text_blocks:
161 | segment_image = (block
162 | .pad(left=5, right=5, top=5, bottom=5)
163 | .crop_image(image))
164 | # add padding in each image segment can help
165 | # improve robustness
166 |
167 | text = ocr_agent.detect(segment_image)
168 | block.set(text=text, inplace=True)
169 |
170 | .. code:: python
171 |
172 | for txt in text_blocks.get_texts():
173 | print(txt, end='\n---\n')
174 |
175 |
176 | .. parsed-literal::
177 |
178 | Figure 7: Annotation Examples in HJDataset. (a) and (b) show two examples for the labeling of main pages. The boxes
179 | are colored differently to reflect the layout element categories. Illustrated in (c), the items in each index page row are
180 | categorized as title blocks, and the annotations are denser.
181 | ---
182 | tion over union (IOU) level [0.50:0.95]’, on the test data. In
183 | general, the high mAP values indicate accurate detection of
184 | the layout elements. The Faster R-CNN and Mask R-CNN
185 | achieve comparable results, better than RetinaNet. Notice-
186 | ably, the detections for small blocks like title are less pre-
187 | cise, and the accuracy drops sharply for the title category. In
188 | Figure 8, (a) and (b) illustrate the accurate prediction results
189 | of the Faster R-CNN model.
190 | ---
191 | We also examine how our dataset can help with
192 | world document digitization application. When digitizing
193 | new publications, researchers usually do not generate large
194 | scale ground truth data to train their layout analysis models.
195 | If they are able to adapt our dataset, or models trained on
196 | our dataset, to develop models on their data, they can build
197 | their pipelines more efficiently and develop more accurate
198 | models. To this end, we conduct two experiments. First we
199 | examine how layout analysis models trained on the main
200 | pages can be used for understanding index pages. More-
201 | over, we study how the pre-trained models perform on other
202 | historical Japanese documents.
203 | ---
204 | Table 4 compares the performance of five Faster R-CNN
205 | models that are trained differently on index pages. If the
206 | model loads pre-trained weights from HJDataset, it includes
207 | information learned from main pages. Models trained over
208 | ---
209 | ?This is a core metric developed for the COCO competition [| 2] for
210 | evaluating the object detection quality.
211 | ---
212 | all the training data can be viewed as the benchmarks, while
213 | training with few samples (five in this case) are consid-
214 | ered to mimic real-world scenarios. Given different train-
215 | ing data, models pre-trained on HJDataset perform signifi-
216 | cantly better than those initialized with COCO weights. In-
217 | tuitively, models trained on more data perform better than
218 | those with fewer samples. We also directly use the model
219 | trained on main to predict index pages without fine-
220 | tuning. The low zero-shot prediction accuracy indicates the
221 | dissimilarity between index and main pages. The large
222 | increase in mAP from 0.344 to 0.471 after the model is
223 | ---
224 | Table 3: Detection mAP @ IOU [0.50:0.95] of different
225 | models for each category on the test set. All values are given
226 | as percentages.
227 | ---
228 | * For training Mask R-CNN, the segmentation masks are the quadri-
229 | lateral regions for each block. Compared to the rectangular bounding
230 | boxes, they delineate the text region more accurately.
231 | ---
232 |
233 |
--------------------------------------------------------------------------------
/docs/example/deep_layout_parsing/output_21_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/docs/example/deep_layout_parsing/output_21_0.png
--------------------------------------------------------------------------------
/docs/example/deep_layout_parsing/output_7_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/docs/example/deep_layout_parsing/output_7_0.png
--------------------------------------------------------------------------------
/docs/example/load_coco/index.rst:
--------------------------------------------------------------------------------
1 | Load COCO Layout Annotations
2 | ==============================================================
3 |
4 | Preparation
5 | -----------
6 |
7 | In this notebook, I will illustrate how to use LayoutParser to load and
8 | visualize the layout annotation in the COCO format.
9 |
10 | Before starting, please remember to download PubLayNet annotations and
11 | images from their
12 | `website `__
13 | (let’s just use the validation set for now as the training set is very
14 | large). And let’s put all extracted files in the
15 | ``data/publaynet/annotations`` and ``data/publaynet/val`` folder.
16 |
17 | And we need to install an additional library for conveniently handling
18 | the COCO data format:
19 |
20 | .. code:: bash
21 |
22 | pip install pycocotools
23 |
24 | OK - Let’s get on the code:
25 |
26 | Loading and visualizing layouts using Layout-Parser
27 | ---------------------------------------------------
28 |
29 | .. code:: python
30 |
31 | from pycocotools.coco import COCO
32 | import layoutparser as lp
33 | import random
34 | import cv2
35 |
36 | .. code:: python
37 |
38 | def load_coco_annotations(annotations, coco=None):
39 | """
40 | Args:
41 | annotations (List):
 42 |                 a list of coco annotations for the current image
 43 |             coco (`optional`, defaults to `None`):
44 | COCO annotation object instance. If set, this function will
45 | convert the loaded annotation category ids to category names
46 | set in COCO.categories
47 | """
48 | layout = lp.Layout()
49 |
50 | for ele in annotations:
51 |
52 | x, y, w, h = ele['bbox']
53 |
54 | layout.append(
55 | lp.TextBlock(
56 | block = lp.Rectangle(x, y, w+x, h+y),
57 | type = ele['category_id'] if coco is None else coco.cats[ele['category_id']]['name'],
58 | id = ele['id']
59 | )
60 | )
61 |
62 | return layout
63 |
64 | The ``load_coco_annotations`` function will help convert COCO
65 | annotations into the layoutparser objects.
66 |
67 | .. code:: python
68 |
69 | COCO_ANNO_PATH = 'data/publaynet/annotations/val.json'
70 | COCO_IMG_PATH = 'data/publaynet/val'
71 |
72 | coco = COCO(COCO_ANNO_PATH)
73 |
74 |
75 | .. parsed-literal::
76 |
77 | loading annotations into memory...
78 | Done (t=1.17s)
79 | creating index...
80 | index created!
81 |
82 |
83 | .. code:: python
84 |
85 | color_map = {
86 | 'text': 'red',
87 | 'title': 'blue',
88 | 'list': 'green',
89 | 'table': 'purple',
90 | 'figure': 'pink',
91 | }
92 |
93 |
 94 |     for image_id in random.sample(list(coco.imgs.keys()), 1):
95 | image_info = coco.imgs[image_id]
96 | annotations = coco.loadAnns(coco.getAnnIds([image_id]))
97 |
98 | image = cv2.imread(f'{COCO_IMG_PATH}/{image_info["file_name"]}')
99 | layout = load_coco_annotations(annotations, coco)
100 |
101 | viz = lp.draw_box(image, layout, color_map=color_map)
102 | display(viz) # show the results
103 |
104 |
105 |
106 | .. image:: output_8_0.png
107 |
108 |
109 | You could add more information in the visualization.
110 |
111 | .. code:: python
112 |
113 | lp.draw_box(image,
114 | [b.set(id=f'{b.id}/{b.type}') for b in layout],
115 | color_map=color_map,
116 | show_element_id=True, id_font_size=10,
117 | id_text_background_color='grey',
118 | id_text_color='white')
119 |
120 |
121 |
122 |
123 | .. image:: output_10_0.png
124 |
125 |
126 |
127 | Model Predictions on loaded data
128 | --------------------------------
129 |
130 | We could also check how the trained layout model performs on the input
131 | image. Following this
132 | `instruction `__,
133 | we could conveniently load a layout prediction model and run predictions
134 | on the existing image.
135 |
136 | .. code:: python
137 |
138 | model = lp.Detectron2LayoutModel('lp://PubLayNet/faster_rcnn_R_50_FPN_3x/config',
139 | extra_config=["MODEL.ROI_HEADS.SCORE_THRESH_TEST", 0.8],
140 | label_map={0: "text", 1: "title", 2: "list", 3:"table", 4:"figure"})
141 |
142 | .. code:: python
143 |
144 | layout_predicted = model.detect(image)
145 |
146 | .. code:: python
147 |
148 | lp.draw_box(image,
149 | [b.set(id=f'{b.type}/{b.score:.2f}') for b in layout_predicted],
150 | color_map=color_map,
151 | show_element_id=True, id_font_size=10,
152 | id_text_background_color='grey',
153 | id_text_color='white')
154 |
155 |
156 |
157 |
158 | .. image:: output_15_0.png
159 |
160 |
161 |
--------------------------------------------------------------------------------
/docs/example/load_coco/output_10_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/docs/example/load_coco/output_10_0.png
--------------------------------------------------------------------------------
/docs/example/load_coco/output_15_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/docs/example/load_coco/output_15_0.png
--------------------------------------------------------------------------------
/docs/example/load_coco/output_8_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/docs/example/load_coco/output_8_0.png
--------------------------------------------------------------------------------
/docs/example/parse_ocr/output_14_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/docs/example/parse_ocr/output_14_0.png
--------------------------------------------------------------------------------
/docs/example/parse_ocr/output_17_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/docs/example/parse_ocr/output_17_0.png
--------------------------------------------------------------------------------
/docs/example/parse_ocr/output_19_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/docs/example/parse_ocr/output_19_0.png
--------------------------------------------------------------------------------
/docs/example/parse_ocr/output_25_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/docs/example/parse_ocr/output_25_0.png
--------------------------------------------------------------------------------
/docs/example/parse_ocr/output_6_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/docs/example/parse_ocr/output_6_1.png
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. Layout Parser documentation master file, created by
2 | sphinx-quickstart on Sun Jun 14 23:23:41 2020.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to Layout Parser's documentation!
7 | ================================================================
8 |
9 | .. toctree::
10 | :maxdepth: 2
11 | :caption: Notes
12 |
13 | notes/installation.md
14 | notes/modelzoo.md
15 |
16 | .. toctree::
17 | :maxdepth: 2
18 | :caption: Examples
19 |
20 | example/parse_ocr/index
21 | example/deep_layout_parsing/index
22 | example/load_coco/index
23 |
24 | .. toctree::
25 | :maxdepth: 2
26 | :caption: API Reference
27 |
28 | api_doc/elements
29 | notes/shape_operations.md
30 | api_doc/ocr
31 | api_doc/models
32 | api_doc/visualization
33 | api_doc/io
34 |
35 | Indices and tables
36 | ==================
37 |
38 | * :ref:`genindex`
39 | * :ref:`search`
40 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

REM Default to the sphinx-build executable on PATH unless overridden.
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build

REM No build target given: fall through to the Sphinx help screen.
if "%1" == "" goto help

REM Probe that sphinx-build is runnable; errorlevel 9009 = command not found.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

REM Delegate the requested target (html, clean, ...) to sphinx-build -M.
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
--------------------------------------------------------------------------------
/docs/notes/installation.md:
--------------------------------------------------------------------------------
1 | ../../installation.md
--------------------------------------------------------------------------------
/docs/notes/intersection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/docs/notes/intersection.png
--------------------------------------------------------------------------------
/docs/notes/modelzoo.md:
--------------------------------------------------------------------------------
1 | # Model Zoo
2 |
3 | We provide a spectrum of pre-trained models on different datasets.
4 |
5 | ## Example Usage:
6 |
7 | ```python
8 | import layoutparser as lp
9 | model = lp.Detectron2LayoutModel(
10 | config_path ='lp://PubLayNet/faster_rcnn_R_50_FPN_3x/config', # In model catalog
 11 |     label_map ={0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"}, # In model `label_map`
12 | extra_config=["MODEL.ROI_HEADS.SCORE_THRESH_TEST", 0.8] # Optional
13 | )
14 | model.detect(image)
15 | ```
16 |
17 | ## Model Catalog
18 |
19 | | Dataset | Model | Config Path | Eval Result (mAP) |
20 | |-----------------------------------------------------------------------|--------------------------------------------------------------------------------------------|--------------------------------------------------------|---------------------------------------------------------------------------|
21 | | [HJDataset](https://dell-research-harvard.github.io/HJDataset/) | [faster_rcnn_R_50_FPN_3x](https://www.dropbox.com/s/j4yseny2u0hn22r/config.yml?dl=1) | lp://HJDataset/faster_rcnn_R_50_FPN_3x/config | |
22 | | [HJDataset](https://dell-research-harvard.github.io/HJDataset/) | [mask_rcnn_R_50_FPN_3x](https://www.dropbox.com/s/4jmr3xanmxmjcf8/config.yml?dl=1) | lp://HJDataset/mask_rcnn_R_50_FPN_3x/config | |
23 | | [HJDataset](https://dell-research-harvard.github.io/HJDataset/) | [retinanet_R_50_FPN_3x](https://www.dropbox.com/s/z8a8ywozuyc5c2x/config.yml?dl=1) | lp://HJDataset/retinanet_R_50_FPN_3x/config | |
24 | | [PubLayNet](https://github.com/ibm-aur-nlp/PubLayNet) | [faster_rcnn_R_50_FPN_3x](https://www.dropbox.com/s/f3b12qc4hc0yh4m/config.yml?dl=1) | lp://PubLayNet/faster_rcnn_R_50_FPN_3x/config | |
25 | | [PubLayNet](https://github.com/ibm-aur-nlp/PubLayNet) | [mask_rcnn_R_50_FPN_3x](https://www.dropbox.com/s/u9wbsfwz4y0ziki/config.yml?dl=1) | lp://PubLayNet/mask_rcnn_R_50_FPN_3x/config | |
26 | | [PubLayNet](https://github.com/ibm-aur-nlp/PubLayNet) | [mask_rcnn_X_101_32x8d_FPN_3x](https://www.dropbox.com/s/nau5ut6zgthunil/config.yaml?dl=1) | lp://PubLayNet/mask_rcnn_X_101_32x8d_FPN_3x/config | 88.98 [eval.csv](https://www.dropbox.com/s/15ytg3fzmc6l59x/eval.csv?dl=0) |
27 | | [PrimaLayout](https://www.primaresearch.org/dataset/) | [mask_rcnn_R_50_FPN_3x](https://www.dropbox.com/s/yc92x97k50abynt/config.yaml?dl=1) | lp://PrimaLayout/mask_rcnn_R_50_FPN_3x/config | 69.35 [eval.csv](https://www.dropbox.com/s/9uuql57uedvb9mo/eval.csv?dl=0) |
28 | | [NewspaperNavigator](https://news-navigator.labs.loc.gov/) | [faster_rcnn_R_50_FPN_3x](https://www.dropbox.com/s/wnido8pk4oubyzr/config.yml?dl=1) | lp://NewspaperNavigator/faster_rcnn_R_50_FPN_3x/config | |
29 | | [TableBank](https://doc-analysis.github.io/tablebank-page/index.html) | [faster_rcnn_R_50_FPN_3x](https://www.dropbox.com/s/7cqle02do7ah7k4/config.yaml?dl=1) | lp://TableBank/faster_rcnn_R_50_FPN_3x/config | 89.78 [eval.csv](https://www.dropbox.com/s/1uwnz58hxf96iw2/eval.csv?dl=0) |
30 | | [TableBank](https://doc-analysis.github.io/tablebank-page/index.html) | [faster_rcnn_R_101_FPN_3x](https://www.dropbox.com/s/h63n6nv51kfl923/config.yaml?dl=1) | lp://TableBank/faster_rcnn_R_101_FPN_3x/config | 91.26 [eval.csv](https://www.dropbox.com/s/e1kq8thkj2id1li/eval.csv?dl=0) |
31 | | [Math Formula Detection(MFD)](http://transcriptorium.eu/~htrcontest/MathsICDAR2021/) | [faster_rcnn_R_50_FPN_3x](https://www.dropbox.com/s/ld9izb95f19369w/config.yaml?dl=1) | lp://MFD/faster_rcnn_R_50_FPN_3x/config | 79.68 [eval.csv](https://www.dropbox.com/s/1yvrs29jjybrlpw/eval.csv?dl=0) |
32 |
33 |
34 | * For PubLayNet models, we suggest using `mask_rcnn_X_101_32x8d_FPN_3x` model as it's trained on the whole training set, while others are only trained on the validation set (the size is only around 1/50). You could expect a 15% AP improvement using the `mask_rcnn_X_101_32x8d_FPN_3x` model.
35 |
36 | ## Model `label_map`
37 |
38 | | Dataset | Label Map |
39 | | ------------------------------------------------------------ | ------------------------------------------------------------ |
40 | | [HJDataset](https://dell-research-harvard.github.io/HJDataset/) | `{1:"Page Frame", 2:"Row", 3:"Title Region", 4:"Text Region", 5:"Title", 6:"Subtitle", 7:"Other"}` |
41 | | [PubLayNet](https://github.com/ibm-aur-nlp/PubLayNet) | `{0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"}` |
42 | | [PrimaLayout](https://www.primaresearch.org/dataset/) | `{1:"TextRegion", 2:"ImageRegion", 3:"TableRegion", 4:"MathsRegion", 5:"SeparatorRegion", 6:"OtherRegion"}` |
43 | | [NewspaperNavigator](https://news-navigator.labs.loc.gov/) | `{0: "Photograph", 1: "Illustration", 2: "Map", 3: "Comics/Cartoon", 4: "Editorial Cartoon", 5: "Headline", 6: "Advertisement"}` |
44 | | [TableBank](https://doc-analysis.github.io/tablebank-page/index.html) | `{0: "Table"}` |
45 | | [MFD](http://transcriptorium.eu/~htrcontest/MathsICDAR2021/) | `{1: "Equation"}` |
--------------------------------------------------------------------------------
/docs/notes/quickstart.rst:
--------------------------------------------------------------------------------
1 | Quickstart
2 | ================================
3 |
4 |
5 | Installation
6 | --------------------------------
7 |
8 | Use pip or conda to install the library:
9 |
10 | .. code-block:: bash
11 |
12 | pip install layoutparser
13 |
14 | # Install Detectron2 for using DL Layout Detection Model
15 | pip install 'git+https://github.com/facebookresearch/detectron2.git@v0.1.3#egg=detectron2'
16 |
17 | # Install the ocr components when necessary
18 | pip install layoutparser[ocr]
19 |
 20 | This by default will install the CPU version of the Detectron2, and it should be able to run on most of the computers. But if you have a GPU, you can consider the GPU version of the Detectron2, referring to the `official instructions <https://detectron2.readthedocs.io/en/latest/tutorials/install.html>`_.
--------------------------------------------------------------------------------
/docs/notes/shape_operations.md:
--------------------------------------------------------------------------------
1 | # Shape Operations
2 |
3 | [BETA: the API and behavior *will* be changed in the future.]
4 |
5 | Starting from v0.2, Layout Parser provides supports for two types of shape operations, `union` and `intersection`, across all `BaseCoordElement`s and `TextBlock`. We've made some design choices to construct a set of generalized APIs across different shape classes, detailed as follows:
6 |
7 | ## The `union` Operation
8 |
9 | 
10 | ▲ The Illustration of Union Operations. The resulting matrix are symmetric so only the lower triangular region is left empty. Each cell shows the visualization of the shape objects, their coordinates, and their object class. For the output visualization, the gray and dashed line delineates the original obj1 and obj2, respectively, for reference.
11 |
12 | **Notes**:
13 | 1. The x-interval and y-interval are both from the `Interval` Class but with different axes. It's ill-defined to union two intervals from different axes so in this case Layout Parser will raise an `InvalidShapeError`.
14 | 2. The union of two rectangles is still a rectangle, which is the minimum covering rectangle of the two input rectangles.
15 | 3. For the outputs associated with `Quadrilateral` inputs, please see details in the [Problems related to the Quadrilateral Class](#problems-related-to-the-quadrilateral-class) section.
16 |
17 | ## The `intersect` Operation
18 |
19 | 
 20 | ▲ The Illustration of Intersection Operations. Similar to the previous visualization, the resulting matrix are symmetric so only the lower triangular region is left empty. Each cell shows the visualization of the shape objects, their coordinates, and their object class. For the output visualization, the gray and dashed line delineates the original obj1 and obj2, respectively, for reference.
21 |
22 | ## Problems related to the `Quadrilateral` Class
23 |
24 | It is possible to generate arbitrary shapes when performing shape operations on `Quadrilateral` objects. Currently Layout Parser does not provide the support for `Polygon` objects (but we plan to support that object in the near future), thus it becomes tricky to add support for these operations for `Quadrilateral`. The temporary solution is that:
25 | 1. When performing shape operations on `Quadrilateral` objects, Layout Parser will raise `NotSupportedShapeError`.
26 | 2. A workaround is to set `strict=False` in the input (i.e., `obj1.union(obj2, strict=False)`). In this case, any quadrilateral objects will be converted to `Rectangle`s first and the operation is executed. The results may not be *strictly* equivalent to those performed on the original objects.
--------------------------------------------------------------------------------
/docs/notes/union.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/docs/notes/union.png
--------------------------------------------------------------------------------
/examples/Customizing Layout Models with Label Studio Annotation/README.md:
--------------------------------------------------------------------------------
1 |
 2 | Customizing LayoutParser Models with Label Studio Annotation
 3 | With Scientific Document Parsing as an example
4 |
5 | ---
6 |
7 | [Webinar Video](https://www.youtube.com/watch?v=puOKTFXRyr4) | [Slides](https://szj.io/assets/files/talks/2022-Feb-LayoutParser-and-Label-Studio-Webinar.pdf) | [Notebooks](Customizing%20Layout%20Models%20with%20Label%20Studio%20Annotation.ipynb)
8 |
9 |
10 | 
11 |
--------------------------------------------------------------------------------
/examples/Customizing Layout Models with Label Studio Annotation/download_annotation.py:
--------------------------------------------------------------------------------
1 | import pdf2image
2 | import tempfile
3 | import urllib.request
4 | import pandas as pd
5 | import zipfile
6 |
7 | opener = urllib.request.build_opener()
8 | opener.addheaders=[('User-Agent','Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1941.0 Safari/537.36')]
9 | urllib.request.install_opener(opener)
10 |
11 | def download_auxiliary_paper_images(target_path: str = "downloaded-annotations"):
12 |
13 | data_to_download = pd.DataFrame(
14 | [
15 | ["1810.04805v2", 10, "1810.04805v2-10_ea8f.jpg"],
16 | ["1810.04805v2", 11, "1810.04805v2-11_213f.jpg"],
17 | ["1810.04805v2", 9, "1810.04805v2-9_dc05.jpg"],
18 | ["1908.03557v1", 10, "1908.03557v1-10_fa12.jpg"],
19 | ["1908.03557v1", 11, "1908.03557v1-11_a737.jpg"],
20 | ],
21 | columns=["arxiv_id", "page", "filename"],
22 | )
23 |
24 | for arxiv_id, gp in data_to_download.groupby("arxiv_id"):
25 | with tempfile.TemporaryDirectory() as tempdir:
26 | arxiv_link = f"http://arxiv.org/pdf/{arxiv_id}.pdf"
27 | urllib.request.urlretrieve(arxiv_link, f"{tempdir}/{arxiv_id}.pdf")
28 | pdf_images = pdf2image.convert_from_path(
29 | f"{tempdir}/{arxiv_id}.pdf", dpi=72
30 | )
31 | for _, row in gp.iterrows():
32 | pdf_images[row["page"]].save(f"{target_path}/images/{row['filename']}")
33 |
34 |
35 | ANNOTATION_FILE_PATH = "http://szj.io/assets/files/data/layoutparser-webinar-annotations-2022-Feb.zip"
36 |
37 | def download_zipped_annotations():
38 | filehandle, _ = urllib.request.urlretrieve(ANNOTATION_FILE_PATH)
39 | zip_ref = zipfile.ZipFile(filehandle, 'r')
40 | zip_ref.extractall("./") # extract file to dir
41 | zip_ref.close() # close file
42 |
43 | if __name__ == "__main__":
44 | download_zipped_annotations()
45 | download_auxiliary_paper_images()
--------------------------------------------------------------------------------
/examples/Customizing Layout Models with Label Studio Annotation/pipeline-overview.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/examples/Customizing Layout Models with Label Studio Annotation/pipeline-overview.jpg
--------------------------------------------------------------------------------
/examples/Customizing Layout Models with Label Studio Annotation/task-overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/examples/Customizing Layout Models with Label Studio Annotation/task-overview.png
--------------------------------------------------------------------------------
/examples/data/example-table.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/examples/data/example-table.jpeg
--------------------------------------------------------------------------------
/examples/data/paper-image.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/examples/data/paper-image.jpg
--------------------------------------------------------------------------------
/installation.md:
--------------------------------------------------------------------------------
1 | # Installation
2 |
3 | ## Install Python
4 |
5 | LayoutParser is a Python package that requires Python >= 3.6. If you do not have Python installed on your computer, you might want to turn to [the official instruction](https://www.python.org/downloads/) to download and install the appropriate version of Python.
6 |
7 |
8 |
9 | ## Install the LayoutParser library
10 |
11 | After several major updates, LayoutParser provides various functionalities and deep learning models from different backends. However, you might only need a fraction of the functions, and it would be redundant for you to install all the dependencies when they are not required. Therefore, we design highly customizable ways for installing the LayoutParser library:
12 |
13 |
14 | | Command | Description |
15 | | --- | --- |
16 | | `pip install layoutparser` | **Install the base LayoutParser Library**
It will support all key functions in LayoutParser, including:
1. Layout Data Structure and operations
2. Layout Visualization
3. Load/export the layout data |
17 | | `pip install "layoutparser[effdet]"` | **Install LayoutParser with Layout Detection Model Support**
It will install the LayoutParser base library as well as
supporting dependencies for the ***EfficientDet***-based layout detection models. |
18 | | `pip install layoutparser torchvision && pip install "git+https://github.com/facebookresearch/detectron2.git@v0.5#egg=detectron2"` | **Install LayoutParser with Layout Detection Model Support**
It will install the LayoutParser base library as well as
supporting dependencies for the ***Detectron2***-based layout detection models. See details in [Additional Instruction: Install Detectron2 Layout Model Backend](#additional-instruction-install-detectron2-layout-model-backend). |
19 | | `pip install "layoutparser[paddledetection]"` | **Install LayoutParser with Layout Detection Model Support**
It will install the LayoutParser base library as well as
supporting dependencies for the ***PaddleDetection***-based layout detection models. |
20 | | `pip install "layoutparser[ocr]"` | **Install LayoutParser with OCR Support**
It will install the LayoutParser base library as well as
supporting dependencies for performing OCRs. See details in [Additional Instruction: Install OCR utils](#additional-instruction-install-ocr-utils). |
21 |
22 | ### Additional Instruction: Install Detectron2 Layout Model Backend
23 |
24 | #### For Mac OS and Linux Users
25 |
26 | If you would like to use the Detectron2 models for layout detection, you might need to run the following command:
27 |
28 | ```bash
29 | pip install layoutparser torchvision && pip install "detectron2@git+https://github.com/facebookresearch/detectron2.git@v0.5#egg=detectron2"
30 | ```
31 |
32 | This might take some time as the command will *compile* the library. If you also want to install a Detectron2 version
33 | with GPU support or encounter some issues during the installation process, please refer to the official Detectron2
34 | [installation instruction](https://github.com/facebookresearch/detectron2/blob/master/INSTALL.md) for detailed
35 | information.
36 |
37 | #### For Windows users
38 |
39 | As reported by many users, the installation of Detectron2 can be rather tricky on Windows platforms. In our extensive tests, we find that it is nearly impossible to provide a one-line installation command for Windows users. As a workaround solution, for now we list the possible challenges for installing Detectron2 on Windows, and attach helpful resources for solving them. We are also investigating other possibilities to avoid installing Detectron2 to use pre-trained models. If you have any suggestions or ideas, please feel free to [submit an issue](https://github.com/Layout-Parser/layout-parser/issues) in our repo.
40 |
41 | 1. Challenges for installing `pycocotools`
42 | - You can find detailed instructions on [this post](https://changhsinlee.com/pycocotools/) from Chang Hsin Lee.
43 |     - Another solution is to try installing `pycocotools-windows`, see https://github.com/cocodataset/cocoapi/issues/415.
44 | 2. Challenges for installing `Detectron2`
45 | - [@ivanpp](https://github.com/ivanpp) curates a detailed description for installing `Detectron2` on Windows: [Detectron2 walkthrough (Windows)](https://ivanpp.cc/detectron2-walkthrough-windows/#step3installdetectron2)
46 | - `Detectron2` maintainers claim that they won't provide official support for Windows (see [1](https://github.com/facebookresearch/detectron2/issues/9#issuecomment-540974288) and [2](https://detectron2.readthedocs.io/en/latest/tutorials/install.html)), but Detectron2 is continuously built on windows with CircleCI (see [3](https://github.com/facebookresearch/detectron2/blob/master/INSTALL.md#common-installation-issues)). Hopefully this situation will be improved in the future.
47 |
48 |
49 | ### Additional Instruction: Install OCR utils
50 |
51 | Layout Parser also comes with supports for OCR functions. In order to use them, you need to install the OCR utils via:
52 |
53 | ```bash
54 | pip install "layoutparser[ocr]"
55 | ```
56 |
57 | Additionally, if you want to use the Tesseract-OCR engine, you also need to install it on your computer. Please check the
58 | [official documentation](https://tesseract-ocr.github.io/tessdoc/Installation.html) for detailed installation instructions.
59 |
60 | ## Known issues
61 |
62 | Error: instantiating `lp.GCVAgent.with_credential` returns module 'google.cloud.vision' has no attribute 'types'.
63 |
64 |
65 | In this case, you have a newer version of the google-cloud-vision package installed. Please consider downgrading the API using:
66 | ```bash
67 | pip install -U "layoutparser[ocr]"
68 | ```
69 |
70 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | name = layoutparser
3 | description = Layout Parser is a deep learning assisted tool for Document Image Layout Analysis.
4 | keywords = layout analysis, deep learning
5 | license = Apache-2.0
6 | classifiers =
7 | Intended Audience :: Developers
8 | Intended Audience :: Education
9 | Intended Audience :: Science/Research
10 | License :: OSI Approved :: Apache Software License
11 | Programming Language :: Python :: 3
12 | Programming Language :: Python :: 3.6
13 | Programming Language :: Python :: 3.7
14 | Programming Language :: Python :: 3.8
15 | Programming Language :: Python :: 3.9
16 | Topic :: Scientific/Engineering :: Artificial Intelligence
17 |
18 | [options]
19 | zip_safe = False
20 | package_dir=
21 | =src
22 | packages=find:
23 |
24 | [options.packages.find]
25 | where=src
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from setuptools import setup, find_packages
16 | import os
17 |
18 | # A trick from https://github.com/jina-ai/jina/blob/79b302c93b01689e82cf4b52f46522eb7497c404/setup.py#L20
19 | pkg_name = 'layoutparser'
20 | libinfo_py = os.path.join('src', pkg_name, '__init__.py')
21 | libinfo_content = open(libinfo_py, 'r', encoding='utf8').readlines()
22 | version_line = [l.strip() for l in libinfo_content if l.startswith('__version__')][0]
23 | exec(version_line) # gives __version__
24 |
25 | setup(name = "layoutparser",
26 | version = __version__,
27 | author = "Zejiang Shen, Ruochen Zhang, and Layout Parser Model Contributors",
28 | author_email = "layoutparser@gmail.com",
29 | license = "Apache-2.0",
30 | url = "https://github.com/Layout-Parser/layout-parser",
31 | package_dir = {"": "src"},
32 | packages = find_packages("src"),
33 | description = "A unified toolkit for Deep Learning Based Document Image Analysis",
34 | long_description=open("README.md", "r", encoding="utf-8").read(),
35 | long_description_content_type="text/markdown",
36 | python_requires='>=3.6',
37 | install_requires=[
38 | "numpy",
39 | "opencv-python",
40 | "scipy",
41 | "pandas",
42 | "pillow",
43 | "pyyaml>=5.1",
44 | "iopath",
45 | "pdfplumber",
46 | "pdf2image",
47 | ],
48 | extras_require={
49 | "ocr": [
50 | 'google-cloud-vision==1',
51 | 'pytesseract'
52 | ],
53 | "gcv": [
54 | 'google-cloud-vision==1',
55 | ],
56 | "tesseract": [
57 | 'pytesseract'
58 | ],
59 | "layoutmodels": [
60 | "torch",
61 | "torchvision",
62 | "effdet"
63 | ],
64 | "effdet": [
65 | "torch",
66 | "torchvision",
67 | "effdet"
68 | ],
69 | "paddledetection": [
70 | "paddlepaddle==2.1.0"
71 | ],
72 | },
73 | include_package_data=True
74 | )
--------------------------------------------------------------------------------
/src/layoutparser/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | __version__ = "0.3.4"
16 |
17 | import sys
18 |
19 | from .file_utils import (
20 | _LazyModule,
21 | is_detectron2_available,
22 | is_paddle_available,
23 | is_effdet_available,
24 | is_pytesseract_available,
25 | is_gcv_available,
26 | )
27 |
28 | _import_structure = {
29 | "elements": [
30 | "Interval",
31 | "Rectangle",
32 | "Quadrilateral",
33 | "TextBlock",
34 | "Layout"
35 | ],
36 | "visualization": [
37 | "draw_box",
38 | "draw_text"
39 | ],
40 | "io": [
41 | "load_json",
42 | "load_dict",
43 | "load_csv",
44 | "load_dataframe",
45 | "load_pdf"
46 | ],
47 | "file_utils":[
48 | "is_torch_available",
49 | "is_torch_cuda_available",
50 | "is_detectron2_available",
51 | "is_paddle_available",
52 | "is_pytesseract_available",
53 | "is_gcv_available",
54 | "requires_backends"
55 | ],
56 | "tools": [
57 | "generalized_connected_component_analysis_1d",
58 | "simple_line_detection",
59 | "group_textblocks_based_on_category"
60 | ]
61 | }
62 |
63 | _import_structure["models"] = ["AutoLayoutModel"]
64 |
65 | if is_detectron2_available():
66 | _import_structure["models.detectron2"] = ["Detectron2LayoutModel"]
67 |
68 | if is_paddle_available():
69 | _import_structure["models.paddledetection"] = ["PaddleDetectionLayoutModel"]
70 |
71 | if is_effdet_available():
72 | _import_structure["models.effdet"] = ["EfficientDetLayoutModel"]
73 |
74 | if is_pytesseract_available():
75 | _import_structure["ocr.tesseract_agent"] = [
76 | "TesseractAgent",
77 | "TesseractFeatureType",
78 | ]
79 |
80 | if is_gcv_available():
81 | _import_structure["ocr.gcv_agent"] = ["GCVAgent", "GCVFeatureType"]
82 |
83 | sys.modules[__name__] = _LazyModule(
84 | __name__,
85 | globals()["__file__"],
86 | _import_structure,
87 | module_spec=__spec__,
88 | extra_objects={"__version__": __version__},
89 | )
90 |
--------------------------------------------------------------------------------
/src/layoutparser/elements/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from .base import BaseCoordElement, BaseLayoutElement
16 | from .layout_elements import (
17 | Interval,
18 | Rectangle,
19 | Quadrilateral,
20 | TextBlock,
21 | ALL_BASECOORD_ELEMENTS,
22 | BASECOORD_ELEMENT_NAMEMAP,
23 | BASECOORD_ELEMENT_INDEXMAP,
24 | )
25 | from .layout import Layout
--------------------------------------------------------------------------------
/src/layoutparser/elements/errors.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | class NotSupportedShapeError(Exception):
16 | """For now (v0.2), if the created shape might be a polygon (shapes with more than 4 vertices),
17 | layoutparser will raise NotSupportedShapeError. It is expected to be fixed in the future versions.
18 | See
19 | :ref:`shape_operations:problems-related-to-the-quadrilateral-class`.
20 | """
21 |
22 |
23 | class InvalidShapeError(Exception):
24 | """For shape operations like intersection of union, lp will raise the InvalidShapeError when
25 | invalid shapes are created (e.g., intersecting a rectangle and an interval).
26 | """
--------------------------------------------------------------------------------
/src/layoutparser/elements/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from typing import List, Union, Dict, Dict, Any, Optional, Tuple
16 |
17 | import numpy as np
18 | from PIL import Image
19 |
20 |
21 | def cvt_coordinates_to_points(coords: Tuple[float, float, float, float]) -> np.ndarray:
22 |
23 | x_1, y_1, x_2, y_2 = coords
24 | return np.array(
25 | [
26 | [x_1, y_1], # Top Left
27 | [x_2, y_1], # Top Right
28 | [x_2, y_2], # Bottom Right
29 | [x_1, y_2], # Bottom Left
30 | ]
31 | )
32 |
33 |
34 | def cvt_points_to_coordinates(points: np.ndarray) -> Tuple[float, float, float, float]:
35 | x_1 = points[:, 0].min()
36 | y_1 = points[:, 1].min()
37 | x_2 = points[:, 0].max()
38 | y_2 = points[:, 1].max()
39 | return (x_1, y_1, x_2, y_2)
40 |
41 |
42 | def perspective_transformation(
43 | M: np.ndarray, points: np.ndarray, is_inv: bool = False
44 | ) -> np.ndarray:
45 |
46 | if is_inv:
47 | M = np.linalg.inv(M)
48 |
49 | src_mid = np.hstack([points, np.ones((points.shape[0], 1))]).T # 3x4
50 | dst_mid = np.matmul(M, src_mid)
51 |
52 | dst = (dst_mid / dst_mid[-1]).T[:, :2] # 4x2
53 |
54 | return dst
55 |
56 |
57 | def vertice_in_polygon(vertice: np.ndarray, polygon_points: np.ndarray) -> bool:
58 | # The polygon_points are ordered clockwise
59 |
60 | # The implementation is based on the algorithm from
61 | # https://demonstrations.wolfram.com/AnEfficientTestForAPointToBeInAConvexPolygon/
62 |
63 | points = polygon_points - vertice # shift the coordinates origin to the vertice
64 | edges = np.append(points, points[0:1, :], axis=0)
65 | return all([np.linalg.det([e1, e2]) >= 0 for e1, e2 in zip(edges, edges[1:])])
66 | # If the points are ordered clockwise, the det should <=0
67 |
68 |
69 | def polygon_area(xs: np.ndarray, ys: np.ndarray) -> float:
70 | """Calculate the area of polygons using
71 | `Shoelace Formula `_.
72 |
73 | Args:
74 | xs (`np.ndarray`): The x coordinates of the points
75 | ys (`np.ndarray`): The y coordinates of the points
76 | """
77 |
78 | # Refer to: https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates
79 | # The formula is equivalent to the original one indicated in the wikipedia
80 | # page.
81 |
82 | return 0.5 * np.abs(np.dot(xs, np.roll(ys, 1)) - np.dot(ys, np.roll(xs, 1)))
--------------------------------------------------------------------------------
/src/layoutparser/file_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # Some code are adapted from
16 | # https://github.com/huggingface/transformers/blob/master/src/transformers/file_utils.py
17 |
18 | from typing import Any, BinaryIO, Dict, List, Optional, Tuple, Union
19 | import sys
20 | import os
21 | import logging
22 | import importlib.util
23 | from types import ModuleType
24 |
25 | logger = logging.getLogger(__name__) # pylint: disable=invalid-name
26 |
27 | # The package importlib_metadata is in a different place, depending on the python version.
28 | if sys.version_info < (3, 8):
29 | import importlib_metadata
30 | else:
31 | import importlib.metadata as importlib_metadata
32 |
33 | ###########################################
34 | ############ Layout Model Deps ############
35 | ###########################################
36 |
37 | _torch_available = importlib.util.find_spec("torch") is not None
38 | try:
39 | _torch_version = importlib_metadata.version("torch")
40 | logger.debug(f"PyTorch version {_torch_version} available.")
41 | except importlib_metadata.PackageNotFoundError:
42 | _torch_available = False
43 |
44 | _detectron2_available = importlib.util.find_spec("detectron2") is not None
45 | try:
46 | _detectron2_version = importlib_metadata.version("detectron2")
47 | logger.debug(f"Detectron2 version {_detectron2_version} available")
48 | except importlib_metadata.PackageNotFoundError:
49 | _detectron2_available = False
50 |
51 | _paddle_available = importlib.util.find_spec("paddle") is not None
52 | try:
53 | # The name of the paddlepaddle library:
54 | # Install name: pip install paddlepaddle
55 | # Import name: import paddle
56 | _paddle_version = importlib_metadata.version("paddlepaddle")
57 | logger.debug(f"Paddle version {_paddle_version} available.")
58 | except importlib_metadata.PackageNotFoundError:
59 | _paddle_available = False
60 |
61 | _effdet_available = importlib.util.find_spec("effdet") is not None
62 | try:
63 | _effdet_version = importlib_metadata.version("effdet")
64 | logger.debug(f"Effdet version {_effdet_version} available.")
65 | except importlib_metadata.PackageNotFoundError:
66 | _effdet_version = False
67 |
68 | ###########################################
69 | ############## OCR Tool Deps ##############
70 | ###########################################
71 |
72 | _pytesseract_available = importlib.util.find_spec("pytesseract") is not None
73 | try:
74 | _pytesseract_version = importlib_metadata.version("pytesseract")
75 | logger.debug(f"Pytesseract version {_pytesseract_version} available.")
76 | except importlib_metadata.PackageNotFoundError:
77 | _pytesseract_available = False
78 |
79 | try:
80 | _gcv_available = importlib.util.find_spec("google.cloud.vision") is not None
81 | try:
82 | _gcv_version = importlib_metadata.version(
83 | "google-cloud-vision"
84 | ) # This is slightly different
85 | logger.debug(f"Google Cloud Vision Utils version {_gcv_version} available.")
86 | except importlib_metadata.PackageNotFoundError:
87 | _gcv_available = False
88 | except ModuleNotFoundError:
89 | _gcv_available = False
90 |
91 |
92 | def is_torch_available():
93 | return _torch_available
94 |
95 |
96 | def is_torch_cuda_available():
97 | if is_torch_available():
98 | import torch
99 |
100 | return torch.cuda.is_available()
101 | else:
102 | return False
103 |
104 |
105 | def is_detectron2_available():
106 | return _detectron2_available
107 |
108 |
109 | def is_paddle_available():
110 | return _paddle_available
111 |
112 |
113 | def is_effdet_available():
114 | return _effdet_available
115 |
116 |
117 | def is_pytesseract_available():
118 | return _pytesseract_available
119 |
120 |
121 | def is_gcv_available():
122 | return _gcv_available
123 |
124 |
125 | PYTORCH_IMPORT_ERROR = """
126 | {0} requires the PyTorch library but it was not found in your environment. Checkout the instructions on the
127 | installation page: https://pytorch.org/get-started/locally/ and follow the ones that match your environment.
128 | """
129 |
130 | DETECTRON2_IMPORT_ERROR = """
131 | {0} requires the detectron2 library but it was not found in your environment. Checkout the instructions on the
132 | installation page: https://github.com/facebookresearch/detectron2/blob/master/INSTALL.md and follow the ones
133 | that match your environment. Typically the following would work for MacOS or Linux CPU machines:
134 | pip install 'git+https://github.com/facebookresearch/detectron2.git@v0.4#egg=detectron2'
135 | """
136 |
137 | PADDLE_IMPORT_ERROR = """
138 | {0} requires the PaddlePaddle library but it was not found in your environment. Checkout the instructions on the
139 | installation page: https://github.com/PaddlePaddle/Paddle and follow the ones that match your environment.
140 | """
141 |
142 | EFFDET_IMPORT_ERROR = """
143 | {0} requires the effdet library but it was not found in your environment. You can install it with pip:
144 | `pip install effdet`
145 | """
146 |
147 | PYTESSERACT_IMPORT_ERROR = """
148 | {0} requires the PyTesseract library but it was not found in your environment. You can install it with pip:
149 | `pip install pytesseract`
150 | """
151 |
152 | GCV_IMPORT_ERROR = """
153 | {0} requires the Google Cloud Vision Python utils but it was not found in your environment. You can install it with pip:
154 | `pip install google-cloud-vision==1`
155 | """
156 |
157 | BACKENDS_MAPPING = dict(
158 | [
159 | ("torch", (is_torch_available, PYTORCH_IMPORT_ERROR)),
160 | ("detectron2", (is_detectron2_available, DETECTRON2_IMPORT_ERROR)),
161 | ("paddle", (is_paddle_available, PADDLE_IMPORT_ERROR)),
162 | ("effdet", (is_effdet_available, EFFDET_IMPORT_ERROR)),
163 | ("pytesseract", (is_pytesseract_available, PYTESSERACT_IMPORT_ERROR)),
164 | ("google-cloud-vision", (is_gcv_available, GCV_IMPORT_ERROR)),
165 | ]
166 | )
167 |
168 |
169 | def requires_backends(obj, backends):
170 | if not isinstance(backends, (list, tuple)):
171 | backends = [backends]
172 |
173 | name = obj.__name__ if hasattr(obj, "__name__") else obj.__class__.__name__
174 | if not all(BACKENDS_MAPPING[backend][0]() for backend in backends):
175 | raise ImportError(
176 | "".join([BACKENDS_MAPPING[backend][1].format(name) for backend in backends])
177 | )
178 |
179 |
180 | class _LazyModule(ModuleType):
181 | """
182 | Module class that surfaces all objects but only performs associated imports when the objects are requested.
183 | """
184 |
185 | # Adapted from HuggingFace
186 | # https://github.com/huggingface/transformers/blob/c37573806ab3526dd805c49cbe2489ad4d68a9d7/src/transformers/file_utils.py#L1990
187 |
188 | def __init__(
189 | self, name, module_file, import_structure, module_spec=None, extra_objects=None
190 | ):
191 | super().__init__(name)
192 | self._modules = set(import_structure.keys())
193 | self._class_to_module = {}
194 | for key, values in import_structure.items():
195 | for value in values:
196 | self._class_to_module[value] = key
197 | # Needed for autocompletion in an IDE
198 | self.__all__ = list(import_structure.keys()) + sum(
199 | import_structure.values(), []
200 | )
201 | self.__file__ = module_file
202 | self.__spec__ = module_spec
203 | self.__path__ = [os.path.dirname(module_file)]
204 | self._objects = {} if extra_objects is None else extra_objects
205 | self._name = name
206 | self._import_structure = import_structure
207 |
208 | # Following [PEP 366](https://www.python.org/dev/peps/pep-0366/)
209 | # The __package__ variable should be set
210 | # https://docs.python.org/3/reference/import.html#__package__
211 | self.__package__ = self.__name__
212 |
213 | # Needed for autocompletion in an IDE
214 | def __dir__(self):
215 | return super().__dir__() + self.__all__
216 |
217 | def __getattr__(self, name: str) -> Any:
218 | if name in self._objects:
219 | return self._objects[name]
220 | if name in self._modules:
221 | value = self._get_module(name)
222 | elif name in self._class_to_module.keys():
223 | module = self._get_module(self._class_to_module[name])
224 | value = getattr(module, name)
225 | else:
226 | raise AttributeError(f"module {self.__name__} has no attribute {name}")
227 |
228 | setattr(self, name, value)
229 | return value
230 |
231 | def _get_module(self, module_name: str):
232 | return importlib.import_module("." + module_name, self.__name__)
233 |
234 | def __reduce__(self):
235 | return (self.__class__, (self._name, self.__file__, self._import_structure))
236 |
--------------------------------------------------------------------------------
/src/layoutparser/io/__init__.py:
--------------------------------------------------------------------------------
1 | from .basic import load_json, load_dict, load_csv, load_dataframe
2 | from .pdf import load_pdf
--------------------------------------------------------------------------------
/src/layoutparser/io/basic.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import ast
16 | import json
17 | from typing import List, Union, Dict, Dict, Any
18 |
19 | import pandas as pd
20 |
21 | from ..elements import (
22 | BaseLayoutElement,
23 | TextBlock,
24 | Layout,
25 | BASECOORD_ELEMENT_NAMEMAP,
26 | )
27 |
28 |
def load_json(filename: str) -> Union[BaseLayoutElement, Layout]:
    """Read a layout-data JSON file and reconstruct the stored layout object.

    Args:
        filename (str):
            The name of the JSON file.

    Returns:
        Union[BaseLayoutElement, Layout]:
            The layout element or Layout reconstructed from the file; the
            concrete type is inferred automatically from the stored data.
    """
    with open(filename, "r") as fp:
        parsed = json.load(fp)

    # Delegate the type dispatching to load_dict.
    return load_dict(parsed)
45 |
46 |
def load_dict(data: Union[Dict, List[Dict]]) -> Union[BaseLayoutElement, Layout]:
    """Load a dict or list of dict representations of some layout data,
    automatically parse its type, and save it as any of BaseLayoutElement
    or Layout datatype.

    Args:
        data (Union[Dict, List]):
            A dict or list of dict representations of the layout data

    Raises:
        ValueError:
            If the data format is incompatible with the layout-data-JSON format
            (e.g., a dict without a `block_type` key), raise a `ValueError`.
        ValueError:
            If any `block_type` name is not in the available list of layout element
            names defined in `BASECOORD_ELEMENT_NAMEMAP`, raise a `ValueError`.

    Returns:
        Union[BaseLayoutElement, Layout]:
            Based on the dict format, it will automatically parse the type of
            the data and load it accordingly.
    """
    if isinstance(data, dict):
        if "page_data" in data:
            # A dict carrying "page_data" stores a full Layout: its blocks
            # plus the page-level metadata.
            return Layout(load_dict(data["blocks"])._blocks, page_data=data["page_data"])

        # Previously a dict without "block_type" surfaced as a bare KeyError;
        # raise the documented ValueError for malformed layout dicts instead.
        if "block_type" not in data:
            raise ValueError("Invalid input JSON structure: missing `block_type`.")

        if data["block_type"] not in BASECOORD_ELEMENT_NAMEMAP:
            raise ValueError(f"Invalid block_type {data['block_type']}")

        # The presence of any TextBlock-specific field marks this dict as a
        # TextBlock rather than a bare coordinate element.
        is_textblock = any(ele in data for ele in TextBlock._features)
        if is_textblock:
            return TextBlock.from_dict(data)
        else:
            return BASECOORD_ELEMENT_NAMEMAP[data["block_type"]].from_dict(data)

    elif isinstance(data, list):
        return Layout([load_dict(ele) for ele in data])

    else:
        # Removed the pointless f-prefix: the message has no placeholders.
        raise ValueError("Invalid input JSON structure.")
90 |
91 |
def load_csv(filename: str, block_type: str = None) -> Layout:
    """Load the Layout object from the given CSV file.

    Args:
        filename (str):
            The name of the CSV file. A row of the table represents
            an individual layout element.

        block_type (str):
            If there's no block_type column in the CSV file,
            you must pass in a block_type variable such that layout parser
            can appropriately detect the type of the layout elements.

    Returns:
        Layout:
            The parsed Layout object from the CSV file.
    """
    # Read the table, then reuse the dataframe loader for all the parsing.
    table = pd.read_csv(filename)
    return load_dataframe(table, block_type=block_type)
111 |
112 |
def load_dataframe(df: pd.DataFrame, block_type: Optional[str] = None) -> Layout:
    """Load the Layout object from the given dataframe.

    Args:
        df (pd.DataFrame):
            A dataframe in which each row describes one layout element
            (coordinates, and optionally text-block fields).

        block_type (Optional[str]):
            If there's no block_type column in the dataframe,
            you must pass in a block_type variable such that layout parser
            can appropriately detect the type of the layout elements.

    Raises:
        ValueError:
            If `block_type` is neither a dataframe column nor an argument.

    Returns:
        Layout:
            The parsed Layout object from the dataframe.
    """
    # Work on a copy so the caller's dataframe is never mutated.
    df = df.copy()
    if "points" in df.columns:
        if df["points"].dtype == object:
            # "points" round-trips through CSV as a string literal like
            # "[(0, 0), (1, 1)]"; parse it back into Python objects.
            df["points"] = df["points"].map(
                lambda x: ast.literal_eval(x) if not pd.isna(x) else x
            )

    if block_type is None:
        if "block_type" not in df.columns:
            raise ValueError(
                "`block_type` not specified both in dataframe and arguments"
            )
    else:
        df["block_type"] = block_type

    if any(col in TextBlock._features for col in df.columns):
        # Automatically setting index for textblock
        if "id" not in df.columns:
            df["id"] = df.index

    # Drop per-row NaN fields, then let load_dict infer each element's type.
    return load_dict(df.apply(lambda x: x.dropna().to_dict(), axis=1).to_list())
149 |
--------------------------------------------------------------------------------
/src/layoutparser/io/pdf.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from typing import List, Union, Optional, Dict, Tuple
16 |
17 | import pdfplumber
18 | import pandas as pd
19 |
20 | from ..elements import Layout
21 | from .basic import load_dataframe
22 |
# pdfplumber reports coordinates in PDF points (72 per inch), so 72 DPI is
# the baseline rendering resolution used by `load_pdf`.
DEFAULT_PDF_DPI = 72
24 |
25 |
def extract_words_for_page(
    page: pdfplumber.page.Page,
    x_tolerance=1.5,
    y_tolerance=2,
    keep_blank_chars=False,
    use_text_flow=True,
    horizontal_ltr=True,
    vertical_ttb=True,
    extra_attrs=None,
) -> Layout:
    """The helper function used for extracting words from a pdfplumber page
    object.

    Returns:
        Layout: a layout object representing all extracted pdf tokens on this page.
    """
    # Default to keeping fontname/size so tokens with different styling
    # are not merged together.
    extra_attrs = ["fontname", "size"] if extra_attrs is None else extra_attrs

    words = page.extract_words(
        x_tolerance=x_tolerance,
        y_tolerance=y_tolerance,
        keep_blank_chars=keep_blank_chars,
        use_text_flow=use_text_flow,
        horizontal_ltr=horizontal_ltr,
        vertical_ttb=vertical_ttb,
        extra_attrs=extra_attrs,
    )

    word_df = pd.DataFrame(words)
    if len(word_df) == 0:
        # Nothing extracted (e.g. an image-only page): return an empty layout.
        return Layout()

    # Clamp all coordinates into the page bounds and normalize them to float.
    word_df[["x0", "x1"]] = (
        word_df[["x0", "x1"]].clip(lower=0, upper=int(page.width)).astype("float")
    )
    word_df[["top", "bottom"]] = (
        word_df[["top", "bottom"]].clip(lower=0, upper=int(page.height)).astype("float")
    )

    # Rename to the column names load_dataframe expects.
    renamed = word_df.reset_index().rename(
        columns={
            "x0": "x_1",
            "x1": "x_2",
            "top": "y_1",
            "bottom": "y_2",
            "index": "id",
            "fontname": "type",  # also loading fontname as "type"
        }
    )
    return load_dataframe(renamed, block_type="rectangle")
82 |
83 |
def load_pdf(
    filename: str,
    load_images: bool = False,
    x_tolerance: float = 1.5,
    y_tolerance: float = 2,
    keep_blank_chars: bool = False,
    use_text_flow: bool = True,
    horizontal_ltr: bool = True,
    vertical_ttb: bool = True,
    extra_attrs: Optional[List[str]] = None,
    dpi: int = DEFAULT_PDF_DPI,
) -> Union[List[Layout], Tuple[List[Layout], List["Image.Image"]]]:
    """Load all tokens for each page from a PDF file, and save them
    in a list of Layout objects with the original page order.

    Args:
        filename (str): The path to the PDF file.
        load_images (bool, optional):
            Whether to load a screenshot for each page of the PDF file.
            When set to True, the function will return both the layout and
            the screenshot image for each page.
            Defaults to False.
        x_tolerance (float, optional):
            The threshold used for extracting "word tokens" from the pdf file.
            Characters are merged into one word token if the difference
            between the x_2 of one character and the x_1 of the next is less
            than or equal to x_tolerance. See pdfplumber's documentation for
            details.
            Defaults to 1.5.
        y_tolerance (float, optional):
            The threshold used for extracting "word tokens" from the pdf file.
            Characters are merged into one word token if the difference
            between the y_2 of one character and the y_1 of the next is less
            than or equal to y_tolerance. See pdfplumber's documentation for
            details.
            Defaults to 2.
        keep_blank_chars (bool, optional):
            When set to True, blank characters are treated as part of a word,
            not as a space between words. See pdfplumber's documentation for
            details.
            Defaults to False.
        use_text_flow (bool, optional):
            When set to True, use the PDF's underlying flow of characters as a
            guide for ordering and segmenting the words, rather than
            presorting the characters by x/y position. (This mimics how
            dragging a cursor highlights text in a PDF; as with that, the
            order does not always appear to be logical.) See pdfplumber's
            documentation for details.
            Defaults to True.
        horizontal_ltr (bool, optional):
            When set to True, the doc is read left-to-right, vice versa.
            Defaults to True.
        vertical_ttb (bool, optional):
            When set to True, the doc is read top-to-bottom, vice versa.
            Defaults to True.
        extra_attrs (Optional[List[str]], optional):
            Passing a list of extra_attrs (e.g., ["fontname", "size"]) will
            restrict each word to characters that share exactly the same
            value for each of those attributes extracted by pdfplumber, and
            the resulting word dicts will indicate those attributes. See
            pdfplumber's documentation for details.
            Defaults to `["fontname", "size"]`.
        dpi (int, optional):
            When loading images of the pdf, you can also specify the
            resolution (DPI, dots per inch) for rendering the images. Higher
            DPI values mean clearer images (also larger file sizes). Setting
            dpi also automatically rescales the extracted pdf_layout to match
            the sizes of the images, so the layouts can be rendered
            appropriately when visualized.
            Defaults to `DEFAULT_PDF_DPI=72`, which is also the default
            rendering dpi from the pdfplumber PDF parser.

    Returns:
        List[Layout]:
            When `load_images=False`, only the pdf tokens are loaded. Each
            element of the list denotes all the tokens appearing on a single
            page, ordered the same as the original PDF page order.
        Tuple[List[Layout], List["Image.Image"]]:
            When `load_images=True`, besides the `all_page_layout`, it will
            also return a list of page images.

    Examples::
        >>> import layoutparser as lp
        >>> pdf_layout = lp.load_pdf("path/to/pdf")
        >>> pdf_layout[0] # the layout for page 0
        >>> pdf_layout, pdf_images = lp.load_pdf("path/to/pdf", load_images=True)
        >>> lp.draw_box(pdf_images[0], pdf_layout[0])
    """
    all_page_layout = []

    # Use a context manager so the underlying PDF file handle is always
    # closed, even if token extraction fails part-way through (the original
    # code leaked the open pdfplumber object).
    with pdfplumber.open(filename) as plumber_pdf_object:
        for page_id, cur_page in enumerate(plumber_pdf_object.pages):
            page_tokens = extract_words_for_page(
                cur_page,
                x_tolerance=x_tolerance,
                y_tolerance=y_tolerance,
                keep_blank_chars=keep_blank_chars,
                use_text_flow=use_text_flow,
                horizontal_ltr=horizontal_ltr,
                vertical_ttb=vertical_ttb,
                extra_attrs=extra_attrs,
            )

            # Adding metadata for the current page
            page_tokens.page_data["width"] = float(cur_page.width)
            page_tokens.page_data["height"] = float(cur_page.height)
            page_tokens.page_data["index"] = page_id

            all_page_layout.append(page_tokens)

    if not load_images:
        return all_page_layout

    # Deferred import: pdf2image is only required when rendering screenshots.
    import pdf2image

    pdf_images = pdf2image.convert_from_path(filename, dpi=dpi)

    for page_id, page_image in enumerate(pdf_images):
        image_width, image_height = page_image.size
        page_layout = all_page_layout[page_id]
        layout_width = page_layout.page_data["width"]
        layout_height = page_layout.page_data["height"]
        if image_width != layout_width or image_height != layout_height:
            # Rescale the token coordinates (expressed in PDF points at
            # 72 DPI) to the rendered image resolution so that drawing
            # the layout on the image lines up.
            scale_x = image_width / layout_width
            scale_y = image_height / layout_height
            page_layout = page_layout.scale((scale_x, scale_y))
            page_layout.page_data["width"] = image_width
            page_layout.page_data["height"] = image_height
            all_page_layout[page_id] = page_layout

    return all_page_layout, pdf_images
--------------------------------------------------------------------------------
/src/layoutparser/misc/NotoSerifCJKjp-Regular.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/src/layoutparser/misc/NotoSerifCJKjp-Regular.otf
--------------------------------------------------------------------------------
/src/layoutparser/models/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from .detectron2.layoutmodel import Detectron2LayoutModel
16 | from .paddledetection.layoutmodel import PaddleDetectionLayoutModel
17 | from .effdet.layoutmodel import EfficientDetLayoutModel
18 | from .auto_layoutmodel import AutoLayoutModel
--------------------------------------------------------------------------------
/src/layoutparser/models/auto_layoutmodel.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from typing import Optional, Dict, Union, List
16 | from collections import defaultdict
17 |
18 | from .model_config import (
19 | is_lp_layout_model_config_any_format,
20 | )
21 | from ..file_utils import (
22 | is_effdet_available,
23 | is_detectron2_available,
24 | is_paddle_available,
25 | )
26 |
# Registry of layout-model backend classes that are importable in the current
# environment, keyed by each backend's DETECTOR_NAME.
ALL_AVAILABLE_BACKENDS = dict()
# Maps each dataset name (from the backends' MODEL_CATALOGs) to the list of
# backend names that ship a model trained on it; the insertion order also
# determines which backend is preferred for dataset-based lookup.
ALL_AVAILABLE_DATASETS = defaultdict(list)

# Each backend is registered only when its (optional) dependency is installed,
# so importing this module never fails on a missing deep-learning framework.
if is_effdet_available():
    from .effdet.layoutmodel import EfficientDetLayoutModel
    from .effdet.catalog import MODEL_CATALOG as _effdet_model_catalog

    # fmt: off
    ALL_AVAILABLE_BACKENDS[EfficientDetLayoutModel.DETECTOR_NAME] = EfficientDetLayoutModel
    for dataset_name in _effdet_model_catalog:
        ALL_AVAILABLE_DATASETS[dataset_name].append(EfficientDetLayoutModel.DETECTOR_NAME)
    # fmt: on

if is_detectron2_available():
    from .detectron2.layoutmodel import Detectron2LayoutModel
    from .detectron2.catalog import MODEL_CATALOG as _detectron2_model_catalog

    # fmt: off
    ALL_AVAILABLE_BACKENDS[Detectron2LayoutModel.DETECTOR_NAME] = Detectron2LayoutModel
    for dataset_name in _detectron2_model_catalog:
        ALL_AVAILABLE_DATASETS[dataset_name].append(Detectron2LayoutModel.DETECTOR_NAME)
    # fmt: on

if is_paddle_available():
    from .paddledetection.layoutmodel import PaddleDetectionLayoutModel
    from .paddledetection.catalog import MODEL_CATALOG as _paddle_model_catalog

    # fmt: off
    ALL_AVAILABLE_BACKENDS[PaddleDetectionLayoutModel.DETECTOR_NAME] = PaddleDetectionLayoutModel
    for dataset_name in _paddle_model_catalog:
        ALL_AVAILABLE_DATASETS[dataset_name].append(PaddleDetectionLayoutModel.DETECTOR_NAME)
    # fmt: on
59 |
60 |
def AutoLayoutModel(
    config_path: str,
    model_path: Optional[str] = None,
    label_map: Optional[Dict] = None,
    device: Optional[str] = None,
    extra_config: Optional[Union[Dict, List]] = None,
) -> "BaseLayoutModel":
    """Automatically instantiate the appropriate layout model for the given
    model config path, choosing among the installed backends
    (effdet / detectron2 / paddledetection).

    Args:
        config_path (:obj:`str`):
            The path to the configuration file.
        model_path (:obj:`str`, None):
            The path to the saved weights of the model.
            If set, overwrite the weights in the configuration file.
            Defaults to `None`.
        label_map (:obj:`dict`, optional):
            The map from the model prediction (ids) to real
            word labels (strings). If the config is from one of the supported
            datasets, Layout Parser will automatically initialize the label_map.
            Defaults to `None`.
        device(:obj:`str`, optional):
            Whether to use cuda or cpu devices. If not set, LayoutParser will
            automatically determine the device to initialize the models on.
        extra_config (:obj:`dict`, optional):
            Extra configuration passed used for initializing the layout model.

    Raises:
        ValueError:
            If `config_path` is not a valid layout model config, or no
            installed backend matches it.

    Returns:
        BaseLayoutModel: the created layout model instance.
    """
    if not is_lp_layout_model_config_any_format(config_path):
        raise ValueError(f"Invalid model config_path {config_path}")

    # Try to search for the model keywords
    for backend_name in ALL_AVAILABLE_BACKENDS:
        if backend_name in config_path:
            return ALL_AVAILABLE_BACKENDS[backend_name](
                config_path,
                model_path=model_path,
                label_map=label_map,
                extra_config=extra_config,
                device=device,
            )

    # Try to search for the dataset keywords
    # (falls back to the first backend registered for that dataset)
    for dataset_name in ALL_AVAILABLE_DATASETS:
        if dataset_name in config_path:
            return ALL_AVAILABLE_BACKENDS[ALL_AVAILABLE_DATASETS[dataset_name][0]](
                config_path,
                model_path=model_path,
                label_map=label_map,
                extra_config=extra_config,
                device=device,
            )

    raise ValueError(f"No available model found for {config_path}")
--------------------------------------------------------------------------------
/src/layoutparser/models/base_catalog.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from iopath.common.file_io import HTTPURLHandler
16 | from iopath.common.file_io import PathManager as PathManagerBase
17 |
18 | # A trick learned from https://github.com/facebookresearch/detectron2/blob/65faeb4779e4c142484deeece18dc958c5c9ad18/detectron2/utils/file_io.py#L3
19 |
20 |
class DropboxHandler(HTTPURLHandler):
    """
    Supports download and file check for dropbox links
    """

    def _get_supported_prefixes(self):
        # Only handle URLs under the Dropbox domain.
        return ["https://www.dropbox.com"]

    def _isfile(self, path):
        # A Dropbox URL counts as an existing file only after it has been
        # downloaded (i.e. it is present in the parent handler's cache map).
        return path in self.cache_map
31 |
32 |
# Package-wide PathManager instance; model catalogs register additional
# handlers (e.g. lp:// URL schemes) on it at import time.
PathManager = PathManagerBase()
PathManager.register_handler(DropboxHandler())
--------------------------------------------------------------------------------
/src/layoutparser/models/base_layoutmodel.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from typing import Optional, Tuple, Union, Dict
16 | from abc import ABC, abstractmethod
17 |
18 | from .model_config import LayoutModelConfig, add_identifier_for_config, layout_model_config_parser, is_lp_layout_model_config_any_format
19 | from ..file_utils import requires_backends
20 |
class BaseLayoutModel(ABC):
    """Abstract base class for all layout detection model backends.

    Subclasses declare their backend metadata (DEPENDENCIES, DETECTOR_NAME,
    MODEL_CATALOG) and implement `detect` / `image_loader`.
    """

    # TODO: Build a metaclass for lazy module loader
    @property
    @abstractmethod
    def DEPENDENCIES(self):
        """DEPENDENCIES lists all necessary dependencies for the class."""
        pass

    @property
    @abstractmethod
    def DETECTOR_NAME(self):
        """The canonical backend name, e.g. used inside lp:// config paths."""
        pass

    @property
    @abstractmethod
    def MODEL_CATALOG(self) -> Dict[str, Dict[str, str]]:
        """Mapping of dataset name -> {model architecture -> weights URL}."""
        pass

    @abstractmethod
    def detect(self, image: Union["np.ndarray", "Image.Image"]):
        """Run layout detection on the given image."""
        pass


    @abstractmethod
    def image_loader(self, image: Union["np.ndarray", "Image.Image"]):
        """It will process the input images appropriately to the target format."""
        pass

    def _parse_config(self, config_path:str, identifier:str) -> Union[LayoutModelConfig, str]:
        """Parse an lp:// style config path into a LayoutModelConfig for this
        backend; non-lp paths (regular URLs/files) are returned unchanged.
        """
        if is_lp_layout_model_config_any_format(config_path):
            config_path = add_identifier_for_config(config_path, identifier)
            for dataset_name in self.MODEL_CATALOG:
                if dataset_name in config_path:
                    default_model_arch = list(self.MODEL_CATALOG[dataset_name].keys())[0]
                    # Use the first model_name for the dataset as the default_model_arch
                    return layout_model_config_parser(config_path, self.DETECTOR_NAME, default_model_arch)
            raise ValueError(f"The config {config_path} is not a valid config for {self.__class__}, "
                             f"possibly because there aren't models trained for the specified dataset.")
        else:
            return config_path

    def config_parser(self, config_path:str, model_path: Optional[str], allow_empty_path=False) -> Tuple[str, str]:
        """Resolve (config_path, model_path) into two concrete path strings.

        When model_path is omitted and the config is an lp:// config, the
        matching weights path is derived from the config via `dual()`.

        Raises:
            ValueError: if config_path is a plain URL/file and model_path is
                empty while `allow_empty_path` is False.
        """
        config_path = self._parse_config(config_path, "config")

        if isinstance(config_path, str) and model_path is None:
            if not allow_empty_path:
                raise ValueError(
                    f"Invalid config and model path pairs ({(config_path, model_path)}):"
                    f"When config_path is a regular URL, the model_path should not be empty"
                )
            else:
                return config_path, model_path
        elif isinstance(config_path, LayoutModelConfig) and model_path is None:
            # Derive the weights path from the config's dual counterpart.
            model_path = config_path.dual()
        else:
            model_path = self._parse_config(model_path, "weight")

        # Normalize both to plain strings before returning.
        config_path = config_path if isinstance(config_path, str) else config_path.full
        model_path = model_path if isinstance(model_path, str) else model_path.full
        return config_path, model_path

    def __new__(cls, *args, **kwargs):
        # Fail fast with a helpful message if the backend's dependencies
        # (e.g. detectron2, effdet, paddle) are not installed.
        requires_backends(cls, cls.DEPENDENCIES)
        return super().__new__(cls)
--------------------------------------------------------------------------------
/src/layoutparser/models/detectron2/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import catalog as _UNUSED
16 | # A trick learned from
17 | # https://github.com/facebookresearch/detectron2/blob/62cf3a2b6840734d2717abdf96e2dd57ed6612a6/detectron2/checkpoint/__init__.py#L6
18 | from .layoutmodel import Detectron2LayoutModel
19 |
--------------------------------------------------------------------------------
/src/layoutparser/models/detectron2/catalog.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from iopath.common.file_io import PathHandler
16 |
17 | from ..base_catalog import PathManager
18 |
# Dataset name -> {model architecture -> pretrained weights URL}.
# Consumed by LayoutParserDetectron2ModelHandler to resolve lp:// paths
# ending in "/weight".
MODEL_CATALOG = {
    "HJDataset": {
        "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/6icw6at8m28a2ho/model_final.pth?dl=1",
        "mask_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/893paxpy5suvlx9/model_final.pth?dl=1",
        "retinanet_R_50_FPN_3x": "https://www.dropbox.com/s/yxsloxu3djt456i/model_final.pth?dl=1",
    },
    "PubLayNet": {
        "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/dgy9c10wykk4lq4/model_final.pth?dl=1",
        "mask_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/d9fc9tahfzyl6df/model_final.pth?dl=1",
        "mask_rcnn_X_101_32x8d_FPN_3x": "https://www.dropbox.com/s/57zjbwv6gh3srry/model_final.pth?dl=1",
    },
    "PrimaLayout": {
        "mask_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/h7th27jfv19rxiy/model_final.pth?dl=1"
    },
    "NewspaperNavigator": {
        "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/6ewh6g8rqt2ev3a/model_final.pth?dl=1",
    },
    "TableBank": {
        "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/8v4uqmz1at9v72a/model_final.pth?dl=1",
        "faster_rcnn_R_101_FPN_3x": "https://www.dropbox.com/s/6vzfk8lk9xvyitg/model_final.pth?dl=1",
    },
    "MFD": {
        "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/7xel0i3iqpm2p8y/model_final.pth?dl=1",
    },
}

# Dataset name -> {model architecture -> detectron2 config URL}.
# Resolves lp:// paths ending in "/config"; keys mirror MODEL_CATALOG.
CONFIG_CATALOG = {
    "HJDataset": {
        "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/j4yseny2u0hn22r/config.yml?dl=1",
        "mask_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/4jmr3xanmxmjcf8/config.yml?dl=1",
        "retinanet_R_50_FPN_3x": "https://www.dropbox.com/s/z8a8ywozuyc5c2x/config.yml?dl=1",
    },
    "PubLayNet": {
        "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/f3b12qc4hc0yh4m/config.yml?dl=1",
        "mask_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/u9wbsfwz4y0ziki/config.yml?dl=1",
        "mask_rcnn_X_101_32x8d_FPN_3x": "https://www.dropbox.com/s/nau5ut6zgthunil/config.yaml?dl=1",
    },
    "PrimaLayout": {
        "mask_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/yc92x97k50abynt/config.yaml?dl=1"
    },
    "NewspaperNavigator": {
        "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/wnido8pk4oubyzr/config.yml?dl=1",
    },
    "TableBank": {
        "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/7cqle02do7ah7k4/config.yaml?dl=1",
        "faster_rcnn_R_101_FPN_3x": "https://www.dropbox.com/s/h63n6nv51kfl923/config.yaml?dl=1",
    },
    "MFD": {
        "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/ld9izb95f19369w/config.yaml?dl=1",
    },
}

# Dataset name -> {prediction class id -> human-readable label}.
# Note: id ranges differ per dataset (some are 0-based, some 1-based).
# fmt: off
LABEL_MAP_CATALOG = {
    "HJDataset": {
        1: "Page Frame",
        2: "Row",
        3: "Title Region",
        4: "Text Region",
        5: "Title",
        6: "Subtitle",
        7: "Other",
    },
    "PubLayNet": {
        0: "Text",
        1: "Title",
        2: "List",
        3: "Table",
        4: "Figure"},
    "PrimaLayout": {
        1: "TextRegion",
        2: "ImageRegion",
        3: "TableRegion",
        4: "MathsRegion",
        5: "SeparatorRegion",
        6: "OtherRegion",
    },
    "NewspaperNavigator": {
        0: "Photograph",
        1: "Illustration",
        2: "Map",
        3: "Comics/Cartoon",
        4: "Editorial Cartoon",
        5: "Headline",
        6: "Advertisement",
    },
    "TableBank": {
        0: "Table"
    },
    "MFD": {
        1: "Equation"
    },
}
# fmt: on
114 |
class LayoutParserDetectron2ModelHandler(PathHandler):
    """
    Resolve anything that's in LayoutParser model zoo.
    """

    PREFIX = "lp://detectron2/"

    def _get_supported_prefixes(self):
        return [self.PREFIX]

    def _get_local_path(self, path, **kwargs):
        # Path layout: lp://detectron2/<dataset>/<model/arch/parts>/<data_type>
        remainder = path[len(self.PREFIX) :]
        dataset_name, *arch_parts, data_type = remainder.split("/")
        arch_name = "/".join(arch_parts)

        if data_type == "weight":
            model_url = MODEL_CATALOG[dataset_name][arch_name]
        elif data_type == "config":
            model_url = CONFIG_CATALOG[dataset_name][arch_name]
        else:
            raise ValueError(f"Unknown data_type {data_type}")
        # Delegate the actual download/caching to the registered URL handlers.
        return PathManager.get_local_path(model_url, **kwargs)

    def _open(self, path, mode="r", **kwargs):
        return PathManager.open(self._get_local_path(path), mode, **kwargs)
140 |
141 |
142 | PathManager.register_handler(LayoutParserDetectron2ModelHandler())
143 |
--------------------------------------------------------------------------------
/src/layoutparser/models/detectron2/layoutmodel.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from typing import Union
16 | from PIL import Image
17 | import numpy as np
18 | import warnings
19 |
20 | from .catalog import MODEL_CATALOG, PathManager, LABEL_MAP_CATALOG
21 | from ..base_layoutmodel import BaseLayoutModel
22 | from ...elements import Rectangle, TextBlock, Layout
23 | from ...file_utils import is_torch_cuda_available, is_detectron2_available
24 |
# detectron2 is an optional dependency: import it only when installed so the
# rest of layoutparser remains importable without it. BaseLayoutModel.__new__
# raises a helpful error if the class is instantiated without it.
if is_detectron2_available():
    import detectron2.engine
    import detectron2.config


__all__ = ["Detectron2LayoutModel"]
31 |
32 |
class Detectron2LayoutModel(BaseLayoutModel):
    """Create a Detectron2-based Layout Detection Model

    Args:
        config_path (:obj:`str`):
            The path to the configuration file.
        model_path (:obj:`str`, None):
            The path to the saved weights of the model.
            If set, overwrite the weights in the configuration file.
            Defaults to `None`.
        label_map (:obj:`dict`, optional):
            The map from the model prediction (ids) to real
            word labels (strings). If the config is from one of the supported
            datasets, Layout Parser will automatically initialize the label_map.
            Defaults to `None`.
        extra_config (:obj:`list`, optional):
            Extra configuration passed to the Detectron2 model
            configuration, consumed by the Detectron2 config's
            `merge_from_list` function (a flat list of alternating
            key/value entries). Defaults to `[]`.
        enforce_cpu (:obj:`bool`, optional):
            Deprecated; use `device` instead. The value is ignored beyond
            emitting a deprecation warning.
        device (:obj:`str`, optional):
            Whether to use cuda or cpu devices. If not set, LayoutParser will
            automatically determine the device to initialize the models on.

    Examples::
        >>> import layoutparser as lp
        >>> model = lp.Detectron2LayoutModel('lp://HJDataset/faster_rcnn_R_50_FPN_3x/config')
        >>> model.detect(image)

    """

    DEPENDENCIES = ["detectron2"]
    DETECTOR_NAME = "detectron2"
    MODEL_CATALOG = MODEL_CATALOG

    def __init__(
        self,
        config_path,
        model_path=None,
        label_map=None,
        extra_config=None,
        enforce_cpu=None,
        device=None,
    ):

        if enforce_cpu is not None:
            warnings.warn(
                "Setting enforce_cpu is deprecated. Please set `device` instead.",
                DeprecationWarning,
            )

        # Avoid a mutable default argument for extra_config.
        if extra_config is None:
            extra_config = []

        config_path, model_path = self.config_parser(
            config_path, model_path, allow_empty_path=True
        )

        # BUGFIX: the label map must be resolved *before* the lp:// path is
        # converted to a local file path below; once resolved, the path no
        # longer starts with "lp://" and the dataset name cannot be recovered,
        # so the label map would silently stay empty.
        if label_map is None:
            if config_path.startswith("lp://"):
                # Full format: lp://detectron2/<dataset_name>/<model_arch>/<identifier>.
                # Slice off the prefix by length (str.lstrip would strip a
                # character set, not the literal prefix).
                dataset_name = config_path[len("lp://") :].split("/")[1]
                label_map = LABEL_MAP_CATALOG[dataset_name]
            else:
                label_map = {}

        config_path = PathManager.get_local_path(config_path)

        cfg = detectron2.config.get_cfg()
        cfg.merge_from_file(config_path)
        cfg.merge_from_list(extra_config)

        if model_path is not None:
            model_path = PathManager.get_local_path(model_path)
            # Because it will be forwarded to the detectron2 paths
            cfg.MODEL.WEIGHTS = model_path

        # Default to cuda when available; fall back to cpu otherwise.
        if is_torch_cuda_available():
            if device is None:
                device = "cuda"
        else:
            device = "cpu"
        cfg.MODEL.DEVICE = device

        self.cfg = cfg

        self.label_map = label_map
        self._create_model()

    def _create_model(self):
        # DefaultPredictor wraps model construction, weight loading, and
        # the default detectron2 pre-/post-processing.
        self.model = detectron2.engine.DefaultPredictor(self.cfg)

    def gather_output(self, outputs):
        """Convert a raw detectron2 prediction dict into a :obj:`Layout`."""

        instance_pred = outputs["instances"].to("cpu")

        layout = Layout()
        scores = instance_pred.scores.tolist()
        boxes = instance_pred.pred_boxes.tensor.tolist()
        labels = instance_pred.pred_classes.tolist()

        for score, box, label in zip(scores, boxes, labels):
            x_1, y_1, x_2, y_2 = box

            # Map the numeric class id to a human-readable label when the
            # label_map knows it; otherwise keep the raw id.
            label = self.label_map.get(label, label)

            cur_block = TextBlock(
                Rectangle(x_1, y_1, x_2, y_2), type=label, score=score
            )
            layout.append(cur_block)

        return layout

    def detect(self, image):
        """Detect the layout of a given image.

        Args:
            image (:obj:`np.ndarray` or `PIL.Image`): The input image to detect.

        Returns:
            :obj:`~layoutparser.Layout`: The detected layout of the input image
        """

        image = self.image_loader(image)
        outputs = self.model(image)
        layout = self.gather_output(outputs)
        return layout

    def image_loader(self, image: Union["np.ndarray", "Image.Image"]):
        """Normalize the input to an RGB numpy array for the predictor."""
        # Convert PIL Image Input
        if isinstance(image, Image.Image):
            if image.mode != "RGB":
                image = image.convert("RGB")
            image = np.array(image)

        return image
169 |
--------------------------------------------------------------------------------
/src/layoutparser/models/effdet/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import catalog as _UNUSED
16 | from .layoutmodel import EfficientDetLayoutModel
17 |
--------------------------------------------------------------------------------
/src/layoutparser/models/effdet/catalog.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from iopath.common.file_io import PathHandler
16 |
17 | from ..base_catalog import PathManager
18 |
# Download URLs for the pre-trained EfficientDet weights,
# keyed by dataset name and then by model architecture.
MODEL_CATALOG = {
    "PubLayNet": {
        "tf_efficientdet_d0": "https://www.dropbox.com/s/ukbw5s673633hsw/publaynet-tf_efficientdet_d0.pth.tar?dl=1",
        "tf_efficientdet_d1": "https://www.dropbox.com/s/gxy11xkkiwnpgog/publaynet-tf_efficientdet_d1.pth.tar?dl=1"
    },
    "MFD": {
        "tf_efficientdet_d0": "https://www.dropbox.com/s/dkr22iux7thlhel/mfd-tf_efficientdet_d0.pth.tar?dl=1",
        "tf_efficientdet_d1": "https://www.dropbox.com/s/icmbiaqr5s9bz1x/mfd-tf_efficientdet_d1.pth.tar?dl=1"
    }
}

# In effdet training scripts, it requires the label_map starting
# from 1 instead of 0
LABEL_MAP_CATALOG = {
    "PubLayNet": {
        1: "Text",
        2: "Title",
        3: "List",
        4: "Table",
        5: "Figure"
    },
    "MFD": {
        1: "Equation",
    }
}
44 |
class LayoutParserEfficientDetModelHandler(PathHandler):
    """
    Resolve anything that's in LayoutParser model zoo.
    """

    PREFIX = "lp://efficientdet/"

    def _get_supported_prefixes(self):
        return [self.PREFIX]

    def _get_local_path(self, path, **kwargs):
        # After the prefix the path reads "<dataset>/<model.../>/<data_type>".
        dataset_name, *model_parts, data_type = path[len(self.PREFIX) :].split("/")

        if data_type != "weight":
            raise ValueError(f"Unknown data_type {data_type}")

        model_url = MODEL_CATALOG[dataset_name]["/".join(model_parts)]
        return PathManager.get_local_path(model_url, **kwargs)

    def _open(self, path, mode="r", **kwargs):
        # Resolve to a local file, then open through the shared PathManager.
        local_path = self._get_local_path(path)
        return PathManager.open(local_path, mode, **kwargs)
68 |
69 |
70 | PathManager.register_handler(LayoutParserEfficientDetModelHandler())
71 |
--------------------------------------------------------------------------------
/src/layoutparser/models/effdet/layoutmodel.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from typing import List, Optional, Union, Dict, Any, Tuple
16 |
17 | from PIL import Image
18 | import numpy as np
19 |
20 | from .catalog import PathManager, LABEL_MAP_CATALOG, MODEL_CATALOG
21 | from ..base_layoutmodel import BaseLayoutModel
22 | from ...elements import Rectangle, TextBlock, Layout
23 |
24 | from ...file_utils import is_effdet_available, is_torch_cuda_available
25 |
26 | if is_effdet_available():
27 | import torch
28 | from effdet import create_model
29 | from effdet.data.transforms import (
30 | IMAGENET_DEFAULT_MEAN,
31 | IMAGENET_DEFAULT_STD,
32 | transforms_coco_eval,
33 | )
34 | else:
35 | # Copied from https://github.com/rwightman/efficientdet-pytorch/blob/c5b694aa34900fdee6653210d856ca8320bf7d4e/effdet/data/transforms.py#L13
36 | # Such that when effdet is not loaded, we'll still have default values for IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
37 | IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
38 | IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
39 | # IMAGENET_INCEPTION_MEAN = (0.5, 0.5, 0.5)
40 | # IMAGENET_INCEPTION_STD = (0.5, 0.5, 0.5)
41 |
42 |
class InputTransform:
    """Image preprocessing pipeline for the EfficientDet layout models.

    Wraps effdet's COCO evaluation transform and applies the ImageNet
    mean/std normalization on the resulting (1, 3, H, W) tensor.
    """

    def __init__(
        self,
        image_size,
        mean=IMAGENET_DEFAULT_MEAN,
        std=IMAGENET_DEFAULT_STD,
    ):

        self.mean = mean
        self.std = std

        self.transform = transforms_coco_eval(
            image_size,
            interpolation="bilinear",
            use_prefetcher=True,
            fill_color="mean",
            mean=self.mean,
            std=self.std,
        )

        # Normalization constants scaled to the 0-255 pixel range and shaped
        # (1, 3, 1, 1) so they broadcast over an NCHW batch.
        self.mean_tensor = torch.tensor([channel * 255 for channel in mean]).view(1, 3, 1, 1)
        self.std_tensor = torch.tensor([channel * 255 for channel in std]).view(1, 3, 1, 1)

    def preprocess(self, image: Image) -> Tuple["torch.Tensor", Dict]:
        """Return a normalized batch tensor and per-image metadata tensors."""

        rgb_image = image.convert("RGB")
        image_info = {"img_size": rgb_image.size}

        transformed, image_info = self.transform(rgb_image, image_info)
        image_info = {
            key: torch.tensor(val).unsqueeze(0) for key, val in image_info.items()
        }

        batch = torch.tensor(transformed).unsqueeze(0)
        # In-place normalization: (x - mean) / std in the 0-255 domain.
        batch = batch.float().sub_(self.mean_tensor).div_(self.std_tensor)

        return batch, image_info
80 |
81 |
class EfficientDetLayoutModel(BaseLayoutModel):
    """Create a EfficientDet-based Layout Detection Model

    Args:
        config_path (:obj:`str`):
            The path to the configuration file.
        model_path (:obj:`str`, None):
            The path to the saved weights of the model.
            If set, overwrite the weights in the configuration file.
            Defaults to `None`.
        label_map (:obj:`dict`, optional):
            The map from the model prediction (ids) to real
            word labels (strings). If the config is from one of the supported
            datasets, Layout Parser will automatically initialize the label_map.
            Defaults to `None`.
        extra_config (:obj:`dict`, optional):
            Extra configuration passed to the EfficientDet model
            configuration. Currently supported arguments:
                num_classes: specifying the number of classes for the models
                output_confidence_threshold: minimum object prediction confidence to retain
        enforce_cpu (:obj:`bool`, optional):
            Deprecated and currently ignored; use `device` instead.
        device (:obj:`str`, optional):
            Whether to use cuda or cpu devices. If not set, LayoutParser will
            automatically determine the device to initialize the models on.

    Examples::
        >>> import layoutparser as lp
        >>> model = lp.EfficientDetLayoutModel("lp://PubLayNet/tf_efficientdet_d0/config")
        >>> model.detect(image)

    """

    DEPENDENCIES = ["effdet"]
    DETECTOR_NAME = "efficientdet"
    MODEL_CATALOG = MODEL_CATALOG

    # Detections scoring below this are dropped, unless overridden via
    # extra_config["output_confidence_threshold"].
    DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD = 0.25

    def __init__(
        self,
        config_path: str,
        model_path: str = None,
        label_map: Optional[Dict] = None,
        extra_config: Optional[Dict] = None,
        enforce_cpu: bool = False,
        device: str = None,
    ):

        # Default to cuda whenever it is available; when cuda is unavailable
        # the device is forced to cpu regardless of the caller's choice.
        if is_torch_cuda_available():
            if device is None:
                device = "cuda"
        else:
            device = "cpu"
        self.device = device

        extra_config = extra_config if extra_config is not None else {}

        self._initialize_model(config_path, model_path, label_map, extra_config)

        self.output_confidence_threshold = extra_config.get(
            "output_confidence_threshold", self.DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD
        )

        self.preprocessor = InputTransform(self.config.image_size)

    def _initialize_model(
        self,
        config_path: str,
        model_path: Optional[str],
        label_map: Optional[Dict],
        extra_config: Optional[Dict],
    ):
        """Resolve model name / classes / weights and build the effdet model.

        The two branches below only differ in how ``model_name``,
        ``label_map``, and ``num_classes`` are determined; the model
        construction itself is shared (previously duplicated in both
        branches).
        """

        config_path, model_path = self.config_parser(config_path, model_path)

        if config_path.startswith("lp://"):
            # Officially supported by layoutparser: dataset and architecture
            # are encoded in the lp:// path itself. Slice the prefix off by
            # length (str.lstrip would strip a character set, not the prefix).
            dataset_name, model_name = config_path[len("lp://") :].split("/")[1:3]

            if label_map is None:
                label_map = LABEL_MAP_CATALOG[dataset_name]
            num_classes = len(label_map)
        else:
            assert (
                model_path is not None
            ), f"When the specified model is not layoutparser-based, you need to specify the model_path"

            assert (
                label_map is not None or "num_classes" in extra_config
            ), "When the specified model is not layoutparser-based, you need to specify the label_map or add num_classes in the extra_config"

            model_name = config_path
            num_classes = len(label_map) if label_map else extra_config["num_classes"]

        model_path = PathManager.get_local_path(model_path)  # It might be an https URL

        self.model = create_model(
            model_name,
            num_classes=num_classes,
            bench_task="predict",
            pretrained=True,
            checkpoint_path=model_path,
        )

        self.model.to(self.device)
        self.model.eval()
        self.config = self.model.config
        self.label_map = label_map if label_map is not None else {}

    def detect(self, image: Union["np.ndarray", "Image.Image"]):
        """Detect the layout of a given image.

        Args:
            image (:obj:`np.ndarray` or `PIL.Image`): The input image to detect.

        Returns:
            :obj:`~layoutparser.Layout`: The detected layout of the input image
        """

        image = self.image_loader(image)

        model_inputs, image_info = self.preprocessor.preprocess(image)

        model_outputs = self.model(
            model_inputs.to(self.device),
            {key: val.to(self.device) for key, val in image_info.items()},
        )

        layout = self.gather_output(model_outputs)
        return layout

    def gather_output(self, model_outputs: "torch.Tensor") -> Layout:
        """Convert raw effdet detections into a :obj:`Layout`."""

        model_outputs = model_outputs.cpu().detach()
        box_predictions = Layout()

        for index, sample in enumerate(model_outputs):
            # Convert boxes in-place from (x1, y1, x2, y2) to (x, y, w, h).
            sample[:, 2] -= sample[:, 0]
            sample[:, 3] -= sample[:, 1]

            for det in sample:

                score = float(det[4])
                pred_cat = int(det[5])
                x, y, w, h = det[0:4].tolist()

                if (
                    score < self.output_confidence_threshold
                ):  # stop when below this threshold, scores in descending order
                    break

                box_predictions.append(
                    TextBlock(
                        block=Rectangle(x, y, w + x, h + y),
                        score=score,
                        id=index,
                        type=self.label_map.get(pred_cat, pred_cat),
                    )
                )

        return box_predictions

    def image_loader(self, image: Union["np.ndarray", "Image.Image"]):
        """Normalize the input to an RGB PIL image for the preprocessor."""

        # Convert cv2 Image Input
        if isinstance(image, np.ndarray):
            # In this case, we assume the image is loaded by cv2
            # and the channel order is BGR
            image = image[..., ::-1]
            image = Image.fromarray(image, mode="RGB")

        return image
257 |
--------------------------------------------------------------------------------
/src/layoutparser/models/model_config.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """
16 | Inside layoutparser, we support the following formats for specifying layout model configs
17 | or weights:
18 |
19 | 1. URL-based formats:
20 | - A local path: ~/models/publaynet/path
21 | - Link to the models: https://web/url/to/models
22 |
2. LayoutParser Based Model/Config Path Formats:
    - Full format: lp://<backend-name>/<dataset-name>/<model-arch-name>
    - Short format: lp://<dataset-name>/<model-arch-name>
    - Brief format: lp://<dataset-name>

For each LayoutParser-based format, you could also add a `config` or `weight` identifier
after them:
    - Full format: lp://<backend-name>/<dataset-name>/<model-arch-name>/<identifier>
    - Short format: lp://<dataset-name>/<model-arch-name>/<identifier>
    - Brief format: lp://<dataset-name>/<model-arch-name>
"""
34 |
35 | from dataclasses import dataclass
36 |
LAYOUT_PARSER_MODEL_PREFIX = "lp://"
ALLOWED_LAYOUT_MODEL_IDENTIFIER_NAMES = ["config", "weight"]


@dataclass
class LayoutModelConfig:
    """Structured representation of a LayoutParser ``lp://`` model path."""

    backend_name: str
    dataset_name: str
    model_arch: str
    identifier: str

    def __post_init__(self):
        assert self.identifier in ALLOWED_LAYOUT_MODEL_IDENTIFIER_NAMES

    def _as_path(self, *segments):
        # Join the given segments under the lp:// prefix.
        return LAYOUT_PARSER_MODEL_PREFIX + "/".join(segments)

    @property
    def full(self):
        """lp://<backend_name>/<dataset_name>/<model_arch>/<identifier>"""
        return self._as_path(
            self.backend_name, self.dataset_name, self.model_arch, self.identifier
        )

    @property
    def short(self):
        """lp://<dataset_name>/<model_arch>/<identifier>"""
        return self._as_path(self.dataset_name, self.model_arch, self.identifier)

    @property
    def brief(self):
        """lp://<dataset_name>/<model_arch>"""
        return self._as_path(self.dataset_name, self.model_arch)

    def dual(self):
        """Return the same config with the complementary identifier
        (config <-> weight)."""
        other = next(
            name
            for name in ALLOWED_LAYOUT_MODEL_IDENTIFIER_NAMES
            if name != self.identifier
        )
        return self.__class__(
            backend_name=self.backend_name,
            dataset_name=self.dataset_name,
            model_arch=self.model_arch,
            identifier=other,
        )


def is_lp_layout_model_config_any_format(config: str) -> bool:
    """Return True when *config* is an lp:// path with one to four segments."""
    if not config.startswith(LAYOUT_PARSER_MODEL_PREFIX):
        return False
    segment_count = len(config[len(LAYOUT_PARSER_MODEL_PREFIX) :].split("/"))
    return segment_count in (1, 2, 3, 4)
87 |
88 |
def add_identifier_for_config(config: str, identifier: str) -> str:
    """Append ``/<identifier>`` to *config*, without duplicating an existing one.

    BUGFIX: the previous ``config.rstrip(f"/{identifier}")`` stripped a
    *character set* rather than a suffix, which could eat trailing characters
    of the model-arch name (e.g. ``lp://X/faster_rcnn`` + ``config`` ->
    ``lp://X/faster_r/config``). Suffix handling is now explicit.
    """
    config = config.rstrip("/")
    suffix = f"/{identifier}"
    if config.endswith(suffix):
        # Already carries the identifier; nothing to append.
        return config
    return config + suffix
91 |
92 |
def layout_model_config_parser(
    config, backend_name=None, model_arch=None
) -> LayoutModelConfig:
    """Parse an lp:// config string into a :class:`LayoutModelConfig`.

    Args:
        config (str): An lp:// path that already ends with an identifier
            (``config`` or ``weight``); see :func:`add_identifier_for_config`.
        backend_name (str, optional): Fallback backend name, required for
            formats that do not encode it.
        model_arch (str, optional): Fallback model architecture, required for
            formats that do not encode it.

    Raises:
        ValueError: If the config does not match any supported lp:// format.
    """

    assert config.split("/")[-1] in ALLOWED_LAYOUT_MODEL_IDENTIFIER_NAMES, (
        f"The input config {config} does not contain identifier information."
        f"Consider run `config = add_identifier_for_config(config, identifier)` first."
    )

    parts = config[len(LAYOUT_PARSER_MODEL_PREFIX) :].split("/")
    if len(parts) == 4:  # Full format
        # lp://<backend_name>/<dataset_name>/<model_arch>/<identifier>
        backend_name, dataset_name, model_arch, identifier = parts
    elif len(parts) == 3:  # Short format
        assert backend_name is not None

        if parts[0] == backend_name:
            # lp://<backend_name>/<dataset_name>/<identifier>
            assert model_arch is not None
            _, dataset_name, identifier = parts
        else:
            # lp://<dataset_name>/<model_arch>/<identifier>
            dataset_name, model_arch, identifier = parts

    elif len(parts) == 2:  # brief format
        assert backend_name is not None
        assert model_arch is not None
        if parts[0] == backend_name:
            # lp://<backend_name>/<identifier> carries no dataset name
            raise ValueError(f"Invalid LP Model Config {config}")

        # lp://<dataset_name>/<identifier>
        dataset_name, identifier = parts
    else:
        raise ValueError(f"Invalid LP Model Config {config}")

    return LayoutModelConfig(
        backend_name=backend_name,
        dataset_name=dataset_name,
        model_arch=model_arch,
        identifier=identifier,
    )
134 |
--------------------------------------------------------------------------------
/src/layoutparser/models/paddledetection/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team and Paddle Detection model
2 | # contributors. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | from . import catalog as _UNUSED
17 | from .layoutmodel import PaddleDetectionLayoutModel
18 |
--------------------------------------------------------------------------------
/src/layoutparser/models/paddledetection/catalog.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team and Paddle Detection model
2 | # contributors. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | import os
17 | import logging
18 | from typing import Any, Optional
19 | from urllib.parse import urlparse
20 | import tarfile
21 | import uuid
22 |
23 | from iopath.common.file_io import PathHandler
24 | from iopath.common.file_io import HTTPURLHandler
25 | from iopath.common.file_io import get_cache_dir, file_lock
26 | from iopath.common.download import download
27 |
28 | from ..base_catalog import PathManager
29 |
# Download URLs for the pre-trained PaddleDetection layout models,
# keyed by dataset name and then by model architecture.
MODEL_CATALOG = {
    "PubLayNet": {
        "ppyolov2_r50vd_dcn_365e": "https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_publaynet.tar",
    },
    "TableBank": {
        "ppyolov2_r50vd_dcn_365e": "https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_tableBank_word.tar",
        # "ppyolov2_r50vd_dcn_365e_tableBank_latex": "https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_tableBank_latex.tar",
        # TODO: Train a single tablebank model for paddlepaddle
    },
}

# Mapping from predicted class ids (0-based here) to human-readable labels.
# fmt: off
LABEL_MAP_CATALOG = {
    "PubLayNet": {
        0: "Text",
        1: "Title",
        2: "List",
        3: "Table",
        4: "Figure"},
    "TableBank": {
        0: "Table"
    },
}
# fmt: on


# Paddle model package everything in tar files, and each model's tar file should contain
# the following files in the list:
_TAR_FILE_NAME_LIST = [
    "inference.pdiparams",
    "inference.pdiparams.info",
    "inference.pdmodel",
]
63 |
64 |
65 | def _get_untar_directory(tar_file: str) -> str:
66 |
67 | base_path = os.path.dirname(tar_file)
68 | file_name = os.path.splitext(os.path.basename(tar_file))[0]
69 | target_folder = os.path.join(base_path, file_name)
70 |
71 | return target_folder
72 |
73 |
def _untar_model_weights(model_tar):
    """Extract the required model files from *model_tar* into a sibling
    directory (see `_get_untar_directory`) and return that directory."""

    model_dir = _get_untar_directory(model_tar)

    # Only (re-)extract when key model files are missing from the target folder.
    if not os.path.exists(
        os.path.join(model_dir, _TAR_FILE_NAME_LIST[0])
    ) or not os.path.exists(os.path.join(model_dir, _TAR_FILE_NAME_LIST[2])):
        # the path to save the decompressed file
        os.makedirs(model_dir, exist_ok=True)
        with tarfile.open(model_tar, "r") as tarobj:
            for member in tarobj.getmembers():
                # Map this tar member to one of the expected filenames.
                # NOTE: this is substring matching, and "inference.pdiparams"
                # is a substring of "inference.pdiparams.info" — the *last*
                # match in _TAR_FILE_NAME_LIST wins, which resolves that
                # ambiguity; do not reorder the list or this loop.
                filename = None
                for tar_file_name in _TAR_FILE_NAME_LIST:
                    if tar_file_name in member.name:
                        filename = tar_file_name
                if filename is None:
                    # Not one of the files we need; skip it.
                    continue
                file = tarobj.extractfile(member)
                with open(os.path.join(model_dir, filename), "wb") as model_file:
                    model_file.write(file.read())
    return model_dir
96 |
97 |
def is_cached_folder_exists_and_valid(cached):
    """Return True when the extracted folder for *cached* exists and contains
    every required model file from ``_TAR_FILE_NAME_LIST``."""
    extracted_folder = _get_untar_directory(cached)
    if not os.path.exists(extracted_folder):
        return False
    return all(
        os.path.exists(os.path.join(extracted_folder, name))
        for name in _TAR_FILE_NAME_LIST
    )
106 |
107 |
class PaddleModelURLHandler(HTTPURLHandler):
    """
    Supports download and file check for Baidu Cloud links
    """

    # Filenames longer than this are truncated (with a uuid suffix) before caching.
    MAX_FILENAME_LEN = 250

    def _get_supported_prefixes(self):
        return ["https://paddle-model-ecology.bj.bcebos.com"]

    def _isfile(self, path):
        # A URL only counts as a "file" once it has been downloaded and cached.
        return path in self.cache_map

    def _get_local_path(
        self,
        path: str,
        force: bool = False,
        cache_dir: Optional[str] = None,
        **kwargs: Any,
    ) -> str:
        """
        As paddle model stores all files in tar files, we need to extract them
        and get the newly extracted folder path. This function rewrites the base
        function to support the following situations:

        1. If the tar file is not downloaded, it will download the tar file,
           extract it to the target folder, delete the downloaded tar file,
           and return the folder path.
        2. If the extracted target folder is present, and all the necessary model
           files are present (specified in _TAR_FILE_NAME_LIST), it will
           return the folder path.
        3. If the tar file is downloaded, but the extracted target folder is not
           present (or it doesn't contain the necessary files in _TAR_FILE_NAME_LIST),
           it will extract the tar file to the target folder, delete the tar file,
           and return the folder path.

        """
        self._check_kwargs(kwargs)
        if (
            force
            or path not in self.cache_map
            or not os.path.exists(self.cache_map[path])
        ):
            logger = logging.getLogger(__name__)
            parsed_url = urlparse(path)
            dirname = os.path.join(
                get_cache_dir(cache_dir), os.path.dirname(parsed_url.path.lstrip("/"))
            )
            filename = path.split("/")[-1]
            if len(filename) > self.MAX_FILENAME_LEN:
                filename = filename[:100] + "_" + uuid.uuid4().hex

            cached = os.path.join(dirname, filename)

            if is_cached_folder_exists_and_valid(cached):
                # When the cached folder exists and valid, we don't need to redownload
                # the tar file.
                self.cache_map[path] = _get_untar_directory(cached)

            else:
                with file_lock(cached):
                    if not os.path.isfile(cached):
                        logger.info("Downloading {} ...".format(path))
                        cached = download(path, dirname, filename=filename)

                if path.endswith(".tar"):
                    model_dir = _untar_model_weights(cached)
                    try:
                        os.remove(cached)  # remove the redundant tar file
                        # TODO: remove the .lock file .
                    except OSError:
                        # BUGFIX: was a bare `except:`, which also swallowed
                        # KeyboardInterrupt/SystemExit; only filesystem errors
                        # are expected (and tolerable) here.
                        logger.warning(
                            f"Not able to remove the cached tar file {cached}"
                        )
                else:
                    # BUGFIX: previously `model_dir` was undefined for non-tar
                    # downloads, raising NameError below; cache the file as-is.
                    model_dir = cached

                logger.info("URL {} cached in {}".format(path, model_dir))
                self.cache_map[path] = model_dir

        return self.cache_map[path]
187 |
188 |
class LayoutParserPaddleModelHandler(PathHandler):
    """
    Resolve anything that's in LayoutParser model zoo.
    """

    PREFIX = "lp://paddledetection/"

    def _get_supported_prefixes(self):
        return [self.PREFIX]

    def _get_local_path(self, path, **kwargs):
        # After the prefix the path reads "<dataset>/<model.../>/<data_type>".
        dataset_name, *model_parts, data_type = path[len(self.PREFIX) :].split("/")

        if data_type != "weight":
            raise ValueError(f"Unknown data_type {data_type}")

        model_url = MODEL_CATALOG[dataset_name]["/".join(model_parts)]
        return PathManager.get_local_path(model_url, **kwargs)

    def _open(self, path, mode="r", **kwargs):
        # Resolve to a local path first, then open through the shared PathManager.
        resolved = self._get_local_path(path)
        return PathManager.open(resolved, mode, **kwargs)
211 |
212 |
213 | PathManager.register_handler(PaddleModelURLHandler())
214 | PathManager.register_handler(LayoutParserPaddleModelHandler())
215 |
--------------------------------------------------------------------------------
/src/layoutparser/ocr/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from .gcv_agent import GCVAgent, GCVFeatureType
16 | from .tesseract_agent import TesseractAgent, TesseractFeatureType
--------------------------------------------------------------------------------
/src/layoutparser/ocr/base.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from abc import ABC, abstractmethod
16 | from enum import IntEnum
17 |
18 | from ..file_utils import requires_backends
19 |
class BaseOCRElementType(IntEnum):
    """Base enum for the element granularities an OCR agent can return."""

    @property
    @abstractmethod
    def attr_name(self):
        """Name of the attribute in the OCR response that stores this level's index."""
        pass
25 |
26 |
class BaseOCRAgent(ABC):
    """Abstract base class for OCR agents; checks backend availability on creation."""

    @property
    @abstractmethod
    def DEPENDENCIES(self):
        """DEPENDENCIES lists all necessary dependencies for the class."""
        pass

    def __new__(cls, *args, **kwargs):
        # Fail fast at instantiation time when a required backend is missing.
        requires_backends(cls, cls.DEPENDENCIES)
        return super().__new__(cls)

    @abstractmethod
    def detect(self, image):
        """Run OCR detection on *image*."""
        pass
42 |
--------------------------------------------------------------------------------
/src/layoutparser/ocr/tesseract_agent.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import io
16 | import csv
17 | import pickle
18 |
19 | import pandas as pd
20 |
21 | from .base import BaseOCRAgent, BaseOCRElementType
22 | from ..io import load_dataframe
23 | from ..file_utils import is_pytesseract_available
24 |
25 | if is_pytesseract_available():
26 | import pytesseract
27 |
28 |
class TesseractFeatureType(BaseOCRElementType):
    """Hierarchy levels reported by the Tesseract Detection API.

    Members are ordered from the coarsest level to the finest:
    page > block > paragraph > line > word.
    """

    PAGE = 0
    BLOCK = 1
    PARA = 2
    LINE = 3
    WORD = 4

    @property
    def attr_name(self):
        """Column name used for this level in Tesseract's data output."""
        # Member values index directly into the ordered column hierarchy.
        columns = ("page_num", "block_num", "par_num", "line_num", "word_num")
        return columns[self.value]

    @property
    def group_levels(self):
        """All column names from the page level down to (and including) this level."""
        columns = ["page_num", "block_num", "par_num", "line_num", "word_num"]
        return columns[: self.value + 1]
55 |
56 |
class TesseractAgent(BaseOCRAgent):
    """
    A wrapper for `Tesseract <https://github.com/tesseract-ocr/tesseract>`_ Text
    Detection APIs based on `PyTesseract <https://github.com/madmaze/pytesseract>`_.
    """

    DEPENDENCIES = ["pytesseract"]

    def __init__(self, languages="eng", **kwargs):
        """Create a Tesseract OCR Agent.

        Args:
            languages (:obj:`list` or :obj:`str`, optional):
                You can specify the language code(s) of the documents to detect to improve
                accuracy. The supported languages and their codes can be found on
                `the Tesseract documentation <https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html>`_.
                It supports two formats: 1) you can pass in the languages code as a string
                of format like `"eng+fra"`, or 2) you can pack them as a list of strings
                `["eng", "fra"]`.
                Defaults to 'eng'.
        """
        # Tesseract expects one "+"-joined string of language codes.
        self.lang = languages if isinstance(languages, str) else "+".join(languages)
        self.configs = kwargs

    @classmethod
    def with_tesseract_executable(cls, tesseract_cmd_path, **kwargs):
        """Create a TesseractAgent that uses a specific tesseract binary.

        Args:
            tesseract_cmd_path (:obj:`str`):
                Path to the tesseract executable to use.

        Returns:
            TesseractAgent: The created agent instance.
        """
        # NOTE: this sets a process-wide pytesseract option, so it affects
        # every TesseractAgent created afterwards as well.
        pytesseract.pytesseract.tesseract_cmd = tesseract_cmd_path
        return cls(**kwargs)

    def _detect(self, img_content):
        """Run Tesseract once and collect both the plain text and the token data."""
        res = {}
        res["text"] = pytesseract.image_to_string(
            img_content, lang=self.lang, **self.configs
        )
        _data = pytesseract.image_to_data(img_content, lang=self.lang, **self.configs)
        # image_to_data returns a TSV string; QUOTE_NONE is needed because
        # the OCR'ed text can contain unbalanced quote characters, and the
        # "text" converter keeps values as strings (e.g. preserves "nan").
        res["data"] = pd.read_csv(
            io.StringIO(_data),
            quoting=csv.QUOTE_NONE,
            encoding="utf-8",
            sep="\t",
            converters={"text": str},
        )
        return res

    def detect(
        self, image, return_response=False, return_only_text=True, agg_output_level=None
    ):
        """Send the input image for OCR.

        Args:
            image (:obj:`np.ndarray` or :obj:`str`):
                The input image array or the name of the image file
            return_response (:obj:`bool`, optional):
                Whether directly return all output (string and boxes
                info) from Tesseract.
                Defaults to `False`.
            return_only_text (:obj:`bool`, optional):
                Whether return only the texts in the OCR results.
                Defaults to `True`. Set it to `False` when using
                `agg_output_level`, since this flag takes precedence.
            agg_output_level (:obj:`~TesseractFeatureType`, optional):
                When set, aggregate the Tesseract output with respect to the
                specified aggregation level. Defaults to `None`.
        """

        res = self._detect(image)

        # Flags are checked in priority order: full response, text only,
        # then the level-aggregated layout; plain text is the fallback.
        if return_response:
            return res

        if return_only_text:
            return res["text"]

        if agg_output_level is not None:
            return self.gather_data(res, agg_output_level)

        return res["text"]

    @staticmethod
    def gather_data(response, agg_level):
        """
        Gather the OCR'ed text, bounding boxes, and confidence
        in a given aggregation level.
        """
        assert isinstance(
            agg_level, TesseractFeatureType
        ), f"Invalid agg_level {agg_level}"
        res = response["data"]
        # For each group at the requested level, compute the enclosing box
        # (min corner + max extent), the mean confidence, and the joined text,
        # then convert (x_1, y_1, w, h) into (x_1, y_1, x_2, y_2) blocks.
        df = (
            res[~res.text.isna()]
            .groupby(agg_level.group_levels)
            .apply(
                lambda gp: pd.Series(
                    [
                        gp["left"].min(),
                        gp["top"].min(),
                        gp["width"].max(),
                        gp["height"].max(),
                        gp["conf"].mean(),
                        gp["text"].str.cat(sep=" "),
                    ]
                )
            )
            .reset_index(drop=True)
            .reset_index()
            .rename(
                columns={
                    0: "x_1",
                    1: "y_1",
                    2: "w",
                    3: "h",
                    4: "score",
                    5: "text",
                    "index": "id",
                }
            )
            .assign(
                x_2=lambda x: x.x_1 + x.w,
                y_2=lambda x: x.y_1 + x.h,
                block_type="rectangle",
            )
            .drop(columns=["w", "h"])
        )

        return load_dataframe(df)

    @staticmethod
    def load_response(filename):
        """Load a pickled OCR response previously stored with :meth:`save_response`."""
        with open(filename, "rb") as fp:
            res = pickle.load(fp)
        return res

    @staticmethod
    def save_response(res, file_name):
        """Pickle the OCR response ``res`` to ``file_name`` for later reuse."""
        with open(file_name, "wb") as fp:
            pickle.dump(res, fp, protocol=pickle.HIGHEST_PROTOCOL)
194 |
--------------------------------------------------------------------------------
/src/layoutparser/tools/__init__.py:
--------------------------------------------------------------------------------
1 | from .shape_operations import (
2 | generalized_connected_component_analysis_1d,
3 | simple_line_detection,
4 | group_textblocks_based_on_category,
5 | )
6 |
--------------------------------------------------------------------------------
/src/layoutparser/tools/shape_operations.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from typing import List, Union, Any, Callable, Iterable
16 | from functools import partial, reduce
17 |
18 | import numpy as np
19 | from scipy.sparse import csr_matrix
20 | from scipy.sparse.csgraph import connected_components
21 |
22 | from ..elements import BaseLayoutElement, TextBlock
23 |
24 |
def generalized_connected_component_analysis_1d(
    sequence: List[Any],
    scoring_func: Callable[[Any, Any], int],
    aggregation_func: Union[Callable[[List[Any]], Any], None] = None,
    default_score_value: int = 0,
) -> List[Any]:
    """Perform connected component analysis for any 1D sequence based on
    the scoring function and the aggregation function.
    It will generate the adjacency_matrix for the 1D sequence object using
    the provided `scoring_func` and find the connected components.
    The `aggregation_func` will be used to aggregate all elements within
    identified components (when not set, it will be the identity function).

    Args:
        sequence (List[Any]):
            The provided 1D sequence of objects.
        scoring_func (Callable[[Any, Any], int]):
            The scoring function used to construct the adjacency_matrix.
            It should take two objects in the sequence and produce an integer.
        aggregation_func (Callable[[List[Any]], Any], optional):
            The function used to aggregate the elements within an identified
            component.
            Defaults to the identity function: `lambda x: x`.
        default_score_value (int, optional):
            Used to set the default (background) score values that should
            not be considered when running connected component analysis.
            Defaults to 0.

    Returns:
        List[Any]: A list of length n - the number of the detected components.
    """

    if aggregation_func is None:
        aggregation_func = lambda x: x  # Identity function

    seq_len = len(sequence)
    adjacency_matrix = np.full((seq_len, seq_len), default_score_value, dtype=float)

    # Score each unordered pair once; the lower triangle stays at the default
    # value, which is sufficient because the graph is treated as undirected.
    for i in range(seq_len):
        for j in range(i + 1, seq_len):
            adjacency_matrix[i, j] = scoring_func(sequence[i], sequence[j])

    graph = csr_matrix(adjacency_matrix)
    n_components, labels = connected_components(
        csgraph=graph, directed=False, return_labels=True
    )

    # Collect the members of each component and aggregate them.
    grouped_sequence = []
    for comp_idx in range(n_components):
        element_idx = np.where(labels == comp_idx)[0]
        grouped_sequence.append(aggregation_func([sequence[i] for i in element_idx]))

    return grouped_sequence
78 |
79 |
def simple_line_detection(
    layout: Iterable[BaseLayoutElement], x_tolerance: int = 10, y_tolerance: int = 10
) -> List[BaseLayoutElement]:
    """Perform line detection based on connected component analysis.

    The is_line_wise_close is the scoring function, which returns True
    if the y-difference is smaller than the y_tolerance AND the
    x-difference (the horizontal gap between two boxes) is also smaller
    than the x_tolerance, and False otherwise.

    All the detected components will then be passed into aggregation_func,
    which returns the overall union box of all the elements, or the line
    box.

    Args:
        layout (Iterable):
            A list (or Layout) of BaseLayoutElement
        x_tolerance (int, optional):
            The value used for specifying the maximum allowed horizontal gap
            when considering whether two tokens are from the same line.
            Defaults to 10.
        y_tolerance (int, optional):
            The value used for specifying the maximum allowed y-difference
            when considering whether two tokens are from the same line.
            Defaults to 10.

    Returns:
        List[BaseLayoutElement]: A list of BaseLayoutElement, denoting the line boxes.
    """

    def is_line_wise_close(token_a, token_b, x_tolerance, y_tolerance):
        """Whether two tokens are close enough to sit on the same line."""
        y_a = token_a.block.center[1]
        y_b = token_b.block.center[1]

        a_left, a_right = token_a.block.coordinates[0::2]
        b_left, b_right = token_b.block.coordinates[0::2]

        # If the y-difference is smaller than the y_tolerance AND
        # the x-difference (the horizontal gap between two boxes)
        # is also smaller than the x_tolerance threshold, then
        # these two tokens are considered as line-wise close.
        return (
            abs(y_a - y_b) <= y_tolerance
            and min(abs(a_left - b_right), abs(a_right - b_left)) <= x_tolerance
        )

    # BUGFIX: the tolerances used to be bound swapped here
    # (x_tolerance passed as y_tolerance and vice versa); this went
    # unnoticed with the equal defaults but was wrong whenever a caller
    # supplied different values.
    detected_lines = generalized_connected_component_analysis_1d(
        layout,
        scoring_func=partial(
            is_line_wise_close, x_tolerance=x_tolerance, y_tolerance=y_tolerance
        ),
        aggregation_func=lambda seq: reduce(layout[0].__class__.union, seq),
    )

    return detected_lines
135 |
136 |
def group_textblocks_based_on_category(
    layout: Iterable[TextBlock], union_group: bool = True
) -> Union[List[TextBlock], List[List[TextBlock]]]:
    """Group textblocks based on their category (block.type).

    Args:
        layout (Iterable):
            A list (or Layout) of TextBlock
        union_group (bool):
            Whether to union the boxes within each group.
            Defaults to True.

    Returns:
        List[TextBlock]: When `union_group=True`, it produces a list of
            TextBlocks, denoting the boundaries of each textblock group.
        List[List[TextBlock]]: When `union_group=False`, it preserves
            the elements within each group for further processing.
    """

    if union_group:
        # Merge every detected component into a single enclosing box.
        aggregation_func = lambda seq: reduce(layout[0].__class__.union, seq)
    else:
        # Keep the raw member lists of each component.
        aggregation_func = None

    detected_group_boxes = generalized_connected_component_analysis_1d(
        layout,
        scoring_func=lambda a, b: a.type == b.type,
        aggregation_func=aggregation_func,
    )

    return detected_group_boxes
168 |
--------------------------------------------------------------------------------
/tests/fixtures/io/empty.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/tests/fixtures/io/empty.pdf
--------------------------------------------------------------------------------
/tests/fixtures/io/example.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/tests/fixtures/io/example.pdf
--------------------------------------------------------------------------------
/tests/fixtures/io/generate_test_jsons.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import json
16 | import numpy as np
17 | from layoutparser.elements import Interval, Rectangle, Quadrilateral, TextBlock, Layout
18 |
if __name__ == "__main__":

    # Build one element of each shape type plus a layout holding all three.
    interval = Interval(1, 2, "y", canvas_height=5)
    rectangle = Rectangle(1, 2, 3, 4)
    quad = Quadrilateral(np.arange(8).reshape(4, 2), 200, 400)
    layout = Layout(
        [interval, rectangle, quad], page_data={"width": 200, "height": 200}
    )

    # Serialize the bare shape elements and the layout.
    for target, element in [
        ("interval.json", interval),
        ("rectangle.json", rectangle),
        ("quadrilateral.json", quad),
        ("layout.json", layout),
    ]:
        with open(target, "w") as fp:
            json.dump(element.to_dict(), fp)
    layout.to_dataframe().to_csv("layout.csv", index=None)

    # The same fixtures, wrapped as TextBlocks with assorted metadata.
    interval_tb = TextBlock(interval, "")
    rectangle_tb = TextBlock(rectangle, id=24)
    quad_tb = TextBlock(quad, text="test", parent=45)
    layout_tb = Layout([interval_tb, rectangle_tb, quad_tb])

    for target, element in [
        ("interval_textblock.json", interval_tb),
        ("rectangle_textblock.json", rectangle_tb),
        ("quadrilateral_textblock.json", quad_tb),
        ("layout_textblock.json", layout_tb),
    ]:
        with open(target, "w") as fp:
            json.dump(element.to_dict(), fp)
    layout_tb.to_dataframe().to_csv("layout_textblock.csv", index=None)
--------------------------------------------------------------------------------
/tests/fixtures/io/interval.json:
--------------------------------------------------------------------------------
1 | {"start": 1, "end": 2, "axis": "y", "canvas_height": 5, "canvas_width": 0, "block_type": "interval"}
--------------------------------------------------------------------------------
/tests/fixtures/io/interval_textblock.json:
--------------------------------------------------------------------------------
1 | {"start": 1, "end": 2, "axis": "y", "canvas_height": 5, "canvas_width": 0, "block_type": "interval", "text": ""}
--------------------------------------------------------------------------------
/tests/fixtures/io/layout.csv:
--------------------------------------------------------------------------------
1 | start,end,axis,canvas_height,canvas_width,block_type,x_1,y_1,x_2,y_2,points,height,width
2 | 1.0,2.0,y,5.0,0.0,interval,,,,,,,
3 | ,,,,,rectangle,1.0,2.0,3.0,4.0,,,
4 | ,,,,,quadrilateral,,,,,"[0, 1, 2, 3, 4, 5, 6, 7]",200.0,400.0
5 |
--------------------------------------------------------------------------------
/tests/fixtures/io/layout.json:
--------------------------------------------------------------------------------
1 | {"page_data": {"width": 200, "height": 200}, "blocks": [{"start": 1, "end": 2, "axis": "y", "canvas_height": 5, "canvas_width": 0, "block_type": "interval"}, {"x_1": 1, "y_1": 2, "x_2": 3, "y_2": 4, "block_type": "rectangle"}, {"points": [0, 1, 2, 3, 4, 5, 6, 7], "height": 200, "width": 400, "block_type": "quadrilateral"}]}
--------------------------------------------------------------------------------
/tests/fixtures/io/layout_textblock.csv:
--------------------------------------------------------------------------------
1 | start,end,axis,canvas_height,canvas_width,block_type,text,x_1,y_1,x_2,y_2,id,points,height,width,parent
2 | 1.0,2.0,y,5.0,0.0,interval,,,,,,,,,,
3 | ,,,,,rectangle,,1.0,2.0,3.0,4.0,24.0,,,,
4 | ,,,,,quadrilateral,test,,,,,,"[0, 1, 2, 3, 4, 5, 6, 7]",200.0,400.0,45.0
5 |
--------------------------------------------------------------------------------
/tests/fixtures/io/layout_textblock.json:
--------------------------------------------------------------------------------
1 | {"page_data": {}, "blocks": [{"start": 1, "end": 2, "axis": "y", "canvas_height": 5, "canvas_width": 0, "block_type": "interval", "text": ""}, {"x_1": 1, "y_1": 2, "x_2": 3, "y_2": 4, "block_type": "rectangle", "id": 24}, {"points": [0, 1, 2, 3, 4, 5, 6, 7], "height": 200, "width": 400, "block_type": "quadrilateral", "text": "test", "parent": 45}]}
--------------------------------------------------------------------------------
/tests/fixtures/io/quadrilateral.json:
--------------------------------------------------------------------------------
1 | {"points": [0, 1, 2, 3, 4, 5, 6, 7], "height": 200, "width": 400, "block_type": "quadrilateral"}
--------------------------------------------------------------------------------
/tests/fixtures/io/quadrilateral_textblock.json:
--------------------------------------------------------------------------------
1 | {"points": [0, 1, 2, 3, 4, 5, 6, 7], "height": 200, "width": 400, "block_type": "quadrilateral", "text": "test", "parent": 45}
--------------------------------------------------------------------------------
/tests/fixtures/io/rectangle.json:
--------------------------------------------------------------------------------
1 | {"x_1": 1, "y_1": 2, "x_2": 3, "y_2": 4, "block_type": "rectangle"}
--------------------------------------------------------------------------------
/tests/fixtures/io/rectangle_textblock.json:
--------------------------------------------------------------------------------
1 | {"x_1": 1, "y_1": 2, "x_2": 3, "y_2": 4, "block_type": "rectangle", "id": 24}
--------------------------------------------------------------------------------
/tests/fixtures/model/config.yml:
--------------------------------------------------------------------------------
1 | CUDNN_BENCHMARK: false
2 | DATALOADER:
3 | ASPECT_RATIO_GROUPING: true
4 | FILTER_EMPTY_ANNOTATIONS: true
5 | NUM_WORKERS: 2
6 | REPEAT_THRESHOLD: 0.0
7 | SAMPLER_TRAIN: TrainingSampler
8 | DATASETS:
9 | PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
10 | PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
11 | PROPOSAL_FILES_TEST: []
12 | PROPOSAL_FILES_TRAIN: []
13 | TEST:
14 | - HJDataset_test
15 | TRAIN:
16 | - HJDataset_train
17 | GLOBAL:
18 | HACK: 1.0
19 | INPUT:
20 | CROP:
21 | ENABLED: false
22 | SIZE:
23 | - 0.9
24 | - 0.9
25 | TYPE: relative_range
26 | FORMAT: BGR
27 | MASK_FORMAT: polygon
28 | MAX_SIZE_TEST: 1333
29 | MAX_SIZE_TRAIN: 1333
30 | MIN_SIZE_TEST: 800
31 | MIN_SIZE_TRAIN:
32 | - 640
33 | - 672
34 | - 704
35 | - 736
36 | - 768
37 | - 800
38 | MIN_SIZE_TRAIN_SAMPLING: choice
39 | MODEL:
40 | ANCHOR_GENERATOR:
41 | ANGLES:
42 | - - -90
43 | - 0
44 | - 90
45 | ASPECT_RATIOS:
46 | - - 0.5
47 | - 1.0
48 | - 2.0
49 | NAME: DefaultAnchorGenerator
50 | OFFSET: 0.0
51 | SIZES:
52 | - - 32
53 | - - 64
54 | - - 128
55 | - - 256
56 | - - 512
57 | BACKBONE:
58 | FREEZE_AT: 2
59 | NAME: build_resnet_fpn_backbone
60 | DEVICE: cuda
61 | FPN:
62 | FUSE_TYPE: sum
63 | IN_FEATURES:
64 | - res2
65 | - res3
66 | - res4
67 | - res5
68 | NORM: ''
69 | OUT_CHANNELS: 256
70 | KEYPOINT_ON: false
71 | LOAD_PROPOSALS: false
72 | MASK_ON: false
73 | META_ARCHITECTURE: GeneralizedRCNN
74 | PANOPTIC_FPN:
75 | COMBINE:
76 | ENABLED: true
77 | INSTANCES_CONFIDENCE_THRESH: 0.5
78 | OVERLAP_THRESH: 0.5
79 | STUFF_AREA_LIMIT: 4096
80 | INSTANCE_LOSS_WEIGHT: 1.0
81 | PIXEL_MEAN:
82 | - 103.53
83 | - 116.28
84 | - 123.675
85 | PIXEL_STD:
86 | - 1.0
87 | - 1.0
88 | - 1.0
89 | PROPOSAL_GENERATOR:
90 | MIN_SIZE: 0
91 | NAME: RPN
92 | RESNETS:
93 | DEFORM_MODULATED: false
94 | DEFORM_NUM_GROUPS: 1
95 | DEFORM_ON_PER_STAGE:
96 | - false
97 | - false
98 | - false
99 | - false
100 | DEPTH: 50
101 | NORM: FrozenBN
102 | NUM_GROUPS: 1
103 | OUT_FEATURES:
104 | - res2
105 | - res3
106 | - res4
107 | - res5
108 | RES2_OUT_CHANNELS: 256
109 | RES5_DILATION: 1
110 | STEM_OUT_CHANNELS: 64
111 | STRIDE_IN_1X1: true
112 | WIDTH_PER_GROUP: 64
113 | RETINANET:
114 | BBOX_REG_WEIGHTS:
115 | - 1.0
116 | - 1.0
117 | - 1.0
118 | - 1.0
119 | FOCAL_LOSS_ALPHA: 0.25
120 | FOCAL_LOSS_GAMMA: 2.0
121 | IN_FEATURES:
122 | - p3
123 | - p4
124 | - p5
125 | - p6
126 | - p7
127 | IOU_LABELS:
128 | - 0
129 | - -1
130 | - 1
131 | IOU_THRESHOLDS:
132 | - 0.4
133 | - 0.5
134 | NMS_THRESH_TEST: 0.5
135 | NUM_CLASSES: 9
136 | NUM_CONVS: 4
137 | PRIOR_PROB: 0.01
138 | SCORE_THRESH_TEST: 0.05
139 | SMOOTH_L1_LOSS_BETA: 0.1
140 | TOPK_CANDIDATES_TEST: 1000
141 | ROI_BOX_CASCADE_HEAD:
142 | BBOX_REG_WEIGHTS:
143 | - - 10.0
144 | - 10.0
145 | - 5.0
146 | - 5.0
147 | - - 20.0
148 | - 20.0
149 | - 10.0
150 | - 10.0
151 | - - 30.0
152 | - 30.0
153 | - 15.0
154 | - 15.0
155 | IOUS:
156 | - 0.5
157 | - 0.6
158 | - 0.7
159 | ROI_BOX_HEAD:
160 | BBOX_REG_WEIGHTS:
161 | - 10.0
162 | - 10.0
163 | - 5.0
164 | - 5.0
165 | CLS_AGNOSTIC_BBOX_REG: false
166 | CONV_DIM: 256
167 | FC_DIM: 1024
168 | NAME: FastRCNNConvFCHead
169 | NORM: ''
170 | NUM_CONV: 0
171 | NUM_FC: 2
172 | POOLER_RESOLUTION: 7
173 | POOLER_SAMPLING_RATIO: 0
174 | POOLER_TYPE: ROIAlignV2
175 | SMOOTH_L1_BETA: 0.0
176 | TRAIN_ON_PRED_BOXES: false
177 | ROI_HEADS:
178 | BATCH_SIZE_PER_IMAGE: 256
179 | IN_FEATURES:
180 | - p2
181 | - p3
182 | - p4
183 | - p5
184 | IOU_LABELS:
185 | - 0
186 | - 1
187 | IOU_THRESHOLDS:
188 | - 0.5
189 | NAME: StandardROIHeads
190 | NMS_THRESH_TEST: 0.5
191 | NUM_CLASSES: 8
192 | POSITIVE_FRACTION: 0.25
193 | PROPOSAL_APPEND_GT: true
194 | SCORE_THRESH_TEST: 0.05
195 | ROI_KEYPOINT_HEAD:
196 | CONV_DIMS:
197 | - 512
198 | - 512
199 | - 512
200 | - 512
201 | - 512
202 | - 512
203 | - 512
204 | - 512
205 | LOSS_WEIGHT: 1.0
206 | MIN_KEYPOINTS_PER_IMAGE: 1
207 | NAME: KRCNNConvDeconvUpsampleHead
208 | NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
209 | NUM_KEYPOINTS: 17
210 | POOLER_RESOLUTION: 14
211 | POOLER_SAMPLING_RATIO: 0
212 | POOLER_TYPE: ROIAlignV2
213 | ROI_MASK_HEAD:
214 | CLS_AGNOSTIC_MASK: false
215 | CONV_DIM: 256
216 | NAME: MaskRCNNConvUpsampleHead
217 | NORM: ''
218 | NUM_CONV: 4
219 | POOLER_RESOLUTION: 14
220 | POOLER_SAMPLING_RATIO: 0
221 | POOLER_TYPE: ROIAlignV2
222 | RPN:
223 | BATCH_SIZE_PER_IMAGE: 256
224 | BBOX_REG_WEIGHTS:
225 | - 1.0
226 | - 1.0
227 | - 1.0
228 | - 1.0
229 | BOUNDARY_THRESH: -1
230 | HEAD_NAME: StandardRPNHead
231 | IN_FEATURES:
232 | - p2
233 | - p3
234 | - p4
235 | - p5
236 | - p6
237 | IOU_LABELS:
238 | - 0
239 | - -1
240 | - 1
241 | IOU_THRESHOLDS:
242 | - 0.3
243 | - 0.7
244 | LOSS_WEIGHT: 1.0
245 | NMS_THRESH: 0.7
246 | POSITIVE_FRACTION: 0.5
247 | POST_NMS_TOPK_TEST: 1000
248 | POST_NMS_TOPK_TRAIN: 1000
249 | PRE_NMS_TOPK_TEST: 1000
250 | PRE_NMS_TOPK_TRAIN: 2000
251 | SMOOTH_L1_BETA: 0.0
252 | SEM_SEG_HEAD:
253 | COMMON_STRIDE: 4
254 | CONVS_DIM: 128
255 | IGNORE_VALUE: 255
256 | IN_FEATURES:
257 | - p2
258 | - p3
259 | - p4
260 | - p5
261 | LOSS_WEIGHT: 1.0
262 | NAME: SemSegFPNHead
263 | NORM: GN
264 | NUM_CLASSES: 54
265 | WEIGHTS: https://www.dropbox.com/s/3hafewz6wcvev04/model_final.pth?dl=1
266 | OUTPUT_DIR: ./train_log/faster_rcnn_R_50_FPN_3x
267 | SEED: -1
268 | SOLVER:
269 | BASE_LR: 0.00025
270 | BIAS_LR_FACTOR: 1.0
271 | CHECKPOINT_PERIOD: 30000
272 | GAMMA: 0.1
273 | IMS_PER_BATCH: 2
274 | LR_SCHEDULER_NAME: WarmupMultiStepLR
275 | MAX_ITER: 60000
276 | MOMENTUM: 0.9
277 | STEPS:
278 | - 210000
279 | - 250000
280 | WARMUP_FACTOR: 0.001
281 | WARMUP_ITERS: 1000
282 | WARMUP_METHOD: linear
283 | WEIGHT_DECAY: 0.0001
284 | WEIGHT_DECAY_BIAS: 0.0001
285 | WEIGHT_DECAY_NORM: 0.0
286 | TEST:
287 | AUG:
288 | ENABLED: false
289 | FLIP: true
290 | MAX_SIZE: 4000
291 | MIN_SIZES:
292 | - 400
293 | - 500
294 | - 600
295 | - 700
296 | - 800
297 | - 900
298 | - 1000
299 | - 1100
300 | - 1200
301 | DETECTIONS_PER_IMAGE: 100
302 | EVAL_PERIOD: 0
303 | EXPECTED_RESULTS: []
304 | KEYPOINT_OKS_SIGMAS: []
305 | PRECISE_BN:
306 | ENABLED: false
307 | NUM_ITER: 200
308 | VERSION: 2
309 | VIS_PERIOD: 0
310 |
--------------------------------------------------------------------------------
/tests/fixtures/model/layout_detection_reference.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/tests/fixtures/model/layout_detection_reference.jpg
--------------------------------------------------------------------------------
/tests/fixtures/model/layout_detection_reference.json:
--------------------------------------------------------------------------------
1 | {"page_data": {}, "blocks": [{"x_1": 648.9922485351562, "y_1": 1418.7113037109375, "x_2": 1132.6805419921875, "y_2": 1479.303955078125, "block_type": "rectangle", "type": "Text", "score": 0.9995978474617004}, {"x_1": 106.12457275390625, "y_1": 1032.07470703125, "x_2": 599.2977905273438, "y_2": 1323.208984375, "block_type": "rectangle", "type": "Text", "score": 0.9981802701950073}, {"x_1": 639.54736328125, "y_1": 773.1265869140625, "x_2": 1135.9765625, "y_2": 1044.6507568359375, "block_type": "rectangle", "type": "Text", "score": 0.9974864721298218}, {"x_1": 104.36861419677734, "y_1": 767.3282470703125, "x_2": 595.1759643554688, "y_2": 970.451171875, "block_type": "rectangle", "type": "Text", "score": 0.9974320530891418}, {"x_1": 107.37610626220703, "y_1": 1448.544189453125, "x_2": 598.3998413085938, "y_2": 1488.01611328125, "block_type": "rectangle", "type": "Text", "score": 0.9953517913818359}, {"x_1": 132.01339721679688, "y_1": 146.253173828125, "x_2": 1160.3997802734375, "y_2": 652.8322143554688, "block_type": "rectangle", "type": "Figure", "score": 0.9953091740608215}, {"x_1": 103.79012298583984, "y_1": 1327.6717529296875, "x_2": 601.3895874023438, "y_2": 1429.9224853515625, "block_type": "rectangle", "type": "Text", "score": 0.9949470162391663}, {"x_1": 103.83270263671875, "y_1": 671.7702026367188, "x_2": 1138.1756591796875, "y_2": 748.6300659179688, "block_type": "rectangle", "type": "Text", "score": 0.9943684935569763}, {"x_1": 104.0943832397461, "y_1": 985.9046020507812, "x_2": 444.34979248046875, "y_2": 1011.3511352539062, "block_type": "rectangle", "type": "Title", "score": 0.9880087375640869}, {"x_1": 395.9805908203125, "y_1": 141.7040252685547, "x_2": 1141.115478515625, "y_2": 659.3515625, "block_type": "rectangle", "type": "Figure", "score": 0.9815265536308289}, {"x_1": 107.32891845703125, "y_1": 149.01644897460938, "x_2": 405.1805419921875, "y_2": 582.9757690429688, "block_type": "rectangle", "type": "Figure", "score": 0.965209424495697}, {"x_1": 
638.6964721679688, "y_1": 1075.6173095703125, "x_2": 1137.9869384765625, "y_2": 1154.6956787109375, "block_type": "rectangle", "type": "Text", "score": 0.9612341523170471}, {"x_1": 137.1743621826172, "y_1": 591.2607421875, "x_2": 376.2920227050781, "y_2": 609.2918701171875, "block_type": "rectangle", "type": "Text", "score": 0.9027073979377747}, {"x_1": 643.3095703125, "y_1": 1175.7694091796875, "x_2": 1127.9664306640625, "y_2": 1416.0784912109375, "block_type": "rectangle", "type": "Table", "score": 0.8846631646156311}]}
--------------------------------------------------------------------------------
/tests/fixtures/model/test_model_image.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/tests/fixtures/model/test_model_image.jpg
--------------------------------------------------------------------------------
/tests/fixtures/ocr/test_gcv_image.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/tests/fixtures/ocr/test_gcv_image.jpg
--------------------------------------------------------------------------------
/tests/fixtures/ocr/test_tesseract_response.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Layout-Parser/layout-parser/04e28168d820eea3a1ff1e098078323e7b48648b/tests/fixtures/ocr/test_tesseract_response.pickle
--------------------------------------------------------------------------------
/tests/test_io.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import numpy as np
16 | from layoutparser.elements import Interval, Rectangle, Quadrilateral, TextBlock, Layout
17 | from layoutparser import load_json, load_dict, load_csv, load_pdf
18 |
def test_json():
    """Every element type must survive a dict round trip and match its fixture."""

    i = Interval(1, 2, "y", canvas_height=5)
    r = Rectangle(1, 2, 3, 4)
    q = Quadrilateral(np.arange(8).reshape(4, 2), 200, 400)
    l = Layout([i, r, q], page_data={"width": 200, "height": 200})

    i2 = TextBlock(i, "")
    r2 = TextBlock(r, id=24)
    q2 = TextBlock(q, text="test", parent=45)
    l2 = Layout([i2, r2, q2])

    roundtrip_cases = [
        (i, "tests/fixtures/io/interval.json"),
        (r, "tests/fixtures/io/rectangle.json"),
        (q, "tests/fixtures/io/quadrilateral.json"),
        (l, "tests/fixtures/io/layout.json"),
        (i2, "tests/fixtures/io/interval_textblock.json"),
        (r2, "tests/fixtures/io/rectangle_textblock.json"),
        (q2, "tests/fixtures/io/quadrilateral_textblock.json"),
        (l2, "tests/fixtures/io/layout_textblock.json"),
    ]
    for element, fixture_path in roundtrip_cases:
        assert element == load_dict(element.to_dict()) == load_json(fixture_path)

    # Test if LP can ignore the unused None features
    i3 = TextBlock(i, None)
    r3 = TextBlock(r, id=None)
    q3 = TextBlock(q, text=None, parent=None)
    l3 = Layout([i3, r3, q3], page_data={"width": 200, "height": 200})
    assert l == load_dict(l3.to_dict())
50 |
51 |
def test_csv():
    """CSV loading must reproduce layouts, including mixed-element ones."""
    interval = Interval(1, 2, "y", canvas_height=5)
    rect = Rectangle(1, 2, 3, 4)
    quad = Quadrilateral(np.arange(8).reshape(4, 2), 200, 400)
    expected = Layout(
        [interval, rect, quad], page_data={"width": 200, "height": 200}
    )

    loaded = load_csv("tests/fixtures/io/layout.csv")
    # The CSV carries no page_data, so equality only holds after
    # restoring it by hand.
    assert loaded != expected
    loaded.page_data = {"width": 200, "height": 200}
    assert loaded == expected

    # A mix of plain elements and TextBlocks should load as well.
    expected_mixed = Layout(
        [
            interval,  # <- Allow mixmode loading
            TextBlock(rect, id=24),
            TextBlock(quad, text="test", parent=45),
        ]
    )
    loaded_mixed = load_csv("tests/fixtures/io/layout_textblock.csv")
    assert loaded_mixed == expected_mixed
70 |
71 |
def test_pdf():
    """The example PDF loads as a single page carrying page metadata."""
    pdf_layout = load_pdf("tests/fixtures/io/example.pdf")
    assert len(pdf_layout) == 1

    first_page = pdf_layout[0]
    assert all(
        key in first_page.page_data for key in ("width", "height", "index")
    )

    # Only three types of font show-up in the file
    assert len({token.type for token in first_page}) == 3
82 |
def test_empty_pdf():
    """A PDF with no selectable text still yields one empty page layout."""
    pages = load_pdf("tests/fixtures/io/empty.pdf")
    assert len(pages) == 1  # Only one page

    assert len(pages[0]) == 0  # No selectable tokens on the page
--------------------------------------------------------------------------------
/tests/test_model.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import pytest
16 | import cv2
17 |
18 | from layoutparser import load_json
19 | from layoutparser.models import *
20 |
# Canonical "lp://<dataset>/<arch>/config" paths for every pretrained model
# in each backend's model zoo. The large-scale test branches instantiate all
# of them; the default branches use only the first entry of each list.
ALL_DETECTRON2_MODEL_CONFIGS = [
    "lp://PrimaLayout/mask_rcnn_R_50_FPN_3x/config",
    "lp://HJDataset/faster_rcnn_R_50_FPN_3x/config",
    "lp://HJDataset/mask_rcnn_R_50_FPN_3x/config",
    "lp://HJDataset/retinanet_R_50_FPN_3x/config",
    "lp://PubLayNet/faster_rcnn_R_50_FPN_3x/config",
    "lp://PubLayNet/mask_rcnn_R_50_FPN_3x/config",
    "lp://PubLayNet/mask_rcnn_X_101_32x8d_FPN_3x/config",
    "lp://NewspaperNavigator/faster_rcnn_R_50_FPN_3x/config",
    "lp://TableBank/faster_rcnn_R_50_FPN_3x/config",
    "lp://TableBank/faster_rcnn_R_101_FPN_3x/config",
    "lp://MFD/faster_rcnn_R_50_FPN_3x/config",
]

# PaddleDetection backend model zoo.
ALL_PADDLEDETECTION_MODEL_CONFIGS = [
    "lp://PubLayNet/ppyolov2_r50vd_dcn_365e/config",
    "lp://TableBank/ppyolov2_r50vd_dcn_365e/config",
]

# EfficientDet backend model zoo.
ALL_EFFDET_MODEL_CONFIGS = [
    "lp://PubLayNet/tf_efficientdet_d0/config",
    "lp://PubLayNet/tf_efficientdet_d1/config",
    "lp://MFD/tf_efficientdet_d0/config",
    "lp://MFD/tf_efficientdet_d1/config",
]
46 |
47 |
48 | def _construct_valid_config_variations(config, backend_name):
49 | dataset_name, arch_name, identifier = config[len("lp://") :].split("/")
50 | return [
51 | "lp://" + "/".join([backend_name, dataset_name, arch_name, identifier]),
52 | "lp://" + "/".join([backend_name, dataset_name, arch_name]),
53 | "lp://" + "/".join([backend_name, dataset_name]),
54 | "lp://" + "/".join([dataset_name, arch_name, identifier]),
55 | "lp://" + "/".join([dataset_name, arch_name]),
56 | "lp://" + "/".join([dataset_name]),
57 | ]
58 |
59 |
60 | def _construct_invalid_config_variations(config, backend_name):
61 | dataset_name, arch_name, identifier = config[len("lp://") :].split("/")
62 | return [
63 | "lp://" + "/".join([backend_name]),
64 | ]
65 |
66 |
def _single_config_test_pipeline(TestLayoutModel, base_config):
    """Smoke-test one model class against a single base config.

    Instantiates the model from every valid config-path variation and runs a
    detection on the fixture image, then checks that every invalid variation
    raises ValueError at construction time.
    """
    # Load the fixture image once: it is loop-invariant, and re-reading it
    # for every config variation just slows the test down.
    image = cv2.imread("tests/fixtures/model/test_model_image.jpg")

    for config in _construct_valid_config_variations(
        base_config, TestLayoutModel.DETECTOR_NAME
    ):
        model = TestLayoutModel(config)
        layout = model.detect(image)
        # Free the model eagerly so multiple variations don't pile up in memory.
        del model

    for config in _construct_invalid_config_variations(
        base_config, TestLayoutModel.DETECTOR_NAME
    ):
        with pytest.raises(ValueError):
            model = TestLayoutModel(config)
81 |
82 |
def test_Detectron2Model(is_large_scale=False):
    """Smoke-test Detectron2 layout models.

    When ``is_large_scale`` is set, every known zoo config is instantiated
    and run once; otherwise only the first config (and its path variations)
    is exercised. A local CPU-enforced config file is always tested.
    """
    if is_large_scale:
        for model_config in ALL_DETECTRON2_MODEL_CONFIGS:
            detector = Detectron2LayoutModel(model_config)
            test_image = cv2.imread("tests/fixtures/model/test_model_image.jpg")
            detected_layout = detector.detect(test_image)
    else:
        _single_config_test_pipeline(
            Detectron2LayoutModel, ALL_DETECTRON2_MODEL_CONFIGS[0]
        )
    # Test in enforce CPU mode
    cpu_model = Detectron2LayoutModel("tests/fixtures/model/config.yml")
    cpu_image = cv2.imread("tests/fixtures/model/test_model_image.jpg")
    cpu_layout = cpu_model.detect(cpu_image)
100 |
101 |
def test_Detectron2Model_version_compatibility(enabled=False):
    """Compare a Detectron2 detection against a stored reference layout.

    Disabled by default (pass ``enabled=True`` to run) since the detected
    layout may drift across detectron2 versions.
    """
    if not enabled:
        return

    model = Detectron2LayoutModel(
        config_path="lp://PubLayNet/faster_rcnn_R_50_FPN_3x/config",
        extra_config=[
            "MODEL.ROI_HEADS.SCORE_THRESH_TEST",
            0.85,
            "MODEL.ROI_HEADS.NMS_THRESH_TEST",
            0.75,
        ],
    )
    reference_image = cv2.imread("tests/fixtures/model/layout_detection_reference.jpg")
    detected_layout = model.detect(reference_image)
    reference_layout = load_json("tests/fixtures/model/layout_detection_reference.json")
    assert reference_layout == detected_layout
119 |
120 |
def test_PaddleDetectionModel(is_large_scale=False):
    """Smoke-test PaddleDetection layout models.

    Sweeps the full model zoo when ``is_large_scale`` is set; otherwise runs
    the config-variation pipeline on the first zoo entry only.
    """
    if is_large_scale:
        for model_config in ALL_PADDLEDETECTION_MODEL_CONFIGS:
            detector = PaddleDetectionLayoutModel(model_config)
            test_image = cv2.imread("tests/fixtures/model/test_model_image.jpg")
            detected_layout = detector.detect(test_image)
    else:
        _single_config_test_pipeline(
            PaddleDetectionLayoutModel, ALL_PADDLEDETECTION_MODEL_CONFIGS[0]
        )
134 |
135 |
def test_EffDetModel(is_large_scale=False):
    """Smoke-test EfficientDet layout models.

    Sweeps the full model zoo when ``is_large_scale`` is set; otherwise runs
    the config-variation pipeline on the first zoo entry only.
    """
    if is_large_scale:
        for model_config in ALL_EFFDET_MODEL_CONFIGS:
            detector = EfficientDetLayoutModel(model_config)
            test_image = cv2.imread("tests/fixtures/model/test_model_image.jpg")
            detected_layout = detector.detect(test_image)
    else:
        _single_config_test_pipeline(
            EfficientDetLayoutModel, ALL_EFFDET_MODEL_CONFIGS[0]
        )
149 |
150 |
def test_AutoModel():
    """AutoLayoutModel should resolve fully-qualified config paths as well as
    dataset-only shorthands, and raise ValueError for names it cannot serve.
    """

    # Full configs
    auto_model_config_1 = [
        "lp://detectron2/PubLayNet/faster_rcnn_R_50_FPN_3x/config",
        "lp://paddledetection/PubLayNet/ppyolov2_r50vd_dcn_365e/config",
        "lp://efficientdet/PubLayNet/tf_efficientdet_d0/config",
    ]
    image = cv2.imread("tests/fixtures/model/test_model_image.jpg")
    for config in auto_model_config_1:
        model = AutoLayoutModel(config)
        layout = model.detect(image)

    # Dataset name only
    # It will use the first available model
    auto_model_config_2 = [
        "lp://PubLayNet",
        "lp://MFD",
    ]
    # BUG FIX: this loop previously iterated auto_model_config_1 (leaving
    # auto_model_config_2 unused), and the DETECTOR_NAME comparison was a
    # bare expression that asserted nothing.
    for config in auto_model_config_2:
        model = AutoLayoutModel(config)
        # assumes efficientdet is the first available backend in this test
        # environment, matching the original comparison — TODO confirm
        assert model.DETECTOR_NAME == "efficientdet"

    # Automodel name that doesn't work

    # 1. No available backend for the model
    with pytest.raises(ValueError):
        model = AutoLayoutModel("lp://prima")

    # 2. Completely invalid name
    with pytest.raises(ValueError):
        model = AutoLayoutModel("lp://test")
183 |
--------------------------------------------------------------------------------
/tests/test_ocr.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from layoutparser import (
16 | GCVAgent,
17 | GCVFeatureType,
18 | TesseractAgent,
19 | TesseractFeatureType,
20 | )
21 | import json, cv2, os
22 |
23 | image = cv2.imread("tests/fixtures/ocr/test_gcv_image.jpg")
24 |
25 |
def test_gcv_agent(test_detect=False):
    """Exercise the GCV OCR agent on a stored response, optionally compare
    against a live detection, then round-trip the response through save."""

    # Test loading the agent with designated credential
    ocr_agent = GCVAgent()

    # Parse the stored response at every aggregation level.
    response = ocr_agent.load_response("tests/fixtures/ocr/test_gcv_response.json")
    plain_annotations = ocr_agent.gather_text_annotations(response)
    levels = [
        GCVFeatureType.SYMBOL,
        GCVFeatureType.WORD,
        GCVFeatureType.PARA,
        GCVFeatureType.BLOCK,
        GCVFeatureType.PAGE,
    ]
    leveled_annotations = [
        ocr_agent.gather_full_text_annotation(response, level) for level in levels
    ]

    # Test with a online image detection and compare the results with the
    # stored one. Warning: there could be updates on the GCV side, so it
    # would be good to not frequently test this part.
    if test_detect:
        live_response = ocr_agent.detect(image, return_response=True)

        assert response == live_response
        assert plain_annotations == ocr_agent.gather_text_annotations(live_response)
        for level, expected in zip(levels, leveled_annotations):
            assert expected == ocr_agent.gather_full_text_annotation(
                live_response, level
            )

    # Finally, test the response storage and remove the file
    ocr_agent.save_response(response, "tests/fixtures/ocr/.test_gcv_response.json")
    os.remove("tests/fixtures/ocr/.test_gcv_response.json")
57 |
58 |
def test_tesseract(test_detect=False):
    """Check Tesseract response parsing at every aggregation level against a
    stored pickle, optionally comparing with a live detection."""

    ocr_agent = TesseractAgent(languages="eng")
    response = ocr_agent.load_response(
        "tests/fixtures/ocr/test_tesseract_response.pickle"
    )
    full_text = response["text"]
    levels = [
        TesseractFeatureType.PAGE,
        TesseractFeatureType.BLOCK,
        TesseractFeatureType.PARA,
        TesseractFeatureType.LINE,
        TesseractFeatureType.WORD,
    ]
    gathered = [ocr_agent.gather_data(response, agg_level=level) for level in levels]

    # The results could be different if using another version of the
    # Tesseract engine; tesseract 4.1.1 was used for generating the pickle
    # test file.
    if test_detect:
        live = ocr_agent.detect(image, return_response=True)
        assert full_text == live["text"]
        for level, expected in zip(levels, gathered):
            assert expected == ocr_agent.gather_data(live, agg_level=level)
--------------------------------------------------------------------------------
/tests/test_tools.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from layoutparser import load_pdf
16 | from layoutparser.tools import (
17 | generalized_connected_component_analysis_1d,
18 | simple_line_detection,
19 | group_textblocks_based_on_category,
20 | )
21 |
def test_generalized_connected_component_analysis_1d():
    """Components form where adjacent values score as connected."""

    def within(tolerance):
        # Two values connect when their gap is at most `tolerance`.
        return lambda x, y: abs(x - y) <= tolerance

    # A fully-connected chain collapses into a single component.
    assert (
        len(
            generalized_connected_component_analysis_1d(
                [1, 2, 3], scoring_func=within(1)
            )
        )
        == 1
    )

    # A gap wider than the tolerance splits the sequence in two.
    assert (
        len(
            generalized_connected_component_analysis_1d(
                [1, 2, 3, 5, 6, 7], scoring_func=within(1)
            )
        )
        == 2
    )

    # Widening the tolerance bridges the gap again.
    assert (
        len(
            generalized_connected_component_analysis_1d(
                [1, 2, 3, 5, 6, 7], scoring_func=within(2)
            )
        )
        == 1
    )

    # An aggregation function reduces each component to a single value.
    assert generalized_connected_component_analysis_1d(
        [1, 2, 3, 5, 6, 7], scoring_func=within(1), aggregation_func=max
    ) == [3, 7]
53 |
def test_simple_line_detection():
    """The example PDF's tokens group into exactly 15 text lines."""
    first_page = load_pdf("tests/fixtures/io/example.pdf")[0]
    detected_lines = simple_line_detection(first_page)
    assert len(detected_lines) == 15
61 |
def test_group_textblocks_based_on_category():
    """The example PDF's tokens group into exactly 3 category blocks."""
    first_page = load_pdf("tests/fixtures/io/example.pdf")[0]
    grouped_blocks = group_textblocks_based_on_category(first_page)
    assert len(grouped_blocks) == 3
--------------------------------------------------------------------------------
/tests/test_visualization.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import pytest
16 |
17 | from layoutparser.elements import *
18 | from layoutparser.ocr import *
19 | from layoutparser.visualization import *
20 | import cv2
21 | import numpy as np
22 |
23 |
def test_viz():
    """Exercise draw_box / draw_text across element types, the color /
    alpha / width options (valid and invalid), and OCR-derived layouts at
    every GCV aggregation level."""

    image = cv2.imread("tests/fixtures/ocr/test_gcv_image.jpg")
    ocr_agent = GCVAgent.with_credential(
        "tests/fixtures/ocr/test_gcv_credential.json", languages=["en"]
    )
    res = ocr_agent.load_response("tests/fixtures/ocr/test_gcv_response.json")

    # Empty layouts must be drawable without error.
    draw_box(image, Layout([]))
    draw_text(image, Layout([]))

    layout = Layout(
        [
            Interval(0, 10, axis="x"),
            Rectangle(0, 50, 100, 80),
            Quadrilateral(np.array([[10, 10], [30, 40], [90, 40], [10, 20]])),
        ]
    )

    draw_box(image, layout)
    draw_text(image, layout)

    # Test colors
    draw_box(image, layout, box_color=["red", "green", "blue"])
    draw_box(image, layout, box_color="red")

    draw_text(image, layout, box_color=["red", "green", "blue"])
    with pytest.raises(ValueError):
        # One more color than elements in the layout.
        draw_box(image, layout, box_color=["red", "green", "blue", "yellow"])
    with pytest.raises(ValueError):
        draw_text(
            image,
            layout,
            box_color=["red", "green", "blue", "yellow"],
            with_layout=True,
        )

    # Test alphas
    draw_box(image, layout, box_alpha=0)
    draw_box(image, layout, box_alpha=[0.1, 0.2, 0.3])
    # BUG FIX: the two invalid-alpha cases below previously passed the lists
    # as box_color, so the alpha validation paths were never exercised.
    with pytest.raises(ValueError):
        # Wrong length: four alphas for three elements.
        draw_box(image, layout, box_alpha=[0.1, 0.2, 0.3, 0.5])
    with pytest.raises(ValueError):
        # Value out of the [0, 1] range.
        draw_box(image, layout, box_alpha=[0.1, 0.2, 1.5])

    # Test widths
    draw_box(image, layout, box_width=1)
    draw_box(image, layout, box_width=[1, 2, 3])
    with pytest.raises(ValueError):
        draw_box(image, layout, box_width=[1, 2, 3, 4])

    draw_box(
        image,
        layout,
        box_alpha=[0.1, 0.2, 0.3],
        box_width=[1, 2, 3],
        box_color=["red", "green", "blue"],
    )

    for idx, level in enumerate(
        [
            GCVFeatureType.SYMBOL,
            GCVFeatureType.WORD,
            GCVFeatureType.PARA,
            GCVFeatureType.BLOCK,
            GCVFeatureType.PAGE,
        ]
    ):

        layout = ocr_agent.gather_full_text_annotation(res, level)

        draw_text(
            image,
            layout,
            # BUG FIX: both branches of the conditional were "ud", so the
            # alternation never happened; alternate the two arrangements so
            # each rendering path is covered.
            arrangement="lr" if idx % 2 else "ud",
            font_size=15,
            text_color="pink",
            text_background_color="grey",
            text_background_alpha=0.1,
            with_box_on_text=True,
            text_box_width=2,
            text_box_color="yellow",
            text_box_alpha=0.2,
            with_layout=True,
            box_width=1,
            color_map={None: "blue"},
            show_element_id=True,
            id_font_size=8,
            box_alpha=0.25,
            id_text_background_alpha=0.25,
        )

        draw_box(image, layout)
        draw_text(image, layout)
118 |
--------------------------------------------------------------------------------
/tests_deps/test_file_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import pytest
16 |
17 | from layoutparser import requires_backends
18 |
def test_when_backends_are_not_loaded():
    """With no optional backends installed, requires_backends must raise
    ImportError for every backend name."""
    backend_names = [
        "torch",
        "detectron2",
        "paddle",
        "effdet",
        "pytesseract",
        "google-cloud-vision",
    ]
    for name in backend_names:
        with pytest.raises(ImportError):
            requires_backends("a", name)
--------------------------------------------------------------------------------
/tests_deps/test_only_detectron2.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import cv2
16 | import pytest
17 | from layoutparser import Detectron2LayoutModel
18 |
def test_only_effdet_model():
    """With only the detectron2 backend installed, Detectron2 models run
    while the other backends raise ImportError on import.

    NOTE(review): the function name looks copy-pasted from the effdet
    variant of this test (this file exercises Detectron2); name kept
    unchanged for interface stability.
    """
    config = "lp://PubLayNet/faster_rcnn_R_50_FPN_3x/config"
    model = Detectron2LayoutModel(config)
    image = cv2.imread("tests/fixtures/model/test_model_image.jpg")
    layout = model.detect(image)

    # BUG FIX: both imports were inside a single `raises` block, so the
    # second import was never reached once the first one raised. Each
    # missing backend is now checked separately.
    with pytest.raises(ImportError):
        from layoutparser import EfficientDetLayoutModel
    with pytest.raises(ImportError):
        from layoutparser import PaddleDetectionLayoutModel
--------------------------------------------------------------------------------
/tests_deps/test_only_effdet.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import cv2
16 | import pytest
17 | from layoutparser import EfficientDetLayoutModel
18 |
def test_only_effdet_model():
    """With only the effdet backend installed, EfficientDet models run while
    the other backends raise ImportError on import."""
    config = "lp://PubLayNet/tf_efficientdet_d0/config"
    model = EfficientDetLayoutModel(config)
    image = cv2.imread("tests/fixtures/model/test_model_image.jpg")
    layout = model.detect(image)

    # BUG FIX: both imports were inside a single `raises` block, so the
    # second import was never reached once the first one raised. Each
    # missing backend is now checked separately.
    with pytest.raises(ImportError):
        from layoutparser import Detectron2LayoutModel
    with pytest.raises(ImportError):
        from layoutparser import PaddleDetectionLayoutModel
--------------------------------------------------------------------------------
/tests_deps/test_only_paddledetection.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The Layout Parser team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import cv2
16 | import pytest
17 | from layoutparser import PaddleDetectionLayoutModel
18 |
def test_only_effdet_model():
    """With only the paddledetection backend installed, PaddleDetection
    models run while the other backends raise ImportError on import.

    NOTE(review): the function name looks copy-pasted from the effdet
    variant of this test (this file exercises PaddleDetection); name kept
    unchanged for interface stability.
    """
    config = "lp://PubLayNet/ppyolov2_r50vd_dcn_365e/config"
    model = PaddleDetectionLayoutModel(config)
    image = cv2.imread("tests/fixtures/model/test_model_image.jpg")
    layout = model.detect(image)

    # BUG FIX: both imports were inside a single `raises` block, so the
    # second import was never reached once the first one raised. Each
    # missing backend is now checked separately.
    with pytest.raises(ImportError):
        from layoutparser import EfficientDetLayoutModel
    with pytest.raises(ImportError):
        from layoutparser import Detectron2LayoutModel
--------------------------------------------------------------------------------