16 |
17 | {% include ".icons/fontawesome/brands/twitter.svg" %}
18 |
19 | {{ config.theme.twitter_name }}
20 |
21 | {% endif %}
22 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.yml:
--------------------------------------------------------------------------------
1 | name: Feature Request
2 | description: Suggest an idea for this project
3 | labels: ["feature"]
4 | body:
5 | - type: markdown
6 | attributes:
7 | value: |
8 | Thanks for taking the time to fill out this form!
9 | - type: textarea
10 | id: description
11 | attributes:
12 | label: Description
13 | description: Explanation of the feature.
14 | validations:
15 | required: true
16 | - type: textarea
17 | id: motivation
18 | attributes:
19 | label: Motivation
20 | description: Why are we doing this? What use cases does it support? What is the expected outcome?
21 | - type: textarea
22 | id: implementation
23 | attributes:
24 | label: Possible Implementation
25 | description: Suggest an idea for implementing the addition or change.
26 | - type: textarea
27 | id: additional-context
28 | attributes:
29 | label: Additional context
30 | description: Add any other context or screenshots about the feature request here.
31 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2021 - 2022 Saransh Chopra
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 |
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 |
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 |
--------------------------------------------------------------------------------
/.all-contributorsrc:
--------------------------------------------------------------------------------
1 | {
2 | "files": [
3 | "README.md"
4 | ],
5 | "imageSize": 100,
6 | "commit": false,
7 | "contributors": [
8 | {
9 | "login": "Saransh-cpp",
10 | "name": "Saransh",
11 | "avatar_url": "https://avatars.githubusercontent.com/u/74055102?v=4",
12 | "profile": "https://saransh-cpp.github.io/",
13 | "contributions": [
14 | "code",
15 | "bug",
16 | "content",
17 | "doc",
18 | "design",
19 | "example",
20 | "ideas",
21 | "infra",
22 | "maintenance",
23 | "platform",
24 | "review",
25 | "test",
26 | "tutorial",
27 | "mentoring"
28 | ]
29 | },
30 | {
31 | "login": "priyanshi-git",
32 | "name": "Priyanshi Goel",
33 | "avatar_url": "https://avatars.githubusercontent.com/u/82112540?v=4",
34 | "profile": "https://github.com/priyanshi-git",
35 | "contributions": [
36 | "bug"
37 | ]
38 | }
39 | ],
40 | "contributorsPerLine": 7,
41 | "projectName": "OCRed",
42 | "projectOwner": "Saransh-cpp",
43 | "repoType": "github",
44 | "repoHost": "https://github.com",
45 | "skipCi": true
46 | }
47 |
--------------------------------------------------------------------------------
/docs/install.md:
--------------------------------------------------------------------------------
1 | # Installation
2 |
3 | Follow the steps below to install `ocred` locally.
4 |
5 | ## Create a virtual environment
6 |
7 | Create and activate a virtual environment
8 |
9 | ```bash
10 | python -m venv env
11 |
12 | . env/bin/activate
13 | ```
14 |
15 | ## Install OCRed
16 |
17 | - Install Tesseract for your OS and add it to PATH
18 |
19 | The installation guide is available [here](https://tesseract-ocr.github.io/tessdoc/Installation.html)
20 |
21 | - `pip` magic
22 |
23 | `OCRed` uses modern `Python` packaging and can be installed using `pip` -
24 |
25 | ```bash
26 | python -m pip install ocred
27 | ```
28 |
29 | ## Build OCRed from source
30 |
31 | If you want to develop `OCRed` or use its latest commit (which may be unstable), install it from source -
32 |
33 | - Install Tesseract for your OS and add it to PATH
34 |
35 | The installation guide is available [here](https://tesseract-ocr.github.io/tessdoc/Installation.html)
36 |
37 | - Clone this repository
38 |
39 | ```bash
40 | git clone https://github.com/Saransh-cpp/OCRed
41 | ```
42 |
43 | - Change directory
44 |
45 | ```bash
46 | cd OCRed
47 | ```
48 |
49 | - Install the package in editable mode with the "dev" dependencies
50 |
51 | ```bash
52 | python -m pip install -e ".[dev]"
53 | ```
54 |
55 | Feel free to read our [Contributing Guide](https://github.com/Saransh-cpp/OCRed/blob/main/CONTRIBUTING.md) for more information on developing `OCRed`.
56 |
--------------------------------------------------------------------------------
/noxfile.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import nox
4 |
# Interpreter versions passed as ``python=`` to the ``tests`` session.
ALL_PYTHONS = [
    "3.7",
    "3.8",
    "3.9",
    "3.10",
    "3.11",
]

# Sessions assigned to ``nox.options.sessions`` (the default selection when
# nox is run without naming a session).
nox.options.sessions = [
    "lint",
    "tests",
    "doctests",
]
8 |
9 |
@nox.session(reuse_venv=True)
def lint(session):
    """Run all pre-commit hooks over every file in the repository.

    Anything in ``session.posargs`` is appended to the ``pre-commit``
    command line.
    """
    session.install("pre-commit")
    hook_command = ["pre-commit", "run", "--all-files", *session.posargs]
    session.run(*hook_command)
15 |
16 |
@nox.session(python=ALL_PYTHONS, reuse_venv=True)
def tests(session):
    """Install the package with its "dev" extra and run pytest.

    The session is declared once per entry in ``ALL_PYTHONS``; anything in
    ``session.posargs`` is appended to the ``pytest`` command line.
    """
    session.install(".[dev]")
    pytest_command = ["pytest", *session.posargs]
    session.run(*pytest_command)
22 |
23 |
@nox.session(reuse_venv=True)
def doctests(session):
    """Install the package with its "dev" extra and run xdoctest on ``./ocred/``.

    Anything in ``session.posargs`` is appended to the ``xdoctest`` command
    line.
    """
    session.install(".[dev]")
    xdoctest_command = ["xdoctest", "./ocred/", *session.posargs]
    session.run(*xdoctest_command)
29 |
30 |
@nox.session(reuse_venv=True)
def docs(session):
    """Build the docs. Pass "serve" to serve.

    With no positional arguments the site is built with ``mkdocs build``.
    When ``"serve"`` is among ``session.posargs`` the site is served with
    ``mkdocs serve`` instead. Any other argument aborts the session with an
    error, so a typo cannot pass silently without building anything.
    """
    serve = "serve" in session.posargs

    # Validate before installing: there is no point setting up the
    # environment for an invocation that is going to be rejected.
    if session.posargs and not serve:
        session.error("Unsupported argument to docs")

    session.install("-e", ".[docs]")

    if serve:
        session.log("Launching docs at http://localhost:8000/ - use Ctrl-C to quit")
        session.run("mkdocs", "serve")
    else:
        session.run("mkdocs", "build")
44 |
45 |
@nox.session
def build(session):
    """Install ``build`` and run ``python -m build`` to produce an SDist and wheel."""
    session.install("build")
    build_command = ("python", "-m", "build")
    session.run(*build_command)
51 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.yml:
--------------------------------------------------------------------------------
1 | name: Bug Report
2 | description: File a bug report
3 | title: "[Bug]: "
4 | labels: ["bug"]
5 | body:
6 | - type: markdown
7 | attributes:
8 | value: |
9 | Thanks for taking the time to fill out this bug report!
10 | - type: input
11 | id: OCRed-version
12 | attributes:
13 | label: OCRed Version
14 | description: What version of OCRed are you running?
15 | placeholder: OCRed version
16 | validations:
17 | required: true
18 | - type: input
19 | id: python-version
20 | attributes:
21 | label: Python Version
22 | description: What version of python are you running?
23 | placeholder: python version
24 | validations:
25 | required: true
26 | - type: textarea
27 | id: what-happened
28 | attributes:
29 | label: Describe the bug
30 | description: A clear and concise description of what the bug is.
31 | validations:
32 | required: true
33 | - type: textarea
34 | id: reproduce
35 | attributes:
36 | label: Steps to Reproduce
37 | description: Tell us how to reproduce this behaviour. Ideally, this should take the form of a [Minimal Reproducible Example](https://stackoverflow.com/help/minimal-reproducible-example).
38 | validations:
39 | required: true
40 | - type: textarea
41 | id: logs
42 | attributes:
43 | label: Relevant log output
44 | description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
45 | render: shell
46 |
--------------------------------------------------------------------------------
/.github/workflows/cd.yml:
--------------------------------------------------------------------------------
1 | name: CD
2 |
3 | on:
4 | workflow_dispatch:
5 | inputs:
6 | target:
7 | description: 'Deployment target. Can be "pypi" or "testpypi"'
8 | default: "testpypi"
9 | release:
10 | types:
11 | - published
12 |
13 | jobs:
14 | dist:
15 | runs-on: ubuntu-latest
16 | steps:
17 | - uses: actions/checkout@v4
18 | with:
19 | fetch-depth: 0
20 |
21 | - name: Build SDist and wheel
22 | run: pipx run build
23 |
24 | - uses: actions/upload-artifact@v4
25 | with:
26 | path: dist/*
27 |
28 | - name: Check metadata
29 | run: pipx run twine check dist/*
30 |
31 | publish:
32 | needs: dist
33 | runs-on: ubuntu-latest
34 |
35 | steps:
36 | - uses: actions/download-artifact@v4
37 | with:
38 | name: artifact
39 | path: dist
40 |
41 | - name: Publish on PyPI
42 | if: github.event.inputs.target == 'pypi' || (github.event_name == 'release' && github.event.action == 'published')
43 | uses: pypa/gh-action-pypi-publish@v1.10.3
44 | with:
45 | user: __token__
46 | password: ${{ secrets.PYPI_API_TOKEN }}
47 |
48 | - name: Publish on TestPyPI
49 | if: github.event.inputs.target == 'testpypi' || (github.event_name == 'release' && github.event.action == 'published')
50 | uses: pypa/gh-action-pypi-publish@v1.10.3
51 | with:
52 | user: __token__
53 | password: ${{ secrets.TEST_PYPI_API_TOKEN }}
54 | repository-url: https://test.pypi.org/legacy/
55 |
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | # Inspired by https://github.com/avik-pal/Lux.jl/blob/main/docs/mkdocs.yml
2 | theme:
3 | name: material
4 | features:
5 | - navigation.sections
6 | palette:
7 | - scheme: default
8 | primary: white
9 | accent: amber
10 | toggle:
11 | icon: material/weather-night
12 | name: Switch to dark mode
13 | - scheme: slate
14 | primary: black
15 | accent: amber
16 | toggle:
17 | icon: material/weather-sunny
18 | name: Switch to light mode
19 | font:
20 | text: Lato
21 | icon:
22 | repo: fontawesome/brands/github
23 | custom_dir: "docs/_overrides/" # Overriding part of the HTML
24 |
25 | # TODO: look into this
26 | # twitter_name: "@saranshchopra7"
27 | # twitter_url: "https://twitter.com/saranshchopra7"
28 |
29 | site_name: OCRed
30 | site_description: Documentation for OCRed
31 | site_author: Saransh Chopra
32 | site_url: https://ocred.readthedocs.io/
33 |
34 | repo_url: https://github.com/Saransh-cpp/OCRed
35 | repo_name: Saransh-cpp/OCRed
36 | edit_uri: ./edit/main/docs
37 |
38 | extra_css:
39 | - stylesheets/extra.css
40 |
41 | strict: true
42 |
43 | plugins:
44 | - search
45 | - mkdocstrings
46 | - autorefs # Cross-links to headings
47 | - include_exclude_files:
48 | exclude:
49 | - "_overrides"
50 |
51 | markdown_extensions:
52 | - callouts
53 | - pymdownx.arithmatex
54 | - pymdownx.magiclink
55 | - pymdownx.details # Allowing hidden expandable regions denoted by ???
56 | - pymdownx.highlight
57 | - pymdownx.inlinehilite
58 | - pymdownx.superfences # Seems to enable syntax highlighting when used with the Material theme.
59 | - pymdownx.tasklist:
60 | custom_checkbox: true
61 | - pymdownx.tabbed:
62 | alternate_style: true
63 | - pymdownx.snippets:
64 | check_paths: true
65 | - toc:
66 | permalink: "¤" # Adds a clickable permalink to each section heading
67 | toc_depth: 4
68 |
69 | extra_javascript:
70 | - mathjax-config.js
71 | - https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js?config=TeX-AMS-MML_HTMLorMML
72 |
73 | docs_dir: docs
74 |
75 | nav:
76 | - Home: "index.md"
77 | - Installation: "install.md"
78 | # - Examples:
79 | - Reference: "reference.md"
80 | - Contributing: "contributing.md"
81 | - Changelog: "changelog.md"
82 | - Code of Conduct: "conduct.md"
83 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | ci:
2 | autoupdate_commit_msg: "chore: update pre-commit hooks"
3 | autofix_commit_msg: "style: pre-commit fixes"
4 |
5 | repos:
6 | - repo: https://github.com/psf/black-pre-commit-mirror
7 | rev: 24.4.2
8 | hooks:
9 | - id: black-jupyter
10 |
11 | - repo: https://github.com/pre-commit/pre-commit-hooks
12 | rev: v4.6.0
13 | hooks:
14 | - id: check-added-large-files
15 | - id: check-case-conflict
16 | - id: check-merge-conflict
17 | - id: check-symlinks
18 | - id: check-yaml
19 | - id: debug-statements
20 | - id: end-of-file-fixer
21 | exclude: ^docs
22 | - id: mixed-line-ending
23 | - id: requirements-txt-fixer
24 | - id: trailing-whitespace
25 |
26 | - repo: https://github.com/astral-sh/ruff-pre-commit
27 | rev: "v0.4.10"
28 | hooks:
29 | - id: ruff
30 | args: ["--fix", "--show-fixes"]
31 |
32 | - repo: https://github.com/tox-dev/pyproject-fmt
33 | rev: "2.1.3"
34 | hooks:
35 | - id: pyproject-fmt
36 |
37 | - repo: https://github.com/pre-commit/mirrors-mypy
38 | rev: v1.10.0
39 | hooks:
40 | - id: mypy
41 | files: src
42 | args: []
43 | additional_dependencies:
44 | - numpy
45 | - packaging
46 |
47 | - repo: https://github.com/codespell-project/codespell
48 | rev: v2.3.0
49 | hooks:
50 | - id: codespell
51 |
52 | - repo: https://github.com/pre-commit/mirrors-prettier
53 | rev: "v4.0.0-alpha.8"
54 | hooks:
55 | - id: prettier
56 | types_or: [yaml, markdown, html, css, scss, javascript, json]
57 | exclude: assets/js/webapp\.js
58 |
59 | - repo: https://github.com/asottile/blacken-docs
60 | rev: 1.16.0
61 | hooks:
62 | - id: blacken-docs
63 | args: ["-E"]
64 | additional_dependencies: [black==23.1.0]
65 |
66 | - repo: https://github.com/pre-commit/pygrep-hooks
67 | rev: v1.10.0
68 | hooks:
69 | - id: python-check-blanket-type-ignore
70 | - id: rst-backticks
71 | - id: rst-directive-colons
72 | - id: rst-inline-touching-normal
73 |
74 | - repo: https://github.com/nbQA-dev/nbQA
75 | rev: 1.8.5
76 | hooks:
77 | - id: nbqa-pyupgrade
78 | args: ["--py37-plus"]
79 | - id: nbqa-isort
80 | args: ["--float-to-top"]
81 |
--------------------------------------------------------------------------------
/tests/test_preprocessing.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import os
4 |
5 | import cv2
6 | import numpy as np
7 | import pytest
8 |
9 | from ocred.preprocessing import Preprocessor
10 |
# Sample image used by the tests in this module. The path is relative, so it
# is resolved against the directory the tests are run from.
path = "images/CosmosOne.jpg"
12 |
13 |
def test_deprecations_and_errors():
    """Check that ``inplace`` and ``overriden_image`` raise DeprecationWarning.

    Each of ``scan``, ``rotate``, ``remove_noise`` and ``thicken_font`` is
    called once with ``inplace=True`` and once with ``overriden_image=img``.
    """
    pre = Preprocessor(path)
    img = cv2.imread(path)

    # Same methods, in the same order, as the original unrolled checks.
    for method_name in ("scan", "rotate", "remove_noise", "thicken_font"):
        method = getattr(pre, method_name)

        with pytest.raises(DeprecationWarning):
            method(inplace=True)
        with pytest.raises(DeprecationWarning):
            method(overriden_image=img)
37 |
38 |
def test_scan():
    """Check ``scan`` for a Preprocessor built from a path and from an array.

    In both cases the result must be an ndarray equal to ``pre.img``; with
    ``save=True`` a ``scanned.png`` file must exist afterwards.
    """
    # Case 1: Preprocessor constructed from a file path.
    preprocessor = Preprocessor(path)
    assert isinstance(preprocessor.img, np.ndarray)

    result = preprocessor.scan()
    assert isinstance(result, np.ndarray)
    assert isinstance(preprocessor.img, np.ndarray)
    assert (result == preprocessor.img).all()

    # Case 2: Preprocessor constructed from an already-loaded image.
    loaded_image = cv2.imread(path)
    preprocessor = Preprocessor(loaded_image)
    result = preprocessor.scan(save=True)
    assert isinstance(result, np.ndarray)
    assert isinstance(preprocessor.img, np.ndarray)
    assert (result == preprocessor.img).all()

    # Remove the saved file so it is not left behind in the working directory.
    output_file = "scanned.png"
    assert os.path.exists(output_file)
    os.remove(output_file)
57 |
58 |
def test_rotate():
    """Check that ``rotate(save=True)`` returns ``(ndarray, float)`` and writes ``rotated.png``.

    The returned image must equal ``pre.img`` after the call.
    """
    preprocessor = Preprocessor(path)
    assert isinstance(preprocessor.img, np.ndarray)

    rotated_image, angle = preprocessor.rotate(save=True)
    assert isinstance(angle, float)
    assert isinstance(rotated_image, np.ndarray)
    assert isinstance(preprocessor.img, np.ndarray)
    assert (rotated_image == preprocessor.img).all()

    # Remove the saved file so it is not left behind in the working directory.
    output_file = "rotated.png"
    assert os.path.exists(output_file)
    os.remove(output_file)
71 |
72 |
def test_remove_noise():
    """Check that ``remove_noise(save=True)`` returns an ndarray and writes ``noise_free.png``.

    The returned image must equal ``pre.img`` after the call.
    """
    preprocessor = Preprocessor(path)
    assert isinstance(preprocessor.img, np.ndarray)

    denoised_image = preprocessor.remove_noise(save=True)
    assert isinstance(denoised_image, np.ndarray)
    assert isinstance(preprocessor.img, np.ndarray)
    assert (denoised_image == preprocessor.img).all()

    # Remove the saved file so it is not left behind in the working directory.
    output_file = "noise_free.png"
    assert os.path.exists(output_file)
    os.remove(output_file)
84 |
85 |
def test_thicken_font():
    """Check that ``thicken_font(save=True)`` returns an ndarray and writes ``thick_font.png``.

    The returned image must equal ``pre.img`` after the call.
    """
    preprocessor = Preprocessor(path)
    assert isinstance(preprocessor.img, np.ndarray)

    thick_image = preprocessor.thicken_font(save=True)
    assert isinstance(thick_image, np.ndarray)
    assert isinstance(preprocessor.img, np.ndarray)
    assert (thick_image == preprocessor.img).all()

    # Remove the saved file so it is not left behind in the working directory.
    output_file = "thick_font.png"
    assert os.path.exists(output_file)
    os.remove(output_file)
97 |
--------------------------------------------------------------------------------
/docs/stylesheets/extra.css:
--------------------------------------------------------------------------------
1 | /* Fix /page#foo going to the top of the viewport and being hidden by the navbar */
2 | html {
3 | scroll-padding-top: 50px;
4 | }
5 |
6 | /* Fit the Twitter handle alongside the GitHub one in the top right. */
7 |
8 | div.md-header__source {
9 | width: revert;
10 | max-width: revert;
11 | }
12 |
13 | a.md-source {
14 | display: inline-block;
15 | }
16 |
17 | .md-source__repository {
18 | max-width: 100%;
19 | }
20 |
21 | /* Emphasise sections of nav on left hand side */
22 |
23 | nav.md-nav {
24 | padding-left: 5px;
25 | }
26 |
27 | nav.md-nav--secondary {
28 | border-left: revert !important;
29 | }
30 |
31 | .md-nav__title {
32 | font-size: 0.9rem;
33 | }
34 |
35 | .md-nav__item--section > .md-nav__link {
36 | font-size: 0.9rem;
37 | }
38 |
39 | /* Indent autogenerated documentation */
40 |
41 | div.doc-contents {
42 | padding-left: 25px;
43 | border-left: 4px solid rgba(230, 230, 230);
44 | }
45 |
46 | /* Increase visibility of splitters "---" */
47 |
48 | [data-md-color-scheme="default"] .md-typeset hr {
49 | border-bottom-color: rgb(0, 0, 0);
50 | border-bottom-width: 1pt;
51 | }
52 |
53 | [data-md-color-scheme="slate"] .md-typeset hr {
54 | border-bottom-color: rgb(230, 230, 230);
55 | }
56 |
57 | /* More space at the bottom of the page */
58 |
59 | .md-main__inner {
60 | margin-bottom: 1.5rem;
61 | }
62 |
63 | /* Remove prev/next footer buttons */
64 |
65 | .md-footer__inner {
66 | display: none;
67 | }
68 |
69 | /* Bugfix: remove the superfluous parts generated when doing:
70 |
71 | ??? Blah
72 |
73 | ::: library.something
74 | */
75 |
76 | .md-typeset details .mkdocstrings > h4 {
77 | display: none;
78 | }
79 |
80 | .md-typeset details .mkdocstrings > h5 {
81 | display: none;
82 | }
83 |
84 | /* Change default colours for