├── .gitattributes ├── .github └── workflows │ └── build-sphinx.yml ├── .gitignore ├── .graphics ├── all-results-example.png ├── azure-create-computer-vision.png ├── azure-create-resource.png ├── azure-dashboard.png ├── azure-get-keys.png ├── azure-no-resources.png ├── azure-start-free-account.png ├── caltech-round.png ├── caltech-round.svg ├── clara-barton-page.jpg ├── clara-barton-page.png ├── example-tsv-file.png ├── glaser-example-google.jpg ├── glaser-example-google.png ├── google-api-create-credentials-button.png ├── google-api-credentials-menu.png ├── google-bounding-box-example.png ├── google-cloud-add-api.png ├── google-cloud-new-project.png ├── google-cloud-platform-api-option.png ├── google-cloud-platform.png ├── google-create-credentials.png ├── google-create-oauth-client-id.png ├── google-create-service-account-key.png ├── google-create-service-account.png ├── google-download-json.png ├── google-gcp-add-billing-account.png ├── google-gcp-billing.png ├── google-gcp-create-project.png ├── google-gcp-create-service-account-key.png ├── google-gcp-free.png ├── google-gcp-new-project.png ├── google-gcp-no-billing.png ├── google-gcp-role.png ├── google-oauth-consent-info.png ├── google-oauth-create-client-id.png ├── google-select-project.png ├── google-service-account-info.png ├── handprint-asciinema.png ├── handprint-demo.gif ├── handprint-preview-image.png ├── linux-32.png ├── mac-os-32.png ├── noun_Hand_733265.png ├── noun_Hand_733265.svg ├── os-windows-32.png └── sample-annotated-image.png ├── CHANGES.md ├── CITATION.cff ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── Pipfile ├── Pipfile.lock ├── README-PAPER.md ├── README.md ├── SUPPORT.md ├── bin └── handprint ├── codemeta.json ├── dev ├── icon │ ├── README.md │ ├── handprint-icon-white.png │ ├── handprint-icon-white.svg │ ├── handprint-icon.svg │ └── noun_Hand_733265.svg ├── scripts │ └── create-pyz └── services │ ├── google │ ├── Detect handwriting in images - Cloud Vision API 
- Google Cloud.pdf │ ├── Detect text in images - Cloud Vision API - Google Cloud.pdf │ ├── Document Text Tutorial - Cloud Vision API - Google Cloud.pdf │ ├── Google-vision-notes.md │ ├── Package google.cloud.vision.v1p4beta1 - Cloud Vision API.pdf │ ├── README.md │ ├── Types for Google Cloud Vision v1 API - google-cloud-vision documentation.pdf │ ├── google cloud vision api feature types 2018-10-25.pdf │ └── google.cloud.vision_v1.types.image_annotator - google-cloud-vision documentation.pdf │ └── sample-output │ ├── README.md │ ├── dag-304-DAG_1_1_8_0029.handprint-all.png │ ├── dag-304-DAG_1_1_8_0029.handprint-amazon-rekognition.json │ ├── dag-304-DAG_1_1_8_0029.handprint-amazon-rekognition.png │ ├── dag-304-DAG_1_1_8_0029.handprint-amazon-rekognition.txt │ ├── dag-304-DAG_1_1_8_0029.handprint-amazon-textract.json │ ├── dag-304-DAG_1_1_8_0029.handprint-amazon-textract.png │ ├── dag-304-DAG_1_1_8_0029.handprint-amazon-textract.txt │ ├── dag-304-DAG_1_1_8_0029.handprint-google.json │ ├── dag-304-DAG_1_1_8_0029.handprint-google.png │ ├── dag-304-DAG_1_1_8_0029.handprint-google.txt │ ├── dag-304-DAG_1_1_8_0029.handprint-microsoft.json │ ├── dag-304-DAG_1_1_8_0029.handprint-microsoft.png │ ├── dag-304-DAG_1_1_8_0029.handprint-microsoft.txt │ └── dag-304-DAG_1_1_8_0029.jp2 ├── docs ├── .nojekyll ├── Makefile ├── README.md ├── _static │ ├── css │ │ └── custom.css │ ├── media │ │ ├── all-results-example.png │ │ ├── clara-barton-page.jpg │ │ ├── favicon.ico │ │ ├── glaser-example-google.jpg │ │ ├── handprint-icon-white.png │ │ ├── handprint-icon-white.svg │ │ └── handprint-icon.svg │ └── versions.json ├── advanced-usage.md ├── basic-usage.md ├── colophon.md ├── command-summary.md ├── conf.py ├── configuration.md ├── index.md ├── installation.md └── known-issues.md ├── handprint ├── __init__.py ├── __main__.py ├── comparison.py ├── credentials │ ├── __init__.py │ ├── amazon_auth.py │ ├── base.py │ ├── credentials_files.py │ ├── google_auth.py │ └── microsoft_auth.py ├── 
exceptions.py ├── exit_codes.py ├── images.py ├── main_body.py ├── manager.py └── services │ ├── __init__.py │ ├── amazon.py │ ├── base.py │ ├── google.py │ └── microsoft.py ├── pubs └── joss │ ├── .gitignore │ ├── H96566k.handprint-all.png │ ├── Makefile │ ├── README.md │ ├── notes.txt │ ├── paper.bib │ └── paper.md ├── requirements-dev.txt ├── requirements.txt ├── setup.cfg ├── setup.py └── tests ├── data ├── README.md ├── bad-images │ └── corrupted-image.png ├── caltech-archives │ ├── caltech-archives-urls.txt │ └── glaser │ │ ├── DAG_5_1_6 1952-1957 Notebook VI p2.jpg │ │ ├── DAG_5_2_1 1950-1953 notebook VIII p7.jpg │ │ ├── README.md │ │ ├── dag-285-DAG_1_1_8_0003.jp2 │ │ └── dag-304-DAG_1_1_8_0029.jp2 ├── fragments │ ├── f1.png │ ├── f2.png │ ├── f6.png │ └── f7.png └── public-domain │ ├── LOC-urls.txt │ └── images │ ├── AGBell_Notebook.jpg │ ├── H96566k.jpg │ ├── README.md │ ├── clara-barton-life-of-my-childhood-p90.jpg │ └── mabel-h-bell-to-eliza-s-bell-sept-28-1879-1-1600.jpg ├── test_comparison.py ├── test_exceptions.py ├── test_exit_codes.py └── test_images.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # -*- mode: sh; -*- 2 | 3 | # Set the default behavior, in case people don't have core.autocrlf set. 4 | # ............................................................................. 5 | 6 | * text=auto 7 | 8 | # Specify what's text and should be normalized. 9 | # ............................................................................. 10 | 11 | *.py text 12 | *.in text 13 | *.rst text 14 | *.cfg text 15 | *.ini text 16 | *.yml text 17 | *.json text 18 | *.bat text 19 | *.sh text 20 | LICENSE text 21 | CONTRIBUTING text 22 | 23 | # Denote all files that are truly binary and should not be modified. 24 | # ............................................................................. 
25 | 26 | *.png binary 27 | *.jpg binary 28 | *.xls binary 29 | *.doc binary 30 | 31 | # This next one is because in other projects, we've had problems with git 32 | # getting confused about line endings when people using Windows and Mac edit 33 | # the same files. 34 | # ............................................................................. 35 | 36 | *.csv binary diff=csv 37 | -------------------------------------------------------------------------------- /.github/workflows/build-sphinx.yml: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # @file build-sphinx.yml 3 | # @brief GitHub Actions workflow to build Handprint docs using Sphinx + MyST 4 | # @author Michael Hucka 5 | # @license Please see the file named LICENSE in the project directory 6 | # @website https://github.com/caltechlibrary/handprint 7 | # 8 | # This workflow file was originally based on work by GitHub user "peaceiris": 9 | # https://github.com/peaceiris/actions-gh-pages#%EF%B8%8F-static-site-generators-with-python 10 | # ============================================================================= 11 | 12 | name: Update docs on GitHub 13 | 14 | on: 15 | push: 16 | branches: 17 | - main 18 | 19 | jobs: 20 | deploy: 21 | runs-on: ubuntu-18.04 22 | steps: 23 | - uses: actions/checkout@v2 24 | 25 | - name: Setup Python 26 | uses: actions/setup-python@v2 27 | with: 28 | python-version: '3.8' 29 | 30 | - name: Upgrade pip 31 | run: | 32 | # install pip>=20.1 to use "pip cache dir" 33 | python3 -m pip install --upgrade pip 34 | 35 | - name: Get pip cache dir 36 | id: pip-cache 37 | run: echo "::set-output name=dir::$(pip cache dir)" 38 | 39 | - name: Cache dependencies 40 | uses: actions/cache@v2 41 | with: 42 | path: ${{ steps.pip-cache.outputs.dir }} 43 | key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} 44 | restore-keys: | 45 | ${{ runner.os }}-pip- 46 | 47 | 
- name: Install dependencies 48 | run: | 49 | python3 -m pip install "myst-parser[linkify]" 50 | python3 -m pip install sphinx-material 51 | python3 -m pip install sphinxcontrib-mermaid 52 | 53 | - name: Build 54 | run: | 55 | cd docs 56 | make html 57 | 58 | - name: Deploy 59 | uses: peaceiris/actions-gh-pages@v3 60 | with: 61 | github_token: ${{ secrets.GITHUB_TOKEN }} 62 | publish_dir: ./docs/_build/html 63 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # -*- mode: sh-mode; -*- 2 | 3 | # Compiled source 4 | # ............................................................................. 5 | 6 | *.com 7 | *.class 8 | *.dll 9 | *.exe 10 | *.o 11 | *.so 12 | 13 | # Compressed archives & package files 14 | # ............................................................................. 15 | 16 | *.7z 17 | *.dmg 18 | *.gz 19 | *.iso 20 | *.jar 21 | *.rar 22 | *.tar 23 | *.zip 24 | 25 | # OS-specific things to ignore: 26 | # ............................................................................. 27 | 28 | .DS_Store 29 | .DS_Store? 30 | ._* 31 | .Spotlight-V100 32 | .Trashes 33 | ehthumbs.db 34 | Thumbs.db 35 | 36 | # Emacs-specific things to ignore: 37 | # ............................................................................. 38 | 39 | *~ 40 | .#* 41 | .git/COMMIT_EDITMSG 42 | auto 43 | *.synctex.gz 44 | TAGS 45 | 46 | # LaTeX-specific things to ignore: 47 | # ............................................................................. 48 | 49 | *.aux 50 | *.log 51 | *.bbl 52 | *.blg 53 | *.out 54 | *.toc 55 | *.loc 56 | *.mp 57 | 58 | # Python-specific things to ignore: 59 | # ............................................................................. 
60 | 61 | *.pyc 62 | __pycache__ 63 | *.egg-info 64 | .eggs 65 | 66 | # Project-specific things to ignore: 67 | # ............................................................................. 68 | 69 | ./build 70 | ./dist 71 | *-reduced.jpg 72 | *-reduced.png 73 | *.all-results.jpg 74 | *.all-results.png 75 | *.amazon-rekognition.jpg 76 | *.amazon-textract.jpg 77 | *.microsoft.jpg 78 | *.google.jpg 79 | *.amazon-rekognition.png 80 | *.amazon-textract.png 81 | *.microsoft.png 82 | *.google.png 83 | *.bak 84 | .README.md.tmp 85 | docs/_build 86 | -------------------------------------------------------------------------------- /.graphics/all-results-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/all-results-example.png -------------------------------------------------------------------------------- /.graphics/azure-create-computer-vision.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/azure-create-computer-vision.png -------------------------------------------------------------------------------- /.graphics/azure-create-resource.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/azure-create-resource.png -------------------------------------------------------------------------------- /.graphics/azure-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/azure-dashboard.png -------------------------------------------------------------------------------- 
/.graphics/azure-get-keys.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/azure-get-keys.png -------------------------------------------------------------------------------- /.graphics/azure-no-resources.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/azure-no-resources.png -------------------------------------------------------------------------------- /.graphics/azure-start-free-account.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/azure-start-free-account.png -------------------------------------------------------------------------------- /.graphics/caltech-round.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/caltech-round.png -------------------------------------------------------------------------------- /.graphics/clara-barton-page.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/clara-barton-page.jpg -------------------------------------------------------------------------------- /.graphics/clara-barton-page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/clara-barton-page.png -------------------------------------------------------------------------------- 
/.graphics/example-tsv-file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/example-tsv-file.png -------------------------------------------------------------------------------- /.graphics/glaser-example-google.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/glaser-example-google.jpg -------------------------------------------------------------------------------- /.graphics/glaser-example-google.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/glaser-example-google.png -------------------------------------------------------------------------------- /.graphics/google-api-create-credentials-button.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/google-api-create-credentials-button.png -------------------------------------------------------------------------------- /.graphics/google-api-credentials-menu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/google-api-credentials-menu.png -------------------------------------------------------------------------------- /.graphics/google-bounding-box-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/google-bounding-box-example.png 
-------------------------------------------------------------------------------- /.graphics/google-cloud-add-api.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/google-cloud-add-api.png -------------------------------------------------------------------------------- /.graphics/google-cloud-new-project.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/google-cloud-new-project.png -------------------------------------------------------------------------------- /.graphics/google-cloud-platform-api-option.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/google-cloud-platform-api-option.png -------------------------------------------------------------------------------- /.graphics/google-cloud-platform.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/google-cloud-platform.png -------------------------------------------------------------------------------- /.graphics/google-create-credentials.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/google-create-credentials.png -------------------------------------------------------------------------------- /.graphics/google-create-oauth-client-id.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/google-create-oauth-client-id.png -------------------------------------------------------------------------------- /.graphics/google-create-service-account-key.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/google-create-service-account-key.png -------------------------------------------------------------------------------- /.graphics/google-create-service-account.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/google-create-service-account.png -------------------------------------------------------------------------------- /.graphics/google-download-json.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/google-download-json.png -------------------------------------------------------------------------------- /.graphics/google-gcp-add-billing-account.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/google-gcp-add-billing-account.png -------------------------------------------------------------------------------- /.graphics/google-gcp-billing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/google-gcp-billing.png -------------------------------------------------------------------------------- /.graphics/google-gcp-create-project.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/google-gcp-create-project.png -------------------------------------------------------------------------------- /.graphics/google-gcp-create-service-account-key.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/google-gcp-create-service-account-key.png -------------------------------------------------------------------------------- /.graphics/google-gcp-free.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/google-gcp-free.png -------------------------------------------------------------------------------- /.graphics/google-gcp-new-project.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/google-gcp-new-project.png -------------------------------------------------------------------------------- /.graphics/google-gcp-no-billing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/google-gcp-no-billing.png -------------------------------------------------------------------------------- /.graphics/google-gcp-role.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/google-gcp-role.png -------------------------------------------------------------------------------- 
/.graphics/google-oauth-consent-info.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/google-oauth-consent-info.png -------------------------------------------------------------------------------- /.graphics/google-oauth-create-client-id.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/google-oauth-create-client-id.png -------------------------------------------------------------------------------- /.graphics/google-select-project.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/google-select-project.png -------------------------------------------------------------------------------- /.graphics/google-service-account-info.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/google-service-account-info.png -------------------------------------------------------------------------------- /.graphics/handprint-asciinema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/handprint-asciinema.png -------------------------------------------------------------------------------- /.graphics/handprint-demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/handprint-demo.gif 
-------------------------------------------------------------------------------- /.graphics/handprint-preview-image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/handprint-preview-image.png -------------------------------------------------------------------------------- /.graphics/linux-32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/linux-32.png -------------------------------------------------------------------------------- /.graphics/mac-os-32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/mac-os-32.png -------------------------------------------------------------------------------- /.graphics/noun_Hand_733265.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/noun_Hand_733265.png -------------------------------------------------------------------------------- /.graphics/noun_Hand_733265.svg: -------------------------------------------------------------------------------- 1 | ShapeCreated with Sketch. 
-------------------------------------------------------------------------------- /.graphics/os-windows-32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/os-windows-32.png -------------------------------------------------------------------------------- /.graphics/sample-annotated-image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/.graphics/sample-annotated-image.png -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # YAML 1.2 2 | --- 3 | authors: 4 | - 5 | affiliation: "Caltech Library" 6 | given-names: Michael 7 | family-names: Hucka 8 | orcid: "0000-0001-9105-5960" 9 | cff-version: "1.1.0" 10 | message: "If you use this software, please cite it using these metadata." 11 | repository-code: "https://github.com/caltechlibrary/handprint" 12 | title: "Handprint: Run handwritten text recognition services on images of documents" 13 | date-released: 2022-06-24 14 | version: "1.6.0" 15 | doi: 10.22002/20207 16 | keywords: 17 | - handwritten text recognition 18 | - optical character recognition 19 | - machine learning 20 | - artificial intelligence 21 | - cloud services 22 | - document processing 23 | ... 
24 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | Contributor Covenant Code of Conduct 2 | ==================================== 3 | 4 | ## Our Pledge 5 | 6 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 7 | 8 | ## Our Standards 9 | 10 | Examples of behavior that contributes to creating a positive environment include: 11 | 12 | * Using welcoming and inclusive language 13 | * Being respectful of differing viewpoints and experiences 14 | * Gracefully accepting constructive criticism 15 | * Focusing on what is best for the community 16 | * Showing empathy towards other community members 17 | 18 | Examples of unacceptable behavior by participants include: 19 | 20 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 21 | * Trolling, insulting/derogatory comments, and personal or political attacks 22 | * Public or private harassment 23 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 24 | * Other conduct which could reasonably be considered inappropriate in a professional setting 25 | 26 | ## Our Responsibilities 27 | 28 | Project contributors are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 
29 | 30 | Project contributors have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 31 | 32 | ## Scope 33 | 34 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project contributors. 35 | 36 | ## Enforcement 37 | 38 | If a contributor engages in harassing behavior, the project organizers may take any action they deem appropriate, including warning the offender or expelling them from online forums, online project resources, face-to-face meetings, or any other project-related activity or resource. 39 | 40 | If you are being harassed, notice that someone else is being harassed, or have any other concerns, please contact a member of the project team immediately. Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 
41 | 42 | ## Attribution 43 | 44 | Portions of this Code of Conduct were adapted from Electron's [Contributor Covenant Code of Conduct](https://github.com/electron/electron/blob/master/CODE_OF_CONDUCT.md), which itself was adapted from the [Contributor Covenant](http://contributor-covenant.org/version/1/4), version 1.4. 45 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Guidelines for contributing to this project 2 | 3 | Any constructive contributions – bug reports, pull requests (code or documentation), suggestions for improvements, and more – are welcome. 4 | 5 | ## Conduct 6 | 7 | Everyone is asked to read and respect the [code of conduct](CODE_OF_CONDUCT.md) before participating in this project. 8 | 9 | ## Coordinating work 10 | 11 | A quick way to find out what is currently on people's plates and our near-term plans is to look at the [GitHub issue tracker](https://github.com/caltechlibrary/handprint/issues) for this project, but the possibilities are not limited to what you see there – if you have ideas for new features and enhancements, please feel free to write them up as a new issue or contact the developers directly! 12 | 13 | ## Submitting contributions 14 | 15 | Please feel free to contact the author directly, or even better, jump right in and use the standard GitHub approach of forking the repo and creating a pull request. When committing code changes and submitting pull requests, please write a clear log message for your commits. One-line messages are fine for small changes, but bigger changes should look like this: 16 | 17 | $ git commit -m "A brief summary of the commit 18 | > 19 | > A paragraph describing what changed and its impact." 
20 | 21 | 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018-2022, Caltech 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, 7 | this list of conditions and the following disclaimer. 8 | 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | 3. Neither the name of the copyright holder nor the names of its contributors 14 | may be used to endorse or promote products derived from this software without 15 | specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | POSSIBILITY OF SUCH DAMAGE. 
28 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # @file Makefile 3 | # @brief Makefile for some steps in creating new releases on GitHub 4 | # @author Michael Hucka 5 | # @date 2020-08-11 6 | # @license Please see the file named LICENSE in the project directory 7 | # @website https://github.com/caltechlibrary/handprint 8 | # ============================================================================= 9 | 10 | .ONESHELL: # Run all commands in the same shell. 11 | .SHELLFLAGS += -e # Exit at the first error. 12 | 13 | # Before we go any further, test if certain programs are available. 14 | # The following is based on the approach posted by Jonathan Ben-Avraham to 15 | # Stack Overflow in 2014 at https://stackoverflow.com/a/25668869 16 | 17 | PROGRAMS_NEEDED = curl gh git jq sed 18 | TEST := $(foreach p,$(PROGRAMS_NEEDED),\ 19 | $(if $(shell which $(p)),_,$(error Cannot find program "$(p)"))) 20 | 21 | 22 | # Gather values that we need ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 23 | 24 | $(info Gathering data -- this takes a few moments ...) 
25 | 26 | name := $(strip $(shell grep -m 1 'name\s*=' setup.cfg | cut -f2 -d'=')) 27 | version := $(strip $(shell grep -m 1 'version\s*=' setup.cfg | cut -f2 -d'=')) 28 | url := $(strip $(shell grep -m 1 'url\s*=' setup.cfg | cut -f2 -d'=')) 29 | desc := $(strip $(shell grep -m 1 'description\s*=' setup.cfg | cut -f2 -d'=')) 30 | author := $(strip $(shell grep -m 1 'author\s*=' setup.cfg | cut -f2 -d'=')) 31 | email := $(strip $(shell grep -m 1 'author_email\s*=' setup.cfg | cut -f2 -d'=')) 32 | license := $(strip $(shell grep -m 1 'license\s*=' setup.cfg | cut -f2 -d'=')) 33 | 34 | branch := $(shell git rev-parse --abbrev-ref HEAD) 35 | repo := $(strip $(shell gh repo view | head -1 | cut -f2 -d':')) 36 | id := $(shell curl -s https://api.github.com/repos/$(repo) | jq '.id') 37 | id_url := https://data.caltech.edu/badge/latestdoi/$(id) 38 | doi_url := $(shell curl -sILk $(id_url) | grep Locat | cut -f2 -d' ') 39 | doi := $(subst https://doi.org/,,$(doi_url)) 40 | doi_tail := $(lastword $(subst ., ,$(doi))) 41 | init_file := $(name)/__init__.py 42 | 43 | $(info Gathering data ... Done.) 44 | 45 | 46 | # make release ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 47 | 48 | release: | test-branch release-on-github print-instructions 49 | # test-branch aborts the release unless the current git branch is main. 50 | test-branch: 51 | ifneq ($(branch),main) 52 | $(error Current git branch != main. 
Merge changes into main first) 53 | endif 54 | 55 | update-init:; 56 | @sed -i .bak -e "s|^\(__version__ *=\).*|\1 '$(version)'|" $(init_file) 57 | @sed -i .bak -e "s|^\(__description__ *=\).*|\1 '$(desc)'|" $(init_file) 58 | @sed -i .bak -e "s|^\(__url__ *=\).*|\1 '$(url)'|" $(init_file) 59 | @sed -i .bak -e "s|^\(__author__ *=\).*|\1 '$(author)'|" $(init_file) 60 | @sed -i .bak -e "s|^\(__email__ *=\).*|\1 '$(email)'|" $(init_file) 61 | @sed -i .bak -e "s|^\(__license__ *=\).*|\1 '$(license)'|" $(init_file) 62 | 63 | update-codemeta:; 64 | @sed -i .bak -e "/version/ s/[0-9].[0-9][0-9]*.[0-9][0-9]*/$(version)/" codemeta.json 65 | 66 | update-citation:; 67 | $(eval date := $(shell date "+%F")) 68 | @sed -i .bak -e "/^date-released/ s/[0-9][0-9-]*/$(date)/" CITATION.cff 69 | @sed -i .bak -e "/^version/ s/[0-9].[0-9][0-9]*.[0-9][0-9]*/$(version)/" CITATION.cff 70 | 71 | edited := codemeta.json $(init_file) CITATION.cff 72 | 73 | commit-updates:; 74 | git add $(edited) 75 | git diff-index --quiet HEAD $(edited) || \ 76 | git commit -m"Update stored version number" $(edited) 77 | # update-citation must run before commit-updates so that the refreshed CITATION.cff is what gets committed. 78 | release-on-github: | update-init update-codemeta update-citation commit-updates 79 | $(eval tmp_file := $(shell mktemp /tmp/release-notes-$(name).XXXX)) 80 | git push -v --all 81 | git push -v --tags 82 | $(info ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓) 83 | $(info ┃ Write release notes in the file that gets opened in your ┃) 84 | $(info ┃ editor. Close the editor to complete the release process. ┃) 85 | $(info ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛) 86 | sleep 2 87 | $(EDITOR) $(tmp_file) 88 | gh release create v$(version) -t "Release $(version)" -F $(tmp_file) 89 | 90 | print-instructions:; 91 | $(info ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓) 92 | $(info ┃ Next steps: ┃) 93 | $(info ┃ 1. Visit https://github.com/$(repo)/releases ) 94 | $(info ┃ 2. Check the release ┃) 95 | $(info ┃ 3. 
Wait a few seconds to let web services do their work ┃) 96 | $(info ┃ 4. Run "make update-doi" to update the DOI in README.md ┃) 97 | $(info ┃ 5. Run "make packages" & check the results ┃) 98 | $(info ┃ 6. Run "make test-pypi" to push to test.pypi.org ┃) 99 | $(info ┃ 7. Check https://test.pypi.org/$(repo) ) 100 | $(info ┃ 8. Run "make pypi" to push to pypi for real ┃) 101 | $(info ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛) 102 | @echo "" 103 | 104 | update-doi: 105 | sed -i .bak -e 's|/api/record/[0-9]\{1,\}|/api/record/$(doi_tail)|' README.md 106 | sed -i .bak -e 's|edu/records/[0-9]\{1,\}|edu/records/$(doi_tail)|' README.md 107 | sed -i .bak -e '/doi:/ s|10.22002/[0-9]\{1,\}|10.22002/$(doi_tail)|' CITATION.cff 108 | git add README.md CITATION.cff 109 | git diff-index --quiet HEAD README.md || \ 110 | (git commit -m"Update DOI" README.md && git push -v --all) 111 | git diff-index --quiet HEAD CITATION.cff || \ 112 | (git commit -m"Update DOI" CITATION.cff && git push -v --all) 113 | 114 | packages: clean 115 | python3 setup.py sdist bdist_wheel 116 | python3 -m twine check dist/* 117 | 118 | test-pypi: packages 119 | python3 -m twine upload --repository testpypi dist/$(name)-$(version)*.{whl,gz} 120 | 121 | pypi: packages 122 | python3 -m twine upload dist/* 123 | 124 | 125 | # make executables ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 126 | 127 | binaries binary: 128 | mkdir -p dist/binary 129 | dev/scripts/create-pyz dist/binary 3.8.2 130 | dev/scripts/create-pyz dist/binary 3.9.2 131 | dev/scripts/create-pyz dist/binary 3.10.0 132 | 133 | 134 | # Cleanup and miscellaneous directives ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 135 | 136 | clean: clean-dist clean-build clean-release clean-other 137 | 138 | clean-dist:; 139 | -rm -fr dist/$(name) dist/$(name)-$(version).tar.gz \ 140 | dist/$(name)-$(version)-py3-none-any.whl dist/binary \ 141 | __pycache__ .eggs 142 | 143 | clean-build:; 144 | -rm -rf build 145 | # Remove packaging metadata and the .bak files left behind by the "sed -i .bak" edits above. 146 | 
clean-release:; 147 | -rm -rf $(name).egg-info codemeta.json.bak $(init_file).bak README.md.bak CITATION.cff.bak 148 | 149 | clean-other:; 150 | -rm -fr $(name)/__pycache__ 151 | 152 | .PHONY: release test-branch release-on-github update-init update-codemeta \ 153 | update-citation commit-updates print-instructions update-doi packages test-pypi pypi binaries binary clean clean-dist clean-build clean-release clean-other 154 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | aenum = "==3.1.0" 8 | appdirs = "==1.4.4" 9 | boltons = "==21.0.0" 10 | boto3 = "==1.17.91" 11 | bun = "==0.0.7" 12 | commonpy = ">=1.9.0" 13 | fastnumbers = "==3.1.0" 14 | google-api-core = "==1.30.0" 15 | google-api-python-client = "==2.8.0" 16 | google-auth = "==1.30.2" 17 | google-auth-httplib2 = "==0.1.0" 18 | google-cloud = "==0.34.0" 19 | google-cloud-vision = "==2.3.1" 20 | googleapis-common-protos = "==1.53.0" 21 | grpcio = "==1.38.0" 22 | humanize = ">=3.7.1" 23 | imagesize = "==1.2.0" 24 | matplotlib = "==3.4.2" 25 | numpy = "==1.22.2" 26 | plac = "==1.3.3" 27 | psutil = "==5.8.0" 28 | requests = "==2.25.0" 29 | rich = "==10.1.0" 30 | sidetrack = "==2.0.0" 31 | textdistance = "==4.2.2" 32 | urllib3 = "==1.26.5" 33 | validator-collection = "==1.5.0" 34 | Pillow = "==9.0.1" 35 | PyMuPDF = "==1.19.6" 36 | StringDist = "==1.0.9" 37 | 38 | [dev-packages] 39 | pytest-mock = "==3.7.0" 40 | -------------------------------------------------------------------------------- /README-PAPER.md: -------------------------------------------------------------------------------- 1 | # About the JOSS paper 2 | 3 | This repository uses a separate branch named `joss-paper` to store the files for the Handprint JOSS paper submission made in March 2022. That branch has a separate change history and different file structure. 
Changes to the paper are not reflected in the change history of the `main` or `develop` branches of the repository. 4 | 5 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | Support 2 | ======= 3 | 4 | Thank you for your interest in this project. If you are experiencing problems or have questions, the following are the preferred methods of reaching someone: 5 | 6 | 1. Report a new issue using the [issue tracker](https://github.com/caltechlibrary/handprint/issues). 7 | 2. Send email to the Caltech Library: [helpdesk@library.caltech.edu](mailto:helpdesk@library.caltech.edu). 8 | 3. Send email to an individual involved in the project. People's names appear in the top-level `README.md` file in the source code repository. 9 | -------------------------------------------------------------------------------- /bin/handprint: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # ============================================================================= 3 | # @file handprint 4 | # @brief Simple interface to run handprint, for testing and exploration 5 | # @author Michael Hucka 6 | # @license Please see the file named LICENSE in the project directory 7 | # @website https://github.com/caltechlibrary/handprint 8 | # ============================================================================= 9 | 10 | # Allow this program to be executed directly from the 'bin' directory. 11 | import os 12 | import sys 13 | import plac 14 | 15 | # Allow this program to be executed directly from the 'bin' directory. 16 | try: 17 | thisdir = os.path.dirname(os.path.abspath(__file__)) 18 | sys.path.append(os.path.join(thisdir, '..')) 19 | except: 20 | sys.path.append('..') 21 | 22 | # Hand over to the command line interface. 
23 | import handprint 24 | from handprint.__main__ import main as main 25 | 26 | if __name__ == "__main__": 27 | plac.call(main) 28 | -------------------------------------------------------------------------------- /codemeta.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", 3 | "@type": "SoftwareSourceCode", 4 | "description": "Apply different handwritten text recognition services and algorithms to handwritten documents.", 5 | "name": "Handprint", 6 | "codeRepository": "https://github.com/caltechlibrary/handprint", 7 | "issueTracker": "https://github.com/caltechlibrary/handprint/issues", 8 | "license": "https://github.com/caltechlibrary/handprint/blob/master/LICENSE", 9 | "version": "1.6.0", 10 | "author": [ 11 | { 12 | "@type": "Person", 13 | "givenName": "Michael", 14 | "familyName": "Hucka", 15 | "affiliation": "Caltech Library", 16 | "email": "mhucka@caltech.edu", 17 | "@id": "https://orcid.org/0000-0001-9105-5960" 18 | }], 19 | "developmentStatus": "active", 20 | "keywords": [ 21 | "handwritten text recognition", 22 | "HTR", 23 | "OCR", 24 | "machine learning" 25 | ], 26 | "maintainer": "https://orcid.org/0000-0001-9105-5960", 27 | "programmingLanguage": "Python" 28 | } 29 | -------------------------------------------------------------------------------- /dev/icon/README.md: -------------------------------------------------------------------------------- 1 | The [vector artwork](https://thenounproject.com/search/?q=733265&i=733265) used as a starting point for the logo for this repository was created by ["Kevin"](https://thenounproject.com/kevn/) for the [Noun Project](https://thenounproject.com). It is licensed under the Creative Commons [Attribution 3.0 Unported](https://creativecommons.org/licenses/by/3.0/deed.en) license. The vector graphics was modified by Mike Hucka to change the color. 
2 | -------------------------------------------------------------------------------- /dev/icon/handprint-icon-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/dev/icon/handprint-icon-white.png -------------------------------------------------------------------------------- /dev/icon/handprint-icon-white.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | Shape 4 | Created with Sketch. 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /dev/icon/handprint-icon.svg: -------------------------------------------------------------------------------- 1 | ShapeCreated with Sketch. -------------------------------------------------------------------------------- /dev/icon/noun_Hand_733265.svg: -------------------------------------------------------------------------------- 1 | ShapeCreated with Sketch.Created by Kevinfrom the Noun Project -------------------------------------------------------------------------------- /dev/services/google/Detect handwriting in images - Cloud Vision API - Google Cloud.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/dev/services/google/Detect handwriting in images - Cloud Vision API - Google Cloud.pdf -------------------------------------------------------------------------------- /dev/services/google/Detect text in images - Cloud Vision API - Google Cloud.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/dev/services/google/Detect text in images - Cloud Vision API - Google Cloud.pdf 
-------------------------------------------------------------------------------- /dev/services/google/Document Text Tutorial - Cloud Vision API - Google Cloud.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/dev/services/google/Document Text Tutorial - Cloud Vision API - Google Cloud.pdf -------------------------------------------------------------------------------- /dev/services/google/Google-vision-notes.md: -------------------------------------------------------------------------------- 1 | # Notes about the Google Vision text recognition API 2 | 3 | These notes correspond to v1 and v1p4beta1. I obtained the Python package from PyPI. These notes correspond to [version 2.3.1](https://pypi.org/project/google-cloud-vision/2.3.1/), released on 2021-04-13. 4 | 5 | ## Calling the service 6 | 7 | The basic API is obtained by importing `google.cloud.vision_v1`. For example, 8 | 9 | ```python 10 | from google.cloud import vision_v1 as gv 11 | ``` 12 | 13 | To invoke image recognition, you need a client object of type `ImageAnnotatorClient` and an `ImageContext` object. To set certain parameters, you also need a `TextDetectionParams` object. 
In principle, the parameters available are described in the [documentation for `TextDetectionParams`](https://cloud.google.com/vision/docs/reference/rpc/google.cloud.vision.v1#google.cloud.vision.v1.TextDetectionParams); however, as of 2021-06-10, **only `enable_text_detection_confidence_score` is supported** by `TextDetectionParams`, which you can verify by looking at the [API documentation for `TextDetectionParam`](https://googleapis.dev/python/vision/latest/vision_v1/types.html?highlight=textdetectionparam#google.cloud.vision_v1.types.TextDetectionParams) as well as the [source code in GitHub](https://googleapis.dev/python/vision/latest/_modules/google/cloud/vision_v1/types/image_annotator.html#TextDetectionParams). `ImageContext` also takes a `language_hints` parameter, which for English handwriting, should be set to `"en-t-i0-handwrit"`. 14 | 15 | All together, this leads to the following code to get the client and context objects: 16 | 17 | ```python 18 | client = gv.ImageAnnotatorClient() 19 | params = gv.TextDetectionParams(mapping = { 'enable_text_detection_confidence_score': True }) 20 | context = gv.ImageContext(language_hints = ['en-t-i0-handwrit'], text_detection_params = params) 21 | ``` 22 | 23 | If you do not include the parameters for the confidence score, the results come back with _some_ confidence scores, but not as many. 24 | 25 | Next, you need to create an object containing the image to be uploaded to Google. Assuming the raw bytes of a PNG (or similar) image are stored in the variable `image`, you can do this as follows: 26 | 27 | ```python 28 | img = gv.Image(content = image) 29 | ``` 30 | 31 | And now you can invoke the text recognition service on the image. There are two flavors: `TEXT_DETECTION` and `DOCUMENT_TEXT_DETECTION`. 
Both are used for OCR, but as described in the [Google Vision API documentation](https://cloud.google.com/vision/docs/ocr#optical_character_recognition_ocr), the `DOCUMENT_TEXT_DETECTION` service is "optimized for dense text and documents", and thus presumably more suited to handling scanned documents. (My testing on fairly easy text pages bears this out; the results from `TEXT_DETECTION` were worse.) 32 | 33 | ```python 34 | response = client.document_text_detection(image = img, image_context = context) 35 | ``` 36 | 37 | More information about this service can be found in the section of Google's docs titled [Detect handwriting in images](https://cloud.google.com/vision/docs/handwriting), and in particular the section on [specifying the language](https://cloud.google.com/vision/docs/handwriting#specify_the_language_optional), which has a box explaining how the language hint works. 38 | 39 | To help figure out how to parse the results, the [Document Text Tutorial](https://cloud.google.com/vision/docs/fulltext-annotations) is worth reading. In the end, I came up with code by inspecting the results and figuring out what the different parts were. See in particular the [sample code for `doctext.py`](https://github.com/googleapis/python-vision/blob/HEAD/samples/snippets/document_text/doctext.py). 40 | 41 | To get the full text of a page, there are three approaches possible: 42 | * Access `full_text_annotation.text` from the `response` object. 43 | * Access `text_annotation.description` from the `response` object. In the limited tests I've done on this, the contents were always the same as `full_text_annotation.text`. 44 | * Traverse the hierarchy of objects returned in the list of `full_text_annotation.pages` objects. These will be blocks, paragraphs, words, and symbols. The symbol objects contain in addition indications of breaks in the text. 
In my testing, assembling words into lines (using the break indicators) produced the same results as the full text annotation text. 45 | -------------------------------------------------------------------------------- /dev/services/google/Package google.cloud.vision.v1p4beta1 - Cloud Vision API.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/dev/services/google/Package google.cloud.vision.v1p4beta1 - Cloud Vision API.pdf -------------------------------------------------------------------------------- /dev/services/google/README.md: -------------------------------------------------------------------------------- 1 | Information about the Google Cloud Vision API 2 | ============================================= 3 | 4 | This directory contains some saved materials providing documentation about the Google Cloud Vision API. These are local copies made in case the web pages change in the future. Unless otherwise noted, the documentation content obtained from Google is licensed under the Creative Commons Attribution 3.0 License. More information about Google developers' documentation can be found in the developers.google.com [Site Policies](https://developers.google.com/terms/site-policies) document. 
5 | -------------------------------------------------------------------------------- /dev/services/google/Types for Google Cloud Vision v1 API - google-cloud-vision documentation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/dev/services/google/Types for Google Cloud Vision v1 API - google-cloud-vision documentation.pdf -------------------------------------------------------------------------------- /dev/services/google/google cloud vision api feature types 2018-10-25.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/dev/services/google/google cloud vision api feature types 2018-10-25.pdf -------------------------------------------------------------------------------- /dev/services/google/google.cloud.vision_v1.types.image_annotator - google-cloud-vision documentation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/dev/services/google/google.cloud.vision_v1.types.image_annotator - google-cloud-vision documentation.pdf -------------------------------------------------------------------------------- /dev/services/sample-output/README.md: -------------------------------------------------------------------------------- 1 | # Sample output 2 | 3 | This directory contains output from all four services currently supported, for a sample page from one of the documents in the Caltech Archives' [Donald A. Glaser Digital Collection](http://glaser.library.caltech.edu). 
The following is the command used on 2021-06-09: 4 | 5 | ``` 6 | handprint -e dev/services/sample-output/dag-304-DAG_1_1_8_0029.jp2 7 | ``` 8 | -------------------------------------------------------------------------------- /dev/services/sample-output/dag-304-DAG_1_1_8_0029.handprint-all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/dev/services/sample-output/dag-304-DAG_1_1_8_0029.handprint-all.png -------------------------------------------------------------------------------- /dev/services/sample-output/dag-304-DAG_1_1_8_0029.handprint-amazon-rekognition.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/dev/services/sample-output/dag-304-DAG_1_1_8_0029.handprint-amazon-rekognition.png -------------------------------------------------------------------------------- /dev/services/sample-output/dag-304-DAG_1_1_8_0029.handprint-amazon-rekognition.txt: -------------------------------------------------------------------------------- 1 | is the experimental approach though some theoretical 2 | issues ave probably still cloudy. 3 | In biology knowledge is weaker still, but I 4 | must nevertheless outline my present views. 5 | my 7 June 1962 6 | Living things influence 7 | experience 8 | of 9 | Enormous complexity organi is one of the 10 | m main features of that set living things apart from inert ones. 11 | As objects scientific interest one is curious to know how 12 | living things originated, how top developed into their present 13 | forms, and how they work. 
An attractive working hypothesis 14 | is that they are only complex systems of physical compon 15 | governed by the laws of physics (including chemistry 16 | -------------------------------------------------------------------------------- /dev/services/sample-output/dag-304-DAG_1_1_8_0029.handprint-amazon-textract.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/dev/services/sample-output/dag-304-DAG_1_1_8_0029.handprint-amazon-textract.png -------------------------------------------------------------------------------- /dev/services/sample-output/dag-304-DAG_1_1_8_0029.handprint-amazon-textract.txt: -------------------------------------------------------------------------------- 1 | is the experimental approach, though some theoretical 2 | issues ave probably still cloudy. 3 | In biology my knowledge is weaker still, but I 4 | must nevertheless outline my present views. 5 | 7 June 1962 29 6 | Living things constitute a main influence on human 7 | experience and represent systems of enormous complexity 8 | Enormous complexity of organization is one of the 9 | main features that set living things apart from inert ones. 10 | As objects of scientific interest one is curious to 11 | Know 12 | how 13 | living things originated how dep developed into their present 14 | forms, and how they work. An attractive working hypothesis 15 | is that they are only complex systems of physical 16 | components 17 | governed by the laws of physics (including chemistry ), nothing 18 | more and nothing less. 
On the molecular level this view has 19 | had great successes recently in molecular genetics So far there 20 | has appeared no obstacle contradicting this hypothesis, It must 21 | be admitted, however, that there are not wany detailed 22 | predictions of physics in biology that could furnish a 23 | sensitive test, some people feel that when such tests 24 | are 25 | made, biology will be found to require some new principle 26 | not already present in physics, though not contradicting 27 | physics. In the meantime one goes ontostady the properties 28 | of living things discovering one marvelous mechanism after 29 | another in the complex mechanism that maintains the 30 | delicately ba lauced conditions for healthy life. 31 | What are the questions that seem the most pivotal to 32 | me? 33 | 1) How did the first living thing appear on earth? 34 | 2) Are there living things elsewhere in the universe? 35 | 3) Is evolution a selection among undirected mutations? 36 | 4) Are favorable mutations propagated only within a species, 37 | or can one species "learn" genetic tricks from another 38 | by analoques of bacterial transformation , transduction, and 39 | conjugation? On even by cannibilization or eating totherspacies? 40 | 5) Are there any cases in which learned or otherwise 41 | a required characteristics become genetically buitt-in? 42 | 6) Do the Known processes account quantitatively for the 43 | rate at which evolution has proceeded? 
44 | -------------------------------------------------------------------------------- /dev/services/sample-output/dag-304-DAG_1_1_8_0029.handprint-google.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/dev/services/sample-output/dag-304-DAG_1_1_8_0029.handprint-google.png -------------------------------------------------------------------------------- /dev/services/sample-output/dag-304-DAG_1_1_8_0029.handprint-google.txt: -------------------------------------------------------------------------------- 1 | issues 2 | 2? 3 | more 4 | are 5 | is the experimental approach, though some theoretical 6 | are probably still cloudy. 7 | In biology my knowledge is weaker still, but I 8 | must never the less outline my present views. 9 | 7 June 1962 10 | Living things constitute a main influence 11 | on human 12 | experience and represent systems of enormous complexity 13 | Enormous complexity of organization is one of the 14 | main features that set living things apart from inert ones. 15 | As objects of scientific interest one is curious to know how 16 | living things originated, how dep developed into their present 17 | forms, and how they work. An attractive working hypothesis 18 | is that they only complex systems of physical components 19 | governed by the laws of physics (including chemistry ), nothing 20 | and nothing less. On the molecular level this view ha's 21 | had great successes recently in molecular genetics. So far there 22 | has appeared no obstacle contradicting this hypothesis, It must 23 | be admi Hed, however, that there not 24 | wany 25 | detailed 26 | predictions of physics in biology that could 27 | furnish a 28 | sensitive test; some people feel that when such 29 | tests are 30 | made, biology will be found to require some new principle 31 | not already present in physics, though not contradicting 32 | physics. 
In the meantime one goes on to study the properties 33 | of living things discovering one marvelous mechanism after 34 | another in the complex mechanism that maintains the 35 | delicately balauced 'conditions for healthy, life, 36 | What are the questions that seem the most pivotal to 37 | me ? 38 | 1. How did the first living thing appear on earth? 39 | z) Are there living things elsewhere in the suiverse ? 40 | 3) Is evolution á selection among on directed mutations? 41 | 4) Are favorable mutations propagated only within a species, 42 | species "learn genetic tricks from another 43 | by analogues of bacterial transformation, transduction, and 44 | conjugation? Or even by cannibilization or eating other species.? 45 | 5) Are there 46 | any 47 | cases in which learned or otherwise 48 | acquired characteristics become genetically built-in? 49 | 6) Do the known 50 | account quan 51 | antitatively for the 52 | rate at which evolution nas proceded? 53 | or 54 | can 55 | cue 56 | processes 57 | -------------------------------------------------------------------------------- /dev/services/sample-output/dag-304-DAG_1_1_8_0029.handprint-microsoft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/dev/services/sample-output/dag-304-DAG_1_1_8_0029.handprint-microsoft.png -------------------------------------------------------------------------------- /dev/services/sample-output/dag-304-DAG_1_1_8_0029.handprint-microsoft.txt: -------------------------------------------------------------------------------- 1 | is the experimental approach, though some theoretical 2 | are probably still cloudy . 3 | issues 4 | In biology my knowledge is weaker still, but I 5 | must nevertheless outline 6 | my present views . 7 | 7 June 1962 2P 8 | Living things constitute a main influence on human 9 | represent systems of enormous complexity. 
10 | expericate and 11 | complexity of organization is one of the 12 | Enormous 13 | features that set living things apart from inent ones. 14 | main 15 | As objects of scientific interest one is curious to know how 16 | living things originated , how dep developed into their present 17 | forms, and how they work. An attractive working hypothesis 18 | is that they are only complex systems of physical components 19 | by the laws of physics ( including chemistry ) , nothing 20 | governed 21 | more and nothing less . On the molecular level this view ha's 22 | had great successes recently in molecular genetics. So far there 23 | has appeared no obstacle contradicting this hypothesis . It must 24 | be admitted , however , that there are not many detailed 25 | predictions of physics in biology that could furnish a 26 | sensitive test ; some people feel that when such tests are 27 | made, biology will be found to require some new principle. 28 | not already present in physics, though not contradicting 29 | physics . In the meantime one goes ontostudy the properties 30 | living things discovering one marvelous mechanism after 31 | of 32 | another in the complex mechanism that maintains the 33 | delicately balanced conditions for healthy life. 34 | the most pivotal to 35 | What are the questions that seem 36 | me ? 37 | 1 ) How did the first living thing appear on earth? 38 | 2 ) Are there living things elsewhere in the universe ? 39 | 3 ) Is evolution a selection among undirected mutations ? 40 | 4) Are favorable mutations propagated only within a species, 41 | or can one species "learn" genetic tricks from another 42 | by analogues of bacterial transformation, transduction, and 43 | conjugation? Or even by cannibilization or eating other species? 44 | 5 ) Are there any cases in which learned or otherwise 45 | acquired characteristics become genetically built-in? 46 | 6 ) Do the known processes account quantitatively for the 47 | rate at which evolution was proceeded ? 
-------------------------------------------------------------------------------- /dev/services/sample-output/dag-304-DAG_1_1_8_0029.jp2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/dev/services/sample-output/dag-304-DAG_1_1_8_0029.jp2 -------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/docs/.nojekyll -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # @file Makefile 3 | # @brief Makefile for building docs using Sphinx and MyST 4 | # @created 2021-01-25 5 | # @license Please see the file named LICENSE in the project directory 6 | # @website https://github.com/caltechlibrary/handprint 7 | # ============================================================================= 8 | 9 | # Before we go any further, test if certain programs are available. 10 | # The following is based on the approach posted by Jonathan Ben-Avraham to 11 | # Stack Overflow in 2014 at https://stackoverflow.com/a/25668869 12 | 13 | PROGRAMS_NEEDED = sphinx-build 14 | TEST := $(foreach p,$(PROGRAMS_NEEDED),\ 15 | $(if $(shell which $(p)),_,$(error Cannot find program "$(p)"))) 16 | 17 | 18 | # Gather values that we need ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 19 | 20 | # You can set the following variables from the command line, and also from 21 | # the environment for the first two. 22 | 23 | SPHINXOPTS ?= 24 | SPHINXBUILD ?= sphinx-build 25 | SPHINXAUTO = sphinx-autobuild 26 | SOURCEDIR = . 
27 | BUILDDIR = _build 28 | 29 | # Actions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 30 | 31 | # Put it first so that "make" without argument is like "make help". 32 | help: 33 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 34 | 35 | auto autobuild live livehtml: 36 | @$(SPHINXAUTO) "$(SOURCEDIR)" "$(BUILDDIR)"/html $(SPHINXOPTS) $(O) 37 | 38 | # Catch-all target: route all unknown targets to Sphinx using the new 39 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 40 | %: Makefile 41 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 42 | 43 | 44 | # Cleanup and miscellaneous directives ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 45 | 46 | .PHONY: help auto autobuild livehtml Makefile 47 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # About the Handprint docs 2 | 3 | This page describes how to (re)create the formatted Handprint documentation. 4 | 5 | ## Building the docs locally 6 | 7 | First, install [MyST](https://myst-parser.readthedocs.io/en/latest/index.html) and [Sphinx](https://www.sphinx-doc.org): 8 | 9 | ```sh 10 | python3 -m pip install "myst-parser[linkify]" 11 | python3 -m pip install sphinx-material 12 | python3 -m pip install sphinx-autobuild 13 | ``` 14 | 15 | After that, rebuilding the docs is simply a matter of running `make html` in the current directory: 16 | 17 | ```sh 18 | make html 19 | ``` 20 | 21 | The output will be put in [`_build/html`](_build/html). 
Instead of running `make` deliberately, you can also get auto-rebuilds and live preview using `sphinx-autobuild` by running the following command in the current directory (preferably in a new terminal window, because it will generate continuous output): 22 | 23 | ```sh 24 | make auto 25 | ``` 26 | 27 | 28 | ## Writing documentation 29 | 30 | This documentation is written in [MyST-flavored Markdown](https://myst-parser.readthedocs.io/en/latest/) and the [Napoleon](https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html) extension to Sphinx. What this means is that the documentation is written in Markdown instead of reStructuredText, with essentially all the features of Sphinx and reStructuredText having MyST equivalents and some additional features beyond _that_ – things like [pandoc](https://pandoc.org)-style footnotes, LaTeX math, and more. 31 | -------------------------------------------------------------------------------- /docs/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | @import url('https://fonts.googleapis.com/css2?family=Atkinson+Hyperlegible:ital,wght@0,400;0,700;1,400;1,700&display=swap'); 2 | 3 | body, input { 4 | color: rgba(0,0,0,.87); 5 | -webkit-font-feature-settings: "kern","liga"; 6 | font-feature-settings: "kern","liga"; 7 | font-family: "Atkinson Hyperlegible", "Roboto", "Helvetica Neue", Helvetica, Arial, sans-serif; 8 | } 9 | 10 | .md-typeset { 11 | line-height: 1.5; 12 | font-size: 0.9rem; 13 | } 14 | 15 | .md-typeset p, .md-typeset ol { 16 | margin-top: 0.75em;; 17 | margin-bottom: 0.75em;; 18 | } 19 | 20 | .md-typeset ol li,.md-typeset ul li{ 21 | margin-bottom: .35em; 22 | } 23 | 24 | .md-typeset ol li ol,.md-typeset ol li ul,.md-typeset ul li ol,.md-typeset ul li ul{ 25 | margin:.35em 0 .35em .625em 26 | } 27 | 28 | .md-typeset ol li blockquote,.md-typeset ol li p,.md-typeset ul li blockquote,.md-typeset ul li p{ 29 | margin: .35em 0 !important; 30 | } 31 | 32 | 
.md-sidebar--secondary { 33 | display: none !important; 34 | } 35 | 36 | .md-content { 37 | margin-right: 0; 38 | } 39 | 40 | .md-header-nav__source { 41 | width: 8rem !important; 42 | } 43 | 44 | .md-typeset h1{ 45 | margin: 0 0 1.5rem; 46 | } 47 | 48 | .md-typeset h2 { 49 | margin: 1.5rem 0 .8rem; 50 | } 51 | 52 | .md-typeset h3{ 53 | margin: 1.35rem 0 .8rem; 54 | } 55 | 56 | .md-typeset table:not([class]) { 57 | font-size: 0.8rem; 58 | } 59 | 60 | .md-typeset__scrollwrap { 61 | text-align: center; 62 | } 63 | 64 | nav.md-tabs { 65 | display: none; 66 | } 67 | 68 | .md-nav__list ul { 69 | margin-left: 10px !important; 70 | } 71 | 72 | figure { 73 | margin-top: 1.5em; 74 | text-align: center; 75 | } 76 | 77 | figure img { 78 | max-width: 75% !important; 79 | } 80 | 81 | figure figcaption { 82 | font-style: italic; 83 | } 84 | 85 | dt { 86 | font-weight: bold; 87 | } 88 | 89 | .button { 90 | -moz-border-radius: 2px; 91 | -webkit-border-radius: 2px; 92 | border-radius: 2px; 93 | display: inline-block; 94 | font-size: 90%; 95 | line-height: 1.2; 96 | margin: 0 .1em; 97 | padding: .15em .3em; 98 | } 99 | 100 | .color-info { 101 | background-color: #20a1b6; 102 | color: #fff; 103 | } 104 | 105 | .color-danger { 106 | background-color: #db3a4a; 107 | color: #fff; 108 | } 109 | 110 | .color-primary { 111 | background-color: #357bf6; 112 | color: #fff; 113 | } 114 | 115 | .color-secondary { 116 | background-color: #6c6c6c; 117 | color: #fff; 118 | } 119 | 120 | .color-not-available { 121 | background-color: #a6a6a6; 122 | color: #fff; 123 | } 124 | 125 | .color-outline-info { 126 | border: 1px solid #20a1b6; 127 | border-radius: 3px; 128 | color: #20a1b6; 129 | } 130 | 131 | .color-outline-danger { 132 | border: 1px solid #db3a4a; 133 | border-radius: 3px; 134 | color: #db3a4a; 135 | } 136 | 137 | .color-outline-primary { 138 | border: 1px solid #357bf6; 139 | border-radius: 3px; 140 | color: #357bf6; 141 | } 142 | 143 | .color-outline-secondary { 144 | border: 1px solid 
#6c6c6c; 145 | border-radius: 3px; 146 | color: #6c6c6c; 147 | } 148 | 149 | .color-outline-not-available { 150 | border: 1px solid #a6a6a6; 151 | border-radius: 3px; 152 | color: #a6a6a6; 153 | } 154 | 155 | h1 { 156 | font-weight: 500 !important; 157 | color: #000 !important; 158 | } 159 | 160 | h2 { 161 | font-weight: 500 !important; 162 | color: #000; 163 | } 164 | 165 | h3 { 166 | color: #000; 167 | } 168 | 169 | h4 { 170 | font-style: italic; 171 | } 172 | 173 | img.shadowed { 174 | box-shadow: inset 0 1px 0 rgba(255, 255, 255, .4), 0 22px 50px 4px rgba(0, 0, 0, 0.4), 0 0 0 1px rgba(0, 0, 0, 0.0); 175 | } 176 | -------------------------------------------------------------------------------- /docs/_static/media/all-results-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/docs/_static/media/all-results-example.png -------------------------------------------------------------------------------- /docs/_static/media/clara-barton-page.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/docs/_static/media/clara-barton-page.jpg -------------------------------------------------------------------------------- /docs/_static/media/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/docs/_static/media/favicon.ico -------------------------------------------------------------------------------- /docs/_static/media/glaser-example-google.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/docs/_static/media/glaser-example-google.jpg -------------------------------------------------------------------------------- /docs/_static/media/handprint-icon-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/docs/_static/media/handprint-icon-white.png -------------------------------------------------------------------------------- /docs/_static/media/handprint-icon-white.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | Shape 4 | Created with Sketch. 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /docs/_static/media/handprint-icon.svg: -------------------------------------------------------------------------------- 1 | ShapeCreated with Sketch. -------------------------------------------------------------------------------- /docs/_static/versions.json: -------------------------------------------------------------------------------- 1 | {"release": "", "development": "devel"} 2 | -------------------------------------------------------------------------------- /docs/basic-usage.md: -------------------------------------------------------------------------------- 1 | # Basic Handprint usage 2 | 3 | If the installation steps described in the previous section proceed successfully, a command-line program named `handprint` should end up installed in a location where software is normally installed on your computer. 
Running `handprint` from a terminal shell then should be as simple as running any other shell command on your system: 4 | 5 | ```bash 6 | handprint -h 7 | ``` 8 | 9 | If that fails for some reason, you should be able to run `handprint` from anywhere using the normal approach for running Python modules: 10 | 11 | ```bash 12 | python3 -m handprint -h 13 | ``` 14 | 15 | The `-h` option (`/h` on Windows) will make `handprint` display some help information and exit immediately. To make Handprint do more, you can supply other arguments that instruct `handprint` to process image files (or alternatively, URLs pointing to image files at a network location) and run text recognition algorithms on them, as explained below. 16 | 17 | 18 | ## Installing credentials 19 | 20 | Handprint includes several adapters for working with cloud-based HTR services from Amazon, Google, and Microsoft, but _does not include credentials for using the services_. Before you can use Handprint, you must supply credentials for accessing the cloud services you want to use. The process for doing this involves using the `-a` option (`/a` on Windows) and is described in detail in the section [Configuration for cloud services](configuration.md). 21 | 22 | 23 | ## Input files and URLs 24 | 25 | After credentials are installed, running Handprint _without_ the `-a` option will invoke one or more services on files, directories of files, or URLs pointing to files. 
Here is an example of running Handprint on a directory containing images: 26 | ```sh 27 | handprint tests/data/caltech-archives/glaser/ 28 | ``` 29 | 30 | Image paths or URLs can be supplied to Handprint in any of the following ways: 31 | * One or more directory paths or one or more image file paths on the local disk, which will be interpreted as images (either individually or in directories) to be processed 32 | * One or more URLs, which will be interpreted as network locations of image files to be processed 33 | * If given the `-f` option (`/f` on Windows), a file containing either image paths or image URLs to be processed 34 | 35 | For every input given as a URL, Handprint will first download the image found at the URL to a directory indicated by the option `-o` (`/o` on Windows), or the current directory if option `-o` is not used. 36 | 37 | No matter whether files or URLs, each input item should be a single image of a document page in which text should be recognized. Handprint reads a number of common formats: JP2, JPEG, PDF, PNG, GIF, BMP, and TIFF. However, for simplicity and maximum compatibility with all cloud services, Handprint always **converts all input files to PNG** if they are not already in that format, no matter if a given service can accept other formats. Handprint also **reduces the size** of input images to the smallest size accepted by any of the services invoked if an image exceeds that size. (For example, when sending a file to services A and B at the same time, if service A accepts files up to 10 MB in size and service B accepts files up to 4 MB, Handprint will resize the file to 4 MB before sending it to _both_ A and B, even if A could accept a higher-resolution image.) Finally, if the input contains more than one page (e.g., in a PDF file), Handprint will **only use the first page of the input** and ignore the remaining pages. 
38 | 39 | Be aware that **downsizing images can change the text recognition results returned by some services** compared to the results obtained using the original full-size input image. If your images are larger when converted to PNG than the smallest size accepted by one of the destination services (currently 4 MB, for Microsoft), then you may wish to compare the results of using multiple services at once versus one at a time (i.e., one destination at a time in separate invocations of Handprint). 40 | 41 | Finally, note that providing URLs on the command line can be problematic due to how terminal shells interpret certain characters, and so when supplying URLs, it's usually better to store the URLs in a file in combination with the `-f` option (`/f` on Windows). 42 | 43 | 44 | ## Selecting destination services 45 | 46 | You can use the `-l` option (`/l` on Windows) to make it display a list of the services currently supported by Handprint: 47 | 48 | ```sh 49 | # handprint -l 50 | Known services: amazon-rekognition, amazon-textract, google, microsoft 51 | ``` 52 | 53 | By default, Handprint will send images to all of the known services, creating annotated images to represent the results of each individual service. To invoke only specific services, use the `-s` option (`/s` on Windows) followed by a service name or a list of names separated by commas (e.g., `google,microsoft`). For example, the following command will invoke Microsoft's text recognition service on a page from [Clara Barton's unpublished draft book "The Life of My Childhood"](https://picryl.com/media/clara-barton-papers-speeches-and-writings-file-1849-1947-books-the-life-of-71), available in Handprint's source directory: 54 | ```sh 55 | handprint -s microsoft tests/data/public-domain/images/clara-barton-life-of-my-childhood-p90.jpg 56 | ``` 57 | 58 | Here is the result of that command: 59 |

60 | Example of running Microsoft's service on a page from Clara Barton's unpublished draft book, The Life of My Childhood. 61 |

62 | 63 | 64 | ## Visual display of results 65 | 66 | After gathering the results of each service for a given input, Handprint will create a single compound image consisting of the results for each service arranged in an _N_×_N_ grid. This overview image is intended to make it easier to compare the results of multiple services against each other. The grid image will have the suffix `.handprint-all.png`. Here is a sample output image to illustrate: 67 | 68 |

69 | Example annotated results output image 70 |

71 | 72 | The 2×2 image above was produced by running the following command from the Handprint `tests/data/caltech-archives/glaser` directory: 73 | ```csh 74 | handprint --text-size 20 "DAG_5_1_6 1952-1957 Notebook VI p2.jpg" 75 | ``` 76 | 77 | To move the position of the text annotations overlaid over the input image, you can use the option `-m` (or `/m` on Windows). This takes two numbers separated by a comma in the form `x,y`. Positive numbers move the text rightward and upward, respectively, relative to the default position. The default position of each text annotation in the annotated output is such that the _left edge of the word_ starts at the location of the _upper left corner of the bounding box_ returned by the service; this has the effect of putting the annotation near, but above, the location of the (actual) word in the input image by default. For example, if the word in the image is _strawberry_, the bounding box returned by the service will enclose _strawberry_, and the upper left corner of that bounding box will be somewhere above the letter _s_. Then, the default position of the text annotation will put the left edge of the word "strawberry" at that point above the letter _s_. Using the move-text option allows you to move the annotation if desired. A value such as `0,-5` will move it downward five pixels. 78 | 79 | To change the color of the text annotations overlaid over the input image, you can use the option `-x` (or `/x` on Windows). You can use hex color codes such as `"#ff0000"` (make sure to enclose the value with quotes, or the shell will interpret the pound sign as a comment character), or X11/CSS4 color names with no spaces such as `purple` or `darkgreen`. 80 | 81 | To change the size of the text annotations overlaid over the input image, you can use the option `-z` (or `/z` on Windows). The value is in units of points. The default size is 12. 
82 | 83 | Finally, the individual results, as well as individual annotated images corresponding to the results from each service, will not be retained unless the `-e` extended results option (`/e` on Windows) is invoked (described in more detail below). The production of the overview grid image can be skipped by using the `-G` option (`/G` on Windows). 84 | -------------------------------------------------------------------------------- /docs/colophon.md: -------------------------------------------------------------------------------- 1 | # Colophon 2 | 3 | This documentation was written by [Michael Hucka](https://www.cds.caltech.edu/~mhucka/) using the [Sphinx](https://www.sphinx-doc.org) document generator together with [MyST-flavored Markdown](https://myst-parser.readthedocs.io/en/latest/). The theme is the [Material theme for Sphinx](https://bashtage.github.io/sphinx-material/), with light customizations such as the use of Google's [Atkinson Hyperlegible](https://fonts.google.com/specimen/Atkinson+Hyperlegible) font. A [GitHub Action](https://github.com/caltechlibrary/handprint/blob/main/.github/workflows/build-sphinx.yml) takes care of creating the formatted version of the documentation and hosting it on GitHub.io at https://caltechlibrary.github.io/handprint. The formatted output can also be produced manually using commands implemented in the `Makefile` located in the [`docs/`](https://github.com/caltechlibrary/handprint/tree/main/docs) subdirectory of the Handprint source code repository. 5 | 6 | The [vector artwork](https://thenounproject.com/search/?q=hand&i=733265) of a hand used as a logo for Handprint was created by [Kevin](https://thenounproject.com/kevn/) for the [Noun Project](https://thenounproject.com). It is licensed under the Creative Commons [CC-BY 3.0](https://creativecommons.org/licenses/by/3.0/) license.
6 | 7 | Unless indicated otherwise, all other artwork in this documentation was created using [OmniGraffle Pro](https://www.omnigroup.com/omnigraffle) on a macOS computer. SVG versions of the diagrams were produced with the help of [svg-buddy](https://github.com/phauer/svg-buddy) to embed fonts into the SVG files and overcome a limitation of OmniGraffle's SVG output. 8 | -------------------------------------------------------------------------------- /docs/command-summary.md: -------------------------------------------------------------------------------- 1 | # Handprint command summary 2 | 3 | ## Command-line options 4 | 5 | The following table summarizes all the command line options available. (Note: on Windows computers, `/` must be used as the prefix character instead of the `-` dash character): 6 | 7 | | Short     | Long form | Meaning | Default | | 8 | |----------------------------------|----------------|---------|---------|--| 9 | | `-a` _A_ | `--add-creds` _A_ | Add credentials for service _A_ and exit | | | 10 | | `-b` _B_ | `--base-name` _B_ | Write outputs to files named _B_-n | Use base names of image files | ⚑ | 11 | | `-C` | `--no-color` | Don't color-code info messages | Color-code terminal output | 12 | | `-c` | `--compare` | Compare to ground truth; see `-r` too | | 13 | | `-d` _D_ | `--display` _D_ | Display annotation types _D_ | Display text annotations | ★ | 14 | | `-e` | `--extended` | Produce extended results | Produce only summary image | | 15 | | `-f` _F_ | `--from-file` _F_ | Read file names or URLs from file _F_ | Use args on the command line | 16 | | `-G` | `--no-grid` | Don't create summary image | Create an _N_×_N_ grid image| | 17 | | `-h` | `--help` | Display help, then exit | | | 18 | | `-j` | `--reuse-json` | Reuse prior JSON results if found | Ignore any existing results | | 19 | | `-l` | `--list` | Display known services and exit | | | 20 | | `-m` _x,y_ | `--text-move` _x,y_ | Move each text annotation by x,y | `0,0` | | 21 | | 
`-n` _N_ | `--confidence` _N_ | Use confidence score threshold _N_ | `0` | | 22 | | `-o` _O_ | `--output` _O_ | Write all outputs to directory _O_ | Write to images' directories | | 23 | | `-q` | `--quiet` | Don't write messages while working | Be chatty while working | 24 | | `-r` | `--relaxed` | Use looser criteria for `--compare` | | 25 | | `-s` _S_ | `--service` _S_ | Use recognition service _S_; see `-l` | Use all services | | 26 | | `-t` _T_ | `--threads` _T_ | Use _T_ number of threads | Use (#cores)/2 threads | | 27 | | `-V` | `--version` | Write program version info and exit | | | 28 | | `-x` _X_ | `--text-color` _X_ | Use color _X_ for text annotations | Red | | 29 | | `-z` _Z_ | `--text-size` _Z_ | Use font size _Z_ for text annotations | Use font size 12 | | 30 | | `-@` _OUT_ | `--debug` _OUT_ | Write detailed execution info to _OUT_ | Normal mode | ⬥ | 31 | 32 | ⚑   If URLs are given, then the outputs will be written by default to names of the form `document-n`, where n is an integer. Examples: `document-1.jpg`, `document-1.handprint-google.txt`, etc. This is because images located in network content management systems may not have any clear names in their URLs.
33 | ★   The possible values of _D_ are: `text`, `bb`, `bb-word`, `bb-line`, `bb-para`. Multiple values must be separated with commas. The value `bb` is a shorthand for `bb-word,bb-line,bb-para`. The default is `text`.
34 | ⬥   To write to the console, use the character `-` as the value of _OUT_; otherwise, _OUT_ must be the name of a file where the output should be written. 35 | 36 | 37 | ## Return values 38 | 39 | This program exits with a return code of 0 if no problems are encountered. It returns a nonzero value otherwise. The following table lists the possible return values: 40 | 41 | | Code | Meaning | 42 | |:----:|----------------------------------------------------------| 43 | | 0 | success – program completed normally | 44 | | 1 | the user interrupted the program's execution | 45 | | 2 | encountered a bad or missing value for an option | 46 | | 3 | no network detected – cannot proceed | 47 | | 4 | file error – encountered a problem with a file | 48 | | 5 | server error – encountered a problem with a server | 49 | | 6 | an exception or fatal error occurred | 50 | 51 | 52 | ## Additional notes 53 | 54 | The debug logging functionality is implemented using [Sidetrack](https://github.com/caltechlibrary/sidetrack) and all calls to the debug code are conditionalized on the Python symbol `__debug__`. It is carefully written so that you can cause the calls to be _optimized out completely_ if you run Python with [optimization turned on](https://docs.python.org/3/using/cmdline.html#cmdoption-o) (e.g., using the `-O` command-line option). 55 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # @file conf.py 3 | # @brief Configuration file for Sphinx + MyST based documentation 4 | # @created 2021-01-25 5 | # @license Please see the file named LICENSE in the project directory 6 | # @website https://github.com/caltechlibrary/handprint 7 | # 8 | # This file only contains a selection of the most common options.
For a full 9 | # list, refer to https://www.sphinx-doc.org/en/master/usage/configuration.html 10 | # ============================================================================= 11 | 12 | project = 'Handprint' 13 | copyright = '2022, Caltech Library' 14 | author = 'Michael Hucka @ Caltech Library' 15 | 16 | 17 | # -- General configuration ---------------------------------------------------- 18 | 19 | extensions = [ 20 | 'myst_parser', 21 | 'sphinx.ext.autodoc', 22 | 'sphinx.ext.autosectionlabel', 23 | 'sphinx.ext.napoleon', 24 | 'sphinxcontrib.mermaid' 25 | ] 26 | 27 | # Add any paths that contain templates here, relative to this directory. 28 | templates_path = ['_templates'] 29 | 30 | # List of patterns, relative to source directory, that match files and 31 | # directories to ignore when looking for source files. 32 | # This pattern also affects html_static_path and html_extra_path. 33 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'README.md', 34 | '**/README.md'] 35 | 36 | 37 | # -- Sphinx options for HTML output ------------------------------------------- 38 | 39 | html_title = 'Handprint' 40 | html_short_title = "Home" 41 | 42 | html_logo = "_static/media/handprint-icon-white.png" 43 | html_favicon = "_static/media/favicon.ico" 44 | 45 | # The theme to use for HTML and HTML Help pages. 46 | html_theme = 'sphinx_material' 47 | 48 | # Add any paths that contain custom static files (such as style sheets) here, 49 | # relative to this directory. They are copied after the builtin static files, 50 | # so a file named "default.css" will overwrite the builtin "default.css". 51 | html_static_path = ['_static'] 52 | 53 | # Our additional CSS. 54 | html_css_files = ['css/custom.css'] 55 | 56 | html_show_sourcelink = False 57 | html_sidebars = { 58 | "**": ["globaltoc.html", "searchbox.html"] 59 | } 60 | 61 | # Show the last updated date in the footer using the default format. 
62 | html_last_updated_fmt = "" 63 | 64 | 65 | # -- Options for the Material theme ------------------------------------------- 66 | # C.f. https://github.com/bashtage/sphinx-material/blob/master/docs/conf.py 67 | 68 | # Material theme options (see theme.conf for more information) 69 | html_theme_options = { 70 | 71 | # Set the name of the project to appear in the navigation. 72 | 'nav_title': 'Handprint', 73 | 74 | # Set you GA account ID to enable tracking 75 | 'google_analytics_account': '', 76 | 77 | # Specify a base_url used to generate sitemap.xml. If not 78 | # specified, then no sitemap will be built. 79 | 'base_url': 'https://caltechlibrary.github.io/handprint', 80 | 81 | # Set the colors. I found a list here: 82 | # https://squidfunk.github.io/mkdocs-material/setup/changing-the-colors/ 83 | "theme_color": 'blue-grey', 84 | 'color_primary': 'deep-orange', 85 | 'color_accent': 'teal', 86 | 87 | # Set the repo location to get a badge with stats 88 | 'repo_url': 'https://github.com/caltechlibrary/handprint/', 89 | 'repo_name': 'Handprint', 90 | 91 | # Visible levels of the global TOC; -1 means unlimited 92 | 'globaltoc_depth': 2, 93 | # If False, expand all TOC entries 94 | 'globaltoc_collapse': False, 95 | # If True, show hidden TOC entries 96 | 'globaltoc_includehidden': False, 97 | 98 | "html_minify": False, 99 | "html_prettify": False, 100 | 101 | "version_dropdown": False, 102 | "version_json": "_static/versions.json", 103 | # "version_info": { 104 | # "Release": "https://bashtage.github.io/sphinx-material/", 105 | # "Development": "https://bashtage.github.io/sphinx-material/devel/", 106 | # "Release (rel)": "/sphinx-material/", 107 | # "Development (rel)": "/sphinx-material/devel/", 108 | # }, 109 | } 110 | 111 | 112 | # -- Options for the MyST parser ---------------------------------------------- 113 | 114 | myst_enable_extensions = [ 115 | "colon_fence", 116 | "html_image", 117 | "linkify", 118 | "smartquotes", 119 | "substitution" 120 | ] 121 | 
-------------------------------------------------------------------------------- /docs/configuration.md: -------------------------------------------------------------------------------- 1 | # Configuration for cloud services 2 | 3 | Handprint includes several adapters for working with cloud-based HTR services from Amazon, Google, and Microsoft, but _does not include credentials for using the services_. To be able to use Handprint, you must **both** install a copy of Handprint on your computer **and** supply your copy with credentials for accessing the cloud services you want to use. 4 | 5 | Installing credentials involves a one-time configuration step for **each** cloud-based HTR service after you install Handprint on a computer. In each case, the same command format is used: 6 | ```sh 7 | handprint -a SERVICENAME CREDENTIALSFILE.json 8 | ``` 9 | 10 | _SERVICENAME_ must be one of the service names printed by running `handprint -l`, and `CREDENTIALSFILE.json` must have one of the formats discussed below. When you run this command, Handprint copies `CREDENTIALSFILE.json` to a private location, and thereafter uses the credentials to access _SERVICENAME_. (The private location is different on different systems; for example, on macOS it is `~/Library/Application Support/Handprint/`.) Examples are given below. 11 | 12 | 13 | ## Microsoft 14 | 15 | Microsoft's approach to credentials in Azure involves the use of [subscription keys](https://docs.microsoft.com/en-us/azure/cognitive-services/computer-vision/vision-api-how-to-topics/howtosubscribe). The format of the credentials file for Handprint needs to contain two fields: 16 | 17 | ```json 18 | { 19 | "subscription_key": "YOURKEYHERE", 20 | "endpoint": "https://ENDPOINT" 21 | } 22 | ``` 23 | 24 | The value `"YOURKEYHERE"` will be a string such as `"18de248475134eb49ae4a4e94b93461c"`, and it will be associated with an endpoint URI such as `"https://westus.api.cognitive.microsoft.com"`. 
To obtain a key and the corresponding endpoint URI, visit [https://portal.azure.com](https://portal.azure.com) and sign in using your account login. (Note: you will need to turn off browser security plugins such as Ad Block and uMatrix if you have them, or else the site will not work.) Once you are authenticated to the Azure portal, you can create credentials for using Azure's machine-learning services. Some notes about this can be found in the [Handprint project Wiki pages on GitHub](https://github.com/caltechlibrary/handprint/wiki/Getting-Microsoft-Azure-credentials). 25 | 26 | Once you have obtained both a key and an endpoint URI, use a text editor to create a JSON file in the simple format shown above, save that file somewhere on your computer (for the sake of this example, assume it is `myazurecredentials.json`), and use the command discussed above to make Handprint copy the credentials file: 27 | ```sh 28 | handprint -a microsoft myazurecredentials.json 29 | ``` 30 | 31 | 32 | ## Google 33 | 34 | Credentials for using a Google service account need to be stored in a JSON file that contains many fields.
The overall format looks like this: 35 | 36 | ```json 37 | { 38 | "type": "service_account", 39 | "project_id": "theid", 40 | "private_key_id": "thekey", 41 | "private_key": "-----BEGIN PRIVATE KEY-----anotherkey-----END PRIVATE KEY-----\n", 42 | "client_email": "emailaddress", 43 | "client_id": "id", 44 | "auth_uri": "https://accounts.google.com/o/oauth2/auth", 45 | "token_uri": "https://oauth2.googleapis.com/token", 46 | "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", 47 | "client_x509_cert_url": "someurl" 48 | } 49 | ``` 50 | 51 | The process for getting one of these is summarized in the Google Cloud docs for [Creating a service account](https://cloud.google.com/docs/authentication/), but more explicit instructions can be found in the [Handprint project Wiki pages on GitHub](https://github.com/caltechlibrary/handprint/wiki/Getting-Google-Cloud-credentials). Once you have downloaded a Google credentials file from Google, save the file somewhere on your computer (for the sake of this example, assume it is `mygooglecredentials.json`), and use the command discussed above to make Handprint copy the credentials file: 52 | ```sh 53 | handprint -a google mygooglecredentials.json 54 | ``` 55 | 56 | 57 | ## Amazon 58 | 59 | Amazon credentials for AWS take the form of two alphanumeric strings: a _key id_ string and a _secret access key_ string. In addition, the service needs to be invoked with a region identifier. For the purposes of Handprint, these should be stored in a JSON file with the following format: 60 | 61 | ```json 62 | { 63 | "aws_access_key_id": "YOUR_KEY_ID_HERE", 64 | "aws_secret_access_key": "YOUR_ACCESS_KEY_HERE", 65 | "region_name": "YOUR_REGION_NAME_HERE" 66 | } 67 | ``` 68 | 69 | Getting this information is, thankfully, a relatively simple process for Amazon's services. 
Instructions can be found in the [Handprint project Wiki pages on GitHub](https://github.com/caltechlibrary/handprint/wiki/Creating-credentials-for-use-with-Amazon-Rekognition). Once you have obtained the two alphanumeric keys and a region identifier string, use a text editor to create a JSON file in the simple format shown above, save that file somewhere on your computer (for the sake of this example, assume it is `myamazoncredentials.json`), and use _two_ commands to make Handprint copy the credentials file for the two different Amazon services currently supported by Handprint: 70 | ```sh 71 | handprint -a amazon-textract myamazoncredentials.json 72 | handprint -a amazon-rekognition myamazoncredentials.json 73 | ``` 74 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Handprint 2 | 3 | The _**Hand**written **p**age **r**ecognit**i**o**n** **t**est_ is a command-line program that invokes HTR (handwritten text recognition) services on images of document pages. It can produce annotated images showing the results, compare the recognized text to expected text, save the HTR service results as JSON and text files, and more. It can be applied to any images of text documents. The images can come from your local computer's disk or from web locations. 4 | 5 | Handprint currently supports Google's [Google Cloud Vision API](https://cloud.google.com/vision/docs/ocr), Microsoft's Azure [Computer Vision API](https://azure.microsoft.com/en-us/services/cognitive-services/computer-vision/), and Amazon's [Textract](https://aws.amazon.com/textract/) and [Rekognition](https://aws.amazon.com/rekognition/). (To use the services, you need to provide suitable account credentials.) 6 | 7 | Handprint is written in Python. 
Ready-to-run executables are available for some operating systems; you can also install Handprint using common Python installation frameworks such as `pipx` and `pip`. 8 | 9 | ## Sections 10 | 11 | ```{toctree} 12 | --- 13 | maxdepth: 2 14 | --- 15 | installation.md 16 | configuration.md 17 | basic-usage.md 18 | advanced-usage.md 19 | command-summary.md 20 | known-issues.md 21 | colophon.md 22 | ``` 23 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | This page describes how you can install Handprint on your computer. After installation, you will also need to perform some [configuration steps described elsewhere](configuration.md). 4 | 5 | 6 | ## Preliminary requirements 7 | 8 | Handprint is written in [Python 3](https://www.python.org) and makes use of some additional Python software libraries that are installed automatically during the [installation steps](#installation). It also assumes a macOS, Windows or Linux environment, and a working Internet connection. 9 | 10 | The instructions below assume you have a Python interpreter version 3.8 or higher installed on your computer; if that's not the case, please first install Python and familiarize yourself with running Python programs on your system. If you are unsure of which version of Python you have, you can find out by running the following command in a terminal and inspecting the results: 11 | ```sh 12 | # Note: on Windows, you may have to use "python" instead of "python3" 13 | python3 --version 14 | ``` 15 | 16 | Note for Mac users: if you are using macOS Catalina (10.15) or later and have never run `python3`, then the first time you do, macOS will ask you if you want to install the macOS command-line developer tools. Go ahead and do so, as this is the easiest way to get a recent-enough Python 3 on those systems. 
17 | 18 | 19 | ## Installation instructions 20 | 21 | There are multiple ways of installing Handprint, ranging from downloading a self-contained, single-file, ready-to-run program, to installing it as a typical Python program using `pip`. Please choose the alternative that suits you. 22 | 23 | ### Approach 1: using the standalone Handprint executables 24 | 25 | Beginning with version 1.5.1, runnable self-contained single-file executables are available for select operating system and Python version combinations – to use them, you **only** need a Python 3 interpreter and a copy of Handprint, but **do not** need to run `pip install` or other steps. 26 | 27 | #### MacOS 28 | 29 | Visit the [Handprint releases page](https://github.com/caltechlibrary/handprint/releases) and look for the ZIP files with names such as (e.g.) `handprint-1.5.4-macos-python3.8.zip`. Then: 30 | 1. Download the one matching your version of Python 31 | 2. Unzip the file (if your browser did not automatically unzip it for you) 32 | 3. Open the folder thus created (it will have a name like `handprint-1.5.4-macos-python3.8`) 33 | 4. Look inside for `handprint` and move it to a location where you put other command-line programs (e.g., `/usr/local/bin`) 34 | 35 | #### Linux 36 | 37 | Visit the [Handprint releases page](https://github.com/caltechlibrary/handprint/releases) and look for the ZIP files with names such as (e.g.) `handprint-1.5.4-linux-python3.8.zip`. Then: 38 | 1. Download the one matching your version of Python 39 | 2. Unzip the file (if your browser did not automatically unzip it for you) 40 | 3. Open the folder thus created (it will have a name like `handprint-1.5.4-linux-python3.8`) 41 | 4. Look inside for `handprint` and move it to a location where you put other command-line programs (e.g., `/usr/local/bin`) 42 | 43 | #### Windows 44 | 45 | Standalone executables for Windows are not available at this time. 
If you are running Windows, please use one of the other methods described below. 46 | 47 | 48 | ### Approach 2: using `pipx` 49 | 50 | You can use [pipx](https://pypa.github.io/pipx/) to install Handprint. Pipx will install it into a separate Python environment that isolates the dependencies needed by Handprint from other Python programs on your system, and yet the resulting `handprint` command will be executable from any shell – like any normal application on your computer. If you do not already have `pipx` on your system, it can be installed in a variety of easy ways and it is best to consult [Pipx's installation guide](https://pypa.github.io/pipx/installation/) for instructions. Once you have pipx on your system, you can install Handprint with the following command: 51 | ```sh 52 | pipx install handprint 53 | ``` 54 | 55 | Pipx can also let you run Handprint directly using `pipx run handprint`, although in that case, you must always prefix every Handprint command with `pipx run`. Consult the [documentation for `pipx run`](https://github.com/pypa/pipx#walkthrough-running-an-application-in-a-temporary-virtual-environment) for more information. 56 | 57 | 58 | ### Approach 3: using `pip` 59 | 60 | If you prefer, you can install Handprint with [pip](https://pip.pypa.io/en/stable/installing/). If you don't have the `pip` package or are uncertain if you do, please consult the [pip installation instructions](https://pip.pypa.io/en/stable/installation/). 
Then, to install or upgrade Handprint from the Python package repository, run the following command: 61 | ```sh 62 | python3 -m pip install handprint --upgrade 63 | ``` 64 | -------------------------------------------------------------------------------- /docs/known-issues.md: -------------------------------------------------------------------------------- 1 | # Known issues and limitations 2 | 3 | Here are some known limitations in the current version of Handprint: 4 | * If the input has multiple pages, only the first page/image is used; the rest (if any) are ignored. 5 | * The Amazon Rekognition API will return [at most 50 words in an image](https://docs.aws.amazon.com/rekognition/latest/dg/limits.html). 6 | * The Microsoft Azure API will only detect a maximum of [300 lines of text per page](https://docs.microsoft.com/en-us/azure/cognitive-services/computer-vision/concept-recognizing-text). 7 | * Some services have different file size restrictions depending on the format of the file, but Handprint always uses the same limit for all files for a given service. This is a code simplification. 8 | -------------------------------------------------------------------------------- /handprint/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Handprint: experiment with handwritten text recognition on Caltech Archives. 3 | 4 | Authors 5 | ------- 6 | 7 | Michael Hucka -- Caltech Library 8 | 9 | Copyright 10 | --------- 11 | 12 | Copyright (c) 2018-2022 by the California Institute of Technology. This code 13 | is open-source software released under a 3-clause BSD license. Please see the 14 | file "LICENSE" for more information. 15 | ''' 16 | 17 | # Package metadata ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 18 | # 19 | # ╭────────────────────── Notice ── Notice ── Notice ─────────────────────╮ 20 | # | The following values are automatically updated at every release | 21 | # | by the Makefile. 
# Package metadata.  These values are rewritten automatically by the Makefile
# at every release; manual changes to them will be lost.

__version__ = '1.6.0'
__description__ = 'Run handwritten text recognition services on images of documents'
__url__ = 'https://github.com/caltechlibrary/handprint'
__author__ = 'Michael Hucka'
__email__ = 'mhucka@caltech.edu'
__license__ = 'BSD 3-clause'


# Miscellaneous utilities.
# .............................................................................

def print_version():
    '''Print the package name, version, author, URL and license to stdout.

    Four lines are written, one fact per line, in that order.
    '''
    report = (
        f'{__name__} version {__version__}',
        f'Authors: {__author__}',
        f'URL: {__url__}',
        f'License: {__license__}',
    )
    for entry in report:
        print(entry)


# Miscellaneous constants.
# .............................................................................

# Output format for the files we write.
_OUTPUT_FORMAT = 'png'
_OUTPUT_EXT = '.png'
# Data structures.
# .............................................................................

Line = namedtuple('Line', 'number distance cer gt_text htr_text')
Line.__doc__ = '''Data about one line in the comparison results.
  'number' is the line number in the HTR text results
  'distance' is the Levenshtein distance between the HTR text and the g.t.
  'cer' is the character error for the HTR text line
  'gt_text' is the ground truth text line
  'htr_text' is the HTR text line
'''


# Constants.
# .............................................................................

# Minimum normalized LCSSEQ similarity for two lines to be considered a match.
_SIMILARITY_THRESHOLD = 0.5

# Translation table that deletes the punctuation ignored in "relaxed" mode.
_PUNCTUATION_REMOVER = str.maketrans('', '', '.,:;')


# Main functions.
# .............................................................................

def text_comparison(htr_text, gt_text, relaxed = False):
    '''Compare the HTR result text in "htr_text" with the expected ground truth
    text in "gt_text".  Returns a tab-separated table describing the results.

    This function accounts for the possibility that the HTR results may not
    contain a line of text for every line of ground truth, and conversely,
    may also contain lines of text that are not supposed to appear.  The
    texts are compared line-by-line using longest common subsequence
    similarity (the LCSSEQ function of the Python "textdistance" package) to
    match up corresponding lines before the Levenshtein distance and CER are
    calculated for each line individually.

    If "relaxed" is true, both texts are lower-cased and the characters
    . , : ; are removed before the comparison is made.

    The returned string has a header row, one row per line found in either
    text (with the columns: errors, CER, expected text, received text), and
    two final rows giving the total number of errors.
    '''

    # This works by building up a list of tuples of type "Line" (a named
    # tuple).  Each has this form:
    #
    #   (htr line index, Levenshtein error, CER, gt line text, htr line text)
    #
    # If a line is missing from the htr text (relative to the gt text), its
    # line index is written as None.  If a line is missing from the gt text
    # (relative to the htr text), its "gt line text" value is written as ''.
    # The order of the list is important: in the end it represents the
    # entire sequence of text lines present in either text.  Here is an
    # example showing missing lines:
    #
    #   htr line #    gt text             htr text
    #   ----------    -------             --------
    #   0             ""                  doc 01
    #   1             April 25, 2019      Avril 25, 2019
    #   2             My darling,         My darling,
    #   None          what a wonderful    ""
    #   3             day today was.      bay today vas.
    #   4             ""                  rooujjlh
    #
    # The final outcome for the above will have 6 lines, even though the
    # ground truth has 4 lines, in order to describe the fact that the HTR
    # text contains extra lines of text at the beginning and end.  The HTR
    # text is also missing a line in the middle.
    #
    # Algorithm:
    #  1) Go through the gt lines in order.  For each one, scan the HTR
    #     lines that have not been consumed yet, looking for the first whose
    #     LCSSEQ score reaches the threshold.
    #  2) If one is found, every HTR line skipped over on the way is
    #     recorded as an extra line (empty gt text), then the matched pair is
    #     recorded and the cursor moves past the matched HTR line.
    #  3) If none is found, the gt line is recorded as missing from the HTR
    #     text and the cursor does not move.
    #  4) HTR lines left over after the last gt line are recorded as extras.
    #  5) Add up the error scores and produce the final output string.
    #
    # Keeping a single "next unconsumed line" cursor means the results are
    # built in order in one pass; it also means running out of HTR lines (or
    # having none at all) simply turns the remaining gt lines into "missing"
    # rows, and that no HTR line can be dropped, matched twice, or listed
    # twice.

    # Textdistance takes a long time to load.  Delay loading it until we need
    # it so that the overall application startup times can be faster.
    from textdistance import lcsseq
    # Shorten this name for easier reading in the code below.
    lcsseq_score = lcsseq.normalized_similarity

    gt_lines = gt_text.strip().splitlines()
    htr_lines = htr_text.strip().splitlines()

    if relaxed:
        gt_lines = [text.lower().translate(_PUNCTUATION_REMOVER)
                    for text in gt_lines]
        htr_lines = [text.lower().translate(_PUNCTUATION_REMOVER)
                     for text in htr_lines]

    results = []
    next_index = 0                      # First HTR line not yet accounted for.
    for gt_line in gt_lines:
        for index in range(next_index, len(htr_lines)):
            if lcsseq_score(gt_line, htr_lines[index]) >= _SIMILARITY_THRESHOLD:
                # Lines we skipped over are not in the gt text => extras.
                for skipped in range(next_index, index):
                    results.append(line_data('', htr_lines[skipped], skipped))
                results.append(line_data(gt_line, htr_lines[index], index))
                next_index = index + 1
                break
        else:  # "else" for the for loop, not the if stmt!
            # Nothing sufficiently close.  Treat as missing.
            results.append(line_data(gt_line, '', None))

    # Any lines remaining in the HTR text have no counterpart in the gt text.
    for index in range(next_index, len(htr_lines)):
        results.append(line_data('', htr_lines[index], index))

    # We return data as 4 columns.
    output = ['Errors\tCER (%)\tExpected text\tReceived text']
    total_errors = 0
    for line in results:
        total_errors += line.distance
        output.append('{}\t{}\t{}\t{}'.format(
            line.distance, line.cer, line.gt_text, line.htr_text))
    # Append total errors count, and we're done.
    output.append('Total errors\t\t\t')
    output.append(str(total_errors) + '\t\t\t')
    return '\n'.join(output)


# Helper functions.
# ......................................................................
def line_data(gt_line, htr_line, htr_index):
    '''Return the Line tuple describing one pairing of gt and HTR text.

    "gt_line" is the ground truth text ('' if the HTR line has no
    counterpart), "htr_line" is the HTR text ('' if the gt line is missing
    from the HTR results), and "htr_index" is stored unchanged as the line
    number (callers pass None for a missing HTR line).  The CER is returned
    as a string formatted with two decimals.
    '''
    # Strip leading/trailing blanks and squeeze internal runs of whitespace.
    reference = ' '.join(gt_line.split())
    hypothesis = ' '.join(htr_line.split())

    # The stringdist package definition of levenshtein_norm() divides by the
    # longest of the two strings, but it is more conventional in OCR papers
    # and software to divide by the length of the reference, so we compute
    # the ratio ourselves from the raw distance.
    from stringdist import levenshtein
    edits = levenshtein(reference, hypothesis)

    # With no reference text there is nothing to divide by; report 100%.
    if not reference:
        rate = '100.00'
    else:
        rate = '{:.2f}'.format(100 * float(edits)/len(reference))
    return Line(htr_index, edits, rate, reference, hypothesis)
# Main class.
# .............................................................................

class AmazonCredentials(Credentials):
    '''Credentials for the Amazon services, read from the installed JSON file.

    After successful construction, the parsed contents of the credentials
    file are available from creds().
    '''

    def __init__(self):
        '''Locate and parse the Amazon credentials file.

        Raises AuthFailure if the file has not been installed, cannot be
        read, or cannot be parsed as JSON.
        '''
        # Make sure self.credentials exists even before the file is parsed.
        super().__init__()

        cfile = path.join(self.credentials_dir(), credentials_filename('amazon'))
        if __debug__: log(f'credentials file for amazon is {cfile}')
        if not path.exists(cfile):
            raise AuthFailure('Credentials for Amazon have not been installed')
        elif not readable(cfile):
            raise AuthFailure(f'Amazon credentials file unreadable: {cfile}')

        try:
            with open(cfile, 'r') as file:
                self.credentials = json.load(file)
        except Exception as ex:
            # Any failure here is reported uniformly as an auth problem; the
            # original exception is chained so that it shows up in tracebacks.
            raise AuthFailure(f'Unable to parse Amazon credentials file: {str(ex)}') from ex
class Credentials(object):
    '''Base class for the per-service credentials classes.

    Subclasses load their credentials in __init__() and store them in
    self.credentials.  The class also knows where Handprint keeps its
    installed credentials files and how to install a new one.
    '''

    # Directory in which the credentials files are stored.  Computed once,
    # when the class is defined, using appdirs' per-user config location.
    creds_dir = user_config_dir('Handprint', 'CaltechLibrary')

    def __init__(self):
        self.credentials = None


    def creds(self):
        '''Return the credentials loaded by this object (None if none).'''
        return self.credentials


    @classmethod
    def credentials_dir(cls):
        '''Return the directory where credentials files are stored.'''
        return Credentials.creds_dir


    @classmethod
    def save_credentials(cls, service, supplied_file):
        '''Copy "supplied_file" into the credentials directory as the
        credentials file for "service", creating the directory if needed.
        '''
        if not path.isdir(Credentials.creds_dir):
            if __debug__: log(f'creating credentials dir: {Credentials.creds_dir}.')
            # exist_ok avoids a failure if something else creates the
            # directory between the test above and this call.
            makedirs(Credentials.creds_dir, exist_ok = True)
        dest_file = path.join(Credentials.creds_dir, credentials_filename(service))
        copy_file(supplied_file, dest_file)
# Mapping from service name to the name of the file (inside Handprint's
# credentials directory) holding that service's credentials.  All of the
# Amazon services share one file.
CREDENTIALS_FILES = {
    'amazon'             : 'amazon_credentials.json',
    'amazon-rekognition' : 'amazon_credentials.json',
    'amazon-textract'    : 'amazon_credentials.json',
    'google'             : 'google_credentials.json',
    'microsoft'          : 'microsoft_credentials.json',
}

def credentials_filename(service):
    '''Return the credentials file name used for the service named "service".

    Raises ValueError if "service" is not a key of CREDENTIALS_FILES.  (An
    explicit exception is used rather than an assert statement so that the
    check is still performed when Python is run with -O.)
    '''
    try:
        return CREDENTIALS_FILES[service]
    except KeyError:
        known = ', '.join(sorted(CREDENTIALS_FILES))
        raise ValueError(f'Unknown service name {service!r}; '
                         f'expected one of: {known}') from None
# Main class.
# .............................................................................

class GoogleCredentials(Credentials):
    '''Credentials for Google, supplied via an environment variable.

    Unlike the other credentials classes, this one does not parse the
    credentials file: it only checks that the file is present and readable,
    and then stores the file's path in the GOOGLE_APPLICATION_CREDENTIALS
    environment variable.  Consequently creds() returns None.
    '''

    def __init__(self):
        '''Locate the Google credentials file and export its location.

        Raises AuthFailure if the file has not been installed or cannot be
        read.
        '''
        # Initialize self.credentials so that creds() is safe to call.
        super().__init__()

        cfile = path.join(self.credentials_dir(), credentials_filename('google'))
        if __debug__: log(f'credentials file for google is {cfile}')
        if not path.exists(cfile):
            raise AuthFailure('Credentials for Google have not been installed')
        elif not readable(cfile):
            raise AuthFailure(f'Google credentials file unreadable: {cfile}')

        # Loading the JSON file ourselves has not worked; only the
        # environment variable approach has been found to work, so that is
        # what is used here.
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = cfile
# Endpoint used when the credentials file does not specify one.
_DEFAULT_ENDPOINT = 'https://westus.api.cognitive.microsoft.com'


class MicrosoftCredentials(Credentials):
    '''Credentials for Microsoft, read from the installed JSON file.

    After successful construction, creds() returns the parsed contents of
    the file, in which the value of 'endpoint' has been normalized: trailing
    slashes are removed, 'https://' is prepended if the value does not start
    with 'http', and a default endpoint is filled in if the key is absent.
    '''

    def __init__(self):
        '''Locate, parse and normalize the Microsoft credentials file.

        Raises AuthFailure if the file has not been installed, cannot be
        read, or cannot be parsed.
        '''
        # Make sure self.credentials exists even before the file is parsed.
        super().__init__()

        cfile = path.join(self.credentials_dir(), credentials_filename('microsoft'))
        if __debug__: log(f'credentials file for microsoft is {cfile}')
        if not path.exists(cfile):
            raise AuthFailure('Credentials for Microsoft have not been installed')
        elif not readable(cfile):
            raise AuthFailure(f'Microsoft credentials file unreadable: {cfile}')

        try:
            with open(cfile, 'r') as file:
                creds = json.load(file)
            if 'endpoint' in creds:
                endpoint = creds['endpoint'].rstrip('/')
                if not endpoint.startswith('http'):
                    endpoint = 'https://' + endpoint
            else:
                if __debug__: log('endpoint not found; using default')
                endpoint = _DEFAULT_ENDPOINT
            creds['endpoint'] = endpoint
            self.credentials = creds
        except Exception as ex:
            # Any failure here (I/O, bad JSON, unexpected structure) is
            # reported uniformly as an auth problem, with the cause chained.
            raise AuthFailure(f'Unable to parse Microsoft credentials file: {str(ex)}') from ex
# Base class.
# .............................................................................
# The base class makes it possible to use a single test to distinguish between
# exceptions generated by Handprint and exceptions generated by something else.

class HandprintException(Exception):
    '''Base class for Handprint exceptions.'''


# Exception classes.
# .............................................................................
# Each class below only gives a name to a category of failure; none of them
# adds any behavior to the base class.

class CannotProceed(HandprintException):
    '''A recognizable condition caused an early exit from the program.'''


class UserCancelled(HandprintException):
    '''The user elected to cancel/quit the program.'''


class NetworkFailure(HandprintException):
    '''Unrecoverable problem involving network operations.'''


class NoContent(HandprintException):
    '''No content found at the given location.'''


class CorruptedContent(HandprintException):
    '''Content corruption has been detected.'''


class AuthFailure(HandprintException):
    '''Problem obtaining or using authentication credentials.'''


class ServiceFailure(HandprintException):
    '''Unrecoverable problem involving a remote service.'''


class RateLimitExceeded(HandprintException):
    '''The service reports that its rate limits have been exceeded.'''


class InternalError(HandprintException):
    '''Unrecoverable problem involving Handprint itself.'''
class ExitCode(Enum):
    '''Class of exit codes that this program may return.

    The numeric value of a given code can be obtained by using int().  For
    example, int(ExitCode.success) will produce 0.

    Each member below is declared as a pair: the numeric exit code followed
    by a human-readable description of what that code means.
    '''

    # NOTE(review): presumably these aenum settings bind the two elements of
    # each member's tuple to attributes named "value" and "meaning" --
    # confirm against the aenum documentation.
    _init_ = 'value meaning'
    _settings_ = MultiValue

    success        = 0, "success -- program completed normally"
    user_interrupt = 1, "the user interrupted the program's execution"
    bad_arg        = 2, "encountered a bad or missing value for an option"
    no_network     = 3, "no network detected -- cannot proceed"
    file_error     = 4, "file error -- encountered a problem with a file or directory"
    server_error   = 5, "server error -- encountered a problem with the server"
    exception      = 6, "an exception or fatal error occurred"

    def __int__(self):
        '''Return this member's "value" attribute so that int() works on it.'''
        return self.value
class MainBody(object):
    '''Main body for Handprint.

    Every keyword argument given to the constructor is stored as an
    attribute of the instance.  The methods of this class read the
    following attributes: services, threads, output_dir, make_grid, compare,
    extended, text_size, text_color, text_shift, display, confidence,
    reuse_json, from_file, files, and base_name.
    '''

    def __init__(self, **kwargs):
        '''Initialize internal state.'''

        # Assign parameters to self to make them available within this object.
        for key, value in kwargs.items():
            if __debug__: log(f'parameter value self.{key} = {value}')
            setattr(self, key, value)

        # We expose an attribute "exception" that callers can use to find out
        # if the thread finished normally or with an exception.
        self.exception = None

        # The manager object manages the process of manipulating images and
        # sending them to the services.
        from handprint.manager import Manager
        self._manager = Manager(self.services, self.threads, self.output_dir,
                                self.make_grid, self.compare, self.extended,
                                self.text_size, self.text_color, self.text_shift,
                                self.display, self.confidence, self.reuse_json)


    def run(self):
        '''Run the main body.

        Exceptions are not propagated to the caller; instead, the result of
        sys.exc_info() is stored in the attribute "exception".
        '''

        if __debug__: log('running MainBody')
        try:
            self._do_preflight()
            self._do_main_work()
        except Exception as ex:
            if __debug__: log(f'exception in main body: {antiformat(str(ex))}')
            self.exception = sys.exc_info()
        if __debug__: log('finished MainBody')


    def stop(self):
        '''Ask the manager to stop the services.'''
        if __debug__: log('stopping ...')
        self._manager.stop_services()


    def _do_preflight(self):
        '''Check the option values given by the user, and do other prep.

        Raises CannotProceed (carrying an ExitCode) if there is no network,
        if the from_file is missing or unreadable, or if the output
        directory exists but is not writable.  Creates the output directory
        if it does not exist.
        '''

        from commonpy.network_utils import network_available
        if not network_available():
            alert_fatal('No network connection.')
            raise CannotProceed(ExitCode.no_network)

        if self.from_file:
            if not exists(self.from_file):
                alert_fatal(f'File not found: {self.from_file}')
                raise CannotProceed(ExitCode.bad_arg)
            if not readable(self.from_file):
                alert_fatal(f'File not readable: {self.from_file}')
                raise CannotProceed(ExitCode.file_error)

        if self.output_dir:
            if isdir(self.output_dir):
                if not writable(self.output_dir):
                    alert_fatal(f'Directory not writable: {self.output_dir}')
                    raise CannotProceed(ExitCode.file_error)
            else:
                os.mkdir(self.output_dir)
                if __debug__: log(f'created output_dir directory {self.output_dir}')


    def _do_main_work(self):
        '''Gather the targets and hand them one at a time to the manager.

        Raises CannotProceed if there are no targets to process.
        '''
        # Gather up some things and get prepared.
        targets = self.targets_from_arguments()
        if not targets:
            alert_fatal('No images to process; quitting.')
            raise CannotProceed(ExitCode.bad_arg)
        num_targets = len(targets)

        inform(f'Given {pluralized("image", num_targets, True)} to work on.')
        # The format string has two placeholders, so pass exactly two values.
        inform('Will apply results of {}: {}'.format(
            pluralized('service', len(self.services), True),
            ', '.join(self.services)))
        inform(f'Will use credentials stored in {Credentials.credentials_dir()}/.')
        if self.extended:
            inform('Will save extended results.')
        num_threads = min(self.threads, len(self.services))
        inform(f'Will use up to {num_threads} process threads.')

        # Get to work.
        if __debug__: log('initializing manager and starting processes')
        import shutil
        print_separators = num_targets > 1
        rule = '─'*(shutil.get_terminal_size().columns or 80)
        for index, item in enumerate(targets, start = 1):
            # Check whether we've been interrupted before doing another item.
            raise_for_interrupts()
            # Process next item.
            if print_separators:
                inform(rule)
            self._manager.run_services(item, index, self.base_name)
        if print_separators:
            inform(rule)


    def targets_from_arguments(self):
        '''Return the list of targets (file paths or URLs) to process.

        Targets are read from the file named by from_file if that is set,
        and otherwise are derived from the items in the attribute "files".
        Items whose names contain ".handprint" are dropped, as are items for
        which a version in Handprint's own output format is also present.
        '''
        # Validator_collection takes a long time to load.  Delay loading it
        # until needed, so that overall application startup time is faster.
        from validator_collection.checkers import is_url

        targets = []
        if self.from_file:
            if __debug__: log(f'reading {self.from_file}')
            # Use a context manager so the file is closed promptly, and
            # skip empty lines.
            with open(self.from_file) as input_file:
                targets = [line for line in input_file.read().splitlines() if line]
        else:
            for item in self.files:
                if is_url(item):
                    targets.append(item)
                elif isfile(item) and filename_extension(item) in ACCEPTED_FORMATS:
                    targets.append(item)
                elif isdir(item):
                    # It's a directory, so look for files within.
                    targets += files_in_directory(item, extensions = ACCEPTED_FORMATS)
                else:
                    warn(f'"{item}" not a file or directory')

        # Filter files created in past runs.
        targets = [name for name in targets if '.handprint' not in name]

        # If there is both a file in the format we generate and another
        # format of that file, ignore the other formats and just use ours.
        # A set makes each membership test constant-time; the list is still
        # what determines the order of the result.
        known = set(targets)
        keep = []
        for item in targets:
            ext = filename_extension(item)
            base = filename_basename(item)
            if ext != _OUTPUT_EXT and (base + _OUTPUT_EXT in known):
                # png version of file is also present => skip this other version
                continue
            keep.append(item)
        return keep
# Sorted names of the services in KNOWN_SERVICES, computed once at import.
SERVICES_LIST = sorted(KNOWN_SERVICES.keys())

def services_list():
    '''Return the sorted list of known service names.

    The same module-level list object is returned on every call, so callers
    should treat it as read-only.
    '''
    return SERVICES_LIST
class AmazonTR(TextRecognition):
    '''Base class for Amazon text recognition services.'''

    def init_credentials(self):
        '''Initializes the credentials to use for accessing this service.

        Raises AuthFailure if the credentials cannot be obtained.
        '''
        try:
            if __debug__: log('initializing credentials')
            self._credentials = AmazonCredentials().creds()
        except Exception as ex:
            raise AuthFailure(str(ex)) from ex


    def max_rate(self):
        '''Returns the number of calls allowed per second.'''
        # https://docs.aws.amazon.com/general/latest/gr/aws_service_limits.html#limits_textract
        return 0.25


    def max_size(self):
        '''Returns the maximum size of an acceptable image, in bytes.'''
        # https://docs.aws.amazon.com/textract/latest/dg/textract-dg.pdf#limits
        return 10*1024*1024


    def max_dimensions(self):
        '''Maximum image size as a tuple of pixel numbers: (width, height).
        A value of None indicates the limits are unknown.'''
        # I can't find a limit stated in the Amazon docs.
        return None


    # General scheme of things:
    #
    # * Return errors (via TRResult) if a result could not be obtained
    #   because of an error specific to a particular path/item.  The guiding
    #   principle here is: if the calling loop is processing multiple items,
    #   can it be expected to be able to go on to the next item if this error
    #   occurred?
    #
    # * Raises exceptions if a problem occurs that should stop the calling
    #   code from continuing with this service.  This includes things like
    #   authentication failures, because authentication failures tend to
    #   involve all uses of a service and not just a specific item.
    #
    # * Otherwise, returns a TRResult if successful.

    def amazon_result(self, file_path, variant, method, image_keyword,
                      result_key, value_key, block_key, result):
        '''Returns the result from calling the service on the 'file_path'.
        The result is returned as an TRResult named tuple.

        'variant' is the name of the Amazon client to create and 'method' is
        the name of the client method to call; the image bytes are passed to
        that method under the keyword 'image_keyword'.  'result_key' is the
        key in the response holding the list of blocks, 'value_key' is the
        key in each block holding its kind, and 'block_key' is the key in
        each block holding its text.  If 'result' is non-empty, it is used
        instead of contacting the service.

        Raises AuthFailure, CorruptedContent or ServiceFailure for problems
        that are not specific to reading this one file.
        '''

        # Delay loading the API packages until needed because they take time to
        # load.  Doing this speeds up overall application start time.
        import boto3
        import botocore

        if not result:
            # If any exceptions occur, let them be passed to caller.
            (image, error) = self._image_from_file(file_path)
            if error:
                # The error value is already a complete TRResult, so hand it
                # back as-is rather than nesting it inside another TRResult.
                return error
            # Needed by every error message below, so compute it up front.
            path = relative(file_path)
            try:
                if __debug__: log(f'setting up Amazon client function "{variant}"')
                creds = self._credentials
                session = boto3.session.Session()
                client = session.client(variant, region_name = creds['region_name'],
                                        aws_access_key_id = creds['aws_access_key_id'],
                                        aws_secret_access_key = creds['aws_secret_access_key'])
                if __debug__: log('calling Amazon API function')
                result = getattr(client, method)( **{ image_keyword : {'Bytes': image} })
                if __debug__: log(f'received {len(result[result_key])} blocks')
            except botocore.exceptions.EndpointConnectionError as ex:
                raise AuthFailure(f'Problem with credentials file -- {str(ex)}') from ex
            except KeyboardInterrupt:
                raise
            except KeyError as ex:
                missing = ",".join(str(arg) for arg in ex.args)
                msg = f'Amazon credentials file is missing {missing}'
                raise AuthFailure(msg) from ex
            except Exception as ex:
                # Start from the generic description, and refine it if the
                # exception carries an Amazon error response.
                text = str(ex)
                response = getattr(ex, 'response', None)
                if response and 'Error' in response:
                    details = response['Error']
                    code = details.get('Code', '')
                    text = details.get('Message', text)
                    if code in ['UnsupportedDocumentException', 'BadDocumentException']:
                        msg = f'Amazon {variant} reports bad or corrupted image in {path}'
                        raise CorruptedContent(msg) from ex
                    elif code in ['InvalidSignatureException', 'UnrecognizedClientException']:
                        raise AuthFailure(f'Problem with credentials file -- {text}') from ex
                # Fallback if we can't get details.
                if __debug__: log(f'Amazon returned exception {str(ex)}')
                msg = f'Amazon {variant} failure for {path} -- {text}'
                raise ServiceFailure(msg) from ex

        raise_for_interrupts()
        full_text = ''
        boxes = []
        width, height = imagesize.get(file_path)
        if __debug__: log(f'parsing Amazon result for {relative(file_path)}')
        for block in result[result_key]:
            if value_key not in block:
                continue
            kind = block[value_key].lower()
            if kind in ['word', 'line']:
                text = block[block_key]
                polygon = block['Geometry']['Polygon']
                corners = corner_list(polygon, width, height)
                if corners:
                    boxes.append(Box(kind = kind, bb = corners, text = text,
                                     score = block['Confidence'] / 100))
                else:
                    # Something's wrong with the vertex list.  Skip & continue.
                    if __debug__: log(f'bad bb for {text}: {polygon}')
            if kind == "line":
                if 'Text' in block:
                    full_text += block['Text'] + '\n'
                elif 'DetectedText' in block:
                    full_text += block['DetectedText'] + '\n'
        return TRResult(path = file_path, data = result, boxes = boxes,
                        text = full_text, error = None)
def corner_list(polygon, width, height):
    '''Flattens a polygon from Amazon's JSON output into a list of pixel
    coordinates of the form [x, y, x, y, ...], in the order the corners
    appear in 'polygon'.  Each corner's 'X' and 'Y' values are multiplied by
    'width' and 'height', respectively, and rounded to integers.  If any
    corner lacks an 'X' or a 'Y' value, the result is an empty list.'''
    flattened = []
    for point in polygon:
        if not ('X' in point and 'Y' in point):
            # An incomplete corner makes the whole polygon unusable.
            return []
        # Values are relative to the image size.  Convert to pixels.
        flattened.extend((int(round(point['X'] * width)),
                          int(round(point['Y'] * height))))
    return flattened
class TextRecognition(object):
    '''Base class for text recognition services.

    Subclasses are expected to override the methods that do nothing here.
    Instances are compared for ordering by the value of name().
    '''

    def __init__(self):
        '''Nothing to initialize in the base class.'''


    def __str__(self):
        return self.name()


    def __repr__(self):
        return self.name()


    def __eq__(self, other):
        # Only objects of this instance's own type (or a subtype of it) are
        # compared, and then by their attribute dictionaries.
        if not isinstance(other, type(self)):
            return NotImplemented
        return self.__dict__ == other.__dict__


    def __ne__(self, other):
        # Based on lengthy Stack Overflow answer by user "Maggyero" posted on
        # 2018-06-02 at https://stackoverflow.com/a/50661674/743730
        outcome = self.__eq__(other)
        return NotImplemented if outcome is NotImplemented else (not outcome)


    def __lt__(self, other):
        # Unlike the other ordering methods, no type check is made here.
        return self.name() < other.name()


    def __gt__(self, other):
        if not isinstance(other, type(self)):
            return NotImplemented
        return other.name() < self.name()


    def __le__(self, other):
        if not isinstance(other, type(self)):
            return NotImplemented
        return not other.name() < self.name()


    def __ge__(self, other):
        if not isinstance(other, type(self)):
            return NotImplemented
        return not self.name() < other.name()


    def init_credentials(self):
        '''Initializes the credentials to use for accessing this service.'''


    def name(self):
        '''Returns the canonical internal name for this service.'''


    def name_color(self):
        '''Returns a color code for this service.  See the color definitions
        in messages.py.'''


    def max_rate(self):
        '''Returns the number of calls allowed per second.'''


    def max_size(self):
        '''Returns the maximum size of an acceptable image, in bytes.'''


    def max_dimensions(self):
        '''Maximum image size as a tuple of pixel numbers: (width, height).'''


    def result(self, path, saved_result = None):
        '''Returns the text recognition results from the service as an
        TRResult named tuple.  If a saved result is supplied, use that.
        '''


    def _image_from_file(self, file_path):
        '''Helper function for subclasses to read image files.
        Returns a tuple, (image, error).  On success, "image" is the bytes
        of the file and "error" is None.  On failure, "image" is None and
        "error" is a TRResult whose error field describes the problem.
        '''

        shown_path = relative(file_path)

        def failure(message):
            # Package a message in the (image, error) shape described above.
            failed = TRResult(path = file_path, data = {}, text = '',
                              error = message, boxes = [])
            return (None, failed)

        if not readable(file_path):
            return failure(f'Unable to read file: {shown_path}')
        if __debug__: log(f'reading {shown_path} for {self.name()}')
        with open(file_path, 'rb') as handle:
            contents = handle.read()
        if len(contents) == 0:
            return failure(f'Empty file: {shown_path}')
        if len(contents) > self.max_size():
            message = f'Exceeds {self.max_size()} byte limit for service: {shown_path}'
            return failure(message)
        width, height = imagesize.get(file_path)
        if __debug__: log(f'image size is width = {width}, height = {height}')
        # A false value from max_dimensions() means there is nothing to check.
        if self.max_dimensions():
            max_width, max_height = self.max_dimensions()
            if width > max_width or height > max_height:
                message = f'Dimensions {width}x{height} exceed {self.name()} limits: {shown_path}'
                return failure(message)
        return (contents, None)
41 | _known_features = ['document_text_detection'] 42 | 43 | 44 | def init_credentials(self): 45 | '''Initializes the credentials to use for accessing this service.''' 46 | try: 47 | if __debug__: log('initializing credentials') 48 | GoogleCredentials() 49 | except Exception as ex: 50 | raise AuthFailure(str(ex)) 51 | 52 | 53 | @classmethod 54 | def name(self): 55 | '''Returns the canonical internal name for this service.''' 56 | return "google" 57 | 58 | 59 | @classmethod 60 | def name_color(self): 61 | '''Returns a color code for this service. See the color definitions 62 | in messages.py.''' 63 | return 'deep_sky_blue1' 64 | 65 | 66 | def max_rate(self): 67 | '''Returns the number of calls allowed per second.''' 68 | # https://cloud.google.com/vision/quotas 69 | return 30 70 | 71 | 72 | def max_size(self): 73 | '''Returns the maximum size of an acceptable image, in bytes.''' 74 | # https://cloud.google.com/vision/docs/supported-files 75 | # Google Cloud Vision API docs state that images can't exceed 20 MB 76 | # but the JSON request size limit is 10 MB. We hit the 10 MB limit 77 | # even though we're using the Google API library, which I guess must 78 | # be transferring JSON under the hood. 79 | return 10*1024*1024 80 | 81 | 82 | def max_dimensions(self): 83 | '''Maximum image size as a tuple of pixel numbers: (width, height).''' 84 | # No max dimensions are given in the Google docs. 85 | return None 86 | 87 | 88 | # General scheme of things: 89 | # 90 | # * Return errors (via TRResult) if a result could not be obtained 91 | # because of an error specific to a particular path/item. The guiding 92 | # principle here is: if the calling loop is processing multiple items, 93 | # can it be expected to be able to go on to the next item if this error 94 | # occurred? 95 | # 96 | # * Raises exceptions if a problem occurs that should stop the calling 97 | # code from continuing with this service. 
def result(self, path, result = None):
    '''Returns the result from calling the service on the file at 'path'.
    The result is returned as an TRResult named tuple.

    If 'result' is non-empty, it is used as the service output instead of
    contacting Google.  Problems specific to this one item are reported via
    the error field of the returned TRResult; AuthFailure and ServiceFailure
    are raised for problems with credentials or with the service itself.
    '''

    # Delay loading the API packages until needed because they take time to
    # load.  Doing this speeds up overall application start time.
    # NOTE(review): the last two names are not used directly below, but the
    # imports are kept because they load the submodules that the "except"
    # clauses refer to by dotted path -- confirm before removing.
    import google
    from google.cloud import vision_v1 as gv
    from google.api_core.exceptions import PermissionDenied
    from google.protobuf.json_format import MessageToDict

    if not result:
        # Read the image and proceed with contacting the service.
        (image, error) = self._image_from_file(path)
        if error:
            return error

        if __debug__: log(f'building Google API object for {relative(path)}')
        try:
            client  = gv.ImageAnnotatorClient()
            params  = gv.TextDetectionParams(
                mapping = { 'enable_text_detection_confidence_score': True })
            context = gv.ImageContext(language_hints = ['en-t-i0-handwrit'],
                                      text_detection_params = params)
            img     = gv.Image(content = image)
            if __debug__: log(f'sending image to Google for {relative(path)} ...')
            response = client.document_text_detection(image = img,
                                                      image_context = context)
            if __debug__: log(f'received result from Google for {relative(path)}')
            result = dict_from_response(response)
        except google.api_core.exceptions.PermissionDenied as ex:
            text = 'Authentication failure for Google service -- {}'.format(ex)
            raise AuthFailure(text) from ex
        except google.auth.exceptions.DefaultCredentialsError as ex:
            text = 'Credentials file error for Google service -- {}'.format(ex)
            raise AuthFailure(text) from ex
        except google.api_core.exceptions.ServiceUnavailable as ex:
            text = 'Network, service, or Google configuration error -- {}'.format(ex)
            raise ServiceFailure(text) from ex
        except KeyboardInterrupt:
            raise
        except Exception as ex:
            if isinstance(ex, KeyError):
                # Can happen if you control-C in the middle of the Google call.
                # Result is "Exception ignored in: 'grpc._cython.cygrpc._next'"
                # printed to the terminal and we end up here.
                raise KeyboardInterrupt
            else:
                text = 'Error: {} -- {}'.format(str(ex), path)
                return TRResult(path = path, data = {}, boxes = [],
                                text = '', error = text)

    raise_for_interrupts()
    boxes = []
    # See this page for more information about the structure:
    # https://cloud.google.com/vision/docs/handwriting#python
    annotation = result.get('full_text_annotation') or {}
    pages = annotation.get('pages') or []
    if not pages:
        # Nothing was recognized.  Indexing the first page would fail, so
        # report an empty (but successful) result instead.
        if __debug__: log(f'no pages in Google result for {relative(path)}')
        return TRResult(path = path, data = result, boxes = [],
                        text = annotation.get('text', ''), error = None)
    if len(pages) > 1:
        # This module does not import warn() at the top, so get it here.
        from bun import warn
        warn('More than one page received from Google; using only first.')
    for block in pages[0]['blocks']:
        for para in block['paragraphs']:
            corners = corner_list(para['bounding_box']['vertices'])
            boxes.append(Box(bb = corners, kind = 'para', text = '',
                             score = para['confidence']))
            for word in para['words']:
                text = ''.join(symbol['text'] for symbol in word['symbols'])
                vertices = word['bounding_box']['vertices']
                corners = corner_list(vertices)
                if corners:
                    # NOTE(review): the score given to a word is that of its
                    # paragraph, not the word itself -- confirm intended.
                    boxes.append(Box(bb = corners, kind = 'word',
                                     text = text, score = para['confidence']))
                else:
                    # Something is wrong with the vertex list.
                    # Skip it and continue.
                    if __debug__: log(f'bad bb for {text}: {vertices}')
    full_text = annotation.get('text', '')
    return TRResult(path = path, data = result,
                    boxes = boxes, text = full_text, error = None)
# The Google API can return incomplete vertices for a bounding box.
# In one of our sample images ("pbm-2421-PBM_3_1_1_0016"), I get this result:
# [{'x': 2493}, {'x': 2538, 'y': 1}, {'x': 2535, 'y': 154}, {'x': 2490, 'y': 153}]
# So, we have to test to make sure both 'x' and 'y' keys are in every vertex.

def corner_list(vertices):
    '''Takes a boundingBox value from Google Vision's output and returns
    a condensed version, in the form [x y x y x y x y], with the first x, y
    pair representing the upper left corner.  Returns an empty list if there
    are fewer than 4 vertices or if any vertex lacks an 'x' or 'y' value.'''
    if len(vertices) < 4:
        return []
    corners = []
    for vertex in vertices:
        if 'x' not in vertex or 'y' not in vertex:
            # Incomplete vertex: the box cannot be used.
            return []
        corners.append(vertex['x'])
        corners.append(vertex['y'])
    return corners


# In more recent versions of googleapis-common-protos, MessageToDict is no
# longer directly available as it was before. See this GitHub issue answer:
# https://github.com/googleapis/python-memcache/issues/19#issuecomment-708516816
# The following builds on an answer by user Tobiasz Kędzierski given at
# https://github.com/googleapis/python-memcache/issues/19#issuecomment-709628506

def dict_from_response(response):
    '''Returns the given Google API response converted to a Python dict.'''
    # Import the package that defines the class being tested for, rather
    # than relying on some other code having loaded it already.
    from google.cloud import vision_v1 as gv
    if isinstance(response, gv.types.image_annotator.AnnotateImageResponse):
        return response.__class__.to_dict(response)
    else:
        # MessageToDict is not imported at the top of this module, so it has
        # to be brought into scope here.
        from google.protobuf.json_format import MessageToDict
        return MessageToDict(response)


def json_from_response(response):
    '''Returns the given Google API response as a JSON string.'''
    return json.dumps(dict_from_response(response))
https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/pubs/joss/H96566k.handprint-all.png -------------------------------------------------------------------------------- /pubs/joss/Makefile: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # @file Makefile 3 | # @brief Makefile for generating a PDF version of a JOSS paper, to preview it 4 | # @author Michael Hucka 5 | # @license https://github.com/caltechlibrary/template/blob/main/LICENSE 6 | # @website https://gist.github.com/mhucka/c0b82778417f38f7ae6ee7d051cec90a 7 | # 8 | # The procedure for using this is very simple: 9 | # 1. Create a directory for your paper 10 | # 2. Copy your paper's Markdown, bib, & other source files into the directory 11 | # 3. Copy this Makefile into the directory 12 | # 4. Update this Makefile's configuration variables (see below) 13 | # 5. Run "make setup" 14 | # 6. Run "make paper" 15 | # 16 | # To get an auto-refreshed preview while editing, open the PDF file in a PDF 17 | # viewer that auto-refreshes when it detects changes to the PDF file (e.g., 18 | # Skim on macOS), and run "make auto" instead of "make paper". 19 | # ============================================================================= 20 | 21 | # CONFIGURATION VARIABLES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 22 | 23 | # The output PDF file and the bibliography .bib file are based on this name. 24 | basename := paper 25 | 26 | # Set the following to the actual DOI for the archive of your software. 27 | archive_doi := http://dx.doi.org/00.00000/zenodo.0000000 28 | 29 | # The values of the following are irrelevant for a preview, and you can just 30 | # leave them as-is. If you want to (re)create the PDF using final published 31 | # values, then go ahead and update these once you know the appropriate values. 
32 | volume := 0 33 | issue := 0 34 | pages := 0 35 | year := $(shell date +'%Y') 36 | submitted := 00 January $(year) 37 | accepted := 00 January $(year) 38 | 39 | # End of configuration section. You shouldn't have to change anything below. 40 | 41 | # DEFAULT ACTION ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 42 | 43 | help: 44 | @printf 'Available commands:\n\n' 45 | @printf 'make\n' 46 | @printf 'make help\n' 47 | @printf ' Print this summary of available commands.\n\n' 48 | @printf 'make setup\n' 49 | @printf ' Download a copy of Whedon and do some other setup work.\n\n' 50 | @printf 'make paper\n' 51 | @printf ' Run pandoc to create a PDF file to preview the paper.\n\n' 52 | @printf 'make auto\n' 53 | @printf ' Watch input files for changes and auto-remake the paper.\n' 54 | 55 | # MAIN CODE ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 56 | 57 | setup: 58 | git clone --recursive git@github.com:openjournals/whedon.git 59 | @which pandoc > /dev/null || echo \ 60 | "\n\033[31m*** Cannot find pandoc -- please install it ***\033[0m\n" 61 | 62 | input := $(basename).md 63 | output := $(basename).pdf 64 | bib := $(basename).bib 65 | title := $(shell grep title: $(input) | sed 's/title: *//' | tr -d "'") 66 | authors := $(shell sed -n '/authors:/,/affiliations:/p' $(input) | grep name: |\ 67 | sed 's/- name: *//' | paste -d, -s - | sed 's/,/, /g') 68 | repo := $(shell git remote get-url origin |\ 69 | sed 's|git@github.com:|https://github.com/|' | sed 's/\.git//') 70 | 71 | paper $(output): check-have-pandoc check-whedon-exists $(input) $(bib) Makefile 72 | pandoc -s $(input) --verbose \ 73 | -V title="$(title)" \ 74 | -V paper_title="$(title)" \ 75 | -V citation_author="$(authors)" \ 76 | -V formatted_doi="10.21105/joss.$(pages)" \ 77 | -V review_issue_url="https://github.com/openjournals/joss-reviews/issues/$(issue)" \ 78 | -V repository="$(repo)" \ 79 | -V archive_doi="$(archive_doi)" \ 80 | -V submitted="$(submitted)" \ 
81 | -V published="$(accepted)" \ 82 | -V journal_name="Journal of Open Source Software" \ 83 | -V issue="$(issue)" \ 84 | -V volume="$(volume)" \ 85 | -V year="$(year)" \ 86 | -V page="$(pages)" \ 87 | -V graphics="true" \ 88 | -V logo_path="whedon/resources/joss/logo.png" \ 89 | -V geometry:margin=1in \ 90 | -o $(output) \ 91 | --pdf-engine=xelatex \ 92 | --citeproc \ 93 | --csl "whedon/resources/apa.csl" \ 94 | --template "whedon/resources/latex.template" 95 | 96 | autorefresh auto: check-have-entr 97 | ((ls $(input) $(bib) Makefile | entr make $(output)) &) 98 | 99 | # MISCELLANEOUS DIRECTIVES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 100 | 101 | check-have-pandoc:; @which pandoc > /dev/null 102 | 103 | check-have-entr:; @which entr > /dev/null 104 | 105 | check-whedon-exists:; @if [ ! -d whedon ]; then echo \ 106 | "\n\033[31m*** Could not find whedon -- run 'make setup' ***\033[0m\n";\ 107 | exit 1; fi; 108 | 109 | .PHONY: check-have-pandoc check-have-entr check-whedon-exists 110 | -------------------------------------------------------------------------------- /pubs/joss/README.md: -------------------------------------------------------------------------------- 1 | JOSS paper 2 | =========== 3 | 4 | This directory contains the source files for a short paper about Handprint in the Journal of Open Source Software. 
5 | -------------------------------------------------------------------------------- /pubs/joss/notes.txt: -------------------------------------------------------------------------------- 1 | Read this for more good points: 2 | 3 | https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1008390 4 | -------------------------------------------------------------------------------- /pubs/joss/paper.bib: -------------------------------------------------------------------------------- 1 | @misc{AmazonInc.2022amazon, 2 | title = {Amazon {{Rekognition}} [{{Computer}} software]}, 3 | author = {{Amazon, Inc.}}, 4 | year = {2022}, 5 | url = {https://aws.amazon.com/rekognition/} 6 | } 7 | 8 | @misc{AmazonInc.2022amazona, 9 | title = {Amazon {{Textract}} [{{Computer}} software]}, 10 | author = {{Amazon, Inc.}}, 11 | year = {2022}, 12 | url = {https://aws.amazon.com/textract/} 13 | } 14 | 15 | @misc{Wikipediacontributors2012first, 16 | title = {First {{Computer Bug}}}, 17 | author = {{Wikipedia contributors}}, 18 | year = {2012}, 19 | url = {https://commons.wikimedia.org/wiki/File:First_Computer_Bug,_1945.jpg}, 20 | urldate = {2022-03-28}, 21 | file = {/Users/mhucka/databases/zotero-bibliography/storage/FTVRRABT/Wikipedia contributors 2012 — Web Page — First Computer Bug.jpg} 22 | } 23 | 24 | @misc{GoogleInc.2022googlea, 25 | title = {Google {{Cloud Vision API}} [{{Computer}} software]}, 26 | author = {{Google, Inc.}}, 27 | year = {2022}, 28 | url = {https://cloud.google.com/vision/docs/handwriting} 29 | } 30 | 31 | @misc{MicrosoftInc.2022microsoft, 32 | title = {Microsoft {{Azure Computer Vision}} [{{Computer}} software]}, 33 | author = {{Microsoft, Inc.}}, 34 | year = {2022}, 35 | url = {https://azure.microsoft.com/en-us/services/cognitive-services/computer-vision/} 36 | } 37 | 38 | @misc{Hucka2022handprint, 39 | title = {Handprint user manual}, 40 | author = {Hucka, Michael}, 41 | year = {2022}, 42 | url = {https://caltechlibrary.github.io/handprint/} 43 | } 44 
| 45 | 46 | -------------------------------------------------------------------------------- /pubs/joss/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'Handprint: a program to explore and compare major cloud-based services for handwritten text recognition' 3 | tags: 4 | - handwritten text recognition 5 | - optical character recognition 6 | - machine learning 7 | authors: 8 | - name: Michael Hucka 9 | orcid: 0000-0001-9105-5960 10 | affiliation: 1 11 | affiliations: 12 | - name: Caltech Library, California Institute of Technology, Pasadena, CA 91125, USA 13 | index: 1 14 | date: 5 April 2022 15 | bibliography: paper.bib 16 | --- 17 | 18 | # Summary 19 | 20 | Handprint (_**Hand**written **p**age **r**ecognit**i**o**n** **t**est_) is a command-line application that can invoke cloud-based services to perform handwritten text recognition (HTR) on images of documents. It accepts images in various popular formats, sends them to the service providers, gathers the results, and then annotates copies of the images to show the results to the user. It currently supports HTR services from Amazon [@AmazonInc.2022amazon; @AmazonInc.2022amazona], Google [@GoogleInc.2022googlea], and Microsoft [@MicrosoftInc.2022microsoft], but its architecture is modular and could be extended to other services. Handprint is a command-line program written in Python and can run on macOS, Windows, and Linux computers. 21 | 22 | 23 | # Statement of need 24 | 25 | Several cloud computing companies have developed machine learning-based methods for handwritten text recognition (HTR) and offer them as on-demand services. These network-based services can be applied to images of document pages without the need for training on samples of handwriting. The results are remarkably good overall, but there are differences in quality and features between the different offerings. 
Comparing the results produced by the competing services is complicated by the fact that they each have unique application programming interfaces (APIs). The purpose of Handprint is to make comparisons simple and easy, without the need for users to learn how to program with the different APIs. With Handprint, users can easily process individual images, directories of images, and URLs pointing to images on remote servers without writing a line of code. If desired, users can also use Handprint in scripts as part of automated workflows. 26 | 27 | 28 | # Summary of Handprint usage 29 | 30 | This section summarizes the user-accessible capabilities provided by Handprint. 31 | 32 | ## Configuration 33 | 34 | The only configuration necessary after installation is to run Handprint with a certain command-line option to store the user's account credentials for each cloud-based HTR service provider. The command needs to be run once for each desired provider, and thereafter, Handprint will use the account information automatically. The Handprint documentation at <https://caltechlibrary.github.io/handprint/> explains the simple file format in which the credentials need to be written. 35 | 36 | 37 | ## Basic features 38 | 39 | Handprint can read a number of common image formats: JP2, JPEG, PDF, PNG, GIF, BMP, and TIFF. Image paths or URLs can be supplied to Handprint in any of the following ways: (a) one or more directory paths or one or more image file paths on the local disk, which will be interpreted as images---either individually or in directories---to be processed; (b) one or more URLs, which will be interpreted as network locations of image files to be processed; or (c) if given the `-f` command-line option (`/f` on Windows), a file containing either image paths or image URLs to be processed. When using URLs, Handprint first downloads the image found at the given URL(s) to a directory of the user's choosing on the local disk. 
No matter whether files or URLs, each item should be a single image of a document page containing text. 40 | 41 | Handprint's basic features include the ability to display different kinds of bounding boxes, save the full raw results from HTR services as JSON or text files, and use multiple processor threads to speed up processing. For example, using one of the sample images found in Handprint's source directory, the following command, 42 | ``` 43 | handprint --text-size 19 --display text,bb-line H96566k.jpg 44 | ``` 45 | will send, in parallel, the image file named `H96566k.jpg` to the four services currently supported (Amazon Rekognition, Amazon Textract, Google Cloud Vision, and Microsoft Azure Computer Vision). The output will be a file named `H96566k.handprint-all.png` with the contents shown in the figure below. 46 | 47 | ![**Figure 1**: Example of output from Handprint using default settings. (Source image obtained from Wikipedia [@Wikipediacontributors2012first].)](H96566k.handprint-all.png) 48 | 49 | Users can also select a subset of services to use, and can opt to skip the creation of the overview grid image if they only need the other types of outputs that Handprint can produce. 50 | 51 | 52 | ## Advanced features 53 | 54 | Handprint also includes additional, more advanced features. One is the ability to filter the displayed results by confidence scores, allowing users to see which words or other components have confidence values that meet or exceed a chosen threshold. Another is a facility to compare text results to expected (ground truth) text. The comparison algorithm has some novel capabilities, notably in how it can treat missing, extra, or transposed lines of text from the HTR results (a common difference between the outputs of different services). 55 | 56 | 57 | # Documentation 58 | 59 | A detailed user manual is available as a GitHub Pages website at <https://caltechlibrary.github.io/handprint/>. 
Handprint also prints usage information to the terminal when given the command-line option `--help`. 60 | 61 | 62 | # Acknowledgments 63 | 64 | The development of Handprint was supported by the Caltech Library. Handprint benefitted from feedback from several people, notably Tommy Keswick, Mariella Soprano, Peter Collopy and Stephen Davison of the Caltech Library. The [vector artwork](https://thenounproject.com/search/?q=hand&i=733265) of a hand used as a logo for Handprint was created by [Kevin](https://thenounproject.com/kevn/) for the [Noun Project](https://thenounproject.com). 65 | 66 | 67 | # References 68 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # @file requirements-dev.txt 3 | # @brief Python dependencies for Handprint for development 4 | # @created 2022-03-14 5 | # @license Please see the file named LICENSE in the project directory 6 | # @website https://github.com/caltechlibrary/handprint 7 | # ============================================================================= 8 | 9 | -r requirements.txt 10 | 11 | pytest-mock == 3.7.0 12 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # @file requirements.txt 3 | # @brief Python dependencies for Handprint for normal use 4 | # @created 2018-10-23 5 | # @license Please see the file named LICENSE in the project directory 6 | # @website https://github.com/caltechlibrary/handprint 7 | # ============================================================================= 8 | 9 | aenum == 3.1.0 10 | appdirs == 1.4.4 11 | boltons == 21.0.0 12 | boto3 == 1.17.91 13 | bun == 0.0.8 14 | 
commonpy >= 1.9.1 15 | fastnumbers == 3.1.0 16 | google-api-core == 1.30.0 17 | google-api-python-client == 2.8.0 18 | google-auth == 1.30.2 19 | google-auth-httplib2 == 0.1.0 20 | google-cloud == 0.34.0 21 | google-cloud-vision == 2.3.1 22 | googleapis-common-protos == 1.53.0 23 | grpcio == 1.44.0 24 | humanize >= 3.7.1 25 | imagesize == 1.2.0 26 | matplotlib == 3.4.2 27 | numpy == 1.22.2 28 | Pillow == 9.0.1 29 | plac == 1.3.4 30 | psutil == 5.8.0 31 | PyMuPDF == 1.19.6 32 | requests == 2.25.0 33 | rich == 12.0.1 34 | setuptools >= 62.1.0 35 | sidetrack == 2.0.0 36 | StringDist == 1.0.9 37 | textdistance == 4.2.2 38 | urllib3 == 1.26.5 39 | validator-collection == 1.5.0 40 | wheel == 0.36.2 41 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # @file setup.cfg 3 | # @brief Package metadata and PyPI configuration 4 | # @author Michael Hucka 5 | # @license Please see the file named LICENSE in the project directory 6 | # @website https://github.com/caltechlibrary/handprint 7 | # ============================================================================= 8 | 9 | [metadata] 10 | name = handprint 11 | version = 1.6.0 12 | description = Run handwritten text recognition services on images of documents 13 | author = Michael Hucka 14 | author_email = mhucka@caltech.edu 15 | license = BSD 3-clause 16 | license_files = LICENSE 17 | url = https://github.com/caltechlibrary/handprint 18 | # The remaining items below are used by PyPI. 
19 | project_urls = 20 | Source Code = https://github.com/caltechlibrary/handprint 21 | Bug Tracker = https://github.com/caltechlibrary/handprint/issues 22 | keywords = HTR, OCR, handwritten text recognition, machine learning 23 | classifiers = 24 | Development Status :: 5 - Production/Stable 25 | Environment :: Console 26 | License :: OSI Approved :: BSD License 27 | Intended Audience :: Science/Research 28 | Operating System :: MacOS :: MacOS X 29 | Operating System :: Microsoft :: Windows 30 | Operating System :: POSIX 31 | Operating System :: POSIX :: Linux 32 | Operating System :: Unix 33 | Programming Language :: Python 34 | Programming Language :: Python :: 3.8 35 | Topic :: Scientific/Engineering :: Artificial Intelligence 36 | Topic :: Scientific/Engineering :: Image Recognition 37 | Topic :: Text Processing :: Linguistic 38 | long_description = file:README.md 39 | long_description_content_type = text/markdown 40 | 41 | [options] 42 | packages = find: 43 | zip_safe = False 44 | python_requires = >= 3.8 45 | 46 | [options.entry_points] 47 | console_scripts = 48 | handprint = handprint.__main__:console_scripts_main 49 | 50 | [check-manifest] 51 | ignore = 52 | .graphics/sample-annotated-image.png 53 | .graphics/handprint-screencast.gif 54 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # ============================================================================= 3 | # @file setup.py 4 | # @brief Installation setup file 5 | # @author Michael Hucka 6 | # @license Please see the file named LICENSE in the project directory 7 | # @website https://github.com/caltechlibrary/handprint 8 | # 9 | # Note: configuration metadata is maintained in setup.cfg. This file exists 10 | # primarily to hook in setup.cfg and requirements.txt. 
11 | # 12 | # ============================================================================= 13 | 14 | import os 15 | from os.path import exists, join, abspath, dirname 16 | from setuptools import setup 17 | 18 | here = abspath(dirname(__file__)) 19 | 20 | requirements = [] 21 | if exists(join(here, 'requirements.txt')): 22 | with open(join(here, 'requirements.txt')) as f: 23 | requirements = f.read().rstrip().splitlines() 24 | 25 | setup( 26 | setup_requires = ['wheel'], 27 | install_requires = requirements, 28 | ) 29 | -------------------------------------------------------------------------------- /tests/data/README.md: -------------------------------------------------------------------------------- 1 | Test images 2 | =========== 3 | 4 | This directory contains a small number of test images from various sources. Note that this is _not_ intended as a complete set of test cases; it is only present for demonstration and testing of Handprint. 5 | 6 | For a larger collection of test cases, please see the [htr-test-cases](https://github.com/caltechlibrary/htr-test-cases) repository that we also make available on GitHub. 
7 | -------------------------------------------------------------------------------- /tests/data/bad-images/corrupted-image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/tests/data/bad-images/corrupted-image.png -------------------------------------------------------------------------------- /tests/data/caltech-archives/caltech-archives-urls.txt: -------------------------------------------------------------------------------- 1 | https://digital.archives.caltech.edu/adore-djatoka//resolver?rft_id=https%3A%2F%2Fdigital.archives.caltech.edu%2Fislandora%2Fobject%2Fhale%253A41656%2Fdatastream%2FJP2%2Fview%3Ftoken%3D1c8de85be701a28063d1fbe7b6f4c3849d95c0cabbb79cbe8eb077daf219a43d&url_ver=Z39.88-2004&svc_id=info%3Alanl-repo%2Fsvc%2FgetRegion&svc_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajpeg2000&svc.format=image%2Fjpeg&svc.level=4&svc.rotate=0 2 | https://digital.archives.caltech.edu/adore-djatoka//resolver?rft_id=https%3A%2F%2Fdigital.archives.caltech.edu%2Fislandora%2Fobject%2Fhale%253A44634%2Fdatastream%2FJP2%2Fview%3Ftoken%3D1b9e848d4f1ddae21fde96820b6f6592521cb7588b3e54bb468762a61be1ded2&url_ver=Z39.88-2004&svc_id=info%3Alanl-repo%2Fsvc%2FgetRegion&svc_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajpeg2000&svc.format=image%2Fjpeg&svc.level=4&svc.rotate=0 3 | 4 | -------------------------------------------------------------------------------- /tests/data/caltech-archives/glaser/DAG_5_1_6 1952-1957 Notebook VI p2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/tests/data/caltech-archives/glaser/DAG_5_1_6 1952-1957 Notebook VI p2.jpg -------------------------------------------------------------------------------- /tests/data/caltech-archives/glaser/DAG_5_2_1 1950-1953 notebook VIII p7.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/tests/data/caltech-archives/glaser/DAG_5_2_1 1950-1953 notebook VIII p7.jpg -------------------------------------------------------------------------------- /tests/data/caltech-archives/glaser/README.md: -------------------------------------------------------------------------------- 1 | Origin and conditions governing use 2 | =================================== 3 | 4 | The images in this sample came from the Caltech Archives' [Donald A. Glaser Digital Collection](http://glaser.library.caltech.edu). 5 | 6 | Here are some notes about specific documents: 7 | 8 | * DAG_5_2_1 1950-1953 notebook VIII p7.jpg: Notebook VIII, dated 1950 May - 1953 November. Part of: Donald A Glaser Papers. Series 5: Bubble Chamber (Ann Arbor); Subseries 1: Notebooks; Box 2, Folder 1 9 | 10 | * DAG_5_1_6 1952-1957 Notebook VI p2.jpg: Notebook VI - High Energy Physics Colloquium (21 items), ca. 1952-1957, 1991. Part of: Donald A Glaser Papers. Series 5: Bubble Chamber (Ann Arbor); Subseries 1: Notebooks; Box 1, Folder 6. 
11 | -------------------------------------------------------------------------------- /tests/data/caltech-archives/glaser/dag-285-DAG_1_1_8_0003.jp2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/tests/data/caltech-archives/glaser/dag-285-DAG_1_1_8_0003.jp2 -------------------------------------------------------------------------------- /tests/data/caltech-archives/glaser/dag-304-DAG_1_1_8_0029.jp2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/tests/data/caltech-archives/glaser/dag-304-DAG_1_1_8_0029.jp2 -------------------------------------------------------------------------------- /tests/data/fragments/f1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/tests/data/fragments/f1.png -------------------------------------------------------------------------------- /tests/data/fragments/f2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/tests/data/fragments/f2.png -------------------------------------------------------------------------------- /tests/data/fragments/f6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/tests/data/fragments/f6.png -------------------------------------------------------------------------------- /tests/data/fragments/f7.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/tests/data/fragments/f7.png -------------------------------------------------------------------------------- /tests/data/public-domain/LOC-urls.txt: -------------------------------------------------------------------------------- 1 | https://tile.loc.gov/storage-services/master/mss/mss41335/006/0400/0441.tif 2 | https://tile.loc.gov/storage-services/master/mss/mss41335/006/0400/0442.tif 3 | -------------------------------------------------------------------------------- /tests/data/public-domain/images/AGBell_Notebook.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/tests/data/public-domain/images/AGBell_Notebook.jpg -------------------------------------------------------------------------------- /tests/data/public-domain/images/H96566k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/tests/data/public-domain/images/H96566k.jpg -------------------------------------------------------------------------------- /tests/data/public-domain/images/README.md: -------------------------------------------------------------------------------- 1 | Origin and conditions governing use 2 | =================================== 3 | 4 | The following are the sources of the images in this directory. 5 | 6 | * [H96566k.jpg](https://upload.wikimedia.org/wikipedia/commons/8/8a/H96566k.jpg) – Courtesy of the Naval Surface Warfare Center, Dahlgren, VA., 1988. [Public domain], via Wikimedia Commons. Picture of a notebook page showing the first "Computer Bug" Moth found trapped between points in a relay of the Mark II Aiken Relay Calculator while it was being tested at Harvard University, 9 September 1947. 
The operators affixed the moth to the computer log, with the entry: "First actual case of bug being found". 7 | 8 | * [AGBell_Notebook.jpg](https://upload.wikimedia.org/wikipedia/commons/0/0c/AGBell_Notebook.jpg) – pages 40-1 of Alexander Graham Bell's unpublished laboratory notebook (1875-76), describing first successful experiment with the telephone. 9 | 10 | * [mabel-h-bell-to-eliza-s-bell-sept-28-1879-1-1600.jpg](mabel-h-bell-to-eliza-s-bell-sept-28-1879-1-1600.jpg) – downloaded from [Picryl](https://picryl.com/media/letter-from-mabel-hubbard-bell-to-eliza-symonds-bell-september-28-1879-1) on 2019-08-11. Original source is given as Library of Congress. No copyright restrictions. 11 | 12 | * [clara-barton-life-of-my-childhood-p90.jpg](clara-barton-life-of-my-childhood-p90.jpg) – downloaded from [Picryl](https://picryl.com/media/clara-barton-papers-speeches-and-writings-file-1849-1947-books-the-life-of-71) on 2019-08-11. Original source is given as Library of Congress. No copyright restrictions. 
13 | -------------------------------------------------------------------------------- /tests/data/public-domain/images/clara-barton-life-of-my-childhood-p90.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/tests/data/public-domain/images/clara-barton-life-of-my-childhood-p90.jpg -------------------------------------------------------------------------------- /tests/data/public-domain/images/mabel-h-bell-to-eliza-s-bell-sept-28-1879-1-1600.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/handprint/1b84db7eeb40dfaa5e4ced9b2cfdb021c1a1ba68/tests/data/public-domain/images/mabel-h-bell-to-eliza-s-bell-sept-28-1879-1-1600.jpg -------------------------------------------------------------------------------- /tests/test_comparison.py: -------------------------------------------------------------------------------- 1 | from commonpy.file_utils import delete_existing 2 | import os 3 | import os.path 4 | import pytest 5 | import sys 6 | import tempfile 7 | from time import time 8 | 9 | try: 10 | thisdir = os.path.dirname(os.path.abspath(__file__)) 11 | sys.path.append(os.path.join(thisdir, '..')) 12 | except: 13 | sys.path.append('..') 14 | 15 | from handprint.comparison import * 16 | 17 | 18 | def test_text_comparison(): 19 | x = text_comparison('a', 'a') 20 | assert x == 'Errors\tCER (%)\tExpected text\tReceived text\n0\t0.00\ta\ta\nTotal errors\t\t\t\n0\t\t\t' 21 | x = text_comparison('a', 'b') 22 | assert x == 'Errors\tCER (%)\tExpected text\tReceived text\n1\t100.00\tb\t\nTotal errors\t\t\t\n1\t\t\t' 23 | -------------------------------------------------------------------------------- /tests/test_exceptions.py: -------------------------------------------------------------------------------- 1 | from contextlib import redirect_stdout 2 | import io 3 | 
import os
import plac
import pytest
import sys
from time import time

# Make the package under test importable when the tests are run from a
# source checkout: add the parent of this tests directory to sys.path.
try:
    thisdir = os.path.dirname(os.path.abspath(__file__))
    sys.path.append(os.path.join(thisdir, '..'))
except NameError:
    # __file__ is not defined (e.g. the code is being exec'd); fall back to
    # a path relative to the current working directory.
    sys.path.append('..')

from handprint.__main__ import main
from handprint.exceptions import *
from handprint.exit_codes import ExitCode


def _exit_info_and_stdout(argv):
    """Run main through plac with argv, expecting it to exit.

    Returns a tuple (ex_info, text): the pytest ExceptionInfo for the
    SystemExit that was raised, and everything written to stdout meanwhile.
    """
    captured = io.StringIO()
    with pytest.raises(SystemExit) as ex_info:
        with redirect_stdout(captured):
            plac.call(main, argv)
    return ex_info, captured.getvalue()


def test_exceptions():
    """InternalError must be a HandprintException and keep its message."""
    with pytest.raises(HandprintException) as ex_info:
        raise InternalError('foo')
    assert str(ex_info.value) == 'foo'


def test_bad_cli_arg():
    """Passing ['-s', 'bogus'] must exit with the ExitCode.bad_arg status."""
    with pytest.raises(SystemExit) as ex_info:
        # No assert around the call: it is expected to raise, so a wrapped
        # assert would never be evaluated.
        plac.call(main, ['-s', 'bogus'])

    assert ex_info.type is SystemExit
    assert ex_info.value.code == int(ExitCode.bad_arg)


def test_cli_arg_version():
    """Passing ['-V'] must print a version banner and exit successfully."""
    ex_info, text = _exit_info_and_stdout(['-V'])

    assert text.startswith('handprint version')
    assert ex_info.type is SystemExit
    assert ex_info.value.code == int(ExitCode.success)


def test_cli_arg_services():
    """Passing ['-l'] must print the known services and exit successfully."""
    ex_info, text = _exit_info_and_stdout(['-l'])

    assert text.startswith('Known services')
    assert ex_info.type is SystemExit
    assert ex_info.value.code == int(ExitCode.success)


# --------------------------------------------------------------------------------
# /tests/test_exit_codes.py:
# --------------------------------------------------------------------------------
# NOTE: this is a separate module in the repository; its single test reuses
# the name test_exceptions although it exercises ExitCode.
import os
import pytest
import sys
from time import time

try:
    thisdir = os.path.dirname(os.path.abspath(__file__))
    sys.path.append(os.path.join(thisdir, '..'))
except NameError:
    # __file__ is not defined; fall back to a cwd-relative path.
    sys.path.append('..')

from handprint.exit_codes import *


def test_exceptions():
    """ExitCode.success must convert to 0 and expose its meaning text."""
    assert int(ExitCode.success) == 0
    assert ExitCode.success.meaning == "success -- program completed normally"


# --------------------------------------------------------------------------------
# /tests/test_images.py:
# --------------------------------------------------------------------------------
from commonpy.file_utils import delete_existing
import os
import os.path
import pytest
import sys
import tempfile
from time import time

try:
    thisdir = os.path.dirname(os.path.abspath(__file__))
    sys.path.append(os.path.join(thisdir, '..'))
except NameError:
    # __file__ is not defined; fall back to a cwd-relative path.
    sys.path.append('..')

from handprint.images import *


def _fragment(filename):
    """Return the path of filename inside this directory's data/fragments.

    os.path is used explicitly: this module imports os and os.path but never
    binds the bare name `path`, so relying on it would only work if the star
    import above happened to provide it.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(here, 'data', 'fragments', filename)


def _scratch_file(suffix):
    """Create an empty temporary file with the given suffix; return its path.

    mkstemp() hands back an open OS-level descriptor along with the path.
    The descriptor is closed right away so it is not leaked and so the file
    can be rewritten and removed by the code under test.  The platform's
    default temporary directory is used instead of a hard-coded '/tmp'.
    """
    fd, tmpfile = tempfile.mkstemp(suffix = suffix)
    os.close(fd)
    return tmpfile


def test_format_name():
    """canonical_format_name() must map aliases to the canonical name."""
    assert canonical_format_name('jpeg') == 'jpeg'
    assert canonical_format_name('jpg') == 'jpeg'
    assert canonical_format_name('TIF') == 'tiff'


def test_image_size():
    """image_size() must return the pinned values for the sample fragments."""
    assert image_size(_fragment('f1.png')) == 15553
    assert image_size(_fragment('f2.png')) == 8613


def test_image_dimensions():
    """image_dimensions() must return the pinned pairs for the fragments."""
    assert image_dimensions(_fragment('f1.png')) == (340, 106)
    assert image_dimensions(_fragment('f2.png')) == (228, 60)


def test_reduced_image_size():
    """reduced_image_size() with limit 1000 must yield a 22x7 output file.

    The call returns a pair whose first item is a str and whose second is
    None here (presumably an output path and an error value -- confirm
    against handprint.images).
    """
    tmpfile = _scratch_file('.png')
    try:
        (a, b) = reduced_image_size(_fragment('f1.png'), tmpfile, 1000)
        assert isinstance(a, str)
        assert b is None
        assert image_dimensions(tmpfile) == (22, 7)
    finally:
        # Remove the scratch file even when an assertion above fails.
        delete_existing(tmpfile)


def test_reduced_image_dimensions():
    """reduced_image_dimensions() to 100x100 must yield a 100x31 output."""
    tmpfile = _scratch_file('.png')
    try:
        (a, b) = reduced_image_dimensions(_fragment('f1.png'), tmpfile, 100, 100)
        assert isinstance(a, str)
        assert b is None
        assert image_dimensions(tmpfile) == (100, 31)
    finally:
        # Remove the scratch file even when an assertion above fails.
        delete_existing(tmpfile)


def test_converted_image():
    """converted_image() to 'tif' must return a (str, None) pair."""
    tmpfile = _scratch_file('.tiff')
    try:
        (a, b) = converted_image(_fragment('f1.png'), 'tif', tmpfile)
        assert isinstance(a, str)
        assert b is None
    finally:
        # Remove the scratch file even when an assertion above fails.
        delete_existing(tmpfile)

# --------------------------------------------------------------------------------