├── .circleci ├── config.for-lab.yml └── config.yml ├── .dockerignore ├── .gitattributes ├── .gitignore ├── .pylintrc ├── Dockerfile ├── README.md ├── admin ├── endpoint_tester │ ├── app.py │ ├── endpoints.txt │ ├── images.txt │ ├── inv_images.txt │ ├── make_paths_txt.py │ ├── remote_images.txt │ └── run_siege.sh ├── handwriting_data_gathering │ ├── output_markdown.py │ ├── print.pdf │ ├── run.sh │ └── template.md ├── outstanding_tasks.md ├── readme.md ├── tasks │ ├── lab_specific_files.yml │ ├── print_repo_structure.sh │ ├── subset_repo_for_labs.py │ └── subset_repo_for_labs.sh └── wandb_hub │ ├── Dockerfile │ └── readme.md ├── api ├── Dockerfile ├── __init__.py ├── app.py ├── requirements.txt └── tests │ └── test_app.py ├── data └── raw │ ├── emnist │ ├── metadata.toml │ └── readme.md │ ├── fsdl_handwriting │ ├── fsdl_handwriting.json │ ├── metadata.toml │ └── readme.md │ └── iam │ ├── metadata.toml │ └── readme.md ├── environment.yml ├── evaluation ├── evaluate_character_predictor.py └── evaluate_line_predictor.py ├── instructions ├── editor.md ├── lab1.md ├── lab2.md ├── lab3.md ├── lab4.md ├── lab5.md ├── lab6.md ├── lab7.md ├── lab8.md ├── lab8_notes.md ├── lab9.md ├── lab9_aws_and_monitoring.md ├── project_structure.md ├── readme.md ├── setup.md └── setup_extra.md ├── notebooks ├── 01-look-at-emnist.ipynb ├── 01b-cnn-for-emnist.ipynb ├── 02-look-at-emnist-lines.ipynb ├── 02b-cnn-for-simple-emnist-lines.ipynb ├── 03-look-at-iam-lines.ipynb ├── 04-look-at-iam-paragraphs.ipynb ├── 04b-look-at-line-detector-predictions.ipynb ├── 05-look-at-fsdl-handwriting.ipynb ├── archive │ ├── 00-download-emnist.ipynb │ ├── 02-train-emnist-mlp.ipynb │ ├── 04-line-cnn.ipynb │ ├── 05-line-lstm.ipynb │ ├── 06-line-lstm-with-ctc.ipynb │ ├── xx-all-in-one.ipynb │ ├── xx-ctc-loss.ipynb │ ├── xx-fc-vs-1x1.ipynb │ └── xx-re-pad-iam-lines.ipynb ├── future_work │ └── 03b-generate-iam-lines.ipynb └── line detection experiments │ ├── 0-iam-pages.ipynb │ ├── 4-experimenting-with-line-prediction-on-synthetic-data.ipynb │ ├── 5-predicting-iam-line-locations.ipynb │ ├── 5b-prediction-mse.ipynb │ ├── 5c-prediction-just-y-coordinates.ipynb │ ├── 5d-predicting-iam-line-locations-dual-channel-unet.ipynb │ └── paragraph_text_recognizer_debug.ipynb ├── pyproject.toml ├── requirements-dev.in ├── requirements-dev.txt ├── requirements.in ├── requirements.txt ├── setup.cfg ├── tasks ├── build_api_docker.sh ├── clean.sh ├── format.sh ├── lint.sh ├── prepare_sample_experiments.sh ├── run_api_docker.sh ├── sync_requirements.sh ├── test_api.sh ├── test_functionality.sh ├── test_validation.sh ├── train_character_predictor.sh ├── train_line_detector.sh ├── train_lstm_line_predictor.sh ├── train_lstm_line_predictor_on_iam.sh ├── update_fsdl_paragraphs_metadata.sh └── update_requirements.sh ├── text_recognizer ├── __init__.py ├── character_predictor.py ├── datasets │ ├── __init__.py │ ├── dataset.py │ ├── dataset_sequence.py │ ├── emnist_dataset.py │ ├── emnist_essentials.json │ ├── emnist_lines_dataset.py │ ├── fsdl_handwriting_dataset.py │ ├── iam_dataset.py │ ├── iam_lines_dataset.py │ ├── iam_paragraphs_dataset.py │ └── sentence_generator.py ├── line_predictor.py ├── models │ ├── __init__.py │ ├── base.py │ ├── character_model.py │ ├── line_detector_model.py │ ├── line_model.py │ └── line_model_ctc.py ├── networks │ ├── __init__.py │ ├── ctc.py │ ├── fcn.py │ ├── lenet.py │ ├── line_cnn_all_conv.py │ ├── line_lstm_ctc.py │ ├── misc.py │ └── mlp.py ├── paragraph_text_recognizer.py ├── tests │ ├── __init__.py │ ├── support │ │ ├── 
create_emnist_lines_support_files.py │ │ ├── create_emnist_support_files.py │ │ ├── create_iam_lines_support_files.py │ │ ├── emnist │ │ │ ├── 8.png │ │ │ ├── U.png │ │ │ └── e.png │ │ ├── emnist_lines │ │ │ ├── Corsi left for.png │ │ │ ├── do that In.png │ │ │ └── or if used the results.png │ │ ├── iam_lines │ │ │ ├── He rose from his breakfast-nook bench.png │ │ │ ├── and came into the livingroom, where.png │ │ │ └── his entrance. He came, almost falling.png │ │ └── iam_paragraphs │ │ │ └── a01-000u-cropped.jpg │ ├── test_character_predictor.py │ ├── test_line_predictor.py │ └── test_paragraph_text_recognizer.py ├── util.py └── weights │ ├── CharacterModel_EmnistDataset_mlp_weights.h5 │ ├── LineDetectorModel_IamParagraphsDataset_fcn_weights.h5 │ ├── LineModelCtc_EmnistLinesDataset_line_lstm_ctc_weights.h5 │ └── LineModelCtc_IamLinesDataset_line_lstm_ctc_weights.h5 ├── training ├── __init__.py ├── experiments │ ├── cnn.json │ ├── lstm_ctc.json │ └── sample.json ├── gpu_manager.py ├── prepare_experiments.py ├── run_experiment.py ├── run_sweep.py ├── sweep_emnist.yaml ├── sweep_iam.yaml ├── update_metadata.py └── util.py └── wandb └── settings /.circleci/config.for-lab.yml: -------------------------------------------------------------------------------- 1 | # Python CircleCI 2.0 configuration file 2 | # 3 | # Check https://circleci.com/docs/2.0/language-python/ for more details 4 | # 5 | version: 2 6 | jobs: 7 | build: 8 | docker: 9 | - image: circleci/python:3.7 10 | 11 | steps: 12 | - checkout 13 | 14 | - restore_cache: 15 | keys: 16 | - cache-{{ checksum "requirements.txt" }}-{{ checksum "requirements-dev.txt" }} 17 | 18 | - run: 19 | name: Install Git LFS 20 | command: | 21 | curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash 22 | sudo apt-get install git-lfs 23 | git lfs install 24 | 25 | - run: 26 | name: Pull LFS Files 27 | command: git lfs pull 28 | 29 | - run: 30 | name: Install Shellcheck 31 | command: | 32 | curl -OL https://storage.googleapis.com/shellcheck/shellcheck-stable.linux.x86_64.tar.xz 33 | tar xf shellcheck-stable.linux.x86_64.tar.xz 34 | sudo mv shellcheck-stable/shellcheck /usr/local/bin 35 | working_directory: /tmp/shellcheck 36 | 37 | - run: 38 | name: install dependencies 39 | command: | 40 | sed -i 's/tensorflow==/tensorflow-cpu==/' requirements.txt 41 | pip install -r requirements.txt 42 | pip install -r requirements-dev.txt 43 | 44 | - save_cache: 45 | key: cache-{{ checksum "requirements.txt" }}-{{ checksum "requirements-dev.txt" }} 46 | paths: 47 | - ~/.local 48 | 49 | - run: 50 | name: run linting 51 | when: always 52 | command: | 53 | cd lab8 && PYTHONPATH=. ./tasks/lint.sh 54 | 55 | - run: 56 | name: run prediction tests 57 | when: always 58 | command: | 59 | cd lab8 && PYTHONPATH=. pytest -s text_recognizer/tests/* 60 | 61 | - run: 62 | name: run evaluation tests 63 | command: | 64 | cd lab8 && PYTHONPATH=. 
pytest -s evaluation/* 65 | 66 | - store_artifacts: 67 | path: test-reports 68 | destination: test-reports 69 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | # Python CircleCI 2.0 configuration file 2 | # 3 | # Check https://circleci.com/docs/2.0/language-python/ for more details 4 | # 5 | version: 2 6 | jobs: 7 | build: 8 | docker: 9 | - image: circleci/python:3.7 10 | 11 | steps: 12 | - checkout 13 | 14 | - restore_cache: 15 | keys: 16 | - cache-{{ checksum "requirements.txt" }}-{{ checksum "requirements-dev.txt" }} 17 | 18 | - run: 19 | name: Install Git LFS 20 | command: | 21 | curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash 22 | sudo apt-get install git-lfs 23 | git lfs install 24 | 25 | - run: 26 | name: Pull LFS Files 27 | command: git lfs pull 28 | 29 | - run: 30 | name: Install Shellcheck 31 | command: | 32 | curl -OL https://storage.googleapis.com/shellcheck/shellcheck-stable.linux.x86_64.tar.xz 33 | tar xf shellcheck-stable.linux.x86_64.tar.xz 34 | sudo mv shellcheck-stable/shellcheck /usr/local/bin 35 | working_directory: /tmp/shellcheck 36 | 37 | - run: 38 | name: install dependencies 39 | command: | 40 | sed -i 's/tensorflow==/tensorflow-cpu==/' requirements.txt 41 | pip install -r requirements.txt 42 | pip install -r requirements-dev.txt 43 | 44 | - save_cache: 45 | key: cache-{{ checksum "requirements.txt" }}-{{ checksum "requirements-dev.txt" }} 46 | paths: 47 | - ~/.local 48 | 49 | - run: 50 | name: run linting 51 | when: always 52 | command: | 53 | PYTHONPATH=. ./tasks/lint.sh 54 | 55 | - run: 56 | name: run prediction tests 57 | when: always 58 | command: | 59 | PYTHONPATH=. pytest -s text_recognizer/tests/* 60 | 61 | - run: 62 | name: run evaluation tests 63 | command: | 64 | PYTHONPATH=. 
pytest -s evaluation/* 65 | 66 | - store_artifacts: 67 | path: test-reports 68 | destination: test-reports 69 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | * 2 | !api 3 | !text_recognizer 4 | !requirements* 5 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.png filter=lfs diff=lfs merge=lfs -text 2 | *.jpg filter=lfs diff=lfs merge=lfs -text 3 | *.h5 filter=lfs diff=lfs merge=lfs -text 4 | data/**/*.json filter=lfs diff=lfs merge=lfs -text 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Data 2 | data/processed 3 | data/interim 4 | data/raw/emnist/matlab* 5 | data/raw/fsdl_handwriting/pages 6 | data/raw/iam/iamdb 7 | data/raw/iam/iamdb.zip 8 | data/raw/nltk 9 | 10 | # Editors 11 | .vscode 12 | 13 | # Node 14 | node_modules 15 | 16 | # Python 17 | __pycache__ 18 | .pytest_cache 19 | .ipynb_checkpoints 20 | 21 | # Distribution / packaging 22 | .Python 23 | env/ 24 | build/ 25 | develop-eggs/ 26 | dist/ 27 | downloads/ 28 | eggs/ 29 | .eggs/ 30 | lib/ 31 | lib64/ 32 | parts/ 33 | sdist/ 34 | var/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | 39 | # W&B 40 | wandb-debug.log 41 | wandb/* 42 | !wandb/settings 43 | 44 | # Misc 45 | .DS_Store 46 | _labs 47 | logs 48 | .mypy_cache 49 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MESSAGES CONTROL] 2 | 3 | # Disable the message(s) with the given id(s). 4 | # E1130 - invalid-unary-operand-type false positive https://github.com/PyCQA/pylint/issues/1472 5 | # E1136 - unsubscriptable (unsubscriptable-object) - Pylint is failing to infer correct type from astroid https://github.com/PyCQA/pylint/issues/2849 6 | # R0801 - similar lines across files 7 | # W0511 - TODO comments 8 | # W1202 - logging-format-interpolation - Behavior barring fstrings in logging https://github.com/PyCQA/pylint/issues/2395 9 | # missing-function-dosctring: docstyle handles 10 | # bad-continuation: disagrees with black formatter 11 | disable=E1130,E1136,R0801,W0511,W1202,missing-function-docstring,bad-continuation 12 | # LAST AUDITED: 2019-01-09 13 | 14 | [MASTER] 15 | 16 | # A comma-separated list of package or module names from where C extensions may 17 | # be loaded. Extensions are loading into the active Python interpreter and may 18 | # run arbitrary code 19 | extension-pkg-whitelist=numpy 20 | 21 | [TYPECHECK] 22 | 23 | # List of module names for which member attributes should not be checked 24 | # (useful for modules/projects where namespaces are manipulated during runtime 25 | # and thus existing member attributes cannot be deduced by static analysis. It 26 | # supports qualified module names, as well as Unix pattern matching. 27 | ignored-modules=cv2,numpy,tensorflow 28 | 29 | # List of classes names for which member attributes should not be checked 30 | # (useful for classes with attributes dynamically set). This supports can work 31 | # with qualified names. 
32 | ignored-classes=cv2,numpy,tensorflow 33 | 34 | [BASIC] 35 | 36 | # Good variable names which should always be accepted, separated by a comma 37 | good-names = _, e, f, fn, i, j, k, n, N, m, M, D, p, t, v, x, X, y, Y, w, h, W, H, x1, x2, y1, y2, ax, df 38 | 39 | # Regular expression which should only match correct function names 40 | function-rgx=[a-z_][a-z0-9_]{2,70}$ 41 | 42 | # Regular expression which should only match correct method names 43 | method-rgx=[a-z_][a-z0-9_]{2,70}$ 44 | 45 | [FORMAT] 46 | 47 | # Maximum number of characters on a single line. 48 | max-line-length = 120 49 | 50 | [DESIGN] 51 | # Minimum number of public methods for a class (see R0903). 52 | min-public-methods = 0 53 | 54 | # Maximum number of attributes for a class (see R0902). 55 | max-attributes = 15 56 | 57 | max-locals = 18 58 | 59 | max-args = 8 60 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | RUN apt-get update 4 | RUN apt-get install -y python3-pip 5 | RUN pip3 install --upgrade pip 6 | 7 | # Copy the requirements files into the image so pip can install them 8 | COPY requirements.txt requirements-dev.txt ./ 9 | RUN pip install -r requirements.txt 10 | RUN pip install -r requirements-dev.txt 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FSDL Text Recognizer Project 2 | 3 | This is the admin version of the Full Stack Deep Learning project. 4 | 5 | The instructions that students will see start in [Lab 1 Instructions](instructions/lab1.md). 6 | 7 | To create the `fsdl-text-recognizer-project` directory, with files subsetted into labs and lab solutions, run `admin/tasks/subset_repo_for_labs.sh`.
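For orientation, a usage sketch; the expected layout is inferred from `admin/tasks/subset_repo_for_labs.py` and `lab_specific_files.yml` later in this repo, so treat the listing as approximate:

```sh
# Run from the repo root; the wrapper script writes the subsetted project one directory up.
admin/tasks/subset_repo_for_labs.sh
ls ../fsdl-text-recognizer-project
# .circleci/  data/  lab1/ ... lab8/  readme.md  setup.md  requirements.txt  requirements-dev.txt  ...
```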
8 | -------------------------------------------------------------------------------- /admin/endpoint_tester/app.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import base64 4 | import glob 5 | 6 | import grequests 7 | 8 | NUM_CALLS = 500 # per each HTTP method 9 | TIMEOUT = 2.0 10 | LOCAL_IMAGE_GLOB = "../text_recognizer/tests/support/emnist/*.png" 11 | ENDPOINTS_FILE = "./endpoints.txt" 12 | IMAGE_URLS_FILE = "./remote_images.txt" 13 | 14 | 15 | def url_for_get(api_url, img_url): 16 | """Returns a url suitable for testing GET.""" 17 | return "%s?image_url=%s" % (api_url.strip("/"), img_url) 18 | 19 | 20 | def data_for_post(api_url, img_path): 21 | """Returns data param for testing POST.""" 22 | with open(img_path, "rb") as f: 23 | text = base64.b64encode(f.read()).decode("ascii") 24 | return {"image": "data:image/png;base64,'%s'" % text} 25 | 26 | 27 | def build_get_calls(api_url, img_urls): 28 | """Returns frozen GET calls.""" 29 | return [grequests.get(url_for_get(api_url, img_url), timeout=TIMEOUT) for img_url in img_urls] 30 | 31 | 32 | def build_post_calls(api_url, local_images): 33 | """Returns frozen POST calls.""" 34 | return [ 35 | grequests.post(api_url, data=data_for_post(api_url, img_path), timeout=TIMEOUT) for img_path in local_images 36 | ] 37 | 38 | 39 | def main(): 40 | """Reads the files and runs everything.""" 41 | with open(ENDPOINTS_FILE) as endpoints_file: 42 | endpoints = [x.strip() for x in endpoints_file.readlines()] 43 | with open(IMAGE_URLS_FILE) as image_urls_file: 44 | remote_image_urls = [x.strip() for x in image_urls_file.readlines()] 45 | local_images = glob.glob(LOCAL_IMAGE_GLOB) 46 | 47 | # build set of roughly 200 calls 48 | stuff = [] 49 | for url in endpoints: 50 | stuff.extend(build_get_calls(url, remote_image_urls)) 51 | stuff.extend(build_post_calls(url, local_images)) 52 | stuff *= int(200 / len(stuff)) 53 | 54 | good = 0 55 | total = 0 56 | while True: 57 | responses = grequests.map(stuff) 58 | total += len(stuff) 59 | good += len(stuff) - responses.count(None) 60 | b = "%s of %s completed." 
% (good, total) 61 | print(b, end="\r") 62 | 63 | 64 | if __name__ == "__main__": 65 | main() 66 | -------------------------------------------------------------------------------- /admin/endpoint_tester/images.txt: -------------------------------------------------------------------------------- 1 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/or%2Bif%2Bused%2Bthe%2Bresults.png 2 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/Cousin%2BElecs.png 3 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/Vic%2Btheater%2BSaturday%2Bafternoon.png 4 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/Yesterday%2Bit%2Boffered.png 5 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/and%2Bthe%2Bpiston%2Bis%2Bin%2Btop.png 6 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/distributions.png 7 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/do%2Bthat%2BIn.png 8 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/far%2Bas%2Bto%2Bsay.png 9 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/force%2Brequirements%2Band%2Bas%2Bit.png 10 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/he%2Bhad%2Blittle%2Btolerance%2Bfor%2BWhigs.png 11 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/of%2B120degrees%2B160degreesF%2B490.png 12 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/or%2Bif%2Bused%2Bthe%2Bresults.png 13 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/right%2BYour.png 14 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/secretaries.png 15 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/the%2Babsence%2Bof%2Bthe%2Bhymen%2Bis%2Bby%2Bno.png 16 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/the.png 17 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/to%2Bthe%2Bmarket.png 18 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/velocity%2Bis%2Bknown%2BCook%2Band.png 19 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/what.png 20 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/who.png 21 | -------------------------------------------------------------------------------- /admin/endpoint_tester/inv_images.txt: -------------------------------------------------------------------------------- 1 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/Corsi%2Bleft%2Bfor_inv.png 2 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/Cousin%2BElecs_inv.png 3 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/Vic%2Btheater%2BSaturday%2Bafternoon_inv.png 4 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/Yesterday%2Bit%2Boffered_inv.png 5 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/and%2Bthe%2Bpiston%2Bis%2Bin%2Btop_inv.png 6 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/distributions_inv.png 7 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/do%2Bthat%2BIn_inv.png 8 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/far%2Bas%2Bto%2Bsay_inv.png 9 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/force%2Brequirements%2Band%2Bas%2Bit_inv.png 10 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/he%2Bhad%2Blittle%2Btolerance%2Bfor%2BWhigs_inv.png 11 | 
http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/of%2B120degrees%2B160degreesF%2B490_inv.png 12 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/or%2Bif%2Bused%2Bthe%2Bresults_inv.png 13 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/right%2BYour_inv.png 14 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/secretaries_inv.png 15 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/the%2Babsence%2Bof%2Bthe%2Bhymen%2Bis%2Bby%2Bno_inv.png 16 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/the_inv.png 17 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/to%2Bthe%2Bmarket_inv.png 18 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/velocity%2Bis%2Bknown%2BCook%2Band_inv.png 19 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/what_inv.png 20 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/who_inv.png 21 | -------------------------------------------------------------------------------- /admin/endpoint_tester/make_paths_txt.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | with open("endpoints.txt") as endpoints_file: 4 | endpoints = [x.strip() for x in endpoints_file.readlines()] 5 | with open(sys.argv[1]) as image_urls_file: 6 | remote_image_urls = [x.strip() for x in image_urls_file.readlines()] 7 | 8 | paths = [] 9 | for endpoint in endpoints: 10 | for rem in remote_image_urls: 11 | s = "{0}/v1/predict?image_url={1}".format(endpoint, rem) 12 | paths.append(s) 13 | print(s) 14 | -------------------------------------------------------------------------------- /admin/endpoint_tester/remote_images.txt: -------------------------------------------------------------------------------- 1 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/or%2Bif%2Bused%2Bthe%2Bresults.png 2 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/Corsi%20left%20for_inv.png 3 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/Cousin%20Elecs.png 4 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/Cousin%20Elecs_inv.png 5 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/Vic%20theater%20Saturday%20afternoon.png 6 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/Vic%20theater%20Saturday%20afternoon_inv.png 7 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/Yesterday%20it%20offered.png 8 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/Yesterday%20it%20offered_inv.png 9 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/and%20the%20piston%20is%20in%20top.png 10 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/and%20the%20piston%20is%20in%20top_inv.png 11 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/distributions.png 12 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/distributions_inv.png 13 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/do%20that%20In.png 14 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/do%20that%20In_inv.png 15 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/far%20as%20to%20say.png 16 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/far%20as%20to%20say_inv.png 17 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/force%20requirements%20and%20as%20it.png 18 
| http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/force%20requirements%20and%20as%20it_inv.png 19 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/he%20had%20little%20tolerance%20for%20Whigs.png 20 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/he%20had%20little%20tolerance%20for%20Whigs_inv.png 21 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/of%20120degrees%20160degreesF%20490.png 22 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/of%20120degrees%20160degreesF%20490_inv.png 23 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/or%20if%20used%20the%20results.png 24 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/or%20if%20used%20the%20results_inv.png 25 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/right%20Your.png 26 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/right%20Your_inv.png 27 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/secretaries.png 28 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/secretaries_inv.png 29 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/the%20absence%20of%20the%20hymen%20is%20by%20no.png 30 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/the%20absence%20of%20the%20hymen%20is%20by%20no_inv.png 31 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/the.png 32 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/the_inv.png 33 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/to%20the%20market.png 34 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/to%20the%20market_inv.png 35 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/velocity%20is%20known%20Cook%20and.png 36 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/velocity%20is%20known%20Cook%20and_inv.png 37 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/what.png 38 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/what_inv.png 39 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/who.png 40 | http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/who_inv.png 41 | -------------------------------------------------------------------------------- /admin/endpoint_tester/run_siege.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | siege -c150 -d1 -i -f paths.txt 3 | -------------------------------------------------------------------------------- /admin/handwriting_data_gathering/print.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-full-stack/fsdl-text-recognizer/a99a3d3f0594dfceb249a56e8362337f9e12897e/admin/handwriting_data_gathering/print.pdf -------------------------------------------------------------------------------- /admin/handwriting_data_gathering/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "outputting markdown..." 4 | mkdir -p mds 5 | python output_markdown.py 6 | 7 | echo "converting to pdfs..." 8 | mkdir -p pdfs 9 | for i in {0..13}; do pandoc "mds/$i.md" -o "pdfs/$i.pdf"; done 10 | 11 | pdfunite pdfs/*.pdf print.pdf 12 | rm -r mds 13 | rm -r pdfs 14 | echo "print.pdf is ready to be printed!" 
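The `output_markdown.py` that this script calls is not reproduced in this section. As a rough, hypothetical sketch of what it has to do (fill the `{{ text }}` and `{{ source }}` placeholders of the template shown next and write one markdown file per page into `mds/`):

```python
"""Hypothetical sketch only; the repo's actual output_markdown.py may differ."""
from pathlib import Path

# Assumed data source: one (paragraph, source attribution) pair per printed page.
PARAGRAPHS = [
    ("Example paragraph to be copied out by hand.", "an example source"),
    # ...
]

TEMPLATE = Path("template.md").read_text()

for i, (text, source) in enumerate(PARAGRAPHS):
    page = TEMPLATE.replace("{{ text }}", text).replace("{{ source }}", source)
    (Path("mds") / f"{i}.md").write_text(page)  # run.sh creates mds/ before calling this script
```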
15 | -------------------------------------------------------------------------------- /admin/handwriting_data_gathering/template.md: -------------------------------------------------------------------------------- 1 | --- 2 | geometry: margin=1in 3 | output: pdf_document 4 | header-includes: | 5 | \definecolor{light-gray}{gray}{0.8} 6 | --- 7 | 8 | # Full Stack Deep Learning - November 2019 9 | 10 | ## Handwriting Data Collection 11 | 12 | Please write the following paragraph by hand in the space below. Do not sign your name. 13 | 14 | > {{ text }} 15 | 16 | --- 17 | 18 | 23 | 24 | \vfill 25 | 26 | --- 27 | 28 | By submitting this page, you consent to your handwriting becoming part of a publicly available dataset. 29 | 30 | Paragraph is from {{ source }} 31 | -------------------------------------------------------------------------------- /admin/outstanding_tasks.md: -------------------------------------------------------------------------------- 1 | ## Next 2 | 3 | - [ ] 1 go through all labs in Jupyterhub and take screenshots, putting them into the readme 4 | - [ ] 1 make the app.py use the joint model 5 | 6 | - [ ] 2 add more information to slides as preview of the important things we'll be doing 7 | - [ ] ability to run end-to-end from raw data, with caching along the way to speed up future runs 8 | - [ ] dataset streaming and augmentations (fast.ai, TFRecord) 9 | - [ ] specifying and recording experiments via config file 10 | - [ ] ability to run experiments and automatically pick best model 11 | - [ ] ability to create a deployment package in CI 12 | - [ ] 2 introduce code that picks best run from weights and biases (2 hours) 13 | - [ ] 2 add multi-gpu data parallelism option in run_experiment.py 14 | - [ ] 2 look into switching from flask to that async one in fast.ai course 15 | - [ ] 2 kick off another IAM training with ImageDataGenerator 16 | - [ ] 2 add tests for training (but don't run them in circleci) 17 | - [ ] 2 add to lab 4: output sample predictions every epoch so that they can be reviewed in weights and biases 18 | - [ ] 2 save experiment json along with weights, and just call it canonical_character_predictor_weights.h5 and canonical_character_predictor_config.json 19 | - easiest way to implement would probably be to pass in experiment_config from run_experiment to Model#save_weights 20 | 21 | - [ ] 3 add a notebook that uses our trained line detector on the fsdl handwriting data 22 | - [ ] 3 share pre-processing logic in predict() and fit()/evaluate() 23 | - [ ] 3 compute validation accuracy in ctc training (run decoding) 24 | 25 | - [ ] 4 make a flag for overfitting on one batch 26 | 27 | ## Done 28 | 29 | - [x] 20191029 look into writing lab readme's as slides using Marp, but decided against it for now, because wasn't able to find a solution that looked good in both github readme format (and typora) and marp 30 | - [x] 20191030 1 update Pipfile 31 | - tensorflow 1.15 seems to depend on functools32 which can't be installed for python3 32 | - tensorflow 1.14 has the dual -gpu and not-gpu nature, which is a little annoying, but fine 33 | - tensorflow 2.0 also has dual gpu 34 | - python3.7 has trouble installing a dependency of wandb (forgot the name) 35 | - settled on python3.6 and tensorflow 1.14 36 | -------------------------------------------------------------------------------- /admin/readme.md: -------------------------------------------------------------------------------- 1 | # Text Recognizer Project - Admin Readme 2 | 3 | ## Tasks 4 | 5 | ```sh 6 | 
admin/tasks/subset_repo_for_labs.py # Creates -in _labs by default 7 | 8 | admin/tasks/subset_repo_for_labs.sh # Creates in ../fsdl-text-recognition-project, which should be the public git repo 9 | ``` 10 | 11 | Uploading data to S3 is done with `aws s3 cp data/raw/iam/iamdb.zip s3://fsdl-public-assets/iam/iamdb.zip --profile fsdl` 12 | -------------------------------------------------------------------------------- /admin/tasks/lab_specific_files.yml: -------------------------------------------------------------------------------- 1 | 1: 2 | - notebooks/01-look-at-emnist.ipynb 3 | - tasks/train_character_predictor.sh 4 | - tasks/test_functionality.sh 5 | - text_recognizer/__init__.py 6 | - text_recognizer/util.py 7 | - text_recognizer/character_predictor.py 8 | - text_recognizer/datasets/__init__.py 9 | - text_recognizer/datasets/dataset.py 10 | - text_recognizer/datasets/emnist_dataset.py 11 | - text_recognizer/datasets/emnist_essentials.json 12 | - text_recognizer/datasets/dataset_sequence.py 13 | - text_recognizer/models/__init__.py 14 | - text_recognizer/models/base.py 15 | - text_recognizer/models/character_model.py 16 | - text_recognizer/networks/__init__.py 17 | - text_recognizer/networks/mlp.py 18 | - text_recognizer/tests/support/create_emnist_support_files.py 19 | - text_recognizer/tests/support/emnist/8.png 20 | - text_recognizer/tests/support/emnist/U.png 21 | - text_recognizer/tests/support/emnist/e.png 22 | - text_recognizer/weights/CharacterModel_EmnistDataset_mlp_weights.h5 23 | - text_recognizer/tests/test_character_predictor.py 24 | - training/util.py 25 | - training/run_experiment.py 26 | - text_recognizer/networks/lenet.py 27 | - text_recognizer/networks/misc.py 28 | - text_recognizer/tests/support/create_emnist_lines_support_files.py 29 | - "text_recognizer/tests/support/emnist_lines/Corsi left for.png" 30 | - "text_recognizer/tests/support/emnist_lines/do that In.png" 31 | - "text_recognizer/tests/support/emnist_lines/or if used the results.png" 32 | 33 | 2: 34 | - notebooks/02-look-at-emnist-lines.ipynb 35 | - notebooks/01b-cnn-for-emnist.ipynb 36 | - notebooks/02b-cnn-for-simple-emnist-lines.ipynb 37 | - text_recognizer/datasets/emnist_lines_dataset.py 38 | - text_recognizer/datasets/sentence_generator.py 39 | - text_recognizer/line_predictor.py 40 | - text_recognizer/models/line_model.py 41 | - text_recognizer/networks/line_cnn_all_conv.py 42 | 43 | 3: 44 | - tasks/train_lstm_line_predictor.sh 45 | - text_recognizer/models/line_model_ctc.py 46 | - text_recognizer/networks/ctc.py 47 | - text_recognizer/networks/line_lstm_ctc.py 48 | - text_recognizer/weights/LineModelCtc_EmnistLinesDataset_line_lstm_ctc_weights.h5 49 | - text_recognizer/tests/test_line_predictor.py 50 | 51 | 4: 52 | - "text_recognizer/tests/support/iam_lines/and came into the livingroom, where.png" 53 | - "text_recognizer/tests/support/iam_lines/He rose from his breakfast-nook bench.png" 54 | - "text_recognizer/tests/support/iam_lines/his entrance. 
He came, almost falling.png" 55 | - notebooks/03-look-at-iam-lines.ipynb 56 | - tasks/prepare_sample_experiments.sh 57 | - tasks/train_lstm_line_predictor_on_iam.sh 58 | - text_recognizer/datasets/iam_lines_dataset.py 59 | - text_recognizer/tests/support/create_iam_lines_support_files.py 60 | - text_recognizer/weights/LineModelCtc_IamLinesDataset_line_lstm_ctc_weights.h5 61 | - training/experiments/sample.json 62 | - training/gpu_manager.py 63 | - training/prepare_experiments.py 64 | - training/run_sweep.py 65 | - training/sweep_emnist.yaml 66 | - training/sweep_iam.yaml 67 | - wandb/settings 68 | 69 | 5: 70 | - notebooks/04-look-at-iam-paragraphs.ipynb 71 | - notebooks/04b-look-at-line-detector-predictions.ipynb 72 | - tasks/train_line_detector.sh 73 | - text_recognizer/datasets/iam_dataset.py 74 | - text_recognizer/datasets/iam_paragraphs_dataset.py 75 | - text_recognizer/models/line_detector_model.py 76 | - text_recognizer/networks/fcn.py 77 | - text_recognizer/paragraph_text_recognizer.py 78 | - text_recognizer/tests/support/iam_paragraphs/a01-000u-cropped.jpg 79 | - text_recognizer/tests/test_paragraph_text_recognizer.py 80 | - text_recognizer/weights/LineDetectorModel_IamParagraphsDataset_fcn_weights.h5 81 | 82 | 6: 83 | - notebooks/05-look-at-fsdl-handwriting.ipynb 84 | - tasks/update_fsdl_paragraphs_metadata.sh 85 | - text_recognizer/datasets/fsdl_handwriting_dataset.py 86 | - training/update_metadata.py 87 | 88 | 7: 89 | - api/__init__.py 90 | - evaluation/evaluate_character_predictor.py 91 | - evaluation/evaluate_line_predictor.py 92 | - tasks/lint.sh 93 | - tasks/test_validation.sh 94 | - .pylintrc 95 | - pyproject.toml 96 | - setup.cfg 97 | 98 | 8: 99 | - api/app.py 100 | - api/tests/test_app.py 101 | - api/Dockerfile 102 | - tasks/build_api_docker.sh 103 | - tasks/run_api_docker.sh 104 | - tasks/test_api.sh 105 | -------------------------------------------------------------------------------- /admin/tasks/print_repo_structure.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | tree -L 3 -I "run-*|node_modules|admin" 4 | -------------------------------------------------------------------------------- /admin/tasks/subset_repo_for_labs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Script to generate directories (or git branches) corresponding to subsets of the repo appropriate for different labs. 4 | 5 | The script creates a subset of files corresponding to labs with index less than or equal than the one given, 6 | as specified in lab_specific_files.yml 7 | 8 | Furthermore, it also strips out text between blocks like 9 | # Your code below (Lab1) 10 | # 11 | # Your code above (Lab1) 12 | for labs with index greater than or equal to the one given. 13 | 14 | It also strips text between blocks like 15 | # Hide lines below until Lab 2 16 | # 17 | # Hide lines above until Lab 2 18 | for labs with index greater than the one given. 19 | 20 | NOTE that the stripping is only performed on .py files. 21 | """ 22 | from pathlib import Path 23 | import argparse 24 | import os 25 | import glob 26 | import re 27 | import shutil 28 | 29 | import yaml 30 | 31 | MAX_LAB_NUMBER = 9 # NOTE: Setting this to 10 will break the regexp! 
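# For illustration (not part of the original script): with MAX_LAB_NUMBER = 9 and, say, lab_number = 3,
# _filter_your_code_blocks() below builds the pattern "# Your code below \(Lab [3|4|5|6|7|8]\)",
# i.e. a single-character class matching the digits 3 through 8 (the "|" characters inside the
# brackets are redundant but harmless). Every lab number must therefore be a single character,
# which is what the NOTE above about 10 breaking the regexp refers to.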
32 | REPO_DIRNAME = Path(__file__).resolve().parents[2] 33 | INFO_FILENAME = REPO_DIRNAME / "admin" / "tasks" / "lab_specific_files.yml" 34 | SOLUTION_VERSION_LABS = True 35 | 36 | 37 | def _filter_your_code_blocks(lines, lab_number): 38 | """ 39 | Strip out stuff between "Your code here" blocks. 40 | """ 41 | if lab_number == MAX_LAB_NUMBER: 42 | lab_numbers_to_strip = str(lab_number) 43 | else: 44 | lab_numbers_to_strip = f"[{'|'.join(str(num) for num in range(lab_number, MAX_LAB_NUMBER))}]" 45 | beginning_comment = f"# Your code below \(Lab {lab_numbers_to_strip}\)" 46 | ending_comment = f"# Your code above \(Lab {lab_numbers_to_strip}\)" 47 | filtered_lines = [] 48 | filtering = False 49 | for line in lines: 50 | if not filtering: 51 | filtered_lines.append(line) 52 | if re.search(beginning_comment, line): 53 | filtering = True 54 | filtered_lines.append("") 55 | if re.search(ending_comment, line): 56 | filtered_lines.append(line) 57 | filtering = False 58 | return filtered_lines 59 | 60 | 61 | def _filter_hidden_blocks(lines, lab_number): 62 | if lab_number == MAX_LAB_NUMBER: 63 | return lines 64 | if lab_number + 1 == MAX_LAB_NUMBER: 65 | lab_numbers_to_hide = str(MAX_LAB_NUMBER) 66 | else: 67 | lab_numbers_to_hide = f"[{'|'.join(str(num) for num in range(lab_number + 1, MAX_LAB_NUMBER))}]" 68 | beginning_comment = f"# Hide lines below until Lab {lab_numbers_to_hide}" 69 | ending_comment = f"# Hide lines above until Lab {lab_numbers_to_hide}" 70 | filtered_lines = [] 71 | filtering = False 72 | for line in lines: 73 | if re.search(beginning_comment, line): 74 | filtering = True 75 | if re.search(ending_comment, line): 76 | filtering = False 77 | continue 78 | if not filtering: 79 | filtered_lines.append(line) 80 | return filtered_lines 81 | 82 | 83 | def _replace_data_dirname(lines): 84 | filtered_lines = [] 85 | for line in lines: 86 | if line == ' return Path(__file__).resolve().parents[2] / "data"': 87 | line = ' return Path(__file__).resolve().parents[3] / "data"' 88 | filtered_lines.append(line) 89 | return filtered_lines 90 | 91 | 92 | def _copy_files_for_lab(info, lab_number, lab_output_dir): 93 | selected_paths = sum([info.get(number, []) for number in range(lab_number + 1)], []) 94 | new_paths = [] 95 | for path in selected_paths: 96 | new_path = lab_output_dir / path 97 | new_path.parents[0].mkdir(parents=True, exist_ok=True) 98 | shutil.copy(path, new_path) 99 | new_paths.append(new_path) 100 | return new_paths 101 | 102 | 103 | def _process_new_files(new_paths, lab_number, filter_your_code=True, filter_hidden=True, replace_data_dirname=True): 104 | for path in new_paths: 105 | if path.suffix != ".py": 106 | continue 107 | 108 | with open(path) as f: 109 | lines = f.read().split("\n") 110 | 111 | if filter_your_code: 112 | lines = _filter_your_code_blocks(lines, lab_number) 113 | if filter_hidden: 114 | lines = _filter_hidden_blocks(lines, lab_number) 115 | if replace_data_dirname: 116 | lines = _replace_data_dirname(lines) 117 | 118 | with open(path, "w") as f: 119 | f.write("\n".join(lines)) 120 | 121 | 122 | def subset_repo(info, output_dirname): 123 | """See module docstring.""" 124 | output_dir = Path(output_dirname) 125 | if output_dir.exists(): 126 | for directory in glob.glob(f"{str(output_dir)}/lab*"): 127 | shutil.rmtree(directory) 128 | if os.path.exists(output_dir / "data"): 129 | shutil.rmtree(output_dir / "data") 130 | 131 | output_dir.mkdir(parents=True, exist_ok=True) 132 | shutil.copytree(REPO_DIRNAME / "data", output_dir / "data") 133 | 134 | 
shutil.copy(".gitignore", output_dir) 135 | shutil.copy("environment.yml", output_dir) 136 | shutil.copy("requirements.in", output_dir) 137 | shutil.copy("requirements-dev.in", output_dir) 138 | shutil.copy("requirements.txt", output_dir) 139 | shutil.copy("requirements-dev.txt", output_dir) 140 | shutil.copy("instructions/readme.md", output_dir) 141 | shutil.copy("instructions/setup.md", output_dir) 142 | 143 | # Labs 144 | for lab_number in info.keys(): 145 | lab_output_dir = output_dir / f"lab{lab_number}" 146 | lab_output_dir.mkdir(parents=True) 147 | new_paths = _copy_files_for_lab(info, lab_number, lab_output_dir) 148 | _process_new_files(new_paths, lab_number, filter_your_code=(not SOLUTION_VERSION_LABS)) 149 | shutil.copy(f"instructions/lab{lab_number}.md", output_dir / f"lab{lab_number}" / "readme.md") 150 | 151 | (output_dir / ".circleci").mkdir(exist_ok=True) 152 | shutil.copy(".circleci/config.for-lab.yml", output_dir / ".circleci" / "config.yml") 153 | 154 | if not SOLUTION_VERSION_LABS: 155 | os.remove(output_dir / "lab1/text_recognizer/weights/CharacterModel_EmnistDataset_mlp_weights.h5") 156 | os.remove(output_dir / "lab2/text_recognizer/weights/LineModelCtc_EmnistLinesDataset_line_lstm_ctc_weights.h5") 157 | os.remove(output_dir / "lab4/text_recognizer/weights/LineModelCtc_IamLinesDataset_line_lstm_ctc_weights.h5") 158 | os.remove(output_dir / "lab5/text_recognizer/weights/LineDetectorModel_IamParagraphsDataset_fcn_weights.h5") 159 | 160 | 161 | def main(): 162 | parser = argparse.ArgumentParser() 163 | parser.add_argument("--output_dirname", default="_labs", help="Where to output the lab subset directories.") 164 | args = parser.parse_args() 165 | 166 | with open(INFO_FILENAME) as f: 167 | info = yaml.full_load(f.read()) 168 | 169 | subset_repo(info, args.output_dirname) 170 | 171 | 172 | if __name__ == "__main__": 173 | main() 174 | -------------------------------------------------------------------------------- /admin/tasks/subset_repo_for_labs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python admin/tasks/subset_repo_for_labs.py --output_dir ../fsdl-text-recognizer-project 4 | -------------------------------------------------------------------------------- /admin/wandb_hub/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | # Install basics 4 | RUN apt-get update 5 | RUN apt-get install -y build-essential # For being able to compile bottleneck and some other Python package 6 | RUN apt-get install -y curl # For downloading conda 7 | RUN apt-get install -y git # We need git! 
8 | RUN apt-get install -y shellcheck # For linting 9 | 10 | # Install git-lfs 11 | RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash && apt-get install git-lfs && git-lfs install 12 | 13 | # Install miniconda 14 | RUN cd /tmp && curl -LO http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && bash Miniconda3-latest-Linux-x86_64.sh -p ~/miniconda -b && rm Miniconda3-latest-Linux-x86_64.sh && cd - 15 | ENV PATH=$HOME/miniconda/bin:$PATH 16 | 17 | # Install base conda packages to speed things up 18 | RUN conda update -y conda 19 | RUN conda install cudatoolkit=10.1 cudnn=7.6 pip python=3.7 20 | 21 | # Install some heavy packages in the default conda environment simply to cache these packages and save time later 22 | RUN pip install tensorflow==2.2.0rc2 torch 23 | -------------------------------------------------------------------------------- /admin/wandb_hub/readme.md: -------------------------------------------------------------------------------- 1 | ## July 31 Sync with Chris re: W&B Jupyter Hub 2 | 3 | - How many CPUs and how much RAM per container? 4 | - shows 8 cores, 32 GB 5 | - limit pod to 8GB, 2CPUs, 2GPUs 6 | - Is it possible to do 2 GPUs per container? 7 | - It is possible. They're running 2 GPUs per docker right now. 8 | - Persistent space 9 | - Chris will turn it on 10 | - 10GB 11 | - What should be mounted 12 | - next docker build will start in home directory 13 | - can set env variable to clone a repo other than ml-class 14 | - [ ] send chris repo to clone 15 | - Github access 16 | - should store username and password 17 | - Admin (see other sessions, etc)? 18 | - done 19 | - Troubleshooting (how to handle frozen sessions, for example)? 20 | - right-click in terminal, click Refresh Terminal 21 | - if people get "invalid code" messages, have them sign up for wandb again 22 | - [ ] send chris the schedule to pre-launch the cluster 23 | - Is it possible to run Docker inside of container? (can the container run privileged?) 24 | 25 | https://hub.wandb.us/hub/login 26 | https://hub.wandb.us/hub/login?gpu=true 27 | 28 | ## Things that should be set in environment 29 | 30 | ```sh 31 | export CUDA_DEVICE_ORDER=PCI_BUS_ID 32 | export PYTHONPATH=. 33 | alias ll="ls -lh" 34 | ``` 35 | -------------------------------------------------------------------------------- /api/Dockerfile: -------------------------------------------------------------------------------- 1 | # The "buster" flavor of the official docker Python image is based on Debian and includes common packages. 
2 | FROM python:3.7-buster 3 | 4 | # Create the working directory 5 | RUN set -ex && mkdir /repo 6 | WORKDIR /repo 7 | 8 | # Copy only the relevant directories to the working diretory 9 | COPY text_recognizer/ ./text_recognizer 10 | COPY api/ ./api 11 | 12 | # Install Python dependencies 13 | RUN set -ex && pip3 install -r api/requirements.txt 14 | 15 | # Run the web server 16 | EXPOSE 8000 17 | ENV PYTHONPATH /repo 18 | CMD python3 /repo/api/app.py 19 | -------------------------------------------------------------------------------- /api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-full-stack/fsdl-text-recognizer/a99a3d3f0594dfceb249a56e8362337f9e12897e/api/__init__.py -------------------------------------------------------------------------------- /api/app.py: -------------------------------------------------------------------------------- 1 | """Flask web server serving text_recognizer predictions.""" 2 | import os 3 | 4 | from flask import Flask, request, jsonify 5 | import tensorflow.keras.backend as K 6 | 7 | from text_recognizer.line_predictor import LinePredictor 8 | import text_recognizer.util as util 9 | 10 | os.environ["CUDA_VISIBLE_DEVICES"] = "" # Do not use GPU 11 | 12 | app = Flask(__name__) # pylint: disable=invalid-name 13 | 14 | 15 | @app.route("/") 16 | def index(): 17 | """Provide simple health check route.""" 18 | return "Hello, world!" 19 | 20 | 21 | @app.route("/v1/predict", methods=["GET", "POST"]) 22 | def predict(): 23 | """Provide main prediction API route. Responds to both GET and POST requests.""" 24 | K.clear_session() 25 | predictor = LinePredictor() 26 | image = _load_image() 27 | pred, conf = predictor.predict(image) 28 | print("METRIC confidence {}".format(conf)) 29 | print("METRIC mean_intensity {}".format(image.mean())) 30 | print("INFO pred {}".format(pred)) 31 | return jsonify({"pred": str(pred), "conf": float(conf)}) 32 | 33 | 34 | def _load_image(): 35 | if request.method == "POST": 36 | data = request.get_json() 37 | if data is None: 38 | return "no json received" 39 | return util.read_b64_image(data["image"], grayscale=True) 40 | if request.method == "GET": 41 | image_url = request.args.get("image_url") 42 | if image_url is None: 43 | return "no image_url defined in query string" 44 | print("INFO url {}".format(image_url)) 45 | return util.read_image(image_url, grayscale=True) 46 | raise ValueError("Unsupported HTTP method") 47 | 48 | 49 | def main(): 50 | """Run the app.""" 51 | app.run(host="0.0.0.0", port=8000, debug=False) # nosec 52 | 53 | 54 | if __name__ == "__main__": 55 | main() 56 | -------------------------------------------------------------------------------- /api/requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile 3 | # To update, run: 4 | # 5 | # pip-compile requirements.in 6 | # 7 | absl-py==0.9.0 # via tensorboard, tensorflow 8 | astunparse==1.6.3 # via tensorflow 9 | boltons==20.0.0 # via -r requirements.in 10 | cachetools==4.0.0 # via google-auth 11 | certifi==2019.11.28 # via requests 12 | chardet==3.0.4 # via requests 13 | click==7.1.1 # via flask 14 | editdistance==0.5.3 # via -r requirements.in 15 | flask==1.1.1 # via -r requirements.in 16 | gast==0.3.3 # via tensorflow 17 | google-auth-oauthlib==0.4.1 # via tensorboard 18 | google-auth==1.12.0 # via google-auth-oauthlib, tensorboard 19 | google-pasta==0.2.0 # via tensorflow 20 | 
grpcio==1.27.2 # via tensorboard, tensorflow 21 | h5py==2.10.0 # via -r requirements.in, tensorflow 22 | idna==2.9 # via requests 23 | itsdangerous==1.1.0 # via flask 24 | jinja2==2.11.1 # via flask 25 | keras-preprocessing==1.1.0 # via tensorflow 26 | markdown==3.2.1 # via tensorboard 27 | markupsafe==1.1.1 # via jinja2 28 | numpy==1.18.2 # via -r requirements.in, h5py, keras-preprocessing, opencv-python-headless, opt-einsum, scipy, tensorboard, tensorflow 29 | oauthlib==3.1.0 # via requests-oauthlib 30 | opencv-python-headless==4.2.0.32 # via -r requirements.in 31 | opt-einsum==3.2.0 # via tensorflow 32 | protobuf==3.11.3 # via tensorboard, tensorflow 33 | pyasn1-modules==0.2.8 # via google-auth 34 | pyasn1==0.4.8 # via pyasn1-modules, rsa 35 | requests-oauthlib==1.3.0 # via google-auth-oauthlib 36 | requests==2.23.0 # via -r requirements.in, requests-oauthlib, tensorboard 37 | rsa==4.0 # via google-auth 38 | scipy==1.4.1 # via tensorflow 39 | six==1.14.0 # via absl-py, astunparse, google-auth, google-pasta, grpcio, h5py, keras-preprocessing, protobuf, tensorboard, tensorflow 40 | tensorboard-plugin-wit==1.6.0.post2 # via tensorboard 41 | tensorboard==2.2.0 # via tensorflow 42 | tensorflow-estimator==2.2.0rc0 # via tensorflow 43 | tensorflow-cpu==2.2.0rc2 # via -r requirements.in 44 | termcolor==1.1.0 # via tensorflow 45 | toml==0.10.0 # via -r requirements.in 46 | tqdm==4.44.1 # via -r requirements.in 47 | urllib3==1.25.8 # via requests 48 | werkzeug==1.0.0 # via flask, tensorboard 49 | wheel==0.34.2 # via astunparse, tensorboard, tensorflow 50 | wrapt==1.11.2 # via -r requirements.in, tensorflow 51 | 52 | # The following packages are considered to be unsafe in a requirements file: 53 | # setuptools 54 | -------------------------------------------------------------------------------- /api/tests/test_app.py: -------------------------------------------------------------------------------- 1 | """Tests for web app.""" 2 | import os 3 | from pathlib import Path 4 | from unittest import TestCase 5 | import base64 6 | 7 | from api.app import app 8 | 9 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 10 | 11 | REPO_DIRNAME = Path(__file__).parents[2].resolve() 12 | # SUPPORT_DIRNAME = REPO_DIRNAME / 'text_recognizer' / 'tests' / 'support' / 'iam_lines' 13 | SUPPORT_DIRNAME = REPO_DIRNAME / "text_recognizer" / "tests" / "support" / "emnist_lines" 14 | 15 | 16 | class TestIntegrations(TestCase): 17 | def setUp(self): 18 | self.app = app.test_client() 19 | 20 | def test_index(self): 21 | response = self.app.get("/") 22 | assert response.get_data().decode() == "Hello, world!" 
23 | 24 | def test_predict(self): 25 | with open(SUPPORT_DIRNAME / "or if used the results.png", "rb") as f: 26 | b64_image = base64.b64encode(f.read()) 27 | response = self.app.post("/v1/predict", json={"image": f"data:image/jpeg;base64,{b64_image.decode()}"}) 28 | json_data = response.get_json() 29 | self.assertEqual(json_data["pred"], "or if used the resuits") 30 | -------------------------------------------------------------------------------- /data/raw/emnist/metadata.toml: -------------------------------------------------------------------------------- 1 | filename = 'matlab.zip' 2 | sha256 = 'e1fa805cdeae699a52da0b77c2db17f6feb77eed125f9b45c022e7990444df95' 3 | url = 'https://s3-us-west-2.amazonaws.com/fsdl-public-assets/matlab.zip' 4 | -------------------------------------------------------------------------------- /data/raw/emnist/readme.md: -------------------------------------------------------------------------------- 1 | # EMNIST dataset 2 | 3 | The EMNIST dataset is a set of handwritten character digits derived from the NIST Special Database 19 4 | and converted to a 28x28 pixel image format and dataset structure that directly matches the MNIST dataset." 5 | From https://www.nist.gov/itl/iad/image-group/emnist-dataset 6 | 7 | Original url is http://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/matlab.zip 8 | 9 | We uploaded the same file to our S3 bucket for faster download. 10 | -------------------------------------------------------------------------------- /data/raw/fsdl_handwriting/fsdl_handwriting.json: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:720d6c72b4317a9a5492630a1c9f6d83a20d36101a29311a5cf7825c1d60c180 3 | size 170325 4 | -------------------------------------------------------------------------------- /data/raw/fsdl_handwriting/metadata.toml: -------------------------------------------------------------------------------- 1 | url = "https://dataturks.com/projects/sergeykarayev/fsdl_handwriting/export" 2 | filename = "fsdl_handwriting.json" 3 | sha256 = "720d6c72b4317a9a5492630a1c9f6d83a20d36101a29311a5cf7825c1d60c180" 4 | -------------------------------------------------------------------------------- /data/raw/fsdl_handwriting/readme.md: -------------------------------------------------------------------------------- 1 | # FSDL Handwriting Dataset 2 | 3 | Handwritten paragraphs generated in the FSDL March 2019 class and annotated using the DataTurks UX. 4 | 5 | Export via manual download on https://dataturks.com/projects/sergeykarayev/fsdl_handwriting/export 6 | -------------------------------------------------------------------------------- /data/raw/iam/metadata.toml: -------------------------------------------------------------------------------- 1 | url = 'https://s3-us-west-2.amazonaws.com/fsdl-public-assets/iam/iamdb.zip' 2 | filename = 'iamdb.zip' 3 | sha256 = 'f3c9e87a88a313e557c6d3548ed8a2a1af2dc3c4a678c5f3fc6f972ba4a50c55' 4 | -------------------------------------------------------------------------------- /data/raw/iam/readme.md: -------------------------------------------------------------------------------- 1 | # IAM Dataset 2 | 3 | The IAM Handwriting Database contains forms of handwritten English text which can be used to train and test handwritten text recognizers and to perform writer identification and verification experiments. 
4 | 5 | - 657 writers contributed samples of their handwriting 6 | - 1,539 pages of scanned text 7 | - 13,353 isolated and labeled text lines 8 | 9 | - http://www.fki.inf.unibe.ch/databases/iam-handwriting-database 10 | 11 | ## Pre-processing 12 | 13 | First, all forms were placed into one directory called `forms`, from original directories like `formsA-D`. 14 | 15 | To save space, I converted the original PNG files to JPG, and resized them to half-size 16 | ``` 17 | mkdir forms-resized 18 | cd forms 19 | ls -1 *.png | parallel --eta -j 6 convert '{}' -adaptive-resize 50% '../forms-resized/{.}.jpg' 20 | ``` 21 | 22 | ## Split 23 | 24 | The data split we will use is 25 | IAM lines Large Writer Independent Text Line Recognition Task (lwitlrt): 9,862 text lines. 26 | 27 | - The validation set has been merged into the train set. 28 | - The train set has 7,101 lines from 326 writers. 29 | - The test set has 1,861 lines from 128 writers. 30 | - The text lines of all data sets are mutually exclusive, thus each writer has contributed to one set only. 31 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: fsdl-text-recognizer 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.7 6 | - cudatoolkit=10.1 7 | - cudnn=7.6 8 | - pip 9 | - pip: 10 | - pip-tools 11 | -------------------------------------------------------------------------------- /evaluation/evaluate_character_predictor.py: -------------------------------------------------------------------------------- 1 | """Run validation test for CharacterPredictor.""" 2 | import os 3 | from pathlib import Path 4 | from time import time 5 | import unittest 6 | 7 | from text_recognizer.datasets import EmnistDataset 8 | from text_recognizer.character_predictor import CharacterPredictor 9 | 10 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 11 | 12 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" / "emnist" 13 | 14 | 15 | class TestEvaluateCharacterPredictor(unittest.TestCase): 16 | def test_evaluate(self): 17 | predictor = CharacterPredictor() 18 | dataset = EmnistDataset() 19 | dataset.load_or_generate_data() 20 | t = time() 21 | metric = predictor.evaluate(dataset) 22 | time_taken = time() - t 23 | print(f"acc: {metric}, time_taken: {time_taken}") 24 | self.assertGreater(metric, 0.6) 25 | self.assertLess(time_taken, 10) 26 | -------------------------------------------------------------------------------- /evaluation/evaluate_line_predictor.py: -------------------------------------------------------------------------------- 1 | """Run validation test for LinePredictor.""" 2 | import os 3 | from pathlib import Path 4 | from time import time 5 | import unittest 6 | 7 | from text_recognizer.datasets import EmnistLinesDataset 8 | from text_recognizer.datasets import IamLinesDataset 9 | from text_recognizer.line_predictor import LinePredictor 10 | 11 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 12 | 13 | EMNIST_SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" / "emnist_lines" 14 | IAM_SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" / "iam_lines" 15 | 16 | 17 | class TestEvaluateLinePredictorEmnist(unittest.TestCase): 18 | def test_evaluate(self): 19 | predictor = LinePredictor(EmnistLinesDataset) 20 | dataset = EmnistLinesDataset() 21 | 22 | dataset.load_or_generate_data() 23 | 24 | t = time() 25 | metric = predictor.evaluate(dataset) 26 | time_taken = time() - t 27 
| 28 | print(f"acc: {metric}, time_taken: {time_taken}") 29 | self.assertGreater(metric, 0.6) 30 | self.assertLess(time_taken, 120) 31 | 32 | 33 | class TestEvaluateLinePredictorIam(unittest.TestCase): 34 | def test_evaluate(self): 35 | predictor = LinePredictor(IamLinesDataset) 36 | dataset = IamLinesDataset() 37 | 38 | dataset.load_or_generate_data() 39 | 40 | t = time() 41 | metric = predictor.evaluate(dataset) 42 | time_taken = time() - t 43 | 44 | print(f"acc: {metric}, time_taken: {time_taken}") 45 | self.assertGreater(metric, 0.6) 46 | self.assertLess(time_taken, 180) 47 | -------------------------------------------------------------------------------- /instructions/editor.md: -------------------------------------------------------------------------------- 1 | # Setting up editor 2 | 3 | ## VSCode 4 | 5 | There are two things you want to make sure of when using VSCode: 1) that it uses the right environment, and 2) that it lints your files as you work. 6 | 7 | Here is my setup for linting: 8 | 9 | ``` 10 | { 11 | "editor.rulers": [120], 12 | "files.exclude": { 13 | "**/.git": true, 14 | "**/.DS_Store": true, 15 | "**/__pycache__": true, 16 | "**/.pytest_cache": true, 17 | "**/.mypy_cache": true 18 | }, 19 | "python.linting.pep8Enabled": true, 20 | "python.linting.pep8Path": "pycodestyle", 21 | "python.linting.pylintEnabled": true, 22 | "python.linting.mypyEnabled": true, 23 | "python.linting.banditEnabled": true, 24 | "python.linting.banditArgs": ["-ll"], 25 | "python.linting.enabled": true, 26 | "[python]": { 27 | "editor.tabSize": 4 28 | }, 29 | } 30 | 31 | ``` 32 | -------------------------------------------------------------------------------- /instructions/lab1.md: -------------------------------------------------------------------------------- 1 | # Lab 1: Single-character prediction 2 | 3 | ## Before you begin, make sure to set up! 4 | 5 | Please complete [Lab Setup](/setup.md) before proceeding! 6 | 7 | ## Goal of the lab 8 | 9 | Train a model to solve a simplified version of the line text recognition problem. 10 | 11 | ## Outline 12 | 13 | - Intro to EMNIST, a character prediction dataset. 14 | - Explore the `networks` and `training` code. 15 | - Train simple MLP/CNN baselines to solve EMNIST. 16 | - Test your model. 17 | 18 | ## Follow along 19 | 20 | ``` 21 | git pull 22 | cd lab1/ 23 | ``` 24 | 25 | ## Intro to EMNIST 26 | 27 | - EMNIST = Extended Mini-NIST :) 28 | - All English letters and digits presented in the MNIST format. 29 | - Look at: `notebooks/01-look-at-emnist.ipynb` 30 | 31 | ## Networks and training code 32 | 33 | ``` 34 | - text_recognizer/networks/mlp.py 35 | - text_recognizer/networks/lenet.py 36 | - text_recognizer/models/base.py 37 | - text_recognizer/models/character_model.py 38 | - training/util.py 39 | ``` 40 | 41 | ## Train MLP and CNN 42 | 43 | You can run the shortcut command `tasks/train_character_predictor.sh`, which runs the following: 44 | 45 | ```sh 46 | training/run_experiment.py --save \ 47 | '{"dataset": "EmnistDataset", "model": "CharacterModel", "network": "mlp", "train_args": {"batch_size": 256}}' 48 | ``` 49 | 50 | It will take a couple of minutes to train your model. 
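The JSON argument is the experiment config: the `dataset`, `model`, and `network` entries name things defined in the `text_recognizer` package, and the `*_args` dictionaries are passed along to them. As a rough sketch of how `training/run_experiment.py` might interpret such a config (class lookups mirror the config keys, but constructor and `fit()` signatures below are illustrative, not the repo's exact API):

```python
# Rough sketch of how an experiment config could be interpreted.
# Signatures are illustrative; the real training/run_experiment.py differs in details.
import importlib
import json


def run_experiment_sketch(config_json: str):
    config = json.loads(config_json)

    datasets_module = importlib.import_module("text_recognizer.datasets")
    models_module = importlib.import_module("text_recognizer.models")
    networks_module = importlib.import_module("text_recognizer.networks")

    # Look up the classes/functions named in the config
    dataset_cls = getattr(datasets_module, config["dataset"])
    dataset = dataset_cls(**config.get("dataset_args", {}))
    dataset.load_or_generate_data()

    model_cls = getattr(models_module, config["model"])
    network_fn = getattr(networks_module, config["network"])
    model = model_cls(dataset_cls=dataset_cls, network_fn=network_fn,
                      network_args=config.get("network_args", {}))

    # train_args (e.g. batch_size, epochs) are forwarded to training
    model.fit(dataset=dataset, **config.get("train_args", {}))
    return model
```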
51 | 52 | Just for fun, you could also try a larger MLP, with a smaller batch size: 53 | 54 | ```sh 55 | training/run_experiment.py \ 56 | '{"dataset": "EmnistDataset", "model": "CharacterModel", "network": "mlp", "network_args": {"num_layers": 8}, "train_args": {"batch_size": 128}}' 57 | ``` 58 | 59 | ## Testing 60 | 61 | First, let's take a look at how the test works at 62 | 63 | ``` 64 | text_recognizer/tests/test_character_predictor.py 65 | ``` 66 | 67 | Now let's see if it works by running: 68 | 69 | ```sh 70 | pytest -s text_recognizer/tests/test_character_predictor.py 71 | ``` 72 | 73 | Or, use the shorthand `tasks/test_functionality.sh`. 74 | 75 | Testing should finish quickly. 76 | -------------------------------------------------------------------------------- /instructions/lab2.md: -------------------------------------------------------------------------------- 1 | # Lab 2: Convolutional Nets 2 | 3 | ## Goal of the lab 4 | 5 | - Use a simple convolutional network to recognize EMNIST characters. 6 | - Construct a synthetic dataset of EMNIST lines. 7 | - Move from reading single characters to reading lines. 8 | 9 | ## Follow along 10 | 11 | ``` 12 | git pull 13 | cd lab2 14 | ``` 15 | 16 | ## Using a convolutional network for recognizing EMNIST characters 17 | 18 | We left off in Lab 1 having trained an MLP model on the EMNIST characters dataset. 19 | 20 | Let's also train a CNN on the same task. 21 | We can start in the notebook `notebooks/01b-cnn-for-emnist.ipynb`. 22 | 23 | We can also run the same experiment with 24 | 25 | ```sh 26 | training/run_experiment.py '{"dataset": "EmnistDataset", "model": "CharacterModel", "network": "lenet", "train_args": {"epochs": 1}}' 27 | ``` 28 | 29 | Training the single epoch will take about 2 minutes (that's why we only do one epoch in this lab :)). 30 | Leave it running while we go on to the next part. 31 | 32 | ### Subsampling data 33 | 34 | It is very useful to be able to subsample the dataset for quick experiments. 35 | This is possible by passing `subsample_fraction=0.1` (or some other fraction) at dataset initialization, or in `dataset_args` in the `run_experiment.py` dictionary, for example: 36 | 37 | ```sh 38 | training/run_experiment.py '{"dataset": "EmnistDataset", "dataset_args": {"subsample_fraction": 0.25}, "model": "CharacterModel", "network": "lenet"}' 39 | ``` 40 | 41 | ## Making a synthetic dataset of EMNIST Lines 42 | 43 | - A synthetic dataset we built for this project 44 | - Sample sentences from the Brown corpus 45 | - For each character, sample a random EMNIST character and place it on a line (with some random overlap) 46 | - Look at: `notebooks/02-look-at-emnist-lines.ipynb` 47 | 48 | ## Reading multiple characters at once 49 | 50 | Now that we have a dataset of lines and not just single characters, we can apply our convolutional net to it. 51 | 52 | Let's look at `notebooks/02b-cnn-for-simple-emnist-lines.ipynb`, where we generate a dataset with at most 8 characters and no overlap. 53 | 54 | The first network we try is simply the same LeNet network we used for single characters, applied to each character in sequence, using the `TimeDistributed` layer. 55 | 56 | We can also express the same network using all convolutional layers, which we do next.
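Before the all-convolutional version, here is a minimal sketch of the `TimeDistributed` idea just described: slice the line image into fixed-width character windows and run the same small convnet on each slice. It is a self-contained illustration, not the repo's actual `lenet.py` or `line_cnn_all_conv.py` code, and it assumes 28x28 characters laid side by side with no overlap.

```python
# Minimal sketch: apply a per-character CNN across a line with TimeDistributed.
# Assumes 28x28 characters laid side by side with no overlap; the repo's real
# networks differ in details.
from tensorflow.keras import layers, models


def line_cnn_sketch(image_height=28, image_width=28 * 8, num_classes=64, max_length=8):
    char_width = image_width // max_length

    # Small per-character convnet, roughly LeNet-shaped
    char_cnn = models.Sequential([
        layers.Conv2D(32, 3, activation="relu", input_shape=(image_height, char_width, 1)),
        layers.MaxPooling2D(2),
        layers.Flatten(),
        layers.Dense(128, activation="relu"),
        layers.Dense(num_classes, activation="softmax"),
    ])

    line_input = layers.Input(shape=(image_height, image_width))
    # (height, width) -> (height, max_length, char_width, 1): split width into character windows
    x = layers.Reshape((image_height, max_length, char_width, 1))(line_input)
    # -> (max_length, height, char_width, 1): one slice per character
    x = layers.Permute((2, 1, 3, 4))(x)
    # Run the same character CNN on every slice, giving one softmax per character
    output = layers.TimeDistributed(char_cnn)(x)
    return models.Model(inputs=line_input, outputs=output)
```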
57 | 58 | We can train this model with a command, too: 59 | 60 | ```sh 61 | python training/run_experiment.py --save '{"train_args": {"epochs": 5}, "dataset": "EmnistLinesDataset", "dataset_args": {"max_length": 8, "max_overlap": 0}, "model": "LineModel", "network": "line_cnn_all_conv"}' 62 | ``` 63 | -------------------------------------------------------------------------------- /instructions/lab3.md: -------------------------------------------------------------------------------- 1 | # Lab 3: Using a sequence model for line text recognition 2 | 3 | ## Goal of the lab 4 | 5 | Use sequence modeling to be able to handle overlapping characters (input sequence no longer maps neatly onto output sequence). 6 | 7 | ## Outline 8 | 9 | - Overview of the model, network, and loss 10 | - Train an LSTM on EMNIST 11 | 12 | ## Follow along 13 | 14 | ``` 15 | git pull 16 | cd lab3 17 | ``` 18 | 19 | ## Overview of model and loss 20 | 21 | - Look at slides for CTC loss 22 | - Look at `networks/line_lstm_ctc.py` 23 | - Look at `models/line_model_ctc.py` 24 | 25 | ## Train LSTM model with CTC loss 26 | 27 | Let's train an LSTM model with CTC loss. 28 | 29 | ```sh 30 | python training/run_experiment.py --save '{"train_args": {"epochs": 16}, "dataset": "EmnistLinesDataset", "model": "LineModelCtc", "network": "line_lstm_ctc"}' 31 | ``` 32 | 33 | or the shortcut `tasks/train_lstm_line_predictor.sh` 34 | 35 | ## Things to try 36 | 37 | If you have time left over, or want to play around with this later on, you can try writing your own non-CTC `line_lstm` network (define it in `text_recognizer/networks/line_lstm.py`). 38 | For example, you could code up an encoder-decoder architecture with attention. 39 | -------------------------------------------------------------------------------- /instructions/lab5.md: -------------------------------------------------------------------------------- 1 | # Lab 5: Line Detection 2 | 3 | At this point, we have trained a model that can recognize text in a line, given an image of a single line. 4 | 5 | ## Goal of the lab 6 | 7 | Our next task is to automatically detect line regions in an image of a whole paragraph of text. 8 | 9 | Our approach will be to train a model that, when given an image containing lines of text, returns a pixelwise labeling of that image, with each pixel belonging to either background, odd line of handwriting, or even line of handwriting. 10 | Given the output of the model, we can find line regions with an easy image processing operation. 11 | 12 | ## Setup 13 | 14 | - As always, `git pull` in the `~/fsdl-text-recognizer-project` repo to get the latest code. 15 | - Then `cd lab5`. 16 | 17 | ## Data 18 | 19 | We are starting from the IAM dataset, which includes not only lines but the original writing sample forms, with each line and word region annotated. 20 | 21 | Let's load the IAM dataset and then look at the data files. 22 | Run `python text_recognizer/datasets/iam_dataset.py` 23 | Let's look at the raw data files, which are in `~/fsdl-text-recognizer-project/data/raw/iam/iamdb/forms`. 24 | 25 | We want to crop out the region of each page corresponding to the handwritten paragraph as our model input, and generate corresponding ground truth. 26 | 27 | Code to do this is in `text_recognizer/datasets/iam_paragraphs_dataset.py` 28 | 29 | We can look at the results in `notebooks/04-look-at-iam-paragraphs.ipynb` and by looking at some debug images we output in `data/interim/iam_paragraphs`. 
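The gist of that processing, as a simplified sketch (the real `iam_paragraphs_dataset.py` also handles scaling, padding, and file I/O, and the `x1`/`y1`/`x2`/`y2` region keys below are an assumed format): take the union of the annotated line regions as the paragraph crop, and paint a ground-truth image in which background is 0, odd lines are 1, and even lines are 2.

```python
# Simplified sketch of paragraph cropping and ground-truth generation.
# `line_regions` is assumed to be a list of dicts with x1/y1/x2/y2 keys;
# the real dataset code differs in details.
import numpy as np


def crop_paragraph_and_ground_truth(page: np.ndarray, line_regions: list):
    # Union bounding box of all line regions = the paragraph crop
    x1 = min(r["x1"] for r in line_regions)
    y1 = min(r["y1"] for r in line_regions)
    x2 = max(r["x2"] for r in line_regions)
    y2 = max(r["y2"] for r in line_regions)
    crop = page[y1:y2, x1:x2]

    # 0 = background, 1 = odd line, 2 = even line
    ground_truth = np.zeros(crop.shape[:2], dtype=np.uint8)
    for i, region in enumerate(line_regions):
        label = 1 if i % 2 == 0 else 2
        ground_truth[region["y1"] - y1:region["y2"] - y1,
                     region["x1"] - x1:region["x2"] - x1] = label
    return crop, ground_truth
```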
30 | 31 | ## Training data augmentation 32 | 33 | The model code for our new `LineDetector` is in `text_recognizer/models/line_detector_model.py`. 34 | 35 | Because we only have about a thousand images to learn this task on, data augmentation will be crucial. 36 | Image augmentations such as stretching, slight rotations, offsets, contrast and brightness changes, and potentially even mirror-flipping are tedious to code, and most frameworks provide optimized utility code for the task. 37 | 38 | We use Keras's `ImageDataGenerator`, and you can see the parameters for it in `text_recognizer/models/line_detector_model.py`. 39 | We can take a look at what the data transformations look like in the same notebook. 40 | 41 | ## Network description 42 | 43 | The network used in this model is `text_recognizer/networks/fcn.py`. 44 | 45 | The basic idea is a deep convolutional network with resnet-style blocks (the input to each block is concatenated to the block's output). 46 | We call it FCN, as in "Fully Convolutional Network," after the seminal paper that first used convnets for segmentation. 47 | 48 | Unlike the original FCN, however, we do not maxpool or upsample, but instead rely on dilated convolutions to rapidly increase the effective receptive field. 49 | [Here](https://fomoro.com/research/articles/receptive-field-calculator) is a handy calculator of the effective receptive field size of a convnet. 50 | 51 | The crucial thing to understand is that because we are labeling odd and even lines differently, each predicted pixel needs the context of the entire image to be labeled correctly -- otherwise, there is no way to know whether the pixel is on an odd or even line. 52 | 53 | ## Review results 54 | 55 | The model converges to quite good line segmentations. 56 | 57 | Check out `notebooks/04b-look-at-line-detector-predictions.ipynb` to see sample predictions on the test set. 58 | 59 | We also plot some sample training data augmentation in that notebook. 60 | 61 | ## Combining the two models 62 | 63 | Now we are ready to combine the new `LineDetector` model and the `LinePredictor` model that we trained yesterday. 64 | 65 | This is done in `text_recognizer/paragraph_text_recognizer.py`, which loads both models, finds line regions with one, and runs each crop through the other. 66 | 67 | We can see that it works as expected (albeit not too accurately yet) by running `pytest -s text_recognizer/tests/test_paragraph_text_recognizer.py`. 68 | 69 | ## Things to try 70 | 71 | - Try adding more data augmentations, or mess with the parameters of the existing ones. 72 | - Try the U-Net architecture, which MaxPools down and then UpSamples back up, with increased conv layer channel dimensions in the middle (https://lmb.informatik.uni-freiburg.de/people/ronneber/u-net/). 73 | -------------------------------------------------------------------------------- /instructions/lab6.md: -------------------------------------------------------------------------------- 1 | # Lab 6: Data Labeling and Versioning 2 | 3 | In this lab we will annotate the handwriting samples we collected, export and version the resulting data, write an interface to the new data format, and download the pages in parallel. 4 | 5 | ## Data labeling 6 | 7 | We will be using a simple online data annotation web service called Dataturks. 8 | 9 | Please head to the [project page](https://dataturks.com/projects/sergeykarayev/fsdl_handwriting) and log in using our shared credential: `annotator@fullstackdeeplearning.com` (the password will be shared during lab).
10 | 11 | You should be able to start tagging now. 12 | Let's do it together for a little bit, and then you'll have time to do a full page by yourself. 13 | 14 | We'll sync up and review results in a few minutes. 15 | 16 | (Review results and discuss any differences in annotation and how they could be prevented.) 17 | 18 | ## Export data and update metadata file 19 | 20 | Let's now export the data from Dataturks and add it to our version control. 21 | 22 | You have noticed the `metadata.toml` files in all of our `data/raw` directories. 23 | They contain the remote source of the data, the filename it should have when downloaded, and a SHA-256 hash of the downloaded file. 24 | 25 | The idea is that the data file has all the information needed for our dataset. 26 | In our case, it has image URLs and all the annotations we made. 27 | From this, we can download the images, and transform the annotation data into something usable by our training scripts. 28 | The hash, combined with the state of the codebase (tracked by git), then uniquely identifies the data we're going to use to train. 29 | 30 | We replace the current `fsdl_handwriting.json` with the one we just exported, and now need to update the metadata file, since the hash is different. 31 | SHA256 hash of any file can be computed by running `shasum -a 256 `. 32 | We can also update `metadata.toml` with a convenient script that replace the SHA-256 of the current file with the SHA-256 of the new file. 33 | There is a convenience task script defined: `tasks/update_fsdl_paragraphs_metadata.sh`. 34 | 35 | The data file itself is checked into version control, but tracked with git-lfs, as it can get heavyweight and can change frequently as we keep adding and annotating more data. 36 | Note that `git-lfs` actually does something very similar to what we more manually do with `metadata.toml`. 37 | The reason we also use the latter is for standardization across other types of datasets, which may not have a file we want to check into even `git-lfs` -- for example, EMNIST and IAM, which are too large as they include the images. 38 | 39 | ## Download images 40 | 41 | The class `IamHandwritingDataset` in `text_recognizer/datasets/iam_handwriting.py` must be able to load the data in the exported format and present it to consumers in a format they expect (e.g. `dataset.line_regions_by_id`). 42 | 43 | Since this data export does not come with images, but only pointers to remote locations of the images, the class must also be responsible for downloading the images. 44 | 45 | In downloading many images, it is very useful to do so in parallel. 46 | We use the `concurrent.futures.ThreadPoolExecutor` method, and use the `tqdm` package to provide a nice progress bar. 47 | 48 | ## Looking at the data 49 | 50 | We can confirm that we loaded the data correctly by looking at line crops and their corresponding strings. 51 | 52 | Make sure you are in `lab6` directory, and take a look at `notebooks/05-look-at-fsdl-handwriting.ipynb`. 53 | 54 | ## Training on the new dataset 55 | 56 | We're not going to have time to train on the new dataset, but that is something that is now possible. 57 | As an exercise, you could write `FsdlHandwritingLinesDataset` and `FsdlHandwritingParagraphsDataset`, and be able to train a model on a combination of IAM and FSDL Handwriting data on both the line detection and line text prediction tasks. 
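For reference, here is a minimal sketch of the parallel-download pattern described in the "Download images" section above, using `concurrent.futures.ThreadPoolExecutor` with a `tqdm` progress bar. The destination directory and the URL-to-filename mapping are placeholders; the real dataset class wraps this logic differently.

```python
# Minimal sketch of downloading many images in parallel with a progress bar.
# The output directory and url -> filename mapping are placeholders.
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path

import requests
from tqdm import tqdm


def download_images(urls, out_dirname="data/raw/fsdl_handwriting/pages"):
    out_dir = Path(out_dirname)
    out_dir.mkdir(parents=True, exist_ok=True)

    def download_one(url):
        filename = out_dir / Path(url).name
        if not filename.exists():
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            filename.write_bytes(response.content)
        return filename

    # Threads work well here because the work is network-bound
    with ThreadPoolExecutor(max_workers=16) as executor:
        return list(tqdm(executor.map(download_one, urls), total=len(urls)))
```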
58 | -------------------------------------------------------------------------------- /instructions/lab7.md: -------------------------------------------------------------------------------- 1 | # Lab 7: Testing and Continuous Integration 2 | 3 | ## Goal of the lab 4 | 5 | - Add evaluation tests 6 | - Add linting to our codebase 7 | - Set up continuous integration via CircleCI, and see our commits pass/fail 8 | 9 | ## Follow along 10 | 11 | ``` 12 | git pull 13 | cd lab7/ 14 | ``` 15 | 16 | ## Linting script 17 | 18 | Running `tasks/lint.sh` fully lints our codebase with a few different checkers: 19 | 20 | - `safety` scans our Python package dependencies for known security vulnerabilities 21 | - `pylint` does static analysis of Python files and reports both style and bug problems 22 | - `pycodestyle` checks for simple code style guideline violations (somewhat overlapping with `pylint`) 23 | - `mypy` performs static type checking of Python files 24 | - `bandit` performs static analysis to find common security vulnerabilities in Python code 25 | - `shellcheck` finds bugs and potential bugs in shell scripts 26 | 27 | A note: in writing Bash scripts, I often refer to [this excellent guide](http://redsymbol.net/articles/unofficial-bash-strict-mode/). 28 | 29 | Note that the linters are configured using the `.pylintrc` and `setup.cfg` files, as well as flags specified in `lint.sh`. 30 | 31 | Getting linting right will pay off in no time, and is a must for any multi-developer codebase. 32 | 33 | ## Setting up CircleCI 34 | 35 | The relevant new files for setting up continuous integration are 36 | 37 | - `evaluation/evaluate_character_predictor.py` 38 | - `evaluation/evaluate_line_predictor.py` 39 | - `tasks/test_validation.sh` 40 | 41 | There is one additional file that is outside of the lab7 directory (in the top-level directory): `.circleci/config.yml` 42 | 43 | Let's set up CircleCI first and then look at the new evaluation files. 44 | 45 | Go to https://circleci.com and log in with your GitHub account. 46 | Click on Add Project. Select your fork of the `fsdl-text-recognizer-project` repo. 47 | It will ask you to place the `config.yml` file in the repo. 48 | Good news -- it's already there, so you can just hit the "Start building" button. 49 | 50 | While CircleCI starts the build, let's look at the `config.yml` file. 51 | 52 | Let's also check out the new validation test files: they simply evaluate the trained predictors on their respective test sets, and make sure they are above a threshold accuracy. 53 | 54 | Now that CircleCI is done building, let's push a commit so that we can see it build again, and check out the nice green checkmark in our commit history (https://github.com/sergeyktest/fsdl-text-recognizer-project/commits/master). 55 | -------------------------------------------------------------------------------- /instructions/lab8.md: -------------------------------------------------------------------------------- 1 | # Lab 8: Web Deployment 2 | 3 | ## Goal of the lab 4 | 5 | - Run our LinePredictor as a web app, and send it some requests 6 | - Dockerize our web app 7 | - Deploy our web app to production 8 | 9 | ## Follow along 10 | 11 | ``` 12 | git pull 13 | cd lab8/ 14 | ``` 15 | 16 | This lab has quite a few new files, mostly in the new `api/` directory. 17 | 18 | ## Serving predictions from a web server 19 | 20 | First, we will get a Flask web server up and running and serving predictions.
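In outline, `api/app.py` wraps a predictor in a small Flask app with a `/v1/predict` endpoint that accepts either a base64-encoded image in a POST body or an `image_url` query parameter. Here is a stripped-down sketch of that shape -- it assumes the predictor exposes a `predict(image)` method returning a prediction and a confidence, and the response field names are illustrative; the real file differs in details and adds the metric logging used in Lab 9.

```python
# Stripped-down sketch of a Flask prediction server in the spirit of api/app.py.
# Predictor interface and response field names are illustrative assumptions.
import base64

import cv2
import numpy as np
import requests
from flask import Flask, jsonify, request

from text_recognizer.datasets import EmnistLinesDataset
from text_recognizer.line_predictor import LinePredictor

app = Flask(__name__)
predictor = LinePredictor(EmnistLinesDataset)


def _image_from_bytes(image_bytes: bytes) -> np.ndarray:
    """Decode raw image bytes into a grayscale numpy image."""
    buffer = np.frombuffer(image_bytes, dtype=np.uint8)
    return cv2.imdecode(buffer, cv2.IMREAD_GRAYSCALE)


@app.route("/v1/predict", methods=["GET", "POST"])
def predict():
    if request.method == "POST":
        # Body looks like {"image": "data:image/png;base64,<payload>"}
        b64_payload = request.get_json()["image"].split(",", 1)[1]
        image = _image_from_bytes(base64.b64decode(b64_payload))
    else:
        # GET passes a URL to the image instead of the image itself
        response = requests.get(request.args["image_url"], timeout=10)
        image = _image_from_bytes(response.content)
    pred, conf = predictor.predict(image)
    return jsonify({"pred": str(pred), "conf": float(conf)})


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8000)
```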
21 | 22 | ``` 23 | python api/app.py 24 | ``` 25 | 26 | Open up another terminal tab (click on the '+' button under 'File' to open the 27 | launcher). In this terminal, we'll send some test image to the web server 28 | we're running in the first terminal. 29 | 30 | **Make sure to `cd` into the `lab8` directory in this new terminal.** 31 | 32 | ``` 33 | export API_URL=http://0.0.0.0:8000 34 | curl -X POST "${API_URL}/v1/predict" -H 'Content-Type: application/json' --data '{ "image": "data:image/png;base64,'$(base64 -w0 -i text_recognizer/tests/support/emnist_lines/or\ if\ used\ the\ results.png)'" }' 35 | ``` 36 | 37 | If you want to look at the image you just sent, you can navigate to 38 | `lab8/text_recognizer/tests/support/emnist_lines` in the file browser on the 39 | left, and open the image. 40 | 41 | We can also send a request specifying a URL to an image: 42 | ``` 43 | curl "${API_URL}/v1/predict?image_url=http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/or%2Bif%2Bused%2Bthe%2Bresults.png" 44 | ``` 45 | 46 | You can shut down your flask server now. 47 | 48 | ## Adding web server tests 49 | 50 | The web server code should have a unit test just like the rest of our code. 51 | 52 | Let's check it out: the tests are in `api/tests/test_app.py`. 53 | You can run them with 54 | 55 | ```sh 56 | tasks/test_api.sh 57 | ``` 58 | 59 | ## Running web server in Docker 60 | 61 | Now, we'll build a docker image with our application. 62 | The Dockerfile in `api/Dockerfile` defines how we're building the docker image. 63 | 64 | Still in the `lab8` directory, run: 65 | 66 | ```sh 67 | tasks/build_api_docker.sh 68 | ``` 69 | 70 | This should take a couple of minutes to complete. 71 | 72 | When it's finished, you can run the server with `tasks/run_api_docker.sh` 73 | 74 | 75 | You can run the same curl commands as you did when you ran the flask server earlier, and see that you're getting the same results. 76 | 77 | ``` 78 | curl -X POST "${API_URL}/v1/predict" -H 'Content-Type: application/json' --data '{ "image": "data:image/png;base64,'$(base64 -w0 -i text_recognizer/tests/support/emnist_lines/or\ if\ used\ the\ results.png)'" }' 79 | 80 | curl "${API_URL}/v1/predict?image_url=http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/or%2Bif%2Bused%2Bthe%2Bresults.png" 81 | ``` 82 | 83 | If needed, you can connect to your running docker container by running: 84 | 85 | ```sh 86 | docker exec -it api bash 87 | ``` 88 | 89 | You can shut down your docker container now. 90 | 91 | We could deploy this container to a number of platforms. 92 | In this lab, we will deploy the app as a Docker container using https://render.com 93 | 94 | ## Web deployment 95 | 96 | TODO: render.com 97 | 98 | As before, we can test out our API by running a few curl commands (from the `lab8` directory). We need to change the `API_URL` first though to point it at Lambda: 99 | 100 | ``` 101 | export API_URL="https://REPLACE_THIS.execute-api.us-west-2.amazonaws.com/dev/" 102 | curl -X POST "${API_URL}/v1/predict" -H 'Content-Type: application/json' --data '{ "image": "data:image/png;base64,'$(base64 -w0 -i text_recognizer/tests/support/emnist_lines/or\ if\ used\ the\ results.png)'" }' 103 | curl "${API_URL}/v1/predict?image_url=http://s3-us-west-2.amazonaws.com/fsdl-public-assets/emnist_lines/or%2Bif%2Bused%2Bthe%2Bresults.png" 104 | ``` 105 | 106 | If the POST request fails, it's probably because you are in `api` and not in the top-level `lab8` directory. 
107 | 108 | You'll want to run the curl commands a couple of times -- the first execution may time out, because the function has to "warm up." 109 | After the first request, it will stay warm for 10-60 minutes. 110 | -------------------------------------------------------------------------------- /instructions/lab8_notes.md: -------------------------------------------------------------------------------- 1 | # Lab 8 notes 2 | 3 | - Live-code the Flask web app, explaining what's going on 4 | - At the end, should be able to curl the app running locally with a GET request and a POST request 5 | 6 | - Now, we're going to build it as a Docker container 7 | - go through each line 8 | - cover .dockerignore 9 | 10 | - Now, we're going to deploy to Lambda 11 | -------------------------------------------------------------------------------- /instructions/lab9.md: -------------------------------------------------------------------------------- 1 | # Lab 9: Monitoring a running web service 2 | 3 | ## Goals 4 | 5 | - Look at basic metrics and set up a more advanced one 6 | - Experience something going wrong in our deployed service, and catch it with metrics 7 | 8 | ## Monitoring 9 | 10 | We can look at the requests our function is receiving in the AWS CloudWatch interface. 11 | It shows requests, errors, duration, and some other metrics. 12 | 13 | What it does not show is the stuff we care about specifically for machine learning: data and prediction distributions. 14 | 15 | This is why we added a few extra metrics to `api/app.py`, in `predict()`. 16 | Using these simple print statements, we can set up CloudWatch metrics by using the Log Metrics functionality. 17 | 18 | ### Log Metrics 19 | 20 | Log in to your AWS Console, and make sure you're in the `us-west-2` region. 21 | 22 | Once you're in, click on 'Services' and go to 'CloudWatch' under 'Management Tools.' Click on 'Logs' in the left sidebar. This will have several log groups -- one for each of us. 23 | You can filter for yours by entering `/aws/lambda/text-recognizer-USERNAME-dev-api` (you need to enter the whole thing, not just your username). 24 | Click on yours. You'll see some log streams. If you click on one, you'll see some logs for requests to your API. Each log entry starts with START and ends with REPORT. The REPORT line has some interesting information about the API call, including memory usage and duration. 25 | 26 | We're also logging a couple of metrics for you: the confidences of the predictor and the mean intensities of the input images. 27 | Next, we're going to make it so you can visualize these metrics. Go back to the list of Log Groups by clicking on Logs again in the left sidebar. 28 | Find your log group, but don't click on it. You'll see a column that says 'Metric Filters.' You currently likely have 0 filters. Click on "0 filters." 29 | Click on 'Add Metric Filter.' 30 | 31 | Now, we need to add a pattern for parsing our metric out of the logs. Here's one you can use for the confidence levels. Enter this in the 'Filter Pattern' box. 32 | ``` 33 | [level=METRIC, metric_name=confidence, metric_value] 34 | ``` 35 | Click on 'Assign Metric.' 36 | Now, we need to name the metric and tell it what the data source is. Enter 'USERNAME_confidence' in the 'Metric name' box (replace USERNAME as usual). Click on 'Show advanced metric settings,' and for Metric Value, click on $metric_value to populate the text box. Hit 'Create Filter.'
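That filter pattern works because CloudWatch splits each log line on whitespace: a line like `METRIC confidence 0.92` yields the `level`, `metric_name`, and `metric_value` fields. The print statements in `predict()` are along these lines (a sketch -- exact variable names in `api/app.py` may differ):

```python
# Sketch of the METRIC log lines emitted from predict() in api/app.py.
# Variable names are illustrative; the space-separated format is what the
# CloudWatch filter patterns [level=METRIC, metric_name=..., metric_value] parse.
print(f"METRIC confidence {conf}")
print(f"METRIC mean_intensity {image.mean()}")
```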
37 | Since we're already here, let's go ahead and make another metric filter for the mean intensity. You can use this Filter Pattern: 38 | ``` 39 | [level=METRIC, metric_name=mean_intensity, metric_value] 40 | ``` 41 | You should name your metric "USERNAME_mean_intensity." 42 | 43 | Now we have a couple of metric filters set up. 44 | Unfortunately, Metric Filters only apply to new log entries, so go back to your terminal and send a few more requests to your endpoint. 45 | 46 | Now we can make a dashboard that shows our metrics. Click on 'Dashboards' in the left sidebar. Click 'Create Dashboard.' Name your dashboard your USERNAME. 47 | 48 | We're going to add a few widgets to your dashboard. For the first widget, select 'Line'. In the search box, search for your username. 49 | Click on 'Lambda > By Function Name' in the search results, and select the checkbox for 'Invocations.' This'll make a plot showing you much your API is being called. 50 | 51 | Let's add another widget -- select Line again. Go back to the Lambda metrics and select 'Duration' this time. 52 | 53 | Lastly, let's plot our custom metrics. Add one more 'Line' widget, search for your username again, and click on 'LogMetrics' and then 'Metrics with no dimensions'. 54 | Check two checkboxes: `USERNAME_confidence` and `USERNAME_mean_intensity.` Before hitting Create, click on the 'Graphed Metrics' tab above, and under the 'Y Axis' column, 55 | select the right arrow for one of the metrics (it doesn't matter which one). Now hit create. 56 | 57 | Feel free to resize and reorder your widgets. 58 | 59 | Make sure to save your dashboard -- else it won't persist across sessions. 60 | -------------------------------------------------------------------------------- /instructions/lab9_aws_and_monitoring.md: -------------------------------------------------------------------------------- 1 | Note that emailing credentials is a bad idea. You usually want to handle credentials in a more secure fashion. 2 | We're only doing it in this case because your credentials give you limited access and are for a temporary AWS account. 3 | 4 | You can also go to https://379872101858.signin.aws.amazon.com/console and log in with the email you used to register (and the password we emailed you), and create your own credentials if you prefer. 5 | 6 | ## Lambda monitoring 7 | 8 | We're going to check the logs and set up monitoring for your deployed API. In order to make the monitoring more interesting, we're going to simulate people using your API. 9 | 10 | **In order for us to do that, you need to go to https://goo.gl/forms/YQCXTI2k5R5Stq3u2 and submit your endpoint URL.** 11 | It should look like this (ending in "/dev/"): 12 | ``` 13 | https://REPLACE_THIS.execute-api.us-west-2.amazonaws.com/dev/ 14 | ``` 15 | 16 | If you haven't already sent a few requests to your endpoint, you should do so using the curl commands above. 17 | 18 | Next, log in to the AWS Console at https://379872101858.signin.aws.amazon.com/console (you should've gotten an email with your username and password). 19 | 20 | **Make sure that you switch into the Oregon region (also known as `us-west-2`) using the dropdown menu in the top right corner.** 21 | 22 | Once you're in, click on 'Services' and go to 'CloudWatch' under 'Management Tools.' Click on 'Logs' in the left sidebar. This will have several log groups -- one for each of us. 23 | You can filter for yours by entering `/aws/lambda/text-recognizer-USERNAME-dev-api` (you need to enter the whole thing, not just your username). 
24 | Click on yours. You'll some log streams. If you click on one, you'll see some logs for requests to your API. Each log entry starts with START and ends with REPORT. The REPORT line has some interesting information about the API call, including memory usage and duration. 25 | 26 | We're also logging a couple of metrics for you: the confidences of the predictor and the mean intensities of the input images. 27 | Next, we're going to make it so you can visualize these metrics. Go back to the list of Log Groups by clicking on Logs again in the left sidebar. 28 | Find your log group, but don't click on it. You'll see a column that says 'Metric Filters.' You currently likely have 0 filters. Click on "0 filters." 29 | Click on 'Add Metric Filter.' 30 | 31 | Now, we need to add a pattern for parsing our metric out of the logs. Here's one you can use for the confidence levels. Enter this in the 'Filter Pattern' box. 32 | ``` 33 | [level=METRIC, metric_name=confidence, metric_value] 34 | ``` 35 | Click on 'Assign Metric.' 36 | Now, we need to name the metric and tell it what the data source is. Enter 'USERNAME_confidence' in the 'Metric name' box (replace USERNAME as usual). Click on 'Show advanced metric settings,' and for Metric Value, click on $metric_value to populate the text box. Hit 'Create Filter.' 37 | Since we're already here, let's go ahead and make another metric filter for the mean intensity. You can use this Filter Pattern: 38 | ``` 39 | [level=METRIC, metric_name=mean_intensity, metric_value] 40 | ``` 41 | You should name your metric "USERNAME_mean_intensity." 42 | 43 | Now we have a couple of metric filters set up. 44 | Unfortunately, Metric Filters only apply to new log entries, so go back to your terminal and send a few more requests to your endpoint. 45 | 46 | Now we can make a dashboard that shows our metrics. Click on 'Dashboards' in the left sidebar. Click 'Create Dashboard.' Name your dashboard your USERNAME. 47 | 48 | We're going to add a few widgets to your dashboard. For the first widget, select 'Line'. In the search box, search for your username. 49 | Click on 'Lambda > By Function Name' in the search results, and select the checkbox for 'Invocations.' This'll make a plot showing you much your API is being called. 50 | 51 | Let's add another widget -- select Line again. Go back to the Lambda metrics and select 'Duration' this time. 52 | 53 | Lastly, let's plot our custom metrics. Add one more 'Line' widget, search for your username again, and click on 'LogMetrics' and then 'Metrics with no dimensions'. 54 | Check two checkboxes: `USERNAME_confidence` and `USERNAME_mean_intensity.` Before hitting Create, click on the 'Graphed Metrics' tab above, and under the 'Y Axis' column, 55 | select the right arrow for one of the metrics (it doesn't matter which one). Now hit create. 56 | 57 | Feel free to resize and reorder your widgets. 58 | 59 | Make sure to save your dashboard -- else it won't persist across sessions. 60 | 61 | You can play with your API here a bit while we turn on the traffic for everyone. Double check that you've submitted your endpoint to the Google form above. 62 | 63 | Once the traffic is going, refresh your dashboard a bit and watch it. We're going to change something about the traffic, and it's going to start making your API perform poorly. 64 | Try and figure out what's going on, and how you can fix it. We'll leave the adversarial traffic on for a while. 
65 | 66 | If you're curious, you can add a metric filter to show memory usage with this pattern: 67 | ``` 68 | [report_name="REPORT", request_id_name="RequestId:", request_id_value, duration_name="Duration:", duration_value, duration_unit="ms", billed_duration_name_1="Billed", bill_duration_name_2="Duration:", billed_duration_value, billed_duration_unit="ms", memory_size_name_1="Memory", memory_size_name_2="Size:", memory_size_value, memory_size_unit="MB", max_memory_used_name_1="Max", max_memory_used_name_2="Memory", max_memory_used_name_3="Used:", max_memory_used_value, max_memory_used_unit="MB"] 69 | ``` 70 | 71 | You can name it `USERNAME_memory`. Select `$max_memory_used_value` for the metric value. 72 | 73 | Make sure to save your dashboard! 74 | -------------------------------------------------------------------------------- /instructions/project_structure.md: -------------------------------------------------------------------------------- 1 | # Project Structure 2 | 3 | Before we get going with the labs, let's familiarize ourselves with the high-level design of the codebase. 4 | 5 | ## Follow along 6 | 7 | ``` 8 | cd lab8/ 9 | ``` 10 | 11 | ## Project structure 12 | 13 | Web backend 14 | 15 | ```sh 16 | api/ # Code for serving predictions as a REST API. 17 | tests/test_app.py # Test that predictions are working 18 | Dockerfile # Specifies Docker image that runs the web server. 19 | __init__.py 20 | app.py # Flask web server that serves predictions. 21 | ``` 22 | 23 | Data (not under version control - one level up in the hierarchy) 24 | 25 | ```sh 26 | data/ # Training data lives here 27 | raw/ 28 | emnist/metadata.toml # Specifications for downloading data 29 | ``` 30 | 31 | Experimentation 32 | 33 | ```sh 34 | evaluation/ # Scripts for evaluating model on eval set. 35 | evaluate_character_predictor.py 36 | 37 | notebooks/ # For snapshots of initial exploration, before solidifying code as proper Python files. 38 | 01-look-at-emnist.ipynb 39 | ``` 40 | 41 | Convenience scripts 42 | 43 | ```sh 44 | tasks/ 45 | # Deployment 46 | build_api_docker.sh 47 | 48 | # Code quality 49 | lint.sh 50 | 51 | # Tests 52 | test_api.sh 53 | test_functionality.sh 54 | test_validation.sh 55 | 56 | # Training 57 | train_character_predictor.sh 58 | ``` 59 | 60 | Main model and training code 61 | 62 | ```sh 63 | text_recognizer/ # Package that can be deployed as a self-contained prediction system 64 | __init__.py 65 | 66 | character_predictor.py # Takes a raw image and obtains a prediction 67 | line_predictor.py 68 | 69 | datasets/ # Code for loading datasets 70 | __init__.py 71 | dataset.py # Base class for datasets - logic for downloading data 72 | emnist_dataset.py 73 | emnist_essentials.json 74 | dataset_sequence.py 75 | 76 | models/ # Code for instantiating models, including data preprocessing and loss functions 77 | __init__.py 78 | base.py # Base class for models 79 | character_model.py 80 | 81 | networks/ # Code for building neural networks (i.e., 'dumb' input->output mappings) used by models 82 | __init__.py 83 | mlp.py 84 | 85 | tests/ 86 | support/ # Raw data used by tests 87 | test_character_predictor.py # Test model on a few key examples 88 | 89 | weights/ # Weights for production model 90 | CharacterModel_EmnistDataset_mlp_weights.h5 91 | 92 | util.py 93 | 94 | training/ # Code for running training experiments and selecting the best model. 95 | run_experiment.py # Parse experiment config and launch training.
96 | util.py # Logic for training a model with a given config 97 | ``` 98 | -------------------------------------------------------------------------------- /instructions/readme.md: -------------------------------------------------------------------------------- 1 | # Full Stack Deep Learning Labs 2 | 3 | Welcome! 4 | 5 | Project developed during lab sessions of the [Full Stack Deep Learning Bootcamp](https://fullstackdeeplearning.com). 6 | 7 | - We will build a handwriting recognition system from scratch, and deploy it as a web service. 8 | - Uses Keras, but designed to be modular, hackable, and scalable 9 | - Provides code for training models in parallel and store evaluation in Weights & Biases 10 | - We will set up continuous integration system for our codebase, which will check functionality of code and evaluate the model about to be deployed. 11 | - We will package up the prediction system as a REST API, deployable as a Docker container. 12 | - We will deploy the prediction system as a serverless function to Amazon Lambda. 13 | - Lastly, we will set up monitoring that alerts us when the incoming data distribution changes. 14 | 15 | ## Schedule for the November 2019 Bootcamp 16 | 17 | - First session (90 min) 18 | - [Setup](setup.md) (10 min): Get set up with jupyterhub. 19 | - Introduction to problem and [project structure](project_structure.md) (20 min). 20 | - Gather handwriting data (10 min). 21 | - [Lab 1](lab1.md) (20 min): Introduce EMNIST. Training code details. Train & evaluate character prediction baselines. 22 | - [Lab 2](lab2.md) (30 min): Introduce EMNIST Lines. Overview of CTC loss and model architecture. Train our model on EMNIST Lines. 23 | - Second session (60 min) 24 | - [Lab 3](lab3.md) (40 min): Weights & Biases + parallel experiments 25 | - [Lab 4](lab4.md) (20 min): IAM Lines and experimentation time (hyperparameter sweeps, leave running overnight). 26 | - Third session (90 min) 27 | - Review results from the class on W&B 28 | - [Lab 5](lab5.md) (45 min) Train & evaluate line detection model. 29 | - [Lab 6](lab6.md) (45 min) Label handwriting data generated by the class, download and version results. 30 | - Fourth session (75 min) 31 | - [Lab 7](lab7.md) (15 min) Add continuous integration that runs linting and tests on our codebase. 32 | - [Lab 8](lab8.md) (60 min) Deploy the trained model to the web using AWS Lambda. 33 | -------------------------------------------------------------------------------- /instructions/setup.md: -------------------------------------------------------------------------------- 1 | # Setup 2 | 3 | ## 1. Check out the repo 4 | 5 | You should already have the repo in your home directory. Go into it and make sure you have the latest. 6 | 7 | ```sh 8 | cd fsdl-text-recognizer-project 9 | git pull origin master 10 | ``` 11 | 12 | If not, open a shell in your JupyterLab instance and run 13 | 14 | ```sh 15 | git clone https://github.com/full-stack-deep-learning/fsdl-text-recognizer-project.git 16 | cd fsdl-text-recognizer-project 17 | ``` 18 | 19 | ## 2. Set up the Python environment 20 | 21 | ### If on GCP AI Platform Notebooks instance 22 | 23 | Simply run ```pip install -r requirements.txt -r requirements-dev.txt```. 24 | 25 | Also, run ```export PYTHONPATH=.``` before executing any commands later on, or you will get errors like `ModuleNotFoundError: No module named 'text_recognizer'`. 
26 | 27 | In order to not have to set `PYTHONPATH` in every terminal you open, just add that line as the last line of the `~/.bashrc` file using a text editor of your choice (e.g. `nano ~/.bashrc`) 28 | 29 | ### If on own machine 30 | 31 | Run `conda env create` to create an environment called `fsdl-text-recognizer`, as defined in `environment.yml`. 32 | This environment will provide us with the right Python version as well as the CUDA and CUDNN libraries. 33 | We will install Python libraries using `pip-sync`, however, which will let us do three nice things: 34 | 35 | 1. Separate out dev from production dependencies (`requirements-dev.in` vs `requirements.in`). 36 | 2. Have a lockfile of exact versions for all dependencies (the auto-generated `requirements-dev.txt` and `requirements.txt`). 37 | 3. Allow us to easily deploy to targets that may not support the `conda` environment. 38 | 39 | So, after running `conda env create`, activate the new environment and install the requirements: 40 | 41 | ```sh 42 | conda activate fsdl-text-recognizer 43 | pip-sync requirements.txt requirements-dev.txt 44 | ``` 45 | 46 | If you add, remove, or need to update versions of some requirements, edit the `.in` files, then run 47 | 48 | ``` 49 | pip-compile requirements.in && pip-compile requirements-dev.in 50 | ``` 51 | 52 | Now, every time you work in this directory, make sure to start your session with `conda activate fsdl-text-recognizer`. 53 | 54 | ## 3. Kick off a command 55 | 56 | Before we get started, please run a command that will take a little bit of time to execute. 57 | 58 | ```sh 59 | cd lab1/ 60 | python text_recognizer/datasets/emnist_dataset.py 61 | cd .. 62 | ``` 63 | 64 | # Ready 65 | 66 | Now you should be setup for the labs. The instructions for each lab are in readme files in their folders. 67 | 68 | You will notice that there are solutions for all the labs right here in the repo, too. 69 | If you get stuck, you are welcome to take a look! 70 | -------------------------------------------------------------------------------- /instructions/setup_extra.md: -------------------------------------------------------------------------------- 1 | # Setup 2 | 3 | ## Development on AWS (in progress) 4 | 5 | We will use the [Deep Learning Base AMI](https://aws.amazon.com/marketplace/pp/B07Y3VDBNS) which has NVIDA CUDA and GPU drivers, but no pre-installed deep learning framework Python packages (we will install those ourselves). 
6 | 7 | ```sh 8 | AMI="ami-0f4d5f31e6310624e" 9 | TYPE="p2.4xlarge" 10 | aws ec2 run-instances --image-id "$AMI" --instance-type "$TYPE" --key-name id_rsa --security-group-ids=sg-331f3543 11 | ``` 12 | 13 | We'll tag it for later ease of reference 14 | 15 | ```sh 16 | aws ec2 create-tags --resources --tags Key=Name,Value=fsdl 17 | ``` 18 | 19 | We also need to install aws CLI tools, and add two functions to our `.bashrc` or equivalent file 20 | 21 | ```sh 22 | function ec2ip() { 23 | echo $(aws ec2 describe-instances --filters "{\"Name\":\"tag:Name\", \"Values\":[\"$1\"]}" --query='Reservations[0].Instances[0].PublicIpAddress' | tr -d '"') 24 | } 25 | 26 | function ec2id() { 27 | echo $(aws ec2 describe-instances --filters "{\"Name\":\"tag:Name\", \"Values\":[\"$1\"]}" --query='Reservations[0].Instances[0].InstanceId' | tr -d '"') 28 | } 29 | ``` 30 | -------------------------------------------------------------------------------- /notebooks/archive/02-train-emnist-mlp.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 10, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "/Users/sergeyk/work/gradescope/full-stack-deep-learning/projects\n", 13 | "The autoreload extension is already loaded. To reload it, use:\n", 14 | " %reload_ext autoreload\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "from pathlib import Path\n", 20 | "repo_dirname = Path.cwd().parents[1].resolve()\n", 21 | "print(repo_dirname)\n", 22 | "\n", 23 | "%load_ext autoreload\n", 24 | "%autoreload 2\n", 25 | "\n", 26 | "GPU_IND = 0\n", 27 | "\n", 28 | "import numpy as np\n", 29 | "import matplotlib.pyplot as plt\n", 30 | "%matplotlib inline\n", 31 | "\n", 32 | "import sys\n", 33 | "sys.path.append('..')\n", 34 | "\n", 35 | "from text_recognizer.datasets.emnist import EMNIST\n", 36 | "from text_recognizer.models.emnist_mlp import create_mlp_model\n", 37 | "from training.util import train_model" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "data = EmnistDataset()\n", 47 | "num_classes = data.y_train.shape[1]\n", 48 | "input_shape = data.x_train.shape[1]" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 4, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "name": "stdout", 58 | "output_type": "stream", 59 | "text": [ 60 | "_________________________________________________________________\n", 61 | "Layer (type) Output Shape Param # \n", 62 | "=================================================================\n", 63 | "dense (Dense) (None, 128) 100480 \n", 64 | "_________________________________________________________________\n", 65 | "dropout (Dropout) (None, 128) 0 \n", 66 | "_________________________________________________________________\n", 67 | "dense_1 (Dense) (None, 128) 16512 \n", 68 | "_________________________________________________________________\n", 69 | "dropout_1 (Dropout) (None, 128) 0 \n", 70 | "_________________________________________________________________\n", 71 | "dense_2 (Dense) (None, 65) 8385 \n", 72 | "=================================================================\n", 73 | "Total params: 125,377\n", 74 | "Trainable params: 125,377\n", 75 | "Non-trainable params: 0\n", 76 | "_________________________________________________________________\n" 77 | ] 78 | } 79 | ], 80 | "source": [ 81 | "model = 
create_mlp_model(num_classes=num_classes, input_shape=input_shape)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 6, 87 | "metadata": {}, 88 | "outputs": [ 89 | { 90 | "name": "stdout", 91 | "output_type": "stream", 92 | "text": [ 93 | "Train on 523449 samples, validate on 174483 samples\n", 94 | "Epoch 1/1\n", 95 | "523449/523449 [==============================] - 85s 162us/step - loss: 1.0147 - acc: 0.7138 - val_loss: 0.7489 - val_acc: 0.7779\n", 96 | "Training took 85.003911 s\n" 97 | ] 98 | } 99 | ], 100 | "source": [ 101 | "history = train_model(model=model, x_train=data.x_train, y_train=data.y_train, epochs=1, batch_size=32, loss='categorical_crossentropy')" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 7, 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "name": "stdout", 111 | "output_type": "stream", 112 | "text": [ 113 | "116323/116323 [==============================] - 3s 26us/step\n", 114 | "Test loss/accuracy: 0.7460721686058285 0.7792096146081137\n" 115 | ] 116 | } 117 | ], 118 | "source": [ 119 | "score = model.evaluate(data.x_test, data.y_test, verbose=1)\n", 120 | "print('Test loss/accuracy:', score[0], score[1])" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 9, 126 | "metadata": {}, 127 | "outputs": [ 128 | { 129 | "ename": "NameError", 130 | "evalue": "name '__file__' is not defined", 131 | "output_type": "error", 132 | "traceback": [ 133 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 134 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 135 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpathlib\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mMODELS_DIRNAME\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpathlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPath\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m__file__\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparents\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresolve\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;34m'models'\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;34m'emnist_mlp'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mfilename\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf'model.h5'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mMODELS_DIRNAME\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mfilename\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 136 | "\u001b[0;31mNameError\u001b[0m: name '__file__' is not defined" 137 | ] 138 | } 139 | ], 140 | "source": [ 141 | "import pathlib\n", 142 | "MODELS_DIRNAME = pathlib.Path(__file__).parents[1].resolve() / 'models' / 'emnist_mlp'\n", 143 | "filename = f'model.h5'\n", 144 | "model.save(MODELS_DIRNAME / filename)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [] 153 | } 154 | ], 155 | "metadata": { 156 | "kernelspec": { 157 | "display_name": "Python 3", 158 | "language": "python", 159 | "name": "python3" 160 | }, 161 | "language_info": { 162 | "codemirror_mode": { 163 | "name": "ipython", 164 | "version": 3 165 | }, 166 | "file_extension": ".py", 
167 | "mimetype": "text/x-python", 168 | "name": "python", 169 | "nbconvert_exporter": "python", 170 | "pygments_lexer": "ipython3", 171 | "version": "3.6.6" 172 | } 173 | }, 174 | "nbformat": 4, 175 | "nbformat_minor": 2 176 | } 177 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 120 3 | target-version = ['py37'] 4 | -------------------------------------------------------------------------------- /requirements-dev.in: -------------------------------------------------------------------------------- 1 | -c requirements.txt 2 | bandit 3 | black 4 | gpustat 5 | gradescope-utils 6 | grequests # admin 7 | itermplot 8 | jupyterlab 9 | matplotlib 10 | mypy 11 | nltk 12 | pycodestyle 13 | pydocstyle 14 | pylint 15 | pytest 16 | pyyaml 17 | redlock-py 18 | tornado 19 | safety 20 | scipy 21 | pillow 22 | wandb 23 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile 3 | # To update, run: 4 | # 5 | # pip-compile requirements-dev.in 6 | # 7 | appdirs==1.4.3 # via black, virtualenv 8 | astroid==2.3.3 # via pylint 9 | attrs==19.3.0 # via black, jsonschema, pytest 10 | backcall==0.1.0 # via ipython 11 | bandit==1.6.2 # via -r requirements-dev.in 12 | black==19.10b0 # via -r requirements-dev.in 13 | bleach==3.1.4 # via nbconvert 14 | blessings==1.7 # via gpustat 15 | certifi==2019.11.28 # via -c requirements.txt, pipenv, requests, sentry-sdk 16 | chardet==3.0.4 # via -c requirements.txt, requests 17 | click==7.1.1 # via -c requirements.txt, black, safety, wandb 18 | configparser==5.0.0 # via wandb 19 | cycler==0.10.0 # via matplotlib 20 | decorator==4.4.2 # via ipython, traitlets 21 | defusedxml==0.6.0 # via nbconvert 22 | distlib==0.3.0 # via virtualenv 23 | docker-pycreds==0.4.0 # via wandb 24 | dparse==0.5.0 # via safety 25 | entrypoints==0.3 # via nbconvert 26 | filelock==3.0.12 # via virtualenv 27 | gevent==1.4.0 # via grequests 28 | gitdb==4.0.2 # via gitpython 29 | gitpython==3.1.0 # via bandit, wandb 30 | gpustat==0.6.0 # via -r requirements-dev.in 31 | gql==0.2.0 # via wandb 32 | gradescope-utils==0.3.1 # via -r requirements-dev.in 33 | graphql-core==1.1 # via gql 34 | greenlet==0.4.15 # via gevent 35 | grequests==0.4.0 # via -r requirements-dev.in 36 | idna==2.9 # via -c requirements.txt, requests 37 | importlib-metadata==1.6.0 # via jsonschema, pluggy, pytest, virtualenv 38 | ipykernel==5.2.0 # via notebook 39 | ipython-genutils==0.2.0 # via nbformat, notebook, traitlets 40 | ipython==7.13.0 # via ipykernel 41 | isort==4.3.21 # via pylint 42 | itermplot==0.331 # via -r requirements-dev.in 43 | jedi==0.16.0 # via ipython 44 | jinja2==2.11.1 # via -c requirements.txt, jupyterlab, jupyterlab-server, nbconvert, notebook 45 | json5==0.9.4 # via jupyterlab-server 46 | jsonschema==3.2.0 # via jupyterlab-server, nbformat 47 | jupyter-client==6.1.2 # via ipykernel, notebook 48 | jupyter-core==4.6.3 # via jupyter-client, nbconvert, nbformat, notebook 49 | jupyterlab-server==1.0.7 # via jupyterlab 50 | jupyterlab==2.0.1 # via -r requirements-dev.in 51 | kiwisolver==1.1.0 # via matplotlib 52 | lazy-object-proxy==1.4.3 # via astroid 53 | markupsafe==1.1.1 # via -c requirements.txt, jinja2 54 | matplotlib==3.2.1 # via -r requirements-dev.in, itermplot 55 | 
mccabe==0.6.1 # via pylint 56 | mistune==0.8.4 # via nbconvert 57 | more-itertools==8.2.0 # via pytest 58 | mypy-extensions==0.4.3 # via mypy 59 | mypy==0.770 # via -r requirements-dev.in 60 | nbconvert==5.6.1 # via notebook 61 | nbformat==5.0.4 # via nbconvert, notebook 62 | nltk==3.4.5 # via -r requirements-dev.in 63 | notebook==6.0.3 # via jupyterlab, jupyterlab-server 64 | numpy==1.18.2 # via -c requirements.txt, itermplot, matplotlib, scipy 65 | nvidia-ml-py3==7.352.0 # via gpustat, wandb 66 | packaging==20.3 # via dparse, pytest, safety 67 | pandocfilters==1.4.2 # via nbconvert 68 | parso==0.6.2 # via jedi 69 | pathspec==0.7.0 # via black 70 | pathtools==0.1.2 # via watchdog 71 | pbr==5.4.4 # via stevedore 72 | pexpect==4.8.0 # via ipython 73 | pickleshare==0.7.5 # via ipython 74 | pillow==7.0.0 # via -r requirements-dev.in 75 | pipenv==2018.11.26 # via dparse 76 | pluggy==0.13.1 # via pytest 77 | prometheus-client==0.7.1 # via notebook 78 | promise==2.3 # via gql, graphql-core 79 | prompt-toolkit==3.0.5 # via ipython 80 | psutil==5.7.0 # via gpustat, wandb 81 | ptyprocess==0.6.0 # via pexpect, terminado 82 | py==1.8.1 # via pytest 83 | pycodestyle==2.5.0 # via -r requirements-dev.in 84 | pydocstyle==5.0.2 # via -r requirements-dev.in 85 | pygments==2.6.1 # via ipython, nbconvert 86 | pylint==2.4.4 # via -r requirements-dev.in 87 | pyparsing==2.4.6 # via matplotlib, packaging 88 | pyrsistent==0.16.0 # via jsonschema 89 | pytest==5.4.1 # via -r requirements-dev.in 90 | python-dateutil==2.8.1 # via jupyter-client, matplotlib, wandb 91 | pyyaml==5.3.1 # via -r requirements-dev.in, bandit, dparse, wandb 92 | pyzmq==19.0.0 # via jupyter-client, notebook 93 | redis==3.4.1 # via redlock-py 94 | redlock-py==1.0.8 # via -r requirements-dev.in 95 | regex==2020.2.20 # via black 96 | requests==2.23.0 # via -c requirements.txt, gql, grequests, safety, wandb 97 | safety==1.8.7 # via -r requirements-dev.in 98 | scipy==1.4.1 # via -c requirements.txt, -r requirements-dev.in 99 | send2trash==1.5.0 # via notebook 100 | sentry-sdk==0.14.3 # via wandb 101 | shortuuid==1.0.1 # via wandb 102 | six==1.14.0 # via -c requirements.txt, astroid, bandit, bleach, blessings, cycler, docker-pycreds, gpustat, gql, graphql-core, itermplot, jsonschema, nltk, packaging, promise, pyrsistent, python-dateutil, stevedore, traitlets, virtualenv, wandb 103 | smmap==3.0.1 # via gitdb 104 | snowballstemmer==2.0.0 # via pydocstyle 105 | stevedore==1.32.0 # via bandit 106 | subprocess32==3.5.4 # via wandb 107 | terminado==0.8.3 # via notebook 108 | testpath==0.4.4 # via nbconvert 109 | toml==0.10.0 # via -c requirements.txt, black, dparse 110 | tornado==6.0.4 # via -r requirements-dev.in, ipykernel, jupyter-client, jupyterlab, notebook, terminado 111 | traitlets==4.3.3 # via ipykernel, ipython, jupyter-client, jupyter-core, nbconvert, nbformat, notebook 112 | typed-ast==1.4.1 # via astroid, black, mypy 113 | typing-extensions==3.7.4.1 # via mypy 114 | urllib3==1.25.8 # via -c requirements.txt, requests, sentry-sdk 115 | virtualenv-clone==0.5.4 # via pipenv 116 | virtualenv==20.0.15 # via pipenv 117 | wandb==0.8.31 # via -r requirements-dev.in 118 | watchdog==0.10.2 # via wandb 119 | wcwidth==0.1.9 # via prompt-toolkit, pytest 120 | webencodings==0.5.1 # via bleach 121 | wrapt==1.11.2 # via -c requirements.txt, astroid 122 | zipp==3.1.0 # via importlib-metadata 123 | 124 | # The following packages are considered to be unsafe in a requirements file: 125 | # pip 126 | # setuptools 127 | 
-------------------------------------------------------------------------------- /requirements.in: -------------------------------------------------------------------------------- 1 | boltons 2 | editdistance 3 | flask 4 | h5py 5 | numpy 6 | opencv-python-headless 7 | requests 8 | tensorflow==2.2.0rc2 9 | toml 10 | tqdm 11 | wrapt==1.11.* # due to pylint 12 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile 3 | # To update, run: 4 | # 5 | # pip-compile requirements.in 6 | # 7 | absl-py==0.9.0 # via tensorboard, tensorflow 8 | astunparse==1.6.3 # via tensorflow 9 | boltons==20.0.0 # via -r requirements.in 10 | cachetools==4.0.0 # via google-auth 11 | certifi==2019.11.28 # via requests 12 | chardet==3.0.4 # via requests 13 | click==7.1.1 # via flask 14 | editdistance==0.5.3 # via -r requirements.in 15 | flask==1.1.1 # via -r requirements.in 16 | gast==0.3.3 # via tensorflow 17 | google-auth-oauthlib==0.4.1 # via tensorboard 18 | google-auth==1.12.0 # via google-auth-oauthlib, tensorboard 19 | google-pasta==0.2.0 # via tensorflow 20 | grpcio==1.27.2 # via tensorboard, tensorflow 21 | h5py==2.10.0 # via -r requirements.in, tensorflow 22 | idna==2.9 # via requests 23 | itsdangerous==1.1.0 # via flask 24 | jinja2==2.11.1 # via flask 25 | keras-preprocessing==1.1.0 # via tensorflow 26 | markdown==3.2.1 # via tensorboard 27 | markupsafe==1.1.1 # via jinja2 28 | numpy==1.18.2 # via -r requirements.in, h5py, keras-preprocessing, opencv-python-headless, opt-einsum, scipy, tensorboard, tensorflow 29 | oauthlib==3.1.0 # via requests-oauthlib 30 | opencv-python-headless==4.2.0.32 # via -r requirements.in 31 | opt-einsum==3.2.0 # via tensorflow 32 | protobuf==3.11.3 # via tensorboard, tensorflow 33 | pyasn1-modules==0.2.8 # via google-auth 34 | pyasn1==0.4.8 # via pyasn1-modules, rsa 35 | requests-oauthlib==1.3.0 # via google-auth-oauthlib 36 | requests==2.23.0 # via -r requirements.in, requests-oauthlib, tensorboard 37 | rsa==4.0 # via google-auth 38 | scipy==1.4.1 # via tensorflow 39 | six==1.14.0 # via absl-py, astunparse, google-auth, google-pasta, grpcio, h5py, keras-preprocessing, protobuf, tensorboard, tensorflow 40 | tensorboard-plugin-wit==1.6.0.post2 # via tensorboard 41 | tensorboard==2.2.0 # via tensorflow 42 | tensorflow-estimator==2.2.0rc0 # via tensorflow 43 | tensorflow==2.2.0rc2 # via -r requirements.in 44 | termcolor==1.1.0 # via tensorflow 45 | toml==0.10.0 # via -r requirements.in 46 | tqdm==4.44.1 # via -r requirements.in 47 | urllib3==1.25.8 # via requests 48 | werkzeug==1.0.0 # via flask, tensorboard 49 | wheel==0.34.2 # via astunparse, tensorboard, tensorflow 50 | wrapt==1.11.2 # via -r requirements.in, tensorflow 51 | 52 | # The following packages are considered to be unsafe in a requirements file: 53 | # setuptools 54 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [pycodestyle] 2 | max-line-length = 120 3 | ignore = E203,W503 4 | 5 | [pydocstyle] 6 | convention = numpy 7 | add-ignore = D102,D103,D104,D105,D200,D205,D400 8 | 9 | [mypy] 10 | ignore_missing_imports = True 11 | 12 | [tool:pytest] 13 | addopts = --doctest-modules 14 | -------------------------------------------------------------------------------- /tasks/build_api_docker.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | sed 's/tensorflow==/tensorflow-cpu==/' requirements.txt > api/requirements.txt 4 | 5 | docker build -t text_recognizer_api -f api/Dockerfile . 6 | -------------------------------------------------------------------------------- /tasks/clean.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | find . -name "__pycache__" -exec rm -r {} \; 4 | -------------------------------------------------------------------------------- /tasks/format.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | black . 4 | -------------------------------------------------------------------------------- /tasks/lint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -uo pipefail 3 | set +e 4 | 5 | FAILURE=false 6 | 7 | echo "safety" 8 | safety check -r requirements.txt -r requirements-dev.txt || FAILURE=true 9 | 10 | echo "pylint" 11 | pylint api text_recognizer training || FAILURE=true 12 | 13 | echo "pycodestyle" 14 | pycodestyle api text_recognizer training || FAILURE=true 15 | 16 | echo "pydocstyle" 17 | pydocstyle api text_recognizer training || FAILURE=true 18 | 19 | echo "mypy" 20 | mypy api text_recognizer training || FAILURE=true 21 | 22 | echo "bandit" 23 | bandit -ll -r {api,text_recognizer,training} || FAILURE=true 24 | 25 | echo "shellcheck" 26 | shellcheck tasks/*.sh || FAILURE=true 27 | 28 | if [ "$FAILURE" = true ]; then 29 | echo "Linting failed" 30 | exit 1 31 | fi 32 | echo "Linting passed" 33 | exit 0 34 | -------------------------------------------------------------------------------- /tasks/prepare_sample_experiments.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python training/prepare_experiments.py training/experiments/sample.json 3 | -------------------------------------------------------------------------------- /tasks/run_api_docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | docker run -p 8000:8000 --name api -it --rm text_recognizer_api 3 | -------------------------------------------------------------------------------- /tasks/sync_requirements.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pip-sync requirements-dev.txt requirements.txt 4 | -------------------------------------------------------------------------------- /tasks/test_api.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pytest -s api 3 | -------------------------------------------------------------------------------- /tasks/test_functionality.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pytest -s text_recognizer 3 | -------------------------------------------------------------------------------- /tasks/test_validation.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pytest -s evaluation/evaluate* 3 | -------------------------------------------------------------------------------- /tasks/train_character_predictor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python training/run_experiment.py --save '{"dataset": "EmnistDataset", "model": 
"CharacterModel", "network": "mlp", "train_args": {"batch_size": 256}}' 3 | -------------------------------------------------------------------------------- /tasks/train_line_detector.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python training/run_experiment.py --gpu=1 --save '{"dataset": "IamParagraphsDataset", "model": "LineDetectorModel", "network": "fcn", "train_args": {"batch_size": 16, "epochs": 32}}' 3 | -------------------------------------------------------------------------------- /tasks/train_lstm_line_predictor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python training/run_experiment.py --save '{"dataset": "EmnistLinesDataset", "model": "LineModelCtc", "network": "line_lstm_ctc"}' 3 | -------------------------------------------------------------------------------- /tasks/train_lstm_line_predictor_on_iam.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python training/run_experiment.py --save '{"dataset": "IamLinesDataset", "model": "LineModelCtc", "network": "line_lstm_ctc"}' 3 | -------------------------------------------------------------------------------- /tasks/update_fsdl_paragraphs_metadata.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python training/update_metadata.py data/raw/fsdl_handwriting/metadata.toml 4 | -------------------------------------------------------------------------------- /tasks/update_requirements.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pip-compile -v requirements.in && pip-compile -v requirements-dev.in 4 | -------------------------------------------------------------------------------- /text_recognizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-full-stack/fsdl-text-recognizer/a99a3d3f0594dfceb249a56e8362337f9e12897e/text_recognizer/__init__.py -------------------------------------------------------------------------------- /text_recognizer/character_predictor.py: -------------------------------------------------------------------------------- 1 | """CharacterPredictor class""" 2 | from typing import Tuple, Union 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models import CharacterModel 7 | import text_recognizer.util as util 8 | 9 | 10 | class CharacterPredictor: 11 | """Given an image of a single handwritten character, recognizes it.""" 12 | 13 | def __init__(self): 14 | self.model = CharacterModel() 15 | self.model.load_weights() 16 | 17 | def predict(self, image_or_filename: Union[np.ndarray, str]) -> Tuple[str, float]: 18 | """Predict on a single image.""" 19 | if isinstance(image_or_filename, str): 20 | image = util.read_image(image_or_filename, grayscale=True) 21 | else: 22 | image = image_or_filename 23 | return self.model.predict_on_image(image) 24 | 25 | def evaluate(self, dataset): 26 | """Evaluate on a dataset.""" 27 | return self.model.evaluate(dataset.x_test, dataset.y_test) 28 | -------------------------------------------------------------------------------- /text_recognizer/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | """Dataset modules.""" 2 | from .emnist_dataset import EmnistDataset 3 | 4 | # Hide lines below until Lab 2 5 | from .emnist_lines_dataset import 
EmnistLinesDataset 6 | 7 | # Hide lines above until Lab 2 8 | # Hide lines below until Lab 4 9 | from .iam_lines_dataset import IamLinesDataset 10 | 11 | # Hide lines above until Lab 4 12 | # Hide lines below until Lab 5 13 | from .iam_dataset import IamDataset 14 | from .iam_paragraphs_dataset import IamParagraphsDataset 15 | 16 | # Hide lines above until Lab 5 17 | # Hide lines below until Lab 6 18 | from .fsdl_handwriting_dataset import FsdlHandwritingDataset 19 | 20 | # Hide lines above until Lab 6 21 | -------------------------------------------------------------------------------- /text_recognizer/datasets/dataset.py: -------------------------------------------------------------------------------- 1 | """Dataset class to be extended by dataset-specific classes.""" 2 | from pathlib import Path 3 | import argparse 4 | import os 5 | 6 | from text_recognizer import util 7 | 8 | 9 | class Dataset: 10 | """Simple abstract class for datasets.""" 11 | 12 | @classmethod 13 | def data_dirname(cls): 14 | return Path(__file__).resolve().parents[2] / "data" 15 | 16 | def load_or_generate_data(self): 17 | pass 18 | 19 | 20 | def _download_raw_dataset(metadata): 21 | if os.path.exists(metadata["filename"]): 22 | return 23 | print(f"Downloading raw dataset from {metadata['url']}...") 24 | util.download_url(metadata["url"], metadata["filename"]) 25 | print("Computing SHA-256...") 26 | sha256 = util.compute_sha256(metadata["filename"]) 27 | if sha256 != metadata["sha256"]: 28 | raise ValueError("Downloaded data file SHA-256 does not match that listed in metadata document.") 29 | 30 | 31 | def _parse_args(): 32 | parser = argparse.ArgumentParser() 33 | parser.add_argument( 34 | "--subsample_fraction", type=float, default=None, help="If given, is used as the fraction of data to expose.", 35 | ) 36 | return parser.parse_args() 37 | -------------------------------------------------------------------------------- /text_recognizer/datasets/dataset_sequence.py: -------------------------------------------------------------------------------- 1 | """DatasetSequence class.""" 2 | import numpy as np 3 | from tensorflow.keras.utils import Sequence 4 | 5 | 6 | def _shuffle(x, y): 7 | """Shuffle x and y maintaining their association.""" 8 | shuffled_indices = np.random.permutation(x.shape[0]) 9 | return x[shuffled_indices], y[shuffled_indices] 10 | 11 | 12 | class DatasetSequence(Sequence): 13 | """ 14 | Minimal implementation of https://keras.io/utils/#sequence. 
15 | """ 16 | 17 | def __init__(self, x, y, batch_size=32, augment_fn=None, format_fn=None): 18 | self.x = x 19 | self.y = y 20 | self.batch_size = batch_size 21 | self.augment_fn = augment_fn 22 | self.format_fn = format_fn 23 | 24 | def __len__(self): 25 | """Return length of the dataset.""" 26 | return int(np.ceil(len(self.x) / float(self.batch_size))) 27 | 28 | def __getitem__(self, idx): 29 | """Return a single batch.""" 30 | # idx = 0 # If you want to intentionally overfit to just one batch 31 | begin = idx * self.batch_size 32 | end = (idx + 1) * self.batch_size 33 | 34 | # batch_x = np.take(self.x, range(begin, end), axis=0, mode='clip') 35 | # batch_y = np.take(self.y, range(begin, end), axis=0, mode='clip') 36 | 37 | batch_x = self.x[begin:end] 38 | batch_y = self.y[begin:end] 39 | 40 | if batch_x.dtype == np.uint8: 41 | batch_x = (batch_x / 255).astype(np.float32) 42 | 43 | if self.augment_fn: 44 | batch_x, batch_y = self.augment_fn(batch_x, batch_y) 45 | 46 | if self.format_fn: 47 | batch_x, batch_y = self.format_fn(batch_x, batch_y) 48 | 49 | return batch_x, batch_y 50 | 51 | def on_epoch_end(self) -> None: 52 | """Shuffle data.""" 53 | self.x, self.y = _shuffle(self.x, self.y) 54 | -------------------------------------------------------------------------------- /text_recognizer/datasets/emnist_essentials.json: -------------------------------------------------------------------------------- 1 | {"mapping": [[0, "0"], [1, "1"], [2, "2"], [3, "3"], [4, "4"], [5, "5"], [6, "6"], [7, "7"], [8, "8"], [9, "9"], [10, "A"], [11, "B"], [12, "C"], [13, "D"], [14, "E"], [15, "F"], [16, "G"], [17, "H"], [18, "I"], [19, "J"], [20, "K"], [21, "L"], [22, "M"], [23, "N"], [24, "O"], [25, "P"], [26, "Q"], [27, "R"], [28, "S"], [29, "T"], [30, "U"], [31, "V"], [32, "W"], [33, "X"], [34, "Y"], [35, "Z"], [36, "a"], [37, "b"], [38, "c"], [39, "d"], [40, "e"], [41, "f"], [42, "g"], [43, "h"], [44, "i"], [45, "j"], [46, "k"], [47, "l"], [48, "m"], [49, "n"], [50, "o"], [51, "p"], [52, "q"], [53, "r"], [54, "s"], [55, "t"], [56, "u"], [57, "v"], [58, "w"], [59, "x"], [60, "y"], [61, "z"]], "input_shape": [28, 28]} -------------------------------------------------------------------------------- /text_recognizer/datasets/emnist_lines_dataset.py: -------------------------------------------------------------------------------- 1 | """Emnist Lines dataset: synthetic handwriting lines dataset made from EMNIST characters.""" 2 | from collections import defaultdict 3 | from pathlib import Path 4 | 5 | import h5py 6 | import numpy as np 7 | from tensorflow.keras.utils import to_categorical 8 | 9 | from text_recognizer.datasets.dataset import Dataset 10 | from text_recognizer.datasets.emnist_dataset import EmnistDataset 11 | 12 | 13 | DATA_DIRNAME = Dataset.data_dirname() / "processed" / "emnist_lines" 14 | ESSENTIALS_FILENAME = Path(__file__).parents[0].resolve() / "emnist_lines_essentials.json" 15 | 16 | 17 | class EmnistLinesDataset(Dataset): 18 | """ 19 | EmnistLinesDataset class. 20 | 21 | Parameters 22 | ---------- 23 | max_length 24 | Max line length in characters. 25 | max_overlap 26 | Max overlap between characters in a line. 27 | num_train 28 | Number of training examples to generate. 29 | num_test 30 | Number of test examples to generate. 
31 | """ 32 | 33 | def __init__( 34 | self, 35 | max_length: int = 34, 36 | min_overlap: float = 0, 37 | max_overlap: float = 0.33, 38 | num_train: int = 10000, 39 | num_test: int = 1000, 40 | ): 41 | self.emnist = EmnistDataset() 42 | self.mapping = self.emnist.mapping 43 | self.max_length = max_length 44 | self.min_overlap = min_overlap 45 | self.max_overlap = max_overlap 46 | self.num_classes = len(self.mapping) 47 | self.input_shape = ( 48 | self.emnist.input_shape[0], 49 | self.emnist.input_shape[1] * self.max_length, 50 | ) 51 | self.output_shape = (self.max_length, self.num_classes) 52 | self.num_train = num_train 53 | self.num_test = num_test 54 | self.x_train = None 55 | self.y_train = None 56 | self.x_test = None 57 | self.y_test = None 58 | 59 | @property 60 | def data_filename(self): 61 | return ( 62 | DATA_DIRNAME 63 | / f"ml_{self.max_length}_o{self.min_overlap}_{self.max_overlap}_ntr{self.num_train}_nte{self.num_test}.h5" 64 | ) 65 | 66 | def load_or_generate_data(self): 67 | np.random.seed(42) 68 | 69 | if not self.data_filename.exists(): 70 | self._generate_data("train") 71 | self._generate_data("test") 72 | self._load_data() 73 | 74 | def __repr__(self): 75 | return ( 76 | "EMNIST Lines Dataset\n" # pylint: disable=no-member 77 | f"Max length: {self.max_length}\n" 78 | f"Min overlap: {self.min_overlap}\n" 79 | f"Max overlap: {self.max_overlap}\n" 80 | f"Num classes: {self.num_classes}\n" 81 | f"Input shape: {self.input_shape}\n" 82 | f"Train: {self.x_train.shape} {self.y_train.shape}\n" 83 | f"Test: {self.x_test.shape} {self.y_test.shape}\n" 84 | ) 85 | 86 | def _load_data(self): 87 | print("EmnistLinesDataset loading data from HDF5...") 88 | with h5py.File(self.data_filename, "r") as f: 89 | self.x_train = f["x_train"][:] 90 | self.y_train = f["y_train"][:] 91 | self.x_test = f["x_test"][:] 92 | self.y_test = f["y_test"][:] 93 | 94 | def _generate_data(self, split): 95 | print("EmnistLinesDataset generating data...") 96 | 97 | # pylint: disable=import-outside-toplevel 98 | from text_recognizer.datasets.sentence_generator import SentenceGenerator 99 | 100 | sentence_generator = SentenceGenerator(self.max_length) 101 | 102 | emnist = self.emnist 103 | emnist.load_or_generate_data() 104 | if split == "train": 105 | samples_by_char = get_samples_by_char(emnist.x_train, emnist.y_train_int, emnist.mapping) 106 | else: 107 | samples_by_char = get_samples_by_char(emnist.x_test, emnist.y_test_int, emnist.mapping) 108 | 109 | num = self.num_train if split == "train" else self.num_test 110 | 111 | DATA_DIRNAME.mkdir(parents=True, exist_ok=True) 112 | with h5py.File(self.data_filename, "a") as f: 113 | x, y = create_dataset_of_images( 114 | num, samples_by_char, sentence_generator, self.min_overlap, self.max_overlap 115 | ) 116 | y = convert_strings_to_categorical_labels(y, emnist.inverse_mapping) 117 | f.create_dataset(f"x_{split}", data=x, dtype="u1", compression="lzf") 118 | f.create_dataset(f"y_{split}", data=y, dtype="u1", compression="lzf") 119 | 120 | 121 | def get_samples_by_char(samples, labels, mapping): 122 | samples_by_char = defaultdict(list) 123 | for sample, label in zip(samples, labels.flatten()): 124 | samples_by_char[mapping[label]].append(sample) 125 | return samples_by_char 126 | 127 | 128 | def select_letter_samples_for_string(string, samples_by_char): 129 | zero_image = np.zeros((28, 28), np.uint8) 130 | sample_image_by_char = {} 131 | for char in string: 132 | if char in sample_image_by_char: 133 | continue 134 | samples = samples_by_char[char] 135 | sample = 
samples[np.random.choice(len(samples))] if samples else zero_image 136 | sample_image_by_char[char] = sample.reshape(28, 28) 137 | return [sample_image_by_char[char] for char in string] 138 | 139 | 140 | def construct_image_from_string( 141 | string: str, samples_by_char: dict, min_overlap: float, max_overlap: float 142 | ) -> np.ndarray: 143 | overlap = np.random.uniform(min_overlap, max_overlap) 144 | sampled_images = select_letter_samples_for_string(string, samples_by_char) 145 | N = len(sampled_images) 146 | H, W = sampled_images[0].shape 147 | next_overlap_width = W - int(overlap * W) 148 | concatenated_image = np.zeros((H, W * N), np.uint8) 149 | x = 0 150 | for image in sampled_images: 151 | concatenated_image[:, x : (x + W)] += image 152 | x += next_overlap_width 153 | return np.minimum(255, concatenated_image) 154 | 155 | 156 | def create_dataset_of_images(N, samples_by_char, sentence_generator, min_overlap, max_overlap): 157 | sample_label = sentence_generator.generate() 158 | sample_image = construct_image_from_string(sample_label, samples_by_char, 0, 0) # sample_image has 0 overlap 159 | images = np.zeros( 160 | (N, sample_image.shape[0], sample_image.shape[1]), np.uint8, # pylint: disable=unsubscriptable-object 161 | ) 162 | labels = [] 163 | for n in range(N): 164 | label = None 165 | for _ in range(10): # Try several times to generate before actually erroring 166 | try: 167 | label = sentence_generator.generate() 168 | break 169 | except Exception: # pylint: disable=broad-except 170 | pass 171 | if label is None: 172 | raise RuntimeError("Was not able to generate a valid string") 173 | images[n] = construct_image_from_string(label, samples_by_char, min_overlap, max_overlap) 174 | labels.append(label) 175 | return images, labels 176 | 177 | 178 | def convert_strings_to_categorical_labels(labels, mapping): 179 | return np.array([to_categorical([mapping[c] for c in label], num_classes=len(mapping)) for label in labels]) 180 | 181 | 182 | def main(): 183 | dataset = EmnistLinesDataset() 184 | dataset.load_or_generate_data() 185 | print(dataset) 186 | 187 | 188 | if __name__ == "__main__": 189 | main() 190 | -------------------------------------------------------------------------------- /text_recognizer/datasets/fsdl_handwriting_dataset.py: -------------------------------------------------------------------------------- 1 | """Class for loading our own FSDL Handwriting dataset, which encompasses both paragraphs and lines.""" 2 | import json 3 | 4 | import numpy as np 5 | import toml 6 | 7 | from text_recognizer import util 8 | from text_recognizer.datasets.dataset import Dataset 9 | 10 | 11 | RAW_DATA_DIRNAME = Dataset.data_dirname() / "raw" / "fsdl_handwriting" 12 | METADATA_FILENAME = RAW_DATA_DIRNAME / "metadata.toml" 13 | PAGES_DIRNAME = RAW_DATA_DIRNAME / "pages" 14 | 15 | 16 | class FsdlHandwritingDataset(Dataset): 17 | """ 18 | FSDL Handwriting dataset gathered in class. 
19 | """ 20 | 21 | def __init__(self): 22 | self.metadata = toml.load(METADATA_FILENAME) 23 | with open(RAW_DATA_DIRNAME / self.metadata["filename"]) as f: 24 | page_data = [json.loads(line) for line in f.readlines()] 25 | # NOTE: pylint bug https://github.com/PyCQA/pylint/issues/3164 26 | # pylint: disable=unnecessary-comprehension 27 | self.data_by_page_id = { 28 | id_: data for id_, data in (_extract_id_and_data(page_datum) for page_datum in page_data) 29 | } 30 | # pylint: enable=unnecessary-comprehension 31 | 32 | def load_or_generate_data(self): 33 | if len(self.page_filenames) < len(self.data_by_page_id): 34 | self._download_pages() 35 | 36 | @property 37 | def page_filenames(self): 38 | return list(PAGES_DIRNAME.glob("*.jpg")) 39 | 40 | def _download_pages(self): 41 | PAGES_DIRNAME.mkdir(exist_ok=True, parents=True) 42 | ids, urls = zip(*[(id_, data["url"]) for id_, data in self.data_by_page_id.items()]) 43 | filenames = [PAGES_DIRNAME / id_ for id_ in ids] 44 | util.download_urls(urls, filenames) 45 | 46 | @property 47 | def line_regions_by_id(self): 48 | """Return a dict from page id to a list of (x1, x2, y1, y2) coordinates of all lines in it.""" 49 | return {id_: data["regions"] for id_, data in self.data_by_page_id.items()} 50 | 51 | @property 52 | def line_strings_by_id(self): 53 | """Return a dict from name of image to a list of strings.""" 54 | return {id_: data["strings"] for id_, data in self.data_by_page_id.items()} 55 | 56 | def __repr__(self): 57 | return "FSDL Handwriting Dataset\n" f"Num pages: {len(self.data_by_page_id)}\n" 58 | 59 | 60 | def _extract_id_and_data(page_datum): 61 | """ 62 | page_datum is of the form 63 | { 64 | 'label': ['line'], 65 | 'shape': 'rectangle', 66 | 'points': [ 67 | [0.1422924901185771, 0.18948824343015216], 68 | [0.875494071146245, 0.18948824343015216], 69 | [0.875494071146245, 0.25034578146611347], 70 | [0.1422924901185771, 0.25034578146611347] 71 | ], 72 | 'notes': 'A MOVE to stop Mr.
Gaitskiell from', 73 | 'imageWidth': 1240, 74 | 'imageHeight': 1771 75 | } 76 | """ 77 | url = page_datum["content"] 78 | id_ = url.split("/")[-1] 79 | regions = [] 80 | strings = [] 81 | try: 82 | for annotation in page_datum["annotation"]: 83 | points = np.array(annotation["points"]) 84 | x1, y1 = points.min(0) 85 | x2, y2 = points.max(0) 86 | regions.append( 87 | { 88 | "x1": int(x1 * annotation["imageWidth"]), 89 | "y1": int(y1 * annotation["imageHeight"]), 90 | "x2": int(x2 * annotation["imageWidth"]), 91 | "y2": int(y2 * annotation["imageHeight"]), 92 | } 93 | ) 94 | strings.append(annotation["notes"]) 95 | except Exception: # pylint: disable=broad-except 96 | pass 97 | return id_, {"url": url, "regions": regions, "strings": strings} 98 | 99 | 100 | def main(): 101 | dataset = FsdlHandwritingDataset() 102 | dataset.load_or_generate_data() 103 | print(dataset) 104 | 105 | 106 | if __name__ == "__main__": 107 | main() 108 | -------------------------------------------------------------------------------- /text_recognizer/datasets/iam_dataset.py: -------------------------------------------------------------------------------- 1 | """Class for loading the IAM dataset, which encompasses both paragraphs and lines, with associated utilities.""" 2 | import os 3 | from typing import Dict, List 4 | import xml.etree.ElementTree as ElementTree 5 | import zipfile 6 | 7 | from boltons.cacheutils import cachedproperty 8 | import toml 9 | 10 | from text_recognizer.datasets.dataset import Dataset, _download_raw_dataset 11 | 12 | 13 | RAW_DATA_DIRNAME = Dataset.data_dirname() / "raw" / "iam" 14 | METADATA_FILENAME = RAW_DATA_DIRNAME / "metadata.toml" 15 | EXTRACTED_DATASET_DIRNAME = RAW_DATA_DIRNAME / "iamdb" 16 | 17 | DOWNSAMPLE_FACTOR = 2 # If images were downsampled, the regions must also be. 18 | LINE_REGION_PADDING = 0 # add this many pixels around the exact coordinates 19 | 20 | 21 | class IamDataset(Dataset): 22 | """ 23 | "The IAM Lines dataset, first published at the ICDAR 1999, contains forms of unconstrained handwritten text, 24 | which were scanned at a resolution of 300dpi and saved as PNG images with 256 gray levels. 25 | From http://www.fki.inf.unibe.ch/databases/iam-handwriting-database 26 | 27 | The data split we will use is 28 | IAM lines Large Writer Independent Text Line Recognition Task (lwitlrt): 9,862 text lines. 29 | The validation set has been merged into the train set. 30 | The train set has 7,101 lines from 326 writers. 31 | The test set has 1,861 lines from 128 writers. 32 | The text lines of all data sets are mutually exclusive, thus each writer has contributed to one set only. 
33 | """ 34 | 35 | def __init__(self): 36 | self.metadata = toml.load(METADATA_FILENAME) 37 | 38 | def load_or_generate_data(self): 39 | if not self.xml_filenames: 40 | self._download_iam() 41 | 42 | @property 43 | def xml_filenames(self): 44 | return list((EXTRACTED_DATASET_DIRNAME / "xml").glob("*.xml")) 45 | 46 | @property 47 | def form_filenames(self): 48 | return list((EXTRACTED_DATASET_DIRNAME / "forms").glob("*.jpg")) 49 | 50 | def _download_iam(self): 51 | curdir = os.getcwd() 52 | os.chdir(RAW_DATA_DIRNAME) 53 | _download_raw_dataset(self.metadata) 54 | _extract_raw_dataset(self.metadata) 55 | os.chdir(curdir) 56 | 57 | @property 58 | def form_filenames_by_id(self): 59 | return {filename.stem: filename for filename in self.form_filenames} 60 | 61 | @cachedproperty 62 | def line_strings_by_id(self): 63 | """Return a dict from name of IAM form to a list of line texts in it.""" 64 | return {filename.stem: _get_line_strings_from_xml_file(filename) for filename in self.xml_filenames} 65 | 66 | @cachedproperty 67 | def line_regions_by_id(self): 68 | """Return a dict from name of IAM form to a list of (x1, x2, y1, y2) coordinates of all lines in it.""" 69 | return {filename.stem: _get_line_regions_from_xml_file(filename) for filename in self.xml_filenames} 70 | 71 | def __repr__(self): 72 | """Print info about the dataset.""" 73 | return "IAM Dataset\n" f"Num forms: {len(self.xml_filenames)}\n" 74 | 75 | 76 | def _extract_raw_dataset(metadata): 77 | print("Extracting IAM data") 78 | with zipfile.ZipFile(metadata["filename"], "r") as zip_file: 79 | zip_file.extractall() 80 | 81 | 82 | def _get_line_strings_from_xml_file(filename: str) -> List[str]: 83 | """Get the text content of each line. Note that we replace &quot; with ".""" 84 | xml_root_element = ElementTree.parse(filename).getroot() # nosec 85 | xml_line_elements = xml_root_element.findall("handwritten-part/line") 86 | return [el.attrib["text"].replace("&quot;", '"') for el in xml_line_elements] 87 | 88 | 89 | def _get_line_regions_from_xml_file(filename: str) -> List[Dict[str, int]]: 90 | """Get the line region dict for each line.""" 91 | xml_root_element = ElementTree.parse(filename).getroot() # nosec 92 | xml_line_elements = xml_root_element.findall("handwritten-part/line") 93 | return [_get_line_region_from_xml_element(el) for el in xml_line_elements] 94 | 95 | 96 | def _get_line_region_from_xml_element(xml_line) -> Dict[str, int]: 97 | """ 98 | Parameters 99 | ---------- 100 | xml_line 101 | xml element that has x, y, width, and height attributes 102 | """ 103 | word_elements = xml_line.findall("word/cmp") 104 | x1s = [int(el.attrib["x"]) for el in word_elements] 105 | y1s = [int(el.attrib["y"]) for el in word_elements] 106 | x2s = [int(el.attrib["x"]) + int(el.attrib["width"]) for el in word_elements] 107 | y2s = [int(el.attrib["y"]) + int(el.attrib["height"]) for el in word_elements] 108 | return { 109 | "x1": min(x1s) // DOWNSAMPLE_FACTOR - LINE_REGION_PADDING, 110 | "y1": min(y1s) // DOWNSAMPLE_FACTOR - LINE_REGION_PADDING, 111 | "x2": max(x2s) // DOWNSAMPLE_FACTOR + LINE_REGION_PADDING, 112 | "y2": max(y2s) // DOWNSAMPLE_FACTOR + LINE_REGION_PADDING, 113 | } 114 | 115 | 116 | def main(): 117 | dataset = IamDataset() 118 | dataset.load_or_generate_data() 119 | print(dataset) 120 | 121 | 122 | if __name__ == "__main__": 123 | main() 124 | -------------------------------------------------------------------------------- /text_recognizer/datasets/iam_lines_dataset.py:
-------------------------------------------------------------------------------- 1 | """ 2 | IamLinesDataset class. 3 | 4 | We will use a processed version of this dataset, without including code that did the processing. 5 | We will look at how to generate processed data from raw IAM data in the IamParagraphsDataset. 6 | """ 7 | 8 | from boltons.cacheutils import cachedproperty 9 | import h5py 10 | from tensorflow.keras.utils import to_categorical 11 | 12 | from text_recognizer import util 13 | from text_recognizer.datasets.dataset import Dataset, _parse_args 14 | from text_recognizer.datasets.emnist_dataset import EmnistDataset 15 | 16 | 17 | PROCESSED_DATA_DIRNAME = Dataset.data_dirname() / "processed" / "iam_lines" 18 | PROCESSED_DATA_FILENAME = PROCESSED_DATA_DIRNAME / "iam_lines.h5" 19 | PROCESSED_DATA_URL = "https://s3-us-west-2.amazonaws.com/fsdl-public-assets/iam_lines.h5" 20 | 21 | 22 | class IamLinesDataset(Dataset): 23 | """ 24 | 25 | Note that we use cachedproperty because data takes time to load. 26 | """ 27 | 28 | def __init__(self, subsample_fraction: float = None): 29 | self.mapping = EmnistDataset().mapping 30 | self.inverse_mapping = {v: k for k, v in self.mapping.items()} 31 | self.num_classes = len(self.mapping) 32 | self.input_shape = (28, 952) 33 | self.output_shape = (97, self.num_classes) 34 | 35 | self.subsample_fraction = subsample_fraction 36 | self.x_train = None 37 | self.x_test = None 38 | self.y_train_int = None 39 | self.y_test_int = None 40 | 41 | def load_or_generate_data(self): 42 | """Load or generate dataset data.""" 43 | if not PROCESSED_DATA_FILENAME.exists(): 44 | PROCESSED_DATA_DIRNAME.mkdir(parents=True, exist_ok=True) 45 | print("Downloading IAM lines...") 46 | util.download_url(PROCESSED_DATA_URL, PROCESSED_DATA_FILENAME) 47 | with h5py.File(PROCESSED_DATA_FILENAME, "r") as f: 48 | self.x_train = f["x_train"][:] 49 | self.y_train_int = f["y_train"][:] 50 | self.x_test = f["x_test"][:] 51 | self.y_test_int = f["y_test"][:] 52 | self._subsample() 53 | 54 | def _subsample(self): 55 | """Only this fraction of data will be loaded.""" 56 | if self.subsample_fraction is None: 57 | return 58 | num_train = int(self.x_train.shape[0] * self.subsample_fraction) 59 | num_test = int(self.x_test.shape[0] * self.subsample_fraction) 60 | self.x_train = self.x_train[:num_train] 61 | self.y_train_int = self.y_train_int[:num_train] 62 | self.x_test = self.x_test[:num_test] 63 | self.y_test_int = self.y_test_int[:num_test] 64 | 65 | @cachedproperty 66 | def y_train(self): 67 | """Return y_train""" 68 | return to_categorical(self.y_train_int, self.num_classes) 69 | 70 | @cachedproperty 71 | def y_test(self): 72 | """Return y_test""" 73 | return to_categorical(self.y_test_int, self.num_classes) 74 | 75 | def __repr__(self): 76 | """Print info about the dataset.""" 77 | return ( 78 | "IAM Lines Dataset\n" # pylint: disable=no-member 79 | f"Num classes: {self.num_classes}\n" 80 | f"Mapping: {self.mapping}\n" 81 | f"Train: {self.x_train.shape} {self.y_train.shape}\n" 82 | f"Test: {self.x_test.shape} {self.y_test.shape}\n" 83 | ) 84 | 85 | 86 | def main(): 87 | """Load dataset and print info.""" 88 | args = _parse_args() 89 | dataset = IamLinesDataset(subsample_fraction=args.subsample_fraction) 90 | dataset.load_or_generate_data() 91 | print(dataset) 92 | 93 | 94 | if __name__ == "__main__": 95 | main() 96 | -------------------------------------------------------------------------------- /text_recognizer/datasets/sentence_generator.py: 
-------------------------------------------------------------------------------- 1 | """SentenceGenerator class and supporting functions.""" 2 | import itertools 3 | import re 4 | import string 5 | from typing import Optional 6 | 7 | import nltk 8 | import numpy as np 9 | 10 | from text_recognizer.datasets.dataset import Dataset 11 | 12 | NLTK_DATA_DIRNAME = Dataset.data_dirname() / "raw" / "nltk" 13 | 14 | 15 | class SentenceGenerator: 16 | """Generate text sentences using the Brown corpus.""" 17 | 18 | def __init__(self, max_length: Optional[int] = None): 19 | self.text = brown_text() 20 | self.word_start_inds = [0] + [_.start(0) + 1 for _ in re.finditer(" ", self.text)] 21 | self.max_length = max_length 22 | 23 | def generate(self, max_length: Optional[int] = None) -> str: 24 | """ 25 | Sample a string from text of the Brown corpus of length at least one word and at most max_length, 26 | padding it to max_length with the '_' character. 27 | """ 28 | if max_length is None: 29 | max_length = self.max_length 30 | if max_length is None: 31 | raise ValueError("Must provide max_length to this method or when making this object.") 32 | 33 | ind = np.random.randint(0, len(self.word_start_inds) - 1) 34 | start_ind = self.word_start_inds[ind] 35 | end_ind_candidates = [] 36 | for ind in range(ind + 1, len(self.word_start_inds)): 37 | if self.word_start_inds[ind] - start_ind > max_length: 38 | break 39 | end_ind_candidates.append(self.word_start_inds[ind]) 40 | end_ind = np.random.choice(end_ind_candidates) 41 | sampled_text = self.text[start_ind:end_ind].strip() 42 | padding = "_" * (max_length - len(sampled_text)) 43 | return sampled_text + padding 44 | 45 | 46 | def brown_text(): 47 | """Return a single string with the Brown corpus with all punctuation stripped.""" 48 | sents = load_nltk_brown_corpus() 49 | text = " ".join(itertools.chain.from_iterable(sents)) 50 | text = text.translate({ord(c): None for c in string.punctuation}) 51 | text = re.sub(" +", " ", text) 52 | return text 53 | 54 | 55 | def load_nltk_brown_corpus(): 56 | """Load the Brown corpus using the NLTK library.""" 57 | nltk.data.path.append(NLTK_DATA_DIRNAME) 58 | try: 59 | nltk.corpus.brown.sents() 60 | except LookupError: 61 | NLTK_DATA_DIRNAME.mkdir(parents=True, exist_ok=True) 62 | nltk.download("brown", download_dir=NLTK_DATA_DIRNAME) 63 | return nltk.corpus.brown.sents() 64 | -------------------------------------------------------------------------------- /text_recognizer/line_predictor.py: -------------------------------------------------------------------------------- 1 | """LinePredictor class""" 2 | from typing import Tuple, Union 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models import LineModelCtc 7 | from text_recognizer.datasets import EmnistLinesDataset 8 | import text_recognizer.util as util 9 | 10 | 11 | class LinePredictor: 12 | """Given an image of a line of handwritten text, recognizes text contents.""" 13 | 14 | def __init__(self, dataset_cls=EmnistLinesDataset): 15 | self.model = LineModelCtc(dataset_cls=dataset_cls) 16 | self.model.load_weights() 17 | 18 | def predict(self, image_or_filename: Union[np.ndarray, str]) -> Tuple[str, float]: 19 | """Predict on a single image.""" 20 | if isinstance(image_or_filename, str): 21 | image = util.read_image(image_or_filename, grayscale=True) 22 | else: 23 | image = image_or_filename 24 | return self.model.predict_on_image(image) 25 | 26 | def evaluate(self, dataset): 27 | """Evaluate on a dataset.""" 28 | return self.model.evaluate(dataset.x_test, 
dataset.y_test) 29 | -------------------------------------------------------------------------------- /text_recognizer/models/__init__.py: -------------------------------------------------------------------------------- 1 | """Model modules.""" 2 | from .character_model import CharacterModel 3 | 4 | # Hide lines below until Lab 2 5 | from .line_model import LineModel 6 | 7 | # Hide lines above until Lab 2 8 | 9 | # Hide lines below until Lab 3 10 | from .line_model_ctc import LineModelCtc 11 | 12 | # Hide lines above until Lab 3 13 | 14 | # Hide lines below until Lab 5 15 | from .line_detector_model import LineDetectorModel 16 | 17 | # Hide lines above until Lab 5 18 | -------------------------------------------------------------------------------- /text_recognizer/models/base.py: -------------------------------------------------------------------------------- 1 | """Model class, to be extended by specific types of models.""" 2 | # pylint: disable=missing-function-docstring 3 | from pathlib import Path 4 | from typing import Callable, Dict, Optional 5 | 6 | from tensorflow.keras.models import Model as KerasModel 7 | from tensorflow.keras.optimizers import RMSprop 8 | import numpy as np 9 | 10 | from text_recognizer.datasets.dataset_sequence import DatasetSequence 11 | 12 | 13 | DIRNAME = Path(__file__).parents[1].resolve() / "weights" 14 | 15 | 16 | class Model: 17 | """Base class, to be subclassed by predictors for specific type of data.""" 18 | 19 | def __init__( 20 | self, 21 | dataset_cls: type, 22 | network_fn: Callable[..., KerasModel], 23 | dataset_args: Dict = None, 24 | network_args: Dict = None, 25 | ): 26 | self.name = f"{self.__class__.__name__}_{dataset_cls.__name__}_{network_fn.__name__}" 27 | 28 | if dataset_args is None: 29 | dataset_args = {} 30 | self.data = dataset_cls(**dataset_args) 31 | 32 | if network_args is None: 33 | network_args = {} 34 | self.network = network_fn(self.data.input_shape, self.data.output_shape, **network_args) 35 | self.network.summary() 36 | 37 | self.batch_augment_fn: Optional[Callable] = None 38 | self.batch_format_fn: Optional[Callable] = None 39 | 40 | @property 41 | def image_shape(self): 42 | return self.data.input_shape 43 | 44 | @property 45 | def weights_filename(self) -> str: 46 | DIRNAME.mkdir(parents=True, exist_ok=True) 47 | return str(DIRNAME / f"{self.name}_weights.h5") 48 | 49 | def fit( 50 | self, dataset, batch_size: int = 32, epochs: int = 10, augment_val: bool = True, callbacks: list = None, 51 | ): 52 | if callbacks is None: 53 | callbacks = [] 54 | 55 | self.network.compile(loss=self.loss(), optimizer=self.optimizer(), metrics=self.metrics()) 56 | 57 | train_sequence = DatasetSequence( 58 | dataset.x_train, 59 | dataset.y_train, 60 | batch_size, 61 | augment_fn=self.batch_augment_fn, 62 | format_fn=self.batch_format_fn, 63 | ) 64 | test_sequence = DatasetSequence( 65 | dataset.x_test, 66 | dataset.y_test, 67 | batch_size, 68 | augment_fn=self.batch_augment_fn if augment_val else None, 69 | format_fn=self.batch_format_fn, 70 | ) 71 | 72 | self.network.fit( 73 | train_sequence, 74 | epochs=epochs, 75 | callbacks=callbacks, 76 | validation_data=test_sequence, 77 | use_multiprocessing=False, 78 | workers=1, 79 | shuffle=True, 80 | ) 81 | 82 | def evaluate(self, x: np.ndarray, y: np.ndarray, batch_size: int = 16, _verbose: bool = False): 83 | # pylint: disable=unused-argument 84 | sequence = DatasetSequence(x, y, batch_size=batch_size) # Use a small batch size to use less memory 85 | preds = self.network.predict(sequence) 86 | 
return np.mean(np.argmax(preds, -1) == np.argmax(y, -1)) 87 | 88 | def loss(self): # pylint: disable=no-self-use 89 | return "categorical_crossentropy" 90 | 91 | def optimizer(self): # pylint: disable=no-self-use 92 | return RMSprop() 93 | 94 | def metrics(self): # pylint: disable=no-self-use 95 | return ["accuracy"] 96 | 97 | def load_weights(self): 98 | self.network.load_weights(self.weights_filename) 99 | 100 | def save_weights(self): 101 | self.network.save_weights(self.weights_filename) 102 | -------------------------------------------------------------------------------- /text_recognizer/models/character_model.py: -------------------------------------------------------------------------------- 1 | """CharacterModel class.""" 2 | from typing import Callable, Dict, Tuple 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models.base import Model 7 | from text_recognizer.datasets.emnist_dataset import EmnistDataset 8 | from text_recognizer.networks.mlp import mlp 9 | 10 | 11 | class CharacterModel(Model): 12 | """CharacterModel works on datasets providing images, with one-hot labels.""" 13 | 14 | def __init__( 15 | self, 16 | dataset_cls: type = EmnistDataset, 17 | network_fn: Callable = mlp, 18 | dataset_args: Dict = None, 19 | network_args: Dict = None, 20 | ): 21 | super().__init__(dataset_cls, network_fn, dataset_args, network_args) 22 | 23 | def predict_on_image(self, image: np.ndarray) -> Tuple[str, float]: 24 | if image.dtype == np.uint8: 25 | image = (image / 255).astype(np.float32) 26 | # NOTE: integer to character mapping dictionary is self.data.mapping[integer] 27 | # Your code below (Lab 1) 28 | pred_raw = self.network.predict(np.expand_dims(image, 0), batch_size=1).flatten() 29 | ind = np.argmax(pred_raw) 30 | confidence_of_prediction = pred_raw[ind] 31 | predicted_character = self.data.mapping[ind] 32 | # Your code above (Lab 1) 33 | return predicted_character, confidence_of_prediction 34 | -------------------------------------------------------------------------------- /text_recognizer/models/line_detector_model.py: -------------------------------------------------------------------------------- 1 | """Define LineDetectorModel class.""" 2 | from typing import Callable, Dict, Tuple 3 | 4 | import numpy as np 5 | 6 | from tensorflow.keras.optimizers import Adam 7 | from tensorflow.keras.preprocessing.image import ImageDataGenerator 8 | 9 | from text_recognizer.datasets.iam_paragraphs_dataset import IamParagraphsDataset 10 | from text_recognizer.models.base import Model 11 | from text_recognizer.networks import fcn 12 | 13 | 14 | _DATA_AUGMENTATION_PARAMS = { 15 | "width_shift_range": 0.06, 16 | "height_shift_range": 0.1, 17 | "horizontal_flip": True, 18 | "zoom_range": 0.1, 19 | "fill_mode": "constant", 20 | "cval": 0, 21 | "shear_range": 3, 22 | } 23 | 24 | 25 | class LineDetectorModel(Model): 26 | """Model to detect lines of text in an image.""" 27 | 28 | def __init__( 29 | self, 30 | dataset_cls: type = IamParagraphsDataset, 31 | network_fn: Callable = fcn, 32 | dataset_args: Dict = None, 33 | network_args: Dict = None, 34 | ): 35 | """Define the default dataset and network values for this model.""" 36 | super().__init__(dataset_cls, network_fn, dataset_args, network_args) 37 | 38 | self.data_augmentor = ImageDataGenerator(**_DATA_AUGMENTATION_PARAMS) 39 | self.batch_augment_fn = self.augment_batch 40 | 41 | def loss(self): # pylint: disable=no-self-use 42 | return "categorical_crossentropy" 43 | 44 | def optimizer(self): # pylint: disable=no-self-use 45 | return 
Adam(0.001 / 2) 46 | 47 | def metrics(self): # pylint: disable=no-self-use 48 | return None 49 | 50 | def augment_batch(self, x_batch: np.ndarray, y_batch: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: 51 | """Perform different random transformations on the whole batch of x, y samples.""" 52 | x_augment, y_augment = zip(*[self._augment_sample(x, y) for x, y in zip(x_batch, y_batch)]) 53 | return np.stack(x_augment, axis=0), np.stack(y_augment, axis=0) 54 | 55 | def _augment_sample(self, x: np.ndarray, y: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: 56 | """ 57 | Perform the same random image transformation on both x and y. 58 | x is a 2d image of shape self.image_shape, but self.data_augmentor needs the channel image too. 59 | """ 60 | x_3d = np.expand_dims(x, axis=-1) 61 | transform_parameters = self.data_augmentor.get_random_transform(x_3d.shape) 62 | x_augment = self.data_augmentor.apply_transform(x_3d, transform_parameters) 63 | y_augment = self.data_augmentor.apply_transform(y, transform_parameters) 64 | return np.squeeze(x_augment, axis=-1), y_augment 65 | 66 | def predict_on_image(self, x: np.ndarray) -> np.ndarray: 67 | """Predict on a single input.""" 68 | return self.network.predict(np.expand_dims(x, axis=0))[0] 69 | 70 | def evaluate(self, x: np.ndarray, y: np.ndarray, batch_size: int = 32, verbose: bool = False) -> float: 71 | """Evaluate the model.""" 72 | # pylint: disable=unused-argument 73 | return self.network.evaluate(x, y, batch_size=batch_size) 74 | -------------------------------------------------------------------------------- /text_recognizer/models/line_model.py: -------------------------------------------------------------------------------- 1 | """Define LineModel class.""" 2 | from typing import Callable, Dict, Tuple 3 | 4 | import editdistance 5 | import numpy as np 6 | 7 | from text_recognizer.datasets.emnist_lines_dataset import EmnistLinesDataset 8 | from text_recognizer.datasets.dataset_sequence import DatasetSequence 9 | from text_recognizer.models.base import Model 10 | from text_recognizer.networks import line_cnn_all_conv 11 | 12 | 13 | class LineModel(Model): 14 | """Model for predicting a string from an image of a handwritten line of text.""" 15 | 16 | def __init__( 17 | self, 18 | dataset_cls: type = EmnistLinesDataset, 19 | network_fn: Callable = line_cnn_all_conv, 20 | dataset_args: Dict = None, 21 | network_args: Dict = None, 22 | ): 23 | """Define the default dataset and network values for this model.""" 24 | super().__init__(dataset_cls, network_fn, dataset_args, network_args) 25 | 26 | def evaluate(self, x, y, batch_size=16, verbose=True): 27 | """Evaluate model.""" 28 | sequence = DatasetSequence(x, y) 29 | preds_raw = self.network.predict(sequence) 30 | trues = np.argmax(y, -1) 31 | preds = np.argmax(preds_raw, -1) 32 | pred_strings = ["".join(self.data.mapping.get(label, "") for label in pred).strip(" |_") for pred in preds] 33 | true_strings = ["".join(self.data.mapping.get(label, "") for label in true).strip(" |_") for true in trues] 34 | char_accuracies = [ 35 | 1 - editdistance.eval(true_string, pred_string) / len(true_string) 36 | for pred_string, true_string in zip(pred_strings, true_strings) 37 | ] 38 | if verbose: 39 | sorted_ind = np.argsort(char_accuracies) 40 | print("\nLeast accurate predictions:") 41 | for ind in sorted_ind[:5]: 42 | print(f"True: {true_strings[ind]}") 43 | print(f"Pred: {pred_strings[ind]}") 44 | print("\nMost accurate predictions:") 45 | for ind in sorted_ind[-5:]: 46 | print(f"True: {true_strings[ind]}") 47 | 
print(f"Pred: {pred_strings[ind]}") 48 | print("\nRandom predictions:") 49 | random_ind = np.random.randint(0, len(char_accuracies), 5) 50 | for ind in random_ind: # pylint: disable=not-an-iterable 51 | print(f"True: {true_strings[ind]}") 52 | print(f"Pred: {pred_strings[ind]}") 53 | mean_accuracy = np.mean(char_accuracies) 54 | return mean_accuracy 55 | 56 | def predict_on_image(self, image: np.ndarray) -> Tuple[str, float]: 57 | """Predict on a single input.""" 58 | if image.dtype == np.uint8: 59 | image = (image / 255).astype(np.float32) 60 | pred_raw = self.network.predict(np.expand_dims(image, 0), batch_size=1).squeeze() 61 | pred = "".join(self.data.mapping[label] for label in np.argmax(pred_raw, axis=-1).flatten()).strip() 62 | conf = np.min(np.max(pred_raw, axis=-1)) # The least confident of the predictions. 63 | return pred, conf 64 | -------------------------------------------------------------------------------- /text_recognizer/models/line_model_ctc.py: -------------------------------------------------------------------------------- 1 | """Define LineModelCtc class and associated functions.""" 2 | from typing import Callable, Dict, Tuple 3 | 4 | import editdistance 5 | import numpy as np 6 | import tensorflow.keras.backend as K 7 | from tensorflow.keras.models import Model as KerasModel 8 | 9 | from text_recognizer.datasets.dataset_sequence import DatasetSequence 10 | from text_recognizer.datasets import EmnistLinesDataset 11 | from text_recognizer.models.base import Model 12 | from text_recognizer.networks.line_lstm_ctc import line_lstm_ctc 13 | 14 | 15 | class LineModelCtc(Model): 16 | """Model for recognizing handwritten text in an image of a line, using CTC loss/decoding.""" 17 | 18 | def __init__( 19 | self, 20 | dataset_cls: type = EmnistLinesDataset, 21 | network_fn: Callable = line_lstm_ctc, 22 | dataset_args: Dict = None, 23 | network_args: Dict = None, 24 | ): 25 | """Define the default dataset and network values for this model.""" 26 | default_dataset_args: dict = {} 27 | if dataset_args is None: 28 | dataset_args = {} 29 | dataset_args = {**default_dataset_args, **dataset_args} 30 | 31 | default_network_args = {"window_width": 12, "window_stride": 5} 32 | if network_args is None: 33 | network_args = {} 34 | network_args = {**default_network_args, **network_args} 35 | super().__init__(dataset_cls, network_fn, dataset_args, network_args) 36 | self.batch_format_fn = format_batch_ctc 37 | 38 | def loss(self): 39 | """Simply pass through the loss that we computed in the network.""" 40 | return {"ctc_loss": lambda y_true, y_pred: y_pred} 41 | 42 | def metrics(self): 43 | """ 44 | Compute no metrics. 45 | 46 | TODO: We could probably pass in a custom character accuracy metric for 'ctc_decoded' output here. 47 | """ 48 | return None 49 | 50 | def evaluate(self, x, y, batch_size: int = 16, verbose: bool = True) -> float: 51 | """Evaluate model.""" 52 | test_sequence = DatasetSequence(x, y, batch_size, format_fn=self.batch_format_fn) 53 | 54 | # We can use the `ctc_decoded` layer that is part of our model here. 
55 | decoding_model = KerasModel(inputs=self.network.input, outputs=self.network.get_layer("ctc_decoded").output) 56 | preds = decoding_model.predict(test_sequence) 57 | 58 | trues = np.argmax(y, -1) 59 | pred_strings = ["".join(self.data.mapping.get(label, "") for label in pred).strip(" |_") for pred in preds] 60 | true_strings = ["".join(self.data.mapping.get(label, "") for label in true).strip(" |_") for true in trues] 61 | 62 | char_accuracies = [ 63 | 1 - editdistance.eval(true_string, pred_string) / len(true_string) 64 | for pred_string, true_string in zip(pred_strings, true_strings) 65 | ] 66 | if verbose: 67 | sorted_ind = np.argsort(char_accuracies) 68 | print("\nLeast accurate predictions:") 69 | for ind in sorted_ind[:5]: 70 | print(f"True: {true_strings[ind]}") 71 | print(f"Pred: {pred_strings[ind]}") 72 | print("\nMost accurate predictions:") 73 | for ind in sorted_ind[-5:]: 74 | print(f"True: {true_strings[ind]}") 75 | print(f"Pred: {pred_strings[ind]}") 76 | print("\nRandom predictions:") 77 | random_ind = np.random.randint(0, len(char_accuracies), 5) 78 | for ind in random_ind: # pylint: disable=not-an-iterable 79 | print(f"True: {true_strings[ind]}") 80 | print(f"Pred: {pred_strings[ind]}") 81 | mean_accuracy = np.mean(char_accuracies) 82 | return mean_accuracy 83 | 84 | def predict_on_image(self, image: np.ndarray) -> Tuple[str, float]: 85 | """Predict on a single input.""" 86 | softmax_output_fn = KerasModel( 87 | inputs=[self.network.get_layer("image").input], outputs=[self.network.get_layer("softmax_output").output], 88 | ) 89 | if image.dtype == np.uint8: 90 | image = (image / 255).astype(np.float32) 91 | 92 | # Get the prediction and confidence using softmax_output_fn, passing the right input into it. 93 | input_image = np.expand_dims(image, 0) 94 | softmax_output = softmax_output_fn.predict(input_image) 95 | 96 | input_length = [softmax_output.shape[1]] 97 | decoded, log_prob = K.ctc_decode(softmax_output, input_length, greedy=True) 98 | 99 | pred_raw = K.eval(decoded[0])[0] 100 | pred = "".join(self.data.mapping[label] for label in pred_raw).strip() 101 | 102 | neg_sum_logit = K.eval(log_prob)[0][0] 103 | conf = np.exp(-neg_sum_logit) 104 | # Your code above (Lab 3) 105 | 106 | return pred, conf 107 | 108 | 109 | def format_batch_ctc(batch_x, batch_y): 110 | """ 111 | Because CTC loss needs to be computed inside of the network, we include information about outputs in the inputs. 
112 | """ 113 | batch_size = batch_y.shape[0] 114 | y_true = np.argmax(batch_y, axis=-1) 115 | 116 | label_lengths = [] 117 | for ind in range(batch_size): 118 | # Find all of the indices in the label that are blank 119 | empty_at = np.where(batch_y[ind, :, -1] == 1)[0] 120 | # Length of the label is the pos of the first blank, or the max length 121 | if empty_at.shape[0] > 0: 122 | label_lengths.append(empty_at[0]) 123 | else: 124 | label_lengths.append(batch_y.shape[1]) 125 | 126 | batch_inputs = { 127 | "image": batch_x, 128 | "y_true": y_true, 129 | "input_length": np.ones((batch_size, 1)), # dummy, will be set to num_windows in network 130 | "label_length": np.array(label_lengths), 131 | } 132 | batch_outputs = {"ctc_loss": np.zeros(batch_size), "ctc_decoded": y_true} # dummy 133 | return batch_inputs, batch_outputs 134 | -------------------------------------------------------------------------------- /text_recognizer/networks/__init__.py: -------------------------------------------------------------------------------- 1 | """Neural network code modules.""" 2 | from .mlp import mlp 3 | from .lenet import lenet 4 | 5 | # Hide lines below until Lab 2 6 | from .line_cnn_all_conv import line_cnn_all_conv 7 | 8 | # Hide lines above until Lab 2 9 | 10 | # Hide lines below until Lab 3 11 | from .line_lstm_ctc import line_lstm_ctc 12 | 13 | # Hide lines above until Lab 3 14 | 15 | # Hide lines below until Lab 5 16 | from .fcn import fcn 17 | 18 | # Hide lines above until Lab 5 19 | -------------------------------------------------------------------------------- /text_recognizer/networks/ctc.py: -------------------------------------------------------------------------------- 1 | """Define ctc_decode function.""" 2 | import tensorflow as tf 3 | import tensorflow.keras.backend as K 4 | from tensorflow.python.ops import ctc_ops, sparse_ops # pylint: disable=no-name-in-module 5 | 6 | 7 | def ctc_decode(y_pred, input_length, max_output_length): 8 | """ 9 | Cut down from https://github.com/keras-team/keras/blob/master/keras/backend/tensorflow_backend.py#L4170 10 | 11 | Decodes the output of a softmax. 12 | Uses greedy (best path) search. 13 | 14 | # Arguments 15 | y_pred: tensor `(samples, time_steps, num_categories)` 16 | containing the prediction, or output of the softmax. 17 | input_length: tensor `(samples, )` containing the sequence length for 18 | each batch item in `y_pred`. 19 | max_output_length: int giving the max output sequence length 20 | 21 | # Returns 22 | List: list of one element that contains the decoded sequence. 23 | """ 24 | y_pred = tf.math.log(tf.transpose(y_pred, perm=[1, 0, 2]) + K.epsilon()) 25 | input_length = tf.cast((tf.squeeze(input_length, axis=-1)), tf.int32) 26 | 27 | (decoded, _) = ctc_ops.ctc_greedy_decoder(inputs=y_pred, sequence_length=input_length) 28 | 29 | sparse = decoded[0] 30 | decoded_dense = sparse_ops.sparse_to_dense(sparse.indices, sparse.dense_shape, sparse.values, default_value=-1) 31 | 32 | # Unfortunately, decoded_dense will be of different number of columns, depending on the decodings. 33 | # For use in `predict()`, we need to get it all in one standard shape, so let's pad if necessary. 
34 | max_length = max_output_length + 2 # giving 2 extra characters for CTC leeway 35 | cols = tf.shape(decoded_dense)[-1] 36 | 37 | def pad(): 38 | return tf.pad(decoded_dense, [[0, 0], [0, max_length - cols]], constant_values=-1) 39 | 40 | def noop(): 41 | return decoded_dense 42 | 43 | return tf.cond(tf.less(cols, max_length), pad, noop) 44 | -------------------------------------------------------------------------------- /text_recognizer/networks/fcn.py: -------------------------------------------------------------------------------- 1 | """Keras network code for the fully-convolutional network used for line detection.""" 2 | from typing import List, Tuple 3 | from tensorflow.keras.models import Model 4 | from tensorflow.keras.layers import Activation, Add, Conv2D, Input, Lambda, Layer 5 | from tensorflow.keras import backend as K 6 | 7 | 8 | def residual_conv_block( 9 | input_layer: Layer, kernel_sizes: List[int], num_filters: List[int], dilation_rates: List[int], activation: str, 10 | ) -> Layer: 11 | """Instantiate a Residual convolutional block.""" 12 | padding = "same" 13 | x = Conv2D( 14 | num_filters[0], 15 | kernel_size=kernel_sizes[0], 16 | dilation_rate=dilation_rates[0], 17 | padding=padding, 18 | activation=activation, 19 | )(input_layer) 20 | x = Conv2D(num_filters[1], kernel_size=kernel_sizes[1], dilation_rate=dilation_rates[1], padding=padding,)(x) 21 | y = Conv2D(num_filters[1], kernel_size=1, dilation_rate=1, padding=padding)(input_layer) 22 | x = Add()([x, y]) 23 | x = Activation(activation)(x) 24 | return x 25 | 26 | 27 | def fcn(_input_shape: Tuple[int, ...], output_shape: Tuple[int, ...]) -> Model: 28 | """Instantiate a fully convolutional residual network for line detection.""" 29 | num_filters = [16] * 14 30 | kernel_sizes = [7] * 14 31 | dilation_rates = [3] * 4 + [7] * 10 32 | 33 | num_classes = output_shape[-1] 34 | input_image = Input((None, None)) 35 | model_layer = Lambda(lambda x: K.expand_dims(x, axis=-1))(input_image) 36 | 37 | for i in range(0, len(num_filters), 2): 38 | model_layer = residual_conv_block( 39 | input_layer=model_layer, 40 | kernel_sizes=kernel_sizes[i : i + 2], 41 | num_filters=num_filters[i : i + 2], 42 | dilation_rates=dilation_rates[i : i + 2], 43 | activation="relu", 44 | ) 45 | output = Conv2D(num_classes, kernel_size=1, dilation_rate=1, padding="same", activation="softmax")(model_layer) 46 | 47 | model = Model(inputs=input_image, outputs=output) 48 | return model 49 | -------------------------------------------------------------------------------- /text_recognizer/networks/lenet.py: -------------------------------------------------------------------------------- 1 | """LeNet network.""" 2 | from typing import Tuple 3 | 4 | import tensorflow as tf 5 | from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, Lambda, MaxPooling2D 6 | from tensorflow.keras.models import Sequential, Model 7 | 8 | 9 | def lenet(input_shape: Tuple[int, ...], output_shape: Tuple[int, ...]) -> Model: 10 | """Return LeNet Keras model.""" 11 | num_classes = output_shape[0] 12 | 13 | # Your code below (Lab 2) 14 | model = Sequential() 15 | if len(input_shape) < 3: 16 | model.add(Lambda(lambda x: tf.expand_dims(x, -1), input_shape=input_shape, name='expand_dims')) 17 | input_shape = (input_shape[0], input_shape[1], 1) 18 | model.add(Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=input_shape, padding="valid")) 19 | model.add(Conv2D(64, (3, 3), activation="relu", padding="valid")) 20 | model.add(MaxPooling2D(pool_size=(2, 2), 
padding="valid")) 21 | model.add(Dropout(0.2)) 22 | model.add(Flatten()) 23 | model.add(Dense(128, activation="relu")) 24 | model.add(Dropout(0.2)) 25 | model.add(Dense(num_classes, activation="softmax")) 26 | # Your code above (Lab 2) 27 | 28 | return model 29 | -------------------------------------------------------------------------------- /text_recognizer/networks/line_cnn_all_conv.py: -------------------------------------------------------------------------------- 1 | """CNN-based model for recognizing handwritten text.""" 2 | from typing import Tuple 3 | 4 | import tensorflow as tf 5 | from tensorflow.keras.layers import Conv2D, Dropout, MaxPooling2D, Reshape, Lambda, Permute 6 | from tensorflow.keras.models import Sequential 7 | from tensorflow.keras.models import Model as KerasModel 8 | 9 | 10 | def line_cnn_all_conv( 11 | input_shape: Tuple[int, ...], output_shape: Tuple[int, ...], window_width: float = 28, window_stride: float = 14, 12 | ) -> KerasModel: 13 | image_height, image_width = input_shape 14 | output_length, num_classes = output_shape 15 | # Current shape is: (image_height, image_width, 1) 16 | 17 | model = Sequential() 18 | model.add(Reshape((image_height, image_width, 1), input_shape=input_shape)) 19 | model.add(Conv2D(32, kernel_size=(3, 3), activation="relu", padding="same")) 20 | model.add(Conv2D(64, (3, 3), activation="relu", padding="same")) 21 | model.add(MaxPooling2D(pool_size=(2, 2), padding="same")) 22 | model.add(Dropout(0.2)) 23 | # Current shape is: (image_height // 2, image_width // 2, 64) 24 | 25 | # So far, this is the same as LeNet. At this point, LeNet would flatten and Dense 128. 26 | # Instead, we are going to use a Conv2D to slide over these outputs with window_width and window_stride, 27 | # and output softmax activations of shape (output_length, num_classes)./ 28 | 29 | # Because of MaxPooling, everything is divided by 2 30 | new_height = image_height // 2 31 | new_width = image_width // 2 32 | new_window_width = window_width // 2 33 | new_window_stride = window_stride // 2 34 | 35 | # Your code below (Lab 2) 36 | model.add( 37 | Conv2D(128, (new_height, new_window_width), (new_height, new_window_stride), activation="relu", padding="same") 38 | ) 39 | model.add(Dropout(0.2)) 40 | # Your code above (Lab 2) 41 | # Shape is now (1, num_windows, 128) 42 | num_windows = new_width // new_window_stride 43 | 44 | model.add(Permute((2, 1, 3))) # We could instead do a Reshape((num_windows, 1, 128)) 45 | # Shape is now (num_windows, 1, 128) 46 | 47 | final_classifier_width = num_windows // output_length 48 | model.add( 49 | Conv2D( 50 | num_classes, (final_classifier_width, 1), (final_classifier_width, 1), activation="softmax", padding="same" 51 | ) 52 | ) 53 | # Shape is now (output_length, 1, num_classes) 54 | 55 | model.add(Lambda(lambda x: tf.squeeze(x, 2))) # We could instead do a Reshape((output_length, num_classes)) 56 | # Shape is now (output_length, num_classes) 57 | 58 | # Since we floor'd the calculation of width, we might have too many items in the sequence. Take only output_length. 
59 | # model.add(Lambda(lambda x: x[:, :output_length, :])) 60 | return model 61 | -------------------------------------------------------------------------------- /text_recognizer/networks/line_lstm_ctc.py: -------------------------------------------------------------------------------- 1 | """LSTM with CTC for handwritten text recognition within a line.""" 2 | from tensorflow.keras.layers import Dense, Input, Reshape, TimeDistributed, Lambda, LSTM 3 | from tensorflow.keras.models import Model as KerasModel 4 | import tensorflow.keras.backend as K 5 | 6 | from text_recognizer.networks.lenet import lenet 7 | from text_recognizer.networks.misc import slide_window 8 | from text_recognizer.networks.ctc import ctc_decode 9 | 10 | 11 | def line_lstm_ctc(input_shape, output_shape, window_width=28, window_stride=14): # pylint: disable=too-many-locals 12 | image_height, image_width = input_shape 13 | output_length, num_classes = output_shape 14 | 15 | num_windows = int((image_width - window_width) / window_stride) + 1 16 | if num_windows < output_length: 17 | raise ValueError(f"Window width/stride need to generate >= {output_length} windows (currently {num_windows})") 18 | 19 | image_input = Input(shape=input_shape, name="image") 20 | y_true = Input(shape=(output_length,), name="y_true") 21 | input_length = Input(shape=(1,), name="input_length") 22 | label_length = Input(shape=(1,), name="label_length") 23 | 24 | # Your code should use slide_window and extract image patches from image_input. 25 | # Pass a convolutional model over each image patch to generate a feature vector per window. 26 | # Pass these features through one or more LSTM layers. 27 | # Convert the lstm outputs to softmax outputs. 28 | # Note that lstms expect a input of shape (num_batch_size, num_timesteps, feature_length). 
29 | 30 | # Your code below (Lab 3) 31 | image_reshaped = Reshape((image_height, image_width, 1))(image_input) 32 | # (image_height, image_width, 1) 33 | 34 | image_patches = Lambda(slide_window, arguments={"window_width": window_width, "window_stride": window_stride})( 35 | image_reshaped 36 | ) 37 | # (num_windows, image_height, window_width, 1) 38 | 39 | # Make a LeNet and get rid of the last two layers (softmax and dropout) 40 | convnet = lenet((image_height, window_width, 1), (num_classes,)) 41 | convnet = KerasModel(inputs=convnet.inputs, outputs=convnet.layers[-2].output) 42 | convnet_outputs = TimeDistributed(convnet)(image_patches) 43 | # (num_windows, 128) 44 | 45 | lstm_output = LSTM(128, return_sequences=True)(convnet_outputs) 46 | # (num_windows, 128) 47 | 48 | softmax_output = Dense(num_classes, activation="softmax", name="softmax_output")(lstm_output) 49 | # (num_windows, num_classes) 50 | # Your code above (Lab 3) 51 | 52 | input_length_processed = Lambda( 53 | lambda x, num_windows=None: x * num_windows, arguments={"num_windows": num_windows} 54 | )(input_length) 55 | 56 | ctc_loss_output = Lambda(lambda x: K.ctc_batch_cost(x[0], x[1], x[2], x[3]), name="ctc_loss")( 57 | [y_true, softmax_output, input_length_processed, label_length] 58 | ) 59 | 60 | ctc_decoded_output = Lambda(lambda x: ctc_decode(x[0], x[1], output_length), name="ctc_decoded")( 61 | [softmax_output, input_length_processed] 62 | ) 63 | 64 | model = KerasModel( 65 | inputs=[image_input, y_true, input_length, label_length], outputs=[ctc_loss_output, ctc_decoded_output], 66 | ) 67 | return model 68 | -------------------------------------------------------------------------------- /text_recognizer/networks/misc.py: -------------------------------------------------------------------------------- 1 | """Misc neural network functionality.""" 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | 6 | def slide_window(image: np.ndarray, window_width: int, window_stride: int) -> np.ndarray: 7 | """ 8 | Parameters 9 | ---------- 10 | image 11 | (image_height, image_width, 1) input 12 | 13 | Returns 14 | ------- 15 | np.ndarray 16 | (num_windows, image_height, window_width, 1) output, where 17 | num_windows is floor((image_width - window_width) / window_stride) + 1 18 | """ 19 | kernel = [1, 1, window_width, 1] 20 | strides = [1, 1, window_stride, 1] 21 | patches = tf.image.extract_patches(image, kernel, strides, [1, 1, 1, 1], "VALID") 22 | patches = tf.transpose(patches, (0, 2, 1, 3)) 23 | patches = tf.expand_dims(patches, -1) 24 | return patches 25 | -------------------------------------------------------------------------------- /text_recognizer/networks/mlp.py: -------------------------------------------------------------------------------- 1 | """Define mlp network function.""" 2 | from typing import Tuple 3 | 4 | from tensorflow.keras.models import Model, Sequential 5 | from tensorflow.keras.layers import Dense, Dropout, Flatten 6 | 7 | 8 | def mlp( 9 | input_shape: Tuple[int, ...], 10 | output_shape: Tuple[int, ...], 11 | layer_size: int = 128, 12 | dropout_amount: float = 0.2, 13 | num_layers: int = 3, 14 | ) -> Model: 15 | """ 16 | Create a simple multi-layer perceptron: fully-connected layers with dropout between them, with softmax predictions. 17 | Creates num_layers layers. 
18 | """ 19 | num_classes = output_shape[0] 20 | 21 | model = Sequential() 22 | # Don't forget to pass input_shape to the first layer of the model 23 | # Your code below (Lab 1) 24 | model.add(Flatten(input_shape=input_shape)) 25 | for _ in range(num_layers): 26 | model.add(Dense(layer_size, activation="relu")) 27 | model.add(Dropout(dropout_amount)) 28 | model.add(Dense(num_classes, activation="softmax")) 29 | # Your code above (Lab 1) 30 | 31 | return model 32 | -------------------------------------------------------------------------------- /text_recognizer/paragraph_text_recognizer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Takes an image and returns all the text in it, by first segmenting the image with LineDetector, then extracting crops 3 | of the image corresponding to the line regions, and running each line region crop through LinePredictor. 4 | """ 5 | from typing import List, Tuple, Union 6 | import cv2 7 | import numpy as np 8 | from text_recognizer.datasets import IamLinesDataset 9 | from text_recognizer.models.line_detector_model import LineDetectorModel 10 | from text_recognizer.models.line_model_ctc import LineModelCtc 11 | import text_recognizer.util as util 12 | 13 | 14 | class ParagraphTextRecognizer: 15 | """Given an image of a single handwritten character, recognizes it.""" 16 | 17 | def __init__(self): 18 | self.line_detector_model = LineDetectorModel() 19 | self.line_detector_model.load_weights() 20 | self.line_predictor_model = LineModelCtc(dataset_cls=IamLinesDataset) 21 | self.line_predictor_model.load_weights() 22 | 23 | def predict(self, image_or_filename: Union[np.ndarray, str]): 24 | """ 25 | Take an image and return all the text in it. 26 | """ 27 | if isinstance(image_or_filename, str): 28 | image = util.read_image(image_or_filename, grayscale=True) 29 | else: 30 | image = image_or_filename 31 | 32 | line_region_crops = self._get_line_region_crops(image=image) 33 | print([a.shape for a in line_region_crops]) 34 | prepared_line_region_crops = [ 35 | self._prepare_image_for_line_predictor_model(image=crop) for crop in line_region_crops 36 | ] 37 | 38 | line_region_strings = [ 39 | self.line_predictor_model.predict_on_image(crop)[0] for crop in prepared_line_region_crops 40 | ] 41 | return " ".join(line_region_strings), line_region_crops 42 | 43 | def _get_line_region_crops(self, image: np.ndarray, min_crop_len_factor: float = 0.02) -> List[np.ndarray]: 44 | """Find all the line regions in square image and crop them out and return them.""" 45 | prepared_image, scale_down_factor = self._prepare_image_for_line_detector_model(image) 46 | line_segmentation = self.line_detector_model.predict_on_image(prepared_image) 47 | bounding_boxes_xywh = _find_line_bounding_boxes(line_segmentation) 48 | 49 | bounding_boxes_xywh = (bounding_boxes_xywh * scale_down_factor).astype(int) 50 | 51 | min_crop_length = int(min_crop_len_factor * min(image.shape[0], image.shape[1])) 52 | line_region_crops = [ 53 | image[y : y + h, x : x + w] 54 | for x, y, w, h in bounding_boxes_xywh 55 | if w >= min_crop_length and h >= min_crop_length 56 | ] 57 | return line_region_crops 58 | 59 | def _prepare_image_for_line_detector_model(self, image: np.ndarray) -> Tuple[np.ndarray, float]: 60 | """Convert uint8 image to float image with black background with shape self.line_detector_model.image_shape.""" 61 | resized_image, scale_down_factor = _resize_image_for_line_detector_model( 62 | image=image, max_shape=self.line_detector_model.image_shape 
63 | ) 64 | resized_image = (1.0 - resized_image / 255).astype("float32") 65 | return resized_image, scale_down_factor 66 | 67 | def _prepare_image_for_line_predictor_model(self, image: np.ndarray) -> np.ndarray: 68 | """ 69 | Convert uint8 image to float image with black background with shape self.line_predictor_model.image_shape 70 | while maintaining the image aspect ratio. 71 | """ 72 | expected_shape = self.line_predictor_model.image_shape 73 | scale_factor = (np.array(expected_shape) / np.array(image.shape)).min() 74 | scaled_image = cv2.resize(image, dsize=None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_AREA) 75 | 76 | pad_width = ( 77 | (0, expected_shape[0] - scaled_image.shape[0]), 78 | (0, expected_shape[1] - scaled_image.shape[1]), 79 | ) 80 | padded_image = np.pad(scaled_image, pad_width=pad_width, mode="constant", constant_values=255) 81 | return 1 - padded_image / 255 82 | 83 | 84 | def _find_line_bounding_boxes(line_segmentation: np.ndarray): 85 | """Given a line segmentation, find bounding boxes for connected-component regions corresponding to non-0 labels.""" 86 | 87 | def _find_line_bounding_boxes_in_channel(line_segmentation_channel: np.ndarray) -> np.ndarray: 88 | line_activation_image = cv2.dilate(line_segmentation_channel, kernel=np.ones((3, 3)), iterations=1) 89 | line_activation_image = (line_activation_image * 255).astype("uint8") 90 | line_activation_image = cv2.threshold(line_activation_image, 0.5, 1, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1] 91 | 92 | bounding_cnts, _ = cv2.findContours(line_activation_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) 93 | return np.array([cv2.boundingRect(cnt) for cnt in bounding_cnts]) 94 | 95 | bboxes_xywh = np.concatenate( 96 | [_find_line_bounding_boxes_in_channel(line_segmentation[:, :, i]) for i in [1, 2]], axis=0 97 | ) 98 | return bboxes_xywh[np.argsort(bboxes_xywh[:, 1])] 99 | 100 | 101 | def _resize_image_for_line_detector_model(image: np.ndarray, max_shape: Tuple[int, int]) -> Tuple[np.ndarray, float]: 102 | """Resize the image to less than the max_shape while maintaining aspect ratio.""" 103 | scale_down_factor = max(np.array(image.shape) / np.array(max_shape)) 104 | if scale_down_factor == 1: 105 | return image.copy(), scale_down_factor 106 | resized_image = cv2.resize( 107 | image, dsize=None, fx=1 / scale_down_factor, fy=1 / scale_down_factor, interpolation=cv2.INTER_AREA, 108 | ) 109 | return resized_image, scale_down_factor 110 | -------------------------------------------------------------------------------- /text_recognizer/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-full-stack/fsdl-text-recognizer/a99a3d3f0594dfceb249a56e8362337f9e12897e/text_recognizer/tests/__init__.py -------------------------------------------------------------------------------- /text_recognizer/tests/support/create_emnist_lines_support_files.py: -------------------------------------------------------------------------------- 1 | """Module for creating EMNIST Lines test support files.""" 2 | from pathlib import Path 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | from text_recognizer.datasets import EmnistLinesDataset 8 | import text_recognizer.util as util 9 | 10 | 11 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "emnist_lines" 12 | 13 | 14 | def create_emnist_lines_support_files(): 15 | shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True) 16 | SUPPORT_DIRNAME.mkdir() 17 | 18 | dataset = EmnistLinesDataset() 19 | 
dataset.load_or_generate_data() 20 | 21 | for ind in [0, 1, 3]: 22 | image = dataset.x_test[ind] 23 | print(image.sum(), image.dtype) 24 | label = "".join(dataset.mapping[label] for label in np.argmax(dataset.y_test[ind], axis=-1).flatten()).strip( 25 | " _" 26 | ) 27 | print(label) 28 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) 29 | # Hide lines below until Lab 8 30 | # Inverted version 31 | image = -(image - 255) 32 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}_i.png")) 33 | # Hide lines above until Lab 8 34 | 35 | 36 | if __name__ == "__main__": 37 | create_emnist_lines_support_files() 38 | -------------------------------------------------------------------------------- /text_recognizer/tests/support/create_emnist_support_files.py: -------------------------------------------------------------------------------- 1 | """Module for creating EMNIST test support files.""" 2 | from pathlib import Path 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | from text_recognizer.datasets import EmnistDataset 8 | import text_recognizer.util as util 9 | 10 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "emnist" 11 | 12 | 13 | def create_emnist_support_files(): 14 | shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True) 15 | SUPPORT_DIRNAME.mkdir() 16 | 17 | dataset = EmnistDataset() 18 | dataset.load_or_generate_data() 19 | 20 | for ind in [5, 7, 9]: 21 | image = dataset.x_test[ind] 22 | label = dataset.mapping[np.argmax(dataset.y_test[ind])] 23 | print(ind, label) 24 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) 25 | 26 | 27 | if __name__ == "__main__": 28 | create_emnist_support_files() 29 | -------------------------------------------------------------------------------- /text_recognizer/tests/support/create_iam_lines_support_files.py: -------------------------------------------------------------------------------- 1 | """Module for creating IAM Lines test support files.""" 2 | from pathlib import Path 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | from text_recognizer.datasets import IamLinesDataset 8 | import text_recognizer.util as util 9 | 10 | 11 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "iam_lines" 12 | 13 | 14 | def create_iam_lines_support_files(): 15 | shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True) 16 | SUPPORT_DIRNAME.mkdir() 17 | 18 | dataset = IamLinesDataset() 19 | dataset.load_or_generate_data() 20 | 21 | for ind in [0, 1, 3]: 22 | image = dataset.x_test[ind] 23 | label = "".join(dataset.mapping[label] for label in np.argmax(dataset.y_test[ind], axis=-1).flatten()).strip( 24 | " _" 25 | ) 26 | print(label) 27 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) 28 | 29 | 30 | if __name__ == "__main__": 31 | create_iam_lines_support_files() 32 | -------------------------------------------------------------------------------- /text_recognizer/tests/support/emnist/8.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:455c3788a677a33583aec467f49d1917d0b34c0785b3eee6867699f0d2ffbc1a 3 | size 498 4 | -------------------------------------------------------------------------------- /text_recognizer/tests/support/emnist/U.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:6c1490758a7d28fde2a2e0bdc0a644c19a828500901f4417f205def23c2ad3d5 3 | size 524 4 | 
-------------------------------------------------------------------------------- /text_recognizer/tests/support/emnist/e.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:001a7679be1c0c622354aebcbdcc0f2e992e1fc3295ee1d6fef1c1dd1613508e 3 | size 563 4 | -------------------------------------------------------------------------------- /text_recognizer/tests/support/emnist_lines/Corsi left for.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:2e6d26a81d593d7d37d9496763104717e24aa3885cfc993c685eedd29b02ce1f 3 | size 3763 4 | -------------------------------------------------------------------------------- /text_recognizer/tests/support/emnist_lines/do that In.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:42bff01a0fd9f03726f12069f19646374f239642ea51819d0687359712d45eb7 3 | size 2888 4 | -------------------------------------------------------------------------------- /text_recognizer/tests/support/emnist_lines/or if used the results.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:bea915331082580d7aaf129da096afd600a15eca4fa562fe78fb57c4f8e5a199 3 | size 5645 4 | -------------------------------------------------------------------------------- /text_recognizer/tests/support/iam_lines/He rose from his breakfast-nook bench.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8841d95a0008748d5c557061ea59dac2e46a221e30b9c6e9fc6ceac16827094f 3 | size 4876 4 | -------------------------------------------------------------------------------- /text_recognizer/tests/support/iam_lines/and came into the livingroom, where.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d2890307c91b9a25f2bec15fcbc15995d824a1c4d991a29f982ad4c09a9a1e6a 3 | size 3437 4 | -------------------------------------------------------------------------------- /text_recognizer/tests/support/iam_lines/his entrance. 
He came, almost falling.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:95f159bccf1acebb1c48eeeb5773748032dca76969695be2141b9fc8b28013c2 3 | size 3600 4 | -------------------------------------------------------------------------------- /text_recognizer/tests/support/iam_paragraphs/a01-000u-cropped.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:80d99e88bbb1f9be0f6a24ef75d6d2178043a0c1feba2b91e94c349b6b042bdd 3 | size 144556 4 | -------------------------------------------------------------------------------- /text_recognizer/tests/test_character_predictor.py: -------------------------------------------------------------------------------- 1 | """Tests for CharacterPredictor class.""" 2 | import os 3 | from pathlib import Path 4 | import unittest 5 | 6 | from text_recognizer.character_predictor import CharacterPredictor 7 | 8 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" / "emnist" 9 | 10 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 11 | 12 | 13 | class TestCharacterPredictor(unittest.TestCase): 14 | """Tests for the CharacterPredictor class.""" 15 | 16 | def test_filename(self): 17 | """Test that CharacterPredictor correctly predicts on a single image, for several test images.""" 18 | predictor = CharacterPredictor() 19 | 20 | for filename in SUPPORT_DIRNAME.glob("*.png"): 21 | pred, conf = predictor.predict(str(filename)) 22 | print(f"Prediction: {pred} at confidence: {conf} for image with character {filename.stem}") 23 | self.assertEqual(pred, filename.stem) 24 | self.assertGreater(conf, 0.7) 25 | -------------------------------------------------------------------------------- /text_recognizer/tests/test_line_predictor.py: -------------------------------------------------------------------------------- 1 | """Tests for LinePredictor class.""" 2 | import os 3 | from pathlib import Path 4 | import unittest 5 | 6 | import editdistance 7 | import numpy as np 8 | 9 | import text_recognizer.util as util 10 | from text_recognizer.line_predictor import LinePredictor 11 | 12 | # Hide lines below until Lab 4 13 | from text_recognizer.datasets import IamLinesDataset 14 | 15 | # Hide lines above until Lab 4 16 | 17 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" 18 | 19 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 20 | 21 | 22 | class TestEmnistLinePredictor(unittest.TestCase): 23 | """Test LinePredictor class on the EmnistLines dataset.""" 24 | 25 | def test_filename(self): 26 | """Test that LinePredictor correctly predicts on single images, for several test images.""" 27 | predictor = LinePredictor() 28 | 29 | for filename in (SUPPORT_DIRNAME / "emnist_lines").glob("*.png"): 30 | pred, conf = predictor.predict(str(filename)) 31 | true = str(filename.stem) 32 | edit_distance = editdistance.eval(pred, true) / len(pred) 33 | print(f'Pred: "{pred}" | Confidence: {conf} | True: {true} | Edit distance: {edit_distance}') 34 | self.assertLess(edit_distance, 0.2) 35 | 36 | 37 | class TestEmnistLinePredictorVariableImageWidth(unittest.TestCase): 38 | """Test LinePredictor class on the EmnistLines dataset, with variable images.""" 39 | 40 | def test_filename(self): 41 | """Test that LinePredictor correctly predicts on single images, for several test images.""" 42 | predictor = LinePredictor() 43 | for filename in SUPPORT_DIRNAME.glob("*.png"): 44 | image =
util.read_image(str(filename), grayscale=True) 45 | print("Saved image shape:", image.shape) 46 | image = image[:, : -np.random.randint(1, 150)] # pylint: disable=invalid-unary-operand-type 47 | print("Randomly cropped image shape:", image.shape) 48 | pred, conf = predictor.predict(image) 49 | true = str(filename.stem) 50 | edit_distance = editdistance.eval(pred, true) / len(pred) 51 | print(f'Pred: "{pred}" | Confidence: {conf} | True: {true} | Edit distance: {edit_distance}') 52 | self.assertLess(edit_distance, 0.2) 53 | 54 | 55 | # Hide lines below until Lab 4 56 | class TestIamLinePredictor(unittest.TestCase): 57 | """Test LinePredictor class on the IamLines dataset, with variable images.""" 58 | 59 | def test_filename(self): # pylint: disable=R0201 60 | """Test that LinePredictor correctly predicts on single images, for several test images.""" 61 | predictor = LinePredictor(IamLinesDataset) 62 | 63 | for filename in (SUPPORT_DIRNAME / "iam_lines").glob("*.png"): 64 | pred, conf = predictor.predict(str(filename)) 65 | true = filename.stem 66 | if pred: 67 | edit_distance = editdistance.eval(pred, true) / len(pred) 68 | else: 69 | edit_distance = 0 70 | print(f'Pred: "{pred}" | Confidence: {conf} | True: {true} | Edit distance: {edit_distance}') 71 | # self.assertLess(edit_distance, 0.2) 72 | 73 | 74 | # Hide lines above until Lab 4 75 | -------------------------------------------------------------------------------- /text_recognizer/tests/test_paragraph_text_recognizer.py: -------------------------------------------------------------------------------- 1 | """Tests for ParagraphTextRecognizer class.""" 2 | import os 3 | from pathlib import Path 4 | import unittest 5 | from text_recognizer.paragraph_text_recognizer import ParagraphTextRecognizer 6 | import text_recognizer.util as util 7 | 8 | 9 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" / "iam_paragraphs" 10 | 11 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 12 | 13 | 14 | class TestParagraphTextRecognizer(unittest.TestCase): 15 | """Test that it can take non-square images of max dimension larger than 256px.""" 16 | 17 | def test_filename(self): # pylint: disable=R0201 18 | predictor = ParagraphTextRecognizer() 19 | num_text_lines_by_name = {"a01-000u-cropped": 7} 20 | for filename in (SUPPORT_DIRNAME).glob("*.jpg"): 21 | full_image = util.read_image(str(filename), grayscale=True) 22 | predicted_text, line_region_crops = predictor.predict(full_image) 23 | print(predicted_text) 24 | assert len(line_region_crops) == num_text_lines_by_name[filename.stem] 25 | -------------------------------------------------------------------------------- /text_recognizer/util.py: -------------------------------------------------------------------------------- 1 | """Utility functions for text_recognizer module.""" 2 | # Hide lines below until Lab 8 3 | import base64 4 | 5 | # Hide lines above until Lab 8 6 | from concurrent.futures import as_completed, ThreadPoolExecutor 7 | from pathlib import Path 8 | from typing import Union 9 | from urllib.request import urlopen, urlretrieve 10 | import hashlib 11 | import os 12 | 13 | import numpy as np 14 | import cv2 15 | from tqdm import tqdm 16 | 17 | 18 | def read_image(image_uri: Union[Path, str], grayscale=False) -> np.array: 19 | """Read image_uri.""" 20 | 21 | def read_image_from_filename(image_filename, imread_flag): 22 | return cv2.imread(str(image_filename), imread_flag) 23 | 24 | def read_image_from_url(image_url, imread_flag): 25 | url_response = urlopen(str(image_url)) # nosec 26 | 
img_array = np.array(bytearray(url_response.read()), dtype=np.uint8) 27 | return cv2.imdecode(img_array, imread_flag) 28 | 29 | imread_flag = cv2.IMREAD_GRAYSCALE if grayscale else cv2.IMREAD_COLOR 30 | local_file = os.path.exists(image_uri) 31 | try: 32 | img = None 33 | if local_file: 34 | img = read_image_from_filename(image_uri, imread_flag) 35 | else: 36 | img = read_image_from_url(image_uri, imread_flag) 37 | assert img is not None 38 | except Exception as e: 39 | raise ValueError("Could not load image at {}: {}".format(image_uri, e)) 40 | return img 41 | 42 | 43 | # Hide lines below until Lab 8 44 | def read_b64_image(b64_string, grayscale=False): 45 | """Load base64-encoded images.""" 46 | imread_flag = cv2.IMREAD_GRAYSCALE if grayscale else cv2.IMREAD_COLOR 47 | try: 48 | _, b64_data = b64_string.split(",") 49 | return cv2.imdecode(np.frombuffer(base64.b64decode(b64_data), np.uint8), imread_flag) 50 | except Exception as e: 51 | raise ValueError("Could not load image from b64 {}: {}".format(b64_string, e)) 52 | 53 | 54 | # Hide lines above until Lab 8 55 | def write_image(image: np.ndarray, filename: Union[Path, str]) -> None: 56 | """Write image to file.""" 57 | cv2.imwrite(str(filename), image) 58 | 59 | 60 | def compute_sha256(filename: Union[Path, str]): 61 | """Return SHA256 checksum of a file.""" 62 | with open(filename, "rb") as f: 63 | return hashlib.sha256(f.read()).hexdigest() 64 | 65 | 66 | class TqdmUpTo(tqdm): 67 | """From https://github.com/tqdm/tqdm/blob/master/examples/tqdm_wget.py""" 68 | 69 | def update_to(self, blocks=1, bsize=1, tsize=None): 70 | """ 71 | Parameters 72 | ---------- 73 | blocks : int, optional 74 | Number of blocks transferred so far [default: 1]. 75 | bsize : int, optional 76 | Size of each block (in tqdm units) [default: 1]. 77 | tsize : int, optional 78 | Total size (in tqdm units). If [default: None] remains unchanged. 
79 | """ 80 | if tsize is not None: 81 | self.total = tsize # pylint: disable=attribute-defined-outside-init 82 | self.update(blocks * bsize - self.n) # will also set self.n = b * bsize 83 | 84 | 85 | def download_url(url, filename): 86 | """Download a file from url to filename, with a progress bar.""" 87 | with TqdmUpTo(unit="B", unit_scale=True, unit_divisor=1024, miniters=1) as t: 88 | urlretrieve(url, filename, reporthook=t.update_to, data=None) # nosec 89 | 90 | 91 | # Hide lines below until Lab 6 92 | def download_urls(urls, filenames): 93 | """Download urls to filenames in a multi-threaded way.""" 94 | with ThreadPoolExecutor() as executor: 95 | futures = [executor.submit(urlretrieve, url, filename) for url, filename in zip(urls, filenames)] 96 | for future in tqdm(as_completed(futures), total=len(futures)): 97 | try: 98 | future.result() 99 | except Exception as e: # pylint: disable=broad-except 100 | print("Error", e) 101 | 102 | 103 | # Hide lines above until Lab 6 104 | -------------------------------------------------------------------------------- /text_recognizer/weights/CharacterModel_EmnistDataset_mlp_weights.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:9f4d3191391db8f3ba58c70e0e9578be68632e2cfc952794a5c81d735ccab530 3 | size 595520 4 | -------------------------------------------------------------------------------- /text_recognizer/weights/LineDetectorModel_IamParagraphsDataset_fcn_weights.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a4da991d0accee3b999ba55d5c6a2fde4150112bf935c4c10e237e2065427ab6 3 | size 745984 4 | -------------------------------------------------------------------------------- /text_recognizer/weights/LineModelCtc_EmnistLinesDataset_line_lstm_ctc_weights.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b2befc73c19e5f30a6eb5c83ad4c9261b64b3fe23cb8e64e7e4c9b13dce863db 3 | size 2243720 4 | -------------------------------------------------------------------------------- /text_recognizer/weights/LineModelCtc_IamLinesDataset_line_lstm_ctc_weights.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1948f55bbeb7a98b7ef643967c54000c63fb4c5fff32a1115cb7cd0e9e8da0e4 3 | size 2243720 4 | -------------------------------------------------------------------------------- /training/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-full-stack/fsdl-text-recognizer/a99a3d3f0594dfceb249a56e8362337f9e12897e/training/__init__.py -------------------------------------------------------------------------------- /training/experiments/cnn.json: -------------------------------------------------------------------------------- 1 | { 2 | "experiment_group": "Line CNN", 3 | "experiments": [ 4 | { 5 | "dataset": "EmnistLinesDataset", 6 | "model": "LineModel", 7 | "network": "line_cnn_all_conv", 8 | "network_args": { 9 | "window_width": 14, 10 | "window_stride": 14 11 | } 12 | }, 13 | { 14 | "dataset": "EmnistLinesDataset", 15 | "model": "LineModel", 16 | "network": "line_cnn_all_conv", 17 | "network_args": { 18 | "window_width": 7, 19 | "window_stride": 7 20 | } 21 | } 22 | ] 23 | } 24 | 
-------------------------------------------------------------------------------- /training/experiments/lstm_ctc.json: -------------------------------------------------------------------------------- 1 | { 2 | "experiment_group": "LineCtc2", 3 | "experiments": [ 4 | { 5 | "dataset": "EmnistLinesDataset", 6 | "model": "LineModelCtc", 7 | "network": "line_lstm_ctc", 8 | "network_args": { 9 | "window_width": 28, 10 | "window_stride": 14 11 | } 12 | }, 13 | { 14 | "dataset": "EmnistLinesDataset", 15 | "model": "LineModelCtc", 16 | "network": "line_lstm_ctc", 17 | "network_args": { 18 | "window_width": 14, 19 | "window_stride": 7 20 | } 21 | }, 22 | { 23 | "dataset": "EmnistLinesDataset", 24 | "dataset_args": { 25 | "max_length": 34 26 | }, 27 | "model": "LineModelCtc", 28 | "network": "line_lstm_ctc", 29 | "network_args": { 30 | "window_width": 28, 31 | "window_stride": 14 32 | } 33 | }, 34 | { 35 | "dataset": "EmnistLinesDataset", 36 | "dataset_args": { 37 | "max_length": 34 38 | }, 39 | "model": "LineModelCtc", 40 | "network": "line_lstm_ctc", 41 | "network_args": { 42 | "window_width": 14, 43 | "window_stride": 7 44 | } 45 | }, 46 | { 47 | "dataset": "IamLinesDataset", 48 | "model": "LineModelCtc", 49 | "network": "line_lstm_ctc", 50 | "network_args": { 51 | "window_width": 28, 52 | "window_stride": 14 53 | } 54 | }, 55 | { 56 | "dataset": "IamLinesDataset", 57 | "model": "LineModelCtc", 58 | "network": "line_lstm_ctc", 59 | "network_args": { 60 | "window_width": 14, 61 | "window_stride": 7 62 | } 63 | } 64 | ] 65 | } 66 | -------------------------------------------------------------------------------- /training/experiments/sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "experiment_group": "Sample Experiments", 3 | "experiments": [ 4 | { 5 | "dataset": "EmnistDataset", 6 | "model": "CharacterModel", 7 | "network": "mlp", 8 | "network_args": { 9 | "num_layers": 2 10 | }, 11 | "train_args": { 12 | "batch_size": 256 13 | } 14 | }, 15 | { 16 | "dataset": "EmnistDataset", 17 | "model": "CharacterModel", 18 | "network": "mlp", 19 | "network_args": { 20 | "num_layers": 4 21 | }, 22 | "train_args": { 23 | "batch_size": 256 24 | } 25 | }, 26 | { 27 | "dataset": "EmnistDataset", 28 | "model": "CharacterModel", 29 | "network": "lenet", 30 | "train_args": { 31 | "batch_size": 128 32 | } 33 | } 34 | ] 35 | } 36 | -------------------------------------------------------------------------------- /training/gpu_manager.py: -------------------------------------------------------------------------------- 1 | """GPUManager class.""" 2 | import os 3 | import time 4 | 5 | import gpustat 6 | import numpy as np 7 | from redlock import Redlock 8 | 9 | 10 | GPU_LOCK_TIMEOUT = 5000 # ms 11 | 12 | 13 | class GPUManager: 14 | """Class for allocating GPUs.""" 15 | 16 | def __init__(self, verbose: bool = False): 17 | self.lock_manager = Redlock([{"host": "localhost", "port": 6379, "db": 0}]) 18 | self.verbose = verbose 19 | 20 | def get_free_gpu(self): 21 | """ 22 | If some GPUs are available, try reserving one by checking out an exclusive redis lock. 23 | If none available or can't get lock, sleep and check again. 
24 | """ 25 | while True: 26 | gpu_ind = self._get_free_gpu() 27 | if gpu_ind is not None: 28 | return gpu_ind 29 | if self.verbose: 30 | print(f"pid {os.getpid()} sleeping") 31 | time.sleep(GPU_LOCK_TIMEOUT / 1000) 32 | 33 | def _get_free_gpu(self): 34 | try: 35 | available_gpu_inds = [ 36 | gpu.index for gpu in gpustat.GPUStatCollection.new_query() if gpu.memory_used < 0.5 * gpu.memory_total 37 | ] 38 | except Exception: # pylint: disable=broad-except 39 | return [0] # Return dummy GPU index if no CUDA GPUs are installed 40 | 41 | if available_gpu_inds: 42 | gpu_ind = np.random.choice(available_gpu_inds) 43 | if self.verbose: 44 | print(f"pid {os.getpid()} picking gpu {gpu_ind}") 45 | if self.lock_manager.lock(f"gpu_{gpu_ind}", GPU_LOCK_TIMEOUT): 46 | return int(gpu_ind) 47 | if self.verbose: 48 | print(f"pid {os.getpid()} couldnt get lock") 49 | return None 50 | -------------------------------------------------------------------------------- /training/prepare_experiments.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Simple way to run experiments defined in a file.""" 3 | import argparse 4 | import json 5 | 6 | 7 | def run_experiments(experiments_filename): 8 | """Run experiments from file.""" 9 | with open(experiments_filename) as f: 10 | experiments_config = json.load(f) 11 | num_experiments = len(experiments_config["experiments"]) 12 | for ind in range(num_experiments): 13 | experiment_config = experiments_config["experiments"][ind] 14 | experiment_config["experiment_group"] = experiments_config["experiment_group"] 15 | print(f"python training/run_experiment.py --gpu=-1 '{json.dumps(experiment_config)}'") 16 | 17 | 18 | def main(): 19 | """Parse command-line arguments and run experiments from provided file.""" 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument("experiments_filename", type=str, help="Filename of JSON file of experiments to run.") 22 | args = parser.parse_args() 23 | run_experiments(args.experiments_filename) 24 | 25 | 26 | if __name__ == "__main__": 27 | main() 28 | -------------------------------------------------------------------------------- /training/run_experiment.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Script to run an experiment.""" 3 | import argparse 4 | import json 5 | import importlib 6 | from typing import Dict 7 | import os 8 | 9 | # Hide lines below until Lab 3 10 | import wandb 11 | 12 | from training.gpu_manager import GPUManager 13 | 14 | # Hide lines above until Lab 3 15 | from training.util import train_model 16 | 17 | DEFAULT_TRAIN_ARGS = {"batch_size": 64, "epochs": 16} 18 | 19 | 20 | def run_experiment(experiment_config: Dict, save_weights: bool, gpu_ind: int, use_wandb: bool = True): 21 | """ 22 | Run a training experiment. 
23 | 24 | Parameters 25 | ---------- 26 | experiment_config (dict) 27 | Of the form 28 | { 29 | "dataset": "EmnistLinesDataset", 30 | "dataset_args": { 31 | "max_overlap": 0.4, 32 | "subsample_fraction": 0.2 33 | }, 34 | "model": "LineModel", 35 | "network": "line_cnn_all_conv", 36 | "network_args": { 37 | "window_width": 14, 38 | "window_stride": 7 39 | }, 40 | "train_args": { 41 | "batch_size": 128, 42 | "epochs": 10 43 | } 44 | } 45 | save_weights (bool) 46 | If True, will save the final model weights to a canonical location (see Model in models/base.py) 47 | gpu_ind (int) 48 | specifies which gpu to use (or -1 for first available) 49 | use_wandb (bool) 50 | sync training run to wandb 51 | """ 52 | print(f"Running experiment with config {experiment_config} on GPU {gpu_ind}") 53 | 54 | datasets_module = importlib.import_module("text_recognizer.datasets") 55 | dataset_class_ = getattr(datasets_module, experiment_config["dataset"]) 56 | dataset_args = experiment_config.get("dataset_args", {}) 57 | dataset = dataset_class_(**dataset_args) 58 | dataset.load_or_generate_data() 59 | print(dataset) 60 | 61 | models_module = importlib.import_module("text_recognizer.models") 62 | model_class_ = getattr(models_module, experiment_config["model"]) 63 | 64 | networks_module = importlib.import_module("text_recognizer.networks") 65 | network_fn_ = getattr(networks_module, experiment_config["network"]) 66 | network_args = experiment_config.get("network_args", {}) 67 | model = model_class_( 68 | dataset_cls=dataset_class_, network_fn=network_fn_, dataset_args=dataset_args, network_args=network_args, 69 | ) 70 | print(model) 71 | 72 | experiment_config["train_args"] = { 73 | **DEFAULT_TRAIN_ARGS, 74 | **experiment_config.get("train_args", {}), 75 | } 76 | experiment_config["experiment_group"] = experiment_config.get("experiment_group", None) 77 | experiment_config["gpu_ind"] = gpu_ind 78 | 79 | # Hide lines below until Lab 3 80 | if use_wandb: 81 | wandb.init(config=experiment_config) 82 | # Hide lines above until Lab 3 83 | 84 | train_model( 85 | model, 86 | dataset, 87 | epochs=experiment_config["train_args"]["epochs"], 88 | batch_size=experiment_config["train_args"]["batch_size"], 89 | use_wandb=use_wandb, 90 | ) 91 | score = model.evaluate(dataset.x_test, dataset.y_test) 92 | print(f"Test evaluation: {score}") 93 | 94 | # Hide lines below until Lab 3 95 | if use_wandb: 96 | wandb.log({"test_metric": score}) 97 | # Hide lines above until Lab 3 98 | 99 | if save_weights: 100 | model.save_weights() 101 | 102 | 103 | def _parse_args(): 104 | """Parse command-line arguments.""" 105 | parser = argparse.ArgumentParser() 106 | parser.add_argument("--gpu", type=int, default=0, help="Provide index of GPU to use.") 107 | parser.add_argument( 108 | "--save", 109 | default=False, 110 | dest="save", 111 | action="store_true", 112 | help="If true, then final weights will be saved to canonical, version-controlled location", 113 | ) 114 | parser.add_argument( 115 | "experiment_config", 116 | type=str, 117 | help='Experimenet JSON (\'{"dataset": "EmnistDataset", "model": "CharacterModel", "network": "mlp"}\'', 118 | ) 119 | parser.add_argument( 120 | "--nowandb", default=False, action="store_true", help="If true, do not use wandb for this run", 121 | ) 122 | args = parser.parse_args() 123 | return args 124 | 125 | 126 | def main(): 127 | """Run experiment.""" 128 | args = _parse_args() 129 | # Hide lines below until Lab 3 130 | if args.gpu < 0: 131 | gpu_manager = GPUManager() 132 | args.gpu = 
gpu_manager.get_free_gpu() # Blocks until one is available 133 | # Hide lines above until Lab 3 134 | 135 | experiment_config = json.loads(args.experiment_config) 136 | os.environ["CUDA_VISIBLE_DEVICES"] = f"{args.gpu}" 137 | run_experiment(experiment_config, args.save, args.gpu, use_wandb=not args.nowandb) 138 | 139 | 140 | if __name__ == "__main__": 141 | main() 142 | -------------------------------------------------------------------------------- /training/run_sweep.py: -------------------------------------------------------------------------------- 1 | """W&B Sweep Functionality.""" 2 | import os 3 | import signal 4 | import subprocess 5 | import sys 6 | import json 7 | from typing import Tuple 8 | from ast import literal_eval 9 | 10 | DEFAULT_CONFIG = { 11 | "dataset": "IamLinesDataset", 12 | "dataset_args": {"subsample_fraction": 0.33}, 13 | "model": "LineModel", 14 | "network": "line_lstm_ctc", 15 | "train_args": {"batch_size": 128, "epochs": 10}, 16 | } 17 | 18 | 19 | def args_to_json(default_config: dict, preserve_args: tuple = ("gpu", "save")) -> Tuple[dict, list]: 20 | """Convert command line arguments to nested config values 21 | 22 | i.e. run_sweep.py --dataset_args.foo=1.7 23 | 24 | { 25 | "dataset_args": { 26 | "foo": 1.7 27 | } 28 | } 29 | 30 | """ 31 | args = [] 32 | config = default_config.copy() 33 | key, val = None, None 34 | for arg in sys.argv[1:]: 35 | if "=" in arg: 36 | key, val = arg.split("=") 37 | elif key: 38 | val = arg 39 | else: 40 | key = arg 41 | if key and val: 42 | parsed_key = key.lstrip("-").split(".") 43 | if parsed_key[0] in preserve_args: 44 | args.append("--{}={}".format(parsed_key[0], val)) 45 | else: 46 | nested = config 47 | for level in parsed_key[:-1]: 48 | nested[level] = config.get(level, {}) 49 | nested = nested[level] 50 | try: 51 | # Convert numerics to floats / ints 52 | val = literal_eval(val) 53 | except ValueError: 54 | pass 55 | nested[parsed_key[-1]] = val 56 | key, val = None, None 57 | return config, args 58 | 59 | 60 | def main(): 61 | config, args = args_to_json(DEFAULT_CONFIG) 62 | env = {k: v for k, v in os.environ.items() if k not in ("WANDB_PROGRAM", "WANDB_ARGS")} 63 | # pylint: disable=subprocess-popen-preexec-fn 64 | run = subprocess.Popen( 65 | ["python", "training/run_experiment.py", *args, json.dumps(config)], env=env, preexec_fn=os.setsid, 66 | ) # nosec 67 | signal.signal(signal.SIGTERM, lambda *args: run.terminate()) 68 | run.wait() 69 | 70 | 71 | if __name__ == "__main__": 72 | main() 73 | -------------------------------------------------------------------------------- /training/sweep_emnist.yaml: -------------------------------------------------------------------------------- 1 | program: training/run_sweep.py 2 | method: grid 3 | metric: 4 | name: val_loss 5 | goal: minimize 6 | parameters: 7 | dataset: 8 | value: EmnistDataset 9 | model: 10 | value: CharacterModel 11 | network: 12 | value: mlp 13 | network_args.layer_size: 14 | values: [128, 256] 15 | network_args.dropout_amount: 16 | values: [0.2, 0.4] 17 | network_args.num_layers: 18 | values: [3, 6] 19 | train_args.batch_size: 20 | values: [64, 128] 21 | train_args.epochs: 22 | value: 5 23 | -------------------------------------------------------------------------------- /training/sweep_iam.yaml: -------------------------------------------------------------------------------- 1 | program: training/run_sweep.py 2 | method: grid 3 | metric: 4 | name: val_loss 5 | goal: minimize 6 | parameters: 7 | dataset: 8 | value: IamLinesDataset 9 | model: 10 | value: 
LineModelCtc 11 | network: 12 | value: line_lstm_ctc 13 | network_args.window_width: 14 | values: [14, 18] 15 | network_args.window_stride: 16 | values: [5, 7] # careful with these 17 | train_args.batch_size: 18 | values: [64, 128] 19 | -------------------------------------------------------------------------------- /training/update_metadata.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Update metadata.toml with SHA-256 hash of the current file.""" 3 | from pathlib import Path 4 | import argparse 5 | 6 | import toml 7 | 8 | from text_recognizer import util 9 | 10 | 11 | def _get_metadata_filename(): 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument("filename", type=str, help="Path to the metadata.toml file to update.") 14 | args = parser.parse_args() 15 | return Path(args.filename).resolve() 16 | 17 | 18 | def main(): 19 | metadata_filename = _get_metadata_filename() 20 | 21 | metadata = toml.load(metadata_filename) 22 | 23 | data_filename = metadata_filename.parents[0] / metadata["filename"] 24 | supposed_data_sha256 = metadata["sha256"] 25 | actual_data_sha256 = util.compute_sha256(data_filename) 26 | 27 | if supposed_data_sha256 == actual_data_sha256: 28 | print("Nothing to update: SHA-256 matches") 29 | return 30 | 31 | print("Updating metadata SHA-256") 32 | metadata["sha256"] = actual_data_sha256 33 | with open(metadata_filename, "w") as f: 34 | toml.dump(metadata, f) 35 | 36 | 37 | if __name__ == "__main__": 38 | main() 39 | -------------------------------------------------------------------------------- /training/util.py: -------------------------------------------------------------------------------- 1 | """Function to train a model.""" 2 | from time import time 3 | 4 | from tensorflow.keras.callbacks import EarlyStopping, Callback 5 | 6 | # Hide lines below until Lab 3 7 | import wandb 8 | from wandb.keras import WandbCallback 9 | 10 | # Hide lines above until Lab 3 11 | 12 | from text_recognizer.datasets.dataset import Dataset 13 | from text_recognizer.models.base import Model 14 | 15 | EARLY_STOPPING = True 16 | 17 | 18 | # Hide lines below until Lab 3 19 | class WandbImageLogger(Callback): 20 | """Custom callback for logging image predictions""" 21 | 22 | def __init__(self, model_wrapper: Model, dataset: Dataset, example_count: int = 4): 23 | super().__init__() 24 | self.model_wrapper = model_wrapper 25 | self.val_images = dataset.x_test[:example_count] # type: ignore 26 | 27 | def on_epoch_end(self, epoch, logs=None): 28 | images = [ 29 | wandb.Image(image, caption="{}: {}".format(*self.model_wrapper.predict_on_image(image))) 30 | for i, image in enumerate(self.val_images) 31 | ] 32 | wandb.log({"examples": images}, commit=False) 33 | 34 | 35 | # Hide lines above until Lab 3 36 | 37 | 38 | def train_model(model: Model, dataset: Dataset, epochs: int, batch_size: int, use_wandb: bool = False) -> Model: 39 | """Train model.""" 40 | callbacks = [] 41 | 42 | if EARLY_STOPPING: 43 | early_stopping = EarlyStopping(monitor="val_loss", min_delta=0.01, patience=3, verbose=1, mode="auto") 44 | callbacks.append(early_stopping) 45 | 46 | # Hide lines below until Lab 3 47 | if use_wandb: 48 | image_callback = WandbImageLogger(model, dataset) 49 | wandb_callback = WandbCallback() 50 | callbacks.append(image_callback) 51 | callbacks.append(wandb_callback) 52 | # Hide lines above until Lab 3 53 | 54 | model.network.summary() 55 | 56 | t = time() 57 | _history = model.fit(dataset=dataset, 
batch_size=batch_size, epochs=epochs, callbacks=callbacks) 58 | print("Training took {:.2f} s".format(time() - t)) 59 | 60 | return model 61 | -------------------------------------------------------------------------------- /wandb/settings: -------------------------------------------------------------------------------- 1 | [default] 2 | entity = fsdl 3 | project = fsdl-text-recognizer-nov2019 4 | base_url = https://api.wandb.ai 5 | 6 | --------------------------------------------------------------------------------
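The sweep configurations above (training/sweep_emnist.yaml and training/sweep_iam.yaml) point W&B at training/run_sweep.py, and wandb/settings supplies the default entity and project. A plausible way to launch one of them with the standard wandb CLI is sketched below; the sweep ID is a placeholder printed by the first command:

wandb sweep training/sweep_emnist.yaml                       # registers the sweep and prints its ID
wandb agent fsdl/fsdl-text-recognizer-nov2019/<sweep_id>     # starts a worker that invokes run_sweep.py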