├── lab7
│   ├── api
│   │   └── __init__.py
│   ├── text_recognizer
│   │   ├── __init__.py
│   │   ├── tests
│   │   │   ├── support
│   │   │   │   ├── emnist
│   │   │   │   │   ├── 8.png
│   │   │   │   │   ├── U.png
│   │   │   │   │   └── e.png
│   │   │   │   ├── emnist_lines
│   │   │   │   │   ├── do that In.png
│   │   │   │   │   ├── Corsi left for.png
│   │   │   │   │   └── or if used the results.png
│   │   │   │   ├── iam_paragraphs
│   │   │   │   │   └── a01-000u-cropped.jpg
│   │   │   │   ├── iam_lines
│   │   │   │   │   ├── He rose from his breakfast-nook bench.png
│   │   │   │   │   ├── and came into the livingroom, where.png
│   │   │   │   │   └── his entrance. He came, almost falling.png
│   │   │   │   ├── create_emnist_support_files.py
│   │   │   │   ├── create_iam_lines_support_files.py
│   │   │   │   └── create_emnist_lines_support_files.py
│   │   │   ├── test_character_predictor.py
│   │   │   └── test_paragraph_text_recognizer.py
│   │   ├── weights
│   │   │   ├── CharacterModel_EmnistDataset_mlp_weights.h5
│   │   │   ├── LineDetectorModel_IamParagraphsDataset_fcn_weights.h5
│   │   │   ├── LineModelCtc_IamLinesDataset_line_lstm_ctc_weights.h5
│   │   │   └── LineModelCtc_EmnistLinesDataset_line_lstm_ctc_weights.h5
│   │   ├── models
│   │   │   ├── __init__.py
│   │   │   └── character_model.py
│   │   ├── networks
│   │   │   ├── __init__.py
│   │   │   ├── misc.py
│   │   │   ├── mlp.py
│   │   │   └── lenet.py
│   │   ├── datasets
│   │   │   ├── __init__.py
│   │   │   ├── emnist_essentials.json
│   │   │   └── dataset.py
│   │   ├── character_predictor.py
│   │   └── line_predictor.py
│   ├── tasks
│   │   ├── test_functionality.sh
│   │   ├── test_validation.sh
│   │   ├── prepare_sample_experiments.sh
│   │   ├── update_fsdl_paragraphs_metadata.sh
│   │   ├── train_lstm_line_predictor.sh
│   │   ├── train_lstm_line_predictor_on_iam.sh
│   │   ├── train_character_predictor.sh
│   │   ├── train_line_detector.sh
│   │   └── lint.sh
│   ├── pyproject.toml
│   ├── wandb
│   │   └── settings
│   ├── setup.cfg
│   ├── training
│   │   ├── sweep_iam.yaml
│   │   ├── sweep_emnist.yaml
│   │   ├── experiments
│   │   │   └── sample.json
│   │   ├── prepare_experiments.py
│   │   ├── update_metadata.py
│   │   └── gpu_manager.py
│   └── evaluation
│       ├── evaluate_character_predictor.py
│       └── evaluate_line_predictor.py
├── lab8
│   ├── api
│   │   ├── __init__.py
│   │   ├── Dockerfile
│   │   ├── tests
│   │   │   └── test_app.py
│   │   └── app.py
│   ├── text_recognizer
│   │   ├── __init__.py
│   │   ├── tests
│   │   │   ├── support
│   │   │   │   ├── emnist
│   │   │   │   │   ├── 8.png
│   │   │   │   │   ├── U.png
│   │   │   │   │   └── e.png
│   │   │   │   ├── emnist_lines
│   │   │   │   │   ├── do that In.png
│   │   │   │   │   ├── Corsi left for.png
│   │   │   │   │   └── or if used the results.png
│   │   │   │   ├── iam_paragraphs
│   │   │   │   │   └── a01-000u-cropped.jpg
│   │   │   │   ├── iam_lines
│   │   │   │   │   ├── He rose from his breakfast-nook bench.png
│   │   │   │   │   ├── and came into the livingroom, where.png
│   │   │   │   │   └── his entrance. He came, almost falling.png
│   │   │   │   ├── create_emnist_support_files.py
│   │   │   │   ├── create_iam_lines_support_files.py
│   │   │   │   └── create_emnist_lines_support_files.py
│   │   │   ├── test_character_predictor.py
│   │   │   └── test_paragraph_text_recognizer.py
│   │   ├── weights
│   │   │   ├── CharacterModel_EmnistDataset_mlp_weights.h5
│   │   │   ├── LineDetectorModel_IamParagraphsDataset_fcn_weights.h5
│   │   │   ├── LineModelCtc_IamLinesDataset_line_lstm_ctc_weights.h5
│   │   │   └── LineModelCtc_EmnistLinesDataset_line_lstm_ctc_weights.h5
│   │   ├── models
│   │   │   ├── __init__.py
│   │   │   └── character_model.py
│   │   ├── networks
│   │   │   ├── __init__.py
│   │   │   ├── misc.py
│   │   │   ├── mlp.py
│   │   │   └── lenet.py
│   │   ├── datasets
│   │   │   ├── __init__.py
│   │   │   ├── emnist_essentials.json
│   │   │   └── dataset.py
│   │   ├── character_predictor.py
│   │   └── line_predictor.py
│   ├── tasks
│   │   ├── test_api.sh
│   │   ├── test_functionality.sh
│   │   ├── test_validation.sh
│   │   ├── run_api_docker.sh
│   │   ├── prepare_sample_experiments.sh
│   │   ├── update_fsdl_paragraphs_metadata.sh
│   │   ├── train_lstm_line_predictor.sh
│   │   ├── train_lstm_line_predictor_on_iam.sh
│   │   ├── build_api_docker.sh
│   │   ├── train_character_predictor.sh
│   │   ├── train_line_detector.sh
│   │   └── lint.sh
│   ├── pyproject.toml
│   ├── wandb
│   │   └── settings
│   ├── setup.cfg
│   ├── training
│   │   ├── sweep_iam.yaml
│   │   ├── sweep_emnist.yaml
│   │   ├── experiments
│   │   │   └── sample.json
│   │   ├── prepare_experiments.py
│   │   ├── update_metadata.py
│   │   └── gpu_manager.py
│   └── evaluation
│       ├── evaluate_character_predictor.py
│       └── evaluate_line_predictor.py
├── lab1
│   ├── text_recognizer
│   │   ├── __init__.py
│   │   ├── datasets
│   │   │   ├── __init__.py
│   │   │   ├── emnist_essentials.json
│   │   │   ├── dataset.py
│   │   │   └── dataset_sequence.py
│   │   ├── models
│   │   │   ├── __init__.py
│   │   │   └── character_model.py
│   │   ├── networks
│   │   │   ├── __init__.py
│   │   │   ├── misc.py
│   │   │   ├── mlp.py
│   │   │   └── lenet.py
│   │   ├── tests
│   │   │   ├── support
│   │   │   │   ├── emnist
│   │   │   │   │   ├── 8.png
│   │   │   │   │   ├── U.png
│   │   │   │   │   └── e.png
│   │   │   │   ├── emnist_lines
│   │   │   │   │   ├── do that In.png
│   │   │   │   │   ├── Corsi left for.png
│   │   │   │   │   └── or if used the results.png
│   │   │   │   ├── create_emnist_support_files.py
│   │   │   │   └── create_emnist_lines_support_files.py
│   │   │   └── test_character_predictor.py
│   │   ├── weights
│   │   │   └── CharacterModel_EmnistDataset_mlp_weights.h5
│   │   └── character_predictor.py
│   ├── tasks
│   │   ├── test_functionality.sh
│   │   └── train_character_predictor.sh
│   └── training
│       └── util.py
├── lab2
│   ├── text_recognizer
│   │   ├── __init__.py
│   │   ├── tests
│   │   │   ├── support
│   │   │   │   ├── emnist
│   │   │   │   │   ├── 8.png
│   │   │   │   │   ├── U.png
│   │   │   │   │   └── e.png
│   │   │   │   ├── emnist_lines
│   │   │   │   │   ├── do that In.png
│   │   │   │   │   ├── Corsi left for.png
│   │   │   │   │   └── or if used the results.png
│   │   │   │   ├── create_emnist_support_files.py
│   │   │   │   └── create_emnist_lines_support_files.py
│   │   │   └── test_character_predictor.py
│   │   ├── weights
│   │   │   └── CharacterModel_EmnistDataset_mlp_weights.h5
│   │   ├── datasets
│   │   │   ├── __init__.py
│   │   │   ├── emnist_essentials.json
│   │   │   └── dataset.py
│   │   ├── models
│   │   │   ├── __init__.py
│   │   │   └── character_model.py
│   │   ├── networks
│   │   │   ├── __init__.py
│   │   │   ├── misc.py
│   │   │   ├── mlp.py
│   │   │   └── lenet.py
│   │   ├── character_predictor.py
│   │   └── line_predictor.py
│   ├── tasks
│   │   ├── test_functionality.sh
│   │   └── train_character_predictor.sh
│   └── training
│       └── util.py
├── lab3
│   ├── text_recognizer
│   │   ├── __init__.py
│   │   ├── tests
│   │   │   ├── support
│   │   │   │   ├── emnist
│   │   │   │   │   ├── 8.png
│   │   │   │   │   ├── U.png
│   │   │   │   │   └── e.png
│   │   │   │   ├── emnist_lines
│   │   │   │   │   ├── do that In.png
│   │   │   │   │   ├── Corsi left for.png
│   │   │   │   │   └── or if used the results.png
│   │   │   │   ├── create_emnist_support_files.py
│   │   │   │   └── create_emnist_lines_support_files.py
│   │   │   └── test_character_predictor.py
│   │   ├── weights
│   │   │   ├── CharacterModel_EmnistDataset_mlp_weights.h5
│   │   │   └── LineModelCtc_EmnistLinesDataset_line_lstm_ctc_weights.h5
│   │   ├── datasets
│   │   │   ├── __init__.py
│   │   │   ├── emnist_essentials.json
│   │   │   └── dataset.py
│   │   ├── models
│   │   │   ├── __init__.py
│   │   │   └── character_model.py
│   │   ├── networks
│   │   │   ├── __init__.py
│   │   │   ├── misc.py
│   │   │   ├── mlp.py
│   │   │   └── lenet.py
│   │   ├── character_predictor.py
│   │   └── line_predictor.py
│   ├── tasks
│   │   ├── test_functionality.sh
│   │   ├── train_lstm_line_predictor.sh
│   │   └── train_character_predictor.sh
│   └── readme.md
├── lab4
│   ├── text_recognizer
│   │   ├── __init__.py
│   │   ├── tests
│   │   │   ├── support
│   │   │   │   ├── emnist
│   │   │   │   │   ├── 8.png
│   │   │   │   │   ├── U.png
│   │   │   │   │   └── e.png
│   │   │   │   ├── emnist_lines
│   │   │   │   │   ├── do that In.png
│   │   │   │   │   ├── Corsi left for.png
│   │   │   │   │   └── or if used the results.png
│   │   │   │   ├── iam_lines
│   │   │   │   │   ├── He rose from his breakfast-nook bench.png
│   │   │   │   │   ├── and came into the livingroom, where.png
│   │   │   │   │   └── his entrance. He came, almost falling.png
│   │   │   │   ├── create_emnist_support_files.py
│   │   │   │   ├── create_iam_lines_support_files.py
│   │   │   │   └── create_emnist_lines_support_files.py
│   │   │   └── test_character_predictor.py
│   │   ├── weights
│   │   │   ├── CharacterModel_EmnistDataset_mlp_weights.h5
│   │   │   ├── LineModelCtc_IamLinesDataset_line_lstm_ctc_weights.h5
│   │   │   └── LineModelCtc_EmnistLinesDataset_line_lstm_ctc_weights.h5
│   │   ├── models
│   │   │   ├── __init__.py
│   │   │   └── character_model.py
│   │   ├── datasets
│   │   │   ├── __init__.py
│   │   │   ├── emnist_essentials.json
│   │   │   └── dataset.py
│   │   ├── networks
│   │   │   ├── __init__.py
│   │   │   ├── misc.py
│   │   │   ├── mlp.py
│   │   │   └── lenet.py
│   │   ├── character_predictor.py
│   │   └── line_predictor.py
│   ├── tasks
│   │   ├── test_functionality.sh
│   │   ├── prepare_sample_experiments.sh
│   │   ├── train_lstm_line_predictor.sh
│   │   ├── train_lstm_line_predictor_on_iam.sh
│   │   └── train_character_predictor.sh
│   ├── wandb
│   │   └── settings
│   └── training
│       ├── sweep_iam.yaml
│       ├── sweep_emnist.yaml
│       ├── experiments
│       │   └── sample.json
│       ├── prepare_experiments.py
│       └── gpu_manager.py
├── lab5
│   ├── text_recognizer
│   │   ├── __init__.py
│   │   ├── tests
│   │   │   ├── support
│   │   │   │   ├── emnist
│   │   │   │   │   ├── 8.png
│   │   │   │   │   ├── U.png
│   │   │   │   │   └── e.png
│   │   │   │   ├── emnist_lines
│   │   │   │   │   ├── do that In.png
│   │   │   │   │   ├── Corsi left for.png
│   │   │   │   │   └── or if used the results.png
│   │   │   │   ├── iam_paragraphs
│   │   │   │   │   └── a01-000u-cropped.jpg
│   │   │   │   ├── iam_lines
│   │   │   │   │   ├── He rose from his breakfast-nook bench.png
│   │   │   │   │   ├── and came into the livingroom, where.png
│   │   │   │   │   └── his entrance. He came, almost falling.png
│   │   │   │   ├── create_emnist_support_files.py
│   │   │   │   ├── create_iam_lines_support_files.py
│   │   │   │   └── create_emnist_lines_support_files.py
│   │   │   ├── test_character_predictor.py
│   │   │   └── test_paragraph_text_recognizer.py
│   │   ├── weights
│   │   │   ├── CharacterModel_EmnistDataset_mlp_weights.h5
│   │   │   ├── LineDetectorModel_IamParagraphsDataset_fcn_weights.h5
│   │   │   ├── LineModelCtc_IamLinesDataset_line_lstm_ctc_weights.h5
│   │   │   └── LineModelCtc_EmnistLinesDataset_line_lstm_ctc_weights.h5
│   │   ├── models
│   │   │   ├── __init__.py
│   │   │   └── character_model.py
│   │   ├── networks
│   │   │   ├── __init__.py
│   │   │   ├── misc.py
│   │   │   ├── mlp.py
│   │   │   └── lenet.py
│   │   ├── datasets
│   │   │   ├── __init__.py
│   │   │   ├── emnist_essentials.json
│   │   │   └── dataset.py
│   │   ├── character_predictor.py
│   │   └── line_predictor.py
│   ├── tasks
│   │   ├── test_functionality.sh
│   │   ├── prepare_sample_experiments.sh
│   │   ├── train_lstm_line_predictor.sh
│   │   ├── train_lstm_line_predictor_on_iam.sh
│   │   ├── train_character_predictor.sh
│   │   └── train_line_detector.sh
│   ├── wandb
│   │   └── settings
│   └── training
│       ├── sweep_iam.yaml
│       ├── sweep_emnist.yaml
│       ├── experiments
│       │   └── sample.json
│       ├── prepare_experiments.py
│       └── gpu_manager.py
├── lab6
│   ├── text_recognizer
│   │   ├── __init__.py
│   │   ├── tests
│   │   │   ├── support
│   │   │   │   ├── emnist
│   │   │   │   │   ├── 8.png
│   │   │   │   │   ├── U.png
│   │   │   │   │   └── e.png
│   │   │   │   ├── emnist_lines
│   │   │   │   │   ├── do that In.png
│   │   │   │   │   ├── Corsi left for.png
│   │   │   │   │   └── or if used the results.png
│   │   │   │   ├── iam_paragraphs
│   │   │   │   │   └── a01-000u-cropped.jpg
│   │   │   │   ├── iam_lines
│   │   │   │   │   ├── He rose from his breakfast-nook bench.png
│   │   │   │   │   ├── and came into the livingroom, where.png
│   │   │   │   │   └── his entrance. He came, almost falling.png
│   │   │   │   ├── create_emnist_support_files.py
│   │   │   │   ├── create_iam_lines_support_files.py
│   │   │   │   └── create_emnist_lines_support_files.py
│   │   │   ├── test_character_predictor.py
│   │   │   └── test_paragraph_text_recognizer.py
│   │   ├── weights
│   │   │   ├── CharacterModel_EmnistDataset_mlp_weights.h5
│   │   │   ├── LineDetectorModel_IamParagraphsDataset_fcn_weights.h5
│   │   │   ├── LineModelCtc_IamLinesDataset_line_lstm_ctc_weights.h5
│   │   │   └── LineModelCtc_EmnistLinesDataset_line_lstm_ctc_weights.h5
│   │   ├── models
│   │   │   ├── __init__.py
│   │   │   └── character_model.py
│   │   ├── networks
│   │   │   ├── __init__.py
│   │   │   ├── misc.py
│   │   │   ├── mlp.py
│   │   │   └── lenet.py
│   │   ├── datasets
│   │   │   ├── __init__.py
│   │   │   ├── emnist_essentials.json
│   │   │   └── dataset.py
│   │   ├── character_predictor.py
│   │   └── line_predictor.py
│   ├── tasks
│   │   ├── test_functionality.sh
│   │   ├── prepare_sample_experiments.sh
│   │   ├── update_fsdl_paragraphs_metadata.sh
│   │   ├── train_lstm_line_predictor.sh
│   │   ├── train_lstm_line_predictor_on_iam.sh
│   │   ├── train_character_predictor.sh
│   │   └── train_line_detector.sh
│   ├── wandb
│   │   └── settings
│   └── training
│       ├── sweep_iam.yaml
│       ├── sweep_emnist.yaml
│       ├── experiments
│       │   └── sample.json
│       ├── prepare_experiments.py
│       ├── update_metadata.py
│       └── gpu_manager.py
├── .gitattributes
├── requirements.in
├── data
│   └── raw
│       ├── iam
│       │   ├── metadata.toml
│       │   └── readme.md
│       ├── emnist
│       │   ├── metadata.toml
│       │   └── readme.md
│       └── fsdl_handwriting
│           ├── metadata.toml
│           └── readme.md
├── environment.yml
├── requirements-dev.in
└── .gitignore

/lab7/api/__init__.py (empty; same in lab8)
/lab1/text_recognizer/__init__.py (empty; identical in labs 1-8)

/lab8/tasks/test_api.sh:
    #!/bin/bash
    pytest -s api

/lab1/tasks/test_functionality.sh (identical in labs 1-8):
    #!/bin/bash
    pytest -s text_recognizer

/lab7/tasks/test_validation.sh (identical in labs 7 and 8):
    #!/bin/bash
    pytest -s evaluation/evaluate*
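
The lab8 API itself (api/app.py) and its test (api/tests/test_app.py) appear in this dump only as paths in the tree. As a rough illustration of what test_api.sh exercises, here is a minimal hedged sketch of such a test, assuming a Flask app (Flask is pinned in requirements.in below) exposing a hypothetical /v1/predict route that accepts a base64-encoded image; the route name and payload shape are assumptions, not the repo's actual API.

    # Hedged sketch only: app.py is not included in this dump.
    import base64
    import json

    from api.app import app  # hypothetical import path


    def test_predict():
        client = app.test_client()  # Flask's built-in test client
        with open("text_recognizer/tests/support/emnist/8.png", "rb") as f:
            encoded = base64.b64encode(f.read()).decode()
        response = client.post("/v1/predict", json={"image": "data:image/png;base64," + encoded})
        assert response.status_code == 200
        assert "prediction" in json.loads(response.data)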

/lab7/pyproject.toml (identical in labs 7 and 8):
    [tool.black]
    line-length = 120
    target-version = ['py37']

/.gitattributes:
    *.png filter=lfs diff=lfs merge=lfs -text
    *.h5 filter=lfs diff=lfs merge=lfs -text

/lab8/tasks/run_api_docker.sh:
    #!/bin/bash
    docker run -p 8000:8000 --name api -it --rm text_recognizer_api

/lab1/text_recognizer/datasets/__init__.py:
    """Dataset modules."""
    from .emnist_dataset import EmnistDataset

/lab1/text_recognizer/models/__init__.py:
    """Model modules."""
    from .character_model import CharacterModel

/lab4/tasks/prepare_sample_experiments.sh (identical in labs 4-8):
    #!/bin/bash
    python training/prepare_experiments.py training/experiments/sample.json
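
training/prepare_experiments.py is invoked by the task script above but its source is not part of this dump. A plausible hedged sketch, assuming sample.json holds a list of experiment configs under an "experiments" key (the key name and the --gpu flag are assumptions), is a script that expands the file into one run_experiment.py command per experiment:

    # Hedged sketch of training/prepare_experiments.py; the real file is not shown.
    import json
    import sys


    def main(experiments_filename):
        with open(experiments_filename) as f:
            experiments_config = json.load(f)
        # Print one shell command per experiment, to be run by an external scheduler.
        for experiment_config in experiments_config["experiments"]:
            print(f"python training/run_experiment.py --gpu=-1 '{json.dumps(experiment_config)}'")


    if __name__ == "__main__":
        main(sys.argv[1])

Piping that output to a GPU-aware runner would fit naturally with the gpu_manager.py module listed in the tree, though that pairing is a guess.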

/lab4/wandb/settings (identical in labs 4-8):
    [default]
    entity = fsdl
    project = fsdl-text-recognizer-nov2019
    base_url = https://api.wandb.ai

/lab6/tasks/update_fsdl_paragraphs_metadata.sh (identical in labs 6-8):
    #!/bin/bash

    python training/update_metadata.py data/raw/fsdl_handwriting/metadata.toml

/lab1/text_recognizer/networks/__init__.py:
    """Neural network code modules."""
    from .mlp import mlp
    from .lenet import lenet
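
The wandb/settings file shown above pins the default entity, project, and API host, so a training script can call wandb.init() with no arguments and land in the right project. A minimal hedged sketch of that hand-off (the logged metric is made up for illustration):

    import wandb

    # The wandb client reads wandb/settings for defaults;
    # passing them explicitly, as here, is equivalent.
    wandb.init(entity="fsdl", project="fsdl-text-recognizer-nov2019")
    wandb.log({"val_loss": 0.42})  # hypothetical metric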

/lab1/text_recognizer/tests/support/emnist/8.png (identical LFS pointer in labs 1-8):
    version https://git-lfs.github.com/spec/v1
    oid sha256:455c3788a677a33583aec467f49d1917d0b34c0785b3eee6867699f0d2ffbc1a
    size 498

/lab1/text_recognizer/tests/support/emnist/U.png (identical in labs 1-8):
    version https://git-lfs.github.com/spec/v1
    oid sha256:6c1490758a7d28fde2a2e0bdc0a644c19a828500901f4417f205def23c2ad3d5
    size 524

/lab1/text_recognizer/tests/support/emnist/e.png (identical in labs 1-8):
    version https://git-lfs.github.com/spec/v1
    oid sha256:001a7679be1c0c622354aebcbdcc0f2e992e1fc3295ee1d6fef1c1dd1613508e
    size 563
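
Per the .gitattributes shown earlier, every *.png and *.h5 in the repo is checked in as a Git LFS pointer like the ones above: the real bytes live in LFS storage, and the pointer records only their sha256 and size. A small sketch (not from the repo) of checking a fetched file against its pointer:

    # Illustrative helper, not part of the repo.
    import hashlib
    import pathlib


    def matches_pointer(file_path, pointer_oid, pointer_size):
        """Return True if the file's size and sha256 match a Git LFS pointer."""
        data = pathlib.Path(file_path).read_bytes()
        return len(data) == pointer_size and hashlib.sha256(data).hexdigest() == pointer_oid


    # e.g. against the 8.png pointer above:
    # matches_pointer("8.png", "455c3788a677a33583aec467f49d1917d0b34c0785b3eee6867699f0d2ffbc1a", 498)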

/lab3/tasks/train_lstm_line_predictor.sh (identical in labs 3-8):
    #!/bin/bash
    python training/run_experiment.py --save '{"dataset": "EmnistLinesDataset", "model": "LineModelCtc", "network": "line_lstm_ctc"}'

/lab4/tasks/train_lstm_line_predictor_on_iam.sh (identical in labs 4-8):
    #!/bin/bash
    python training/run_experiment.py --save '{"dataset": "IamLinesDataset", "model": "LineModelCtc", "network": "line_lstm_ctc"}'

/lab1/text_recognizer/tests/support/emnist_lines/do that In.png (identical in labs 1-8):
    version https://git-lfs.github.com/spec/v1
    oid sha256:42bff01a0fd9f03726f12069f19646374f239642ea51819d0687359712d45eb7
    size 2888

/lab8/tasks/build_api_docker.sh:
    #!/bin/bash

    sed 's/tensorflow==/tensorflow-cpu==/' requirements.txt > api/requirements.txt

    docker build -t text_recognizer_api -f api/Dockerfile .

/requirements.in:
    boltons
    editdistance
    flask
    h5py
    numpy
    opencv-python-headless
    requests
    tensorflow==2.2.0rc2
    toml
    tqdm
    wrapt==1.11.*  # due to pylint

/lab1/text_recognizer/tests/support/emnist_lines/Corsi left for.png (identical in labs 1-8):
    version https://git-lfs.github.com/spec/v1
    oid sha256:2e6d26a81d593d7d37d9496763104717e24aa3885cfc993c685eedd29b02ce1f
    size 3763

/data/raw/iam/metadata.toml:
    url = 'https://s3-us-west-2.amazonaws.com/fsdl-public-assets/iam/iamdb.zip'
    filename = 'iamdb.zip'
    sha256 = 'f3c9e87a88a313e557c6d3548ed8a2a1af2dc3c4a678c5f3fc6f972ba4a50c55'

/environment.yml:
    name: fsdl-text-recognizer
    channels:
      - defaults
    dependencies:
      - python=3.7
      - cudatoolkit=10.1
      - cudnn=7.6
      - pip
      - pip:
        - pip-tools
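
Each data/raw/<name>/metadata.toml pairs a download URL with a filename and checksum, which is enough to drive a verify-on-download step. A sketch of such a helper (illustrative only; the repo's actual download code is not shown and presumably lives in the datasets modules):

    # Illustrative downloader; not the repo's implementation.
    import hashlib

    import requests  # requests and toml are both pinned in requirements.in
    import toml


    def download_raw_dataset(metadata_filename):
        metadata = toml.load(metadata_filename)
        response = requests.get(metadata["url"])
        response.raise_for_status()
        # Refuse to write the file unless the checksum matches metadata.toml.
        digest = hashlib.sha256(response.content).hexdigest()
        if digest != metadata["sha256"]:
            raise ValueError(f"sha256 mismatch for {metadata['filename']}: {digest}")
        with open(metadata["filename"], "wb") as f:
            f.write(response.content)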

/lab1/tasks/train_character_predictor.sh (identical in labs 1-8):
    #!/bin/bash
    python training/run_experiment.py --save '{"dataset": "EmnistDataset", "model": "CharacterModel", "network": "mlp", "train_args": {"batch_size": 256}}'

/data/raw/emnist/metadata.toml:
    filename = 'matlab.zip'
    sha256 = 'e1fa805cdeae699a52da0b77c2db17f6feb77eed125f9b45c022e7990444df95'
    url = 'https://s3-us-west-2.amazonaws.com/fsdl-public-assets/matlab.zip'

/lab1/text_recognizer/tests/support/emnist_lines/or if used the results.png (identical in labs 1-8):
    version https://git-lfs.github.com/spec/v1
    oid sha256:bea915331082580d7aaf129da096afd600a15eca4fa562fe78fb57c4f8e5a199
    size 5645

/lab1/text_recognizer/weights/CharacterModel_EmnistDataset_mlp_weights.h5 (identical in labs 1-8):
    version https://git-lfs.github.com/spec/v1
    oid sha256:9f4d3191391db8f3ba58c70e0e9578be68632e2cfc952794a5c81d735ccab530
    size 595520
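
The weight files follow a <Model>_<Dataset>_<network>_weights.h5 naming scheme, and character_predictor.py presumably restores them at prediction time. A hedged sketch of that load, assuming a Keras network whose architecture matches the checkpoint (tensorflow is pinned in requirements.in):

    # Hedged sketch; CharacterModel's real loading code is in models/character_model.py (not shown).
    from tensorflow.keras.models import Model


    def load_character_model_weights(network: Model) -> Model:
        # load_weights requires the in-memory architecture to match the .h5 exactly.
        network.load_weights("text_recognizer/weights/CharacterModel_EmnistDataset_mlp_weights.h5")
        return network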

/lab5/text_recognizer/tests/support/iam_paragraphs/a01-000u-cropped.jpg (binary; the dump records only its source URL, and labs 6-8 carry identical copies at the corresponding paths):
    https://raw.githubusercontent.com/the-full-stack/fsdl-text-recognizer-project/HEAD/lab5/text_recognizer/tests/support/iam_paragraphs/a01-000u-cropped.jpg

/lab4/text_recognizer/tests/support/iam_lines/He rose from his breakfast-nook bench.png (identical in labs 4-8):
    version https://git-lfs.github.com/spec/v1
    oid sha256:8841d95a0008748d5c557061ea59dac2e46a221e30b9c6e9fc6ceac16827094f
    size 4876

/lab4/text_recognizer/tests/support/iam_lines/and came into the livingroom, where.png (identical in labs 4-8):
    version https://git-lfs.github.com/spec/v1
    oid sha256:d2890307c91b9a25f2bec15fcbc15995d824a1c4d991a29f982ad4c09a9a1e6a
    size 3437

/lab4/text_recognizer/tests/support/iam_lines/his entrance. He came, almost falling.png (identical in labs 4-7):
    version https://git-lfs.github.com/spec/v1
    oid sha256:95f159bccf1acebb1c48eeeb5773748032dca76969695be2141b9fc8b28013c2
    size 3600

/lab4/text_recognizer/weights/LineModelCtc_IamLinesDataset_line_lstm_ctc_weights.h5 (identical in labs 4-7):
    version https://git-lfs.github.com/spec/v1
    oid sha256:1948f55bbeb7a98b7ef643967c54000c63fb4c5fff32a1115cb7cd0e9e8da0e4
    size 2243720

/lab5/text_recognizer/weights/LineDetectorModel_IamParagraphsDataset_fcn_weights.h5 (identical in labs 5-7):
    version https://git-lfs.github.com/spec/v1
    oid sha256:a4da991d0accee3b999ba55d5c6a2fde4150112bf935c4c10e237e2065427ab6
    size 745984

/lab8/text_recognizer/tests/support/iam_lines/his entrance.
He came, almost falling.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:95f159bccf1acebb1c48eeeb5773748032dca76969695be2141b9fc8b28013c2 3 | size 3600 4 | -------------------------------------------------------------------------------- /lab8/text_recognizer/weights/LineDetectorModel_IamParagraphsDataset_fcn_weights.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a4da991d0accee3b999ba55d5c6a2fde4150112bf935c4c10e237e2065427ab6 3 | size 745984 4 | -------------------------------------------------------------------------------- /lab8/text_recognizer/weights/LineModelCtc_IamLinesDataset_line_lstm_ctc_weights.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1948f55bbeb7a98b7ef643967c54000c63fb4c5fff32a1115cb7cd0e9e8da0e4 3 | size 2243720 4 | -------------------------------------------------------------------------------- /lab3/text_recognizer/weights/LineModelCtc_EmnistLinesDataset_line_lstm_ctc_weights.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b2befc73c19e5f30a6eb5c83ad4c9261b64b3fe23cb8e64e7e4c9b13dce863db 3 | size 2243720 4 | -------------------------------------------------------------------------------- /lab4/text_recognizer/weights/LineModelCtc_EmnistLinesDataset_line_lstm_ctc_weights.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b2befc73c19e5f30a6eb5c83ad4c9261b64b3fe23cb8e64e7e4c9b13dce863db 3 | size 2243720 4 | -------------------------------------------------------------------------------- /lab5/text_recognizer/weights/LineModelCtc_EmnistLinesDataset_line_lstm_ctc_weights.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b2befc73c19e5f30a6eb5c83ad4c9261b64b3fe23cb8e64e7e4c9b13dce863db 3 | size 2243720 4 | -------------------------------------------------------------------------------- /lab6/text_recognizer/weights/LineModelCtc_EmnistLinesDataset_line_lstm_ctc_weights.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b2befc73c19e5f30a6eb5c83ad4c9261b64b3fe23cb8e64e7e4c9b13dce863db 3 | size 2243720 4 | -------------------------------------------------------------------------------- /lab7/text_recognizer/weights/LineModelCtc_EmnistLinesDataset_line_lstm_ctc_weights.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b2befc73c19e5f30a6eb5c83ad4c9261b64b3fe23cb8e64e7e4c9b13dce863db 3 | size 2243720 4 | -------------------------------------------------------------------------------- /lab8/text_recognizer/weights/LineModelCtc_EmnistLinesDataset_line_lstm_ctc_weights.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b2befc73c19e5f30a6eb5c83ad4c9261b64b3fe23cb8e64e7e4c9b13dce863db 3 | size 2243720 4 | -------------------------------------------------------------------------------- /lab5/tasks/train_line_detector.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python training/run_experiment.py --gpu=1 --save '{"dataset": "IamParagraphsDataset", "model": "LineDetectorModel", "network": "fcn", "train_args": {"batch_size": 16, "epochs": 32}}' 3 | -------------------------------------------------------------------------------- /lab6/tasks/train_line_detector.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python training/run_experiment.py --gpu=1 --save '{"dataset": "IamParagraphsDataset", "model": "LineDetectorModel", "network": "fcn", "train_args": {"batch_size": 16, "epochs": 32}}' 3 | -------------------------------------------------------------------------------- /lab7/tasks/train_line_detector.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python training/run_experiment.py --gpu=1 --save '{"dataset": "IamParagraphsDataset", "model": "LineDetectorModel", "network": "fcn", "train_args": {"batch_size": 16, "epochs": 32}}' 3 | -------------------------------------------------------------------------------- /lab8/tasks/train_line_detector.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python training/run_experiment.py --gpu=1 --save '{"dataset": "IamParagraphsDataset", "model": "LineDetectorModel", "network": "fcn", "train_args": {"batch_size": 16, "epochs": 32}}' 3 | -------------------------------------------------------------------------------- /data/raw/fsdl_handwriting/metadata.toml: -------------------------------------------------------------------------------- 1 | url = "https://dataturks.com/projects/sergeykarayev/fsdl_handwriting/export" 2 | filename = "fsdl_handwriting.json" 3 | sha256 = "720d6c72b4317a9a5492630a1c9f6d83a20d36101a29311a5cf7825c1d60c180" 4 | -------------------------------------------------------------------------------- /lab2/text_recognizer/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | """Dataset modules.""" 2 | from .emnist_dataset import EmnistDataset 3 | 4 | # Hide lines below until Lab 2 5 | from .emnist_lines_dataset import EmnistLinesDataset 6 | 7 | # Hide lines above until Lab 2 8 | -------------------------------------------------------------------------------- /lab2/text_recognizer/models/__init__.py: -------------------------------------------------------------------------------- 1 | """Model modules.""" 2 | from .character_model import CharacterModel 3 | 4 | # Hide lines below until Lab 2 5 | from .line_model import LineModel 6 | 7 | # Hide lines above until Lab 2 8 | 9 | 10 | -------------------------------------------------------------------------------- /lab3/text_recognizer/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | """Dataset modules.""" 2 | from .emnist_dataset import EmnistDataset 3 | 4 | # Hide lines below until Lab 2 5 | from .emnist_lines_dataset import EmnistLinesDataset 6 | 7 | # Hide lines above until Lab 2 8 | -------------------------------------------------------------------------------- /data/raw/fsdl_handwriting/readme.md: -------------------------------------------------------------------------------- 1 | # FSDL Handwriting Dataset 2 | 3 | Handwritten paragraphs generated in the FSDL March 2019 class and annotated using the DataTurks UX. 
4 | 5 | Export via manual download from https://dataturks.com/projects/sergeykarayev/fsdl_handwriting/export 6 | -------------------------------------------------------------------------------- /lab2/text_recognizer/networks/__init__.py: -------------------------------------------------------------------------------- 1 | """Neural network code modules.""" 2 | from .mlp import mlp 3 | from .lenet import lenet 4 | 5 | # Hide lines below until Lab 2 6 | from .line_cnn_all_conv import line_cnn_all_conv 7 | 8 | # Hide lines above until Lab 2 9 | 10 | 11 | -------------------------------------------------------------------------------- /lab7/setup.cfg: -------------------------------------------------------------------------------- 1 | [pycodestyle] 2 | max-line-length = 120 3 | ignore = E203,W503 4 | 5 | [pydocstyle] 6 | convention = numpy 7 | add-ignore = D102,D103,D104,D105,D200,D205,D400 8 | 9 | [mypy] 10 | ignore_missing_imports = True 11 | 12 | [tool:pytest] 13 | addopts = --doctest-modules 14 | -------------------------------------------------------------------------------- /lab8/setup.cfg: -------------------------------------------------------------------------------- 1 | [pycodestyle] 2 | max-line-length = 120 3 | ignore = E203,W503 4 | 5 | [pydocstyle] 6 | convention = numpy 7 | add-ignore = D102,D103,D104,D105,D200,D205,D400 8 | 9 | [mypy] 10 | ignore_missing_imports = True 11 | 12 | [tool:pytest] 13 | addopts = --doctest-modules 14 | -------------------------------------------------------------------------------- /requirements-dev.in: -------------------------------------------------------------------------------- 1 | -c requirements.txt 2 | bandit 3 | black 4 | gpustat 5 | gradescope-utils 6 | grequests # admin 7 | itermplot 8 | jupyterlab 9 | matplotlib 10 | mypy 11 | nltk 12 | pycodestyle 13 | pydocstyle 14 | pylint 15 | pytest 16 | pyyaml 17 | redlock-py 18 | tornado 19 | safety 20 | scipy 21 | pillow 22 | wandb 23 | -------------------------------------------------------------------------------- /lab3/text_recognizer/models/__init__.py: -------------------------------------------------------------------------------- 1 | """Model modules.""" 2 | from .character_model import CharacterModel 3 | 4 | # Hide lines below until Lab 2 5 | from .line_model import LineModel 6 | 7 | # Hide lines above until Lab 2 8 | 9 | # Hide lines below until Lab 3 10 | from .line_model_ctc import LineModelCtc 11 | 12 | # Hide lines above until Lab 3 13 | 14 | -------------------------------------------------------------------------------- /lab4/text_recognizer/models/__init__.py: -------------------------------------------------------------------------------- 1 | """Model modules.""" 2 | from .character_model import CharacterModel 3 | 4 | # Hide lines below until Lab 2 5 | from .line_model import LineModel 6 | 7 | # Hide lines above until Lab 2 8 | 9 | # Hide lines below until Lab 3 10 | from .line_model_ctc import LineModelCtc 11 | 12 | # Hide lines above until Lab 3 13 | 14 | -------------------------------------------------------------------------------- /lab4/text_recognizer/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | """Dataset modules.""" 2 | from .emnist_dataset import EmnistDataset 3 | 4 | # Hide lines below until Lab 2 5 | from .emnist_lines_dataset import EmnistLinesDataset 6 | 7 | # Hide lines above until Lab 2 8 | # Hide lines below until Lab 4 9 | from .iam_lines_dataset import IamLinesDataset 10 | 11 | # Hide lines above until
Lab 4 12 | -------------------------------------------------------------------------------- /lab3/text_recognizer/networks/__init__.py: -------------------------------------------------------------------------------- 1 | """Neural network code modules.""" 2 | from .mlp import mlp 3 | from .lenet import lenet 4 | 5 | # Hide lines below until Lab 2 6 | from .line_cnn_all_conv import line_cnn_all_conv 7 | 8 | # Hide lines above until Lab 2 9 | 10 | # Hide lines below until Lab 3 11 | from .line_lstm_ctc import line_lstm_ctc 12 | 13 | # Hide lines above until Lab 3 14 | 15 | -------------------------------------------------------------------------------- /lab4/text_recognizer/networks/__init__.py: -------------------------------------------------------------------------------- 1 | """Neural network code modules.""" 2 | from .mlp import mlp 3 | from .lenet import lenet 4 | 5 | # Hide lines below until Lab 2 6 | from .line_cnn_all_conv import line_cnn_all_conv 7 | 8 | # Hide lines above until Lab 2 9 | 10 | # Hide lines below until Lab 3 11 | from .line_lstm_ctc import line_lstm_ctc 12 | 13 | # Hide lines above until Lab 3 14 | 15 | -------------------------------------------------------------------------------- /lab4/training/sweep_iam.yaml: -------------------------------------------------------------------------------- 1 | program: training/run_sweep.py 2 | method: grid 3 | metric: 4 | name: val_loss 5 | goal: minimize 6 | parameters: 7 | dataset: 8 | value: IamLinesDataset 9 | model: 10 | value: LineModelCtc 11 | network: 12 | value: line_lstm_ctc 13 | network_args.window_width: 14 | values: [14, 18] 15 | network_args.window_stride: 16 | values: [5, 7] # careful with these 17 | train_args.batch_size: 18 | values: [64, 128] 19 | -------------------------------------------------------------------------------- /lab5/training/sweep_iam.yaml: -------------------------------------------------------------------------------- 1 | program: training/run_sweep.py 2 | method: grid 3 | metric: 4 | name: val_loss 5 | goal: minimize 6 | parameters: 7 | dataset: 8 | value: IamLinesDataset 9 | model: 10 | value: LineModelCtc 11 | network: 12 | value: line_lstm_ctc 13 | network_args.window_width: 14 | values: [14, 18] 15 | network_args.window_stride: 16 | values: [5, 7] # careful with these 17 | train_args.batch_size: 18 | values: [64, 128] 19 | -------------------------------------------------------------------------------- /lab6/training/sweep_iam.yaml: -------------------------------------------------------------------------------- 1 | program: training/run_sweep.py 2 | method: grid 3 | metric: 4 | name: val_loss 5 | goal: minimize 6 | parameters: 7 | dataset: 8 | value: IamLinesDataset 9 | model: 10 | value: LineModelCtc 11 | network: 12 | value: line_lstm_ctc 13 | network_args.window_width: 14 | values: [14, 18] 15 | network_args.window_stride: 16 | values: [5, 7] # careful with these 17 | train_args.batch_size: 18 | values: [64, 128] 19 | -------------------------------------------------------------------------------- /lab7/training/sweep_iam.yaml: -------------------------------------------------------------------------------- 1 | program: training/run_sweep.py 2 | method: grid 3 | metric: 4 | name: val_loss 5 | goal: minimize 6 | parameters: 7 | dataset: 8 | value: IamLinesDataset 9 | model: 10 | value: LineModelCtc 11 | network: 12 | value: line_lstm_ctc 13 | network_args.window_width: 14 | values: [14, 18] 15 | network_args.window_stride: 16 | values: [5, 7] # careful with these 17 | 
train_args.batch_size: 18 | values: [64, 128] 19 | -------------------------------------------------------------------------------- /lab8/training/sweep_iam.yaml: -------------------------------------------------------------------------------- 1 | program: training/run_sweep.py 2 | method: grid 3 | metric: 4 | name: val_loss 5 | goal: minimize 6 | parameters: 7 | dataset: 8 | value: IamLinesDataset 9 | model: 10 | value: LineModelCtc 11 | network: 12 | value: line_lstm_ctc 13 | network_args.window_width: 14 | values: [14, 18] 15 | network_args.window_stride: 16 | values: [5, 7] # careful with these 17 | train_args.batch_size: 18 | values: [64, 128] 19 | -------------------------------------------------------------------------------- /data/raw/emnist/readme.md: -------------------------------------------------------------------------------- 1 | # EMNIST dataset 2 | 3 | "The EMNIST dataset is a set of handwritten character digits derived from the NIST Special Database 19 4 | and converted to a 28x28 pixel image format and dataset structure that directly matches the MNIST dataset." 5 | From https://www.nist.gov/itl/iad/image-group/emnist-dataset 6 | 7 | The original URL is http://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/matlab.zip 8 | 9 | We uploaded the same file to our S3 bucket for faster download. 10 | -------------------------------------------------------------------------------- /lab5/text_recognizer/models/__init__.py: -------------------------------------------------------------------------------- 1 | """Model modules.""" 2 | from .character_model import CharacterModel 3 | 4 | # Hide lines below until Lab 2 5 | from .line_model import LineModel 6 | 7 | # Hide lines above until Lab 2 8 | 9 | # Hide lines below until Lab 3 10 | from .line_model_ctc import LineModelCtc 11 | 12 | # Hide lines above until Lab 3 13 | 14 | # Hide lines below until Lab 5 15 | from .line_detector_model import LineDetectorModel 16 | 17 | # Hide lines above until Lab 5 18 | -------------------------------------------------------------------------------- /lab6/text_recognizer/models/__init__.py: -------------------------------------------------------------------------------- 1 | """Model modules.""" 2 | from .character_model import CharacterModel 3 | 4 | # Hide lines below until Lab 2 5 | from .line_model import LineModel 6 | 7 | # Hide lines above until Lab 2 8 | 9 | # Hide lines below until Lab 3 10 | from .line_model_ctc import LineModelCtc 11 | 12 | # Hide lines above until Lab 3 13 | 14 | # Hide lines below until Lab 5 15 | from .line_detector_model import LineDetectorModel 16 | 17 | # Hide lines above until Lab 5 18 | -------------------------------------------------------------------------------- /lab7/text_recognizer/models/__init__.py: -------------------------------------------------------------------------------- 1 | """Model modules.""" 2 | from .character_model import CharacterModel 3 | 4 | # Hide lines below until Lab 2 5 | from .line_model import LineModel 6 | 7 | # Hide lines above until Lab 2 8 | 9 | # Hide lines below until Lab 3 10 | from .line_model_ctc import LineModelCtc 11 | 12 | # Hide lines above until Lab 3 13 | 14 | # Hide lines below until Lab 5 15 | from .line_detector_model import LineDetectorModel 16 | 17 | # Hide lines above until Lab 5 18 | -------------------------------------------------------------------------------- /lab8/text_recognizer/models/__init__.py: -------------------------------------------------------------------------------- 1 | """Model modules.""" 2 | from
.character_model import CharacterModel 3 | 4 | # Hide lines below until Lab 2 5 | from .line_model import LineModel 6 | 7 | # Hide lines above until Lab 2 8 | 9 | # Hide lines below until Lab 3 10 | from .line_model_ctc import LineModelCtc 11 | 12 | # Hide lines above until Lab 3 13 | 14 | # Hide lines below until Lab 5 15 | from .line_detector_model import LineDetectorModel 16 | 17 | # Hide lines above until Lab 5 18 | -------------------------------------------------------------------------------- /lab5/text_recognizer/networks/__init__.py: -------------------------------------------------------------------------------- 1 | """Neural network code modules.""" 2 | from .mlp import mlp 3 | from .lenet import lenet 4 | 5 | # Hide lines below until Lab 2 6 | from .line_cnn_all_conv import line_cnn_all_conv 7 | 8 | # Hide lines above until Lab 2 9 | 10 | # Hide lines below until Lab 3 11 | from .line_lstm_ctc import line_lstm_ctc 12 | 13 | # Hide lines above until Lab 3 14 | 15 | # Hide lines below until Lab 5 16 | from .fcn import fcn 17 | 18 | # Hide lines above until Lab 5 19 | -------------------------------------------------------------------------------- /lab6/text_recognizer/networks/__init__.py: -------------------------------------------------------------------------------- 1 | """Neural network code modules.""" 2 | from .mlp import mlp 3 | from .lenet import lenet 4 | 5 | # Hide lines below until Lab 2 6 | from .line_cnn_all_conv import line_cnn_all_conv 7 | 8 | # Hide lines above until Lab 2 9 | 10 | # Hide lines below until Lab 3 11 | from .line_lstm_ctc import line_lstm_ctc 12 | 13 | # Hide lines above until Lab 3 14 | 15 | # Hide lines below until Lab 5 16 | from .fcn import fcn 17 | 18 | # Hide lines above until Lab 5 19 | -------------------------------------------------------------------------------- /lab7/text_recognizer/networks/__init__.py: -------------------------------------------------------------------------------- 1 | """Neural network code modules.""" 2 | from .mlp import mlp 3 | from .lenet import lenet 4 | 5 | # Hide lines below until Lab 2 6 | from .line_cnn_all_conv import line_cnn_all_conv 7 | 8 | # Hide lines above until Lab 2 9 | 10 | # Hide lines below until Lab 3 11 | from .line_lstm_ctc import line_lstm_ctc 12 | 13 | # Hide lines above until Lab 3 14 | 15 | # Hide lines below until Lab 5 16 | from .fcn import fcn 17 | 18 | # Hide lines above until Lab 5 19 | -------------------------------------------------------------------------------- /lab8/text_recognizer/networks/__init__.py: -------------------------------------------------------------------------------- 1 | """Neural network code modules.""" 2 | from .mlp import mlp 3 | from .lenet import lenet 4 | 5 | # Hide lines below until Lab 2 6 | from .line_cnn_all_conv import line_cnn_all_conv 7 | 8 | # Hide lines above until Lab 2 9 | 10 | # Hide lines below until Lab 3 11 | from .line_lstm_ctc import line_lstm_ctc 12 | 13 | # Hide lines above until Lab 3 14 | 15 | # Hide lines below until Lab 5 16 | from .fcn import fcn 17 | 18 | # Hide lines above until Lab 5 19 | -------------------------------------------------------------------------------- /lab4/training/sweep_emnist.yaml: -------------------------------------------------------------------------------- 1 | program: training/run_sweep.py 2 | method: grid 3 | metric: 4 | name: val_loss 5 | goal: minimize 6 | parameters: 7 | dataset: 8 | value: EmnistDataset 9 | model: 10 | value: CharacterModel 11 | network: 12 | value: mlp 13 | network_args.layer_size: 
14 | values: [128, 256] 15 | network_args.dropout_amount: 16 | values: [0.2, 0.4] 17 | network_args.num_layers: 18 | values: [3, 6] 19 | train_args.batch_size: 20 | values: [64, 128] 21 | train_args.epochs: 22 | value: 5 23 | -------------------------------------------------------------------------------- /lab5/training/sweep_emnist.yaml: -------------------------------------------------------------------------------- 1 | program: training/run_sweep.py 2 | method: grid 3 | metric: 4 | name: val_loss 5 | goal: minimize 6 | parameters: 7 | dataset: 8 | value: EmnistDataset 9 | model: 10 | value: CharacterModel 11 | network: 12 | value: mlp 13 | network_args.layer_size: 14 | values: [128, 256] 15 | network_args.dropout_amount: 16 | values: [0.2, 0.4] 17 | network_args.num_layers: 18 | values: [3, 6] 19 | train_args.batch_size: 20 | values: [64, 128] 21 | train_args.epochs: 22 | value: 5 23 | -------------------------------------------------------------------------------- /lab6/training/sweep_emnist.yaml: -------------------------------------------------------------------------------- 1 | program: training/run_sweep.py 2 | method: grid 3 | metric: 4 | name: val_loss 5 | goal: minimize 6 | parameters: 7 | dataset: 8 | value: EmnistDataset 9 | model: 10 | value: CharacterModel 11 | network: 12 | value: mlp 13 | network_args.layer_size: 14 | values: [128, 256] 15 | network_args.dropout_amount: 16 | values: [0.2, 0.4] 17 | network_args.num_layers: 18 | values: [3, 6] 19 | train_args.batch_size: 20 | values: [64, 128] 21 | train_args.epochs: 22 | value: 5 23 | -------------------------------------------------------------------------------- /lab7/training/sweep_emnist.yaml: -------------------------------------------------------------------------------- 1 | program: training/run_sweep.py 2 | method: grid 3 | metric: 4 | name: val_loss 5 | goal: minimize 6 | parameters: 7 | dataset: 8 | value: EmnistDataset 9 | model: 10 | value: CharacterModel 11 | network: 12 | value: mlp 13 | network_args.layer_size: 14 | values: [128, 256] 15 | network_args.dropout_amount: 16 | values: [0.2, 0.4] 17 | network_args.num_layers: 18 | values: [3, 6] 19 | train_args.batch_size: 20 | values: [64, 128] 21 | train_args.epochs: 22 | value: 5 23 | -------------------------------------------------------------------------------- /lab8/training/sweep_emnist.yaml: -------------------------------------------------------------------------------- 1 | program: training/run_sweep.py 2 | method: grid 3 | metric: 4 | name: val_loss 5 | goal: minimize 6 | parameters: 7 | dataset: 8 | value: EmnistDataset 9 | model: 10 | value: CharacterModel 11 | network: 12 | value: mlp 13 | network_args.layer_size: 14 | values: [128, 256] 15 | network_args.dropout_amount: 16 | values: [0.2, 0.4] 17 | network_args.num_layers: 18 | values: [3, 6] 19 | train_args.batch_size: 20 | values: [64, 128] 21 | train_args.epochs: 22 | value: 5 23 | -------------------------------------------------------------------------------- /lab5/text_recognizer/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | """Dataset modules.""" 2 | from .emnist_dataset import EmnistDataset 3 | 4 | # Hide lines below until Lab 2 5 | from .emnist_lines_dataset import EmnistLinesDataset 6 | 7 | # Hide lines above until Lab 2 8 | # Hide lines below until Lab 4 9 | from .iam_lines_dataset import IamLinesDataset 10 | 11 | # Hide lines above until Lab 4 12 | # Hide lines below until Lab 5 13 | from .iam_dataset import IamDataset 14 | 
from .iam_paragraphs_dataset import IamParagraphsDataset 15 | 16 | # Hide lines above until Lab 5 17 | -------------------------------------------------------------------------------- /lab8/api/Dockerfile: -------------------------------------------------------------------------------- 1 | # The "buster" flavor of the official Docker Python image is based on Debian and includes common packages. 2 | FROM python:3.7-buster 3 | 4 | # Create the working directory 5 | RUN set -ex && mkdir /repo 6 | WORKDIR /repo 7 | 8 | # Copy only the relevant directories to the working directory 9 | COPY text_recognizer/ ./text_recognizer 10 | COPY api/ ./api 11 | 12 | # Install Python dependencies 13 | RUN set -ex && pip3 install -r api/requirements.txt 14 | 15 | # Run the web server 16 | EXPOSE 8000 17 | ENV PYTHONPATH /repo 18 | CMD python3 /repo/api/app.py 19 | -------------------------------------------------------------------------------- /lab6/text_recognizer/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | """Dataset modules.""" 2 | from .emnist_dataset import EmnistDataset 3 | 4 | # Hide lines below until Lab 2 5 | from .emnist_lines_dataset import EmnistLinesDataset 6 | 7 | # Hide lines above until Lab 2 8 | # Hide lines below until Lab 4 9 | from .iam_lines_dataset import IamLinesDataset 10 | 11 | # Hide lines above until Lab 4 12 | # Hide lines below until Lab 5 13 | from .iam_dataset import IamDataset 14 | from .iam_paragraphs_dataset import IamParagraphsDataset 15 | 16 | # Hide lines above until Lab 5 17 | # Hide lines below until Lab 6 18 | from .fsdl_handwriting_dataset import FsdlHandwritingDataset 19 | 20 | # Hide lines above until Lab 6 21 | -------------------------------------------------------------------------------- /lab7/text_recognizer/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | """Dataset modules.""" 2 | from .emnist_dataset import EmnistDataset 3 | 4 | # Hide lines below until Lab 2 5 | from .emnist_lines_dataset import EmnistLinesDataset 6 | 7 | # Hide lines above until Lab 2 8 | # Hide lines below until Lab 4 9 | from .iam_lines_dataset import IamLinesDataset 10 | 11 | # Hide lines above until Lab 4 12 | # Hide lines below until Lab 5 13 | from .iam_dataset import IamDataset 14 | from .iam_paragraphs_dataset import IamParagraphsDataset 15 | 16 | # Hide lines above until Lab 5 17 | # Hide lines below until Lab 6 18 | from .fsdl_handwriting_dataset import FsdlHandwritingDataset 19 | 20 | # Hide lines above until Lab 6 21 | -------------------------------------------------------------------------------- /lab8/text_recognizer/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | """Dataset modules.""" 2 | from .emnist_dataset import EmnistDataset 3 | 4 | # Hide lines below until Lab 2 5 | from .emnist_lines_dataset import EmnistLinesDataset 6 | 7 | # Hide lines above until Lab 2 8 | # Hide lines below until Lab 4 9 | from .iam_lines_dataset import IamLinesDataset 10 | 11 | # Hide lines above until Lab 4 12 | # Hide lines below until Lab 5 13 | from .iam_dataset import IamDataset 14 | from .iam_paragraphs_dataset import IamParagraphsDataset 15 | 16 | # Hide lines above until Lab 5 17 | # Hide lines below until Lab 6 18 | from .fsdl_handwriting_dataset import FsdlHandwritingDataset 19 | 20 | # Hide lines above until Lab 6 21 | --------------------------------------------------------------------------------
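The datasets __init__.py files above gate each import behind a lab number, so every lab directory exposes only the classes introduced up to that lab. The tasks scripts (for example train_line_detector.sh above) pass those class names as strings in a JSON config; the repo's training/run_experiment.py, which consumes that config, is not shown in this listing, so the helper below is an illustrative sketch of how such string names can be resolved against these package exports, not code from the repo:

import importlib


def resolve_by_name(module_path: str, class_name: str):
    """Resolve a class exported by a package, with both given as strings (illustrative helper)."""
    module = importlib.import_module(module_path)
    return getattr(module, class_name)


# The tasks scripts pass names like {"dataset": "IamParagraphsDataset", ...};
# resolving against the lab-gated exports above yields the class itself.
dataset_class = resolve_by_name("text_recognizer.datasets", "EmnistDataset")
dataset = dataset_class()
dataset.load_or_generate_data()

--------------------------------------------------------------------------------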
/.gitignore: -------------------------------------------------------------------------------- 1 | # Data 2 | data/processed 3 | data/interim 4 | data/raw/emnist/matlab* 5 | data/raw/fsdl_handwriting/pages 6 | data/raw/iam/iamdb 7 | data/raw/iam/iamdb.zip 8 | data/raw/nltk 9 | 10 | # Editors 11 | .vscode 12 | 13 | # Node 14 | node_modules 15 | 16 | # Python 17 | __pycache__ 18 | .pytest_cache 19 | .ipynb_checkpoints 20 | 21 | # Distribution / packaging 22 | .Python 23 | env/ 24 | build/ 25 | develop-eggs/ 26 | dist/ 27 | downloads/ 28 | eggs/ 29 | .eggs/ 30 | lib/ 31 | lib64/ 32 | parts/ 33 | sdist/ 34 | var/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | 39 | # W&B 40 | wandb-debug.log 41 | wandb/* 42 | !wandb/settings 43 | 44 | # Misc 45 | .DS_Store 46 | _labs 47 | logs 48 | .mypy_cache 49 | -------------------------------------------------------------------------------- /lab1/text_recognizer/datasets/emnist_essentials.json: -------------------------------------------------------------------------------- 1 | {"mapping": [[0, "0"], [1, "1"], [2, "2"], [3, "3"], [4, "4"], [5, "5"], [6, "6"], [7, "7"], [8, "8"], [9, "9"], [10, "A"], [11, "B"], [12, "C"], [13, "D"], [14, "E"], [15, "F"], [16, "G"], [17, "H"], [18, "I"], [19, "J"], [20, "K"], [21, "L"], [22, "M"], [23, "N"], [24, "O"], [25, "P"], [26, "Q"], [27, "R"], [28, "S"], [29, "T"], [30, "U"], [31, "V"], [32, "W"], [33, "X"], [34, "Y"], [35, "Z"], [36, "a"], [37, "b"], [38, "c"], [39, "d"], [40, "e"], [41, "f"], [42, "g"], [43, "h"], [44, "i"], [45, "j"], [46, "k"], [47, "l"], [48, "m"], [49, "n"], [50, "o"], [51, "p"], [52, "q"], [53, "r"], [54, "s"], [55, "t"], [56, "u"], [57, "v"], [58, "w"], [59, "x"], [60, "y"], [61, "z"]], "input_shape": [28, 28]} -------------------------------------------------------------------------------- /lab2/text_recognizer/datasets/emnist_essentials.json: -------------------------------------------------------------------------------- 1 | {"mapping": [[0, "0"], [1, "1"], [2, "2"], [3, "3"], [4, "4"], [5, "5"], [6, "6"], [7, "7"], [8, "8"], [9, "9"], [10, "A"], [11, "B"], [12, "C"], [13, "D"], [14, "E"], [15, "F"], [16, "G"], [17, "H"], [18, "I"], [19, "J"], [20, "K"], [21, "L"], [22, "M"], [23, "N"], [24, "O"], [25, "P"], [26, "Q"], [27, "R"], [28, "S"], [29, "T"], [30, "U"], [31, "V"], [32, "W"], [33, "X"], [34, "Y"], [35, "Z"], [36, "a"], [37, "b"], [38, "c"], [39, "d"], [40, "e"], [41, "f"], [42, "g"], [43, "h"], [44, "i"], [45, "j"], [46, "k"], [47, "l"], [48, "m"], [49, "n"], [50, "o"], [51, "p"], [52, "q"], [53, "r"], [54, "s"], [55, "t"], [56, "u"], [57, "v"], [58, "w"], [59, "x"], [60, "y"], [61, "z"]], "input_shape": [28, 28]} -------------------------------------------------------------------------------- /lab3/text_recognizer/datasets/emnist_essentials.json: -------------------------------------------------------------------------------- 1 | {"mapping": [[0, "0"], [1, "1"], [2, "2"], [3, "3"], [4, "4"], [5, "5"], [6, "6"], [7, "7"], [8, "8"], [9, "9"], [10, "A"], [11, "B"], [12, "C"], [13, "D"], [14, "E"], [15, "F"], [16, "G"], [17, "H"], [18, "I"], [19, "J"], [20, "K"], [21, "L"], [22, "M"], [23, "N"], [24, "O"], [25, "P"], [26, "Q"], [27, "R"], [28, "S"], [29, "T"], [30, "U"], [31, "V"], [32, "W"], [33, "X"], [34, "Y"], [35, "Z"], [36, "a"], [37, "b"], [38, "c"], [39, "d"], [40, "e"], [41, "f"], [42, "g"], [43, "h"], [44, "i"], [45, "j"], [46, "k"], [47, "l"], [48, "m"], [49, "n"], [50, "o"], [51, "p"], [52, "q"], [53, "r"], [54, "s"], [55, "t"], [56, "u"], [57, "v"], [58, "w"], [59, 
"x"], [60, "y"], [61, "z"]], "input_shape": [28, 28]} -------------------------------------------------------------------------------- /lab4/text_recognizer/datasets/emnist_essentials.json: -------------------------------------------------------------------------------- 1 | {"mapping": [[0, "0"], [1, "1"], [2, "2"], [3, "3"], [4, "4"], [5, "5"], [6, "6"], [7, "7"], [8, "8"], [9, "9"], [10, "A"], [11, "B"], [12, "C"], [13, "D"], [14, "E"], [15, "F"], [16, "G"], [17, "H"], [18, "I"], [19, "J"], [20, "K"], [21, "L"], [22, "M"], [23, "N"], [24, "O"], [25, "P"], [26, "Q"], [27, "R"], [28, "S"], [29, "T"], [30, "U"], [31, "V"], [32, "W"], [33, "X"], [34, "Y"], [35, "Z"], [36, "a"], [37, "b"], [38, "c"], [39, "d"], [40, "e"], [41, "f"], [42, "g"], [43, "h"], [44, "i"], [45, "j"], [46, "k"], [47, "l"], [48, "m"], [49, "n"], [50, "o"], [51, "p"], [52, "q"], [53, "r"], [54, "s"], [55, "t"], [56, "u"], [57, "v"], [58, "w"], [59, "x"], [60, "y"], [61, "z"]], "input_shape": [28, 28]} -------------------------------------------------------------------------------- /lab5/text_recognizer/datasets/emnist_essentials.json: -------------------------------------------------------------------------------- 1 | {"mapping": [[0, "0"], [1, "1"], [2, "2"], [3, "3"], [4, "4"], [5, "5"], [6, "6"], [7, "7"], [8, "8"], [9, "9"], [10, "A"], [11, "B"], [12, "C"], [13, "D"], [14, "E"], [15, "F"], [16, "G"], [17, "H"], [18, "I"], [19, "J"], [20, "K"], [21, "L"], [22, "M"], [23, "N"], [24, "O"], [25, "P"], [26, "Q"], [27, "R"], [28, "S"], [29, "T"], [30, "U"], [31, "V"], [32, "W"], [33, "X"], [34, "Y"], [35, "Z"], [36, "a"], [37, "b"], [38, "c"], [39, "d"], [40, "e"], [41, "f"], [42, "g"], [43, "h"], [44, "i"], [45, "j"], [46, "k"], [47, "l"], [48, "m"], [49, "n"], [50, "o"], [51, "p"], [52, "q"], [53, "r"], [54, "s"], [55, "t"], [56, "u"], [57, "v"], [58, "w"], [59, "x"], [60, "y"], [61, "z"]], "input_shape": [28, 28]} -------------------------------------------------------------------------------- /lab6/text_recognizer/datasets/emnist_essentials.json: -------------------------------------------------------------------------------- 1 | {"mapping": [[0, "0"], [1, "1"], [2, "2"], [3, "3"], [4, "4"], [5, "5"], [6, "6"], [7, "7"], [8, "8"], [9, "9"], [10, "A"], [11, "B"], [12, "C"], [13, "D"], [14, "E"], [15, "F"], [16, "G"], [17, "H"], [18, "I"], [19, "J"], [20, "K"], [21, "L"], [22, "M"], [23, "N"], [24, "O"], [25, "P"], [26, "Q"], [27, "R"], [28, "S"], [29, "T"], [30, "U"], [31, "V"], [32, "W"], [33, "X"], [34, "Y"], [35, "Z"], [36, "a"], [37, "b"], [38, "c"], [39, "d"], [40, "e"], [41, "f"], [42, "g"], [43, "h"], [44, "i"], [45, "j"], [46, "k"], [47, "l"], [48, "m"], [49, "n"], [50, "o"], [51, "p"], [52, "q"], [53, "r"], [54, "s"], [55, "t"], [56, "u"], [57, "v"], [58, "w"], [59, "x"], [60, "y"], [61, "z"]], "input_shape": [28, 28]} -------------------------------------------------------------------------------- /lab7/text_recognizer/datasets/emnist_essentials.json: -------------------------------------------------------------------------------- 1 | {"mapping": [[0, "0"], [1, "1"], [2, "2"], [3, "3"], [4, "4"], [5, "5"], [6, "6"], [7, "7"], [8, "8"], [9, "9"], [10, "A"], [11, "B"], [12, "C"], [13, "D"], [14, "E"], [15, "F"], [16, "G"], [17, "H"], [18, "I"], [19, "J"], [20, "K"], [21, "L"], [22, "M"], [23, "N"], [24, "O"], [25, "P"], [26, "Q"], [27, "R"], [28, "S"], [29, "T"], [30, "U"], [31, "V"], [32, "W"], [33, "X"], [34, "Y"], [35, "Z"], [36, "a"], [37, "b"], [38, "c"], [39, "d"], [40, "e"], [41, "f"], [42, "g"], [43, 
"h"], [44, "i"], [45, "j"], [46, "k"], [47, "l"], [48, "m"], [49, "n"], [50, "o"], [51, "p"], [52, "q"], [53, "r"], [54, "s"], [55, "t"], [56, "u"], [57, "v"], [58, "w"], [59, "x"], [60, "y"], [61, "z"]], "input_shape": [28, 28]} -------------------------------------------------------------------------------- /lab8/text_recognizer/datasets/emnist_essentials.json: -------------------------------------------------------------------------------- 1 | {"mapping": [[0, "0"], [1, "1"], [2, "2"], [3, "3"], [4, "4"], [5, "5"], [6, "6"], [7, "7"], [8, "8"], [9, "9"], [10, "A"], [11, "B"], [12, "C"], [13, "D"], [14, "E"], [15, "F"], [16, "G"], [17, "H"], [18, "I"], [19, "J"], [20, "K"], [21, "L"], [22, "M"], [23, "N"], [24, "O"], [25, "P"], [26, "Q"], [27, "R"], [28, "S"], [29, "T"], [30, "U"], [31, "V"], [32, "W"], [33, "X"], [34, "Y"], [35, "Z"], [36, "a"], [37, "b"], [38, "c"], [39, "d"], [40, "e"], [41, "f"], [42, "g"], [43, "h"], [44, "i"], [45, "j"], [46, "k"], [47, "l"], [48, "m"], [49, "n"], [50, "o"], [51, "p"], [52, "q"], [53, "r"], [54, "s"], [55, "t"], [56, "u"], [57, "v"], [58, "w"], [59, "x"], [60, "y"], [61, "z"]], "input_shape": [28, 28]} -------------------------------------------------------------------------------- /lab7/tasks/lint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -uo pipefail 3 | set +e 4 | 5 | FAILURE=false 6 | 7 | echo "safety" 8 | safety check -r requirements.txt -r requirements-dev.txt || FAILURE=true 9 | 10 | echo "pylint" 11 | pylint api text_recognizer training || FAILURE=true 12 | 13 | echo "pycodestyle" 14 | pycodestyle api text_recognizer training || FAILURE=true 15 | 16 | echo "pydocstyle" 17 | pydocstyle api text_recognizer training || FAILURE=true 18 | 19 | echo "mypy" 20 | mypy api text_recognizer training || FAILURE=true 21 | 22 | echo "bandit" 23 | bandit -ll -r {api,text_recognizer,training} || FAILURE=true 24 | 25 | echo "shellcheck" 26 | shellcheck tasks/*.sh || FAILURE=true 27 | 28 | if [ "$FAILURE" = true ]; then 29 | echo "Linting failed" 30 | exit 1 31 | fi 32 | echo "Linting passed" 33 | exit 0 34 | -------------------------------------------------------------------------------- /lab8/tasks/lint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -uo pipefail 3 | set +e 4 | 5 | FAILURE=false 6 | 7 | echo "safety" 8 | safety check -r requirements.txt -r requirements-dev.txt || FAILURE=true 9 | 10 | echo "pylint" 11 | pylint api text_recognizer training || FAILURE=true 12 | 13 | echo "pycodestyle" 14 | pycodestyle api text_recognizer training || FAILURE=true 15 | 16 | echo "pydocstyle" 17 | pydocstyle api text_recognizer training || FAILURE=true 18 | 19 | echo "mypy" 20 | mypy api text_recognizer training || FAILURE=true 21 | 22 | echo "bandit" 23 | bandit -ll -r {api,text_recognizer,training} || FAILURE=true 24 | 25 | echo "shellcheck" 26 | shellcheck tasks/*.sh || FAILURE=true 27 | 28 | if [ "$FAILURE" = true ]; then 29 | echo "Linting failed" 30 | exit 1 31 | fi 32 | echo "Linting passed" 33 | exit 0 34 | -------------------------------------------------------------------------------- /lab1/text_recognizer/networks/misc.py: -------------------------------------------------------------------------------- 1 | """Misc neural network functionality.""" 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | 6 | def slide_window(image: np.ndarray, window_width: int, window_stride: int) -> np.ndarray: 7 | """ 8 | Parameters 9 | 
---------- 10 | image 11 | (image_height, image_width, 1) input 12 | 13 | Returns 14 | ------- 15 | np.ndarray 16 | (num_windows, image_height, window_width, 1) output, where 17 | num_windows is floor((image_width - window_width) / window_stride) + 1 18 | """ 19 | kernel = [1, 1, window_width, 1] 20 | strides = [1, 1, window_stride, 1] 21 | patches = tf.image.extract_patches(image, kernel, strides, [1, 1, 1, 1], "VALID") 22 | patches = tf.transpose(patches, (0, 2, 1, 3)) 23 | patches = tf.expand_dims(patches, -1) 24 | return patches 25 | -------------------------------------------------------------------------------- /lab2/text_recognizer/networks/misc.py: -------------------------------------------------------------------------------- 1 | """Misc neural network functionality.""" 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | 6 | def slide_window(image: np.ndarray, window_width: int, window_stride: int) -> np.ndarray: 7 | """ 8 | Parameters 9 | ---------- 10 | image 11 | (image_height, image_width, 1) input 12 | 13 | Returns 14 | ------- 15 | np.ndarray 16 | (num_windows, image_height, window_width, 1) output, where 17 | num_windows is floor((image_width - window_width) / window_stride) + 1 18 | """ 19 | kernel = [1, 1, window_width, 1] 20 | strides = [1, 1, window_stride, 1] 21 | patches = tf.image.extract_patches(image, kernel, strides, [1, 1, 1, 1], "VALID") 22 | patches = tf.transpose(patches, (0, 2, 1, 3)) 23 | patches = tf.expand_dims(patches, -1) 24 | return patches 25 | -------------------------------------------------------------------------------- /lab3/text_recognizer/networks/misc.py: -------------------------------------------------------------------------------- 1 | """Misc neural network functionality.""" 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | 6 | def slide_window(image: np.ndarray, window_width: int, window_stride: int) -> np.ndarray: 7 | """ 8 | Parameters 9 | ---------- 10 | image 11 | (image_height, image_width, 1) input 12 | 13 | Returns 14 | ------- 15 | np.ndarray 16 | (num_windows, image_height, window_width, 1) output, where 17 | num_windows is floor((image_width - window_width) / window_stride) + 1 18 | """ 19 | kernel = [1, 1, window_width, 1] 20 | strides = [1, 1, window_stride, 1] 21 | patches = tf.image.extract_patches(image, kernel, strides, [1, 1, 1, 1], "VALID") 22 | patches = tf.transpose(patches, (0, 2, 1, 3)) 23 | patches = tf.expand_dims(patches, -1) 24 | return patches 25 | -------------------------------------------------------------------------------- /lab4/text_recognizer/networks/misc.py: -------------------------------------------------------------------------------- 1 | """Misc neural network functionality.""" 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | 6 | def slide_window(image: np.ndarray, window_width: int, window_stride: int) -> np.ndarray: 7 | """ 8 | Parameters 9 | ---------- 10 | image 11 | (image_height, image_width, 1) input 12 | 13 | Returns 14 | ------- 15 | np.ndarray 16 | (num_windows, image_height, window_width, 1) output, where 17 | num_windows is floor((image_width - window_width) / window_stride) + 1 18 | """ 19 | kernel = [1, 1, window_width, 1] 20 | strides = [1, 1, window_stride, 1] 21 | patches = tf.image.extract_patches(image, kernel, strides, [1, 1, 1, 1], "VALID") 22 | patches = tf.transpose(patches, (0, 2, 1, 3)) 23 | patches = tf.expand_dims(patches, -1) 24 | return patches 25 | -------------------------------------------------------------------------------- 
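The slide_window function above (duplicated into each lab's networks/misc.py) cuts a line image into overlapping vertical strips for the sliding-window line models. Note that tf.image.extract_patches operates on 4-D (batch, height, width, channels) tensors, so despite the single-image shapes in the docstring, the function needs a leading batch axis on its input. A minimal usage sketch; the 28x100 image size is illustrative, while window_width=14 and window_stride=7 match values swept in sweep_iam.yaml above:

import numpy as np

from text_recognizer.networks.misc import slide_window

# One grayscale line image with a leading batch axis: (batch, height, width, channels).
image = np.zeros((1, 28, 100, 1), dtype=np.float32)
windows = slide_window(image, window_width=14, window_stride=7)
# (1, 13, 28, 14, 1): floor((100 - 14) / 7) + 1 = 13 windows, each of height 28 and width 14.
print(windows.shape)

--------------------------------------------------------------------------------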
/lab5/text_recognizer/networks/misc.py: -------------------------------------------------------------------------------- 1 | """Misc neural network functionality.""" 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | 6 | def slide_window(image: np.ndarray, window_width: int, window_stride: int) -> np.ndarray: 7 | """ 8 | Parameters 9 | ---------- 10 | image 11 | (image_height, image_width, 1) input 12 | 13 | Returns 14 | ------- 15 | np.ndarray 16 | (num_windows, image_height, window_width, 1) output, where 17 | num_windows is floor((image_width - window_width) / window_stride) + 1 18 | """ 19 | kernel = [1, 1, window_width, 1] 20 | strides = [1, 1, window_stride, 1] 21 | patches = tf.image.extract_patches(image, kernel, strides, [1, 1, 1, 1], "VALID") 22 | patches = tf.transpose(patches, (0, 2, 1, 3)) 23 | patches = tf.expand_dims(patches, -1) 24 | return patches 25 | -------------------------------------------------------------------------------- /lab6/text_recognizer/networks/misc.py: -------------------------------------------------------------------------------- 1 | """Misc neural network functionality.""" 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | 6 | def slide_window(image: np.ndarray, window_width: int, window_stride: int) -> np.ndarray: 7 | """ 8 | Parameters 9 | ---------- 10 | image 11 | (image_height, image_width, 1) input 12 | 13 | Returns 14 | ------- 15 | np.ndarray 16 | (num_windows, image_height, window_width, 1) output, where 17 | num_windows is floor((image_width - window_width) / window_stride) + 1 18 | """ 19 | kernel = [1, 1, window_width, 1] 20 | strides = [1, 1, window_stride, 1] 21 | patches = tf.image.extract_patches(image, kernel, strides, [1, 1, 1, 1], "VALID") 22 | patches = tf.transpose(patches, (0, 2, 1, 3)) 23 | patches = tf.expand_dims(patches, -1) 24 | return patches 25 | -------------------------------------------------------------------------------- /lab7/text_recognizer/networks/misc.py: -------------------------------------------------------------------------------- 1 | """Misc neural network functionality.""" 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | 6 | def slide_window(image: np.ndarray, window_width: int, window_stride: int) -> np.ndarray: 7 | """ 8 | Parameters 9 | ---------- 10 | image 11 | (image_height, image_width, 1) input 12 | 13 | Returns 14 | ------- 15 | np.ndarray 16 | (num_windows, image_height, window_width, 1) output, where 17 | num_windows is floor((image_width - window_width) / window_stride) + 1 18 | """ 19 | kernel = [1, 1, window_width, 1] 20 | strides = [1, 1, window_stride, 1] 21 | patches = tf.image.extract_patches(image, kernel, strides, [1, 1, 1, 1], "VALID") 22 | patches = tf.transpose(patches, (0, 2, 1, 3)) 23 | patches = tf.expand_dims(patches, -1) 24 | return patches 25 | -------------------------------------------------------------------------------- /lab8/text_recognizer/networks/misc.py: -------------------------------------------------------------------------------- 1 | """Misc neural network functionality.""" 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | 6 | def slide_window(image: np.ndarray, window_width: int, window_stride: int) -> np.ndarray: 7 | """ 8 | Parameters 9 | ---------- 10 | image 11 | (image_height, image_width, 1) input 12 | 13 | Returns 14 | ------- 15 | np.ndarray 16 | (num_windows, image_height, window_width, 1) output, where 17 | num_windows is floor((image_width - window_width) / window_stride) + 1 18 | """ 19 | kernel = [1, 1, window_width, 
1] 20 | strides = [1, 1, window_stride, 1] 21 | patches = tf.image.extract_patches(image, kernel, strides, [1, 1, 1, 1], "VALID") 22 | patches = tf.transpose(patches, (0, 2, 1, 3)) 23 | patches = tf.expand_dims(patches, -1) 24 | return patches 25 | -------------------------------------------------------------------------------- /lab1/text_recognizer/tests/support/create_emnist_support_files.py: -------------------------------------------------------------------------------- 1 | """Module for creating EMNIST test support files.""" 2 | from pathlib import Path 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | from text_recognizer.datasets import EmnistDataset 8 | import text_recognizer.util as util 9 | 10 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "emnist" 11 | 12 | 13 | def create_emnist_support_files(): 14 | shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True) 15 | SUPPORT_DIRNAME.mkdir() 16 | 17 | dataset = EmnistDataset() 18 | dataset.load_or_generate_data() 19 | 20 | for ind in [5, 7, 9]: 21 | image = dataset.x_test[ind] 22 | label = dataset.mapping[np.argmax(dataset.y_test[ind])] 23 | print(ind, label) 24 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) 25 | 26 | 27 | if __name__ == "__main__": 28 | create_emnist_support_files() 29 | -------------------------------------------------------------------------------- /lab2/text_recognizer/tests/support/create_emnist_support_files.py: -------------------------------------------------------------------------------- 1 | """Module for creating EMNIST test support files.""" 2 | from pathlib import Path 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | from text_recognizer.datasets import EmnistDataset 8 | import text_recognizer.util as util 9 | 10 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "emnist" 11 | 12 | 13 | def create_emnist_support_files(): 14 | shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True) 15 | SUPPORT_DIRNAME.mkdir() 16 | 17 | dataset = EmnistDataset() 18 | dataset.load_or_generate_data() 19 | 20 | for ind in [5, 7, 9]: 21 | image = dataset.x_test[ind] 22 | label = dataset.mapping[np.argmax(dataset.y_test[ind])] 23 | print(ind, label) 24 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) 25 | 26 | 27 | if __name__ == "__main__": 28 | create_emnist_support_files() 29 | -------------------------------------------------------------------------------- /lab3/text_recognizer/tests/support/create_emnist_support_files.py: -------------------------------------------------------------------------------- 1 | """Module for creating EMNIST test support files.""" 2 | from pathlib import Path 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | from text_recognizer.datasets import EmnistDataset 8 | import text_recognizer.util as util 9 | 10 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "emnist" 11 | 12 | 13 | def create_emnist_support_files(): 14 | shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True) 15 | SUPPORT_DIRNAME.mkdir() 16 | 17 | dataset = EmnistDataset() 18 | dataset.load_or_generate_data() 19 | 20 | for ind in [5, 7, 9]: 21 | image = dataset.x_test[ind] 22 | label = dataset.mapping[np.argmax(dataset.y_test[ind])] 23 | print(ind, label) 24 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) 25 | 26 | 27 | if __name__ == "__main__": 28 | create_emnist_support_files() 29 | -------------------------------------------------------------------------------- /lab4/text_recognizer/tests/support/create_emnist_support_files.py: 
-------------------------------------------------------------------------------- 1 | """Module for creating EMNIST test support files.""" 2 | from pathlib import Path 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | from text_recognizer.datasets import EmnistDataset 8 | import text_recognizer.util as util 9 | 10 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "emnist" 11 | 12 | 13 | def create_emnist_support_files(): 14 | shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True) 15 | SUPPORT_DIRNAME.mkdir() 16 | 17 | dataset = EmnistDataset() 18 | dataset.load_or_generate_data() 19 | 20 | for ind in [5, 7, 9]: 21 | image = dataset.x_test[ind] 22 | label = dataset.mapping[np.argmax(dataset.y_test[ind])] 23 | print(ind, label) 24 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) 25 | 26 | 27 | if __name__ == "__main__": 28 | create_emnist_support_files() 29 | -------------------------------------------------------------------------------- /lab5/text_recognizer/tests/support/create_emnist_support_files.py: -------------------------------------------------------------------------------- 1 | """Module for creating EMNIST test support files.""" 2 | from pathlib import Path 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | from text_recognizer.datasets import EmnistDataset 8 | import text_recognizer.util as util 9 | 10 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "emnist" 11 | 12 | 13 | def create_emnist_support_files(): 14 | shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True) 15 | SUPPORT_DIRNAME.mkdir() 16 | 17 | dataset = EmnistDataset() 18 | dataset.load_or_generate_data() 19 | 20 | for ind in [5, 7, 9]: 21 | image = dataset.x_test[ind] 22 | label = dataset.mapping[np.argmax(dataset.y_test[ind])] 23 | print(ind, label) 24 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) 25 | 26 | 27 | if __name__ == "__main__": 28 | create_emnist_support_files() 29 | -------------------------------------------------------------------------------- /lab6/text_recognizer/tests/support/create_emnist_support_files.py: -------------------------------------------------------------------------------- 1 | """Module for creating EMNIST test support files.""" 2 | from pathlib import Path 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | from text_recognizer.datasets import EmnistDataset 8 | import text_recognizer.util as util 9 | 10 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "emnist" 11 | 12 | 13 | def create_emnist_support_files(): 14 | shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True) 15 | SUPPORT_DIRNAME.mkdir() 16 | 17 | dataset = EmnistDataset() 18 | dataset.load_or_generate_data() 19 | 20 | for ind in [5, 7, 9]: 21 | image = dataset.x_test[ind] 22 | label = dataset.mapping[np.argmax(dataset.y_test[ind])] 23 | print(ind, label) 24 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) 25 | 26 | 27 | if __name__ == "__main__": 28 | create_emnist_support_files() 29 | -------------------------------------------------------------------------------- /lab7/text_recognizer/tests/support/create_emnist_support_files.py: -------------------------------------------------------------------------------- 1 | """Module for creating EMNIST test support files.""" 2 | from pathlib import Path 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | from text_recognizer.datasets import EmnistDataset 8 | import text_recognizer.util as util 9 | 10 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "emnist" 11 | 12 | 13 | def 
create_emnist_support_files(): 14 | shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True) 15 | SUPPORT_DIRNAME.mkdir() 16 | 17 | dataset = EmnistDataset() 18 | dataset.load_or_generate_data() 19 | 20 | for ind in [5, 7, 9]: 21 | image = dataset.x_test[ind] 22 | label = dataset.mapping[np.argmax(dataset.y_test[ind])] 23 | print(ind, label) 24 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) 25 | 26 | 27 | if __name__ == "__main__": 28 | create_emnist_support_files() 29 | -------------------------------------------------------------------------------- /lab8/text_recognizer/tests/support/create_emnist_support_files.py: -------------------------------------------------------------------------------- 1 | """Module for creating EMNIST test support files.""" 2 | from pathlib import Path 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | from text_recognizer.datasets import EmnistDataset 8 | import text_recognizer.util as util 9 | 10 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "emnist" 11 | 12 | 13 | def create_emnist_support_files(): 14 | shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True) 15 | SUPPORT_DIRNAME.mkdir() 16 | 17 | dataset = EmnistDataset() 18 | dataset.load_or_generate_data() 19 | 20 | for ind in [5, 7, 9]: 21 | image = dataset.x_test[ind] 22 | label = dataset.mapping[np.argmax(dataset.y_test[ind])] 23 | print(ind, label) 24 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) 25 | 26 | 27 | if __name__ == "__main__": 28 | create_emnist_support_files() 29 | -------------------------------------------------------------------------------- /lab1/training/util.py: -------------------------------------------------------------------------------- 1 | """Function to train a model.""" 2 | from time import time 3 | 4 | from tensorflow.keras.callbacks import EarlyStopping 5 | 6 | 7 | from text_recognizer.datasets.dataset import Dataset 8 | from text_recognizer.models.base import Model 9 | 10 | EARLY_STOPPING = True 11 | 12 | 13 | 14 | 15 | def train_model(model: Model, dataset: Dataset, epochs: int, batch_size: int, use_wandb: bool = False) -> Model: 16 | """Train model.""" 17 | callbacks = [] 18 | 19 | if EARLY_STOPPING: 20 | early_stopping = EarlyStopping(monitor="val_loss", min_delta=0.01, patience=3, verbose=1, mode="auto") 21 | callbacks.append(early_stopping) 22 | 23 | 24 | model.network.summary() 25 | 26 | t = time() 27 | _history = model.fit(dataset=dataset, batch_size=batch_size, epochs=epochs, callbacks=callbacks) 28 | print("Training took {:.2f} s".format(time() - t)) 29 | 30 | return model 31 | --------------------------------------------------------------------------------
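For context, a minimal sketch of how the train_model function above might be driven from a script, using the EmnistDataset and CharacterModel classes that appear elsewhere in these labs; the epoch and batch-size values are illustrative rather than canonical, and the snippet assumes it is run from a lab root so that text_recognizer and training are importable:

# Hypothetical driver, not part of the labs; hyperparameters are illustrative.
from text_recognizer.datasets import EmnistDataset
from text_recognizer.models import CharacterModel
from training.util import train_model

dataset = EmnistDataset()
dataset.load_or_generate_data()  # fetch or rebuild EMNIST, as in the support-file scripts

model = CharacterModel()  # no-argument construction, mirroring CharacterPredictor.__init__
model = train_model(model, dataset, epochs=3, batch_size=256)  # early stopping applies while EARLY_STOPPING is True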
/lab2/training/util.py: -------------------------------------------------------------------------------- 1 | """Function to train a model.""" 2 | from time import time 3 | 4 | from tensorflow.keras.callbacks import EarlyStopping 5 | 6 | 7 | from text_recognizer.datasets.dataset import Dataset 8 | from text_recognizer.models.base import Model 9 | 10 | EARLY_STOPPING = True 11 | 12 | 13 | 14 | 15 | def train_model(model: Model, dataset: Dataset, epochs: int, batch_size: int, use_wandb: bool = False) -> Model: 16 | """Train model.""" 17 | callbacks = [] 18 | 19 | if EARLY_STOPPING: 20 | early_stopping = EarlyStopping(monitor="val_loss", min_delta=0.01, patience=3, verbose=1, mode="auto") 21 | callbacks.append(early_stopping) 22 | 23 | 24 | model.network.summary() 25 | 26 | t = time() 27 | _history = model.fit(dataset=dataset, batch_size=batch_size, epochs=epochs, callbacks=callbacks) 28 | print("Training took {:.2f} s".format(time() - t)) 29 | 30 | return model 31 | -------------------------------------------------------------------------------- /lab7/evaluation/evaluate_character_predictor.py: -------------------------------------------------------------------------------- 1 | """Run validation test for CharacterPredictor.""" 2 | import os 3 | from pathlib import Path 4 | from time import time 5 | import unittest 6 | 7 | from text_recognizer.datasets import EmnistDataset 8 | from text_recognizer.character_predictor import CharacterPredictor 9 | 10 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 11 | 12 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" / "emnist" 13 | 14 | 15 | class TestEvaluateCharacterPredictor(unittest.TestCase): 16 | def test_evaluate(self): 17 | predictor = CharacterPredictor() 18 | dataset = EmnistDataset() 19 | dataset.load_or_generate_data() 20 | t = time() 21 | metric = predictor.evaluate(dataset) 22 | time_taken = time() - t 23 | print(f"acc: {metric}, time_taken: {time_taken}") 24 | self.assertGreater(metric, 0.6) 25 | self.assertLess(time_taken, 10) 26 | -------------------------------------------------------------------------------- /lab8/evaluation/evaluate_character_predictor.py: -------------------------------------------------------------------------------- 1 | """Run validation test for CharacterPredictor.""" 2 | import os 3 | from pathlib import Path 4 | from time import time 5 | import unittest 6 | 7 | from text_recognizer.datasets import EmnistDataset 8 | from text_recognizer.character_predictor import CharacterPredictor 9 | 10 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 11 | 12 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" / "emnist" 13 | 14 | 15 | class TestEvaluateCharacterPredictor(unittest.TestCase): 16 | def test_evaluate(self): 17 | predictor = CharacterPredictor() 18 | dataset = EmnistDataset() 19 | dataset.load_or_generate_data() 20 | t = time() 21 | metric = predictor.evaluate(dataset) 22 | time_taken = time() - t 23 | print(f"acc: {metric}, time_taken: {time_taken}") 24 | self.assertGreater(metric, 0.6) 25 | self.assertLess(time_taken, 10) 26 | -------------------------------------------------------------------------------- /lab1/text_recognizer/character_predictor.py: -------------------------------------------------------------------------------- 1 | """CharacterPredictor class""" 2 | from typing import Tuple, Union 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models import CharacterModel 7 | import text_recognizer.util as util 8 | 9 | 10 | class CharacterPredictor: 11 | """Given an image of a single handwritten character, recognizes it.""" 12 | 13 | def __init__(self): 14 | self.model = CharacterModel() 15 | self.model.load_weights() 16 | 17 | def predict(self, image_or_filename: Union[np.ndarray, str]) -> Tuple[str, float]: 18 | """Predict on a single image.""" 19 | if isinstance(image_or_filename, str): 20 | image = util.read_image(image_or_filename, grayscale=True) 21 | else: 22 | image = image_or_filename 23 | return self.model.predict_on_image(image) 24 | 25 | def evaluate(self, dataset): 26 | """Evaluate on a dataset.""" 27 | return self.model.evaluate(dataset.x_test, dataset.y_test) 28 | -------------------------------------------------------------------------------- /lab2/text_recognizer/character_predictor.py: -------------------------------------------------------------------------------- 1 | """CharacterPredictor class""" 2 |
from typing import Tuple, Union 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models import CharacterModel 7 | import text_recognizer.util as util 8 | 9 | 10 | class CharacterPredictor: 11 | """Given an image of a single handwritten character, recognizes it.""" 12 | 13 | def __init__(self): 14 | self.model = CharacterModel() 15 | self.model.load_weights() 16 | 17 | def predict(self, image_or_filename: Union[np.ndarray, str]) -> Tuple[str, float]: 18 | """Predict on a single image.""" 19 | if isinstance(image_or_filename, str): 20 | image = util.read_image(image_or_filename, grayscale=True) 21 | else: 22 | image = image_or_filename 23 | return self.model.predict_on_image(image) 24 | 25 | def evaluate(self, dataset): 26 | """Evaluate on a dataset.""" 27 | return self.model.evaluate(dataset.x_test, dataset.y_test) 28 | -------------------------------------------------------------------------------- /lab3/text_recognizer/character_predictor.py: -------------------------------------------------------------------------------- 1 | """CharacterPredictor class""" 2 | from typing import Tuple, Union 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models import CharacterModel 7 | import text_recognizer.util as util 8 | 9 | 10 | class CharacterPredictor: 11 | """Given an image of a single handwritten character, recognizes it.""" 12 | 13 | def __init__(self): 14 | self.model = CharacterModel() 15 | self.model.load_weights() 16 | 17 | def predict(self, image_or_filename: Union[np.ndarray, str]) -> Tuple[str, float]: 18 | """Predict on a single image.""" 19 | if isinstance(image_or_filename, str): 20 | image = util.read_image(image_or_filename, grayscale=True) 21 | else: 22 | image = image_or_filename 23 | return self.model.predict_on_image(image) 24 | 25 | def evaluate(self, dataset): 26 | """Evaluate on a dataset.""" 27 | return self.model.evaluate(dataset.x_test, dataset.y_test) 28 | -------------------------------------------------------------------------------- /lab4/text_recognizer/character_predictor.py: -------------------------------------------------------------------------------- 1 | """CharacterPredictor class""" 2 | from typing import Tuple, Union 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models import CharacterModel 7 | import text_recognizer.util as util 8 | 9 | 10 | class CharacterPredictor: 11 | """Given an image of a single handwritten character, recognizes it.""" 12 | 13 | def __init__(self): 14 | self.model = CharacterModel() 15 | self.model.load_weights() 16 | 17 | def predict(self, image_or_filename: Union[np.ndarray, str]) -> Tuple[str, float]: 18 | """Predict on a single image.""" 19 | if isinstance(image_or_filename, str): 20 | image = util.read_image(image_or_filename, grayscale=True) 21 | else: 22 | image = image_or_filename 23 | return self.model.predict_on_image(image) 24 | 25 | def evaluate(self, dataset): 26 | """Evaluate on a dataset.""" 27 | return self.model.evaluate(dataset.x_test, dataset.y_test) 28 | -------------------------------------------------------------------------------- /lab5/text_recognizer/character_predictor.py: -------------------------------------------------------------------------------- 1 | """CharacterPredictor class""" 2 | from typing import Tuple, Union 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models import CharacterModel 7 | import text_recognizer.util as util 8 | 9 | 10 | class CharacterPredictor: 11 | """Given an image of a single handwritten character, recognizes it.""" 12 | 13 | def 
__init__(self): 14 | self.model = CharacterModel() 15 | self.model.load_weights() 16 | 17 | def predict(self, image_or_filename: Union[np.ndarray, str]) -> Tuple[str, float]: 18 | """Predict on a single image.""" 19 | if isinstance(image_or_filename, str): 20 | image = util.read_image(image_or_filename, grayscale=True) 21 | else: 22 | image = image_or_filename 23 | return self.model.predict_on_image(image) 24 | 25 | def evaluate(self, dataset): 26 | """Evaluate on a dataset.""" 27 | return self.model.evaluate(dataset.x_test, dataset.y_test) 28 | -------------------------------------------------------------------------------- /lab6/text_recognizer/character_predictor.py: -------------------------------------------------------------------------------- 1 | """CharacterPredictor class""" 2 | from typing import Tuple, Union 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models import CharacterModel 7 | import text_recognizer.util as util 8 | 9 | 10 | class CharacterPredictor: 11 | """Given an image of a single handwritten character, recognizes it.""" 12 | 13 | def __init__(self): 14 | self.model = CharacterModel() 15 | self.model.load_weights() 16 | 17 | def predict(self, image_or_filename: Union[np.ndarray, str]) -> Tuple[str, float]: 18 | """Predict on a single image.""" 19 | if isinstance(image_or_filename, str): 20 | image = util.read_image(image_or_filename, grayscale=True) 21 | else: 22 | image = image_or_filename 23 | return self.model.predict_on_image(image) 24 | 25 | def evaluate(self, dataset): 26 | """Evaluate on a dataset.""" 27 | return self.model.evaluate(dataset.x_test, dataset.y_test) 28 | -------------------------------------------------------------------------------- /lab7/text_recognizer/character_predictor.py: -------------------------------------------------------------------------------- 1 | """CharacterPredictor class""" 2 | from typing import Tuple, Union 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models import CharacterModel 7 | import text_recognizer.util as util 8 | 9 | 10 | class CharacterPredictor: 11 | """Given an image of a single handwritten character, recognizes it.""" 12 | 13 | def __init__(self): 14 | self.model = CharacterModel() 15 | self.model.load_weights() 16 | 17 | def predict(self, image_or_filename: Union[np.ndarray, str]) -> Tuple[str, float]: 18 | """Predict on a single image.""" 19 | if isinstance(image_or_filename, str): 20 | image = util.read_image(image_or_filename, grayscale=True) 21 | else: 22 | image = image_or_filename 23 | return self.model.predict_on_image(image) 24 | 25 | def evaluate(self, dataset): 26 | """Evaluate on a dataset.""" 27 | return self.model.evaluate(dataset.x_test, dataset.y_test) 28 | -------------------------------------------------------------------------------- /lab8/text_recognizer/character_predictor.py: -------------------------------------------------------------------------------- 1 | """CharacterPredictor class""" 2 | from typing import Tuple, Union 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models import CharacterModel 7 | import text_recognizer.util as util 8 | 9 | 10 | class CharacterPredictor: 11 | """Given an image of a single handwritten character, recognizes it.""" 12 | 13 | def __init__(self): 14 | self.model = CharacterModel() 15 | self.model.load_weights() 16 | 17 | def predict(self, image_or_filename: Union[np.ndarray, str]) -> Tuple[str, float]: 18 | """Predict on a single image.""" 19 | if isinstance(image_or_filename, str): 20 | image = 
util.read_image(image_or_filename, grayscale=True) 21 | else: 22 | image = image_or_filename 23 | return self.model.predict_on_image(image) 24 | 25 | def evaluate(self, dataset): 26 | """Evaluate on a dataset.""" 27 | return self.model.evaluate(dataset.x_test, dataset.y_test) 28 | -------------------------------------------------------------------------------- /lab1/text_recognizer/tests/test_character_predictor.py: -------------------------------------------------------------------------------- 1 | """Tests for CharacterPredictor class.""" 2 | import os 3 | from pathlib import Path 4 | import unittest 5 | 6 | from text_recognizer.character_predictor import CharacterPredictor 7 | 8 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" / "emnist" 9 | 10 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 11 | 12 | 13 | class TestCharacterPredictor(unittest.TestCase): 14 | """Tests for the CharacterPredictor class.""" 15 | 16 | def test_filename(self): 17 | """Test that CharacterPredictor correctly predicts on a single image, for several test images.""" 18 | predictor = CharacterPredictor() 19 | 20 | for filename in SUPPORT_DIRNAME.glob("*.png"): 21 | pred, conf = predictor.predict(str(filename)) 22 | print(f"Prediction: {pred} at confidence: {conf} for image with character {filename.stem}") 23 | self.assertEqual(pred, filename.stem) 24 | self.assertGreater(conf, 0.7) 25 | -------------------------------------------------------------------------------- /lab2/text_recognizer/tests/test_character_predictor.py: -------------------------------------------------------------------------------- 1 | """Tests for CharacterPredictor class.""" 2 | import os 3 | from pathlib import Path 4 | import unittest 5 | 6 | from text_recognizer.character_predictor import CharacterPredictor 7 | 8 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" / "emnist" 9 | 10 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 11 | 12 | 13 | class TestCharacterPredictor(unittest.TestCase): 14 | """Tests for the CharacterPredictor class.""" 15 | 16 | def test_filename(self): 17 | """Test that CharacterPredictor correctly predicts on a single image, for several test images.""" 18 | predictor = CharacterPredictor() 19 | 20 | for filename in SUPPORT_DIRNAME.glob("*.png"): 21 | pred, conf = predictor.predict(str(filename)) 22 | print(f"Prediction: {pred} at confidence: {conf} for image with character {filename.stem}") 23 | self.assertEqual(pred, filename.stem) 24 | self.assertGreater(conf, 0.7) 25 | -------------------------------------------------------------------------------- /lab3/text_recognizer/tests/test_character_predictor.py: -------------------------------------------------------------------------------- 1 | """Tests for CharacterPredictor class.""" 2 | import os 3 | from pathlib import Path 4 | import unittest 5 | 6 | from text_recognizer.character_predictor import CharacterPredictor 7 | 8 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" / "emnist" 9 | 10 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 11 | 12 | 13 | class TestCharacterPredictor(unittest.TestCase): 14 | """Tests for the CharacterPredictor class.""" 15 | 16 | def test_filename(self): 17 | """Test that CharacterPredictor correctly predicts on a single image, for several test images.""" 18 | predictor = CharacterPredictor() 19 | 20 | for filename in SUPPORT_DIRNAME.glob("*.png"): 21 | pred, conf = predictor.predict(str(filename)) 22 | print(f"Prediction: {pred} at confidence: {conf} for image with character {filename.stem}") 23 |
self.assertEqual(pred, filename.stem) 24 | self.assertGreater(conf, 0.7) 25 | -------------------------------------------------------------------------------- /lab4/text_recognizer/tests/test_character_predictor.py: -------------------------------------------------------------------------------- 1 | """Tests for CharacterPredictor class.""" 2 | import os 3 | from pathlib import Path 4 | import unittest 5 | 6 | from text_recognizer.character_predictor import CharacterPredictor 7 | 8 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" / "emnist" 9 | 10 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 11 | 12 | 13 | class TestCharacterPredictor(unittest.TestCase): 14 | """Tests for the CharacterPredictor class.""" 15 | 16 | def test_filename(self): 17 | """Test that CharacterPredictor correctly predicts on a single image, for several test images.""" 18 | predictor = CharacterPredictor() 19 | 20 | for filename in SUPPORT_DIRNAME.glob("*.png"): 21 | pred, conf = predictor.predict(str(filename)) 22 | print(f"Prediction: {pred} at confidence: {conf} for image with character {filename.stem}") 23 | self.assertEqual(pred, filename.stem) 24 | self.assertGreater(conf, 0.7) 25 | -------------------------------------------------------------------------------- /lab5/text_recognizer/tests/test_character_predictor.py: -------------------------------------------------------------------------------- 1 | """Tests for CharacterPredictor class.""" 2 | import os 3 | from pathlib import Path 4 | import unittest 5 | 6 | from text_recognizer.character_predictor import CharacterPredictor 7 | 8 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" / "emnist" 9 | 10 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 11 | 12 | 13 | class TestCharacterPredictor(unittest.TestCase): 14 | """Tests for the CharacterPredictor class.""" 15 | 16 | def test_filename(self): 17 | """Test that CharacterPredictor correctly predicts on a single image, for several test images.""" 18 | predictor = CharacterPredictor() 19 | 20 | for filename in SUPPORT_DIRNAME.glob("*.png"): 21 | pred, conf = predictor.predict(str(filename)) 22 | print(f"Prediction: {pred} at confidence: {conf} for image with character {filename.stem}") 23 | self.assertEqual(pred, filename.stem) 24 | self.assertGreater(conf, 0.7) 25 | -------------------------------------------------------------------------------- /lab6/text_recognizer/tests/test_character_predictor.py: -------------------------------------------------------------------------------- 1 | """Tests for CharacterPredictor class.""" 2 | import os 3 | from pathlib import Path 4 | import unittest 5 | 6 | from text_recognizer.character_predictor import CharacterPredictor 7 | 8 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" / "emnist" 9 | 10 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 11 | 12 | 13 | class TestCharacterPredictor(unittest.TestCase): 14 | """Tests for the CharacterPredictor class.""" 15 | 16 | def test_filename(self): 17 | """Test that CharacterPredictor correctly predicts on a single image, for several test images.""" 18 | predictor = CharacterPredictor() 19 | 20 | for filename in SUPPORT_DIRNAME.glob("*.png"): 21 | pred, conf = predictor.predict(str(filename)) 22 | print(f"Prediction: {pred} at confidence: {conf} for image with character {filename.stem}") 23 | self.assertEqual(pred, filename.stem) 24 | self.assertGreater(conf, 0.7) 25 | --------------------------------------------------------------------------------
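The loop these tests perform also makes a reasonable manual smoke test; a small sketch, assuming the working directory is a lab root so that the support images and the saved weights under text_recognizer/weights resolve:

# Hypothetical manual check mirroring test_filename above.
from pathlib import Path

from text_recognizer.character_predictor import CharacterPredictor

support_dirname = Path("text_recognizer/tests/support/emnist")  # assumed relative path
predictor = CharacterPredictor()  # loads CharacterModel weights via load_weights()
for filename in sorted(support_dirname.glob("*.png")):
    pred, conf = predictor.predict(str(filename))  # (predicted character, confidence)
    print(f"{filename.stem}: predicted {pred!r} at confidence {conf:.2f}")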
/lab7/text_recognizer/tests/test_character_predictor.py: -------------------------------------------------------------------------------- 1 | """Tests for CharacterPredictor class.""" 2 | import os 3 | from pathlib import Path 4 | import unittest 5 | 6 | from text_recognizer.character_predictor import CharacterPredictor 7 | 8 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" / "emnist" 9 | 10 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 11 | 12 | 13 | class TestCharacterPredictor(unittest.TestCase): 14 | """Tests for the CharacterPredictor class.""" 15 | 16 | def test_filename(self): 17 | """Test that CharacterPredictor correctly predicts on a single image, for several test images.""" 18 | predictor = CharacterPredictor() 19 | 20 | for filename in SUPPORT_DIRNAME.glob("*.png"): 21 | pred, conf = predictor.predict(str(filename)) 22 | print(f"Prediction: {pred} at confidence: {conf} for image with character {filename.stem}") 23 | self.assertEqual(pred, filename.stem) 24 | self.assertGreater(conf, 0.7) 25 | -------------------------------------------------------------------------------- /lab8/text_recognizer/tests/test_character_predictor.py: -------------------------------------------------------------------------------- 1 | """Tests for CharacterPredictor class.""" 2 | import os 3 | from pathlib import Path 4 | import unittest 5 | 6 | from text_recognizer.character_predictor import CharacterPredictor 7 | 8 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" / "emnist" 9 | 10 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 11 | 12 | 13 | class TestCharacterPredictor(unittest.TestCase): 14 | """Tests for the CharacterPredictor class.""" 15 | 16 | def test_filename(self): 17 | """Test that CharacterPredictor correctly predicts on a single image, for several test images.""" 18 | predictor = CharacterPredictor() 19 | 20 | for filename in SUPPORT_DIRNAME.glob("*.png"): 21 | pred, conf = predictor.predict(str(filename)) 22 | print(f"Prediction: {pred} at confidence: {conf} for image with character {filename.stem}") 23 | self.assertEqual(pred, filename.stem) 24 | self.assertGreater(conf, 0.7) 25 | -------------------------------------------------------------------------------- /lab4/text_recognizer/tests/support/create_iam_lines_support_files.py: -------------------------------------------------------------------------------- 1 | """Module for creating IAM Lines test support files.""" 2 | from pathlib import Path 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | from text_recognizer.datasets import IamLinesDataset 8 | import text_recognizer.util as util 9 | 10 | 11 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "iam_lines" 12 | 13 | 14 | def create_iam_lines_support_files(): 15 | shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True) 16 | SUPPORT_DIRNAME.mkdir() 17 | 18 | dataset = IamLinesDataset() 19 | dataset.load_or_generate_data() 20 | 21 | for ind in [0, 1, 3]: 22 | image = dataset.x_test[ind] 23 | label = "".join(dataset.mapping[label] for label in np.argmax(dataset.y_test[ind], axis=-1).flatten()).strip( 24 | " _" 25 | ) 26 | print(label) 27 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) 28 | 29 | 30 | if __name__ == "__main__": 31 | create_iam_lines_support_files() 32 | -------------------------------------------------------------------------------- /lab5/text_recognizer/tests/support/create_iam_lines_support_files.py: -------------------------------------------------------------------------------- 1 | """Module for creating IAM Lines
test support files.""" 2 | from pathlib import Path 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | from text_recognizer.datasets import IamLinesDataset 8 | import text_recognizer.util as util 9 | 10 | 11 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "iam_lines" 12 | 13 | 14 | def create_iam_lines_support_files(): 15 | shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True) 16 | SUPPORT_DIRNAME.mkdir() 17 | 18 | dataset = IamLinesDataset() 19 | dataset.load_or_generate_data() 20 | 21 | for ind in [0, 1, 3]: 22 | image = dataset.x_test[ind] 23 | label = "".join(dataset.mapping[label] for label in np.argmax(dataset.y_test[ind], axis=-1).flatten()).strip( 24 | " _" 25 | ) 26 | print(label) 27 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) 28 | 29 | 30 | if __name__ == "__main__": 31 | create_iam_lines_support_files() 32 | -------------------------------------------------------------------------------- /lab6/text_recognizer/tests/support/create_iam_lines_support_files.py: -------------------------------------------------------------------------------- 1 | """Module for creating IAM Lines test support files.""" 2 | from pathlib import Path 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | from text_recognizer.datasets import IamLinesDataset 8 | import text_recognizer.util as util 9 | 10 | 11 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "iam_lines" 12 | 13 | 14 | def create_iam_lines_support_files(): 15 | shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True) 16 | SUPPORT_DIRNAME.mkdir() 17 | 18 | dataset = IamLinesDataset() 19 | dataset.load_or_generate_data() 20 | 21 | for ind in [0, 1, 3]: 22 | image = dataset.x_test[ind] 23 | label = "".join(dataset.mapping[label] for label in np.argmax(dataset.y_test[ind], axis=-1).flatten()).strip( 24 | " _" 25 | ) 26 | print(label) 27 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) 28 | 29 | 30 | if __name__ == "__main__": 31 | create_iam_lines_support_files() 32 | -------------------------------------------------------------------------------- /lab7/text_recognizer/tests/support/create_iam_lines_support_files.py: -------------------------------------------------------------------------------- 1 | """Module for creating IAM Lines test support files.""" 2 | from pathlib import Path 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | from text_recognizer.datasets import IamLinesDataset 8 | import text_recognizer.util as util 9 | 10 | 11 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "iam_lines" 12 | 13 | 14 | def create_iam_lines_support_files(): 15 | shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True) 16 | SUPPORT_DIRNAME.mkdir() 17 | 18 | dataset = IamLinesDataset() 19 | dataset.load_or_generate_data() 20 | 21 | for ind in [0, 1, 3]: 22 | image = dataset.x_test[ind] 23 | label = "".join(dataset.mapping[label] for label in np.argmax(dataset.y_test[ind], axis=-1).flatten()).strip( 24 | " _" 25 | ) 26 | print(label) 27 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) 28 | 29 | 30 | if __name__ == "__main__": 31 | create_iam_lines_support_files() 32 | -------------------------------------------------------------------------------- /lab8/text_recognizer/tests/support/create_iam_lines_support_files.py: -------------------------------------------------------------------------------- 1 | """Module for creating IAM Lines test support files.""" 2 | from pathlib import Path 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | from text_recognizer.datasets import IamLinesDataset 8 | 
import text_recognizer.util as util 9 | 10 | 11 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "iam_lines" 12 | 13 | 14 | def create_iam_lines_support_files(): 15 | shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True) 16 | SUPPORT_DIRNAME.mkdir() 17 | 18 | dataset = IamLinesDataset() 19 | dataset.load_or_generate_data() 20 | 21 | for ind in [0, 1, 3]: 22 | image = dataset.x_test[ind] 23 | label = "".join(dataset.mapping[label] for label in np.argmax(dataset.y_test[ind], axis=-1).flatten()).strip( 24 | " _" 25 | ) 26 | print(label) 27 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) 28 | 29 | 30 | if __name__ == "__main__": 31 | create_iam_lines_support_files() 32 | -------------------------------------------------------------------------------- /lab4/training/experiments/sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "experiment_group": "Sample Experiments", 3 | "experiments": [ 4 | { 5 | "dataset": "EmnistDataset", 6 | "model": "CharacterModel", 7 | "network": "mlp", 8 | "network_args": { 9 | "num_layers": 2 10 | }, 11 | "train_args": { 12 | "batch_size": 256 13 | } 14 | }, 15 | { 16 | "dataset": "EmnistDataset", 17 | "model": "CharacterModel", 18 | "network": "mlp", 19 | "network_args": { 20 | "num_layers": 4 21 | }, 22 | "train_args": { 23 | "batch_size": 256 24 | } 25 | }, 26 | { 27 | "dataset": "EmnistDataset", 28 | "model": "CharacterModel", 29 | "network": "lenet", 30 | "train_args": { 31 | "batch_size": 128 32 | } 33 | } 34 | ] 35 | } 36 | -------------------------------------------------------------------------------- /lab5/training/experiments/sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "experiment_group": "Sample Experiments", 3 | "experiments": [ 4 | { 5 | "dataset": "EmnistDataset", 6 | "model": "CharacterModel", 7 | "network": "mlp", 8 | "network_args": { 9 | "num_layers": 2 10 | }, 11 | "train_args": { 12 | "batch_size": 256 13 | } 14 | }, 15 | { 16 | "dataset": "EmnistDataset", 17 | "model": "CharacterModel", 18 | "network": "mlp", 19 | "network_args": { 20 | "num_layers": 4 21 | }, 22 | "train_args": { 23 | "batch_size": 256 24 | } 25 | }, 26 | { 27 | "dataset": "EmnistDataset", 28 | "model": "CharacterModel", 29 | "network": "lenet", 30 | "train_args": { 31 | "batch_size": 128 32 | } 33 | } 34 | ] 35 | } 36 | -------------------------------------------------------------------------------- /lab6/training/experiments/sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "experiment_group": "Sample Experiments", 3 | "experiments": [ 4 | { 5 | "dataset": "EmnistDataset", 6 | "model": "CharacterModel", 7 | "network": "mlp", 8 | "network_args": { 9 | "num_layers": 2 10 | }, 11 | "train_args": { 12 | "batch_size": 256 13 | } 14 | }, 15 | { 16 | "dataset": "EmnistDataset", 17 | "model": "CharacterModel", 18 | "network": "mlp", 19 | "network_args": { 20 | "num_layers": 4 21 | }, 22 | "train_args": { 23 | "batch_size": 256 24 | } 25 | }, 26 | { 27 | "dataset": "EmnistDataset", 28 | "model": "CharacterModel", 29 | "network": "lenet", 30 | "train_args": { 31 | "batch_size": 128 32 | } 33 | } 34 | ] 35 | } 36 | -------------------------------------------------------------------------------- /lab7/training/experiments/sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "experiment_group": "Sample Experiments", 3 | "experiments": [ 4 | { 5 | 
"dataset": "EmnistDataset", 6 | "model": "CharacterModel", 7 | "network": "mlp", 8 | "network_args": { 9 | "num_layers": 2 10 | }, 11 | "train_args": { 12 | "batch_size": 256 13 | } 14 | }, 15 | { 16 | "dataset": "EmnistDataset", 17 | "model": "CharacterModel", 18 | "network": "mlp", 19 | "network_args": { 20 | "num_layers": 4 21 | }, 22 | "train_args": { 23 | "batch_size": 256 24 | } 25 | }, 26 | { 27 | "dataset": "EmnistDataset", 28 | "model": "CharacterModel", 29 | "network": "lenet", 30 | "train_args": { 31 | "batch_size": 128 32 | } 33 | } 34 | ] 35 | } 36 | -------------------------------------------------------------------------------- /lab8/training/experiments/sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "experiment_group": "Sample Experiments", 3 | "experiments": [ 4 | { 5 | "dataset": "EmnistDataset", 6 | "model": "CharacterModel", 7 | "network": "mlp", 8 | "network_args": { 9 | "num_layers": 2 10 | }, 11 | "train_args": { 12 | "batch_size": 256 13 | } 14 | }, 15 | { 16 | "dataset": "EmnistDataset", 17 | "model": "CharacterModel", 18 | "network": "mlp", 19 | "network_args": { 20 | "num_layers": 4 21 | }, 22 | "train_args": { 23 | "batch_size": 256 24 | } 25 | }, 26 | { 27 | "dataset": "EmnistDataset", 28 | "model": "CharacterModel", 29 | "network": "lenet", 30 | "train_args": { 31 | "batch_size": 128 32 | } 33 | } 34 | ] 35 | } 36 | -------------------------------------------------------------------------------- /lab1/text_recognizer/tests/support/create_emnist_lines_support_files.py: -------------------------------------------------------------------------------- 1 | """Module for creating EMNIST Lines test support files.""" 2 | from pathlib import Path 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | from text_recognizer.datasets import EmnistLinesDataset 8 | import text_recognizer.util as util 9 | 10 | 11 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "emnist_lines" 12 | 13 | 14 | def create_emnist_lines_support_files(): 15 | shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True) 16 | SUPPORT_DIRNAME.mkdir() 17 | 18 | dataset = EmnistLinesDataset() 19 | dataset.load_or_generate_data() 20 | 21 | for ind in [0, 1, 3]: 22 | image = dataset.x_test[ind] 23 | print(image.sum(), image.dtype) 24 | label = "".join(dataset.mapping[label] for label in np.argmax(dataset.y_test[ind], axis=-1).flatten()).strip( 25 | " _" 26 | ) 27 | print(label) 28 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) 29 | 30 | 31 | if __name__ == "__main__": 32 | create_emnist_lines_support_files() 33 | -------------------------------------------------------------------------------- /lab2/text_recognizer/tests/support/create_emnist_lines_support_files.py: -------------------------------------------------------------------------------- 1 | """Module for creating EMNIST Lines test support files.""" 2 | from pathlib import Path 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | from text_recognizer.datasets import EmnistLinesDataset 8 | import text_recognizer.util as util 9 | 10 | 11 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "emnist_lines" 12 | 13 | 14 | def create_emnist_lines_support_files(): 15 | shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True) 16 | SUPPORT_DIRNAME.mkdir() 17 | 18 | dataset = EmnistLinesDataset() 19 | dataset.load_or_generate_data() 20 | 21 | for ind in [0, 1, 3]: 22 | image = dataset.x_test[ind] 23 | print(image.sum(), image.dtype) 24 | label = "".join(dataset.mapping[label] for label in 
np.argmax(dataset.y_test[ind], axis=-1).flatten()).strip( 25 | " _" 26 | ) 27 | print(label) 28 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) 29 | 30 | 31 | if __name__ == "__main__": 32 | create_emnist_lines_support_files() 33 | -------------------------------------------------------------------------------- /lab3/text_recognizer/tests/support/create_emnist_lines_support_files.py: -------------------------------------------------------------------------------- 1 | """Module for creating EMNIST Lines test support files.""" 2 | from pathlib import Path 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | from text_recognizer.datasets import EmnistLinesDataset 8 | import text_recognizer.util as util 9 | 10 | 11 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "emnist_lines" 12 | 13 | 14 | def create_emnist_lines_support_files(): 15 | shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True) 16 | SUPPORT_DIRNAME.mkdir() 17 | 18 | dataset = EmnistLinesDataset() 19 | dataset.load_or_generate_data() 20 | 21 | for ind in [0, 1, 3]: 22 | image = dataset.x_test[ind] 23 | print(image.sum(), image.dtype) 24 | label = "".join(dataset.mapping[label] for label in np.argmax(dataset.y_test[ind], axis=-1).flatten()).strip( 25 | " _" 26 | ) 27 | print(label) 28 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) 29 | 30 | 31 | if __name__ == "__main__": 32 | create_emnist_lines_support_files() 33 | -------------------------------------------------------------------------------- /lab4/text_recognizer/tests/support/create_emnist_lines_support_files.py: -------------------------------------------------------------------------------- 1 | """Module for creating EMNIST Lines test support files.""" 2 | from pathlib import Path 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | from text_recognizer.datasets import EmnistLinesDataset 8 | import text_recognizer.util as util 9 | 10 | 11 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "emnist_lines" 12 | 13 | 14 | def create_emnist_lines_support_files(): 15 | shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True) 16 | SUPPORT_DIRNAME.mkdir() 17 | 18 | dataset = EmnistLinesDataset() 19 | dataset.load_or_generate_data() 20 | 21 | for ind in [0, 1, 3]: 22 | image = dataset.x_test[ind] 23 | print(image.sum(), image.dtype) 24 | label = "".join(dataset.mapping[label] for label in np.argmax(dataset.y_test[ind], axis=-1).flatten()).strip( 25 | " _" 26 | ) 27 | print(label) 28 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) 29 | 30 | 31 | if __name__ == "__main__": 32 | create_emnist_lines_support_files() 33 | -------------------------------------------------------------------------------- /lab5/text_recognizer/tests/support/create_emnist_lines_support_files.py: -------------------------------------------------------------------------------- 1 | """Module for creating EMNIST Lines test support files.""" 2 | from pathlib import Path 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | from text_recognizer.datasets import EmnistLinesDataset 8 | import text_recognizer.util as util 9 | 10 | 11 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "emnist_lines" 12 | 13 | 14 | def create_emnist_lines_support_files(): 15 | shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True) 16 | SUPPORT_DIRNAME.mkdir() 17 | 18 | dataset = EmnistLinesDataset() 19 | dataset.load_or_generate_data() 20 | 21 | for ind in [0, 1, 3]: 22 | image = dataset.x_test[ind] 23 | print(image.sum(), image.dtype) 24 | label = "".join(dataset.mapping[label] 
for label in np.argmax(dataset.y_test[ind], axis=-1).flatten()).strip( 25 | " _" 26 | ) 27 | print(label) 28 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) 29 | 30 | 31 | if __name__ == "__main__": 32 | create_emnist_lines_support_files() 33 | -------------------------------------------------------------------------------- /lab6/text_recognizer/tests/support/create_emnist_lines_support_files.py: -------------------------------------------------------------------------------- 1 | """Module for creating EMNIST Lines test support files.""" 2 | from pathlib import Path 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | from text_recognizer.datasets import EmnistLinesDataset 8 | import text_recognizer.util as util 9 | 10 | 11 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "emnist_lines" 12 | 13 | 14 | def create_emnist_lines_support_files(): 15 | shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True) 16 | SUPPORT_DIRNAME.mkdir() 17 | 18 | dataset = EmnistLinesDataset() 19 | dataset.load_or_generate_data() 20 | 21 | for ind in [0, 1, 3]: 22 | image = dataset.x_test[ind] 23 | print(image.sum(), image.dtype) 24 | label = "".join(dataset.mapping[label] for label in np.argmax(dataset.y_test[ind], axis=-1).flatten()).strip( 25 | " _" 26 | ) 27 | print(label) 28 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) 29 | 30 | 31 | if __name__ == "__main__": 32 | create_emnist_lines_support_files() 33 | -------------------------------------------------------------------------------- /lab7/text_recognizer/tests/support/create_emnist_lines_support_files.py: -------------------------------------------------------------------------------- 1 | """Module for creating EMNIST Lines test support files.""" 2 | from pathlib import Path 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | from text_recognizer.datasets import EmnistLinesDataset 8 | import text_recognizer.util as util 9 | 10 | 11 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "emnist_lines" 12 | 13 | 14 | def create_emnist_lines_support_files(): 15 | shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True) 16 | SUPPORT_DIRNAME.mkdir() 17 | 18 | dataset = EmnistLinesDataset() 19 | dataset.load_or_generate_data() 20 | 21 | for ind in [0, 1, 3]: 22 | image = dataset.x_test[ind] 23 | print(image.sum(), image.dtype) 24 | label = "".join(dataset.mapping[label] for label in np.argmax(dataset.y_test[ind], axis=-1).flatten()).strip( 25 | " _" 26 | ) 27 | print(label) 28 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) 29 | 30 | 31 | if __name__ == "__main__": 32 | create_emnist_lines_support_files() 33 | -------------------------------------------------------------------------------- /lab2/text_recognizer/line_predictor.py: -------------------------------------------------------------------------------- 1 | """LinePredictor class""" 2 | from typing import Tuple, Union 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models import LineModelCtc 7 | from text_recognizer.datasets import EmnistLinesDataset 8 | import text_recognizer.util as util 9 | 10 | 11 | class LinePredictor: 12 | """Given an image of a line of handwritten text, recognizes text contents.""" 13 | 14 | def __init__(self, dataset_cls=EmnistLinesDataset): 15 | self.model = LineModelCtc(dataset_cls=dataset_cls) 16 | self.model.load_weights() 17 | 18 | def predict(self, image_or_filename: Union[np.ndarray, str]) -> Tuple[str, float]: 19 | """Predict on a single image.""" 20 | if isinstance(image_or_filename, str): 21 | image = 
util.read_image(image_or_filename, grayscale=True) 22 | else: 23 | image = image_or_filename 24 | return self.model.predict_on_image(image) 25 | 26 | def evaluate(self, dataset): 27 | """Evaluate on a dataset.""" 28 | return self.model.evaluate(dataset.x_test, dataset.y_test) 29 | -------------------------------------------------------------------------------- /lab3/text_recognizer/line_predictor.py: -------------------------------------------------------------------------------- 1 | """LinePredictor class""" 2 | from typing import Tuple, Union 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models import LineModelCtc 7 | from text_recognizer.datasets import EmnistLinesDataset 8 | import text_recognizer.util as util 9 | 10 | 11 | class LinePredictor: 12 | """Given an image of a line of handwritten text, recognizes text contents.""" 13 | 14 | def __init__(self, dataset_cls=EmnistLinesDataset): 15 | self.model = LineModelCtc(dataset_cls=dataset_cls) 16 | self.model.load_weights() 17 | 18 | def predict(self, image_or_filename: Union[np.ndarray, str]) -> Tuple[str, float]: 19 | """Predict on a single image.""" 20 | if isinstance(image_or_filename, str): 21 | image = util.read_image(image_or_filename, grayscale=True) 22 | else: 23 | image = image_or_filename 24 | return self.model.predict_on_image(image) 25 | 26 | def evaluate(self, dataset): 27 | """Evaluate on a dataset.""" 28 | return self.model.evaluate(dataset.x_test, dataset.y_test) 29 | -------------------------------------------------------------------------------- /lab4/text_recognizer/line_predictor.py: -------------------------------------------------------------------------------- 1 | """LinePredictor class""" 2 | from typing import Tuple, Union 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models import LineModelCtc 7 | from text_recognizer.datasets import EmnistLinesDataset 8 | import text_recognizer.util as util 9 | 10 | 11 | class LinePredictor: 12 | """Given an image of a line of handwritten text, recognizes text contents.""" 13 | 14 | def __init__(self, dataset_cls=EmnistLinesDataset): 15 | self.model = LineModelCtc(dataset_cls=dataset_cls) 16 | self.model.load_weights() 17 | 18 | def predict(self, image_or_filename: Union[np.ndarray, str]) -> Tuple[str, float]: 19 | """Predict on a single image.""" 20 | if isinstance(image_or_filename, str): 21 | image = util.read_image(image_or_filename, grayscale=True) 22 | else: 23 | image = image_or_filename 24 | return self.model.predict_on_image(image) 25 | 26 | def evaluate(self, dataset): 27 | """Evaluate on a dataset.""" 28 | return self.model.evaluate(dataset.x_test, dataset.y_test) 29 | -------------------------------------------------------------------------------- /lab5/text_recognizer/line_predictor.py: -------------------------------------------------------------------------------- 1 | """LinePredictor class""" 2 | from typing import Tuple, Union 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models import LineModelCtc 7 | from text_recognizer.datasets import EmnistLinesDataset 8 | import text_recognizer.util as util 9 | 10 | 11 | class LinePredictor: 12 | """Given an image of a line of handwritten text, recognizes text contents.""" 13 | 14 | def __init__(self, dataset_cls=EmnistLinesDataset): 15 | self.model = LineModelCtc(dataset_cls=dataset_cls) 16 | self.model.load_weights() 17 | 18 | def predict(self, image_or_filename: Union[np.ndarray, str]) -> Tuple[str, float]: 19 | """Predict on a single image.""" 20 | if isinstance(image_or_filename, str): 
21 | image = util.read_image(image_or_filename, grayscale=True) 22 | else: 23 | image = image_or_filename 24 | return self.model.predict_on_image(image) 25 | 26 | def evaluate(self, dataset): 27 | """Evaluate on a dataset.""" 28 | return self.model.evaluate(dataset.x_test, dataset.y_test) 29 | -------------------------------------------------------------------------------- /lab6/text_recognizer/line_predictor.py: -------------------------------------------------------------------------------- 1 | """LinePredictor class""" 2 | from typing import Tuple, Union 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models import LineModelCtc 7 | from text_recognizer.datasets import EmnistLinesDataset 8 | import text_recognizer.util as util 9 | 10 | 11 | class LinePredictor: 12 | """Given an image of a line of handwritten text, recognizes text contents.""" 13 | 14 | def __init__(self, dataset_cls=EmnistLinesDataset): 15 | self.model = LineModelCtc(dataset_cls=dataset_cls) 16 | self.model.load_weights() 17 | 18 | def predict(self, image_or_filename: Union[np.ndarray, str]) -> Tuple[str, float]: 19 | """Predict on a single image.""" 20 | if isinstance(image_or_filename, str): 21 | image = util.read_image(image_or_filename, grayscale=True) 22 | else: 23 | image = image_or_filename 24 | return self.model.predict_on_image(image) 25 | 26 | def evaluate(self, dataset): 27 | """Evaluate on a dataset.""" 28 | return self.model.evaluate(dataset.x_test, dataset.y_test) 29 | -------------------------------------------------------------------------------- /lab7/text_recognizer/line_predictor.py: -------------------------------------------------------------------------------- 1 | """LinePredictor class""" 2 | from typing import Tuple, Union 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models import LineModelCtc 7 | from text_recognizer.datasets import EmnistLinesDataset 8 | import text_recognizer.util as util 9 | 10 | 11 | class LinePredictor: 12 | """Given an image of a line of handwritten text, recognizes text contents.""" 13 | 14 | def __init__(self, dataset_cls=EmnistLinesDataset): 15 | self.model = LineModelCtc(dataset_cls=dataset_cls) 16 | self.model.load_weights() 17 | 18 | def predict(self, image_or_filename: Union[np.ndarray, str]) -> Tuple[str, float]: 19 | """Predict on a single image.""" 20 | if isinstance(image_or_filename, str): 21 | image = util.read_image(image_or_filename, grayscale=True) 22 | else: 23 | image = image_or_filename 24 | return self.model.predict_on_image(image) 25 | 26 | def evaluate(self, dataset): 27 | """Evaluate on a dataset.""" 28 | return self.model.evaluate(dataset.x_test, dataset.y_test) 29 | -------------------------------------------------------------------------------- /lab8/text_recognizer/line_predictor.py: -------------------------------------------------------------------------------- 1 | """LinePredictor class""" 2 | from typing import Tuple, Union 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models import LineModelCtc 7 | from text_recognizer.datasets import EmnistLinesDataset 8 | import text_recognizer.util as util 9 | 10 | 11 | class LinePredictor: 12 | """Given an image of a line of handwritten text, recognizes text contents.""" 13 | 14 | def __init__(self, dataset_cls=EmnistLinesDataset): 15 | self.model = LineModelCtc(dataset_cls=dataset_cls) 16 | self.model.load_weights() 17 | 18 | def predict(self, image_or_filename: Union[np.ndarray, str]) -> Tuple[str, float]: 19 | """Predict on a single image.""" 20 | if 
isinstance(image_or_filename, str): 21 | image = util.read_image(image_or_filename, grayscale=True) 22 | else: 23 | image = image_or_filename 24 | return self.model.predict_on_image(image) 25 | 26 | def evaluate(self, dataset): 27 | """Evaluate on a dataset.""" 28 | return self.model.evaluate(dataset.x_test, dataset.y_test) 29 | -------------------------------------------------------------------------------- /lab5/text_recognizer/tests/test_paragraph_text_recognizer.py: -------------------------------------------------------------------------------- 1 | """Tests for ParagraphTextRecognizer class.""" 2 | import os 3 | from pathlib import Path 4 | import unittest 5 | from text_recognizer.paragraph_text_recognizer import ParagraphTextRecognizer 6 | import text_recognizer.util as util 7 | 8 | 9 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" / "iam_paragraphs" 10 | 11 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 12 | 13 | 14 | class TestParagraphTextRecognizer(unittest.TestCase): 15 | """Test that it can take non-square images of max dimension larger than 256px.""" 16 | 17 | def test_filename(self): # pylint: disable=R0201 18 | predictor = ParagraphTextRecognizer() 19 | num_text_lines_by_name = {"a01-000u-cropped": 7} 20 | for filename in (SUPPORT_DIRNAME).glob("*.jpg"): 21 | full_image = util.read_image(str(filename), grayscale=True) 22 | predicted_text, line_region_crops = predictor.predict(full_image) 23 | print(predicted_text) 24 | assert len(line_region_crops) == num_text_lines_by_name[filename.stem] 25 | -------------------------------------------------------------------------------- /lab6/text_recognizer/tests/test_paragraph_text_recognizer.py: -------------------------------------------------------------------------------- 1 | """Tests for ParagraphTextRecognizer class.""" 2 | import os 3 | from pathlib import Path 4 | import unittest 5 | from text_recognizer.paragraph_text_recognizer import ParagraphTextRecognizer 6 | import text_recognizer.util as util 7 | 8 | 9 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" / "iam_paragraphs" 10 | 11 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 12 | 13 | 14 | class TestParagraphTextRecognizer(unittest.TestCase): 15 | """Test that it can take non-square images of max dimension larger than 256px.""" 16 | 17 | def test_filename(self): # pylint: disable=R0201 18 | predictor = ParagraphTextRecognizer() 19 | num_text_lines_by_name = {"a01-000u-cropped": 7} 20 | for filename in (SUPPORT_DIRNAME).glob("*.jpg"): 21 | full_image = util.read_image(str(filename), grayscale=True) 22 | predicted_text, line_region_crops = predictor.predict(full_image) 23 | print(predicted_text) 24 | assert len(line_region_crops) == num_text_lines_by_name[filename.stem] 25 | -------------------------------------------------------------------------------- /lab7/text_recognizer/tests/test_paragraph_text_recognizer.py: -------------------------------------------------------------------------------- 1 | """Tests for ParagraphTextRecognizer class.""" 2 | import os 3 | from pathlib import Path 4 | import unittest 5 | from text_recognizer.paragraph_text_recognizer import ParagraphTextRecognizer 6 | import text_recognizer.util as util 7 | 8 | 9 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" / "iam_paragraphs" 10 | 11 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 12 | 13 | 14 | class TestParagraphTextRecognizer(unittest.TestCase): 15 | """Test that it can take non-square images of max dimension larger than 256px.""" 16 | 17 | def 
test_filename(self): # pylint: disable=R0201 18 | predictor = ParagraphTextRecognizer() 19 | num_text_lines_by_name = {"a01-000u-cropped": 7} 20 | for filename in (SUPPORT_DIRNAME).glob("*.jpg"): 21 | full_image = util.read_image(str(filename), grayscale=True) 22 | predicted_text, line_region_crops = predictor.predict(full_image) 23 | print(predicted_text) 24 | assert len(line_region_crops) == num_text_lines_by_name[filename.stem] 25 | -------------------------------------------------------------------------------- /lab8/text_recognizer/tests/test_paragraph_text_recognizer.py: -------------------------------------------------------------------------------- 1 | """Tests for ParagraphTextRecognizer class.""" 2 | import os 3 | from pathlib import Path 4 | import unittest 5 | from text_recognizer.paragraph_text_recognizer import ParagraphTextRecognizer 6 | import text_recognizer.util as util 7 | 8 | 9 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" / "iam_paragraphs" 10 | 11 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 12 | 13 | 14 | class TestParagraphTextRecognizer(unittest.TestCase): 15 | """Test that it can take non-square images of max dimension larger than 256px.""" 16 | 17 | def test_filename(self): # pylint: disable=R0201 18 | predictor = ParagraphTextRecognizer() 19 | num_text_lines_by_name = {"a01-000u-cropped": 7} 20 | for filename in (SUPPORT_DIRNAME).glob("*.jpg"): 21 | full_image = util.read_image(str(filename), grayscale=True) 22 | predicted_text, line_region_crops = predictor.predict(full_image) 23 | print(predicted_text) 24 | assert len(line_region_crops) == num_text_lines_by_name[filename.stem] 25 | -------------------------------------------------------------------------------- /lab4/training/prepare_experiments.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Simple way to run experiments defined in a file.""" 3 | import argparse 4 | import json 5 | 6 | 7 | def run_experiments(experiments_filename): 8 | """Run experiments from file.""" 9 | with open(experiments_filename) as f: 10 | experiments_config = json.load(f) 11 | num_experiments = len(experiments_config["experiments"]) 12 | for ind in range(num_experiments): 13 | experiment_config = experiments_config["experiments"][ind] 14 | experiment_config["experiment_group"] = experiments_config["experiment_group"] 15 | print(f"python training/run_experiment.py --gpu=-1 '{json.dumps(experiment_config)}'") 16 | 17 | 18 | def main(): 19 | """Parse command-line arguments and run experiments from provided file.""" 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument("experiments_filename", type=str, help="Filename of JSON file of experiments to run.") 22 | args = parser.parse_args() 23 | run_experiments(args.experiments_filename) 24 | 25 | 26 | if __name__ == "__main__": 27 | main() 28 | -------------------------------------------------------------------------------- /lab5/training/prepare_experiments.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Simple way to run experiments defined in a file.""" 3 | import argparse 4 | import json 5 | 6 | 7 | def run_experiments(experiments_filename): 8 | """Run experiments from file.""" 9 | with open(experiments_filename) as f: 10 | experiments_config = json.load(f) 11 | num_experiments = len(experiments_config["experiments"]) 12 | for ind in range(num_experiments): 13 | experiment_config = experiments_config["experiments"][ind] 
14 | experiment_config["experiment_group"] = experiments_config["experiment_group"] 15 | print(f"python training/run_experiment.py --gpu=-1 '{json.dumps(experiment_config)}'") 16 | 17 | 18 | def main(): 19 | """Parse command-line arguments and run experiments from provided file.""" 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument("experiments_filename", type=str, help="Filename of JSON file of experiments to run.") 22 | args = parser.parse_args() 23 | run_experiments(args.experiments_filename) 24 | 25 | 26 | if __name__ == "__main__": 27 | main() 28 | -------------------------------------------------------------------------------- /lab6/training/prepare_experiments.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Simple way to run experiments defined in a file.""" 3 | import argparse 4 | import json 5 | 6 | 7 | def run_experiments(experiments_filename): 8 | """Run experiments from file.""" 9 | with open(experiments_filename) as f: 10 | experiments_config = json.load(f) 11 | num_experiments = len(experiments_config["experiments"]) 12 | for ind in range(num_experiments): 13 | experiment_config = experiments_config["experiments"][ind] 14 | experiment_config["experiment_group"] = experiments_config["experiment_group"] 15 | print(f"python training/run_experiment.py --gpu=-1 '{json.dumps(experiment_config)}'") 16 | 17 | 18 | def main(): 19 | """Parse command-line arguments and run experiments from provided file.""" 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument("experiments_filename", type=str, help="Filename of JSON file of experiments to run.") 22 | args = parser.parse_args() 23 | run_experiments(args.experiments_filename) 24 | 25 | 26 | if __name__ == "__main__": 27 | main() 28 | -------------------------------------------------------------------------------- /lab7/training/prepare_experiments.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Simple way to run experiments defined in a file.""" 3 | import argparse 4 | import json 5 | 6 | 7 | def run_experiments(experiments_filename): 8 | """Run experiments from file.""" 9 | with open(experiments_filename) as f: 10 | experiments_config = json.load(f) 11 | num_experiments = len(experiments_config["experiments"]) 12 | for ind in range(num_experiments): 13 | experiment_config = experiments_config["experiments"][ind] 14 | experiment_config["experiment_group"] = experiments_config["experiment_group"] 15 | print(f"python training/run_experiment.py --gpu=-1 '{json.dumps(experiment_config)}'") 16 | 17 | 18 | def main(): 19 | """Parse command-line arguments and run experiments from provided file.""" 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument("experiments_filename", type=str, help="Filename of JSON file of experiments to run.") 22 | args = parser.parse_args() 23 | run_experiments(args.experiments_filename) 24 | 25 | 26 | if __name__ == "__main__": 27 | main() 28 | -------------------------------------------------------------------------------- /lab8/training/prepare_experiments.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Simple way to run experiments defined in a file.""" 3 | import argparse 4 | import json 5 | 6 | 7 | def run_experiments(experiments_filename): 8 | """Run experiments from file.""" 9 | with open(experiments_filename) as f: 10 | experiments_config = json.load(f) 11 | 
num_experiments = len(experiments_config["experiments"]) 12 | for ind in range(num_experiments): 13 | experiment_config = experiments_config["experiments"][ind] 14 | experiment_config["experiment_group"] = experiments_config["experiment_group"] 15 | print(f"python training/run_experiment.py --gpu=-1 '{json.dumps(experiment_config)}'") 16 | 17 | 18 | def main(): 19 | """Parse command-line arguments and run experiments from provided file.""" 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument("experiments_filename", type=str, help="Filename of JSON file of experiments to run.") 22 | args = parser.parse_args() 23 | run_experiments(args.experiments_filename) 24 | 25 | 26 | if __name__ == "__main__": 27 | main() 28 | -------------------------------------------------------------------------------- /lab1/text_recognizer/networks/mlp.py: -------------------------------------------------------------------------------- 1 | """Define mlp network function.""" 2 | from typing import Tuple 3 | 4 | from tensorflow.keras.models import Model, Sequential 5 | from tensorflow.keras.layers import Dense, Dropout, Flatten 6 | 7 | 8 | def mlp( 9 | input_shape: Tuple[int, ...], 10 | output_shape: Tuple[int, ...], 11 | layer_size: int = 128, 12 | dropout_amount: float = 0.2, 13 | num_layers: int = 3, 14 | ) -> Model: 15 | """ 16 | Create a simple multi-layer perceptron: fully-connected layers with dropout between them, with softmax predictions. 17 | Creates num_layers layers. 18 | """ 19 | num_classes = output_shape[0] 20 | 21 | model = Sequential() 22 | # Don't forget to pass input_shape to the first layer of the model 23 | # Your code below (Lab 1) 24 | model.add(Flatten(input_shape=input_shape)) 25 | for _ in range(num_layers): 26 | model.add(Dense(layer_size, activation="relu")) 27 | model.add(Dropout(dropout_amount)) 28 | model.add(Dense(num_classes, activation="softmax")) 29 | # Your code above (Lab 1) 30 | 31 | return model 32 | -------------------------------------------------------------------------------- /lab2/text_recognizer/networks/mlp.py: -------------------------------------------------------------------------------- 1 | """Define mlp network function.""" 2 | from typing import Tuple 3 | 4 | from tensorflow.keras.models import Model, Sequential 5 | from tensorflow.keras.layers import Dense, Dropout, Flatten 6 | 7 | 8 | def mlp( 9 | input_shape: Tuple[int, ...], 10 | output_shape: Tuple[int, ...], 11 | layer_size: int = 128, 12 | dropout_amount: float = 0.2, 13 | num_layers: int = 3, 14 | ) -> Model: 15 | """ 16 | Create a simple multi-layer perceptron: fully-connected layers with dropout between them, with softmax predictions. 17 | Creates num_layers layers. 
18 | """ 19 | num_classes = output_shape[0] 20 | 21 | model = Sequential() 22 | # Don't forget to pass input_shape to the first layer of the model 23 | # Your code below (Lab 1) 24 | model.add(Flatten(input_shape=input_shape)) 25 | for _ in range(num_layers): 26 | model.add(Dense(layer_size, activation="relu")) 27 | model.add(Dropout(dropout_amount)) 28 | model.add(Dense(num_classes, activation="softmax")) 29 | # Your code above (Lab 1) 30 | 31 | return model 32 | -------------------------------------------------------------------------------- /lab3/text_recognizer/networks/mlp.py: -------------------------------------------------------------------------------- 1 | """Define mlp network function.""" 2 | from typing import Tuple 3 | 4 | from tensorflow.keras.models import Model, Sequential 5 | from tensorflow.keras.layers import Dense, Dropout, Flatten 6 | 7 | 8 | def mlp( 9 | input_shape: Tuple[int, ...], 10 | output_shape: Tuple[int, ...], 11 | layer_size: int = 128, 12 | dropout_amount: float = 0.2, 13 | num_layers: int = 3, 14 | ) -> Model: 15 | """ 16 | Create a simple multi-layer perceptron: fully-connected layers with dropout between them, with softmax predictions. 17 | Creates num_layers layers. 18 | """ 19 | num_classes = output_shape[0] 20 | 21 | model = Sequential() 22 | # Don't forget to pass input_shape to the first layer of the model 23 | # Your code below (Lab 1) 24 | model.add(Flatten(input_shape=input_shape)) 25 | for _ in range(num_layers): 26 | model.add(Dense(layer_size, activation="relu")) 27 | model.add(Dropout(dropout_amount)) 28 | model.add(Dense(num_classes, activation="softmax")) 29 | # Your code above (Lab 1) 30 | 31 | return model 32 | -------------------------------------------------------------------------------- /lab4/text_recognizer/networks/mlp.py: -------------------------------------------------------------------------------- 1 | """Define mlp network function.""" 2 | from typing import Tuple 3 | 4 | from tensorflow.keras.models import Model, Sequential 5 | from tensorflow.keras.layers import Dense, Dropout, Flatten 6 | 7 | 8 | def mlp( 9 | input_shape: Tuple[int, ...], 10 | output_shape: Tuple[int, ...], 11 | layer_size: int = 128, 12 | dropout_amount: float = 0.2, 13 | num_layers: int = 3, 14 | ) -> Model: 15 | """ 16 | Create a simple multi-layer perceptron: fully-connected layers with dropout between them, with softmax predictions. 17 | Creates num_layers layers. 
18 | """ 19 | num_classes = output_shape[0] 20 | 21 | model = Sequential() 22 | # Don't forget to pass input_shape to the first layer of the model 23 | # Your code below (Lab 1) 24 | model.add(Flatten(input_shape=input_shape)) 25 | for _ in range(num_layers): 26 | model.add(Dense(layer_size, activation="relu")) 27 | model.add(Dropout(dropout_amount)) 28 | model.add(Dense(num_classes, activation="softmax")) 29 | # Your code above (Lab 1) 30 | 31 | return model 32 | -------------------------------------------------------------------------------- /lab5/text_recognizer/networks/mlp.py: -------------------------------------------------------------------------------- 1 | """Define mlp network function.""" 2 | from typing import Tuple 3 | 4 | from tensorflow.keras.models import Model, Sequential 5 | from tensorflow.keras.layers import Dense, Dropout, Flatten 6 | 7 | 8 | def mlp( 9 | input_shape: Tuple[int, ...], 10 | output_shape: Tuple[int, ...], 11 | layer_size: int = 128, 12 | dropout_amount: float = 0.2, 13 | num_layers: int = 3, 14 | ) -> Model: 15 | """ 16 | Create a simple multi-layer perceptron: fully-connected layers with dropout between them, with softmax predictions. 17 | Creates num_layers layers. 18 | """ 19 | num_classes = output_shape[0] 20 | 21 | model = Sequential() 22 | # Don't forget to pass input_shape to the first layer of the model 23 | # Your code below (Lab 1) 24 | model.add(Flatten(input_shape=input_shape)) 25 | for _ in range(num_layers): 26 | model.add(Dense(layer_size, activation="relu")) 27 | model.add(Dropout(dropout_amount)) 28 | model.add(Dense(num_classes, activation="softmax")) 29 | # Your code above (Lab 1) 30 | 31 | return model 32 | -------------------------------------------------------------------------------- /lab6/text_recognizer/networks/mlp.py: -------------------------------------------------------------------------------- 1 | """Define mlp network function.""" 2 | from typing import Tuple 3 | 4 | from tensorflow.keras.models import Model, Sequential 5 | from tensorflow.keras.layers import Dense, Dropout, Flatten 6 | 7 | 8 | def mlp( 9 | input_shape: Tuple[int, ...], 10 | output_shape: Tuple[int, ...], 11 | layer_size: int = 128, 12 | dropout_amount: float = 0.2, 13 | num_layers: int = 3, 14 | ) -> Model: 15 | """ 16 | Create a simple multi-layer perceptron: fully-connected layers with dropout between them, with softmax predictions. 17 | Creates num_layers layers. 
18 | """ 19 | num_classes = output_shape[0] 20 | 21 | model = Sequential() 22 | # Don't forget to pass input_shape to the first layer of the model 23 | # Your code below (Lab 1) 24 | model.add(Flatten(input_shape=input_shape)) 25 | for _ in range(num_layers): 26 | model.add(Dense(layer_size, activation="relu")) 27 | model.add(Dropout(dropout_amount)) 28 | model.add(Dense(num_classes, activation="softmax")) 29 | # Your code above (Lab 1) 30 | 31 | return model 32 | -------------------------------------------------------------------------------- /lab7/text_recognizer/networks/mlp.py: -------------------------------------------------------------------------------- 1 | """Define mlp network function.""" 2 | from typing import Tuple 3 | 4 | from tensorflow.keras.models import Model, Sequential 5 | from tensorflow.keras.layers import Dense, Dropout, Flatten 6 | 7 | 8 | def mlp( 9 | input_shape: Tuple[int, ...], 10 | output_shape: Tuple[int, ...], 11 | layer_size: int = 128, 12 | dropout_amount: float = 0.2, 13 | num_layers: int = 3, 14 | ) -> Model: 15 | """ 16 | Create a simple multi-layer perceptron: fully-connected layers with dropout between them, with softmax predictions. 17 | Creates num_layers layers. 18 | """ 19 | num_classes = output_shape[0] 20 | 21 | model = Sequential() 22 | # Don't forget to pass input_shape to the first layer of the model 23 | # Your code below (Lab 1) 24 | model.add(Flatten(input_shape=input_shape)) 25 | for _ in range(num_layers): 26 | model.add(Dense(layer_size, activation="relu")) 27 | model.add(Dropout(dropout_amount)) 28 | model.add(Dense(num_classes, activation="softmax")) 29 | # Your code above (Lab 1) 30 | 31 | return model 32 | -------------------------------------------------------------------------------- /lab8/text_recognizer/networks/mlp.py: -------------------------------------------------------------------------------- 1 | """Define mlp network function.""" 2 | from typing import Tuple 3 | 4 | from tensorflow.keras.models import Model, Sequential 5 | from tensorflow.keras.layers import Dense, Dropout, Flatten 6 | 7 | 8 | def mlp( 9 | input_shape: Tuple[int, ...], 10 | output_shape: Tuple[int, ...], 11 | layer_size: int = 128, 12 | dropout_amount: float = 0.2, 13 | num_layers: int = 3, 14 | ) -> Model: 15 | """ 16 | Create a simple multi-layer perceptron: fully-connected layers with dropout between them, with softmax predictions. 17 | Creates num_layers layers. 
18 | """ 19 | num_classes = output_shape[0] 20 | 21 | model = Sequential() 22 | # Don't forget to pass input_shape to the first layer of the model 23 | # Your code below (Lab 1) 24 | model.add(Flatten(input_shape=input_shape)) 25 | for _ in range(num_layers): 26 | model.add(Dense(layer_size, activation="relu")) 27 | model.add(Dropout(dropout_amount)) 28 | model.add(Dense(num_classes, activation="softmax")) 29 | # Your code above (Lab 1) 30 | 31 | return model 32 | -------------------------------------------------------------------------------- /lab8/api/tests/test_app.py: -------------------------------------------------------------------------------- 1 | """Tests for web app.""" 2 | import os 3 | from pathlib import Path 4 | from unittest import TestCase 5 | import base64 6 | 7 | from api.app import app 8 | 9 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 10 | 11 | REPO_DIRNAME = Path(__file__).parents[2].resolve() 12 | # SUPPORT_DIRNAME = REPO_DIRNAME / 'text_recognizer' / 'tests' / 'support' / 'iam_lines' 13 | SUPPORT_DIRNAME = REPO_DIRNAME / "text_recognizer" / "tests" / "support" / "emnist_lines" 14 | 15 | 16 | class TestIntegrations(TestCase): 17 | def setUp(self): 18 | self.app = app.test_client() 19 | 20 | def test_index(self): 21 | response = self.app.get("/") 22 | assert response.get_data().decode() == "Hello, world!" 23 | 24 | def test_predict(self): 25 | with open(SUPPORT_DIRNAME / "or if used the results.png", "rb") as f: 26 | b64_image = base64.b64encode(f.read()) 27 | response = self.app.post("/v1/predict", json={"image": f"data:image/jpeg;base64,{b64_image.decode()}"}) 28 | json_data = response.get_json() 29 | self.assertEqual(json_data["pred"], "or if used the resuits") 30 | -------------------------------------------------------------------------------- /lab6/training/update_metadata.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Update metadata.toml with SHA-256 hash of the current file.""" 3 | from pathlib import Path 4 | import argparse 5 | 6 | import toml 7 | 8 | from text_recognizer import util 9 | 10 | 11 | def _get_metadata_filename(): 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument("filename", type=str, help="Path to the metadata.toml file to update.") 14 | args = parser.parse_args() 15 | return Path(args.filename).resolve() 16 | 17 | 18 | def main(): 19 | metadata_filename = _get_metadata_filename() 20 | 21 | metadata = toml.load(metadata_filename) 22 | 23 | data_filename = metadata_filename.parents[0] / metadata["filename"] 24 | supposed_data_sha256 = metadata["sha256"] 25 | actual_data_sha256 = util.compute_sha256(data_filename) 26 | 27 | if supposed_data_sha256 == actual_data_sha256: 28 | print("Nothing to update: SHA-256 matches") 29 | return 30 | 31 | print("Updating metadata SHA-256") 32 | metadata["sha256"] = actual_data_sha256 33 | with open(metadata_filename, "w") as f: 34 | toml.dump(metadata, f) 35 | 36 | 37 | if __name__ == "__main__": 38 | main() 39 | -------------------------------------------------------------------------------- /lab7/training/update_metadata.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Update metadata.toml with SHA-256 hash of the current file.""" 3 | from pathlib import Path 4 | import argparse 5 | 6 | import toml 7 | 8 | from text_recognizer import util 9 | 10 | 11 | def _get_metadata_filename(): 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument("filename", 
type=str, help="Path to the metadata.toml file to update.") 14 | args = parser.parse_args() 15 | return Path(args.filename).resolve() 16 | 17 | 18 | def main(): 19 | metadata_filename = _get_metadata_filename() 20 | 21 | metadata = toml.load(metadata_filename) 22 | 23 | data_filename = metadata_filename.parents[0] / metadata["filename"] 24 | supposed_data_sha256 = metadata["sha256"] 25 | actual_data_sha256 = util.compute_sha256(data_filename) 26 | 27 | if supposed_data_sha256 == actual_data_sha256: 28 | print("Nothing to update: SHA-256 matches") 29 | return 30 | 31 | print("Updating metadata SHA-256") 32 | metadata["sha256"] = actual_data_sha256 33 | with open(metadata_filename, "w") as f: 34 | toml.dump(metadata, f) 35 | 36 | 37 | if __name__ == "__main__": 38 | main() 39 | -------------------------------------------------------------------------------- /lab8/training/update_metadata.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Update metadata.toml with SHA-256 hash of the current file.""" 3 | from pathlib import Path 4 | import argparse 5 | 6 | import toml 7 | 8 | from text_recognizer import util 9 | 10 | 11 | def _get_metadata_filename(): 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument("filename", type=str, help="Path to the metadata.toml file to update.") 14 | args = parser.parse_args() 15 | return Path(args.filename).resolve() 16 | 17 | 18 | def main(): 19 | metadata_filename = _get_metadata_filename() 20 | 21 | metadata = toml.load(metadata_filename) 22 | 23 | data_filename = metadata_filename.parents[0] / metadata["filename"] 24 | supposed_data_sha256 = metadata["sha256"] 25 | actual_data_sha256 = util.compute_sha256(data_filename) 26 | 27 | if supposed_data_sha256 == actual_data_sha256: 28 | print("Nothing to update: SHA-256 matches") 29 | return 30 | 31 | print("Updating metadata SHA-256") 32 | metadata["sha256"] = actual_data_sha256 33 | with open(metadata_filename, "w") as f: 34 | toml.dump(metadata, f) 35 | 36 | 37 | if __name__ == "__main__": 38 | main() 39 | -------------------------------------------------------------------------------- /lab3/readme.md: -------------------------------------------------------------------------------- 1 | # Lab 3: Using a sequence model for line text recognition 2 | 3 | ## Goal of the lab 4 | 5 | Use sequence modeling to be able to handle overlapping characters (input sequence no longer maps neatly onto output sequence). 6 | 7 | ## Outline 8 | 9 | - Overview of the model, network, and loss 10 | - Train an LSTM on EMNIST 11 | 12 | ## Follow along 13 | 14 | ``` 15 | git pull 16 | cd lab3 17 | ``` 18 | 19 | ## Overview of model and loss 20 | 21 | - Look at slides for CTC loss 22 | - Look at `networks/line_lstm_ctc.py` 23 | - Look at `models/line_model_ctc.py` 24 | 25 | ## Train LSTM model with CTC loss 26 | 27 | Let's train an LSTM model with CTC loss. 28 | 29 | ```sh 30 | python training/run_experiment.py --save '{"train_args": {"epochs": 16}, "dataset": "EmnistLinesDataset", "model": "LineModelCtc", "network": "line_lstm_ctc"}' 31 | ``` 32 | 33 | or the shortcut `tasks/train_lstm_line_predictor.sh` 34 | 35 | ## Things to try 36 | 37 | If you have time left over, or want to play around with this later on, you can try writing your own non-CTC `line_lstm` network (define it in `text_recognizer/networks/line_lstm.py`). 38 | For example, you could code up an encoder-decoder architecture with attention. 
39 | -------------------------------------------------------------------------------- /lab1/text_recognizer/networks/lenet.py: -------------------------------------------------------------------------------- 1 | """LeNet network.""" 2 | from typing import Tuple 3 | 4 | import tensorflow as tf 5 | from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, Lambda, MaxPooling2D 6 | from tensorflow.keras.models import Sequential, Model 7 | 8 | 9 | def lenet(input_shape: Tuple[int, ...], output_shape: Tuple[int, ...]) -> Model: 10 | """Return LeNet Keras model.""" 11 | num_classes = output_shape[0] 12 | 13 | # Your code below (Lab 2) 14 | model = Sequential() 15 | if len(input_shape) < 3: 16 | model.add(Lambda(lambda x: tf.expand_dims(x, -1), input_shape=input_shape, name='expand_dims')) 17 | input_shape = (input_shape[0], input_shape[1], 1) 18 | model.add(Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=input_shape, padding="valid")) 19 | model.add(Conv2D(64, (3, 3), activation="relu", padding="valid")) 20 | model.add(MaxPooling2D(pool_size=(2, 2), padding="valid")) 21 | model.add(Dropout(0.2)) 22 | model.add(Flatten()) 23 | model.add(Dense(128, activation="relu")) 24 | model.add(Dropout(0.2)) 25 | model.add(Dense(num_classes, activation="softmax")) 26 | # Your code above (Lab 2) 27 | 28 | return model 29 | -------------------------------------------------------------------------------- /lab2/text_recognizer/networks/lenet.py: -------------------------------------------------------------------------------- 1 | """LeNet network.""" 2 | from typing import Tuple 3 | 4 | import tensorflow as tf 5 | from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, Lambda, MaxPooling2D 6 | from tensorflow.keras.models import Sequential, Model 7 | 8 | 9 | def lenet(input_shape: Tuple[int, ...], output_shape: Tuple[int, ...]) -> Model: 10 | """Return LeNet Keras model.""" 11 | num_classes = output_shape[0] 12 | 13 | # Your code below (Lab 2) 14 | model = Sequential() 15 | if len(input_shape) < 3: 16 | model.add(Lambda(lambda x: tf.expand_dims(x, -1), input_shape=input_shape, name='expand_dims')) 17 | input_shape = (input_shape[0], input_shape[1], 1) 18 | model.add(Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=input_shape, padding="valid")) 19 | model.add(Conv2D(64, (3, 3), activation="relu", padding="valid")) 20 | model.add(MaxPooling2D(pool_size=(2, 2), padding="valid")) 21 | model.add(Dropout(0.2)) 22 | model.add(Flatten()) 23 | model.add(Dense(128, activation="relu")) 24 | model.add(Dropout(0.2)) 25 | model.add(Dense(num_classes, activation="softmax")) 26 | # Your code above (Lab 2) 27 | 28 | return model 29 | -------------------------------------------------------------------------------- /lab3/text_recognizer/networks/lenet.py: -------------------------------------------------------------------------------- 1 | """LeNet network.""" 2 | from typing import Tuple 3 | 4 | import tensorflow as tf 5 | from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, Lambda, MaxPooling2D 6 | from tensorflow.keras.models import Sequential, Model 7 | 8 | 9 | def lenet(input_shape: Tuple[int, ...], output_shape: Tuple[int, ...]) -> Model: 10 | """Return LeNet Keras model.""" 11 | num_classes = output_shape[0] 12 | 13 | # Your code below (Lab 2) 14 | model = Sequential() 15 | if len(input_shape) < 3: 16 | model.add(Lambda(lambda x: tf.expand_dims(x, -1), input_shape=input_shape, name='expand_dims')) 17 | input_shape = (input_shape[0], input_shape[1], 1) 18 | 
model.add(Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=input_shape, padding="valid")) 19 | model.add(Conv2D(64, (3, 3), activation="relu", padding="valid")) 20 | model.add(MaxPooling2D(pool_size=(2, 2), padding="valid")) 21 | model.add(Dropout(0.2)) 22 | model.add(Flatten()) 23 | model.add(Dense(128, activation="relu")) 24 | model.add(Dropout(0.2)) 25 | model.add(Dense(num_classes, activation="softmax")) 26 | # Your code above (Lab 2) 27 | 28 | return model 29 | -------------------------------------------------------------------------------- /lab4/text_recognizer/networks/lenet.py: -------------------------------------------------------------------------------- 1 | """LeNet network.""" 2 | from typing import Tuple 3 | 4 | import tensorflow as tf 5 | from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, Lambda, MaxPooling2D 6 | from tensorflow.keras.models import Sequential, Model 7 | 8 | 9 | def lenet(input_shape: Tuple[int, ...], output_shape: Tuple[int, ...]) -> Model: 10 | """Return LeNet Keras model.""" 11 | num_classes = output_shape[0] 12 | 13 | # Your code below (Lab 2) 14 | model = Sequential() 15 | if len(input_shape) < 3: 16 | model.add(Lambda(lambda x: tf.expand_dims(x, -1), input_shape=input_shape, name='expand_dims')) 17 | input_shape = (input_shape[0], input_shape[1], 1) 18 | model.add(Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=input_shape, padding="valid")) 19 | model.add(Conv2D(64, (3, 3), activation="relu", padding="valid")) 20 | model.add(MaxPooling2D(pool_size=(2, 2), padding="valid")) 21 | model.add(Dropout(0.2)) 22 | model.add(Flatten()) 23 | model.add(Dense(128, activation="relu")) 24 | model.add(Dropout(0.2)) 25 | model.add(Dense(num_classes, activation="softmax")) 26 | # Your code above (Lab 2) 27 | 28 | return model 29 | -------------------------------------------------------------------------------- /lab5/text_recognizer/networks/lenet.py: -------------------------------------------------------------------------------- 1 | """LeNet network.""" 2 | from typing import Tuple 3 | 4 | import tensorflow as tf 5 | from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, Lambda, MaxPooling2D 6 | from tensorflow.keras.models import Sequential, Model 7 | 8 | 9 | def lenet(input_shape: Tuple[int, ...], output_shape: Tuple[int, ...]) -> Model: 10 | """Return LeNet Keras model.""" 11 | num_classes = output_shape[0] 12 | 13 | # Your code below (Lab 2) 14 | model = Sequential() 15 | if len(input_shape) < 3: 16 | model.add(Lambda(lambda x: tf.expand_dims(x, -1), input_shape=input_shape, name='expand_dims')) 17 | input_shape = (input_shape[0], input_shape[1], 1) 18 | model.add(Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=input_shape, padding="valid")) 19 | model.add(Conv2D(64, (3, 3), activation="relu", padding="valid")) 20 | model.add(MaxPooling2D(pool_size=(2, 2), padding="valid")) 21 | model.add(Dropout(0.2)) 22 | model.add(Flatten()) 23 | model.add(Dense(128, activation="relu")) 24 | model.add(Dropout(0.2)) 25 | model.add(Dense(num_classes, activation="softmax")) 26 | # Your code above (Lab 2) 27 | 28 | return model 29 | -------------------------------------------------------------------------------- /lab6/text_recognizer/networks/lenet.py: -------------------------------------------------------------------------------- 1 | """LeNet network.""" 2 | from typing import Tuple 3 | 4 | import tensorflow as tf 5 | from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, Lambda, MaxPooling2D 6 
| from tensorflow.keras.models import Sequential, Model 7 | 8 | 9 | def lenet(input_shape: Tuple[int, ...], output_shape: Tuple[int, ...]) -> Model: 10 | """Return LeNet Keras model.""" 11 | num_classes = output_shape[0] 12 | 13 | # Your code below (Lab 2) 14 | model = Sequential() 15 | if len(input_shape) < 3: 16 | model.add(Lambda(lambda x: tf.expand_dims(x, -1), input_shape=input_shape, name='expand_dims')) 17 | input_shape = (input_shape[0], input_shape[1], 1) 18 | model.add(Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=input_shape, padding="valid")) 19 | model.add(Conv2D(64, (3, 3), activation="relu", padding="valid")) 20 | model.add(MaxPooling2D(pool_size=(2, 2), padding="valid")) 21 | model.add(Dropout(0.2)) 22 | model.add(Flatten()) 23 | model.add(Dense(128, activation="relu")) 24 | model.add(Dropout(0.2)) 25 | model.add(Dense(num_classes, activation="softmax")) 26 | # Your code above (Lab 2) 27 | 28 | return model 29 | -------------------------------------------------------------------------------- /lab7/text_recognizer/networks/lenet.py: -------------------------------------------------------------------------------- 1 | """LeNet network.""" 2 | from typing import Tuple 3 | 4 | import tensorflow as tf 5 | from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, Lambda, MaxPooling2D 6 | from tensorflow.keras.models import Sequential, Model 7 | 8 | 9 | def lenet(input_shape: Tuple[int, ...], output_shape: Tuple[int, ...]) -> Model: 10 | """Return LeNet Keras model.""" 11 | num_classes = output_shape[0] 12 | 13 | # Your code below (Lab 2) 14 | model = Sequential() 15 | if len(input_shape) < 3: 16 | model.add(Lambda(lambda x: tf.expand_dims(x, -1), input_shape=input_shape, name='expand_dims')) 17 | input_shape = (input_shape[0], input_shape[1], 1) 18 | model.add(Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=input_shape, padding="valid")) 19 | model.add(Conv2D(64, (3, 3), activation="relu", padding="valid")) 20 | model.add(MaxPooling2D(pool_size=(2, 2), padding="valid")) 21 | model.add(Dropout(0.2)) 22 | model.add(Flatten()) 23 | model.add(Dense(128, activation="relu")) 24 | model.add(Dropout(0.2)) 25 | model.add(Dense(num_classes, activation="softmax")) 26 | # Your code above (Lab 2) 27 | 28 | return model 29 | -------------------------------------------------------------------------------- /lab8/text_recognizer/networks/lenet.py: -------------------------------------------------------------------------------- 1 | """LeNet network.""" 2 | from typing import Tuple 3 | 4 | import tensorflow as tf 5 | from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, Lambda, MaxPooling2D 6 | from tensorflow.keras.models import Sequential, Model 7 | 8 | 9 | def lenet(input_shape: Tuple[int, ...], output_shape: Tuple[int, ...]) -> Model: 10 | """Return LeNet Keras model.""" 11 | num_classes = output_shape[0] 12 | 13 | # Your code below (Lab 2) 14 | model = Sequential() 15 | if len(input_shape) < 3: 16 | model.add(Lambda(lambda x: tf.expand_dims(x, -1), input_shape=input_shape, name='expand_dims')) 17 | input_shape = (input_shape[0], input_shape[1], 1) 18 | model.add(Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=input_shape, padding="valid")) 19 | model.add(Conv2D(64, (3, 3), activation="relu", padding="valid")) 20 | model.add(MaxPooling2D(pool_size=(2, 2), padding="valid")) 21 | model.add(Dropout(0.2)) 22 | model.add(Flatten()) 23 | model.add(Dense(128, activation="relu")) 24 | model.add(Dropout(0.2)) 25 | 
model.add(Dense(num_classes, activation="softmax")) 26 | # Your code above (Lab 2) 27 | 28 | return model 29 | -------------------------------------------------------------------------------- /lab1/text_recognizer/datasets/dataset.py: -------------------------------------------------------------------------------- 1 | """Dataset class to be extended by dataset-specific classes.""" 2 | from pathlib import Path 3 | import argparse 4 | import os 5 | 6 | from text_recognizer import util 7 | 8 | 9 | class Dataset: 10 | """Simple abstract class for datasets.""" 11 | 12 | @classmethod 13 | def data_dirname(cls): 14 | return Path(__file__).resolve().parents[3] / "data" 15 | 16 | def load_or_generate_data(self): 17 | pass 18 | 19 | 20 | def _download_raw_dataset(metadata): 21 | if os.path.exists(metadata["filename"]): 22 | return 23 | print(f"Downloading raw dataset from {metadata['url']}...") 24 | util.download_url(metadata["url"], metadata["filename"]) 25 | print("Computing SHA-256...") 26 | sha256 = util.compute_sha256(metadata["filename"]) 27 | if sha256 != metadata["sha256"]: 28 | raise ValueError("Downloaded data file SHA-256 does not match that listed in metadata document.") 29 | 30 | 31 | def _parse_args(): 32 | parser = argparse.ArgumentParser() 33 | parser.add_argument( 34 | "--subsample_fraction", type=float, default=None, help="If given, is used as the fraction of data to expose.", 35 | ) 36 | return parser.parse_args() 37 | -------------------------------------------------------------------------------- /lab2/text_recognizer/datasets/dataset.py: -------------------------------------------------------------------------------- 1 | """Dataset class to be extended by dataset-specific classes.""" 2 | from pathlib import Path 3 | import argparse 4 | import os 5 | 6 | from text_recognizer import util 7 | 8 | 9 | class Dataset: 10 | """Simple abstract class for datasets.""" 11 | 12 | @classmethod 13 | def data_dirname(cls): 14 | return Path(__file__).resolve().parents[3] / "data" 15 | 16 | def load_or_generate_data(self): 17 | pass 18 | 19 | 20 | def _download_raw_dataset(metadata): 21 | if os.path.exists(metadata["filename"]): 22 | return 23 | print(f"Downloading raw dataset from {metadata['url']}...") 24 | util.download_url(metadata["url"], metadata["filename"]) 25 | print("Computing SHA-256...") 26 | sha256 = util.compute_sha256(metadata["filename"]) 27 | if sha256 != metadata["sha256"]: 28 | raise ValueError("Downloaded data file SHA-256 does not match that listed in metadata document.") 29 | 30 | 31 | def _parse_args(): 32 | parser = argparse.ArgumentParser() 33 | parser.add_argument( 34 | "--subsample_fraction", type=float, default=None, help="If given, is used as the fraction of data to expose.", 35 | ) 36 | return parser.parse_args() 37 | -------------------------------------------------------------------------------- /lab3/text_recognizer/datasets/dataset.py: -------------------------------------------------------------------------------- 1 | """Dataset class to be extended by dataset-specific classes.""" 2 | from pathlib import Path 3 | import argparse 4 | import os 5 | 6 | from text_recognizer import util 7 | 8 | 9 | class Dataset: 10 | """Simple abstract class for datasets.""" 11 | 12 | @classmethod 13 | def data_dirname(cls): 14 | return Path(__file__).resolve().parents[3] / "data" 15 | 16 | def load_or_generate_data(self): 17 | pass 18 | 19 | 20 | def _download_raw_dataset(metadata): 21 | if os.path.exists(metadata["filename"]): 22 | return 23 | print(f"Downloading raw dataset 
from {metadata['url']}...") 24 | util.download_url(metadata["url"], metadata["filename"]) 25 | print("Computing SHA-256...") 26 | sha256 = util.compute_sha256(metadata["filename"]) 27 | if sha256 != metadata["sha256"]: 28 | raise ValueError("Downloaded data file SHA-256 does not match that listed in metadata document.") 29 | 30 | 31 | def _parse_args(): 32 | parser = argparse.ArgumentParser() 33 | parser.add_argument( 34 | "--subsample_fraction", type=float, default=None, help="If given, is used as the fraction of data to expose.", 35 | ) 36 | return parser.parse_args() 37 | -------------------------------------------------------------------------------- /lab4/text_recognizer/datasets/dataset.py: -------------------------------------------------------------------------------- 1 | """Dataset class to be extended by dataset-specific classes.""" 2 | from pathlib import Path 3 | import argparse 4 | import os 5 | 6 | from text_recognizer import util 7 | 8 | 9 | class Dataset: 10 | """Simple abstract class for datasets.""" 11 | 12 | @classmethod 13 | def data_dirname(cls): 14 | return Path(__file__).resolve().parents[3] / "data" 15 | 16 | def load_or_generate_data(self): 17 | pass 18 | 19 | 20 | def _download_raw_dataset(metadata): 21 | if os.path.exists(metadata["filename"]): 22 | return 23 | print(f"Downloading raw dataset from {metadata['url']}...") 24 | util.download_url(metadata["url"], metadata["filename"]) 25 | print("Computing SHA-256...") 26 | sha256 = util.compute_sha256(metadata["filename"]) 27 | if sha256 != metadata["sha256"]: 28 | raise ValueError("Downloaded data file SHA-256 does not match that listed in metadata document.") 29 | 30 | 31 | def _parse_args(): 32 | parser = argparse.ArgumentParser() 33 | parser.add_argument( 34 | "--subsample_fraction", type=float, default=None, help="If given, is used as the fraction of data to expose.", 35 | ) 36 | return parser.parse_args() 37 | -------------------------------------------------------------------------------- /lab5/text_recognizer/datasets/dataset.py: -------------------------------------------------------------------------------- 1 | """Dataset class to be extended by dataset-specific classes.""" 2 | from pathlib import Path 3 | import argparse 4 | import os 5 | 6 | from text_recognizer import util 7 | 8 | 9 | class Dataset: 10 | """Simple abstract class for datasets.""" 11 | 12 | @classmethod 13 | def data_dirname(cls): 14 | return Path(__file__).resolve().parents[3] / "data" 15 | 16 | def load_or_generate_data(self): 17 | pass 18 | 19 | 20 | def _download_raw_dataset(metadata): 21 | if os.path.exists(metadata["filename"]): 22 | return 23 | print(f"Downloading raw dataset from {metadata['url']}...") 24 | util.download_url(metadata["url"], metadata["filename"]) 25 | print("Computing SHA-256...") 26 | sha256 = util.compute_sha256(metadata["filename"]) 27 | if sha256 != metadata["sha256"]: 28 | raise ValueError("Downloaded data file SHA-256 does not match that listed in metadata document.") 29 | 30 | 31 | def _parse_args(): 32 | parser = argparse.ArgumentParser() 33 | parser.add_argument( 34 | "--subsample_fraction", type=float, default=None, help="If given, is used as the fraction of data to expose.", 35 | ) 36 | return parser.parse_args() 37 | -------------------------------------------------------------------------------- /lab6/text_recognizer/datasets/dataset.py: -------------------------------------------------------------------------------- 1 | """Dataset class to be extended by dataset-specific classes.""" 2 | from pathlib 
import Path 3 | import argparse 4 | import os 5 | 6 | from text_recognizer import util 7 | 8 | 9 | class Dataset: 10 | """Simple abstract class for datasets.""" 11 | 12 | @classmethod 13 | def data_dirname(cls): 14 | return Path(__file__).resolve().parents[3] / "data" 15 | 16 | def load_or_generate_data(self): 17 | pass 18 | 19 | 20 | def _download_raw_dataset(metadata): 21 | if os.path.exists(metadata["filename"]): 22 | return 23 | print(f"Downloading raw dataset from {metadata['url']}...") 24 | util.download_url(metadata["url"], metadata["filename"]) 25 | print("Computing SHA-256...") 26 | sha256 = util.compute_sha256(metadata["filename"]) 27 | if sha256 != metadata["sha256"]: 28 | raise ValueError("Downloaded data file SHA-256 does not match that listed in metadata document.") 29 | 30 | 31 | def _parse_args(): 32 | parser = argparse.ArgumentParser() 33 | parser.add_argument( 34 | "--subsample_fraction", type=float, default=None, help="If given, is used as the fraction of data to expose.", 35 | ) 36 | return parser.parse_args() 37 | -------------------------------------------------------------------------------- /lab7/text_recognizer/datasets/dataset.py: -------------------------------------------------------------------------------- 1 | """Dataset class to be extended by dataset-specific classes.""" 2 | from pathlib import Path 3 | import argparse 4 | import os 5 | 6 | from text_recognizer import util 7 | 8 | 9 | class Dataset: 10 | """Simple abstract class for datasets.""" 11 | 12 | @classmethod 13 | def data_dirname(cls): 14 | return Path(__file__).resolve().parents[3] / "data" 15 | 16 | def load_or_generate_data(self): 17 | pass 18 | 19 | 20 | def _download_raw_dataset(metadata): 21 | if os.path.exists(metadata["filename"]): 22 | return 23 | print(f"Downloading raw dataset from {metadata['url']}...") 24 | util.download_url(metadata["url"], metadata["filename"]) 25 | print("Computing SHA-256...") 26 | sha256 = util.compute_sha256(metadata["filename"]) 27 | if sha256 != metadata["sha256"]: 28 | raise ValueError("Downloaded data file SHA-256 does not match that listed in metadata document.") 29 | 30 | 31 | def _parse_args(): 32 | parser = argparse.ArgumentParser() 33 | parser.add_argument( 34 | "--subsample_fraction", type=float, default=None, help="If given, is used as the fraction of data to expose.", 35 | ) 36 | return parser.parse_args() 37 | -------------------------------------------------------------------------------- /lab8/text_recognizer/datasets/dataset.py: -------------------------------------------------------------------------------- 1 | """Dataset class to be extended by dataset-specific classes.""" 2 | from pathlib import Path 3 | import argparse 4 | import os 5 | 6 | from text_recognizer import util 7 | 8 | 9 | class Dataset: 10 | """Simple abstract class for datasets.""" 11 | 12 | @classmethod 13 | def data_dirname(cls): 14 | return Path(__file__).resolve().parents[3] / "data" 15 | 16 | def load_or_generate_data(self): 17 | pass 18 | 19 | 20 | def _download_raw_dataset(metadata): 21 | if os.path.exists(metadata["filename"]): 22 | return 23 | print(f"Downloading raw dataset from {metadata['url']}...") 24 | util.download_url(metadata["url"], metadata["filename"]) 25 | print("Computing SHA-256...") 26 | sha256 = util.compute_sha256(metadata["filename"]) 27 | if sha256 != metadata["sha256"]: 28 | raise ValueError("Downloaded data file SHA-256 does not match that listed in metadata document.") 29 | 30 | 31 | def _parse_args(): 32 | parser = argparse.ArgumentParser() 33 | 
parser.add_argument( 34 | "--subsample_fraction", type=float, default=None, help="If given, is used as the fraction of data to expose.", 35 | ) 36 | return parser.parse_args() 37 | -------------------------------------------------------------------------------- /data/raw/iam/readme.md: -------------------------------------------------------------------------------- 1 | # IAM Dataset 2 | 3 | The IAM Handwriting Database contains forms of handwritten English text, which can be used to train and test handwritten text recognizers and to perform writer identification and verification experiments. 4 | 5 | - 657 writers contributed samples of their handwriting 6 | - 1,539 pages of scanned text 7 | - 13,353 isolated and labeled text lines 8 | 9 | - http://www.fki.inf.unibe.ch/databases/iam-handwriting-database 10 | 11 | ## Pre-processing 12 | 13 | First, all forms were placed into one directory called `forms`, from original directories like `formsA-D`. 14 | 15 | To save space, I converted the original PNG files to JPG and resized them to half size: 16 | ``` 17 | mkdir forms-resized 18 | cd forms 19 | ls -1 *.png | parallel --eta -j 6 convert '{}' -adaptive-resize 50% '../forms-resized/{.}.jpg' 20 | ``` 21 | 22 | ## Split 23 | 24 | The data split we will use is the 25 | IAM lines Large Writer Independent Text Line Recognition Task (lwitlrt): 9,862 text lines. 26 | 27 | - The validation set has been merged into the train set. 28 | - The train set has 7,101 lines from 326 writers. 29 | - The test set has 1,861 lines from 128 writers. 30 | - The text lines of all data sets are mutually exclusive; thus each writer has contributed to one set only. 31 | -------------------------------------------------------------------------------- /lab8/text_recognizer/tests/support/create_emnist_lines_support_files.py: -------------------------------------------------------------------------------- 1 | """Module for creating EMNIST Lines test support files.""" 2 | from pathlib import Path 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | from text_recognizer.datasets import EmnistLinesDataset 8 | import text_recognizer.util as util 9 | 10 | 11 | SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "emnist_lines" 12 | 13 | 14 | def create_emnist_lines_support_files(): 15 | shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True) 16 | SUPPORT_DIRNAME.mkdir() 17 | 18 | dataset = EmnistLinesDataset() 19 | dataset.load_or_generate_data() 20 | 21 | for ind in [0, 1, 3]: 22 | image = dataset.x_test[ind] 23 | print(image.sum(), image.dtype) 24 | label = "".join(dataset.mapping[label] for label in np.argmax(dataset.y_test[ind], axis=-1).flatten()).strip( 25 | " _" 26 | ) 27 | print(label) 28 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}.png")) 29 | # Hide lines below until Lab 8 30 | # Inverted version 31 | image = -(image - 255) 32 | util.write_image(image, str(SUPPORT_DIRNAME / f"{label}_i.png")) 33 | # Hide lines above until Lab 8 34 | 35 | 36 | if __name__ == "__main__": 37 | create_emnist_lines_support_files() 38 | -------------------------------------------------------------------------------- /lab1/text_recognizer/models/character_model.py: -------------------------------------------------------------------------------- 1 | """CharacterModel class.""" 2 | from typing import Callable, Dict, Tuple 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models.base import Model 7 | from text_recognizer.datasets.emnist_dataset import EmnistDataset 8 | from text_recognizer.networks.mlp import mlp 9 | 10 | 11 | class
CharacterModel(Model): 12 | """CharacterModel works on datasets providing images, with one-hot labels.""" 13 | 14 | def __init__( 15 | self, 16 | dataset_cls: type = EmnistDataset, 17 | network_fn: Callable = mlp, 18 | dataset_args: Dict = None, 19 | network_args: Dict = None, 20 | ): 21 | super().__init__(dataset_cls, network_fn, dataset_args, network_args) 22 | 23 | def predict_on_image(self, image: np.ndarray) -> Tuple[str, float]: 24 | if image.dtype == np.uint8: 25 | image = (image / 255).astype(np.float32) 26 | # NOTE: integer to character mapping dictionary is self.data.mapping[integer] 27 | # Your code below (Lab 1) 28 | pred_raw = self.network.predict(np.expand_dims(image, 0), batch_size=1).flatten() 29 | ind = np.argmax(pred_raw) 30 | confidence_of_prediction = pred_raw[ind] 31 | predicted_character = self.data.mapping[ind] 32 | # Your code above (Lab 1) 33 | return predicted_character, confidence_of_prediction 34 | -------------------------------------------------------------------------------- /lab2/text_recognizer/models/character_model.py: -------------------------------------------------------------------------------- 1 | """CharacterModel class.""" 2 | from typing import Callable, Dict, Tuple 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models.base import Model 7 | from text_recognizer.datasets.emnist_dataset import EmnistDataset 8 | from text_recognizer.networks.mlp import mlp 9 | 10 | 11 | class CharacterModel(Model): 12 | """CharacterModel works on datasets providing images, with one-hot labels.""" 13 | 14 | def __init__( 15 | self, 16 | dataset_cls: type = EmnistDataset, 17 | network_fn: Callable = mlp, 18 | dataset_args: Dict = None, 19 | network_args: Dict = None, 20 | ): 21 | super().__init__(dataset_cls, network_fn, dataset_args, network_args) 22 | 23 | def predict_on_image(self, image: np.ndarray) -> Tuple[str, float]: 24 | if image.dtype == np.uint8: 25 | image = (image / 255).astype(np.float32) 26 | # NOTE: integer to character mapping dictionary is self.data.mapping[integer] 27 | # Your code below (Lab 1) 28 | pred_raw = self.network.predict(np.expand_dims(image, 0), batch_size=1).flatten() 29 | ind = np.argmax(pred_raw) 30 | confidence_of_prediction = pred_raw[ind] 31 | predicted_character = self.data.mapping[ind] 32 | # Your code above (Lab 1) 33 | return predicted_character, confidence_of_prediction 34 | -------------------------------------------------------------------------------- /lab3/text_recognizer/models/character_model.py: -------------------------------------------------------------------------------- 1 | """CharacterModel class.""" 2 | from typing import Callable, Dict, Tuple 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models.base import Model 7 | from text_recognizer.datasets.emnist_dataset import EmnistDataset 8 | from text_recognizer.networks.mlp import mlp 9 | 10 | 11 | class CharacterModel(Model): 12 | """CharacterModel works on datasets providing images, with one-hot labels.""" 13 | 14 | def __init__( 15 | self, 16 | dataset_cls: type = EmnistDataset, 17 | network_fn: Callable = mlp, 18 | dataset_args: Dict = None, 19 | network_args: Dict = None, 20 | ): 21 | super().__init__(dataset_cls, network_fn, dataset_args, network_args) 22 | 23 | def predict_on_image(self, image: np.ndarray) -> Tuple[str, float]: 24 | if image.dtype == np.uint8: 25 | image = (image / 255).astype(np.float32) 26 | # NOTE: integer to character mapping dictionary is self.data.mapping[integer] 27 | # Your code below (Lab 1) 28 | pred_raw = 
self.network.predict(np.expand_dims(image, 0), batch_size=1).flatten() 29 | ind = np.argmax(pred_raw) 30 | confidence_of_prediction = pred_raw[ind] 31 | predicted_character = self.data.mapping[ind] 32 | # Your code above (Lab 1) 33 | return predicted_character, confidence_of_prediction 34 | -------------------------------------------------------------------------------- /lab4/text_recognizer/models/character_model.py: -------------------------------------------------------------------------------- 1 | """CharacterModel class.""" 2 | from typing import Callable, Dict, Tuple 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models.base import Model 7 | from text_recognizer.datasets.emnist_dataset import EmnistDataset 8 | from text_recognizer.networks.mlp import mlp 9 | 10 | 11 | class CharacterModel(Model): 12 | """CharacterModel works on datasets providing images, with one-hot labels.""" 13 | 14 | def __init__( 15 | self, 16 | dataset_cls: type = EmnistDataset, 17 | network_fn: Callable = mlp, 18 | dataset_args: Dict = None, 19 | network_args: Dict = None, 20 | ): 21 | super().__init__(dataset_cls, network_fn, dataset_args, network_args) 22 | 23 | def predict_on_image(self, image: np.ndarray) -> Tuple[str, float]: 24 | if image.dtype == np.uint8: 25 | image = (image / 255).astype(np.float32) 26 | # NOTE: integer to character mapping dictionary is self.data.mapping[integer] 27 | # Your code below (Lab 1) 28 | pred_raw = self.network.predict(np.expand_dims(image, 0), batch_size=1).flatten() 29 | ind = np.argmax(pred_raw) 30 | confidence_of_prediction = pred_raw[ind] 31 | predicted_character = self.data.mapping[ind] 32 | # Your code above (Lab 1) 33 | return predicted_character, confidence_of_prediction 34 | -------------------------------------------------------------------------------- /lab5/text_recognizer/models/character_model.py: -------------------------------------------------------------------------------- 1 | """CharacterModel class.""" 2 | from typing import Callable, Dict, Tuple 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models.base import Model 7 | from text_recognizer.datasets.emnist_dataset import EmnistDataset 8 | from text_recognizer.networks.mlp import mlp 9 | 10 | 11 | class CharacterModel(Model): 12 | """CharacterModel works on datasets providing images, with one-hot labels.""" 13 | 14 | def __init__( 15 | self, 16 | dataset_cls: type = EmnistDataset, 17 | network_fn: Callable = mlp, 18 | dataset_args: Dict = None, 19 | network_args: Dict = None, 20 | ): 21 | super().__init__(dataset_cls, network_fn, dataset_args, network_args) 22 | 23 | def predict_on_image(self, image: np.ndarray) -> Tuple[str, float]: 24 | if image.dtype == np.uint8: 25 | image = (image / 255).astype(np.float32) 26 | # NOTE: integer to character mapping dictionary is self.data.mapping[integer] 27 | # Your code below (Lab 1) 28 | pred_raw = self.network.predict(np.expand_dims(image, 0), batch_size=1).flatten() 29 | ind = np.argmax(pred_raw) 30 | confidence_of_prediction = pred_raw[ind] 31 | predicted_character = self.data.mapping[ind] 32 | # Your code above (Lab 1) 33 | return predicted_character, confidence_of_prediction 34 | -------------------------------------------------------------------------------- /lab6/text_recognizer/models/character_model.py: -------------------------------------------------------------------------------- 1 | """CharacterModel class.""" 2 | from typing import Callable, Dict, Tuple 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models.base import 
Model 7 | from text_recognizer.datasets.emnist_dataset import EmnistDataset 8 | from text_recognizer.networks.mlp import mlp 9 | 10 | 11 | class CharacterModel(Model): 12 | """CharacterModel works on datasets providing images, with one-hot labels.""" 13 | 14 | def __init__( 15 | self, 16 | dataset_cls: type = EmnistDataset, 17 | network_fn: Callable = mlp, 18 | dataset_args: Dict = None, 19 | network_args: Dict = None, 20 | ): 21 | super().__init__(dataset_cls, network_fn, dataset_args, network_args) 22 | 23 | def predict_on_image(self, image: np.ndarray) -> Tuple[str, float]: 24 | if image.dtype == np.uint8: 25 | image = (image / 255).astype(np.float32) 26 | # NOTE: integer to character mapping dictionary is self.data.mapping[integer] 27 | # Your code below (Lab 1) 28 | pred_raw = self.network.predict(np.expand_dims(image, 0), batch_size=1).flatten() 29 | ind = np.argmax(pred_raw) 30 | confidence_of_prediction = pred_raw[ind] 31 | predicted_character = self.data.mapping[ind] 32 | # Your code above (Lab 1) 33 | return predicted_character, confidence_of_prediction 34 | -------------------------------------------------------------------------------- /lab7/text_recognizer/models/character_model.py: -------------------------------------------------------------------------------- 1 | """CharacterModel class.""" 2 | from typing import Callable, Dict, Tuple 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models.base import Model 7 | from text_recognizer.datasets.emnist_dataset import EmnistDataset 8 | from text_recognizer.networks.mlp import mlp 9 | 10 | 11 | class CharacterModel(Model): 12 | """CharacterModel works on datasets providing images, with one-hot labels.""" 13 | 14 | def __init__( 15 | self, 16 | dataset_cls: type = EmnistDataset, 17 | network_fn: Callable = mlp, 18 | dataset_args: Dict = None, 19 | network_args: Dict = None, 20 | ): 21 | super().__init__(dataset_cls, network_fn, dataset_args, network_args) 22 | 23 | def predict_on_image(self, image: np.ndarray) -> Tuple[str, float]: 24 | if image.dtype == np.uint8: 25 | image = (image / 255).astype(np.float32) 26 | # NOTE: integer to character mapping dictionary is self.data.mapping[integer] 27 | # Your code below (Lab 1) 28 | pred_raw = self.network.predict(np.expand_dims(image, 0), batch_size=1).flatten() 29 | ind = np.argmax(pred_raw) 30 | confidence_of_prediction = pred_raw[ind] 31 | predicted_character = self.data.mapping[ind] 32 | # Your code above (Lab 1) 33 | return predicted_character, confidence_of_prediction 34 | -------------------------------------------------------------------------------- /lab8/text_recognizer/models/character_model.py: -------------------------------------------------------------------------------- 1 | """CharacterModel class.""" 2 | from typing import Callable, Dict, Tuple 3 | 4 | import numpy as np 5 | 6 | from text_recognizer.models.base import Model 7 | from text_recognizer.datasets.emnist_dataset import EmnistDataset 8 | from text_recognizer.networks.mlp import mlp 9 | 10 | 11 | class CharacterModel(Model): 12 | """CharacterModel works on datasets providing images, with one-hot labels.""" 13 | 14 | def __init__( 15 | self, 16 | dataset_cls: type = EmnistDataset, 17 | network_fn: Callable = mlp, 18 | dataset_args: Dict = None, 19 | network_args: Dict = None, 20 | ): 21 | super().__init__(dataset_cls, network_fn, dataset_args, network_args) 22 | 23 | def predict_on_image(self, image: np.ndarray) -> Tuple[str, float]: 24 | if image.dtype == np.uint8: 25 | image = (image / 
255).astype(np.float32) 26 | # NOTE: integer to character mapping dictionary is self.data.mapping[integer] 27 | # Your code below (Lab 1) 28 | pred_raw = self.network.predict(np.expand_dims(image, 0), batch_size=1).flatten() 29 | ind = np.argmax(pred_raw) 30 | confidence_of_prediction = pred_raw[ind] 31 | predicted_character = self.data.mapping[ind] 32 | # Your code above (Lab 1) 33 | return predicted_character, confidence_of_prediction 34 | -------------------------------------------------------------------------------- /lab7/evaluation/evaluate_line_predictor.py: -------------------------------------------------------------------------------- 1 | """Run validation test for LinePredictor.""" 2 | import os 3 | from pathlib import Path 4 | from time import time 5 | import unittest 6 | 7 | from text_recognizer.datasets import EmnistLinesDataset 8 | from text_recognizer.datasets import IamLinesDataset 9 | from text_recognizer.line_predictor import LinePredictor 10 | 11 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 12 | 13 | EMNIST_SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" / "emnist_lines" 14 | IAM_SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" / "iam_lines" 15 | 16 | 17 | class TestEvaluateLinePredictorEmnist(unittest.TestCase): 18 | def test_evaluate(self): 19 | predictor = LinePredictor(EmnistLinesDataset) 20 | dataset = EmnistLinesDataset() 21 | 22 | dataset.load_or_generate_data() 23 | 24 | t = time() 25 | metric = predictor.evaluate(dataset) 26 | time_taken = time() - t 27 | 28 | print(f"acc: {metric}, time_taken: {time_taken}") 29 | self.assertGreater(metric, 0.6) 30 | self.assertLess(time_taken, 120) 31 | 32 | 33 | class TestEvaluateLinePredictorIam(unittest.TestCase): 34 | def test_evaluate(self): 35 | predictor = LinePredictor(IamLinesDataset) 36 | dataset = IamLinesDataset() 37 | 38 | dataset.load_or_generate_data() 39 | 40 | t = time() 41 | metric = predictor.evaluate(dataset) 42 | time_taken = time() - t 43 | 44 | print(f"acc: {metric}, time_taken: {time_taken}") 45 | self.assertGreater(metric, 0.6) 46 | self.assertLess(time_taken, 180) 47 | -------------------------------------------------------------------------------- /lab8/evaluation/evaluate_line_predictor.py: -------------------------------------------------------------------------------- 1 | """Run validation test for LinePredictor.""" 2 | import os 3 | from pathlib import Path 4 | from time import time 5 | import unittest 6 | 7 | from text_recognizer.datasets import EmnistLinesDataset 8 | from text_recognizer.datasets import IamLinesDataset 9 | from text_recognizer.line_predictor import LinePredictor 10 | 11 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 12 | 13 | EMNIST_SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" / "emnist_lines" 14 | IAM_SUPPORT_DIRNAME = Path(__file__).parents[0].resolve() / "support" / "iam_lines" 15 | 16 | 17 | class TestEvaluateLinePredictorEmnist(unittest.TestCase): 18 | def test_evaluate(self): 19 | predictor = LinePredictor(EmnistLinesDataset) 20 | dataset = EmnistLinesDataset() 21 | 22 | dataset.load_or_generate_data() 23 | 24 | t = time() 25 | metric = predictor.evaluate(dataset) 26 | time_taken = time() - t 27 | 28 | print(f"acc: {metric}, time_taken: {time_taken}") 29 | self.assertGreater(metric, 0.6) 30 | self.assertLess(time_taken, 120) 31 | 32 | 33 | class TestEvaluateLinePredictorIam(unittest.TestCase): 34 | def test_evaluate(self): 35 | predictor = LinePredictor(IamLinesDataset) 36 | dataset = IamLinesDataset() 37 | 38 | 
dataset.load_or_generate_data() 39 | 40 | t = time() 41 | metric = predictor.evaluate(dataset) 42 | time_taken = time() - t 43 | 44 | print(f"acc: {metric}, time_taken: {time_taken}") 45 | self.assertGreater(metric, 0.6) 46 | self.assertLess(time_taken, 180) 47 | -------------------------------------------------------------------------------- /lab4/training/gpu_manager.py: -------------------------------------------------------------------------------- 1 | """GPUManager class.""" 2 | import os 3 | import time 4 | 5 | import gpustat 6 | import numpy as np 7 | from redlock import Redlock 8 | 9 | 10 | GPU_LOCK_TIMEOUT = 5000 # ms 11 | 12 | 13 | class GPUManager: 14 | """Class for allocating GPUs.""" 15 | 16 | def __init__(self, verbose: bool = False): 17 | self.lock_manager = Redlock([{"host": "localhost", "port": 6379, "db": 0}]) 18 | self.verbose = verbose 19 | 20 | def get_free_gpu(self): 21 | """ 22 | If some GPUs are available, try reserving one by checking out an exclusive redis lock. 23 | If none are available or the lock can't be acquired, sleep and check again. 24 | """ 25 | while True: 26 | gpu_ind = self._get_free_gpu() 27 | if gpu_ind is not None: 28 | return gpu_ind 29 | if self.verbose: 30 | print(f"pid {os.getpid()} sleeping") 31 | time.sleep(GPU_LOCK_TIMEOUT / 1000) 32 | 33 | def _get_free_gpu(self): 34 | try: 35 | available_gpu_inds = [ 36 | gpu.index for gpu in gpustat.GPUStatCollection.new_query() if gpu.memory_used < 0.5 * gpu.memory_total 37 | ] 38 | except Exception: # pylint: disable=broad-except 39 | return 0 # Return a dummy GPU index if no CUDA GPUs are installed 40 | 41 | if available_gpu_inds: 42 | gpu_ind = np.random.choice(available_gpu_inds) 43 | if self.verbose: 44 | print(f"pid {os.getpid()} picking gpu {gpu_ind}") 45 | if self.lock_manager.lock(f"gpu_{gpu_ind}", GPU_LOCK_TIMEOUT): 46 | return int(gpu_ind) 47 | if self.verbose: 48 | print(f"pid {os.getpid()} couldn't get lock") 49 | return None 50 | -------------------------------------------------------------------------------- /lab5/training/gpu_manager.py: -------------------------------------------------------------------------------- 1 | """GPUManager class.""" 2 | import os 3 | import time 4 | 5 | import gpustat 6 | import numpy as np 7 | from redlock import Redlock 8 | 9 | 10 | GPU_LOCK_TIMEOUT = 5000 # ms 11 | 12 | 13 | class GPUManager: 14 | """Class for allocating GPUs.""" 15 | 16 | def __init__(self, verbose: bool = False): 17 | self.lock_manager = Redlock([{"host": "localhost", "port": 6379, "db": 0}]) 18 | self.verbose = verbose 19 | 20 | def get_free_gpu(self): 21 | """ 22 | If some GPUs are available, try reserving one by checking out an exclusive redis lock. 23 | If none are available or the lock can't be acquired, sleep and check again.
24 | """ 25 | while True: 26 | gpu_ind = self._get_free_gpu() 27 | if gpu_ind is not None: 28 | return gpu_ind 29 | if self.verbose: 30 | print(f"pid {os.getpid()} sleeping") 31 | time.sleep(GPU_LOCK_TIMEOUT / 1000) 32 | 33 | def _get_free_gpu(self): 34 | try: 35 | available_gpu_inds = [ 36 | gpu.index for gpu in gpustat.GPUStatCollection.new_query() if gpu.memory_used < 0.5 * gpu.memory_total 37 | ] 38 | except Exception: # pylint: disable=broad-except 39 | return [0] # Return dummy GPU index if no CUDA GPUs are installed 40 | 41 | if available_gpu_inds: 42 | gpu_ind = np.random.choice(available_gpu_inds) 43 | if self.verbose: 44 | print(f"pid {os.getpid()} picking gpu {gpu_ind}") 45 | if self.lock_manager.lock(f"gpu_{gpu_ind}", GPU_LOCK_TIMEOUT): 46 | return int(gpu_ind) 47 | if self.verbose: 48 | print(f"pid {os.getpid()} couldnt get lock") 49 | return None 50 | -------------------------------------------------------------------------------- /lab6/training/gpu_manager.py: -------------------------------------------------------------------------------- 1 | """GPUManager class.""" 2 | import os 3 | import time 4 | 5 | import gpustat 6 | import numpy as np 7 | from redlock import Redlock 8 | 9 | 10 | GPU_LOCK_TIMEOUT = 5000 # ms 11 | 12 | 13 | class GPUManager: 14 | """Class for allocating GPUs.""" 15 | 16 | def __init__(self, verbose: bool = False): 17 | self.lock_manager = Redlock([{"host": "localhost", "port": 6379, "db": 0}]) 18 | self.verbose = verbose 19 | 20 | def get_free_gpu(self): 21 | """ 22 | If some GPUs are available, try reserving one by checking out an exclusive redis lock. 23 | If none available or can't get lock, sleep and check again. 24 | """ 25 | while True: 26 | gpu_ind = self._get_free_gpu() 27 | if gpu_ind is not None: 28 | return gpu_ind 29 | if self.verbose: 30 | print(f"pid {os.getpid()} sleeping") 31 | time.sleep(GPU_LOCK_TIMEOUT / 1000) 32 | 33 | def _get_free_gpu(self): 34 | try: 35 | available_gpu_inds = [ 36 | gpu.index for gpu in gpustat.GPUStatCollection.new_query() if gpu.memory_used < 0.5 * gpu.memory_total 37 | ] 38 | except Exception: # pylint: disable=broad-except 39 | return [0] # Return dummy GPU index if no CUDA GPUs are installed 40 | 41 | if available_gpu_inds: 42 | gpu_ind = np.random.choice(available_gpu_inds) 43 | if self.verbose: 44 | print(f"pid {os.getpid()} picking gpu {gpu_ind}") 45 | if self.lock_manager.lock(f"gpu_{gpu_ind}", GPU_LOCK_TIMEOUT): 46 | return int(gpu_ind) 47 | if self.verbose: 48 | print(f"pid {os.getpid()} couldnt get lock") 49 | return None 50 | -------------------------------------------------------------------------------- /lab7/training/gpu_manager.py: -------------------------------------------------------------------------------- 1 | """GPUManager class.""" 2 | import os 3 | import time 4 | 5 | import gpustat 6 | import numpy as np 7 | from redlock import Redlock 8 | 9 | 10 | GPU_LOCK_TIMEOUT = 5000 # ms 11 | 12 | 13 | class GPUManager: 14 | """Class for allocating GPUs.""" 15 | 16 | def __init__(self, verbose: bool = False): 17 | self.lock_manager = Redlock([{"host": "localhost", "port": 6379, "db": 0}]) 18 | self.verbose = verbose 19 | 20 | def get_free_gpu(self): 21 | """ 22 | If some GPUs are available, try reserving one by checking out an exclusive redis lock. 23 | If none available or can't get lock, sleep and check again. 
24 | """ 25 | while True: 26 | gpu_ind = self._get_free_gpu() 27 | if gpu_ind is not None: 28 | return gpu_ind 29 | if self.verbose: 30 | print(f"pid {os.getpid()} sleeping") 31 | time.sleep(GPU_LOCK_TIMEOUT / 1000) 32 | 33 | def _get_free_gpu(self): 34 | try: 35 | available_gpu_inds = [ 36 | gpu.index for gpu in gpustat.GPUStatCollection.new_query() if gpu.memory_used < 0.5 * gpu.memory_total 37 | ] 38 | except Exception: # pylint: disable=broad-except 39 | return [0] # Return dummy GPU index if no CUDA GPUs are installed 40 | 41 | if available_gpu_inds: 42 | gpu_ind = np.random.choice(available_gpu_inds) 43 | if self.verbose: 44 | print(f"pid {os.getpid()} picking gpu {gpu_ind}") 45 | if self.lock_manager.lock(f"gpu_{gpu_ind}", GPU_LOCK_TIMEOUT): 46 | return int(gpu_ind) 47 | if self.verbose: 48 | print(f"pid {os.getpid()} couldnt get lock") 49 | return None 50 | -------------------------------------------------------------------------------- /lab8/training/gpu_manager.py: -------------------------------------------------------------------------------- 1 | """GPUManager class.""" 2 | import os 3 | import time 4 | 5 | import gpustat 6 | import numpy as np 7 | from redlock import Redlock 8 | 9 | 10 | GPU_LOCK_TIMEOUT = 5000 # ms 11 | 12 | 13 | class GPUManager: 14 | """Class for allocating GPUs.""" 15 | 16 | def __init__(self, verbose: bool = False): 17 | self.lock_manager = Redlock([{"host": "localhost", "port": 6379, "db": 0}]) 18 | self.verbose = verbose 19 | 20 | def get_free_gpu(self): 21 | """ 22 | If some GPUs are available, try reserving one by checking out an exclusive redis lock. 23 | If none available or can't get lock, sleep and check again. 24 | """ 25 | while True: 26 | gpu_ind = self._get_free_gpu() 27 | if gpu_ind is not None: 28 | return gpu_ind 29 | if self.verbose: 30 | print(f"pid {os.getpid()} sleeping") 31 | time.sleep(GPU_LOCK_TIMEOUT / 1000) 32 | 33 | def _get_free_gpu(self): 34 | try: 35 | available_gpu_inds = [ 36 | gpu.index for gpu in gpustat.GPUStatCollection.new_query() if gpu.memory_used < 0.5 * gpu.memory_total 37 | ] 38 | except Exception: # pylint: disable=broad-except 39 | return [0] # Return dummy GPU index if no CUDA GPUs are installed 40 | 41 | if available_gpu_inds: 42 | gpu_ind = np.random.choice(available_gpu_inds) 43 | if self.verbose: 44 | print(f"pid {os.getpid()} picking gpu {gpu_ind}") 45 | if self.lock_manager.lock(f"gpu_{gpu_ind}", GPU_LOCK_TIMEOUT): 46 | return int(gpu_ind) 47 | if self.verbose: 48 | print(f"pid {os.getpid()} couldnt get lock") 49 | return None 50 | -------------------------------------------------------------------------------- /lab8/api/app.py: -------------------------------------------------------------------------------- 1 | """Flask web server serving text_recognizer predictions.""" 2 | import os 3 | 4 | from flask import Flask, request, jsonify 5 | import tensorflow.keras.backend as K 6 | 7 | from text_recognizer.line_predictor import LinePredictor 8 | import text_recognizer.util as util 9 | 10 | os.environ["CUDA_VISIBLE_DEVICES"] = "" # Do not use GPU 11 | 12 | app = Flask(__name__) # pylint: disable=invalid-name 13 | 14 | 15 | @app.route("/") 16 | def index(): 17 | """Provide simple health check route.""" 18 | return "Hello, world!" 19 | 20 | 21 | @app.route("/v1/predict", methods=["GET", "POST"]) 22 | def predict(): 23 | """Provide main prediction API route. 
--------------------------------------------------------------------------------
/lab1/text_recognizer/datasets/dataset_sequence.py:
--------------------------------------------------------------------------------

"""DatasetSequence class."""
import numpy as np
from tensorflow.keras.utils import Sequence


def _shuffle(x, y):
    """Shuffle x and y maintaining their association."""
    shuffled_indices = np.random.permutation(x.shape[0])
    return x[shuffled_indices], y[shuffled_indices]


class DatasetSequence(Sequence):
    """
    Minimal implementation of https://keras.io/utils/#sequence.
    """

    def __init__(self, x, y, batch_size=32, augment_fn=None, format_fn=None):
        self.x = x
        self.y = y
        self.batch_size = batch_size
        self.augment_fn = augment_fn
        self.format_fn = format_fn

    def __len__(self):
        """Return number of batches in the dataset."""
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Return a single batch."""
        # idx = 0  # If you want to intentionally overfit to just one batch
        begin = idx * self.batch_size
        end = (idx + 1) * self.batch_size

        # batch_x = np.take(self.x, range(begin, end), axis=0, mode='clip')
        # batch_y = np.take(self.y, range(begin, end), axis=0, mode='clip')

        batch_x = self.x[begin:end]
        batch_y = self.y[begin:end]

        if batch_x.dtype == np.uint8:
            batch_x = (batch_x / 255).astype(np.float32)

        if self.augment_fn:
            batch_x, batch_y = self.augment_fn(batch_x, batch_y)

        if self.format_fn:
            batch_x, batch_y = self.format_fn(batch_x, batch_y)

        return batch_x, batch_y

    def on_epoch_end(self) -> None:
        """Shuffle data."""
        self.x, self.y = _shuffle(self.x, self.y)

--------------------------------------------------------------------------------
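A minimal sketch of how DatasetSequence plugs into Keras training; the arrays below are random stand-ins, not repository data:

import numpy as np

from text_recognizer.datasets.dataset_sequence import DatasetSequence

x = np.random.randint(0, 256, size=(1000, 28, 28), dtype=np.uint8)  # fake uint8 images
y = np.random.randint(0, 10, size=(1000, 1))  # fake integer labels

train_sequence = DatasetSequence(x, y, batch_size=32)
assert len(train_sequence) == 32  # ceil(1000 / 32) batches
batch_x, batch_y = train_sequence[0]  # uint8 inputs come back scaled to float32 in [0, 1]

# A compiled Keras model could then consume the sequence directly:
# model.fit(train_sequence, epochs=3)  # on_epoch_end() reshuffles the data between epochs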