├── .gitignore ├── README.md ├── calculate_noise.py ├── char_dist.gz ├── character_replacement_distributions.json ├── convert.py ├── data_generation ├── alignment.py ├── calculate_distribution.py ├── needle └── noisify.py ├── errors.py ├── evaluate.py ├── evaluate_seq.py ├── evaluate_sequences.py ├── generate_text.py ├── generate_text_from_clusters.py ├── keras_seq2seq.py ├── noisify.py ├── one_to_many.py ├── open2txt.py ├── opennmt.py ├── show.py ├── split.py ├── uniform_lower_distribution.json └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | data/ 3 | models/ 4 | OpenNMT-py/ 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurkuNLP/ocr-correction/HEAD/README.md -------------------------------------------------------------------------------- /calculate_noise.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurkuNLP/ocr-correction/HEAD/calculate_noise.py -------------------------------------------------------------------------------- /char_dist.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurkuNLP/ocr-correction/HEAD/char_dist.gz -------------------------------------------------------------------------------- /character_replacement_distributions.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurkuNLP/ocr-correction/HEAD/character_replacement_distributions.json -------------------------------------------------------------------------------- /convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurkuNLP/ocr-correction/HEAD/convert.py -------------------------------------------------------------------------------- /data_generation/alignment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurkuNLP/ocr-correction/HEAD/data_generation/alignment.py -------------------------------------------------------------------------------- /data_generation/calculate_distribution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurkuNLP/ocr-correction/HEAD/data_generation/calculate_distribution.py -------------------------------------------------------------------------------- /data_generation/needle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurkuNLP/ocr-correction/HEAD/data_generation/needle -------------------------------------------------------------------------------- /data_generation/noisify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurkuNLP/ocr-correction/HEAD/data_generation/noisify.py -------------------------------------------------------------------------------- /errors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurkuNLP/ocr-correction/HEAD/errors.py -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurkuNLP/ocr-correction/HEAD/evaluate.py -------------------------------------------------------------------------------- /evaluate_seq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurkuNLP/ocr-correction/HEAD/evaluate_seq.py -------------------------------------------------------------------------------- /evaluate_sequences.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurkuNLP/ocr-correction/HEAD/evaluate_sequences.py -------------------------------------------------------------------------------- /generate_text.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurkuNLP/ocr-correction/HEAD/generate_text.py -------------------------------------------------------------------------------- /generate_text_from_clusters.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurkuNLP/ocr-correction/HEAD/generate_text_from_clusters.py -------------------------------------------------------------------------------- /keras_seq2seq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurkuNLP/ocr-correction/HEAD/keras_seq2seq.py -------------------------------------------------------------------------------- /noisify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurkuNLP/ocr-correction/HEAD/noisify.py -------------------------------------------------------------------------------- /one_to_many.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurkuNLP/ocr-correction/HEAD/one_to_many.py -------------------------------------------------------------------------------- /open2txt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurkuNLP/ocr-correction/HEAD/open2txt.py -------------------------------------------------------------------------------- /opennmt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurkuNLP/ocr-correction/HEAD/opennmt.py -------------------------------------------------------------------------------- /show.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurkuNLP/ocr-correction/HEAD/show.py -------------------------------------------------------------------------------- /split.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurkuNLP/ocr-correction/HEAD/split.py -------------------------------------------------------------------------------- /uniform_lower_distribution.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurkuNLP/ocr-correction/HEAD/uniform_lower_distribution.json -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurkuNLP/ocr-correction/HEAD/utils.py --------------------------------------------------------------------------------