├── .github ├── ISSUE_TEMPLATE.md └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── Pipfile ├── Pipfile.lock ├── README.md ├── __init__.py ├── app.py ├── images ├── ocr_image_1.png ├── ocr_image_2.png ├── ocr_image_3.jpg ├── ocr_image_4.jpeg └── ocr_image_5.jpg ├── ocr_core.py ├── requirements.txt ├── static └── uploads │ └── ocr_image_1.png └── templates ├── index.html └── upload.html /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | #### Description 2 | 3 | 3-5 sentences describing the issue 4 | 5 | #### Steps to Reproduce 6 | 7 | Step by step instructions on how to reproduce this issue 8 | 9 | ##### Expected Behavior 10 | 11 | Description of the expected behavior 12 | 13 | ##### Actual Behavior 14 | 15 | Description of what actually happens 16 | 17 | #### Additional Information 18 | 19 | Information regarding the environment where the issue occurred. This can include 20 | information such as location, platform, origin, version or environment. 21 | 22 | Any additional information, configuration or data that might be necessary or helpful to reproduce the issue and help in identification and resolution of the issue. 23 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | #### What does this PR do? 2 | 3 | 1 sentence tagline of what the PR includes. 4 | 5 | #### Description of Task to be completed? 6 | 7 | A detailed description of what the PR delivers. 8 | 9 | #### How should this be manually tested? 10 | 11 | Steps on how to test the work delivered by the PR. 12 | 13 | #### Any background context you want to provide? 14 | 15 | Any additional information, configuration or data that might be necessary to a 16 | reviewer of the PR. 17 | 18 | #### What are the relevant issues? 19 | 20 | Reference the issue if applicable. 21 | 22 | #### Screenshots (if appropriate) 23 | 24 | #### Questions: 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .sonarlint/** 2 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [dev-packages] 7 | 8 | [packages] 9 | pillow = "*" 10 | pytesseract = "*" 11 | flask = "*" 12 | 13 | [requires] 14 | python_version = "3.7" 15 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "25eeaf5c729233d5e973c8afac5780ecd8a0b4af514993698058d8ac20b73669" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.7" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": { 19 | "click": { 20 | "hashes": [ 21 | "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a", 22 | "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc" 23 | ], 24 | "version": "==7.1.2" 25 | }, 26 | "flask": { 27 | "hashes": [ 28 | "sha256:2271c0070dbcb5275fad4a82e29f23ab92682dc45f9dfbc22c02ba9b9322ce48", 29 | "sha256:a080b744b7e345ccfcbc77954861cb05b3c63786e93f2b3875e0913d44b43f05" 30 | ], 31 | "index": "pypi", 32 | "version": "==1.0.2" 33 | }, 34 | "itsdangerous": { 35 | "hashes": [ 36 | "sha256:321b033d07f2a4136d3ec762eac9f16a10ccd60f53c0c91af90217ace7ba1f19", 37 | "sha256:b12271b2047cb23eeb98c8b5622e2e5c5e9abd9784a153e9d8ef9cb4dd09d749" 38 | ], 39 | "version": "==1.1.0" 40 | }, 41 | "jinja2": { 42 | "hashes": [ 43 | "sha256:03e47ad063331dd6a3f04a43eddca8a966a26ba0c5b7207a9a9e4e08f1b29419", 44 | "sha256:a6d58433de0ae800347cab1fa3043cebbabe8baa9d29e668f1c768cb87a333c6" 45 | ], 46 | "index": "pypi", 47 | "version": "==2.11.3" 48 | }, 49 | "markupsafe": { 50 | "hashes": [ 51 | "sha256:00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473", 52 | "sha256:09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161", 53 | "sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235", 54 | "sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5", 55 | "sha256:13d3144e1e340870b25e7b10b98d779608c02016d5184cfb9927a9f10c689f42", 56 | "sha256:195d7d2c4fbb0ee8139a6cf67194f3973a6b3042d742ebe0a9ed36d8b6f0c07f", 57 | "sha256:22c178a091fc6630d0d045bdb5992d2dfe14e3259760e713c490da5323866c39", 58 | "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff", 59 | "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b", 60 | "sha256:2beec1e0de6924ea551859edb9e7679da6e4870d32cb766240ce17e0a0ba2014", 61 | "sha256:3b8a6499709d29c2e2399569d96719a1b21dcd94410a586a18526b143ec8470f", 62 | "sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1", 63 | "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e", 64 | "sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183", 65 | "sha256:535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66", 66 | "sha256:596510de112c685489095da617b5bcbbac7dd6384aeebeda4df6025d0256a81b", 67 | "sha256:62fe6c95e3ec8a7fad637b7f3d372c15ec1caa01ab47926cfdf7a75b40e0eac1", 68 | "sha256:6788b695d50a51edb699cb55e35487e430fa21f1ed838122d722e0ff0ac5ba15", 69 | "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1", 70 | "sha256:6f1e273a344928347c1290119b493a1f0303c52f5a5eae5f16d74f48c15d4a85", 71 | "sha256:6fffc775d90dcc9aed1b89219549b329a9250d918fd0b8fa8d93d154918422e1", 72 | "sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e", 73 | "sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b", 74 | "sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905", 75 | "sha256:7fed13866cf14bba33e7176717346713881f56d9d2bcebab207f7a036f41b850", 76 | "sha256:84dee80c15f1b560d55bcfe6d47b27d070b4681c699c572af2e3c7cc90a3b8e0", 77 | "sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735", 78 | "sha256:8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d", 79 | "sha256:98bae9582248d6cf62321dcb52aaf5d9adf0bad3b40582925ef7c7f0ed85fceb", 80 | "sha256:98c7086708b163d425c67c7a91bad6e466bb99d797aa64f965e9d25c12111a5e", 81 | "sha256:9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d", 82 | "sha256:9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c", 83 | "sha256:a6a744282b7718a2a62d2ed9d993cad6f5f585605ad352c11de459f4108df0a1", 84 | "sha256:acf08ac40292838b3cbbb06cfe9b2cb9ec78fce8baca31ddb87aaac2e2dc3bc2", 85 | "sha256:ade5e387d2ad0d7ebf59146cc00c8044acbd863725f887353a10df825fc8ae21", 86 | "sha256:b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2", 87 | "sha256:b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5", 88 | "sha256:b1dba4527182c95a0db8b6060cc98ac49b9e2f5e64320e2b56e47cb2831978c7", 89 | "sha256:b2051432115498d3562c084a49bba65d97cf251f5a331c64a12ee7e04dacc51b", 90 | "sha256:b7d644ddb4dbd407d31ffb699f1d140bc35478da613b441c582aeb7c43838dd8", 91 | "sha256:ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6", 92 | "sha256:bf5aa3cbcfdf57fa2ee9cd1822c862ef23037f5c832ad09cfea57fa846dec193", 93 | "sha256:c8716a48d94b06bb3b2524c2b77e055fb313aeb4ea620c8dd03a105574ba704f", 94 | "sha256:caabedc8323f1e93231b52fc32bdcde6db817623d33e100708d9a68e1f53b26b", 95 | "sha256:cd5df75523866410809ca100dc9681e301e3c27567cf498077e8551b6d20e42f", 96 | "sha256:cdb132fc825c38e1aeec2c8aa9338310d29d337bebbd7baa06889d09a60a1fa2", 97 | "sha256:d53bc011414228441014aa71dbec320c66468c1030aae3a6e29778a3382d96e5", 98 | "sha256:d73a845f227b0bfe8a7455ee623525ee656a9e2e749e4742706d80a6065d5e2c", 99 | "sha256:d9be0ba6c527163cbed5e0857c451fcd092ce83947944d6c14bc95441203f032", 100 | "sha256:e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7", 101 | "sha256:e8313f01ba26fbbe36c7be1966a7b7424942f670f38e666995b88d012765b9be", 102 | "sha256:feb7b34d6325451ef96bc0e36e1a6c0c1c64bc1fbec4b854f4529e51887b1621" 103 | ], 104 | "version": "==1.1.1" 105 | }, 106 | "pillow": { 107 | "hashes": [ 108 | "sha256:00fdeb23820f30e43bba78eb9abb00b7a937a655de7760b2e09101d63708b64e", 109 | "sha256:01f948e8220c85eae1aa1a7f8edddcec193918f933fb07aaebe0bfbbcffefbf1", 110 | "sha256:08abf39948d4b5017a137be58f1a52b7101700431f0777bec3d897c3949f74e6", 111 | "sha256:099a61618b145ecb50c6f279666bbc398e189b8bc97544ae32b8fcb49ad6b830", 112 | "sha256:2c1c61546e73de62747e65807d2cc4980c395d4c5600ecb1f47a650c6fa78c79", 113 | "sha256:2ed9c4f694861642401f27dc3cb99772be67cd190e84845c749dae0a06c3bfae", 114 | "sha256:338581b30b908e111be578f0297255f6b57a51358cd16fa0e6f664c9a1f88bff", 115 | "sha256:38c7d48a21cd06fdeee93987147b9b1c55b73b4cfcbf83240568bfbd5adee447", 116 | "sha256:43fd026f613c8e48a25eba1a92f4d2ad7f3903c95d8c33a11611a7717d2ab654", 117 | "sha256:4548236844327a718ce3bb182ab32a16fa2050c61e334e959f554cac052fb0df", 118 | "sha256:5090857876c58885cfa388dc649e5db30aae98a068c26f3fd0ac9d7d9a4d9572", 119 | "sha256:5bbba34f97a26a93f5e8dec469ca4ddd712451418add43da946dbaed7f7a98d2", 120 | "sha256:65a28969a025a0eb4594637b6103201dc4ed2a9508bdab56ac33e43e3081c404", 121 | "sha256:892bb52b70bd5ea9dbbc3ac44f38e84f5a04e9d8b1bff48159d96cb795b81159", 122 | "sha256:8a9becd5cbd5062f973bcd2e7bc79483af310222de112b6541f8af1f93a3cc42", 123 | "sha256:972a7aaeb7c4a2795b52eef52ee991ef040b31009f36deca6207a986607b55f3", 124 | "sha256:97b119c436bfa96a92ac2ca525f7025836d4d4e64b1c9f9eff8dbaf3ff1d86f3", 125 | "sha256:9ba37698e242223f8053cc158f130aee046a96feacbeab65893dbe94f5530118", 126 | "sha256:b1b0e1f626a0f079c0d3696db70132fb1f29aa87c66aecb6501a9b8be64ce9f7", 127 | "sha256:c14c1224fd1a5be2733530d648a316974dbbb3c946913562c6005a76f21ca042", 128 | "sha256:c79a8546c48ae6465189e54e3245a97ddf21161e33ff7eaa42787353417bb2b6", 129 | "sha256:ceb76935ac4ebdf6d7bc845482a4450b284c6ccfb281e34da51d510658ab34d8", 130 | "sha256:e22bffaad04b4d16e1c091baed7f2733fc1ebb91e0c602abf1b6834d17158b1f", 131 | "sha256:ec883b8e44d877bda6f94a36313a1c6063f8b1997aa091628ae2f34c7f97c8d5", 132 | "sha256:f1baa54d50ec031d1a9beb89974108f8f2c0706f49798f4777df879df0e1adb6", 133 | "sha256:f53a5385932cda1e2c862d89460992911a89768c65d176ff8c50cddca4d29bed" 134 | ], 135 | "index": "pypi", 136 | "version": "==6.2.0" 137 | }, 138 | "pytesseract": { 139 | "hashes": [ 140 | "sha256:11c20321595b6e2e904b594633edf1a717212b13bac7512986a2d807b8849770" 141 | ], 142 | "index": "pypi", 143 | "version": "==0.2.6" 144 | }, 145 | "werkzeug": { 146 | "hashes": [ 147 | "sha256:2de2a5db0baeae7b2d2664949077c2ac63fbd16d98da0ff71837f7d1dea3fd43", 148 | "sha256:6c80b1e5ad3665290ea39320b91e1be1e0d5f60652b964a3070216de83d2e47c" 149 | ], 150 | "version": "==1.0.1" 151 | } 152 | }, 153 | "develop": {} 154 | } 155 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![HitCount](http://hits.dwyl.io/ro6ley/python-ocr-example.svg)](http://hits.dwyl.io/ro6ley/python-ocr-example) 2 | 3 | # PyTesseract - Simple Python Optical Character Recognition 4 | 5 | This repository contains the code for this [blogpost](https://stackabuse.com/pytesseract-simple-python-optical-character-recognition/). 6 | 7 | ## Getting Started 8 | 9 | ### Prerequisites 10 | 11 | Kindly ensure you have the following installed on your machine: 12 | 13 | - [ ] [Python 3](https://realpython.com/installing-python/) 14 | - [ ] [Tesseract](https://github.com/tesseract-ocr/tesseract/wiki#installation) 15 | - [ ] [Git]() 16 | - [ ] An IDE or Editor of your choice 17 | 18 | ### Running the Application 19 | 20 | 1. Clone the repository 21 | ``` 22 | $ git clone https://github.com/ro6ley/python-ocr-example.git 23 | ``` 24 | 25 | 2. Check into the cloned repository 26 | ``` 27 | $ cd python-ocr-example 28 | ``` 29 | 30 | 3. If you are using Pipenv, setup the virtual environment and start it as follows: 31 | ``` 32 | $ pipenv install && pipenv shell 33 | ``` 34 | 35 | 4. Install the requirements 36 | ``` 37 | $ pip install -r requirements.txt 38 | ``` 39 | 40 | 4. Run OCR server 41 | ``` 42 | $ python app.py 43 | ``` 44 | 45 | ## Contribution 46 | 47 | Please feel free to raise issues using this [template](./.github/ISSUE_TEMPLATE.md) and I'll get back to you. 48 | 49 | You can also fork the repository, make changes and submit a Pull Request using this [template](./.github/PULL_REQUEST_TEMPLATE.md). 50 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ro6ley/python-ocr-example/fe8e6d1457c6178ea91900e5c97fa50f95f5a228/__init__.py -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from flask import Flask, render_template, request 4 | 5 | from ocr_core import ocr_core 6 | 7 | 8 | UPLOAD_FOLDER = '/static/uploads/' 9 | ALLOWED_EXTENSIONS = set(['png', 'jpg', 'jpeg', 'gif']) 10 | 11 | app = Flask(__name__) 12 | 13 | 14 | def allowed_file(filename): 15 | return '.' in filename and \ 16 | filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS 17 | 18 | 19 | @app.route('/') 20 | def home_page(): 21 | return render_template('index.html') 22 | 23 | 24 | @app.route('/upload', methods=['GET', 'POST']) 25 | def upload_page(): 26 | if request.method == 'POST': 27 | # check if the post request has the file part 28 | if 'file' not in request.files: 29 | return render_template('upload.html', msg='No file selected') 30 | file = request.files['file'] 31 | # if user does not select file, browser also 32 | # submit a empty part without filename 33 | if file.filename == '': 34 | return render_template('upload.html', msg='No file selected') 35 | 36 | if file and allowed_file(file.filename): 37 | file.save(os.path.join(os.getcwd() + UPLOAD_FOLDER, file.filename)) 38 | 39 | # call the OCR function on it 40 | extracted_text = ocr_core(file) 41 | 42 | # extract the text and display it 43 | return render_template('upload.html', 44 | msg='Successfully processed', 45 | extracted_text=extracted_text, 46 | img_src=UPLOAD_FOLDER + file.filename) 47 | elif request.method == 'GET': 48 | return render_template('upload.html') 49 | 50 | if __name__ == '__main__': 51 | app.run() 52 | -------------------------------------------------------------------------------- /images/ocr_image_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ro6ley/python-ocr-example/fe8e6d1457c6178ea91900e5c97fa50f95f5a228/images/ocr_image_1.png -------------------------------------------------------------------------------- /images/ocr_image_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ro6ley/python-ocr-example/fe8e6d1457c6178ea91900e5c97fa50f95f5a228/images/ocr_image_2.png -------------------------------------------------------------------------------- /images/ocr_image_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ro6ley/python-ocr-example/fe8e6d1457c6178ea91900e5c97fa50f95f5a228/images/ocr_image_3.jpg -------------------------------------------------------------------------------- /images/ocr_image_4.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ro6ley/python-ocr-example/fe8e6d1457c6178ea91900e5c97fa50f95f5a228/images/ocr_image_4.jpeg -------------------------------------------------------------------------------- /images/ocr_image_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ro6ley/python-ocr-example/fe8e6d1457c6178ea91900e5c97fa50f95f5a228/images/ocr_image_5.jpg -------------------------------------------------------------------------------- /ocr_core.py: -------------------------------------------------------------------------------- 1 | try: 2 | from PIL import Image 3 | except ImportError: 4 | import Image 5 | import pytesseract 6 | 7 | 8 | def ocr_core(filename): 9 | """ 10 | This function will handle the core OCR processing of images. 11 | """ 12 | text = pytesseract.image_to_string(Image.open(filename)) # We'll use Pillow's Image class to open the image and pytesseract to detect the string in the image 13 | return text # Then we will print the text in the image 14 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -i https://pypi.org/simple/ 2 | click==7.1.2 3 | flask==1.0.2 4 | itsdangerous==1.1.0 5 | jinja2==2.11.3 6 | markupsafe==1.1.1 7 | pillow==6.2.0 8 | pytesseract==0.2.6 9 | werkzeug==1.0.1 10 | -------------------------------------------------------------------------------- /static/uploads/ocr_image_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ro6ley/python-ocr-example/fe8e6d1457c6178ea91900e5c97fa50f95f5a228/static/uploads/ocr_image_1.png -------------------------------------------------------------------------------- /templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Index 5 | 6 | 7 | Hello World. 8 | 9 | 10 | -------------------------------------------------------------------------------- /templates/upload.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Upload Image 5 | 6 | 7 | 8 | {% if msg %} 9 |

{{ msg }}

10 | {% endif %} 11 | 12 |

Upload new File

13 | 14 |
15 |

16 | 17 |

18 | 19 |

Result:

20 | {% if img_src %} 21 | 22 | {% endif %} 23 | 24 | {% if extracted_text %} 25 |

The extracted text from the image above is: {{ extracted_text }}

26 | 27 | {% else %} 28 | The extracted text will be displayed here 29 | {% endif %} 30 | 31 | 32 | --------------------------------------------------------------------------------