├── .gitignore
├── README.md
├── recognise.py
├── requirements.txt
├── test.jpg
└── training_instructions.pdf
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
input-NEAREST.tif
input-black-n-white.jpg
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Installation

1. Install the system dependencies

        $ sudo apt-get install tesseract-ocr

2. Download the latest OpenCV from https://opencv.org/
3. Follow these instructions to install OpenCV: http://stackoverflow.com/questions/15790501/why-cv2-so-missing-after-opencv-installed, and set PYTHONPATH as shown below

        export PYTHONPATH=~/projects/opencv/release/lib:$PYTHONPATH

4. Install the Python dependencies

        $ pip install -r requirements.txt

# Test

The following command recognises the text in test.jpg:

    $ python recognise.py
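If you want to call the recogniser from Python instead of the command line, a minimal sketch along these lines should work (it assumes only the `get_captcha_text_from_captcha_image` function defined in recognise.py):

    from recognise import get_captcha_text_from_captcha_image

    # Preprocesses test.jpg with OpenCV, then runs tesseract on the result
    print(get_captcha_text_from_captcha_image("test.jpg"))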
# Contributing [![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](https://github.com/hussaintamboli/python-image-to-text/issues)
--------------------------------------------------------------------------------
/recognise.py:
--------------------------------------------------------------------------------
# Code from http://blog.c22.cc/2010/10/12/python-ocr-or-how-to-break-captchas/
# and http://stackoverflow.com/questions/14640509/python-error-when-importing-image-to-string-from-tesseract

# Equivalent command line call:
# $ tesseract input-NEAREST.tif example -psm 6

from PIL import Image, ImageFilter
from pytesseract import image_to_string
import cv2


def preprocess_image_using_pil(image_path):
    # Sharpen the image and make the letters bolder for easier recognition
    img = Image.open(image_path)
    img = img.convert("RGBA")

    pixdata = img.load()

    # Darken pixels with a low red channel
    for y in range(img.size[1]):
        for x in range(img.size[0]):
            if pixdata[x, y][0] < 90:
                pixdata[x, y] = (0, 0, 0, 255)

    # Darken pixels with a low green channel
    for y in range(img.size[1]):
        for x in range(img.size[0]):
            if pixdata[x, y][1] < 136:
                pixdata[x, y] = (0, 0, 0, 255)

    # Everything that is left becomes white
    for y in range(img.size[1]):
        for x in range(img.size[0]):
            if pixdata[x, y][2] > 0:
                pixdata[x, y] = (255, 255, 255, 255)

    # And sharpen it
    img = img.filter(ImageFilter.SHARPEN)
    img.save("input-black.gif")

    # Make the image bigger (needed for OCR)
    basewidth = 1000  # in pixels
    im_orig = Image.open('input-black.gif')
    wpercent = basewidth / float(im_orig.size[0])
    hsize = int(float(im_orig.size[1]) * wpercent)
    big = im_orig.resize((basewidth, hsize), Image.ANTIALIAS)

    # Save the bigger image as TIF for tesseract-ocr
    tif_file = "input-NEAREST.tif"
    big.save(tif_file)

    return tif_file


def get_captcha_text_from_captcha_image(captcha_path):
    # Preprocess the image before OCR
    tif_file = preprocess_image_using_opencv(captcha_path)
    # Perform OCR using the tesseract-ocr library
    image = Image.open(tif_file)
    ocr_text = image_to_string(image, config="-psm 6")
    # Keep only the alphanumeric characters from the OCR output
    alphanumeric_text = ''.join(e for e in ocr_text if e.isalnum())

    return alphanumeric_text


def binarize_image_using_pil(captcha_path, binary_image_path='input-black-n-white.gif'):
    # Convert to greyscale, then threshold every pixel at 127
    im = Image.open(captcha_path).convert('L')

    for i in range(im.size[0]):
        for j in range(im.size[1]):
            if im.getpixel((i, j)) > 127:
                im.putpixel((i, j), 255)
            else:
                im.putpixel((i, j), 0)

    im.save(binary_image_path)
    return binary_image_path


def binarize_image_using_opencv(captcha_path, binary_image_path='input-black-n-white.jpg'):
    img = cv2.imread(captcha_path)
    im_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Let Otsu's method pick a suitable threshold, then re-threshold with it
    (thresh, im_bw) = cv2.threshold(im_gray, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    im_bw = cv2.threshold(im_gray, thresh, 255, cv2.THRESH_BINARY)[1]
    cv2.imwrite(binary_image_path, im_bw)
    return binary_image_path


def preprocess_image_using_opencv(captcha_path):
    bin_image_path = binarize_image_using_opencv(captcha_path)

    im_bin = Image.open(bin_image_path)

    # Make the image bigger (needed for OCR)
    basewidth = 340  # in pixels
    wpercent = basewidth / float(im_bin.size[0])
    hsize = int(float(im_bin.size[1]) * wpercent)
    big = im_bin.resize((basewidth, hsize), Image.NEAREST)

    # Save the bigger image as TIF for tesseract-ocr
    tif_file = "input-NEAREST.tif"
    big.save(tif_file)

    return tif_file


if __name__ == "__main__":
    print(get_captcha_text_from_captcha_image("test.jpg"))
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.11.0
Pillow==6.2.0
pytesseract==0.1.6
wheel==0.24.0
--------------------------------------------------------------------------------
/test.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hussaintamboli/python-image-to-text/4ece669c654ebe0d18001e2b84b973c4bd04fe7e/test.jpg
--------------------------------------------------------------------------------
/training_instructions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hussaintamboli/python-image-to-text/4ece669c654ebe0d18001e2b84b973c4bd04fe7e/training_instructions.pdf
--------------------------------------------------------------------------------