├── .gitignore
├── README.md
├── recognise.py
├── requirements.txt
├── test.jpg
└── training_instructions.pdf
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
input-NEAREST.tif
input-black-n-white.jpg
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Installation

1. Install the system dependencies

        $ sudo apt-get install tesseract-ocr

2. Download the latest OpenCV from https://opencv.org/
3. Follow these instructions to install OpenCV: http://stackoverflow.com/questions/15790501/why-cv2-so-missing-after-opencv-installed, and set PYTHONPATH as shown below

        export PYTHONPATH=~/projects/opencv/release/lib:$PYTHONPATH

4. Install the Python dependencies

        $ pip install -r requirements.txt

# Test

The following command recognises the text in test.jpg:

    $ python recognise.py
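If you want to call the recogniser from Python instead of the command line, a minimal sketch along these lines should work (it assumes only the `get_captcha_text_from_captcha_image` function defined in recognise.py):

    from recognise import get_captcha_text_from_captcha_image

    # Preprocesses test.jpg with OpenCV, then runs tesseract on the result
    print(get_captcha_text_from_captcha_image("test.jpg"))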
# Contributing [![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](https://github.com/hussaintamboli/python-image-to-text/issues)
--------------------------------------------------------------------------------
/recognise.py:
--------------------------------------------------------------------------------
# Code from http://blog.c22.cc/2010/10/12/python-ocr-or-how-to-break-captchas/
# and http://stackoverflow.com/questions/14640509/python-error-when-importing-image-to-string-from-tesseract

# Equivalent command line call:
# $ tesseract input-NEAREST.tif example -psm 6

from PIL import Image, ImageFilter
from pytesseract import image_to_string
import cv2


def preprocess_image_using_pil(image_path):
    # Sharpen the image and make the letters bolder for easier recognition
    img = Image.open(image_path)
    img = img.convert("RGBA")

    pixdata = img.load()

    # Darken pixels with a low red channel
    for y in range(img.size[1]):
        for x in range(img.size[0]):
            if pixdata[x, y][0] < 90:
                pixdata[x, y] = (0, 0, 0, 255)

    # Darken pixels with a low green channel
    for y in range(img.size[1]):
        for x in range(img.size[0]):
            if pixdata[x, y][1] < 136:
                pixdata[x, y] = (0, 0, 0, 255)

    # Everything that is left becomes white
    for y in range(img.size[1]):
        for x in range(img.size[0]):
            if pixdata[x, y][2] > 0:
                pixdata[x, y] = (255, 255, 255, 255)

    # And sharpen it
    img = img.filter(ImageFilter.SHARPEN)
    img.save("input-black.gif")

    # Make the image bigger (needed for OCR)
    basewidth = 1000  # in pixels
    im_orig = Image.open('input-black.gif')
    wpercent = basewidth / float(im_orig.size[0])
    hsize = int(float(im_orig.size[1]) * wpercent)
    big = im_orig.resize((basewidth, hsize), Image.ANTIALIAS)

    # Save the bigger image as TIF for tesseract-ocr
    tif_file = "input-NEAREST.tif"
    big.save(tif_file)

    return tif_file


def get_captcha_text_from_captcha_image(captcha_path):
    # Preprocess the image before OCR
    tif_file = preprocess_image_using_opencv(captcha_path)
    # Perform OCR using the tesseract-ocr library
    image = Image.open(tif_file)
    ocr_text = image_to_string(image, config="-psm 6")
    # Keep only the alphanumeric characters from the OCR output
    alphanumeric_text = ''.join(e for e in ocr_text if e.isalnum())

    return alphanumeric_text


def binarize_image_using_pil(captcha_path, binary_image_path='input-black-n-white.gif'):
    # Convert to greyscale, then threshold every pixel at 127
    im = Image.open(captcha_path).convert('L')

    for i in range(im.size[0]):
        for j in range(im.size[1]):
            if im.getpixel((i, j)) > 127:
                im.putpixel((i, j), 255)
            else:
                im.putpixel((i, j), 0)

    im.save(binary_image_path)
    return binary_image_path


def binarize_image_using_opencv(captcha_path, binary_image_path='input-black-n-white.jpg'):
    img = cv2.imread(captcha_path)
    im_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Let Otsu's method pick a suitable threshold, then re-threshold with it
    (thresh, im_bw) = cv2.threshold(im_gray, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    im_bw = cv2.threshold(im_gray, thresh, 255, cv2.THRESH_BINARY)[1]
    cv2.imwrite(binary_image_path, im_bw)
    return binary_image_path


def preprocess_image_using_opencv(captcha_path):
    bin_image_path = binarize_image_using_opencv(captcha_path)

    im_bin = Image.open(bin_image_path)

    # Make the image bigger (needed for OCR)
    basewidth = 340  # in pixels
    wpercent = basewidth / float(im_bin.size[0])
    hsize = int(float(im_bin.size[1]) * wpercent)
    big = im_bin.resize((basewidth, hsize), Image.NEAREST)

    # Save the bigger image as TIF for tesseract-ocr
    tif_file = "input-NEAREST.tif"
    big.save(tif_file)

    return tif_file


if __name__ == "__main__":
    print(get_captcha_text_from_captcha_image("test.jpg"))
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.11.0
Pillow==6.2.0
pytesseract==0.1.6
wheel==0.24.0
--------------------------------------------------------------------------------
/test.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hussaintamboli/python-image-to-text/4ece669c654ebe0d18001e2b84b973c4bd04fe7e/test.jpg
--------------------------------------------------------------------------------
/training_instructions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hussaintamboli/python-image-to-text/4ece669c654ebe0d18001e2b84b973c4bd04fe7e/training_instructions.pdf
--------------------------------------------------------------------------------