# Document Scanner and OCR -- combined source listing.
#
# Repository layout:
#   ├── README.md
#   ├── ocr.py
#   ├── scanner.py
#   └── transform.py
#
# Each banner below introduces the contents of one file from that layout.
# The three Python sections are separate modules: scanner.py imports from
# ocr.py and transform.py.

# --------------------------------------------------------------------------------
# /README.md:
# --------------------------------------------------------------------------------
#   # Document Scanner and OCR
#
#   courtesy: pyimagesearch.com

# --------------------------------------------------------------------------------
# /ocr.py:
# --------------------------------------------------------------------------------
from PIL import Image
import pytesseract
import cv2
import os
import time


def preprocess(image, args="thresh"):
    """Clean up *image* for OCR and write it to a temporary JPEG.

    Args:
        image: Image array as produced by ``cv2.imread``. Colour input is
            converted to grayscale; single-channel input is used as is.
        args: ``"thresh"`` applies Otsu binarisation, ``"blur"`` applies a
            3x3 median blur, any other value applies no filter.

    Returns:
        Name of the JPEG written to the current working directory. The name
        is derived from the process id, so successive calls overwrite it.

    Raises:
        ValueError: if *image* is ``None``.
        OSError: if the temporary file could not be written.
    """
    if image is None:
        raise ValueError("preprocess() received no image data")

    try:
        # Only colour input needs converting. Calling cvtColor on an image
        # that is already single-channel raises, which previously caused
        # the requested filter to be skipped without notice.
        if image.ndim == 3 and image.shape[2] in (3, 4):
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image

        if args == "thresh":
            gray = cv2.threshold(
                gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
            )[1]
        elif args == "blur":
            gray = cv2.medianBlur(gray, 3)
    except cv2.error:
        # Best effort: if OpenCV rejects the input, OCR the image unfiltered.
        gray = image

    filename = "{}.jpg".format(os.getpid())
    if not cv2.imwrite(filename, gray):
        raise OSError("could not write temporary image {}".format(filename))

    return filename


def ocr(filename):
    """Run Tesseract on *filename*, print the text, and delete the file.

    Args:
        filename: Image file name, resolved against the current working
            directory (an absolute path is used unchanged).

    Returns:
        The recognised text. It is also printed to stdout.
    """
    # os.path.join instead of a hard-coded "\\" keeps this portable.
    full_path = os.path.join(os.getcwd(), filename)
    try:
        # The context manager closes the file handle before the removal
        # below; an open handle blocks deletion on Windows.
        with Image.open(full_path) as im:
            text = pytesseract.image_to_string(im)
    finally:
        # Clean up the temporary file even when OCR fails.
        if os.path.exists(full_path):
            os.remove(full_path)

    print(text)
    return text


def main():
    """OCR the sample picture referenced by the hard-coded path."""
    # Raw string: the backslashes are path separators, not escapes.
    source = r"C:\Users\hp\Desktop\My Files\Practice\Python\Document Scanner\pic2.jpg"
    im = cv2.imread(source)
    if im is None:
        # cv2.imread signals an unreadable file by returning None.
        raise FileNotFoundError("could not read image: {}".format(source))
    x = preprocess(im)
    ocr(x)


if __name__ == '__main__':
    main()

# --------------------------------------------------------------------------------
# /scanner.py:
# --------------------------------------------------------------------------------
from transform import four_point_transform
from skimage.filters import threshold_local
import numpy as np
import argparse
import cv2
import imutils
from ocr import preprocess,ocr

# Height in pixels of the downscaled copy used for edge and contour
# detection. scan() uses the same value to map contour coordinates back
# onto the full-resolution image, so the two must stay in sync.
WORKING_HEIGHT = 500


def edgeDetection(image):
    """Return the Canny edge map of *image* resized to WORKING_HEIGHT.

    Args:
        image: BGR image array.

    Returns:
        Single-channel edge image of the resized input.
    """
    image = imutils.resize(image, height=WORKING_HEIGHT)

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    return cv2.Canny(gray, 75, 200)


def findContour(edged):
    """Find the largest contour in *edged* that simplifies to four points.

    Only the five largest contours by area are examined.

    Args:
        edged: Edge image as returned by ``edgeDetection``.

    Returns:
        The four-point polygon approximation of the selected contour.

    Raises:
        ValueError: if none of the examined contours has four corners.
    """
    found = cv2.findContours(
        edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )
    # OpenCV 2.4 and 4.x return (contours, hierarchy); 3.x returns
    # (image, contours, hierarchy). The contour list is second from the end
    # in every case, so no version check is needed.
    cnts = found[-2]
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

    for c in cnts:
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)

        if len(approx) == 4:
            return approx

    raise ValueError("no four-point contour found in the edge image")


def scan(screenCnt, image):
    """Warp *image* to a top-down view of *screenCnt* and binarise it.

    Args:
        screenCnt: Four-point contour found on the WORKING_HEIGHT-sized copy
            of *image*.
        image: The original, full-resolution BGR image.

    Returns:
        ``uint8`` image holding only the values 0 and 255.
    """
    # The contour was located on the resized copy; scale it back up.
    ratio = image.shape[0] / float(WORKING_HEIGHT)

    warped = four_point_transform(image, screenCnt.reshape(4, 2) * ratio)

    warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
    T = threshold_local(warped, 11, offset=10, method="gaussian")
    return (warped > T).astype("uint8") * 255


def main():
    """Scan ``pic2.jpg``, OCR the result, and display the scanned page."""
    image = cv2.imread("pic2.jpg")
    if image is None:
        # cv2.imread signals an unreadable file by returning None.
        raise FileNotFoundError("could not read image: pic2.jpg")

    edged = edgeDetection(image)
    screenCnt = findContour(edged)
    scannedImage = scan(screenCnt, image)

    ### OCR
    processedImg = preprocess(scannedImage)
    ocr(processedImg)

    cv2.imshow("Scanned", imutils.resize(scannedImage, height=650))
    cv2.waitKey(0)
    cv2.destroyAllWindows()


if __name__ == '__main__':
    main()

# --------------------------------------------------------------------------------
# /transform.py:
# --------------------------------------------------------------------------------

import numpy as np
import cv2


def order_points(pts):
    """Order four corner points as top-left, top-right, bottom-right,
    bottom-left.

    The points are sorted by angle around their centroid, which yields a
    clockwise walk in image coordinates (y grows downwards). The walk is then
    rotated to start at the point with the smallest ``x + y``. Unlike picking
    the extremes of ``x + y`` and ``y - x`` independently, this can never
    return the same corner twice.

    Args:
        pts: Array-like of shape ``(4, 2)`` holding ``(x, y)`` pairs.

    Returns:
        ``float32`` array of shape ``(4, 2)`` in the order described above.

    Raises:
        ValueError: if *pts* does not have shape ``(4, 2)``.
    """
    pts = np.asarray(pts, dtype="float32")
    if pts.shape != (4, 2):
        raise ValueError(
            "expected 4 points of shape (4, 2), got {}".format(pts.shape)
        )

    centre = pts.mean(axis=0)
    angles = np.arctan2(pts[:, 1] - centre[1], pts[:, 0] - centre[0])
    ring = pts[np.argsort(angles, kind="stable")]

    # The top-left corner is the one with the smallest coordinate sum.
    start = int(np.argmin(ring.sum(axis=1)))
    return np.roll(ring, -start, axis=0)


def four_point_transform(image, pts):
    """Warp the quadrilateral *pts* of *image* to a top-down rectangle.

    Args:
        image: Source image array.
        pts: Four ``(x, y)`` corners of the region, in any order.

    Returns:
        The warped image. Its width and height are the longer of each pair
        of opposite edge lengths, truncated to whole pixels.

    Raises:
        ValueError: if the corners span less than one pixel in width or
            height.
    """
    # obtain a consistent order of the points and unpack them individually
    rect = order_points(pts)
    (tl, tr, br, bl) = rect

    # the output width is the longer of the bottom and top edges
    widthA = np.linalg.norm(br - bl)
    widthB = np.linalg.norm(tr - tl)
    maxWidth = max(int(widthA), int(widthB))

    # the output height is the longer of the right and left edges
    heightA = np.linalg.norm(tr - br)
    heightB = np.linalg.norm(tl - bl)
    maxHeight = max(int(heightA), int(heightB))

    if maxWidth < 1 or maxHeight < 1:
        raise ValueError("corner points do not enclose a usable area")

    # destination corners of the "birds eye view", in the same top-left,
    # top-right, bottom-right, bottom-left order as rect
    dst = np.array([
        [0, 0],
        [maxWidth - 1, 0],
        [maxWidth - 1, maxHeight - 1],
        [0, maxHeight - 1]], dtype="float32")

    # compute the perspective transform matrix and then apply it
    M = cv2.getPerspectiveTransform(rect, dst)
    return cv2.warpPerspective(image, M, (maxWidth, maxHeight))