├── README.md
├── ocr.py
├── scanner.py
└── transform.py


/README.md:
--------------------------------------------------------------------------------
1 | # Document Scanner and OCR
2 | 
3 | courtesy: pyimagesearch.com
4 | 


--------------------------------------------------------------------------------
/ocr.py:
--------------------------------------------------------------------------------
 1 | from PIL import Image
 2 | import pytesseract
 3 | import cv2
 4 | import os
 5 | import time
 6 | 
 7 | def preprocess(image,args="thresh"):
 8 | 	# load the example image and convert it to grayscale
 9 | 	# image = img
10 | 	try:
11 | 		gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
12 | 
13 | 		if args == "thresh":
14 | 			gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
15 | 	 
16 | 		elif args == "blur":
17 | 			gray = cv2.medianBlur(gray, 3)
18 | 	
19 | 	except:
20 | 		gray = image
21 | 
22 | 	filename = "{}.jpg".format(os.getpid())
23 | 	cv2.imwrite(filename, gray)
24 | 
25 | 	# return gray
26 | 	return filename
27 | 
def ocr(filename):
	"""Run Tesseract OCR on an image file, print the text, then delete the file.

	Args:
		filename: Name of the image file, relative to the current working
			directory.

	Returns:
		The text recognised by ``pytesseract.image_to_string``.
	"""
	# os.path.join instead of a hard-coded "\\" so this also works off Windows.
	path = os.path.join(os.getcwd(), filename)
	try:
		# Close the image handle before the file is removed below.
		with Image.open(path) as im:
			text = pytesseract.image_to_string(im)
		print(text)
	finally:
		# The file is a temporary artefact: clean it up even if OCR failed.
		if os.path.exists(path):
			os.remove(path)
	return text
34 | 
def main():
	"""Preprocess a hard-coded sample image and print its OCR text."""
	# Raw string: same path as before, but without relying on unrecognised
	# escape sequences such as "\h" or "\D", which newer Python versions
	# warn about.
	image_path = r"C:\Users\hp\Desktop\My Files\Practice\Python\Document Scanner\pic2.jpg"
	im = cv2.imread(image_path)
	if im is None:
		# cv2.imread signals an unreadable/missing file by returning None.
		raise FileNotFoundError("Could not read image: {}".format(image_path))
	x = preprocess(im)
	ocr(x)


if __name__ == '__main__':
	main()
42 | 


--------------------------------------------------------------------------------
/scanner.py:
--------------------------------------------------------------------------------
 1 | from transform import four_point_transform
 2 | from skimage.filters import threshold_local
 3 | import numpy as np
 4 | import argparse
 5 | import cv2
 6 | import imutils
 7 | from ocr import preprocess,ocr
 8 | 
 9 | 
def edgeDetection(image):
	"""Return a Canny edge map of ``image``.

	The image is first resized to a height of 500 via ``imutils.resize``,
	converted to grayscale and smoothed with a 5x5 Gaussian blur; Canny is
	then run with thresholds 75 and 200.
	"""
	resized = imutils.resize(image, height=500)
	blurred = cv2.GaussianBlur(
		cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY), (5, 5), 0
	)
	return cv2.Canny(blurred, 75, 200)
22 | 
23 | 
def findContour(edged):
	"""Find the largest four-cornered contour in an edge map.

	Only the five contours with the largest area are examined, from largest
	to smallest; each is simplified with ``cv2.approxPolyDP`` (tolerance 2%
	of its perimeter) and the first one that reduces to exactly four points
	is returned.

	Args:
		edged: Edge image as accepted by ``cv2.findContours``.

	Returns:
		The four-point approximation produced by ``cv2.approxPolyDP``.

	Raises:
		ValueError: If none of the examined contours has four corners.
	"""
	found = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
	# OpenCV 2.4 and 4.x return (contours, hierarchy) while 3.x returns
	# (image, contours, hierarchy); pick the contour list by tuple length
	# so every version is handled.
	cnts = found[0] if len(found) == 2 else found[1]
	cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

	for c in cnts:
		peri = cv2.arcLength(c, True)
		approx = cv2.approxPolyDP(c, 0.02 * peri, True)

		if len(approx) == 4:
			return approx

	raise ValueError(
		"No four-point contour found among the five largest contours"
	)
43 | 
44 | 
def scan(screenCnt, image):
	"""Warp the outlined region to a top-down view and binarise it.

	``screenCnt`` is reshaped to four (x, y) points and scaled by
	``image.shape[0] / 500.0`` before the perspective transform (presumably
	500 is the height the contour was detected at - confirm against the
	caller). The warped image is converted to grayscale and thresholded
	against a local Gaussian threshold (block size 11, offset 10).

	Returns:
		A ``uint8`` image holding 255 where the pixel exceeds its local
		threshold and 0 elsewhere.
	"""
	scale = image.shape[0] / 500.0
	corners = screenCnt.reshape(4, 2) * scale

	gray = cv2.cvtColor(four_point_transform(image, corners), cv2.COLOR_BGR2GRAY)
	local_threshold = threshold_local(gray, 11, offset=10, method="gaussian")

	return (gray > local_threshold).astype("uint8") * 255
65 | 
def main():
	"""Scan ``pic2.jpg``, print its OCR text and display the scanned image."""
	image_path = "pic2.jpg"
	image = cv2.imread(image_path)
	if image is None:
		# cv2.imread signals an unreadable/missing file by returning None;
		# fail here with a clear message instead of deeper in the pipeline.
		raise FileNotFoundError("Could not read image: {}".format(image_path))

	edged = edgeDetection(image)
	screenCnt = findContour(edged)
	scannedImage = scan(screenCnt, image)

	# OCR
	processedImg = preprocess(scannedImage)
	ocr(processedImg)

	cv2.imshow("Scanned", imutils.resize(scannedImage, height=650))
	cv2.waitKey(0)
	cv2.destroyAllWindows()


if __name__ == '__main__':
	main()


--------------------------------------------------------------------------------
/transform.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import numpy as np
 3 | import cv2
 4 | 
 5 | def order_points(pts):
 6 | 	# initialzie a list of coordinates that will be ordered
 7 | 	# such that the first entry in the list is the top-left,
 8 | 	# the second entry is the top-right, the third is the
 9 | 	# bottom-right, and the fourth is the bottom-left
10 | 	rect = np.zeros((4, 2), dtype = "float32")
11 | 
12 | 	# the top-left point will have the smallest sum, whereas
13 | 	# the bottom-right point will have the largest sum
14 | 	s = pts.sum(axis = 1)
15 | 	rect[0] = pts[np.argmin(s)]
16 | 	rect[2] = pts[np.argmax(s)]
17 | 
18 | 	# now, compute the difference between the points, the
19 | 	# top-right point will have the smallest difference,
20 | 	# whereas the bottom-left will have the largest difference
21 | 	diff = np.diff(pts, axis = 1)
22 | 	rect[1] = pts[np.argmin(diff)]
23 | 	rect[3] = pts[np.argmax(diff)]
24 | 
25 | 	# return the ordered coordinates
26 | 	return rect
27 | 
def four_point_transform(image, pts):
	"""Warp the quadrilateral ``pts`` of ``image`` to a top-down rectangle.

	The points are ordered with ``order_points``. The output width is the
	longer of the bottom and top edges and the output height the longer of
	the right and left edges (Euclidean lengths truncated to ``int``).

	Returns:
		The image produced by ``cv2.warpPerspective``, sized
		``(maxWidth, maxHeight)``.
	"""
	def _edge_length(p, q):
		# Euclidean distance between two (x, y) points.
		return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

	rect = order_points(pts)
	tl, tr, br, bl = rect

	maxWidth = max(int(_edge_length(br, bl)), int(_edge_length(tr, tl)))
	maxHeight = max(int(_edge_length(tr, br)), int(_edge_length(tl, bl)))

	# Destination corners in the same order as ``rect``:
	# top-left, top-right, bottom-right, bottom-left.
	right, bottom = maxWidth - 1, maxHeight - 1
	dst = np.array(
		[[0, 0], [right, 0], [right, bottom], [0, bottom]],
		dtype="float32",
	)

	M = cv2.getPerspectiveTransform(rect, dst)
	return cv2.warpPerspective(image, M, (maxWidth, maxHeight))


--------------------------------------------------------------------------------