├── OCR.png ├── README.md ├── c01.png ├── c02.png ├── c03.png ├── c04.png ├── card.png ├── card1.png ├── card1.tiff ├── card2.png ├── card3.png └── readCarddetails.py /OCR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Geek-Mans/Bank-Cards-Reader/fdc534fdd6c634c7e327741e1f2e9038d1c63339/OCR.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bank-Cards-Reader 2 | A Python program that uses Tesseract and OpenCV to extract vital information, such as the card holder's name, the expiry date, and the card number, from credit/debit card images. It uses OCR-A template matching in OpenCV for font recognition, together with thresholding (to preprocess the image) and median blurring (to remove noise), to get the best results. 3 | -------------------------------------------------------------------------------- /c01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Geek-Mans/Bank-Cards-Reader/fdc534fdd6c634c7e327741e1f2e9038d1c63339/c01.png -------------------------------------------------------------------------------- /c02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Geek-Mans/Bank-Cards-Reader/fdc534fdd6c634c7e327741e1f2e9038d1c63339/c02.png -------------------------------------------------------------------------------- /c03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Geek-Mans/Bank-Cards-Reader/fdc534fdd6c634c7e327741e1f2e9038d1c63339/c03.png -------------------------------------------------------------------------------- /c04.png: -------------------------------------------------------------------------------- 
# https://raw.githubusercontent.com/Geek-Mans/Bank-Cards-Reader/fdc534fdd6c634c7e327741e1f2e9038d1c63339/c04.png
# --------------------------------------------------------------------------------
# /card.png:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/Geek-Mans/Bank-Cards-Reader/fdc534fdd6c634c7e327741e1f2e9038d1c63339/card.png
# --------------------------------------------------------------------------------
# /card1.png:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/Geek-Mans/Bank-Cards-Reader/fdc534fdd6c634c7e327741e1f2e9038d1c63339/card1.png
# --------------------------------------------------------------------------------
# /card1.tiff:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/Geek-Mans/Bank-Cards-Reader/fdc534fdd6c634c7e327741e1f2e9038d1c63339/card1.tiff
# --------------------------------------------------------------------------------
# /card2.png:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/Geek-Mans/Bank-Cards-Reader/fdc534fdd6c634c7e327741e1f2e9038d1c63339/card2.png
# --------------------------------------------------------------------------------
# /card3.png:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/Geek-Mans/Bank-Cards-Reader/fdc534fdd6c634c7e327741e1f2e9038d1c63339/card3.png
# --------------------------------------------------------------------------------
# /readCarddetails.py:
# --------------------------------------------------------------------------------
"""Read the card number, expiry date and holder name from a bank-card image.

Plain Tesseract OCR is tried first on the original image, then on an
Otsu-thresholded copy and on a median-blurred copy.  Whenever OCR does not
yield a card number or an expiry date, OCR-A template matching against the
``OCR.png`` reference sheet is used as a fallback.
"""
from imutils import contours
import numpy as np
import argparse
import imutils
import cv2
import pytesseract as tes
import re
from PIL import Image
import os
import imquality.brisque as brisque
from skimage import io, img_as_float


# Assets are resolved next to this script instead of a user-specific
# absolute path, so the program runs from any checkout of the repository.
_BASE_DIR = os.path.dirname(os.path.abspath(__file__))
OCR_REFERENCE_PATH = os.path.join(_BASE_DIR, "OCR.png")

# Patterns are compiled once; raw strings avoid invalid-escape warnings.
_CARD_NUMBER_RE = re.compile(r'\d{4}\s\d{4}\s\d{4}\s\d{4}')
_EXPIRY_RE = re.compile(r'\d{2}/\d{2}')
_NAME_THREE_PARTS_RE = re.compile(r'[A-Za-z]+[.]*\s[A-Za-z]+[.]*\s[A-Za-z]+')
_NAME_TWO_PARTS_RE = re.compile(r'[A-Za-z]+\s[A-Za-z]+')

# Size (width, height) every digit ROI is resized to before matching.
_DIGIT_SIZE = (57, 88)
# Padding, in pixels, added around each detected digit group.
_GROUP_PAD = 5


# Using brisque scores to find quality of image
# but due to no proper availability of bank card images we can't create a
# relationship between the score and the OCR result.
def quality_check(loc):
    """Print the BRISQUE score of the image stored at ``loc``.

    This is a best-effort diagnostic: any failure while reading or scoring
    the image is reported on stdout and otherwise ignored.
    """
    try:
        img = img_as_float(io.imread(loc, as_gray=True))
        score = brisque.score(img)
    except Exception as exc:  # diagnostic only; must never abort the run
        print("Exception in finding scores:", exc)
    else:
        print("Image Score = ", score)


def _to_gray(image):
    """Return ``image`` as a single-channel array.

    A 2-D input is returned unchanged; ``cv2.cvtColor`` with
    ``COLOR_BGR2GRAY`` is only applied to multi-channel input because it
    raises on an image that is already single-channel.
    """
    if image.ndim == 2:
        return image
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)


def _load_reference_digits(ref_path):
    """Map the left-to-right index of each reference glyph to its ROI.

    The reference sheet is thresholded with ``THRESH_BINARY_INV`` so the
    glyphs become white on black, and every glyph is resized to
    ``_DIGIT_SIZE``.

    Raises:
        FileNotFoundError: if ``ref_path`` cannot be read as an image.
    """
    ref = cv2.imread(ref_path)
    if ref is None:
        raise FileNotFoundError(
            "OCR-A reference image not found: {}".format(ref_path))
    ref = cv2.cvtColor(ref, cv2.COLOR_BGR2GRAY)
    ref = cv2.threshold(ref, 10, 255, cv2.THRESH_BINARY_INV)[1]

    ref_cnts = cv2.findContours(ref.copy(), cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)
    ref_cnts = imutils.grab_contours(ref_cnts)
    ref_cnts = contours.sort_contours(ref_cnts, method="left-to-right")[0]

    digits = {}
    for index, cnt in enumerate(ref_cnts):
        x, y, w, h = cv2.boundingRect(cnt)
        digits[index] = cv2.resize(ref[y:y + h, x:x + w], _DIGIT_SIZE)
    return digits


def _find_digit_groups(gray):
    """Return ``(x, y, w, h)`` boxes of candidate digit groups, left to right.

    ``gray`` is expected to be the grayscale card resized to width 300; the
    width/height limits below are expressed in pixels of that image.
    """
    rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 3))
    sq_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))

    # Tophat keeps light regions on a dark background (the embossed digits).
    tophat = cv2.morphologyEx(gray, cv2.MORPH_TOPHAT, rect_kernel)

    # Horizontal gradient (ksize=-1), rescaled to the range [0, 255].
    grad_x = cv2.Sobel(tophat, ddepth=cv2.CV_32F, dx=1, dy=0, ksize=-1)
    grad_x = np.absolute(grad_x)
    min_val, max_val = np.min(grad_x), np.max(grad_x)
    span = max_val - min_val
    if span == 0:
        # A perfectly flat gradient has no edges, hence no digit groups;
        # returning early also avoids a division by zero.
        return []
    grad_x = (255 * ((grad_x - min_val) / span)).astype("uint8")

    # Close the gaps between neighbouring digits, binarise with Otsu, then
    # close once more so each group becomes a single blob.
    grad_x = cv2.morphologyEx(grad_x, cv2.MORPH_CLOSE, rect_kernel)
    thresh = cv2.threshold(grad_x, 0, 255,
                           cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, sq_kernel)

    cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                            cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)

    locs = []
    for cnt in cnts:
        x, y, w, h = cv2.boundingRect(cnt)
        aspect = w / float(h)
        # Cards use a fixed-size font with 4 groups of 4 digits, so groups
        # are pruned on aspect ratio and on minimum/maximum width/height.
        if 2.5 < aspect < 4.0 and 40 < w < 55 and 10 < h < 20:
            locs.append((x, y, w, h))
    return sorted(locs, key=lambda box: box[0])


def _classify_group(gray, box, digits):
    """Return the recognised digits (as strings) of one digit group."""
    g_x, g_y, g_w, g_h = box
    # Clamp the padded window: a negative start index would wrap around to
    # the other side of the array and produce a wrong (often empty) slice.
    top = max(g_y - _GROUP_PAD, 0)
    left = max(g_x - _GROUP_PAD, 0)
    group = gray[top:g_y + g_h + _GROUP_PAD, left:g_x + g_w + _GROUP_PAD]
    group = cv2.threshold(group, 0, 255,
                          cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

    digit_cnts = cv2.findContours(group.copy(), cv2.RETR_EXTERNAL,
                                  cv2.CHAIN_APPROX_SIMPLE)
    digit_cnts = imutils.grab_contours(digit_cnts)
    if len(digit_cnts) == 0:
        # sort_contours cannot handle an empty list.
        return []
    digit_cnts = contours.sort_contours(digit_cnts,
                                        method="left-to-right")[0]

    recognised = []
    for cnt in digit_cnts:
        x, y, w, h = cv2.boundingRect(cnt)
        roi = cv2.resize(group[y:y + h, x:x + w], _DIGIT_SIZE)
        # Correlation score against every reference glyph; the best score
        # wins.  ``digits`` is keyed 0..n-1 in insertion order, so the
        # argmax position equals the glyph's key.
        scores = [
            cv2.minMaxLoc(cv2.matchTemplate(roi, ref_roi, cv2.TM_CCOEFF))[1]
            for ref_roi in digits.values()
        ]
        recognised.append(str(np.argmax(scores)))
    return recognised


# Many Cards use OCR-A language (Matching with its template)
def template_match(image, req_str, ref_path=None):
    """Recover a card field by OCR-A template matching.

    Args:
        image: card image as an OpenCV array, either BGR or single-channel.
        req_str: ``"card_number"`` or ``"card_expiry"``; any other value
            yields ``None``.
        ref_path: path of the OCR-A reference sheet; defaults to
            ``OCR_REFERENCE_PATH``.

    Returns:
        The requested value as a string, or ``None`` when it cannot be
        determined or any processing step fails (the failure is printed).
    """
    try:
        digits = _load_reference_digits(
            OCR_REFERENCE_PATH if ref_path is None else ref_path)

        # imutils.resize returns a new array, so the annotations drawn
        # below never touch the caller's image.
        image = imutils.resize(image, width=300)
        gray = _to_gray(image)

        output = []
        for box in _find_digit_groups(gray):
            g_x, g_y, g_w, g_h = box
            group_output = _classify_group(gray, box, digits)

            # draw the digit classifications around the group
            cv2.rectangle(image, (g_x - 5, g_y - 5),
                          (g_x + g_w + 5, g_y + g_h + 5), (0, 0, 255), 2)
            cv2.putText(image, "".join(group_output), (g_x, g_y - 15),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.65, (0, 0, 255), 2)
            output.extend(group_output)

        card_number = "".join(output)
        print("c = ", card_number)

        if req_str == "card_expiry":
            # OCR is only needed for the expiry date; it runs on the
            # annotated image.
            match = _EXPIRY_RE.search(tes.image_to_string(image))
            return match.group(0) if match else None
        if req_str == "card_number" and card_number:
            return card_number
        return None
    except Exception as exc:
        # Best-effort fallback: report the failure and let the caller try
        # its next image variant.
        print("template_match failed:", exc)
        return None


# card_number
def find_c_no(text, image):
    """Return the card number found in ``text``.

    The first ``dddd dddd dddd dddd`` match in ``text`` is returned; when
    there is none, ``template_match(image, "card_number")`` is used, which
    may return ``None``.
    """
    match = _CARD_NUMBER_RE.search(text)
    if match:
        return match.group(0)
    # if not sending to template match
    return template_match(image, "card_number")


# card_expiry
def find_c_exp(text, image):
    """Return the expiry date found in ``text``.

    The first ``dd/dd`` match in ``text`` is returned; when there is none,
    ``template_match(image, "card_expiry")`` is used, which may return
    ``None``.
    """
    match = _EXPIRY_RE.search(text)
    if match:
        return match.group(0)
    # if not sending to template match
    return template_match(image, "card_expiry")


# card_name
def find_c_name(text):
    """Return the holder name that follows the last expiry date in ``text``.

    Only the text after the last ``dd/dd`` occurrence is searched, line by
    line.  A line matches as "first middle last" or, failing that, as
    "first last"; when several lines match, the last matching line wins.
    Returns ``None`` when ``text`` has no expiry date or no line matches.
    """
    after_expiry = _EXPIRY_RE.split(text)  # name exists after expiry
    if len(after_expiry) <= 1:
        return None

    card_name = None
    for line in after_expiry[-1].split('\n'):
        match = (_NAME_THREE_PARTS_RE.search(line)
                 or _NAME_TWO_PARTS_RE.search(line))
        if match:
            card_name = match.group(0)
    return card_name


def _first_truthy(candidates):
    """Return the first truthy item of ``candidates``, or ``None``.

    ``candidates`` may be a generator, so later (expensive) attempts are
    only evaluated when the earlier ones produced nothing.
    """
    return next((value for value in candidates if value), None)


# thresholding and removing noise with median blur along with normal OCR
def find_details(loc):
    """Extract and print the card number, expiry date and holder name.

    Each field is looked up in the OCR text of the original image first,
    then of the Otsu-thresholded image, then of the median-blurred image.

    Args:
        loc: path of the card image.

    Returns:
        A dict with the keys ``"card_number"``, ``"card_expiry"`` and
        ``"card_name"``; a value is ``None`` when the field was not found.

    Raises:
        FileNotFoundError: if ``loc`` cannot be read as an image.
    """
    image = cv2.imread(loc)
    if image is None:
        raise FileNotFoundError("Cannot read image: {}".format(loc))
    cv2.imshow("Image", image)

    # Close the PIL file handle as soon as OCR is done.
    with Image.open(loc) as pil_image:
        text = tes.image_to_string(pil_image)

    # thresholding
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray_thresh = cv2.threshold(gray, 0, 255,
                                cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    # The arrays are handed to Tesseract directly, so no temporary image
    # file is written to disk.
    text_thresh = tes.image_to_string(gray_thresh)

    # median blur to remove noise
    gray_blur = cv2.medianBlur(gray, 3)
    text_blur = tes.image_to_string(gray_blur)

    variants = (
        (text, image),
        (text_thresh, gray_thresh),
        (text_blur, gray_blur),
    )

    card_number = _first_truthy(
        find_c_no(ocr_text, img) for ocr_text, img in variants)
    print("Card_Number: ", card_number)

    card_expiry = _first_truthy(
        find_c_exp(ocr_text, img) for ocr_text, img in variants)
    print("Card_Expiry: ", card_expiry)

    # Every fallback is keyed on the name itself, not on the expiry date.
    card_name = _first_truthy(
        find_c_name(ocr_text) for ocr_text, _ in variants)
    print("Card_Name: ", card_name)

    return {
        "card_number": card_number,
        "card_expiry": card_expiry,
        "card_name": card_name,
    }


def main():
    """Run the quality check and the detail extraction on one image."""
    parser = argparse.ArgumentParser(
        description="Extract the number, expiry date and holder name "
                    "from a bank card image.")
    parser.add_argument(
        "image", nargs="?", default=os.path.join(_BASE_DIR, "card2.png"),
        help="path of the card image (default: card2.png next to this "
             "script)")
    args = parser.parse_args()
    quality_check(args.image)
    find_details(args.image)


# driver
if __name__ == '__main__':
    main()
# --------------------------------------------------------------------------------