├── DocsAndPresentation └── KNN_Overview.png ├── GenData.py ├── TrainAndTest.py ├── classifications.txt ├── flattened_images.txt ├── readme.txt ├── test1.png ├── test2.png ├── test3.png ├── training_chars.png └── training_chars_small_test.png /DocsAndPresentation/KNN_Overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MicrocontrollersAndMore/OpenCV_3_KNN_Character_Recognition_Python/97c0305db7211cd335b499aa1ee84f7884e6e44f/DocsAndPresentation/KNN_Overview.png -------------------------------------------------------------------------------- /GenData.py: -------------------------------------------------------------------------------- 1 | # GenData.py 2 | 3 | import sys 4 | import numpy as np 5 | import cv2 6 | import os 7 | 8 | # module level variables ########################################################################## 9 | MIN_CONTOUR_AREA = 100 10 | 11 | RESIZED_IMAGE_WIDTH = 20 12 | RESIZED_IMAGE_HEIGHT = 30 13 | 14 | ################################################################################################### 15 | def main(): 16 | imgTrainingNumbers = cv2.imread("training_chars.png") # read in training numbers image 17 | 18 | if imgTrainingNumbers is None: # if image was not read successfully 19 | print "error: image not read from file \n\n" # print error message to std out 20 | os.system("pause") # pause so user can see error message 21 | return # and exit function (which exits program) 22 | # end if 23 | 24 | imgGray = cv2.cvtColor(imgTrainingNumbers, cv2.COLOR_BGR2GRAY) # get grayscale image 25 | imgBlurred = cv2.GaussianBlur(imgGray, (5,5), 0) # blur 26 | 27 | # filter image from grayscale to black and white 28 | imgThresh = cv2.adaptiveThreshold(imgBlurred, # input image 29 | 255, # make pixels that pass the threshold full white 30 | cv2.ADAPTIVE_THRESH_GAUSSIAN_C, # use gaussian rather than mean, seems to give better results 31 | cv2.THRESH_BINARY_INV, # invert so foreground will be white, background will be black 32 | 11, # size of a pixel neighborhood used to calculate threshold value 33 | 2) # constant subtracted from the mean or weighted mean 34 | 35 | cv2.imshow("imgThresh", imgThresh) # show threshold image for reference 36 | 37 | imgThreshCopy = imgThresh.copy() # make a copy of the thresh image, this in necessary b/c findContours modifies the image 38 | 39 | imgContours, npaContours, npaHierarchy = cv2.findContours(imgThreshCopy, # input image, make sure to use a copy since the function will modify this image in the course of finding contours 40 | cv2.RETR_EXTERNAL, # retrieve the outermost contours only 41 | cv2.CHAIN_APPROX_SIMPLE) # compress horizontal, vertical, and diagonal segments and leave only their end points 42 | 43 | # declare empty numpy array, we will use this to write to file later 44 | # zero rows, enough cols to hold all image data 45 | npaFlattenedImages = np.empty((0, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT)) 46 | 47 | intClassifications = [] # declare empty classifications list, this will be our list of how we are classifying our chars from user input, we will write to file at the end 48 | 49 | # possible chars we are interested in are digits 0 through 9, put these in list intValidChars 50 | intValidChars = [ord('0'), ord('1'), ord('2'), ord('3'), ord('4'), ord('5'), ord('6'), ord('7'), ord('8'), ord('9'), 51 | ord('A'), ord('B'), ord('C'), ord('D'), ord('E'), ord('F'), ord('G'), ord('H'), ord('I'), ord('J'), 52 | ord('K'), ord('L'), ord('M'), ord('N'), ord('O'), ord('P'), ord('Q'), ord('R'), ord('S'), ord('T'), 53 | ord('U'), ord('V'), ord('W'), ord('X'), ord('Y'), ord('Z')] 54 | 55 | for npaContour in npaContours: # for each contour 56 | if cv2.contourArea(npaContour) > MIN_CONTOUR_AREA: # if contour is big enough to consider 57 | [intX, intY, intW, intH] = cv2.boundingRect(npaContour) # get and break out bounding rect 58 | 59 | # draw rectangle around each contour as we ask user for input 60 | cv2.rectangle(imgTrainingNumbers, # draw rectangle on original training image 61 | (intX, intY), # upper left corner 62 | (intX+intW,intY+intH), # lower right corner 63 | (0, 0, 255), # red 64 | 2) # thickness 65 | 66 | imgROI = imgThresh[intY:intY+intH, intX:intX+intW] # crop char out of threshold image 67 | imgROIResized = cv2.resize(imgROI, (RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT)) # resize image, this will be more consistent for recognition and storage 68 | 69 | cv2.imshow("imgROI", imgROI) # show cropped out char for reference 70 | cv2.imshow("imgROIResized", imgROIResized) # show resized image for reference 71 | cv2.imshow("training_numbers.png", imgTrainingNumbers) # show training numbers image, this will now have red rectangles drawn on it 72 | 73 | intChar = cv2.waitKey(0) # get key press 74 | 75 | if intChar == 27: # if esc key was pressed 76 | sys.exit() # exit program 77 | elif intChar in intValidChars: # else if the char is in the list of chars we are looking for . . . 78 | 79 | intClassifications.append(intChar) # append classification char to integer list of chars (we will convert to float later before writing to file) 80 | 81 | npaFlattenedImage = imgROIResized.reshape((1, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT)) # flatten image to 1d numpy array so we can write to file later 82 | npaFlattenedImages = np.append(npaFlattenedImages, npaFlattenedImage, 0) # add current flattened impage numpy array to list of flattened image numpy arrays 83 | # end if 84 | # end if 85 | # end for 86 | 87 | fltClassifications = np.array(intClassifications, np.float32) # convert classifications list of ints to numpy array of floats 88 | 89 | npaClassifications = fltClassifications.reshape((fltClassifications.size, 1)) # flatten numpy array of floats to 1d so we can write to file later 90 | 91 | print "\n\ntraining complete !!\n" 92 | 93 | np.savetxt("classifications.txt", npaClassifications) # write flattened images to file 94 | np.savetxt("flattened_images.txt", npaFlattenedImages) # 95 | 96 | cv2.destroyAllWindows() # remove windows from memory 97 | 98 | return 99 | 100 | ################################################################################################### 101 | if __name__ == "__main__": 102 | main() 103 | # end if 104 | 105 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /TrainAndTest.py: -------------------------------------------------------------------------------- 1 | # TrainAndTest.py 2 | 3 | import cv2 4 | import numpy as np 5 | import operator 6 | import os 7 | 8 | # module level variables ########################################################################## 9 | MIN_CONTOUR_AREA = 100 10 | 11 | RESIZED_IMAGE_WIDTH = 20 12 | RESIZED_IMAGE_HEIGHT = 30 13 | 14 | ################################################################################################### 15 | class ContourWithData(): 16 | 17 | # member variables ############################################################################ 18 | npaContour = None # contour 19 | boundingRect = None # bounding rect for contour 20 | intRectX = 0 # bounding rect top left corner x location 21 | intRectY = 0 # bounding rect top left corner y location 22 | intRectWidth = 0 # bounding rect width 23 | intRectHeight = 0 # bounding rect height 24 | fltArea = 0.0 # area of contour 25 | 26 | def calculateRectTopLeftPointAndWidthAndHeight(self): # calculate bounding rect info 27 | [intX, intY, intWidth, intHeight] = self.boundingRect 28 | self.intRectX = intX 29 | self.intRectY = intY 30 | self.intRectWidth = intWidth 31 | self.intRectHeight = intHeight 32 | 33 | def checkIfContourIsValid(self): # this is oversimplified, for a production grade program 34 | if self.fltArea < MIN_CONTOUR_AREA: return False # much better validity checking would be necessary 35 | return True 36 | 37 | ################################################################################################### 38 | def main(): 39 | allContoursWithData = [] # declare empty lists, 40 | validContoursWithData = [] # we will fill these shortly 41 | 42 | try: 43 | npaClassifications = np.loadtxt("classifications.txt", np.float32) # read in training classifications 44 | except: 45 | print "error, unable to open classifications.txt, exiting program\n" 46 | os.system("pause") 47 | return 48 | # end try 49 | 50 | try: 51 | npaFlattenedImages = np.loadtxt("flattened_images.txt", np.float32) # read in training images 52 | except: 53 | print "error, unable to open flattened_images.txt, exiting program\n" 54 | os.system("pause") 55 | return 56 | # end try 57 | 58 | npaClassifications = npaClassifications.reshape((npaClassifications.size, 1)) # reshape numpy array to 1d, necessary to pass to call to train 59 | 60 | kNearest = cv2.ml.KNearest_create() # instantiate KNN object 61 | 62 | kNearest.train(npaFlattenedImages, cv2.ml.ROW_SAMPLE, npaClassifications) 63 | 64 | imgTestingNumbers = cv2.imread("test1.png") # read in testing numbers image 65 | 66 | if imgTestingNumbers is None: # if image was not read successfully 67 | print "error: image not read from file \n\n" # print error message to std out 68 | os.system("pause") # pause so user can see error message 69 | return # and exit function (which exits program) 70 | # end if 71 | 72 | imgGray = cv2.cvtColor(imgTestingNumbers, cv2.COLOR_BGR2GRAY) # get grayscale image 73 | imgBlurred = cv2.GaussianBlur(imgGray, (5,5), 0) # blur 74 | 75 | # filter image from grayscale to black and white 76 | imgThresh = cv2.adaptiveThreshold(imgBlurred, # input image 77 | 255, # make pixels that pass the threshold full white 78 | cv2.ADAPTIVE_THRESH_GAUSSIAN_C, # use gaussian rather than mean, seems to give better results 79 | cv2.THRESH_BINARY_INV, # invert so foreground will be white, background will be black 80 | 11, # size of a pixel neighborhood used to calculate threshold value 81 | 2) # constant subtracted from the mean or weighted mean 82 | 83 | imgThreshCopy = imgThresh.copy() # make a copy of the thresh image, this in necessary b/c findContours modifies the image 84 | 85 | imgContours, npaContours, npaHierarchy = cv2.findContours(imgThreshCopy, # input image, make sure to use a copy since the function will modify this image in the course of finding contours 86 | cv2.RETR_EXTERNAL, # retrieve the outermost contours only 87 | cv2.CHAIN_APPROX_SIMPLE) # compress horizontal, vertical, and diagonal segments and leave only their end points 88 | 89 | for npaContour in npaContours: # for each contour 90 | contourWithData = ContourWithData() # instantiate a contour with data object 91 | contourWithData.npaContour = npaContour # assign contour to contour with data 92 | contourWithData.boundingRect = cv2.boundingRect(contourWithData.npaContour) # get the bounding rect 93 | contourWithData.calculateRectTopLeftPointAndWidthAndHeight() # get bounding rect info 94 | contourWithData.fltArea = cv2.contourArea(contourWithData.npaContour) # calculate the contour area 95 | allContoursWithData.append(contourWithData) # add contour with data object to list of all contours with data 96 | # end for 97 | 98 | for contourWithData in allContoursWithData: # for all contours 99 | if contourWithData.checkIfContourIsValid(): # check if valid 100 | validContoursWithData.append(contourWithData) # if so, append to valid contour list 101 | # end if 102 | # end for 103 | 104 | validContoursWithData.sort(key = operator.attrgetter("intRectX")) # sort contours from left to right 105 | 106 | strFinalString = "" # declare final string, this will have the final number sequence by the end of the program 107 | 108 | for contourWithData in validContoursWithData: # for each contour 109 | # draw a green rect around the current char 110 | cv2.rectangle(imgTestingNumbers, # draw rectangle on original testing image 111 | (contourWithData.intRectX, contourWithData.intRectY), # upper left corner 112 | (contourWithData.intRectX + contourWithData.intRectWidth, contourWithData.intRectY + contourWithData.intRectHeight), # lower right corner 113 | (0, 255, 0), # green 114 | 2) # thickness 115 | 116 | imgROI = imgThresh[contourWithData.intRectY : contourWithData.intRectY + contourWithData.intRectHeight, # crop char out of threshold image 117 | contourWithData.intRectX : contourWithData.intRectX + contourWithData.intRectWidth] 118 | 119 | imgROIResized = cv2.resize(imgROI, (RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT)) # resize image, this will be more consistent for recognition and storage 120 | 121 | npaROIResized = imgROIResized.reshape((1, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT)) # flatten image into 1d numpy array 122 | 123 | npaROIResized = np.float32(npaROIResized) # convert from 1d numpy array of ints to 1d numpy array of floats 124 | 125 | retval, npaResults, neigh_resp, dists = kNearest.findNearest(npaROIResized, k = 1) # call KNN function find_nearest 126 | 127 | strCurrentChar = str(chr(int(npaResults[0][0]))) # get character from results 128 | 129 | strFinalString = strFinalString + strCurrentChar # append current char to full string 130 | # end for 131 | 132 | print "\n" + strFinalString + "\n" # show the full string 133 | 134 | cv2.imshow("imgTestingNumbers", imgTestingNumbers) # show input image with green boxes drawn around found digits 135 | cv2.waitKey(0) # wait for user key press 136 | 137 | cv2.destroyAllWindows() # remove windows from memory 138 | 139 | return 140 | 141 | ################################################################################################### 142 | if __name__ == "__main__": 143 | main() 144 | # end if 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | -------------------------------------------------------------------------------- /classifications.txt: -------------------------------------------------------------------------------- 1 | 9.000000000000000000e+01 2 | 8.400000000000000000e+01 3 | 8.200000000000000000e+01 4 | 8.000000000000000000e+01 5 | 7.700000000000000000e+01 6 | 7.000000000000000000e+01 7 | 6.900000000000000000e+01 8 | 6.800000000000000000e+01 9 | 6.600000000000000000e+01 10 | 6.500000000000000000e+01 11 | 8.900000000000000000e+01 12 | 8.800000000000000000e+01 13 | 8.700000000000000000e+01 14 | 8.600000000000000000e+01 15 | 8.500000000000000000e+01 16 | 8.300000000000000000e+01 17 | 8.100000000000000000e+01 18 | 7.900000000000000000e+01 19 | 7.800000000000000000e+01 20 | 7.600000000000000000e+01 21 | 7.500000000000000000e+01 22 | 7.400000000000000000e+01 23 | 7.300000000000000000e+01 24 | 7.200000000000000000e+01 25 | 7.100000000000000000e+01 26 | 6.700000000000000000e+01 27 | 5.700000000000000000e+01 28 | 5.600000000000000000e+01 29 | 5.500000000000000000e+01 30 | 5.400000000000000000e+01 31 | 5.300000000000000000e+01 32 | 5.100000000000000000e+01 33 | 5.000000000000000000e+01 34 | 4.800000000000000000e+01 35 | 5.200000000000000000e+01 36 | 4.900000000000000000e+01 37 | 8.200000000000000000e+01 38 | 8.000000000000000000e+01 39 | 7.700000000000000000e+01 40 | 6.800000000000000000e+01 41 | 6.600000000000000000e+01 42 | 6.500000000000000000e+01 43 | 9.000000000000000000e+01 44 | 8.900000000000000000e+01 45 | 8.800000000000000000e+01 46 | 8.700000000000000000e+01 47 | 8.600000000000000000e+01 48 | 8.500000000000000000e+01 49 | 8.400000000000000000e+01 50 | 8.300000000000000000e+01 51 | 8.100000000000000000e+01 52 | 7.900000000000000000e+01 53 | 7.800000000000000000e+01 54 | 7.600000000000000000e+01 55 | 7.500000000000000000e+01 56 | 7.400000000000000000e+01 57 | 7.300000000000000000e+01 58 | 7.200000000000000000e+01 59 | 7.100000000000000000e+01 60 | 7.000000000000000000e+01 61 | 6.900000000000000000e+01 62 | 6.700000000000000000e+01 63 | 5.200000000000000000e+01 64 | 4.900000000000000000e+01 65 | 5.700000000000000000e+01 66 | 5.600000000000000000e+01 67 | 5.500000000000000000e+01 68 | 5.400000000000000000e+01 69 | 5.300000000000000000e+01 70 | 5.100000000000000000e+01 71 | 5.000000000000000000e+01 72 | 4.800000000000000000e+01 73 | 9.000000000000000000e+01 74 | 8.900000000000000000e+01 75 | 8.800000000000000000e+01 76 | 8.700000000000000000e+01 77 | 8.600000000000000000e+01 78 | 8.500000000000000000e+01 79 | 8.400000000000000000e+01 80 | 8.300000000000000000e+01 81 | 8.200000000000000000e+01 82 | 8.100000000000000000e+01 83 | 8.000000000000000000e+01 84 | 7.900000000000000000e+01 85 | 7.800000000000000000e+01 86 | 7.700000000000000000e+01 87 | 7.600000000000000000e+01 88 | 7.500000000000000000e+01 89 | 7.400000000000000000e+01 90 | 7.300000000000000000e+01 91 | 7.200000000000000000e+01 92 | 7.100000000000000000e+01 93 | 7.000000000000000000e+01 94 | 6.900000000000000000e+01 95 | 6.800000000000000000e+01 96 | 6.700000000000000000e+01 97 | 6.600000000000000000e+01 98 | 6.500000000000000000e+01 99 | 5.700000000000000000e+01 100 | 5.600000000000000000e+01 101 | 5.500000000000000000e+01 102 | 5.400000000000000000e+01 103 | 5.300000000000000000e+01 104 | 5.200000000000000000e+01 105 | 5.100000000000000000e+01 106 | 5.000000000000000000e+01 107 | 4.900000000000000000e+01 108 | 4.800000000000000000e+01 109 | 9.000000000000000000e+01 110 | 8.900000000000000000e+01 111 | 8.800000000000000000e+01 112 | 8.700000000000000000e+01 113 | 8.600000000000000000e+01 114 | 8.500000000000000000e+01 115 | 8.400000000000000000e+01 116 | 8.200000000000000000e+01 117 | 8.000000000000000000e+01 118 | 7.800000000000000000e+01 119 | 7.700000000000000000e+01 120 | 7.600000000000000000e+01 121 | 7.500000000000000000e+01 122 | 7.400000000000000000e+01 123 | 7.300000000000000000e+01 124 | 7.200000000000000000e+01 125 | 7.000000000000000000e+01 126 | 6.900000000000000000e+01 127 | 6.800000000000000000e+01 128 | 6.600000000000000000e+01 129 | 6.500000000000000000e+01 130 | 8.300000000000000000e+01 131 | 8.100000000000000000e+01 132 | 7.900000000000000000e+01 133 | 7.100000000000000000e+01 134 | 6.700000000000000000e+01 135 | 5.500000000000000000e+01 136 | 5.100000000000000000e+01 137 | 5.000000000000000000e+01 138 | 4.900000000000000000e+01 139 | 5.700000000000000000e+01 140 | 5.600000000000000000e+01 141 | 5.400000000000000000e+01 142 | 5.300000000000000000e+01 143 | 5.200000000000000000e+01 144 | 4.800000000000000000e+01 145 | 8.200000000000000000e+01 146 | 8.000000000000000000e+01 147 | 7.700000000000000000e+01 148 | 6.800000000000000000e+01 149 | 6.600000000000000000e+01 150 | 6.500000000000000000e+01 151 | 9.000000000000000000e+01 152 | 8.900000000000000000e+01 153 | 8.800000000000000000e+01 154 | 8.700000000000000000e+01 155 | 8.600000000000000000e+01 156 | 8.500000000000000000e+01 157 | 8.400000000000000000e+01 158 | 8.300000000000000000e+01 159 | 8.100000000000000000e+01 160 | 7.900000000000000000e+01 161 | 7.800000000000000000e+01 162 | 7.600000000000000000e+01 163 | 7.500000000000000000e+01 164 | 7.400000000000000000e+01 165 | 7.300000000000000000e+01 166 | 7.200000000000000000e+01 167 | 7.100000000000000000e+01 168 | 7.000000000000000000e+01 169 | 6.900000000000000000e+01 170 | 6.700000000000000000e+01 171 | 5.500000000000000000e+01 172 | 5.300000000000000000e+01 173 | 5.200000000000000000e+01 174 | 5.700000000000000000e+01 175 | 5.600000000000000000e+01 176 | 5.400000000000000000e+01 177 | 5.100000000000000000e+01 178 | 5.000000000000000000e+01 179 | 4.900000000000000000e+01 180 | 4.800000000000000000e+01 181 | -------------------------------------------------------------------------------- /readme.txt: -------------------------------------------------------------------------------- 1 | The video pretty much explains it all: 2 | https://www.youtube.com/watch?v=c96w1JS28AY 3 | -------------------------------------------------------------------------------- /test1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MicrocontrollersAndMore/OpenCV_3_KNN_Character_Recognition_Python/97c0305db7211cd335b499aa1ee84f7884e6e44f/test1.png -------------------------------------------------------------------------------- /test2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MicrocontrollersAndMore/OpenCV_3_KNN_Character_Recognition_Python/97c0305db7211cd335b499aa1ee84f7884e6e44f/test2.png -------------------------------------------------------------------------------- /test3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MicrocontrollersAndMore/OpenCV_3_KNN_Character_Recognition_Python/97c0305db7211cd335b499aa1ee84f7884e6e44f/test3.png -------------------------------------------------------------------------------- /training_chars.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MicrocontrollersAndMore/OpenCV_3_KNN_Character_Recognition_Python/97c0305db7211cd335b499aa1ee84f7884e6e44f/training_chars.png -------------------------------------------------------------------------------- /training_chars_small_test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MicrocontrollersAndMore/OpenCV_3_KNN_Character_Recognition_Python/97c0305db7211cd335b499aa1ee84f7884e6e44f/training_chars_small_test.png --------------------------------------------------------------------------------