# Source dump context (repository developer-aj/TextDetection):
#   paper/text_detection.pdf
#   src/036.jpg, src/1.png, src/image.jpg, src/lena.jpg, src/sofsign.jpg
#   src/swt_new.py   <- this file
# Binary assets are served from
#   https://raw.githubusercontent.com/developer-aj/TextDetection/HEAD/<path>
"""
swt performs the stroke width transform on an input image.

The stroke width transform is an image operator that seeks to find the
value of stroke width for each image pixel.  Its use is meant for the task
of text detection in natural images.

name            = path of the input image file (read with cv2.imread)
searchDirection = 1 to detect dark text on a light background, or
                  -1 to detect light text on a dark background.

swtMap = resulting mapping of stroke widths for image pixels; pixels that
         no accepted ray crossed hold numpy.inf.
"""

import math

import numpy


def _gradient_angles(edge_map, grad_x, grad_y):
    """Return a float map holding atan2(grad_y, grad_x) at every edge pixel.

    Non-edge pixels are left at 0.  The map is float64 because the angles
    span [-pi, pi]; an integer dtype would truncate them.
    """
    edge_map = numpy.asarray(edge_map)
    theta = numpy.zeros(edge_map.shape, dtype=numpy.float64)
    rows, cols = numpy.nonzero(edge_map)
    theta[rows, cols] = numpy.arctan2(numpy.asarray(grad_y)[rows, cols],
                                      numpy.asarray(grad_x)[rows, cols])
    return theta


def _is_opposite(theta_a, theta_b, tolerance=math.pi / 6):
    """Return True when two angles point in opposite directions within tolerance."""
    # (b - a) mod 2*pi lies in [0, 2*pi); exactly opposite directions give pi.
    return abs((float(theta_b) - float(theta_a)) % (2 * math.pi) - math.pi) <= tolerance


def _trace_ray(edge_map, row, col, angle, direction, max_steps):
    """Walk from (row, col) along `angle` until an edge pixel or the border.

    Returns (points, hit).  `points` lists the visited (row, col) pixels,
    starting pixel included; `hit` is True when the last point is another
    edge pixel, False when the ray left the image or ran out of steps.
    """
    height, width = edge_map.shape[:2]
    # theta is atan2(dy, dx): the x component (cos) moves along columns and
    # the y component (sin) moves along rows.
    d_row = math.sin(float(angle)) * direction
    d_col = math.cos(float(angle)) * direction
    points = [(row, col)]
    for step in range(1, max_steps):
        r = int(round(row + d_row * step))
        c = int(round(col + d_col * step))
        if r < 0 or c < 0 or r >= height or c >= width:
            return points, False
        if (r, c) == points[-1]:
            # Rounding can map consecutive steps onto the same pixel.
            continue
        points.append((r, c))
        if edge_map[r, c]:
            return points, True
    return points, False


def stroke_width_transform(edge_map, theta, search_direction, max_stroke_width=350):
    """Compute the stroke width map from an edge map and gradient angles.

    edge_map         = 2-D array, non-zero at edge pixels
    theta            = 2-D float array of gradient angles (radians), same shape
    search_direction = +1 or -1, multiplies the ray direction
    max_stroke_width = rays are followed for fewer than this many steps

    Returns (swt_map, stroke_count).  swt_map is float64 with numpy.inf at
    pixels no accepted ray crossed; stroke_count is the number of accepted
    rays (edge pixels whose ray met a roughly opposite gradient).
    """
    edge_map = numpy.asarray(edge_map)
    theta = numpy.asarray(theta, dtype=numpy.float64)
    if edge_map.ndim != 2 or theta.shape != edge_map.shape:
        raise ValueError("edge_map and theta must be 2-D arrays of equal shape")

    swt_map = numpy.full(edge_map.shape, numpy.inf)
    rays = []

    # First pass: every pixel on an accepted ray gets the smallest stroke
    # width of any ray that crosses it.
    rows, cols = numpy.nonzero(edge_map)
    for row, col in zip(rows.tolist(), cols.tolist()):
        points, hit = _trace_ray(edge_map, row, col, theta[row, col],
                                 search_direction, max_stroke_width)
        if not hit:
            continue
        end_row, end_col = points[-1]
        if not _is_opposite(theta[row, col], theta[end_row, end_col]):
            continue
        stroke_width = math.hypot(end_row - row, end_col - col)
        rays.append(points)
        for r, c in points:
            if stroke_width < swt_map[r, c]:
                swt_map[r, c] = stroke_width

    # Second pass (refinement, figure 4b in the paper): clamp each ray to the
    # median value seen along it.  The rays stored in the first pass are
    # reused, so no pixel outside the image is ever addressed.
    for points in rays:
        median_width = float(numpy.median([swt_map[r, c] for r, c in points]))
        for r, c in points:
            if median_width < swt_map[r, c]:
                swt_map[r, c] = median_width

    return swt_map, len(rays)


def swt(name, searchDirection, maxStrokeWidth=350):
    """Run the stroke width transform on an image file and display the stages.

    name            = path of the image to read
    searchDirection = 1 for dark text on light background, -1 for light
                      text on dark background
    maxStrokeWidth  = upper bound on the number of steps a ray is followed

    Prints the gradient-angle map and the number of accepted stroke rays,
    shows a 2x3 matplotlib figure of the intermediate images, writes
    "dx.jpg", and returns the stroke width map (numpy.inf where unset).
    Raises IOError when the image cannot be read.
    """
    # Imported here so the NumPy-only core above can be used without OpenCV
    # or matplotlib installed.
    import cv2
    from matplotlib import pyplot as plt

    src = cv2.imread(name)
    if src is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError("could not read image: %r" % (name,))

    imgray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)

    # Find edges using canny edge detector
    edgeMap = cv2.Canny(imgray, 100, 300)

    # Horizontal & vertical gradients.  scale=-1 negates them so the ray
    # direction points from light to dark, which is what makes
    # searchDirection=1 walk into dark strokes.  No delta offset is added:
    # a constant bias would skew the gradient angles.
    dx = cv2.Sobel(imgray, cv2.CV_32F, 1, 0, ksize=3, scale=-1,
                   borderType=cv2.BORDER_DEFAULT)
    dy = cv2.Sobel(imgray, cv2.CV_32F, 0, 1, ksize=3, scale=-1,
                   borderType=cv2.BORDER_DEFAULT)

    theta = _gradient_angles(edgeMap, dx, dy)
    print(theta)

    swtMap, strokeCount = stroke_width_transform(edgeMap, theta,
                                                 searchDirection, maxStrokeWidth)

    panels = [
        ('ORIG', cv2.cvtColor(src, cv2.COLOR_BGR2RGB)),  # matplotlib expects RGB
        ('Canny', edgeMap),
        ('GradientX', dx),
        ('GradientY', dy),
        ('Theta', theta),
        # Infinite entries would break the colour scaling; show them as 0.
        ('swt', numpy.where(numpy.isfinite(swtMap), swtMap, 0)),
    ]
    for index, (title, image) in enumerate(panels):
        plt.subplot(2, 3, index + 1)
        plt.imshow(image, 'gray')
        plt.title(title)
        plt.xticks([])
        plt.yticks([])

    plt.show()
    cv2.imwrite("dx.jpg", dx)
    print(strokeCount)
    return swtMap


if __name__ == "__main__":
    swt("036.jpg", -1)