├── __init__.py ├── .gitignore ├── .DS_Store ├── SimonLucas.jpg ├── lib ├── blank.png ├── __init__.py ├── localize.py ├── profiler.py ├── textCropper.py ├── fastConnectedComponents.pyx ├── fastRay.pyx ├── textLocalizer.py ├── letterCombinator.py ├── connected_components.py └── swt.py ├── test ├── .DS_Store ├── images │ ├── N.jpg │ ├── 036.jpg │ ├── stop.jpg │ ├── billboard.jpg │ ├── elevator.JPG │ ├── epic_ave.jpg │ ├── fallout_5.jpg │ ├── fallout_6.jpg │ ├── sofsign.jpg │ ├── stopsign.jpg │ ├── traffic.jpg │ ├── conditions.JPG │ ├── rab_butler.JPG │ ├── text_b_on_w.jpg │ ├── text_w_on_b.jpg │ ├── 22_washington.JPG │ ├── caution_laser.jpg │ ├── emergency_exit.jpg │ ├── emergency_stop.JPG │ ├── memorex_CD_R.JPG │ ├── panasync_e70.JPG │ ├── postgrad_study.JPG │ ├── race_for_life.JPG │ ├── tesco_liquid.JPG │ ├── caution_laser_2.jpg │ ├── fallout_shelter.jpg │ ├── traffic_cropped.jpeg │ ├── billboard-cropped.jpg │ ├── ComputerScienceSmall.jpg │ ├── howard_jacobson_redback.JPG │ ├── litter_colchester_borough.JPG │ ├── first_eastern_national_bus_times.JPG │ ├── garden_path_at_giverney_claude_monet.JPG │ └── uk_dance_prototype_inspired_records.JPG ├── test_connectedComponent_with_swt.py ├── test_tessaract.py ├── test_cropper.py ├── test_text_localizer.py ├── test_swt.py └── test_letter_combinator.py ├── results ├── billboard_swt.png ├── sofsign_swt.png └── swt_img_benchmarks.txt ├── README.md └── setup.py /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | build 3 | lib/*.c 4 | lib/*.so 5 | lib/*.pyc 6 | .DS_Store -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/.DS_Store -------------------------------------------------------------------------------- /SimonLucas.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/SimonLucas.jpg -------------------------------------------------------------------------------- /lib/blank.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/lib/blank.png -------------------------------------------------------------------------------- /test/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/.DS_Store -------------------------------------------------------------------------------- /test/images/N.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/N.jpg -------------------------------------------------------------------------------- /test/images/036.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/036.jpg -------------------------------------------------------------------------------- /test/images/stop.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/stop.jpg -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["swt", "connected_components", "textCropper", "textLocalizer", "letterCombinator"] -------------------------------------------------------------------------------- /results/billboard_swt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/results/billboard_swt.png -------------------------------------------------------------------------------- /results/sofsign_swt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/results/sofsign_swt.png -------------------------------------------------------------------------------- /test/images/billboard.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/billboard.jpg -------------------------------------------------------------------------------- /test/images/elevator.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/elevator.JPG -------------------------------------------------------------------------------- /test/images/epic_ave.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/epic_ave.jpg -------------------------------------------------------------------------------- /test/images/fallout_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/fallout_5.jpg -------------------------------------------------------------------------------- /test/images/fallout_6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/fallout_6.jpg -------------------------------------------------------------------------------- /test/images/sofsign.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/sofsign.jpg -------------------------------------------------------------------------------- /test/images/stopsign.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/stopsign.jpg -------------------------------------------------------------------------------- /test/images/traffic.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/traffic.jpg -------------------------------------------------------------------------------- /test/images/conditions.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/conditions.JPG -------------------------------------------------------------------------------- /test/images/rab_butler.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/rab_butler.JPG -------------------------------------------------------------------------------- /test/images/text_b_on_w.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/text_b_on_w.jpg -------------------------------------------------------------------------------- /test/images/text_w_on_b.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/text_w_on_b.jpg -------------------------------------------------------------------------------- /test/images/22_washington.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/22_washington.JPG -------------------------------------------------------------------------------- /test/images/caution_laser.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/caution_laser.jpg -------------------------------------------------------------------------------- /test/images/emergency_exit.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/emergency_exit.jpg -------------------------------------------------------------------------------- /test/images/emergency_stop.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/emergency_stop.JPG -------------------------------------------------------------------------------- /test/images/memorex_CD_R.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/memorex_CD_R.JPG -------------------------------------------------------------------------------- /test/images/panasync_e70.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/panasync_e70.JPG -------------------------------------------------------------------------------- /test/images/postgrad_study.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/postgrad_study.JPG -------------------------------------------------------------------------------- /test/images/race_for_life.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/race_for_life.JPG -------------------------------------------------------------------------------- /test/images/tesco_liquid.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/tesco_liquid.JPG -------------------------------------------------------------------------------- /test/images/caution_laser_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/caution_laser_2.jpg -------------------------------------------------------------------------------- /test/images/fallout_shelter.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/fallout_shelter.jpg -------------------------------------------------------------------------------- /test/images/traffic_cropped.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/traffic_cropped.jpeg -------------------------------------------------------------------------------- /test/images/billboard-cropped.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/billboard-cropped.jpg -------------------------------------------------------------------------------- /test/images/ComputerScienceSmall.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/ComputerScienceSmall.jpg -------------------------------------------------------------------------------- /test/images/howard_jacobson_redback.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/howard_jacobson_redback.JPG -------------------------------------------------------------------------------- /test/images/litter_colchester_borough.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/litter_colchester_borough.JPG -------------------------------------------------------------------------------- /test/images/first_eastern_national_bus_times.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/first_eastern_national_bus_times.JPG -------------------------------------------------------------------------------- /test/images/garden_path_at_giverney_claude_monet.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/garden_path_at_giverney_claude_monet.JPG -------------------------------------------------------------------------------- /test/images/uk_dance_prototype_inspired_records.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlintz/StrokeWidthTransform/HEAD/test/images/uk_dance_prototype_inspired_records.JPG -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | StrokeWidthTransform 2 | ==================== 3 | 4 | SWT implemented in Python 5 | 6 | To build package run the following from the StrokeWidthTranform directory 7 | ``` 8 | OPT="-O3 -ffast-math" python setup.py build_ext -i 9 | ``` 10 | 11 | To use package 12 | ``` 13 | from StrokeWidthTransform import swt 14 | ``` -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Distutils import build_ext 4 | setup( 5 | cmdclass = {'build_ext': build_ext}, 6 | ext_modules = [ 7 | Extension("lib.fastRay", 8 | ["lib/fastRay.pyx"], 9 | ), 10 | Extension("lib.fastConnectedComponents", 11 | ['lib/fastConnectedComponents.pyx'], 12 | ) 13 | ]) -------------------------------------------------------------------------------- /lib/localize.py: -------------------------------------------------------------------------------- 1 | import letterCombinator as lc 2 | import connected_components as cc 3 | from swt import swt 4 | import numpy as np 5 | from matplotlib import pyplot as plt 6 | import cv2 7 | import math, random 8 | from profiler import * 9 | import itertools 10 | import textLocalizer as tl 11 | 12 | def generateSwt(img): 13 | img = cv2.imread('test/images/'+img+'.jpg', 0) 14 | ccImg = tl.TextLocalizer.findLetterChains(img, 1) 15 | plt.subplot(2,1,1) 16 | plt.imshow(ccImg) 17 | 18 | ccImg = tl.TextLocalizer.findLetterChains(img, -1) 19 | plt.subplot(2,1,2) 20 | plt.imshow(ccImg) 21 | plt.show() 22 | 23 | if __name__ == "__main__": 24 | if len(sys.argv) != 2: 25 | print "Usage: python localize.py " 26 | else: 27 | generateSwt(sys.argv[1]) 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /test/test_connectedComponent_with_swt.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.append(os.path.join(os.path.dirname(__file__), '../', 'lib')) 3 | import lib.swt as swt 4 | import lib.connected_components as cc 5 | import numpy as np 6 | from matplotlib import pyplot as plt 7 | import cv2 8 | import math 9 | from profiler import * 10 | 11 | @timeit 12 | def test_cc_with_swt(): 13 | img = cv2.imread('test/stopsign.jpg', 0) 14 | swt_pos = swt.strokeWidthTransform(img, 1) 15 | swt_pos_dilated = 255 - cv2.dilate(255 - swt_pos, kernel = np.ones((2,2),np.uint8), iterations = 2) 16 | regions = cc.connectComponents(swt_pos) 17 | ccImg = cc.connectedComponentsToImg(swt_pos, regions, img.shape[0], img.shape[1]) 18 | 19 | plt.imshow(ccImg) 20 | plt.show() 21 | 22 | if __name__ == "__main__": 23 | test_cc_with_swt() -------------------------------------------------------------------------------- /results/swt_img_benchmarks.txt: -------------------------------------------------------------------------------- 1 | Timing: 036.jpg 2 | multiprocess start 3 | multiprocess ran - Time Elapsed: 3.450937 s 4 | single process start 5 | single process ran - Time Elapsed: 2.654990 s 6 | 7 | Timing: billboard-cropped.jpg 8 | multiprocess start 9 | multiprocess ran - Time Elapsed: 0.327305 s 10 | single process start 11 | single process ran - Time Elapsed: 0.346880 s 12 | 13 | Timing: billboard.jpg 14 | multiprocess start 15 | multiprocess ran - Time Elapsed: 6.308556 s 16 | single process start 17 | single process ran - Time Elapsed: 7.686914 s 18 | 19 | Timing: sofsign.jpg 20 | multiprocess start 21 | multiprocess ran - Time Elapsed: 1.533666 s 22 | single process start 23 | single process ran - Time Elapsed: 1.427003 s 24 | 25 | Timing: traffic.jpg 26 | multiprocess start 27 | multiprocess ran - Time Elapsed: 0.438128 s 28 | single process start 29 | single process ran - Time Elapsed: 0.311239 s -------------------------------------------------------------------------------- /lib/profiler.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | def timeit(method): 4 | 5 | def timed(*args, **kw): 6 | ts = time.time() 7 | result = method(*args, **kw) 8 | te = time.time() 9 | 10 | print '%2.2f sec' % \ 11 | (te-ts) 12 | return result 13 | 14 | return timed 15 | 16 | class Timer(object): 17 | def __init__(self): 18 | self.startTimes = {} 19 | self.stopTimes = {} 20 | 21 | def start(self, name): 22 | print name + " start" 23 | self.startTimes[name] = time.time() 24 | 25 | def stop(self, name): 26 | stopTime = time.time() 27 | print "%s ran - Time Elapsed: %f s" % (name, (stopTime - self.startTimes[name])) 28 | self.stopTimes[name] = stopTime 29 | 30 | def startOnce(self, name): 31 | if not self.startTimes.get(name, False): 32 | self.start(name) 33 | 34 | def stopOnce(self, name): 35 | if not self.stopTimes.get(name, False): 36 | self.stop(name) 37 | 38 | -------------------------------------------------------------------------------- /test/test_tessaract.py: -------------------------------------------------------------------------------- 1 | import Image 2 | import pytesseract 3 | import cv2 4 | import numpy as np 5 | 6 | cv2_images = [cv2.imread('images/fallout_6.jpg',0)] 7 | 8 | for image in cv2_images: 9 | clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8)) 10 | cl_image = clahe.apply(image) 11 | cl_image = np.uint8(cl_image*255) 12 | pil_im = Image.fromarray(cl_image) 13 | print pytesseract.image_to_string(pil_im, lang="eng") 14 | print "*******" 15 | 16 | 17 | 18 | #im = Image.fromarray(np.uint8(cm.gist_earth(myarray)*255)) 19 | 20 | 21 | img = cv2.imread('images/fallout_6.jpg',0) 22 | 23 | clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8)) 24 | cl1 = clahe.apply(img) 25 | cv2.imwrite('images/fallout_6_better.jpg', cl1) 26 | cv2_im = cv2.imread('images/fallout_6_better.jpg',0) 27 | pil_im = Image.fromarray(cv2_im) 28 | print pytesseract.image_to_string(pil_im, lang="eng") 29 | 30 | 31 | 32 | """ 33 | print ("******") 34 | print pytesseract.image_to_string(Image.open('images/fallout_5_better.jpg'), lang="eng") 35 | print ("******") 36 | print pytesseract.image_to_string(Image.open('images/fallout_6_better.jpg'), lang="eng") 37 | """ -------------------------------------------------------------------------------- /lib/textCropper.py: -------------------------------------------------------------------------------- 1 | import letterCombinator as lc 2 | import textLocalizer as tl 3 | import numpy as np 4 | from matplotlib import pyplot as plt 5 | import cv2 6 | import math, random 7 | from profiler import * 8 | import itertools 9 | 10 | class TextCropper(object): 11 | @staticmethod 12 | def cropTextRegionsFromImage(img, threshold=0): 13 | """ returns an array of cropped images which likely contain text regions 14 | """ 15 | imgGray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 16 | rows, cols = imgGray.shape 17 | localizer = tl.TextLocalizer() 18 | lines_pos = localizer.findLines(imgGray, 1, ['size', 'borders']) 19 | lines_neg = localizer.findLines(imgGray, -1, ['size', 'borders']) 20 | 21 | croppedRegions = [] 22 | for line in lines_pos: 23 | croppedRegions.append(TextCropper.getCroppedRegions(img, line, threshold)) 24 | for line in lines_neg: 25 | croppedRegions.append(TextCropper.getCroppedRegions(img, line, threshold)) 26 | return croppedRegions 27 | 28 | @staticmethod 29 | def getCroppedRegions(img, line, threshold): 30 | ((minY, minX), (maxY, maxX)) = line.bounds() 31 | return img[minY-threshold:maxY+threshold, minX-threshold:maxX+threshold] 32 | -------------------------------------------------------------------------------- /test/test_cropper.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.insert(0, '../') 3 | # sys.path.append(os.path.join(os.path.dirname(__file__), '../', 'lib')) 4 | import lib.textCropper as tc 5 | import numpy as np 6 | from matplotlib import pyplot as plt 7 | import cv2 8 | import math, random 9 | from lib.profiler import * 10 | import itertools 11 | import Image 12 | import pytesseract 13 | 14 | 15 | def find_text(img): 16 | gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 17 | 18 | clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8)) 19 | cl1 = clahe.apply(gray) 20 | cv2.imwrite('placeholder.jpg', cl1) 21 | cv2_im = cv2.imread('placeholder.jpg',0) 22 | pil_im = Image.fromarray(cv2_im) 23 | return pytesseract.image_to_string(pil_im, lang="eng") 24 | 25 | def test_text_cropper(): 26 | if len(sys.argv) > 1: 27 | image_name= sys.argv[1] 28 | else: 29 | image_name = "emergency_stop" 30 | img = cv2.imread('images/' + image_name + '.jpg') 31 | croppedRegions = tc.TextCropper.cropTextRegionsFromImage(img, 20) 32 | for i in range(len(croppedRegions)): 33 | plt.subplot(len(croppedRegions), 1, i+1) 34 | if croppedRegions[i].size != 0: 35 | b,g,r = cv2.split(croppedRegions[i]) 36 | rgbImg = cv2.merge([r,g,b]) 37 | # cv2.imwrite('fallout_'+str(i)+'.jpg', croppedRegions[i]) 38 | print find_text(rgbImg) 39 | plt.imshow(rgbImg) 40 | plt.show() 41 | 42 | if __name__ == "__main__": 43 | test_text_cropper() -------------------------------------------------------------------------------- /lib/fastConnectedComponents.pyx: -------------------------------------------------------------------------------- 1 | import Queue 2 | 3 | def bfs(img, int rows, int cols): 4 | # q = Queue.Queue() 5 | q = [] 6 | enqueued = {} 7 | tags = [] 8 | 9 | cdef int tag_count = 0 10 | cdef int x, y, child_x, child_y, i, j, color 11 | cdef float b_shade, n_shade 12 | 13 | for i in range(rows): 14 | for j in range(cols): 15 | first_pix = (i, j) 16 | if not first_pix in enqueued: 17 | tags.append([]) 18 | tags[tag_count] = [] 19 | q.append(first_pix) 20 | enqueued[first_pix] = True 21 | while len(q) > 0: 22 | [y,x] = q.pop(0) 23 | color = img[y, x] 24 | b_shade = compute_b_shade(color, 255.001) 25 | 26 | for pix in [(y,x-1), (y,x+1), (y-1,x), (y+1,x)]: 27 | child_y = pix[0] 28 | child_x = pix[1] 29 | if child_y >= 0 and child_y < rows and child_x >= 0 and child_x < cols: 30 | n_shade = compute_b_shade(img[child_y, child_x], 255.001) 31 | if are_neighbors(n_shade, b_shade): 32 | if not pix in enqueued: 33 | q.append(pix) 34 | enqueued[pix] = True 35 | tags[tag_count].append((y, x, color)) 36 | tag_count += 1 37 | return tags 38 | 39 | cdef float compute_b_shade(int color, float offset): 40 | return color * (-1) + offset 41 | 42 | cdef float compute_n_shade(int color, float offset): 43 | return color * (-1) + offset 44 | 45 | cdef int are_neighbors(float n_shade, float b_shade): 46 | if (n_shade/b_shade) < 3 and (n_shade/b_shade) > 0.33: 47 | return 1 48 | return 0 -------------------------------------------------------------------------------- /test/test_text_localizer.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.append(os.path.join(os.path.dirname(__file__), '../', 'lib')) 3 | import lib.swt as swt 4 | import lib.letterCombinator as lc 5 | import lib.textLocalizer as tl 6 | import numpy as np 7 | from matplotlib import pyplot as plt 8 | import cv2 9 | import math, random 10 | from profiler import * 11 | import itertools 12 | 13 | def test_localizeText(): 14 | img = cv2.imread('test/images/fallout_shelter.jpg', 0) 15 | rows, cols = img.shape 16 | 17 | renderer = tl.LetterRenderer() 18 | localizer = tl.TextLocalizer() 19 | 20 | plt.subplot(2,1,1) 21 | new_img = np.zeros((rows, cols, 3), np.uint8) 22 | lines = localizer.findLines(img, 1, ['size', 'borders']) 23 | renderer.draw_word_lines(new_img, lines) 24 | plt.imshow(new_img) 25 | 26 | plt.subplot(2,1,2) 27 | new_img = np.zeros((rows, cols, 3), np.uint8) 28 | lines = localizer.findLines(img, -1, ['size', 'borders']) 29 | renderer.draw_word_lines(new_img, lines) 30 | plt.imshow(new_img) 31 | plt.show() 32 | 33 | def test_findLetters(): 34 | img = cv2.imread('test/images/emergency_stop.jpg', 0) 35 | rows, cols = img.shape 36 | new_img = np.zeros((rows, cols, 3), np.uint8) 37 | 38 | renderer = tl.LetterRenderer() 39 | localizer = tl.TextLocalizer() 40 | letters = localizer.findLetters(img, -1, ['size', 'borders']) 41 | 42 | renderer.draw_letters(new_img, letters) 43 | for letter in letters: 44 | renderer.draw_letter_rect(new_img, letter) 45 | plt.imshow(new_img) 46 | plt.show() 47 | 48 | def test_findLetterPairs(): 49 | img = cv2.imread('test/images/uk_dance_prototype_inspired_records.jpg', 0) 50 | rows, cols = img.shape 51 | new_img = np.zeros((rows, cols, 3), np.uint8) 52 | 53 | renderer = tl.LetterRenderer() 54 | localizer = tl.TextLocalizer() 55 | letters = localizer.findLetters(img, -1, ['size', 'borders']) 56 | 57 | letterPairs = lc.LetterCombinator.generateLetterPairs(letters) 58 | filteredLetterPairs = localizer.filterLetterPairs(letterPairs) 59 | 60 | letterChains = [lc.LetterChain.chainFromPair(pair) for pair in filteredLetterPairs] 61 | for chain in letterChains: 62 | renderer.draw_letter_rect(new_img, chain.chainToRegion()) 63 | plt.imshow(new_img) 64 | plt.show() 65 | 66 | if __name__ == "__main__": 67 | test_localizeText() -------------------------------------------------------------------------------- /test/test_swt.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.append(os.path.join(os.path.dirname(__file__), '../', 'swt')) 3 | import fastRay as fastRay 4 | from lib.swt import swt 5 | import numpy as np 6 | from matplotlib import pyplot as plt 7 | import cv2 8 | import math 9 | from profiler import * 10 | 11 | def test_profileSWT(): 12 | imgs = ['036.jpg', 'billboard-cropped.jpg', 13 | 'billboard.jpg', 'sofsign.jpg', 'traffic.jpg'] 14 | 15 | for imgname in imgs: 16 | img = cv2.imread('test/'+imgname,0) 17 | print 'Timing: ' + imgname 18 | swt_pos = swt.strokeWidthTransform(img, 1) 19 | 20 | def test_imageSWT(): 21 | filename = 'test/rab_butler.jpg' 22 | img = cv2.imread(filename,0) 23 | B,G,R = cv2.split(cv2.imread(filename,1)) 24 | img_color = cv2.merge((R,G,B)) 25 | swt_pos = swt.strokeWidthTransform(img, 1) 26 | swt_pos_dilated = 255 - cv2.dilate(255 - swt_pos, kernel = np.ones((2,2),np.uint8), iterations = 2) 27 | swt_neg = swt.strokeWidthTransform(img, -1) 28 | swt_neg_dilated = 255 - cv2.dilate(255 - swt_neg, kernel = np.ones((2,2),np.uint8), iterations = 2) 29 | 30 | plt.subplot(3,2,1) 31 | plt.imshow(img_color, interpolation="none") 32 | plt.title('original image') 33 | 34 | plt.subplot(3,2,3) 35 | plt.imshow(swt_pos, cmap="gray", interpolation="none") 36 | plt.title('positive swt of image') 37 | plt.subplot(3,2,4) 38 | plt.imshow(swt_pos_dilated, cmap="gray", interpolation="none") 39 | plt.title('dilated positive swt of image') 40 | 41 | plt.subplot(3,2,5) 42 | plt.title('negative swt of image') 43 | plt.imshow(swt_neg, cmap="gray", interpolation="none") 44 | plt.subplot(3,2,6) 45 | plt.title('dilated negative swt of image') 46 | plt.imshow(swt_neg_dilated, cmap="gray", interpolation="none") 47 | 48 | plt.show() 49 | 50 | def test_edge_detect(): 51 | filename = 'test/elevator.jpg' 52 | img = cv2.imread(filename,0) 53 | th = cv2.adaptiveThreshold(img,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,\ 54 | cv2.THRESH_BINARY,11,2) 55 | edges = cv2.Canny(th, 100, 200) 56 | plt.imshow(edges, 'gray') 57 | plt.show() 58 | 59 | def test_gradient(): 60 | filename = 'test/rab_butler.jpg' 61 | img = cv2.imread(filename,0) 62 | edges = cv2.Canny(img, 100, 300) 63 | thetas = swt.gradient(img, edges) 64 | plt.imshow(thetas, 'gray') 65 | plt.show() 66 | 67 | def test_first_pass(): 68 | filename = 'test/elevator.jpg' 69 | img = cv2.imread(filename,0) 70 | edges = swt.adaptiveEdges(img) 71 | thetas = swt.gradient(img, edges) 72 | firstPass, rays = fastRay.castRays(edges, thetas, 1) 73 | plt.imshow(firstPass, 'gray') 74 | plt.show() 75 | 76 | def test_plot_rays(): 77 | filename = 'test/elevator.jpg' 78 | img = cv2.imread(filename,0) 79 | edges = cv2.Canny(img, 100, 300) 80 | thetas = swt.gradient(img, edges) 81 | firstPass, rays = fastRay.castRays(edges, thetas, -1) 82 | 83 | rayPlot = np.zeros((img.shape[0], img.shape[1])) 84 | for ray in rays: 85 | for pixel in ray: 86 | rayPlot[pixel[0], pixel[1]] = 255 87 | plt.imshow(rayPlot, 'gray') 88 | plt.show() 89 | 90 | def test_first_and_second_pass(): 91 | filename = 'test/elevator.jpg' 92 | img = cv2.imread(filename,0) 93 | edges = cv2.Canny(img, 100, 300) 94 | thetas = swt.gradient(img, edges) 95 | firstPass, rays = fastRay.castRays(edges, thetas, 1) 96 | secondPass = swt.refineRays(firstPass, rays) 97 | plt.imshow(secondPass, 'gray') 98 | plt.show() 99 | 100 | if __name__ == "__main__": 101 | test_first_pass() -------------------------------------------------------------------------------- /lib/fastRay.pyx: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | 4 | cdef float angleDifference(float angle1, float angle2): 5 | """ Returns angle difference between ray starting angle and the ending angle 6 | arguments -- 7 | angle1: angle in radians 8 | angle2: angle in radians 9 | 10 | return -- 11 | distance between the angles in radians 12 | """ 13 | return abs(abs(angle1 - angle2) - math.pi) 14 | 15 | def castRays(edges, angles, direction, maxRayLength=100): 16 | """ casts a ray for every edge in the image 17 | arguments -- 18 | edges: black and white image result of canny edge detector 19 | angles: black and white image result of sobel operator 20 | direction: 1 or -1 21 | 22 | return -- 23 | [swt first pass, rays] 24 | """ 25 | swt = np.empty((edges.shape[0], edges.shape[1])) 26 | swt.fill(255) # swt vector is initialized with maximum values 27 | rays = [] 28 | nonZeroEdges = edges.nonzero() 29 | edgeIndices = zip(nonZeroEdges[0], nonZeroEdges[1]) 30 | edgeLookup = set(edgeIndices) 31 | 32 | for (row, column) in edgeIndices: 33 | ray = castRay(row, column, angles, edgeLookup, maxRayLength, direction) 34 | if ray: 35 | if len(ray) > 1: 36 | rays.append(ray) 37 | 38 | allRayLengths = map(lambda x: rayLength(x), filter(lambda x: x != None, rays)) 39 | 40 | if len(allRayLengths) == 0: 41 | return [swt, None] 42 | 43 | minL, maxL = min(allRayLengths), max(allRayLengths) 44 | for ray in rays: 45 | for pixel in ray: 46 | swt[pixel[0], pixel[1]] = min(normalize(rayLength(ray), minL, maxL, 0, 255), swt[pixel[0], pixel[1]]) 47 | return [swt, rays] 48 | 49 | cdef castRay(int startRow, int startColumn, angles, edgeIndices, int maxRayLength, int direction): 50 | """ Returns length of the ray 51 | arguments -- 52 | row/column: ray starting position 53 | angles: result of sobel operator 54 | edgeIndices: indices of edge pixels in image 55 | maxRayLength: maximum length of ray in pixels 56 | direction: 1 or -1 57 | 58 | return -- 59 | an array of pixels if valid ray or None 60 | """ 61 | 62 | cdef int height = angles.shape[0] 63 | cdef int width = angles.shape[1] 64 | cdef int rayLength = 1 65 | cdef float rayDirection = angles[row][column] 66 | cdef int rayValid = False 67 | cdef int rayPixelRow 68 | cdef int rayPixelColumn 69 | cdef int oppositeDirection 70 | cdef int row = startRow 71 | cdef int column = startColumn 72 | 73 | ray = [(startRow, startColumn)] 74 | 75 | while rayLength < maxRayLength: 76 | 77 | rayPixelRow = row + math.sin(rayDirection)*rayLength*direction 78 | rayPixelColumn = column+math.cos(rayDirection)*rayLength*direction 79 | if rayPixelRow >= height or rayPixelRow < 0 or rayPixelColumn >= width or rayPixelColumn < 0: 80 | return None 81 | 82 | if not rayValid: 83 | rayValid = True 84 | ray.append((rayPixelRow, rayPixelColumn)) 85 | 86 | if (rayPixelRow, rayPixelColumn) in edgeIndices: 87 | oppositeDirection = angles[rayPixelRow][rayPixelColumn] 88 | 89 | if angleDifference(rayDirection, oppositeDirection) > (math.pi / 2): 90 | rayValid = False 91 | 92 | if rayValid: 93 | return ray 94 | else: 95 | return None 96 | 97 | rayLength += 1 98 | return None 99 | 100 | cdef float normalize(float value, int oldMin, int oldMax, int newMin, int newMax): 101 | """ interpolation function from http://stackoverflow.com/questions/929103/convert-a-number-range-to-another-range-maintaining-ratio 102 | arguments -- 103 | value: value you are mapping from 104 | oldmin, oldmax: extrema of domain 105 | newmin, newmax: extrema of range 106 | 107 | return -- 108 | value mapped to new range 109 | """ 110 | # return value 111 | return (((value - oldMin) * (newMax - newMin)) / (oldMax - oldMin)) + newMin 112 | 113 | cdef float rayLength(ray): 114 | """ Returns length of the ray 115 | arguments -- 116 | ray: ray of pixels 117 | 118 | return -- 119 | ray length 120 | """ 121 | return ((ray[0][0] - ray[-1][0])**2+(ray[0][1] - ray[-1][1])**2)**.5 122 | -------------------------------------------------------------------------------- /lib/textLocalizer.py: -------------------------------------------------------------------------------- 1 | import letterCombinator as lc 2 | import connected_components as cc 3 | import swt 4 | import numpy as np 5 | from matplotlib import pyplot as plt 6 | import cv2 7 | import math, random 8 | from profiler import * 9 | import itertools 10 | 11 | class TextLocalizer(object): 12 | @staticmethod 13 | def filterLetterPairs(letterPairs): 14 | letterPairs = filter(lambda x: x.similarComponentStrokeWidthRatio(2.5), letterPairs) 15 | letterPairs = filter(lambda x: x.similarComponentHeightRatio(), letterPairs) 16 | letterPairs = filter(lambda x: x.similarComponentDistance(2.0), letterPairs) 17 | return letterPairs 18 | 19 | @staticmethod 20 | def findLetters(img, direction, letterFilters): 21 | strokeWidthTranform = swt.strokeWidthTransform(img, direction) 22 | # Generate Regions 23 | regions = cc.connectComponents(strokeWidthTranform) 24 | regions_dict = TextLocalizer.regions_to_dict(regions) 25 | bounds = cc.map_to_bounds(regions_dict) 26 | # Filter Letter Candidates 27 | letterCandidates_dict = cc.applyFilters(regions_dict, bounds, letterFilters) 28 | letterCandidates_arr = filter(lambda x: len(x) > 0, TextLocalizer.regions_to_arr(letterCandidates_dict)) 29 | letters = [lc.Letter(x) for x in letterCandidates_arr] 30 | letters = filter(lambda x: x.height() > 0 and x.width() > 0, letters) 31 | 32 | return letters 33 | 34 | @staticmethod 35 | def findLines(img, direction=-1, letterFilters=('size', 'borders')): 36 | 37 | letters = TextLocalizer.findLetters(img, direction, letterFilters) 38 | 39 | letterPairs = lc.LetterCombinator.generateLetterPairs(letters) 40 | filteredLetterPairs = TextLocalizer.filterLetterPairs(letterPairs) 41 | 42 | letterChains = [lc.LetterChain.chainFromPair(pair) for pair in filteredLetterPairs] 43 | 44 | lines = lc.LetterCombinator.findAllLines(letterChains) 45 | # validLines = filter(lambda x: len(x.letters) > 2, lines) 46 | validLines = TextLocalizer.validateLines(lines) 47 | return validLines 48 | 49 | @staticmethod 50 | def validateLines(lines, heightThreshold=2.0): 51 | linesWithEnoughLetters = filter(lambda x: len(x.letters) > 2, lines) 52 | validLines = [] 53 | 54 | return linesWithEnoughLetters 55 | 56 | @staticmethod 57 | def regions_to_dict(regions): 58 | d = {} 59 | for i, region in enumerate(regions): 60 | d[i] = region 61 | return d 62 | 63 | @staticmethod 64 | def regions_to_arr(regions): 65 | arr = [[] for i in range(max(regions.keys())+1)] 66 | for i, v in regions.items(): 67 | arr[i] = v 68 | return arr 69 | 70 | class LetterRenderer(object): 71 | @staticmethod 72 | def draw_word_line(img, line): 73 | for letter in line.letters: 74 | LetterRenderer.draw_letter(img, letter) 75 | LetterRenderer.draw_letter_rect(img, line.chainToRegion()) 76 | 77 | @staticmethod 78 | def draw_word_lines(img, lines): 79 | for line in lines: 80 | LetterRenderer.draw_word_line(img, line) 81 | 82 | @staticmethod 83 | def draw_letter(img, letter): 84 | random_color = (255*random.random(), 255*random.random(), 255*random.random()) 85 | for pixel in letter.letterPixels: 86 | (y, x, w) = pixel 87 | img[y, x] = random_color 88 | 89 | @staticmethod 90 | def draw_letters(img, letters): 91 | for letter in letters: 92 | LetterRenderer.draw_letter(img, letter) 93 | 94 | @staticmethod 95 | def draw_letter_rect(img, letter): 96 | # if len(region) == 0: 97 | # return 98 | rows, cols, _ = img.shape 99 | box = letter.bounds() 100 | (miny, minx), (maxy, maxx) = box 101 | lower_left = (minx, miny) 102 | lower_right = (maxx, miny) 103 | upper_left = (minx, maxy) 104 | upper_right = (maxx, maxy) 105 | 106 | color = (255*random.random(), 255*random.random(), 255*random.random()) 107 | 108 | cv2.line(img, lower_left, lower_right, color, 2) 109 | cv2.line(img, lower_left, upper_left, color, 2) 110 | cv2.line(img, upper_right, lower_right, color, 2) 111 | cv2.line(img, upper_right, upper_left, color, 2) 112 | 113 | @staticmethod 114 | def draw_letter_center(img, letter): 115 | cv2.circle(img, (int(letter.center()[1]), int(letter.center()[0])), 2, (255, 0, 0)) 116 | -------------------------------------------------------------------------------- /test/test_letter_combinator.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.append(os.path.join(os.path.dirname(__file__), '../', 'swt')) 3 | import lib.letterCombinator as lc 4 | import lib.connected_components as cc 5 | from lib.swt import swt 6 | import numpy as np 7 | from matplotlib import pyplot as plt 8 | import cv2 9 | import math, random 10 | from profiler import * 11 | import itertools 12 | 13 | def test_find_letters(): 14 | img = cv2.imread('test/images/emergency_stop.jpg', 0) 15 | rows, cols = img.shape 16 | 17 | # Compute SWT 18 | swt_pos = swt.strokeWidthTransform(img, 1) 19 | 20 | SAMPLE = swt_pos 21 | 22 | # Generate Regions 23 | regions = cc.connectComponents(SAMPLE) 24 | regions_dict = regions_to_dict(regions) 25 | bounds = cc.map_to_bounds(regions_dict) 26 | 27 | # Filter Letter Candidates 28 | # letterCandidates_dict = cc.applyFilters(regions_dict, bounds, ['size', 'borders', 'aspect_ratio_and_diameter']) 29 | letterCandidates_dict = cc.applyFilters(regions_dict, bounds, ['size', 'borders']) 30 | letterCandidates_arr = filter(lambda x: len(x) > 0, regions_to_arr(letterCandidates_dict)) 31 | 32 | letters = [lc.Letter(x) for x in letterCandidates_arr] 33 | 34 | for letter in letters: 35 | draw_letter_rect(swt_pos, letter) 36 | plt.imshow(swt_pos, 'gray') 37 | plt.show() 38 | 39 | def test_letterCombinator(): 40 | img = cv2.imread('test/images/rab_butler.jpg', 0) 41 | rows, cols = img.shape 42 | 43 | # Compute SWT 44 | swt_pos = swt.strokeWidthTransform(img, -1) 45 | 46 | SAMPLE = swt_pos 47 | 48 | # Generate Regions 49 | regions = cc.connectComponents(SAMPLE) 50 | regions_dict = regions_to_dict(regions) 51 | bounds = cc.map_to_bounds(regions_dict) 52 | 53 | # Filter Letter Candidates 54 | letterCandidates_dict = cc.applyFilters(regions_dict, bounds, ['size', 'borders']) 55 | letterCandidates_arr = filter(lambda x: len(x) > 0, regions_to_arr(letterCandidates_dict)) 56 | 57 | letters = [lc.Letter(x) for x in letterCandidates_arr] 58 | letters = filter(lambda x:x.height()>0 and x.width()>0, letters) 59 | 60 | # Combine Letters 61 | letterPairs = lc.LetterCombinator.generateLetterPairs(letters) 62 | 63 | letterPairs = filter(lambda x: x.similarComponentStrokeWidthRatio(2.5), letterPairs) 64 | letterPairs = filter(lambda x: x.similarComponentHeightRatio(), letterPairs) 65 | letterPairs = filter(lambda x: x.similarComponentDistance(2.0), letterPairs) 66 | 67 | ccImg = cc.connectedComponentsToImg(SAMPLE, letterCandidates_arr, rows, cols, True) 68 | 69 | letterChains = [lc.LetterChain.chainFromPair(pair) for pair in letterPairs] 70 | lines = lc.LetterCombinator.findAllLines(letterChains) 71 | 72 | for chain in lines: 73 | if len(chain.letters) > 2: 74 | draw_letter_rect(ccImg, chain.chainToRegion()) 75 | 76 | plt.imshow(ccImg) 77 | plt.show() 78 | 79 | def test_letter_pairs(): 80 | img = cv2.imread('test/images/rab_butler.jpg', 0) 81 | rows, cols = img.shape 82 | 83 | # Compute SWT 84 | swt_pos = swt.strokeWidthTransform(img, -1) 85 | 86 | SAMPLE = swt_pos 87 | 88 | # Generate Regions 89 | regions = cc.connectComponents(SAMPLE) 90 | regions_dict = regions_to_dict(regions) 91 | bounds = cc.map_to_bounds(regions_dict) 92 | 93 | # Filter Letter Candidates 94 | letterCandidates_dict = cc.applyFilters(regions_dict, bounds, ['size', 'borders']) 95 | letterCandidates_arr = filter(lambda x: len(x) > 0, regions_to_arr(letterCandidates_dict)) 96 | 97 | letters = [lc.Letter(x) for x in letterCandidates_arr] 98 | letters = filter(lambda x:x.height()>0 and x.width()>0, letters) 99 | 100 | # Combine Letters 101 | print len(letters) 102 | letterPairs = lc.LetterCombinator.generateLetterPairs(letters) 103 | 104 | letterPairs = filter(lambda x: x.similarComponentStrokeWidthRatio(2.5), letterPairs) 105 | letterPairs = filter(lambda x: x.similarComponentHeightRatio(), letterPairs) 106 | letterPairs = filter(lambda x: x.similarComponentDistance(2.0), letterPairs) 107 | 108 | # plt.subplot(2,1,1) 109 | letterChains = [lc.LetterChain.chainFromPair(pair) for pair in letterPairs] 110 | # ccImg = cc.connectedComponentsToImg(SAMPLE, letterCandidates_arr, rows, cols, True) 111 | # for chain in letterChains: 112 | # draw_letter_rect(ccImg, chain.chainToRegion()) 113 | # plt.imshow(ccImg) 114 | 115 | plt.subplot(2,1,2) 116 | ccImg = cc.connectedComponentsToImg(SAMPLE, letterCandidates_arr, rows, cols, True) 117 | lines = lc.LetterCombinator.findAllLines(letterChains) 118 | for chain in lines: 119 | if len(chain.letters) > 2: 120 | draw_letter_rect(ccImg, chain.chainToRegion()) 121 | plt.imshow(ccImg) 122 | plt.show() 123 | 124 | def regions_to_dict(regions): 125 | d = {} 126 | for i, region in enumerate(regions): 127 | d[i] = region 128 | return d 129 | 130 | def regions_to_arr(regions): 131 | arr = [[] for i in range(max(regions.keys())+1)] 132 | for i, v in regions.items(): 133 | arr[i] = v 134 | return arr 135 | 136 | def draw_letter_rect(img, letter): 137 | # if len(region) == 0: 138 | # return 139 | if len(img.shape) == 3: 140 | rows, cols, _ = img.shape 141 | else: 142 | rows, cols = img.shape 143 | box = letter.bounds() 144 | (miny, minx), (maxy, maxx) = box 145 | lower_left = (minx, miny) 146 | lower_right = (maxx, miny) 147 | upper_left = (minx, maxy) 148 | upper_right = (maxx, maxy) 149 | 150 | color = (255*random.random(), 255*random.random(), 255*random.random()) 151 | 152 | cv2.line(img, lower_left, lower_right, color, 2) 153 | cv2.line(img, lower_left, upper_left, color, 2) 154 | cv2.line(img, upper_right, lower_right, color, 2) 155 | cv2.line(img, upper_right, upper_left, color, 2) 156 | 157 | if __name__ == "__main__": 158 | test_letterCombinator() -------------------------------------------------------------------------------- /lib/letterCombinator.py: -------------------------------------------------------------------------------- 1 | import connected_components as cc 2 | import numpy as np 3 | from matplotlib import pyplot as plt 4 | import cv2 5 | import math 6 | from profiler import * 7 | import itertools 8 | import swt 9 | 10 | class Letter(object): 11 | def __init__(self, letterPixels): 12 | self.letterPixels = letterPixels 13 | 14 | def distanceToLetter(self, letter): 15 | centerA = self.center() 16 | centerB = letter.center() 17 | return ((centerA[0]-centerB[0])**2+(centerA[1]-centerB[1])**2)**.5 18 | 19 | def width(self): 20 | xVals = map(lambda x:x[1], self.letterPixels) 21 | return max(xVals) - min(xVals) 22 | 23 | def height(self): 24 | yVals = map(lambda x:x[0], self.letterPixels) 25 | return max(yVals) - min(yVals) 26 | 27 | def bounds(self): 28 | min_y = min([y for (y,x,w) in self.letterPixels]) 29 | max_y = max([y for (y,x,w) in self.letterPixels]) 30 | min_x = min([x for (y,x,w) in self.letterPixels]) 31 | max_x = max([x for (y,x,w) in self.letterPixels]) 32 | return ((min_y, min_x), (max_y, max_x)) 33 | 34 | def center(self): 35 | ((min_y, min_x), (max_y, max_x)) = self.bounds() 36 | center_y = (min_y+max_y) / 2.0 37 | center_x = (min_x+max_x) / 2.0 38 | return (center_y, center_x) 39 | 40 | def bottomLeft(self): 41 | ((min_y, min_x), (max_y, max_x)) = self.bounds() 42 | return (min_y, min_x) 43 | 44 | def bottomRight(self): 45 | ((min_y, min_x), (max_y, max_x)) = self.bounds() 46 | return (min_y, max_x) 47 | 48 | def topLeft(self): 49 | ((min_y, min_x), (max_y, max_x)) = self.bounds() 50 | return (max_y, min_x) 51 | 52 | def topRight(self): 53 | ((min_y, min_x), (max_y, max_x)) = self.bounds() 54 | return (max_y, max_x) 55 | 56 | def strokeWidth(self): 57 | componentColors = map(lambda x: x[2], self.letterPixels) 58 | return sum(componentColors)/len(self.letterPixels) 59 | 60 | class LetterPair(object): 61 | def __init__(self, letterA, letterB): 62 | self.letterA = letterA 63 | self.letterB = letterB 64 | 65 | def letterDistance(self): 66 | return self.letterA.distanceToLetter(self.letterB) 67 | 68 | def similarComponentStrokeWidthRatio(self, threshold=1.5): 69 | if max(self.letterA.strokeWidth(), self.letterB.strokeWidth())/min(self.letterA.strokeWidth(), self.letterB.strokeWidth()) < threshold: 70 | return True 71 | return False 72 | 73 | def similarComponentHeightRatio(self, threshold=2.0): 74 | if (max(self.letterA.height(), self.letterB.height()) / min(self.letterA.height(), self.letterB.height())) < threshold: 75 | return True 76 | return False 77 | 78 | def similarComponentDistance(self, threshold=1.5): 79 | if self.letterDistance() < (threshold*max(self.letterA.width(), self.letterB.width())): 80 | return True 81 | return False 82 | 83 | def mergeLetters(self): 84 | return Letter(self.letterA.letterPixels + self.letterB.letterPixels) 85 | 86 | 87 | class LetterChain(object): 88 | def __init__(self): 89 | self.letters = [] 90 | self.hasMerged = False 91 | self.direction = None 92 | 93 | @classmethod 94 | def chainFromPair(cls, pair): 95 | letterChain = cls() 96 | letterChain.letters.append(pair.letterA) 97 | letterChain.letters.append(pair.letterB) 98 | letterChain.direction = math.atan2(pair.letterA.center()[0] - pair.letterB.center()[0], 99 | pair.letterA.center()[1] - pair.letterB.center()[1]) 100 | return letterChain 101 | 102 | def bounds(self): 103 | minX = None 104 | maxX = None 105 | minY = None 106 | maxY = None 107 | for letter in self.letters: 108 | ((min_y, min_x), (max_y, max_x)) = letter.bounds() 109 | if minX == None: 110 | minX = min_x 111 | maxX = max_x 112 | minY = min_y 113 | maxY = max_y 114 | else: 115 | if min_y < minY: 116 | minY = min_y 117 | if min_x < minX: 118 | minX = min_x 119 | if max_y > maxY: 120 | maxY = max_y 121 | if max_x > maxX: 122 | maxX = max_x 123 | return ((minY, minX), (maxY, maxX)) 124 | 125 | def height(self): 126 | ((minY, minX), (maxY, maxX)) = self.bounds() 127 | return maxX - minX 128 | 129 | def chainToRegion(self): 130 | region = [] 131 | for letter in self.letters: 132 | region += letter.letterPixels 133 | return Letter(region) 134 | 135 | def mergeWithChain(self, chain): 136 | lettersIndex = set(self.letters) 137 | for elem in chain.letters: 138 | if elem not in lettersIndex: 139 | self.letters.append(elem) 140 | 141 | def sharesBounds(self, chain): 142 | (selfMinY, selfMinX), (selfMaxY, selfMaxX) = self.bounds() 143 | (otherMinY, otherMinX), (otherMaxY, otherMaxX) = chain.bounds() 144 | if selfMinX > otherMaxX or otherMinX > selfMaxX: 145 | return False 146 | if selfMinY > otherMaxY or otherMinY > selfMaxY: 147 | return False 148 | return True 149 | 150 | 151 | class LetterCombinator(object): 152 | @staticmethod 153 | def generateLetterPairs(letters): 154 | letterPairs = list(itertools.combinations(letters, 2)) 155 | return [LetterPair(x,y) for (x,y) in letterPairs] 156 | 157 | @staticmethod 158 | def findLines(letterChains): 159 | lines = [] 160 | for chain in letterChains: 161 | didMerge = False 162 | for i, line in enumerate(lines): 163 | if line.sharesBounds(chain) and swt.angleDifference(line.direction, chain.direction) < math.pi: 164 | didMerge = True 165 | lines[i].mergeWithChain(chain) 166 | if not didMerge: 167 | lines.append(chain) 168 | return lines 169 | 170 | @staticmethod 171 | def findAllLines(letterChains): 172 | lines = LetterCombinator.findLines(letterChains) 173 | length = len(lines) 174 | while True: 175 | lines = LetterCombinator.findLines(lines) 176 | 177 | if length == len(lines): 178 | break 179 | length = len(lines) 180 | return lines -------------------------------------------------------------------------------- /lib/connected_components.py: -------------------------------------------------------------------------------- 1 | """ 2 | Python Connected Components Implementation 3 | Developed by Diana Vermilya and Nathan Lintz 4 | """ 5 | 6 | import cv2 7 | import Queue 8 | import numpy as np 9 | import math, random 10 | from matplotlib import pyplot as plt 11 | import copy 12 | from profiler import * 13 | import fastConnectedComponents 14 | 15 | t = Timer() 16 | 17 | def generateListOfAllPixels(rows, cols): 18 | all_pixels = [] 19 | for i in range(rows): 20 | for j in range(cols): 21 | all_pixels.append((i,j)) 22 | return all_pixels 23 | 24 | def connectComponents(img): 25 | rows = img.shape[0] 26 | cols = img.shape[1] 27 | 28 | all_pixels = generateListOfAllPixels(rows, cols) 29 | 30 | # t.start('cython') 31 | components = fastConnectedComponents.bfs(img, rows, cols) 32 | # t.stop('cython') 33 | # t.start('python') 34 | # components = bfs(img, all_pixels, rows, cols) 35 | # t.stop('python') 36 | return components 37 | 38 | def bfs(img, all_pixels, rows, cols): 39 | q = Queue.Queue() 40 | enqueued = {} 41 | tags = {} 42 | tag_count = 0 43 | for i in range(len(all_pixels)): 44 | first_pix = all_pixels[i] 45 | if not first_pix in enqueued: 46 | tags[tag_count] = [] 47 | q.put(first_pix) 48 | enqueued[first_pix] = True 49 | 50 | while not q.empty(): 51 | [y,x] = q.get() 52 | b_shade = img[y,x]*(-1) + 255 + 0.001 53 | 54 | for pix in [(y,x-1), (y,x+1), (y-1,x), (y+1,x)]: 55 | if pix[0] >= 0 and pix[0] < rows and pix[1] >= 0 and pix[1] < cols: 56 | n_shade = img[pix[0], pix[1]]*(-1) + 255 + 0.001 57 | if float(n_shade)/b_shade < 3 and float(n_shade)/b_shade > 0.33: 58 | if not pix in enqueued: 59 | q.put(pix) 60 | enqueued[pix] = True 61 | tags[tag_count].append((y,x,img[y,x])) 62 | tag_count += 1 63 | return tags 64 | 65 | def connectedComponentsToImg(swt, connectComponents, rows, cols, multicolor=False): 66 | new_image = np.zeros((rows, cols,3), np.uint8) 67 | 68 | for i, v in enumerate(connectComponents): 69 | avg_color = meanComponentColor(swt, v) 70 | random_color = (255*random.random(), 255*random.random(), 255*random.random()) 71 | for y, x, swt[y,x] in v: 72 | if multicolor: 73 | new_image[y,x] = random_color 74 | else: 75 | new_image[y, x] = avg_color 76 | return new_image 77 | 78 | def meanComponentColor(swt, component): 79 | componentColors = map(lambda x: x[2], component) 80 | if len(componentColors): 81 | return sum(componentColors)/len(component) 82 | return 0 83 | 84 | def thresholded_cc(): 85 | swt = cv2.imread('sample_swt.png',0) 86 | rows = swt.shape[0] 87 | cols = swt.shape[1] 88 | # new_image = np.zeros((rows, cols,3), np.uint8) 89 | tags = connectComponents(swt) 90 | new_image = connectedComponentsToImg(swt, tags, rows, cols) 91 | 92 | 93 | cv2.imwrite('blank.png', new_image) 94 | plt.imshow(new_image) 95 | plt.show() 96 | return tags 97 | 98 | def filter_by_size(regions, *args): 99 | SIZE_TOLERANCE = 10 100 | return {k:v for (k,v) in regions.iteritems() if len(v) > SIZE_TOLERANCE} 101 | 102 | def meets_variance_tolerance(region): 103 | VARIANCE_TOLERANCE = 2.0 104 | mean_width = sum([w for (y,x,w) in region])/float(len(region)) 105 | 106 | for (y,x,w) in region: 107 | if w < mean_width/VARIANCE_TOLERANCE: 108 | return False 109 | return True 110 | 111 | def filter_by_variance(regions, *args): 112 | return {k:v for (k,v) in regions.iteritems() if meets_variance_tolerance(v) == True} 113 | 114 | def meets_aspect_ratio_and_diameter(region, bounds): 115 | (min_y, max_y, min_x, max_x) = bounds 116 | y_diff = max_y - min_y 117 | x_diff = float(max_x - min_x + 0.001) 118 | aspect_ratio = y_diff/x_diff 119 | 120 | weights = [w for (y,x,w) in region] 121 | median_weight = np.median(np.array(weights)) 122 | diameter = math.sqrt(y_diff**2 + x_diff**2) 123 | 124 | if aspect_ratio < 10 and aspect_ratio > 0.1: 125 | if diameter/median_weight < 10: 126 | return True 127 | return False 128 | 129 | def filter_by_aspect_ratio_and_diameter(regions, bounds_map): 130 | return {k:v for (k,v) in regions.iteritems() if meets_aspect_ratio_and_diameter(v, bounds_map[k]) == True} 131 | 132 | 133 | def map_to_bounds(regions): 134 | bounds = {} 135 | for (key, region) in regions.iteritems(): 136 | min_y = min([y for (y,x,w) in region]) 137 | max_y = max([y for (y,x,w) in region]) 138 | min_x = min([x for (y,x,w) in region]) 139 | max_x = max([x for (y,x,w) in region]) 140 | bounds_for_region = (min_y, max_y, min_x, max_x) 141 | bounds[key] = bounds_for_region 142 | # print bounds 143 | return bounds 144 | 145 | def contains(bounds_a, bounds_b): 146 | (min_ya, max_ya, min_xa, max_xa) = bounds_a 147 | (min_yb, max_yb, min_xb, max_xb) = bounds_b 148 | if min_ya < min_yb and max_ya > max_yb and min_xa < min_xb and max_xa > max_xb: 149 | return True 150 | 151 | def filter_out_borders(regions, bounds): 152 | #400 gets filtered out here 153 | regions_contained = {} 154 | keys = regions.keys() 155 | for region in keys: 156 | for other_region in keys: 157 | if contains(bounds[region], bounds[other_region]): 158 | regions_contained[region] = regions_contained.get(region, 0) + 1 159 | regions_contained[region] = regions_contained.get(region, 0) 160 | 161 | # print regions_contained 162 | return {k:v for (k,v) in regions.iteritems() if regions_contained[k] < 3} 163 | 164 | def applyFilters(connectedComponents, bounds_map, filterNames): 165 | filters = { 166 | 'size': filter_by_size, 167 | 'variance': filter_by_variance, 168 | 'aspect_ratio_and_diameter': filter_by_aspect_ratio_and_diameter, 169 | 'borders': filter_out_borders 170 | } 171 | 172 | for name in filterNames: 173 | connectedComponents = filters[name](connectedComponents, bounds_map) 174 | return connectedComponents 175 | 176 | 177 | if __name__ == "__main__": 178 | regions = thresholded_cc() 179 | # bounds_map = map_to_bounds(regions) 180 | # print len(regions) 181 | # regions = filter_by_size(regions) 182 | # print len(regions) 183 | # regions = filter_by_variance(regions) 184 | # print len(regions) 185 | # regions = filter_by_aspect_ratio_and_diameter(regions, bounds_map) 186 | # print len(regions) 187 | # regions = filter_out_borders(regions, bounds_map) 188 | # print len(regions) 189 | 190 | -------------------------------------------------------------------------------- /lib/swt.py: -------------------------------------------------------------------------------- 1 | """ 2 | Python Stroke Width Transform Implementation 3 | Developed by Diana Vermilya and Nathan Lintz 4 | """ 5 | 6 | import numpy as np 7 | import cv2 8 | import math 9 | import copy 10 | from multiprocessing import Pool 11 | from functools import partial 12 | from profiler import * 13 | import fastRay 14 | 15 | t = Timer() 16 | def strokeWidthTransform(img, direction=1, cannyThresholds=(100,300)): 17 | """ Returns the stroke width transform of a color image 18 | 19 | arguments -- 20 | img: 2d grayscale array of image 21 | direction: -1 detects light text on dark background, 1 detects dark text on light background 22 | 23 | return -- 24 | 2d grayscale array where each pixel's value is its stroke width 25 | """ 26 | edges = cv2.Canny(img, 100, 300) 27 | 28 | thetas = gradient(img, edges) 29 | 30 | firstPass, rays = fastRay.castRays(edges, thetas, direction) 31 | 32 | if rays == None: 33 | return firstPass 34 | 35 | secondPass = refineRays(firstPass, rays) 36 | return secondPass 37 | 38 | def refineRays(swt, rays): 39 | """ Second pass as described in article 40 | pixels who are longer than the median value of the ray are set to the medianLength 41 | arguments -- 42 | swt: swt from first pass 43 | rays: array of pixels in ray 44 | 45 | returns: 46 | refined swt image 47 | 48 | """ 49 | swt = copy.deepcopy(swt) # TODO: Do we need a deepcopy here? 50 | for ray in rays: 51 | medianLength = np.median(map(lambda x: swt[x[0]][x[1]], ray)) # Test Min, might work better 52 | # medianLength = np.min(map(lambda x: swt[x[0]][x[1]], ray)) 53 | for pixel in ray: 54 | if swt[pixel[0]][pixel[1]] > medianLength: 55 | swt[pixel[0]][pixel[1]] = medianLength 56 | return swt 57 | 58 | def castProcess(angles, edgeLookup, maxRayLength, direction, pixel): 59 | ray = castRay(pixel, angles, edgeLookup, maxRayLength, direction) 60 | if ray: 61 | if len(ray) > 1: 62 | return ray 63 | return None 64 | 65 | 66 | def castRays(edges, angles, direction, maxRayLength=100): 67 | """ casts a ray for every edge in the image 68 | arguments -- 69 | edges: black and white image result of canny edge detector 70 | angles: black and white image result of sobel operator 71 | direction: 1 or -1 72 | 73 | return -- 74 | [swt first pass, rays] 75 | """ 76 | swt = np.empty((edges.shape[0], edges.shape[1])) 77 | swt.fill(255) # swt vector is initialized with maximum values 78 | rays = [] 79 | nonZeroEdges = edges.nonzero() 80 | edgeIndices = zip(nonZeroEdges[0], nonZeroEdges[1]) 81 | edgeLookup = set(edgeIndices) 82 | 83 | for (row, column) in edgeIndices: 84 | ray = castRay((row,column), angles, edgeLookup, maxRayLength, direction) 85 | if ray: 86 | if len(ray) > 1: 87 | rays.append(ray) 88 | 89 | allRayLengths = map(lambda x: rayLength(x), filter(lambda x: x != None, rays)) 90 | 91 | if len(allRayLengths) == 0: 92 | return [swt, None] 93 | 94 | minL, maxL = min(allRayLengths), max(allRayLengths) 95 | for ray in rays: 96 | for pixel in ray: 97 | swt[pixel[0], pixel[1]] = min(normalize(rayLength(ray), minL, maxL, 0, 255), swt[pixel[0], pixel[1]]) 98 | return [swt, rays] 99 | 100 | 101 | def normalize(value, oldMin, oldMax, newMin, newMax): 102 | """ interpolation function from http://stackoverflow.com/questions/929103/convert-a-number-range-to-another-range-maintaining-ratio 103 | arguments -- 104 | value: value you are mapping from 105 | oldmin, oldmax: extrema of domain 106 | newmin, newmax: extrema of range 107 | 108 | return -- 109 | value mapped to new range 110 | """ 111 | return (((value - oldMin) * (newMax - newMin)) / (oldMax - oldMin)) + newMin 112 | # return value 113 | 114 | def castRay(startPixel, angles, edgeIndices, maxRayLength, direction): 115 | """ Returns length of the ray 116 | arguments -- 117 | startPixel: (row, column) rays starting position 118 | angles: result of sobel operator 119 | edgeIndices: indices of edge pixels in image 120 | maxRayLength: maximum length of ray in pixels 121 | direction: 1 or -1 122 | 123 | return -- 124 | an array of pixels if valid ray or None 125 | """ 126 | row, column = startPixel 127 | height, width = angles.shape 128 | rayLength = 1 129 | rayDirection = angles[row][column] 130 | rayValid = False 131 | ray = [(row, column)] 132 | while rayLength < maxRayLength: 133 | pixel = (int(row + math.sin(rayDirection)*rayLength*direction), 134 | int(column+math.cos(rayDirection)*rayLength*direction)) 135 | if pixel[0] >= height or pixel[0] < 0 or pixel[1] >= width or pixel[1] < 0: 136 | return None 137 | 138 | if not rayValid: 139 | rayValid = True 140 | ray.append(pixel) 141 | 142 | if pixel in edgeIndices: 143 | oppositeDirection = angles[pixel[0]][pixel[1]] 144 | 145 | if angleDifference(rayDirection, oppositeDirection) > math.pi / 2: 146 | rayValid = False 147 | 148 | if rayValid: 149 | return ray 150 | else: 151 | return None 152 | 153 | rayLength += 1 154 | return None 155 | 156 | 157 | def angleDifference(angle1, angle2): 158 | """ Returns angle difference between ray starting angle and the ending angle 159 | arguments -- 160 | angle1: angle in radians 161 | angle2: angle in radians 162 | 163 | return -- 164 | distance between the angles in radians 165 | """ 166 | # return abs(abs(angle1 - angle2) - math.pi) 167 | return math.atan2(math.sin(angle1-angle2), math.cos(angle1-angle2)) 168 | 169 | def rayLength(ray): 170 | """ Returns length of the ray 171 | arguments -- 172 | ray: ray of pixels 173 | 174 | return -- 175 | ray length 176 | """ 177 | return ((ray[0][0] - ray[-1][0])**2+(ray[0][1] - ray[-1][1])**2)**.5 178 | 179 | def gradient(img, edges): 180 | """ Returns matrix of angles 181 | arguments -- 182 | edges: black and white image result of canny edge detector 183 | 184 | return -- 185 | matrix of theta values 186 | """ 187 | 188 | rows = np.size(img, 0) 189 | columns = np.size(img, 1) 190 | dx = cv2.Sobel(img, cv2.CV_32F, 1, 0, ksize = 5, scale = -1, delta = 1, borderType = cv2.BORDER_DEFAULT) 191 | dy = cv2.Sobel(img, cv2.CV_32F, 0, 1, ksize = 5, scale = -1, delta = 1, borderType = cv2.BORDER_DEFAULT) 192 | 193 | theta = np.zeros(img.shape) 194 | for row in range(rows): 195 | for col in range(columns): 196 | if(edges[row][col] > 0): 197 | theta[row][col] = math.atan2(dy[row][col], dx[row][col]) 198 | return theta 199 | --------------------------------------------------------------------------------