├── .gitignore
├── README.txt
├── Screenshots
    ├── Blob Bounding Boxes.jpg
    ├── Blob Y-Histogram.jpg
    ├── Blobs.jpg
    ├── Bounding Box Width Histogram.jpg
    ├── Early Blob Bounding Boxes.jpg
    ├── Early Reverse Binarized Blob Bounding Boxes.jpg
    ├── Early Y-Histogram on Pecha.jpg
    └── example_output_to_recognizer
    │   ├── dza_char.jpg
    │   ├── ga_char.jpg
    │   ├── la_char.jpg
    │   ├── ma_char.jpg
    │   ├── na_char.jpg
    │   ├── pha_char.jpg
    │   ├── sa_char.jpg
    │   ├── sha_char.jpg
    │   ├── tha_char.jpg
    │   ├── ya_char.jpg
    │   └── zha_char.jpg
├── character_segmentation.py
├── config.py
├── get_min_max_char_width.py
├── legacy_functions
    ├── 2009_07_14.txt
    ├── avg_box_width.py
    ├── count_chars.py
    ├── create_bbox.py
    ├── create_bbox_contours.py
    ├── cum_sum.py
    ├── cut_images.py
    ├── draw_lines.py
    ├── extra_statements.py
    ├── hp.py
    ├── make_hist.py
    ├── make_plot.py
    ├── numpy1.py
    ├── ocr.py
    ├── opencv_drawing.py
    ├── plot.py
    ├── print_w_h.py
    ├── process_image_dimensions.txt
    ├── sequential_finish.py
    ├── stats.py
    ├── vp.py
    └── wh.py
├── line_drawing.py
└── make_char_dirs.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | *.pyc
3 | 
4 | 


--------------------------------------------------------------------------------
/README.txt:
--------------------------------------------------------------------------------
 1 | === About ===
 2 | 
 3 | A Python OCR program for handwritten Tibetan manuscripts like the ones recently digitized in Mongolia: http://www.tibet-dps.org/tempangma_kangyur.htm
 4 | The main script for doing character segmentation is character_segmentation.py.  This script contains a function process_image which is explained in detail below.
 5 | 
 6 | === USAGE ===
 7 | 
 8 | 1) Edit config.py
 9 | 2) python character_segmentation.py
10 | 
11 | === Dependencies ===
12 | 
13 | OpenCV - http://opencv.org/
14 | cvBlobsLib - http://www.eng.auburn.edu/~troppel/internal/sparc/TourBot/TourBot%20References/cvdocuments/cvBlobsLib.html
15 | NumPy - http://www.numpy.org/
16 | matplotlib - http://matplotlib.org/
17 | 
18 | === The Function process_image ===
19 | 
20 | This function processes each image again and again until an acceptable result is obtained.  Acceptable means, for instance, that no potential character image still exceeds the maximum height or width.
21 | 
22 | This process involves the following steps:
23 | 
24 | 1) Find all the blobs of ink on the page using the class CBlobResult from cvBlobsLib.
25 | 
26 | 2) Determine whether each blob needs to be passed to the vertical projection (vp) or horizontal projection (hp) functions.  Predefined variables like min_char_width, max_width_b4_cutoff, and max_height_b4_cutoff are taken into account.
27 | 
28 | 3) Rerun process_image if some blobs were passed to vertical projection or horizontal projection functions.  (Vertical projection and horizontal projection functions result in alterations to the original image, by drawing one pixel width white lines.)
29 | 
30 | 4) Determine the written order of blobs:
31 | 
32 | Start each iteration by calling the function get_blobs_sorted_by_distance_from_previous_blob, which gives you a list of blobs sorted by distance from previous blob.  The first blob is the one closest to (0, 0).  For each ensuing blob, we compare the distance between its Min X and Min Y and the Max X and Min Y of the previous blob. Distance between points is determined with the function distance_between_points, which takes the square root of the sum of the squares of the X and Y distances.
33 | 
34 |    Distance of a blob's Min X and Min Y from Max X and Min Y of the previously known character blob doesn't determine next character blob in 2 cases:
35 | 
36 |    A) At the end of a line, where the next character is far away.  Therefore we need to determine if we are at the end of line, and to do this we are currently checking to see if any blob remains on the same line.  So the code that attempts to do this is:
37 | 
38 |       if ( int(blob.maxx) > int(current_blob.minx) ) and ( abs(int(blob.miny)-int(current_blob.miny)) < 30 ):
39 | 
40 |    B) When we are not yet at the end of the line, but the closest blob in terms of distance from the Max X and Min Y of the previously known character is nevertheless on the next line. So the code that attempts to rule out blobs on other lines is:
41 | 
42 |       if ( int(blob.maxx) > int(previous_blob.minx) ) and ( abs(int(blob.miny)-int(previous_blob.miny)) < 30 )
43 | 
44 | 5) Write each character blob as an image within the folio_characters_directory with the following as part of its filename: its line number, its character number in the line, and optionally, its dimensions.  Additionally, a bounding box is drawn around it on the original image.
45 | 
46 | 6) Exit
47 | 


--------------------------------------------------------------------------------
/Screenshots/Blob Bounding Boxes.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/writepython/tibetan-ocr/ac7fbe3096d84fb4006225e0f9adb00b664b0332/Screenshots/Blob Bounding Boxes.jpg


--------------------------------------------------------------------------------
/Screenshots/Blob Y-Histogram.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/writepython/tibetan-ocr/ac7fbe3096d84fb4006225e0f9adb00b664b0332/Screenshots/Blob Y-Histogram.jpg


--------------------------------------------------------------------------------
/Screenshots/Blobs.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/writepython/tibetan-ocr/ac7fbe3096d84fb4006225e0f9adb00b664b0332/Screenshots/Blobs.jpg


--------------------------------------------------------------------------------
/Screenshots/Bounding Box Width Histogram.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/writepython/tibetan-ocr/ac7fbe3096d84fb4006225e0f9adb00b664b0332/Screenshots/Bounding Box Width Histogram.jpg


--------------------------------------------------------------------------------
/Screenshots/Early Blob Bounding Boxes.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/writepython/tibetan-ocr/ac7fbe3096d84fb4006225e0f9adb00b664b0332/Screenshots/Early Blob Bounding Boxes.jpg


--------------------------------------------------------------------------------
/Screenshots/Early Reverse Binarized Blob Bounding Boxes.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/writepython/tibetan-ocr/ac7fbe3096d84fb4006225e0f9adb00b664b0332/Screenshots/Early Reverse Binarized Blob Bounding Boxes.jpg


--------------------------------------------------------------------------------
/Screenshots/Early Y-Histogram on Pecha.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/writepython/tibetan-ocr/ac7fbe3096d84fb4006225e0f9adb00b664b0332/Screenshots/Early Y-Histogram on Pecha.jpg


--------------------------------------------------------------------------------
/Screenshots/example_output_to_recognizer/dza_char.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/writepython/tibetan-ocr/ac7fbe3096d84fb4006225e0f9adb00b664b0332/Screenshots/example_output_to_recognizer/dza_char.jpg


--------------------------------------------------------------------------------
/Screenshots/example_output_to_recognizer/ga_char.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/writepython/tibetan-ocr/ac7fbe3096d84fb4006225e0f9adb00b664b0332/Screenshots/example_output_to_recognizer/ga_char.jpg


--------------------------------------------------------------------------------
/Screenshots/example_output_to_recognizer/la_char.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/writepython/tibetan-ocr/ac7fbe3096d84fb4006225e0f9adb00b664b0332/Screenshots/example_output_to_recognizer/la_char.jpg


--------------------------------------------------------------------------------
/Screenshots/example_output_to_recognizer/ma_char.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/writepython/tibetan-ocr/ac7fbe3096d84fb4006225e0f9adb00b664b0332/Screenshots/example_output_to_recognizer/ma_char.jpg


--------------------------------------------------------------------------------
/Screenshots/example_output_to_recognizer/na_char.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/writepython/tibetan-ocr/ac7fbe3096d84fb4006225e0f9adb00b664b0332/Screenshots/example_output_to_recognizer/na_char.jpg


--------------------------------------------------------------------------------
/Screenshots/example_output_to_recognizer/pha_char.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/writepython/tibetan-ocr/ac7fbe3096d84fb4006225e0f9adb00b664b0332/Screenshots/example_output_to_recognizer/pha_char.jpg


--------------------------------------------------------------------------------
/Screenshots/example_output_to_recognizer/sa_char.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/writepython/tibetan-ocr/ac7fbe3096d84fb4006225e0f9adb00b664b0332/Screenshots/example_output_to_recognizer/sa_char.jpg


--------------------------------------------------------------------------------
/Screenshots/example_output_to_recognizer/sha_char.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/writepython/tibetan-ocr/ac7fbe3096d84fb4006225e0f9adb00b664b0332/Screenshots/example_output_to_recognizer/sha_char.jpg


--------------------------------------------------------------------------------
/Screenshots/example_output_to_recognizer/tha_char.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/writepython/tibetan-ocr/ac7fbe3096d84fb4006225e0f9adb00b664b0332/Screenshots/example_output_to_recognizer/tha_char.jpg


--------------------------------------------------------------------------------
/Screenshots/example_output_to_recognizer/ya_char.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/writepython/tibetan-ocr/ac7fbe3096d84fb4006225e0f9adb00b664b0332/Screenshots/example_output_to_recognizer/ya_char.jpg


--------------------------------------------------------------------------------
/Screenshots/example_output_to_recognizer/zha_char.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/writepython/tibetan-ocr/ac7fbe3096d84fb4006225e0f9adb00b664b0332/Screenshots/example_output_to_recognizer/zha_char.jpg


--------------------------------------------------------------------------------
/character_segmentation.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | import sys, os, operator, time, datetime
  3 | 
  4 | from math import sqrt
  5 | from PIL import Image
  6 | 
  7 | from opencv.cv import *
  8 | from opencv.highgui import *
  9 | 
 10 | from pyblobs.BlobResult import CBlobResult
 11 | from pyblobs.Blob import CBlob # Note: This must be imported in order to destroy blobs and use other methods
 12 | 
 13 | from config import *
 14 | from line_drawing import *
 15 | 
 16 | class FakeBlob:
 17 |     def __init__(self, minx=0, miny=0, maxx=0, maxy=0):
 18 |         self.minx = str(minx)
 19 |         self.miny = str(miny)
 20 |         self.maxx = str(maxx)
 21 |         self.maxy = str(maxy)    
 22 | 
 23 | ## def output_blob(blob,output_dir="/home/ryan/openCV/b/"):
 24 | ##     blob_height = int(blob.maxy-blob.miny)
 25 | ##     blob_width = int(blob.maxx-blob.minx)
 26 | ##     blob_size = cvSize(blob_width,blob_height)
 27 | ##     blob_image = cvCreateImage(blob_size, 8, 1)
 28 | ##     cvZero(blob_image)
 29 | ##     blob.FillBlob(blob_image, CV_RGB(255,255,255), -1*int(blob.minx), -1*int(blob.miny))
 30 | ##     info_string = "w_%04d__h_%04d__miny_%04d.png"
 31 | ##     info = info_string % (blob_width, blob_height, int(blob.miny))
 32 | ##     cvSaveImage(output_dir+info, blob_image)
 33 | 
 34 | def distance_between_points(point1, point2):
 35 |     squared_distance1 = (point1[0]-point2[0])**2
 36 |     squared_distance2 = (point1[1]-point2[1])**2
 37 |     distance = sqrt(squared_distance1+squared_distance2)
 38 |     return distance
 39 | 
 40 | def get_blobs_sorted_by_distance_from_previous_blob(blobs, previous_blob):
 41 |     distance_blob_tuples = []
 42 |     for blob in blobs:
 43 |         try:
 44 |             distance_blob_tuples.append( (distance_between_points((int(blob.minx),int(blob.miny)), (int(previous_blob.maxx),int(previous_blob.miny))), blob) )
 45 |         except:
 46 |             print "type blob: %s, type previous: %s" % (type(blob), type(previous_blob))
 47 |     distance_blob_tuples.sort()
 48 |     return [tup[1] for tup in distance_blob_tuples]
 49 |     
 50 | ## FIX LINE VAR VARIABLE NAMES
 51 | def find_corresponding_anchor(sup_or_sub,anchors,line_num):
 52 |     for anchor in anchors:
 53 |         if (sup_or_sub.minx >= anchor.minx) and (sup_or_sub.maxx <= anchor.maxx):
 54 |             anchor.sup = sup_or_sub
 55 | 
 56 | def vp(blob, vp_dir, left_right_margin = 10, show_blobs=True):
 57 |     def get_col_miny_maxy(column):
 58 |         miny = 0
 59 |         maxy = 0
 60 |         for i,pixel_value in enumerate(column):
 61 |             if pixel_value == 255:
 62 |                 maxy = i
 63 |                 if not miny:
 64 |                     miny = i
 65 |         return miny,maxy
 66 |     print "NEW VP BLOB"
 67 |     colsum_colnum_col_tuples = []
 68 |     vp_point_pairs = []
 69 | 
 70 |     blob_height = int(blob.maxy-blob.miny)
 71 |     blob_width = int(blob.maxx-blob.minx)
 72 |     blob_size = cvSize(blob_width,blob_height)
 73 |     blob_image = cvCreateImage(blob_size, 8, 1)
 74 |     cvZero(blob_image)
 75 |     blob.FillBlob(blob_image, CV_RGB(255,255,255), -1*int(blob.minx), -1*int(blob.miny))
 76 | 
 77 |     if show_blobs:
 78 |         blob_image_2 = cvCreateImage(blob_size, 8, 3)    
 79 |         cvRectangle(blob_image_2, cvPoint(0,0), cvPoint(blob_width, blob_height), CV_RGB(255,255,255), CV_FILLED, 8, 0)
 80 |         blob.FillBlob(blob_image_2, CV_RGB(0,255,0), -1*int(blob.minx), -1*int(blob.miny))        
 81 | 
 82 | ## TO SKIP PIXELS AT LEFT AND RIGHT MARGIN
 83 |     num_cols = int(blob_image.cols)
 84 |     num_cols_minus_margin = num_cols - left_right_margin
 85 | 
 86 |     for (i, col) in enumerate(blob_image.colrange()):
 87 | ## TO SKIP PIXELS AT LEFT AND RIGHT MARGIN
 88 |         if (i+1 < min_char_width) or (i > num_cols_minus_margin):
 89 |             continue
 90 |         col_sum = cvSum(col)
 91 |         colsum_colnum_col_tuples.append((int(col_sum[0]),i,col))
 92 | 
 93 |     colsum_colnum_col_tuples.sort()
 94 | 
 95 |     new_vp_colsum_colnum_col = colsum_colnum_col_tuples[0]
 96 |     new_vp_col = new_vp_colsum_colnum_col[1]
 97 |     delta_miny, delta_maxy = get_col_miny_maxy(new_vp_colsum_colnum_col[2])
 98 |     point1 = cvPoint(int(blob.minx)+new_vp_col, int(blob.miny)+delta_miny)
 99 |     point2 = cvPoint(int(blob.minx)+new_vp_col, int(blob.miny)+delta_maxy)
100 |     vp_point_pairs = (point1,point2)
101 | 
102 |     if show_blobs:
103 |         point1 = cvPoint(0+new_vp_col, 0+delta_miny)
104 |         point2 = cvPoint(0+new_vp_col, 0+delta_maxy)
105 |         cvDrawLine(blob_image_2, point1, point2, CV_RGB(255,0,255), 2, 8, 0)
106 |         info_string = "%s__%04d__%04d.png"
107 |         info = info_string % (str(datetime.datetime.now()), blob_width, blob_height)
108 |         cvSaveImage(os.path.join(vp_dir, info), blob_image_2)
109 | ##         info_string = "%04d__%04d__%s.png"
110 | ##         info = info_string % (blob_width, blob_height, str(datetime.datetime.now())) 
111 | ##         cvSaveImage(os.path.join(vp_dir, info, blob_image_2)
112 | ##    cvReleaseImage(blob_image)    
113 |     return vp_point_pairs
114 | 
def hp(blob, hp_dir, show_blobs=True):
    """Choose a horizontal cut line through ``blob`` (horizontal projection).

    The blob is painted with value 255 into an otherwise zeroed
    single-channel image the size of its bounding box. Every row is summed
    and the row with the smallest sum wins (the top-most one on a tie).

    blob       -- blob object exposing minx/miny/maxx/maxy and FillBlob
    hp_dir     -- directory that receives the debug picture
    show_blobs -- when true, save a picture of the blob with the chosen cut
                  drawn on it into ``hp_dir``

    Returns a ``(point1, point2)`` pair of cvPoint values in the coordinate
    system of the page: the left and right end of the cut.

    Raises IndexError when the blob image has no rows.
    """
    print("NEW HP BLOB")

    def get_row_minx_maxx(row):
        # First and last index holding the value 255; (0, 0) when the row
        # holds none. ``None`` is the "not found yet" marker so that a hit at
        # index 0 is remembered (testing ``not minx`` would discard it and
        # report the second hit as the start).
        first = None
        last = 0
        for i, pixel_value in enumerate(row):
            if pixel_value == 255:
                last = i
                if first is None:
                    first = i
        if first is None:
            return 0, 0
        return first, last

    origin_x = int(blob.minx)
    origin_y = int(blob.miny)
    blob_height = int(blob.maxy-blob.miny)
    blob_width = int(blob.maxx-blob.minx)
    blob_size = cvSize(blob_width, blob_height)

    # Scratch image: blob painted 255 on 0, shifted so that the bounding box
    # starts at (0, 0).
    blob_image = cvCreateImage(blob_size, 8, 1)
    cvZero(blob_image)
    blob.FillBlob(blob_image, CV_RGB(255,255,255), -origin_x, -origin_y)

    candidates = []
    for (i, row) in enumerate(blob_image.rowrange()):
        candidates.append((int(cvSum(row)[0]), i, row))
    if not candidates:
        raise IndexError("hp: blob of height %s leaves no row to cut" % blob_height)

    # Row indexes are unique, so the comparison never reaches the row objects
    # stored in the third slot.
    _, cut_row, cut_row_pixels = min(candidates)
    delta_minx, delta_maxx = get_row_minx_maxx(cut_row_pixels)
    point1 = cvPoint(origin_x + delta_minx, origin_y + cut_row)
    point2 = cvPoint(origin_x + delta_maxx, origin_y + cut_row)

    if show_blobs:
        # Debug picture: green blob on white with the cut drawn in red. The
        # cut found above is reused; scanning the rows a second time would
        # give the same answer.
        debug_image = cvCreateImage(blob_size, 8, 3)
        cvRectangle(debug_image, cvPoint(0,0), cvPoint(blob_width, blob_height), CV_RGB(255,255,255), CV_FILLED, 8, 0)
        blob.FillBlob(debug_image, CV_RGB(0,255,0), -origin_x, -origin_y)
        cvDrawLine(debug_image, cvPoint(delta_minx, cut_row), cvPoint(delta_maxx, cut_row), CV_RGB(255,0,0), 2, 8, 0)
        # NOTE(review): the name only encodes width and height, so two blobs
        # of equal size overwrite each other's picture (vp adds a timestamp).
        info = "w_%04d__h_%04d.png" % (blob_width, blob_height)
        cvSaveImage(os.path.join(hp_dir, info), debug_image)
    return (point1, point2)
179 | 
def get_blob_line(blob, y_values):
    """Return the 1-based number of the line whose y value is nearest the blob.

    ``y_values[0]`` is ignored; ``y_values[n]`` is taken as the y value of
    line ``n``. The blob's ``min_y`` is compared against each of them and the
    line with the smallest absolute difference is returned, the lower line
    number winning a tie. With no line to choose from, ``min`` raises
    ValueError.
    """
    blob_top = blob.min_y
    line_numbers = range(1, len(y_values))
    return min(line_numbers, key=lambda line: abs(blob_top - y_values[line]))
183 |     
def process_image(bi_image, folio_hp_directory, folio_vp_directory, folio_intermediate_directory, folio_characters_directory, folio_name, num_lines, previous_blob_count=0):
    """Cut over-sized blobs apart, then write one image per character blob.

    Pass structure:

    1. Find the blobs of ``bi_image`` with CBlobResult.
    2. Keep every blob whose width and height lie strictly between the
       minimum and the cut-off sizes from config. A blob that exceeds both
       minimums but reaches a cut-off is split instead: vp() picks a cut for
       a too-wide blob, otherwise hp() for a too-tall one, and the cut is
       drawn in white, two pixels thick, onto ``bi_image`` itself.
    3. If anything was cut and the blob count grew by more than one since
       the previous pass, run the whole function again on the modified
       image.
    4. Otherwise save the intermediate drawings, assign each kept blob to a
       line with get_blob_line(), number the blobs of a line from left to
       right and save each one, black on white, into
       ``folio_characters_directory``. A box in the colour of its line is
       drawn around every blob and the result is saved as FINISHED.png.

    bi_image                      -- page image; it is modified in place
    folio_hp_directory            -- receives the debug pictures of hp()
    folio_vp_directory            -- receives the debug pictures of vp()
    folio_intermediate_directory  -- receives blobs_chopped.png, FINISHED.png
                                     and whatever the drawing helpers save
    folio_characters_directory    -- receives the character images
    folio_name                    -- label used in the console output
    num_lines                     -- number of text lines on the page
    previous_blob_count           -- blob count of the previous pass; callers
                                     leave it at 0

    Returns None.
    """
    image_width = bi_image.width
    image_height = bi_image.height
    gray_image = cvCreateImage(cvGetSize(bi_image), 8, 1)
    cvCvtColor(bi_image, gray_image, CV_BGR2GRAY)
    mask = cvCreateImage(cvGetSize(bi_image), 8, 1)
    cvSet(mask,1)
    initial_blobs = CBlobResult(gray_image, mask, 100, False)
    initial_blob_count = initial_blobs.GetNumBlobs()
    print("%s Initial Blob Count:  %s" % (folio_name, initial_blob_count))
    print("%s - Width %s - Height %s" % (folio_name, image_width, image_height))

    need_to_rerun = False
    # NOTE(review): index 0 is left out on purpose here, as before;
    # presumably blob 0 is the background/whole-image blob - confirm against
    # cvBlobsLib.
    blob_list = [initial_blobs.GetBlob(i) for i in range(1,initial_blob_count)]
    blobs = []

    for blob in blob_list:
        width = int(blob.maxx) - int(blob.minx)
        height = int(blob.maxy) - int(blob.miny)

        # Integer copies of the geometry, used by the output stage below and
        # by the drawing helpers.
        blob.width = width
        blob.height = height
        blob.min_y = int(blob.miny)
        blob.max_y = int(blob.maxy)
        blob.min_x = int(blob.minx)
        blob.max_x = int(blob.maxx)

        if max_width_b4_cutoff > width > min_char_width and max_height_b4_cutoff > height > min_char_height:
            blobs.append(blob)
        elif width > min_char_width and height > min_char_height:
            if width >= max_width_b4_cutoff:
                need_to_rerun = True
                vp_point_0, vp_point_1 = vp(blob, folio_vp_directory)
                cvDrawLine(bi_image, vp_point_0, vp_point_1, CV_RGB(255,255,255), 2, 8, 0)
            elif height >= max_height_b4_cutoff:
                need_to_rerun = True
                hp_point_0, hp_point_1 = hp(blob, folio_hp_directory)
                cvDrawLine(bi_image, hp_point_0, hp_point_1, CV_RGB(255,255,255), 2, 8, 0)

    # Another pass is only worthwhile while cutting still produces new
    # blobs; this test is also what ends the recursion.
    if need_to_rerun and initial_blob_count - previous_blob_count > 1:
        process_image(bi_image, folio_hp_directory, folio_vp_directory, folio_intermediate_directory, folio_characters_directory, folio_name, num_lines, previous_blob_count=initial_blob_count)
        return

    print("FINISHED CHOPPING BLOBS for %s" % folio_name)
    cvSaveImage(os.path.join(folio_intermediate_directory,"blobs_chopped.png"), bi_image)

    # Images needed for the misc. drawings.
    line_drawing_image = cvCloneImage(bi_image)
    bounding_box_image = cvCloneImage(bi_image)
    black_square_image = cvCreateImage(cvGetSize(bi_image), 8, 1)
    cvRectangle(black_square_image, cvPoint(0,0), cvPoint(black_square_image.width, black_square_image.height), CV_RGB(255,255,255), CV_FILLED, 8, 0)
    line_drawing_from_black_squares_image = cvCloneImage(bi_image)
    curved_bottom_line_drawing_image = cvCloneImage(bi_image)

    create_black_square_image(blobs, folio_intermediate_directory, black_square_image)
    y_values = get_y_values_and_create_line_drawing(blobs, folio_intermediate_directory, line_drawing_image, num_lines)
    y_bottom_values = create_line_drawing_from_black_squares(line_drawing_from_black_squares_image, black_square_image, folio_intermediate_directory, y_values)
    create_curved_bottom_line_drawing_image(curved_bottom_line_drawing_image, folio_intermediate_directory, y_bottom_values)
    # get_blob_line() ignores element 0, so pad the list to make the index
    # equal to the line number.
    y_values.insert(0, 0)
    create_histogram(blobs, folio_intermediate_directory)
    create_bounding_box_image(blobs, folio_intermediate_directory, bounding_box_image)

    # Character output.
    char_string = "text_%02d__folio_%02d__line_%02d__char_%02d__miny_%04d.png"
    # One box colour per line. Line n uses entry n-1; the lookup wraps
    # around so that pages with more lines than colours still work.
    line_colors = (
        (45,106,19), (255,0,255), (0,255,255), (255,0,0), (0,0,255),
        (0,255,0), (213,111,56), (50,5,150), (205,5,0), (56,70,105),
        (222,35,5), (25,250,14), (40,40,45), (50,55,60), (23,11,156),
        (240,0,240),
    )

    # Left-to-right order; the per-line counters then number the characters
    # of each line in reading order.
    blobs = sorted( blobs, key=operator.attrgetter('min_x') )
    line_counters = [1] * (num_lines+1)
    for blob in blobs:
        line = get_blob_line(blob, y_values)
        char_num = line_counters[line]
        line_counters[line] = char_num + 1

        char_image_name = char_string % (0, 0, line, char_num, blob.min_y)
        blob_size = cvSize(blob.max_x - blob.min_x, blob.max_y - blob.min_y)
        blob_image = cvCreateImage(blob_size, 8, 1)
        # White canvas, blob painted black, shifted to the canvas origin.
        cvRectangle(blob_image, cvPoint(0,0), cvPoint(blob_image.width, blob_image.height), CV_RGB(255,255,255), CV_FILLED, 8, 0)
        blob.FillBlob(blob_image, CV_RGB(0,0,0), -1*blob.min_x, -1*blob.min_y)
        cvSaveImage(os.path.join(folio_characters_directory,char_image_name), blob_image)

        box_color = line_colors[(line - 1) % len(line_colors)]
        cvRectangle(bi_image,
                    cvPoint(blob.min_x, blob.min_y),
                    cvPoint(blob.max_x, blob.max_y),
                    CV_RGB(*box_color), 1, 8, 0
                    )
    cvSaveImage(os.path.join(folio_intermediate_directory, "FINISHED.png"), bi_image)
    print("line_counters: %s" % line_counters)
279 | 
280 |     
if __name__ == "__main__":
    # Process every file of INPUT_IMAGE_DIR into its own time-stamped
    # directory tree below OUTPUT_IMAGE_DIR.
    for folio_image_name in os.listdir(INPUT_IMAGE_DIR):
        # Strip only the final extension, so "page.12.jpg" stays "page.12".
        folio_stem = os.path.splitext(folio_image_name)[0]
        folio_name = "%s_%s" % ( datetime.datetime.now().strftime("Y%Y_m%m_d%d_H%H_M%M_S%S"), folio_stem )
        base_folio_directory = os.path.join(OUTPUT_IMAGE_DIR, folio_name)
        folio_hp_directory = os.path.join(base_folio_directory, 'hp')
        folio_vp_directory = os.path.join(base_folio_directory, 'vp')
        folio_intermediate_directory = os.path.join(base_folio_directory, 'intermediate')
        folio_characters_directory = os.path.join(base_folio_directory, 'characters')
        try:
            # The base directory goes first, so that an already existing
            # output directory is refused rather than reused.
            for directory in (base_folio_directory,
                              folio_hp_directory,
                              folio_vp_directory,
                              folio_intermediate_directory,
                              folio_characters_directory):
                os.makedirs(directory)
        except OSError:
            # Without its directories none of the results could be saved, so
            # move on to the next image.
            print("Cannot make directories for image: %s" % folio_name)
            continue
        binary_image = cvLoadImage(os.path.join(INPUT_IMAGE_DIR, folio_image_name))
        if binary_image:
            process_image(binary_image, folio_hp_directory, folio_vp_directory, folio_intermediate_directory, folio_characters_directory, folio_name, num_lines=8)
        else:
            print("Cannot load image: %s from directory: %s" % (folio_image_name, INPUT_IMAGE_DIR))
302 | 


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
# Settings for character_segmentation.py, which pulls them in with
# "from config import *".

# Directory whose entries are loaded one by one as folio images.
INPUT_IMAGE_DIR = "/media/extra/ocr/ocr_input_image_dir_alt/"
# Directory below which one time-stamped output directory per folio is made.
OUTPUT_IMAGE_DIR = "/media/extra/ocr/ocr_output_image_dir/"

# A blob must be strictly wider and taller than these to be kept or cut;
# anything smaller is dropped. min_char_width is also the number of left-hand
# columns vp() leaves out when looking for a cut.
# (Compared against bounding-box extents, presumably in pixels.)
min_char_width = 20
min_char_height = 27

# A blob at or above the width cut-off is split by vp(); otherwise a blob at
# or above the height cut-off is split by hp().
max_width_b4_cutoff = 80
max_height_b4_cutoff = 120

# NOTE(review): the sub/superscript limits are not referenced by
# character_segmentation.py; presumably kept for a planned or other
# consumer - confirm.
min_sub_super_width = 21
min_sub_super_height = 14
max_sub_super_width_b4_cutoff = 66
max_sub_super_height_b4_cutoff = 38

# NOTE(review): not referenced by character_segmentation.py itself;
# presumably read by the drawing helpers in line_drawing.py - confirm.
MAKE_HISTOGRAM = True
MAKE_BOUNDING_BOX_IMAGE = True
17 | 


--------------------------------------------------------------------------------
/get_min_max_char_width.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import operator
 4 | import time
 5 | 
 6 | from PIL import Image
 7 | 
 8 | from opencv.cv import *
 9 | from opencv.highgui import *
10 | 
11 | from pyblobs.BlobResult import CBlobResult
12 | from pyblobs.Blob import CBlob # Note: This must be imported in order to destroy blobs and use other methods
13 | 
def get_min_max_char_width():
    """Print the blob count of every image in ``./characters/WIDTH/``.

    The directory is resolved against the current working directory. Each
    entry is loaded, converted to grey scale and handed to CBlobResult; the
    number of blobs found is printed, followed by a three second pause.
    Entries that cannot be loaded as an image are reported and skipped.

    NOTE(review): despite its name the function does not compute a minimum
    or maximum width yet; it only reports blob counts.
    """
    char_dir_path = os.path.join(os.getcwd(), 'characters', 'WIDTH')
    for char in os.listdir(char_dir_path):
        image_path = os.path.join(char_dir_path, char)
        bi_image = cvLoadImage(image_path)
        if not bi_image:
            # The image could not be read; the calls below need a real image.
            print("Cannot load image: %s" % image_path)
            continue
        gray_image = cvCreateImage(cvGetSize(bi_image), 8, 1)
        cvCvtColor(bi_image, gray_image, CV_BGR2GRAY)
        mask = cvCreateImage(cvGetSize(bi_image), 8, 1)
        cvSet(mask,1)
        initial_blobs = CBlobResult(gray_image, mask, 100, False)
        initial_blob_count = initial_blobs.GetNumBlobs()
        print("initial_blob_count:  %s" % initial_blob_count)
        time.sleep(3)


if __name__ == '__main__':
    get_min_max_char_width()
31 | 
32 |     
33 | 


--------------------------------------------------------------------------------
/legacy_functions/2009_07_14.txt:
--------------------------------------------------------------------------------
 1 | run the boxing algo
 2 | and get top of lines
 3 | then run the vertical projection code from top of line to another Y pixels down , where Y is the avg line height
 4 | then do that all the way to Y/2
 5 | get the avg of those vertical projections
 6 | the min of that will indicate character boundaries.
 7 | the code fragment to do vpp in C will help
 8 | just to do better character segmenting
 9 | 
10 | 


--------------------------------------------------------------------------------
/legacy_functions/avg_box_width.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | import sys
 3 | from stats import mean, lmean, stdev, lstdev
 4 | 
 5 | from opencv.cv import *
 6 | from opencv.highgui import *
 7 | 
 8 | def get_avg_box_width():
 9 |     box_widths = []
10 |     
11 |     filename = './image/test_bi3.jpg'
12 |     image = cvLoadImage(filename, CV_8UC1)
13 |     storage = cvCreateMemStorage(0)
14 |     input_image = cvCloneImage(image)
15 | #    output_image = cvCloneImage(image)
16 |     output_image = cvCreateImage(cvGetSize(input_image), 8, 3)
17 |     cvCvtColor(input_image, output_image, CV_GRAY2BGR)
18 |     count, contours = cvFindContours (input_image, storage, sizeof_CvContour, CV_RETR_CCOMP, CV_CHAIN_APPROX_NONE, cvPoint (0,0))
19 |     for contour in contours.hrange():
20 |         bbox = cvBoundingRect(contour, 0)
21 |         box_width = bbox.width
22 |         if 100 > box_width > 10:
23 |             box_widths.append(box_width)
24 | #    return box_widths
25 |     width_mean = mean(box_widths)
26 |     width_lmean = lmean(box_widths)
27 |     width_stdev = stdev(box_widths)
28 |     width_lstdev = lstdev(box_widths)    
29 |     return (width_mean,width_lmean,width_stdev,width_lstdev)
30 | 


--------------------------------------------------------------------------------
/legacy_functions/count_chars.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | def count_chars():
 4 |     cwd = os.getcwd()    
 5 |     char_dir_path = cwd + '/characters/'
 6 |     char_dirs = os.listdir(char_dir_path)
 7 |     for char_dir in char_dirs:
 8 |         print char_dir, ' -> ', len(os.listdir(char_dir_path+char_dir))
 9 | 
# Script entry point: print the per-directory counts when this file is
# executed directly rather than imported.
if __name__ == '__main__':
    count_chars()
12 | 
13 |     
14 | 


--------------------------------------------------------------------------------
/legacy_functions/create_bbox.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | import sys
  4 | 
  5 | from opencv.cv import *
  6 | from opencv.highgui import *
  7 | 
  8 | from pyblobs.BlobResult import CBlobResult
  9 | from pyblobs.Blob import CBlob # Note: This must be imported in order to destroy blobs and use other methods
 10 | 
 11 | def output_images(input_image_filename, output_image_directory):
 12 |     bi_image  = cvLoadImage(input_image_filename)
 13 |     output_image = cvCloneImage(bi_image)
 14 |     gray_image = cvCreateImage(cvGetSize(bi_image), 8, 1)
 15 |     cvCvtColor(bi_image, gray_image, CV_BGR2GRAY)
 16 |     
 17 |     mask = cvCreateImage(cvGetSize(bi_image), 8, 1)
 18 |     cvSet(mask,1)
 19 |     initial_blobs = CBlobResult(gray_image, mask, 100, False)
 20 |     #initial_blobs.filter_blobs(10, 10000)
 21 |     initital_blob_count = initial_blobs.GetNumBlobs()
 22 |     print initital_blob_count
 23 |     
 24 |     for i in range(initital_blob_count):
 25 |         blob = initial_blobs.GetBlob(i)
 26 | ##         if 50 < blob.area < 9500:        
 27 | ##             rect_dims.append((blob.minx,blob.miny,blob.maxx,blob.maxy,i))
 28 | ##     for i in range(initital_blob_count):
 29 | ##         blob = initial_blobs.GetBlob(i)
 30 | ##         for rect_dim in rect_dims:
 31 | ##             if blob.minx > rect_dim[0] and blob.miny > rect_dim[1] and blob.maxx < rect_dim[2] and blob.maxy < rect_dim[3]:
 32 | ##                 bigger_blob = initial_blobs.GetBlob(rect_dim[4])
 33 | ##                 new_blob = bigger_blob.CopyEdges(blob)
 34 | ##                 final_blobs.AddBlob(new_blob)
 35 | ##             else:
 36 | ##                 final_blobs.AddBlob(blob)
 37 | ##     new_blob_count = final_blobs.GetNumBlobs()
 38 | ##     print new_blob_count
 39 | ##     for i in range(new_blob_count):
 40 | ##         blob = final_blobs.GetBlob(i)
 41 |         
 42 |             #print "%d: Area = %d" % (i, my_enumerated_blob.Area())
 43 |             #blob.FillBlob(output_image, CV_RGB(255,0,0), 0, 0)    
 44 |         cvRectangle(output_image,
 45 |                     cvPoint(int(blob.minx),int(blob.miny)),
 46 |                     cvPoint(int(blob.maxx),int(blob.maxy)),
 47 |                     CV_RGB(0,255,0), 1, 8, 0 
 48 |                     )
 49 | 
 50 |     cvSaveImage(output_image_directory+"input.jpg", gray_image)
 51 |     cvSaveImage(output_image_directory+"output.jpg", output_image)
 52 | 
 53 | def draw_bounding_boxes(input_image_filename, output_image_filename):
 54 |     bi_image  = cvLoadImage(input_image_filename)
 55 |     output_image = cvCloneImage(bi_image)
 56 |     gray_image = cvCreateImage(cvGetSize(bi_image), 8, 1)
 57 |     cvCvtColor(bi_image, gray_image, CV_BGR2GRAY)
 58 |     
 59 |     mask = cvCreateImage(cvGetSize(bi_image), 8, 1)
 60 |     cvSet(mask,1)
 61 |     initial_blobs = CBlobResult(gray_image, mask, 100, False)
 62 |     #initial_blobs.filter_blobs(10, 10000)
 63 |     initital_blob_count = initial_blobs.GetNumBlobs()
 64 |     print initital_blob_count
 65 |     
 66 |     for i in range(initital_blob_count):
 67 |         blob = initial_blobs.GetBlob(i)
 68 | ##         if 50 < blob.area < 9500:        
 69 | ##             rect_dims.append((blob.minx,blob.miny,blob.maxx,blob.maxy,i))
 70 | ##     for i in range(initital_blob_count):
 71 | ##         blob = initial_blobs.GetBlob(i)
 72 | ##         for rect_dim in rect_dims:
 73 | ##             if blob.minx > rect_dim[0] and blob.miny > rect_dim[1] and blob.maxx < rect_dim[2] and blob.maxy < rect_dim[3]:
 74 | ##                 bigger_blob = initial_blobs.GetBlob(rect_dim[4])
 75 | ##                 new_blob = bigger_blob.CopyEdges(blob)
 76 | ##                 final_blobs.AddBlob(new_blob)
 77 | ##             else:
 78 | ##                 final_blobs.AddBlob(blob)
 79 | ##     new_blob_count = final_blobs.GetNumBlobs()
 80 | ##     print new_blob_count
 81 | ##     for i in range(new_blob_count):
 82 | ##         blob = final_blobs.GetBlob(i)
 83 |         
 84 |             #print "%d: Area = %d" % (i, my_enumerated_blob.Area())
 85 |             #blob.FillBlob(output_image, CV_RGB(255,0,0), 0, 0)    
 86 |         cvRectangle(output_image,
 87 |                     cvPoint(int(blob.minx),int(blob.miny)),
 88 |                     cvPoint(int(blob.maxx),int(blob.maxy)),
 89 |                     CV_RGB(0,255,0), 1, 8, 0 
 90 |                     )
 91 | 
 92 |     cvSaveImage(output_image_filename, gray_image)
 93 |     cvSaveImage(output_image_filename, output_image)    
 94 | 
 95 | if __name__ == "__main__":
 96 |     if sys.argv[0]=="output_images":
 97 |         input_image_filename = sys.argv[1]
 98 |         output_image_directory  = sys.argv[2]
 99 |         output_images(input_image_filename, output_image_directory)
100 |     elif sys.argv[0]=="draw_bounding_boxes":
101 |         input_image_filename = sys.argv[1]
102 |         output_image_filename  = sys.argv[2]
103 |         draw_bounding_boxes(input_image_filename, output_image_filename)
104 |     else:
105 |         print """
106 |         Options:
107 |         1. output_images [input_image_filename] [output_image_directory]
108 |         2. draw_bounding_boxes [input_image_filename] [output_image_filename]
109 |         """
110 |         
111 | ### Get the size of the contour
112 | ##size = abs(cvContourArea(contour))
113 | 
114 | ### Is convex
115 | ##is_convex = cvCheckContourConvexity(contour)
116 | 


--------------------------------------------------------------------------------
/legacy_functions/create_bbox_contours.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | import sys
 3 | from opencv.cv import *
 4 | from opencv.highgui import *
 5 | 
 6 | ##abc = ['__class__', '__del__', '__delattr__', '__dict__', '__doc__', '__format__', '__getattr__', '__getattribute__', '__getitem__', '__hash__', '__init__', '__iter__', '__module__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setitem__', '__sizeof__', '__str__', '__subclasshook__', '__swig_destroy__', '__swig_getmethods__', '__swig_setmethods__', '__weakref__', '_s', 'append', 'block_max', 'cast', 'delta_elems', 'elem_size', 'first', 'flags', 'free_blocks', 'h_next', 'h_prev', 'header_size', 'hrange', 'pop', 'ptr', 'storage', 'this', 'total', 'v_next', 'v_prev', 'vrange']
 7 | ### Get the size of the contour
 8 | ##size = abs(cvContourArea(contour))
 9 | ##
10 | ### Is convex
11 | ##is_convex = cvCheckContourConvexity(contour)
12 | ##for i in range(a[91].total):
13 | ##	print a[91][i]
def create_bbox():
    """Draw bounding boxes of mid-sized contours onto the raw test image.

    Contours are extracted from the gray-scale version of
    ``./image/test_bi2.jpg``.  For every contour whose absolute area lies
    strictly between 50 and 9500, a green rectangle covering its bounding
    box is drawn on a copy of ``./image/test_raw.jpg``.  The gray image is
    saved as ``./image/input.jpg`` and the annotated copy as
    ``./image/output.jpg``.
    """
    raw_image_filename = './image/test_raw.jpg'
    bi_image_filename = './image/test_bi2.jpg'

    raw_image = cvLoadImage(raw_image_filename)
    bi_image = cvLoadImage(bi_image_filename)

    output_image = cvCloneImage(raw_image)
    gray = cvCreateImage(cvGetSize(bi_image), 8, 1)
    # The file only does ``from opencv.cv import *`` and binds no name
    # ``cv``, so the constant is referenced by its bare name.
    cvCvtColor(bi_image, gray, CV_BGR2GRAY)

    storage = cvCreateMemStorage(0)
    count, contours = cvFindContours(gray, storage, sizeof_CvContour,
                                     CV_RETR_TREE, CV_CHAIN_APPROX_SIMPLE,
                                     cvPoint(0, 0))
    for contour in contours.hrange():
        contour_size = abs(cvContourArea(contour))
        # Skip contours outside the accepted area range.
        if 50 < contour_size < 9500:
            bbox = cvBoundingRect(contour, 0)
            cvRectangle(output_image,
                        cvPoint(int(bbox.x), int(bbox.y)),
                        cvPoint(int(bbox.x+bbox.width), int(bbox.y+bbox.height)),
                        CV_RGB(0, 255, 0), 1, 8, 0)

    cvSaveImage("./image/input.jpg", gray)
    cvSaveImage("./image/output.jpg", output_image)
51 |     
52 | 
# Script entry point: generate the bounding-box images when this file is
# executed directly rather than imported.
if __name__ == "__main__":
    create_bbox()
55 | 


--------------------------------------------------------------------------------
/legacy_functions/cum_sum.py:
--------------------------------------------------------------------------------
  1 | from PIL import Image, ImageDraw
  2 | 
  3 | import numpy as np
  4 | import matplotlib
  5 | matplotlib.use('Agg')
  6 | from matplotlib import pyplot
  7 | 
  8 | from opencv.cv import *
  9 | from opencv.highgui import *
 10 | 
 11 | def plot_y(x,y):
 12 |     fig = pyplot.figure()
 13 |     ax = fig.add_subplot(111)
 14 |     ax.plot(x,y)
 15 | 
 16 |     ax.set_xlabel('y value')
 17 |     ax.set_ylabel('cum_sum_count')
 18 |     ax.grid(True)
 19 | 
 20 |     fig.savefig('/home/ryan/ocr/openCV/test/image/y_value_hist_04.png')
 21 | 
 22 | def get_counts(array):
 23 |     y_vals_with_counts = [(y,array.count(y)) for y in array]
 24 |     y_vals_with_counts_distinct = set(y_vals_with_counts)
 25 |     y_vals_with_counts_distinct_list = list(y_vals_with_counts_distinct)
 26 |     y_vals_with_counts_distinct_list.sort()
 27 |     return y_vals_with_counts_distinct_list                          
 28 | 
 29 | def cum_sum(array):
 30 |     set_with_counts = get_counts(array)
 31 |     last_five = [0,0,0,0,0]
 32 |     cum_sum_x = []
 33 |     cum_sum_y = []
 34 |     for (y,y_val_count) in set_with_counts:
 35 |         assert len(last_five) == 5
 36 | 	new_sum = sum(last_five)+y_val_count
 37 | 	cum_sum_x.append(y)
 38 | 	cum_sum_y.append(new_sum)
 39 | 	last_five.append(y_val_count)
 40 | 	last_five.pop(0)
 41 | ##    cum_summed.sort()
 42 | ##    cum_summed.reverse()
 43 |     plot_y(cum_sum_x,cum_sum_y)
 44 | 
 45 | def draw_lines_2(line_num_anchors_dict, input_image_path = "/home/ryan/ocr/test/input_image/2009_10_19/thresh48_BUM_BA_0010_01.png", min_dist_betw_lines=70, num_of_lines=8):
 46 |     im = Image.open(input_image_path)
 47 |     draw = ImageDraw.Draw(im)
 48 |     for line_num, anchors in line_num_anchors_dict.items():
 49 |         draw.line(anchors, fill=128)
 50 |     del draw 
 51 | 
 52 |     im.save("/home/ryan/ocr/test/input_image/2009_10_19/thresh48_BUM_BA_0010_01_poly.png")
 53 |     print "drawn"
 54 | 
 55 | def get_lines(array, bi_image, min_dist_betw_lines=70, num_of_lines=8):
 56 |     drawn_ys = []
 57 |     def should_draw_line(y1):
 58 |         for drawn_y in drawn_ys:
 59 |             if not abs(drawn_y-y1) > min_dist_betw_lines:
 60 |                 return False
 61 |         return True
 62 |     
 63 |     set_with_counts = get_counts(array)
 64 |     last_five = [0,0,0,0,0]
 65 |     cum_sum_with_y = []
 66 |     for (y,y_val_count) in set_with_counts:
 67 |         assert len(last_five) == 5
 68 | 	new_sum = sum(last_five)+y_val_count
 69 | 	cum_sum_with_y.append((new_sum,y))
 70 | 	last_five.append(y_val_count)
 71 | 	last_five.pop(0)
 72 |     cum_sum_with_y.sort()
 73 |     cum_sum_with_y.reverse()
 74 | 
 75 |     width = int(bi_image.width)
 76 |     for (count,y1) in cum_sum_with_y:
 77 |         if len(drawn_ys) >= num_of_lines:
 78 |             break
 79 |         if should_draw_line(y1):
 80 |             point1 = cvPoint(0,y1-3)
 81 |             point2 = cvPoint(width,y1-3)                
 82 |             drawn_ys.append(y1)
 83 |     drawn_ys.sort()
 84 |     print "drawn_ys: ",drawn_ys
 85 |     return drawn_ys
 86 | 
 87 | ##     lines_dict = dict([(i+1,y) for i,y in enumerate(drawn_ys)])
 88 | ##     print "lines_dict: ", lines_dict
 89 | ##     return lines_dict
 90 | 
 91 | 
 92 | ## def draw_lines(array, path_to_image_to_draw_on="/home/ryan/ocr/test/image/test_raw.png", min_dist_betw_lines=70):
 93 | ##     drawn_ys = []
 94 | ##     def should_draw_line(y1):
 95 | ##         for drawn_y in drawn_ys:
 96 | ##             if not abs(drawn_y-y1) > min_dist_betw_lines:
 97 | ##                 return False
 98 | ##         return True
 99 |     
100 | ##     set_with_counts = get_counts(array)
101 | ##     last_five = [0,0,0,0,0]
102 | ##     cum_sum_with_y = []
103 | ##     for (y,y_val_count) in set_with_counts:
104 | ##         assert len(last_five) == 5
105 | ## 	new_sum = sum(last_five)+y_val_count
106 | ## 	cum_sum_with_y.append((new_sum,y))
107 | ## 	last_five.append(y_val_count)
108 | ## 	last_five.pop(0)
109 | ##     cum_sum_with_y.sort()
110 | ##     cum_sum_with_y.reverse()
111 | 
112 | ##     im = Image.open(path_to_image_to_draw_on)
113 | ##     width = im.size[0]
114 | ##     draw = ImageDraw.Draw(im)    
115 | ##     for (count,y1) in cum_sum_with_y:
116 | ##         if len(drawn_ys) == 16:
117 | ##             break
118 | ##         if should_draw_line(y1):
119 | ##             line_dims = [0,y1-3,width,y1-3]
120 | ##             draw.line(line_dims, fill="red")
121 | ##             drawn_ys.append(y1)
122 | ##     del draw 
123 | ##     im.save("/home/ryan/ocr/test/output_image/image_with_lines_raw.png", "PNG")
124 | ##     print "drawn_ys: ",drawn_ys
125 | 
126 | ## def draw_lines_2(array, bi_image, min_dist_betw_lines=70, num_of_lines=8):
127 | ##     drawn_ys = []
128 | ##     def should_draw_line(y1):
129 | ##         for drawn_y in drawn_ys:
130 | ##             if not abs(drawn_y-y1) > min_dist_betw_lines:
131 | ##                 return False
132 | ##         return True
133 |     
134 | ##     set_with_counts = get_counts(array)
135 | ##     last_five = [0,0,0,0,0]
136 | ##     cum_sum_with_y = []
137 | ##     for (y,y_val_count) in set_with_counts:
138 | ##         assert len(last_five) == 5
139 | ## 	new_sum = sum(last_five)+y_val_count
140 | ## 	cum_sum_with_y.append((new_sum,y))
141 | ## 	last_five.append(y_val_count)
142 | ## 	last_five.pop(0)
143 | ##     cum_sum_with_y.sort()
144 | ##     cum_sum_with_y.reverse()
145 | 
146 | ##     width = int(bi_image.width)
147 | ##     for (count,y1) in cum_sum_with_y:
148 | ##         if len(drawn_ys) == num_of_lines:
149 | ##             break
150 | ##         if should_draw_line(y1):
151 | ##             point1 = cvPoint(0,y1-3)
152 | ##             point2 = cvPoint(width,y1-3)                
153 | ##             cvDrawLine(bi_image, point1, point2, CV_RGB(255,0,0), 1, 8, 0)
154 | ##             drawn_ys.append(y1)
155 | ##     cvSaveImage("/home/ryan/ocr/test/output_image/vp_lines_boxes.png", bi_image)
156 | ##     print "drawn_ys: ",drawn_ys
157 | 


--------------------------------------------------------------------------------
/legacy_functions/cut_images.py:
--------------------------------------------------------------------------------
 1 | import os, sys
 2 | 
 3 | from PIL import Image
 4 | 
 5 | def cut_images(input_dir, output_dir):
 6 |     for item in os.listdir(input_dir):
 7 |         item_path = os.path.join(input_dir,item)
 8 |         if os.path.isdir(item_path):
 9 |             print "Could not cut. Reason: Is a Directory: ", item
10 | 
11 |         else:
12 |             orig_image_name, orig_image_ext = os.path.splitext(item)
13 |             pill_image = Image.open(os.path.join(item_path))
14 |             w = pill_image.size[0]
15 |             h = pill_image.size[1]        
16 |         
17 |             top_half_region = pill_image.crop((0,0,w,h/2))
18 |             top_half_image = Image.new("RGB", top_half_region.size, color=None)
19 |             top_half_image.paste(top_half_region)
20 |             top_half_image.save(os.path.join(output_dir,orig_image_name+"_01"+orig_image_ext))
21 |         
22 |             bottom_half_region = pill_image.crop((0,h/2,w,h))
23 |             bottom_half_image = Image.new("RGB", bottom_half_region.size, color=None)
24 |             bottom_half_image.paste(bottom_half_region)            
25 |             bottom_half_image.save(os.path.join(output_dir,orig_image_name+"_02"+orig_image_ext))
26 | 
27 | if __name__ == "__main__":
28 |     abort = False    
29 |     num_args = len(sys.argv)
30 | 
31 |     if num_args == 2:
32 |         input_dir = output_dir = sys.argv[1]
33 |     elif num_args == 3:
34 |         input_dir = sys.argv[1]
35 |         output_dir = sys.argv[2]
36 |     else:
37 |         abort = True
38 | 
39 |     if not abort:
40 |         cut_images(input_dir, output_dir)
41 |         print "\nImages placed in: ", output_dir, "\n"
42 |     else:
43 |         print "\nUsage: python cut_images.py input_dir [output_dir]\n"
44 | 


--------------------------------------------------------------------------------
/legacy_functions/draw_lines.py:
--------------------------------------------------------------------------------
# NOTE(review): this appears to be a fragment taken from inside a larger
# function.  Every name it reads (lines, image_width, image_height,
# left_margin, top_margin, initial_blobs, initial_blob_count, the
# min/max width and height limits, num_lines_on_pecha, bi_image and
# process_image) is defined outside this file.
#
# When no line positions are available, collect the top y coordinate of
# every accepted blob, derive line positions from them with
# cum_sum.get_lines, and call process_image again with those lines.
if not lines:
## OUTPUT CHARACTER IMAGES
            from cum_sum import get_lines
            # Top y coordinates of the blobs that pass both filters below.
            y1_array = []
            # Margins are mirrored: the right/bottom limits sit as far from
            # their edges as the left/top margins sit from theirs.
            right_margin = image_width - left_margin
            bottom_margin = image_height - top_margin
            # Blob 0 is skipped -- presumably a whole-image/background blob;
            # TODO confirm.
            for i in range(1,initial_blob_count):
                blob = initial_blobs.GetBlob(i)
                minx = int(blob.minx)                
                maxx = int(blob.maxx)
                miny = int(blob.miny)
                maxy = int(blob.maxy)                
                width = maxx - minx
                height = maxy - miny
                # Keep blobs whose size is within the configured limits ...
                if (min_char_width <= width <= max_width_b4_cutoff) and (min_char_height <= height <= max_height_b4_cutoff):
                    # ... and that lie strictly inside all four margins.
                    if (minx > left_margin) and (maxx < right_margin) and (miny > top_margin) and (maxy < bottom_margin):                
                        y1_array.append(miny)
            lines = get_lines(y1_array, bi_image, num_of_lines=num_lines_on_pecha)
            process_image(bi_image, lines=lines)
20 | 


--------------------------------------------------------------------------------
/legacy_functions/extra_statements.py:
--------------------------------------------------------------------------------
## Collection of stand-alone snippets kept for reference.  None of the
## names used below (contour, blob, output_image, gray_image, operator,
## line_anchors, margins, size limits, ...) is defined in this file, and
## the indented "DRAW POLYGONS" section only makes sense inside another
## block, so the file is not runnable as it stands.

## Get the size of the contour
size = abs(cvContourArea(contour))

## Find out whether the contour is convex
is_convex = cvCheckContourConvexity(contour)

## Draw a green rectangle spanning the blob's min/max x and y
cvRectangle(output_image,
            cvPoint(int(blob.minx),int(blob.miny)),
            cvPoint(int(blob.maxx),int(blob.maxy)),
            CV_RGB(0,255,0), 1, 8, 0 
            )

## Sort by blob attribute (here: 'area'); sorted() returns a new list
sorted(blobs_big_and_small,key=operator.attrgetter('area'))

## Save image
cvSaveImage("/home/ryan/ocr/openCV/test/image/input.png", gray_image)

## DRAW POLYGONS
            from cum_sum import draw_lines_2
            # Start with an empty point list for every line number.
            line_num__empty_list_tuples = [(k,[]) for k in line_anchors.keys()]
            d = dict(line_num__empty_list_tuples)
            right_margin = image_width - left_margin
            bottom_margin = image_height - top_margin
            for line_num, anchors in line_anchors.items():
                # Visit the blobs of a line in order of increasing minx.
                for i,blob in enumerate(sorted(anchors,key=operator.attrgetter('minx'))):
                    minx = int(blob.minx)
                    maxx = int(blob.maxx)
                    miny = int(blob.miny)
                    maxy = int(blob.maxy)                
                    width = maxx - minx
                    height = maxy - miny
                    # Keep blobs whose size is within the configured limits ...
                    if (min_char_width <= width <= max_width_b4_cutoff) and (min_char_height <= height <= max_height_b4_cutoff):
                        # ... and that lie strictly inside all four margins.
                        if (minx > left_margin) and (maxx < right_margin) and (miny > top_margin) and (maxy < bottom_margin):                
                            # Both ends of the blob's top edge become points
                            # of the line's poly-line.
                            d[line_num].append((blob.minx, blob.miny))
                            d[line_num].append((blob.maxx, blob.miny))

            print "draw lines"
            draw_lines_2(d)
            print 'draw lines done'
42 | 


--------------------------------------------------------------------------------
/legacy_functions/hp.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import operator
 3 | import time
 4 | 
 5 | from PIL import Image
 6 | 
 7 | from opencv.cv import *
 8 | from opencv.highgui import *
 9 | 
10 | from pyblobs.BlobResult import CBlobResult
11 | from pyblobs.Blob import CBlob
12 | 
def hp(blob, suspected_num_chars=2, min_char_height=27):
    """Pick horizontal cut rows for a blob suspected to hold stacked chars.

    The blob is filled into its own single-channel image.  Every row that
    is not within ``min_char_height`` rows of the top or bottom is scored
    with the sum of its pixels, and rows are visited from the smallest sum
    upwards.  A row becomes a cut when it is at least ``min_char_height``
    rows away from every cut chosen before it; selection stops once
    ``suspected_num_chars - 1`` cuts have been made.

    Parameters:
        blob                -- blob object providing minx/miny/maxx/maxy
                               and ``FillBlob``.
        suspected_num_chars -- number of characters the blob is believed
                               to contain.
        min_char_height     -- minimum height of a character in rows.

    Returns a list of ``[point1, point2]`` pairs; each pair spans the blob
    from ``blob.minx`` to ``blob.maxx`` at ``blob.miny`` plus the cut row.

    Side effects: prints each chosen row, always saves the blob image to
    ``/home/ryan/openCV/output_images/7.jpg`` and, when the number of
    resulting pieces differs from ``suspected_num_chars``, also saves it
    under ``/home/ryan/openCV/hp_blobs/`` with the dimensions and counts in
    the file name.
    """
    blob_height = int(blob.maxy-blob.miny)
    blob_width = int(blob.maxx-blob.minx)
    blob_image = cvCreateImage(cvSize(blob_width, blob_height), 8, 1)
    cvZero(blob_image)
    # Draw the blob shifted so that its bounding box starts at (0, 0).
    blob.FillBlob(blob_image, CV_RGB(255,255,255), -1*int(blob.minx), -1*int(blob.miny))

    # Rows closer than min_char_height to the top or bottom edge are not
    # candidates for a cut.
    num_rows = blob_image.rows
    num_rows_minus_min_char_height = num_rows - min_char_height
    rowsum_rownum_tuples = []
    for (i, row) in enumerate(blob_image.rowrange()):
        if i+1 < min_char_height or i > num_rows_minus_min_char_height:
            continue
        row_sum = cvSum(row)
        rowsum_rownum_tuples.append((int(row_sum[0]), i+1))

    # Smallest row sums first.
    rowsum_rownum_tuples.sort()

    hp_rows = []
    hp_point_pairs = []
    chars_cut = 1
    for (row_sum, new_hp_row) in rowsum_rownum_tuples:
        if chars_cut >= suspected_num_chars:
            break
        # The candidate must keep its distance from every earlier cut, and
        # is recorded at most once.
        if not all(abs(hp_row - new_hp_row) >= min_char_height for hp_row in hp_rows):
            continue
        if hp_rows:
            print("new_hp_row  %s" % new_hp_row)
        else:
            print("first_hp_row  %s" % new_hp_row)
        point1 = cvPoint(int(blob.minx), int(blob.miny)+int(new_hp_row))
        point2 = cvPoint(int(blob.maxx), int(blob.miny)+int(new_hp_row))
        hp_point_pairs.append([point1, point2])
        hp_rows.append(new_hp_row)
        chars_cut += 1

    if chars_cut != suspected_num_chars:
        # Keep a copy of blobs that could not be cut as expected.
        info_string = "%04d__%04d__suspected_%02d__cut_%02d.jpg"
        info = info_string % (blob_width, blob_height, suspected_num_chars, chars_cut)
        cvSaveImage("/home/ryan/openCV/hp_blobs/"+info, blob_image)
    cvSaveImage("/home/ryan/openCV/output_images/7.jpg", blob_image)
    return hp_point_pairs
77 | 
# Module-level driver: runs on import.  Loads a fixed image, extracts its
# blobs, computes cut rows for one hard-coded blob and saves the results.
input_image_filename = "/home/ryan/ocr/test/image/thresh48.jpg"
bi_image  = cvLoadImage(input_image_filename)
gray_image = cvCreateImage(cvGetSize(bi_image), 8, 1)
cvCvtColor(bi_image, gray_image, CV_BGR2GRAY)
# Mask of the same size as the image with every pixel set to 1.
mask = cvCreateImage(cvGetSize(bi_image), 8, 1)
cvSet(mask,1)
initial_blobs = CBlobResult(gray_image, mask, 100, False)
initial_blob_count = initial_blobs.GetNumBlobs()

# Blob index 2865 is hard-coded -- presumably a known multi-character blob
# of this particular image; TODO confirm.
hp_point_pairs = hp(initial_blobs.GetBlob(2865), 2)
drawing_image = cvCloneImage(bi_image)

# Draw every returned cut as a line on the copy of the input image.
for point_pair in hp_point_pairs:
    cvDrawLine(drawing_image, point_pair[0], point_pair[1], CV_RGB(0,245,0), 2, 8, 0)

# 5.jpg is the image as loaded, 6.jpg the copy with the cut lines drawn.
cvSaveImage("/home/ryan/openCV/output_images/5.jpg", bi_image)
cvSaveImage("/home/ryan/openCV/output_images/6.jpg", drawing_image)
95 | 


--------------------------------------------------------------------------------
/legacy_functions/make_hist.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib
 3 | matplotlib.use('Agg')
 4 | import matplotlib.pyplot as plt
 5 | 
 6 | box_widths = [12, 17, 27, 12, 14, 17, 13, 18, 11, 12, 21, 15, 11, 18, 14, 12, 12, 53, 14, 14, 12, 14, 12, 15, 19, 12, 11, 12, 25, 19, 17, 20, 27, 12, 30, 11, 11, 44, 23, 11, 24, 22, 17, 14, 13, 11, 18, 14, 14, 12, 11, 28, 11, 32, 38, 36, 12, 29, 33, 32, 31, 32, 37, 37, 66, 37, 33, 28, 42, 37, 29, 87, 30, 34, 35, 30, 31, 36, 35, 27, 36, 29, 28, 28, 14, 28, 29, 38, 36, 43, 79, 17, 41, 36, 66, 29, 30, 48, 39, 57, 22, 21, 36, 36, 45, 28, 73, 29, 26, 12, 31, 92, 40, 39, 32, 27, 43, 69, 88, 20, 33, 37, 36, 36, 29, 33, 31, 35, 66, 39, 39, 11, 30, 34, 11, 28, 35, 27, 35, 43, 14, 57, 35, 64, 65, 72, 36, 34, 19, 36, 28, 43, 25, 33, 77, 31, 45, 53, 34, 11, 39, 30, 36, 32, 34, 70, 35, 41, 57, 72, 29, 29, 13, 30, 46, 29, 48, 51, 36, 41, 59, 13, 32, 27, 32, 12, 43, 35, 57, 53, 19, 31, 30, 83, 29, 50, 65, 88, 52, 38, 94, 39, 13, 83, 30, 46, 39, 36, 38, 37, 30, 11, 34, 67, 37, 12, 13, 38, 29, 85, 97, 44, 44, 31, 32, 35, 26, 34, 47, 45, 32, 34, 42, 89, 50, 63, 37, 52, 42, 79, 60, 28, 20, 23, 26, 31, 26, 39, 34, 37, 36, 65, 34, 41, 34, 36, 39, 40, 31, 11, 34, 30, 35, 32, 35, 36, 33, 28, 30, 35, 33, 41, 60, 28, 30, 38, 33, 62, 29, 27, 30, 39, 59, 82, 98, 37, 92, 67, 59, 39, 49, 42, 34, 58, 65, 37, 30, 71, 28, 38, 49, 34, 14, 46, 34, 37, 35, 45, 52, 44, 49, 31, 40, 32, 38, 35, 68, 40, 30, 30, 68, 33, 28, 31, 34, 29, 78, 42, 28, 31, 76, 61, 30, 31, 26, 30, 44, 62, 31, 36, 37, 26, 34, 43, 30, 31, 44, 59, 66, 90, 56, 40, 96, 36, 36, 51, 70, 35, 35, 34, 36, 39, 37, 33, 73, 40, 51, 35, 31, 56, 38, 33, 63, 40, 70, 39, 36, 35, 11, 34, 29, 36, 33, 81, 72, 44, 41, 30, 38, 30, 11, 33, 81, 51, 25, 38, 33, 30, 11, 15, 60, 59, 58, 87, 50, 55, 54, 65, 77, 67, 25, 15, 75, 30, 13, 77, 36, 32, 76, 30, 40, 30, 35, 49, 35, 30, 32, 38, 31, 26, 41, 33, 92, 37, 37, 73, 40, 43, 16, 32, 64, 69, 90, 36, 33, 95, 65, 53, 67, 18, 72, 39, 22, 11, 35, 39, 26, 33, 31, 39, 89, 59, 11, 11, 23, 12, 11, 14, 41, 11, 12, 11, 12, 12, 16, 13, 12, 14, 11, 14, 23, 11, 15, 35, 13, 20, 54, 19, 38, 18, 13, 39, 14, 15, 13, 
12, 35, 12, 11, 51, 15, 38, 37, 22, 58, 47, 40, 35, 31, 34, 32, 35, 39, 31, 31, 32, 28, 30, 30, 36, 36, 68, 37, 38, 40, 45, 30, 81, 76, 17, 35, 45, 30, 48, 33, 37, 35, 35, 35, 31, 57, 30, 84, 35, 37, 37, 57, 55, 28, 27, 24, 62, 60, 88, 36, 59, 59, 80, 59, 17, 87, 75, 27, 38, 11, 35, 24, 50, 67, 38, 74, 35, 74, 36, 31, 37, 54, 43, 41, 33, 52, 39, 28, 30, 52, 36, 37, 56, 34, 32, 39, 31, 29, 11, 29, 30, 85, 33, 79, 33, 45, 32, 37, 19, 38, 84, 62, 41, 71, 68, 39, 26, 27, 16, 33, 14, 53, 79, 52, 33, 34, 42, 26, 38, 11, 12, 26, 32, 35, 64, 36, 37, 36, 71, 34, 27, 31, 15, 42, 31, 31, 58, 40, 59, 43, 64, 85, 38, 36, 61, 34, 26, 44, 35, 33, 59, 39, 31, 11, 33, 35, 53, 65, 31, 37, 96, 38, 79, 36, 30, 26, 58, 24, 19, 55, 57, 58, 34, 60, 84, 30, 32, 53, 53, 33, 31, 42, 48, 34, 65, 62, 48, 15, 32, 31, 35, 26, 37, 32, 32, 33, 33, 41, 29, 37, 88, 42, 44, 71, 42, 39, 36, 28, 64, 39, 43, 34, 36, 31, 38, 67, 37, 52, 30, 38, 37, 42, 30, 38, 85, 34, 51, 30, 25, 67, 29, 14, 79, 59, 64, 33, 16, 65, 63, 56, 16, 46, 11, 32, 34, 76, 71, 27, 36, 33, 30, 33, 38, 33, 44, 42, 31, 83, 29, 29, 33, 38, 40, 37, 29, 30, 41, 34, 55, 11, 34, 31, 34, 85, 56, 36, 42, 39, 73, 32, 37, 95, 77, 44, 38, 95, 37, 41, 26, 62, 34, 36, 32, 62, 63, 77, 75, 41, 67, 37, 40, 30, 70, 46, 35, 34, 62, 95, 61, 30, 34, 31, 34, 34, 44, 26, 39, 29, 36, 95, 43, 44, 40, 28, 37, 36, 38, 64, 44, 40, 31, 77, 37, 31, 41, 79, 35, 30, 15, 39, 68, 35, 32, 35, 32, 34, 66, 11, 36, 65, 36, 36, 96, 11, 86, 35, 36, 30, 29, 82, 34, 34, 31, 47, 46, 39, 40, 47, 34, 69, 36, 77, 71, 47, 41, 76, 84, 64, 54, 57, 32, 69, 61, 66, 11, 68, 39, 39, 38, 89, 40, 35, 32, 14, 37, 11, 35, 29, 33, 99, 74, 45, 46, 40, 11, 36, 70, 31, 37, 37, 95, 30, 77, 70, 94, 64, 70, 32, 37, 32, 39, 15, 18, 18, 11, 57, 11, 13, 13, 39, 29, 17, 14, 12, 16, 27, 30, 19, 18, 18, 43, 11, 12, 19, 17, 20, 90, 27, 23, 44, 25, 11, 26, 19, 11, 19, 16, 12, 12, 58, 20, 11, 18, 11, 13, 12, 26, 19, 15, 17, 21, 11, 11, 17, 18, 13, 13, 13, 13]
 7 | x1s = [2511, 2493, 2487, 2486, 2485, 2477, 2461, 2452, 2449, 2419, 2399, 2397, 2394, 2374, 2357, 2338, 2337, 2323, 2305, 2258, 2226, 2225, 2224, 2224, 2224, 2224, 2224, 2224, 2223, 2222, 2222, 2221, 2220, 2220, 2220, 2220, 2216, 2215, 2214, 2214, 2213, 2213, 2212, 2212, 2211, 2211, 2211, 2210, 2209, 2208, 2208, 2208, 2207, 2206, 2206, 2206, 2205, 2201, 2200, 2197, 2194, 2194, 2193, 2192, 2188, 2184, 2156, 2142, 2140, 2140, 2140, 2140, 2139, 2139, 2139, 2138, 2138, 2138, 2138, 2137, 2136, 2136, 2134, 2133, 2133, 2132, 2132, 2132, 2131, 2131, 2131, 2130, 2130, 2128, 2126, 2125, 2123, 2123, 2121, 2121, 2119, 2119, 2118, 2117, 2114, 2112, 2111, 2111, 2108, 2108, 2106, 2104, 2100, 2097, 2091, 2065, 2057, 2057, 2057, 2056, 2056, 2056, 2056, 2056, 2055, 2055, 2055, 2055, 2054, 2054, 2054, 2054, 2054, 2054, 2054, 2054, 2053, 2052, 2051, 2050, 2048, 2048, 2047, 2045, 2044, 2044, 2044, 2044, 2043, 2042, 2042, 2042, 2033, 2032, 2032, 2030, 2027, 2026, 2022, 2018, 2015, 2007, 1994, 1976, 1976, 1976, 1974, 1974, 1974, 1971, 1970, 1970, 1970, 1968, 1968, 1967, 1965, 1964, 1964, 1963, 1963, 1963, 1962, 1961, 1961, 1960, 1955, 1947, 1945, 1944, 1943, 1942, 1940, 1940, 1940, 1940, 1923, 1904, 1899, 1892, 1892, 1892, 1891, 1891, 1891, 1890, 1890, 1890, 1889, 1889, 1888, 1887, 1886, 1886, 1886, 1886, 1885, 1885, 1884, 1882, 1882, 1882, 1880, 1880, 1880, 1880, 1879, 1879, 1878, 1878, 1877, 1876, 1875, 1870, 1868, 1868, 1868, 1866, 1865, 1864, 1864, 1862, 1861, 1861, 1860, 1859, 1857, 1856, 1856, 1854, 1847, 1825, 1812, 1804, 1804, 1804, 1803, 1803, 1803, 1803, 1803, 1803, 1803, 1803, 1802, 1802, 1802, 1802, 1802, 1802, 1802, 1801, 1801, 1801, 1801, 1801, 1801, 1800, 1797, 1797, 1797, 1797, 1797, 1797, 1796, 1795, 1795, 1794, 1794, 1794, 1793, 1793, 1793, 1792, 1792, 1781, 1781, 1780, 1780, 1777, 1777, 1776, 1775, 1774, 1770, 1767, 1766, 1764, 1718, 1718, 1718, 1718, 1718, 1718, 1718, 1717, 1717, 1717, 1716, 1716, 1716, 1716, 1716, 1716, 1716, 1715, 1715, 1715, 1714, 1713, 1713, 
1713, 1712, 1712, 1712, 1712, 1712, 1712, 1711, 1710, 1710, 1709, 1707, 1701, 1698, 1696, 1696, 1696, 1696, 1693, 1691, 1690, 1690, 1688, 1686, 1667, 1649, 1635, 1635, 1635, 1635, 1635, 1634, 1634, 1634, 1634, 1634, 1634, 1634, 1633, 1633, 1633, 1632, 1632, 1632, 1632, 1631, 1629, 1629, 1629, 1628, 1626, 1622, 1620, 1614, 1614, 1612, 1612, 1610, 1609, 1608, 1608, 1605, 1604, 1604, 1604, 1603, 1603, 1558, 1534, 1484, 1464, 1380, 1359, 1333, 1309, 1294, 1236, 1216, 1201, 1196, 1152, 1124, 1103, 1040, 1027, 1017, 1010, 1003, 1002, 1001, 999, 998, 998, 997, 997, 996, 996, 996, 995, 994, 992, 991, 990, 990, 989, 989, 989, 988, 986, 985, 985, 985, 984, 984, 984, 984, 984, 984, 984, 984, 984, 984, 984, 984, 983, 983, 982, 974, 973, 973, 972, 964, 964, 964, 960, 960, 960, 960, 958, 958, 956, 955, 955, 919, 917, 916, 915, 912, 911, 908, 907, 907, 906, 905, 904, 903, 903, 903, 903, 903, 902, 901, 901, 901, 900, 900, 900, 900, 900, 900, 899, 899, 899, 899, 899, 899, 899, 899, 899, 898, 898, 898, 898, 897, 894, 892, 881, 880, 880, 879, 879, 878, 874, 872, 867, 831, 830, 828, 827, 827, 827, 826, 825, 824, 824, 823, 822, 822, 819, 818, 817, 817, 817, 817, 817, 816, 816, 816, 816, 815, 815, 815, 814, 814, 814, 814, 814, 813, 813, 813, 813, 813, 812, 812, 808, 805, 805, 802, 797, 796, 796, 792, 792, 792, 792, 792, 791, 791, 789, 788, 787, 786, 786, 754, 747, 747, 747, 746, 745, 743, 743, 742, 741, 741, 739, 739, 735, 730, 730, 729, 729, 729, 728, 728, 728, 728, 728, 728, 727, 727, 727, 726, 726, 726, 725, 725, 725, 722, 721, 719, 717, 712, 706, 706, 705, 705, 700, 700, 697, 670, 660, 656, 655, 655, 655, 654, 653, 652, 651, 650, 650, 648, 647, 646, 646, 646, 645, 645, 644, 644, 644, 644, 644, 643, 642, 642, 642, 641, 641, 641, 641, 640, 640, 640, 640, 640, 640, 640, 639, 639, 638, 638, 638, 637, 636, 632, 631, 631, 616, 613, 610, 607, 581, 578, 577, 576, 575, 573, 572, 569, 569, 568, 564, 563, 562, 562, 561, 561, 561, 560, 560, 560, 559, 558, 558, 558, 557, 557, 556, 556, 555, 555, 
555, 555, 555, 554, 554, 554, 554, 554, 554, 554, 553, 549, 545, 545, 540, 536, 492, 491, 490, 486, 484, 480, 478, 477, 475, 474, 474, 472, 472, 471, 471, 470, 470, 470, 469, 468, 468, 467, 465, 460, 458, 449, 448, 445, 407, 402, 399, 398, 395, 391, 391, 389, 389, 389, 388, 388, 387, 387, 387, 386, 384, 384, 384, 384, 384, 383, 382, 379, 378, 368, 362, 359, 359, 355, 350, 153, 143, 140, 120, 120]
 8 | cum_summed = [(31, 1804), (29, 1803), (28, 903), (26, 1825), (26, 1812), (25, 904), (24, 1718), (24, 902), (24, 901), (24, 817), (23, 1802), (23, 1717), (23, 1635), (23, 818), (22, 2091), (22, 2065), (22, 2057), (22, 1766), (22, 1764), (22, 990), (22, 989), (22, 900), (22, 645), (22, 644), (21, 1847), (21, 1716), (21, 1667), (21, 1649), (21, 985), (20, 2056), (20, 1767), (20, 988), (20, 986), (20, 816), (20, 730), (20, 643), (20, 642), (20, 559), (20, 558), (19, 1801), (19, 1634), (19, 984), (19, 819), (19, 729), (18, 1686), (18, 815), (18, 735), (18, 646), (18, 641), (18, 557), (18, 556), (17, 1797), (17, 905), (17, 899), (17, 739), (17, 728), (17, 555), (16, 2055), (16, 1886), (16, 1800), (16, 1715), (16, 1688), (16, 822), (16, 814), (16, 741), (16, 560), (16, 391), (15, 2225), (15, 2224), (15, 2142), (15, 2140), (15, 2097), (15, 1887), (15, 1854), (15, 647), (15, 640), (15, 389), (14, 2184), (14, 2156), (14, 2054), (14, 1891), (14, 1885), (14, 1795), (14, 1794), (14, 1770), (14, 1714), (14, 1713), (14, 823), (14, 650), (14, 562), (14, 561), (13, 2139), (13, 2138), (13, 2136), (13, 1994), (13, 1976), (13, 1940), (13, 1904), (13, 1899), (13, 1892), (13, 1890), (13, 1884), (13, 1882), (13, 1796), (13, 1633), (13, 974), (13, 973), (13, 972), (13, 964), (13, 805), (13, 648), (13, 563), (13, 554), (13, 388), (12, 2258), (12, 2226), (12, 2220), (12, 2213), (12, 2211), (12, 2137), (12, 2134), (12, 2133), (12, 2100), (12, 2048), (12, 2045), (12, 2044), (12, 2007), (12, 1970), (12, 1923), (12, 1888), (12, 1793), (12, 1774), (12, 1712), (12, 982), (12, 906), (12, 824), (12, 813), (12, 808), (12, 802), (12, 797), (12, 796), (12, 792), (12, 727), (12, 474), (12, 398), (12, 395), (12, 387), (11, 2323), (11, 2305), (11, 2222), (11, 2221), (11, 2216), (11, 2215), (11, 2214), (11, 2212), (11, 2188), (11, 2132), (11, 2047), (11, 1974), (11, 1942), (11, 1889), (11, 1880), (11, 1690), (11, 1632), (11, 991), (11, 960), (11, 908), (11, 907), (11, 742), (11, 568), (11, 564), (11, 
477), (11, 475), (11, 472), (10, 2223), (10, 2210), (10, 2209), (10, 2208), (10, 2131), (10, 2051), (10, 2050), (10, 2018), (10, 2015), (10, 1971), (10, 1968), (10, 1967), (10, 1965), (10, 1964), (10, 1868), (10, 1792), (10, 1710), (10, 1701), (10, 1698), (10, 1696), (10, 1612), (10, 1610), (10, 1609), (10, 1608), (10, 1002), (10, 1001), (10, 999), (10, 998), (10, 983), (10, 898), (10, 827), (10, 786), (10, 747), (10, 743), (10, 726), (10, 651), (10, 639), (10, 569), (10, 471), (10, 399), (10, 386), (10, 384), (9, 2206), (9, 2192), (9, 2130), (9, 2128), (9, 2126), (9, 2125), (9, 2123), (9, 2043), (9, 2042), (9, 1963), (9, 1947), (9, 1945), (9, 1944), (9, 1943), (9, 1878), (9, 1875), (9, 1870), (9, 1856), (9, 1781), (9, 1709), (9, 1707), (9, 1629), (9, 1620), (9, 1614), (9, 1605), (9, 1604), (9, 997), (9, 996), (9, 995), (9, 994), (9, 992), (9, 830), (9, 828), (9, 812), (9, 789), (9, 788), (9, 787), (9, 754), (9, 746), (9, 745), (9, 719), (9, 717), (9, 712), (9, 706), (9, 700), (9, 655), (9, 652), (9, 638), (9, 478), (9, 470), (9, 402), (8, 2207), (8, 2121), (8, 2119), (8, 2118), (8, 2117), (8, 2114), (8, 2112), (8, 2111), (8, 2104), (8, 2022), (8, 1879), (8, 1877), (8, 1876), (8, 1866), (8, 1865), (8, 1864), (8, 1862), (8, 1861), (8, 1780), (8, 1631), (8, 1628), (8, 1626), (8, 1622), (8, 1003), (8, 958), (8, 897), (8, 894), (8, 892), (8, 881), (8, 880), (8, 872), (8, 867), (8, 831), (8, 826), (8, 825), (8, 791), (8, 725), (8, 721), (8, 705), (8, 697), (8, 670), (8, 660), (8, 656), (8, 572), (8, 480), (7, 2205), (7, 2201), (7, 2200), (7, 2197), (7, 2194), (7, 2108), (7, 2053), (7, 2052), (7, 2033), (7, 2032), (7, 1962), (7, 1961), (7, 1860), (7, 1859), (7, 1857), (7, 1777), (7, 1711), (7, 1693), (7, 1691), (7, 1603), (7, 1010), (7, 956), (7, 955), (7, 916), (7, 915), (7, 912), (7, 911), (7, 879), (7, 722), (7, 654), (7, 653), (7, 637), (7, 636), (7, 632), (7, 631), (7, 577), (7, 576), (7, 575), (7, 573), (7, 553), (7, 549), (7, 545), (7, 484), (7, 469), (7, 468), 
(7, 407), (7, 382), (7, 379), (7, 378), (7, 368), (7, 362), (7, 359), (7, 355), (6, 2511), (6, 2493), (6, 2487), (6, 2486), (6, 2485), (6, 2477), (6, 2461), (6, 2452), (6, 2449), (6, 2419), (6, 2399), (6, 2397), (6, 2394), (6, 2374), (6, 2357), (6, 2338), (6, 2337), (6, 2193), (6, 2106), (6, 2030), (6, 2027), (6, 2026), (6, 1960), (6, 1955), (6, 1776), (6, 1775), (6, 1558), (6, 1534), (6, 1484), (6, 1464), (6, 1380), (6, 1359), (6, 1333), (6, 1309), (6, 1294), (6, 1236), (6, 1216), (6, 1201), (6, 1196), (6, 1152), (6, 1124), (6, 1103), (6, 1040), (6, 1027), (6, 1017), (6, 919), (6, 917), (6, 878), (6, 874), (6, 616), (6, 613), (6, 610), (6, 607), (6, 581), (6, 578), (6, 540), (6, 536), (6, 492), (6, 491), (6, 490), (6, 486), (6, 467), (6, 465), (6, 460), (6, 458), (6, 449), (6, 448), (6, 445), (6, 383), (6, 350), (5, 153), (4, 143), (3, 140), (2, 120)]
 9 | 
# Build a single-axes figure and draw a histogram of cum_summed, then save
# it to disk.
fig = plt.figure()
ax = fig.add_subplot(111)

# the histogram of the data, with as many bins as cum_summed has entries
# NOTE(review): cum_summed holds (count, y) tuples, so hist() is handed
# two-column data -- confirm this is intended rather than a flat list of
# y values.
# NOTE(review): the `normed` keyword was removed from newer Matplotlib
# releases in favour of `density`; it is left untouched here for the
# Matplotlib version this legacy script was written against.
n, bins, patches = ax.hist(cum_summed, len(cum_summed), normed=1, facecolor='blue')

# hist uses np.histogram under the hood to create 'n' and 'bins'.
# np.histogram returns the bin edges, so 'bins' has one more entry than
# there are bins.  To get everything lined up, we'd compute the bin centers
#bincenters = 0.5*(bins[1:]+bins[:-1])
# add a 'best fit' line for the normal PDF
#y = mlab.normpdf( bincenters, mu, sigma)
#l = ax.plot(bincenters, y, 'r--', linewidth=1)

ax.set_xlabel('y value')
ax.set_ylabel('Occurrence')
#ax.set_title(r'$\mathrm{Histogram\ of\ IQ:}\ \mu=100,\ \sigma=15$')
ax.grid(True)

# The 'image' directory is expected to exist relative to the working
# directory.
fig.savefig('image/y_value_hist.png')
31 | 


--------------------------------------------------------------------------------
/legacy_functions/make_plot.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib
 3 | matplotlib.use('Agg')
 4 | from matplotlib import pyplot
 5 | 
 6 | cum_summed = [120, 2, 140, 3, 143, 4, 153, 5, 350, 6, 355, 7, 359, 7, 362, 7, 368, 7, 378, 7, 379, 7, 382, 7, 383, 6, 384, 10, 386, 10, 387, 12, 388, 13, 389, 15, 391, 16, 395, 12, 398, 12, 399, 10, 402, 9, 407, 7, 445, 6, 448, 6, 449, 6, 458, 6, 460, 6, 465, 6, 467, 6, 468, 7, 469, 7, 470, 9, 471, 10, 472, 11, 474, 12, 475, 11, 477, 11, 478, 9, 480, 8, 484, 7, 486, 6, 490, 6, 491, 6, 492, 6, 536, 6, 540, 6, 545, 7, 549, 7, 553, 7, 554, 13, 555, 17, 556, 18, 557, 18, 558, 20, 559, 20, 560, 16, 561, 14, 562, 14, 563, 13, 564, 11, 568, 11, 569, 10, 572, 8, 573, 7, 575, 7, 576, 7, 577, 7, 578, 6, 581, 6, 607, 6, 610, 6, 613, 6, 616, 6, 631, 7, 632, 7, 636, 7, 637, 7, 638, 9, 639, 10, 640, 15, 641, 18, 642, 20, 643, 20, 644, 22, 645, 22, 646, 18, 647, 15, 648, 13, 650, 14, 651, 10, 652, 9, 653, 7, 654, 7, 655, 9, 656, 8, 660, 8, 670, 8, 697, 8, 700, 9, 705, 8, 706, 9, 712, 9, 717, 9, 719, 9, 721, 8, 722, 7, 725, 8, 726, 10, 727, 12, 728, 17, 729, 19, 730, 20, 735, 18, 739, 17, 741, 16, 742, 11, 743, 10, 745, 9, 746, 9, 747, 10, 754, 9, 786, 10, 787, 9, 788, 9, 789, 9, 791, 8, 792, 12, 796, 12, 797, 12, 802, 12, 805, 13, 808, 12, 812, 9, 813, 12, 814, 16, 815, 18, 816, 20, 817, 24, 818, 23, 819, 19, 822, 16, 823, 14, 824, 12, 825, 8, 826, 8, 827, 10, 828, 9, 830, 9, 831, 8, 867, 8, 872, 8, 874, 6, 878, 6, 879, 7, 880, 8, 881, 8, 892, 8, 894, 8, 897, 8, 898, 10, 899, 17, 900, 22, 901, 24, 902, 24, 903, 28, 904, 25, 905, 17, 906, 12, 907, 11, 908, 11, 911, 7, 912, 7, 915, 7, 916, 7, 917, 6, 919, 6, 955, 7, 956, 7, 958, 8, 960, 11, 964, 13, 972, 13, 973, 13, 974, 13, 982, 12, 983, 10, 984, 19, 985, 21, 986, 20, 988, 20, 989, 22, 990, 22, 991, 11, 992, 9, 994, 9, 995, 9, 996, 9, 997, 9, 998, 10, 999, 10, 1001, 10, 1002, 10, 1003, 8, 1010, 7, 1017, 6, 1027, 6, 1040, 6, 1103, 6, 1124, 6, 1152, 6, 1196, 6, 1201, 6, 1216, 6, 1236, 6, 1294, 6, 1309, 6, 1333, 6, 1359, 6, 1380, 6, 1464, 6, 1484, 6, 1534, 6, 1558, 6, 1603, 7, 1604, 9, 1605, 9, 1608, 10, 1609, 10, 1610, 10, 
1612, 10, 1614, 9, 1620, 9, 1622, 8, 1626, 8, 1628, 8, 1629, 9, 1631, 8, 1632, 11, 1633, 13, 1634, 19, 1635, 23, 1649, 21, 1667, 21, 1686, 18, 1688, 16, 1690, 11, 1691, 7, 1693, 7, 1696, 10, 1698, 10, 1701, 10, 1707, 9, 1709, 9, 1710, 10, 1711, 7, 1712, 12, 1713, 14, 1714, 14, 1715, 16, 1716, 21, 1717, 23, 1718, 24, 1764, 22, 1766, 22, 1767, 20, 1770, 14, 1774, 12, 1775, 6, 1776, 6, 1777, 7, 1780, 8, 1781, 9, 1792, 10, 1793, 12, 1794, 14, 1795, 14, 1796, 13, 1797, 17, 1800, 16, 1801, 19, 1802, 23, 1803, 29, 1804, 31, 1812, 26, 1825, 26, 1847, 21, 1854, 15, 1856, 9, 1857, 7, 1859, 7, 1860, 7, 1861, 8, 1862, 8, 1864, 8, 1865, 8, 1866, 8, 1868, 10, 1870, 9, 1875, 9, 1876, 8, 1877, 8, 1878, 9, 1879, 8, 1880, 11, 1882, 13, 1884, 13, 1885, 14, 1886, 16, 1887, 15, 1888, 12, 1889, 11, 1890, 13, 1891, 14, 1892, 13, 1899, 13, 1904, 13, 1923, 12, 1940, 13, 1942, 11, 1943, 9, 1944, 9, 1945, 9, 1947, 9, 1955, 6, 1960, 6, 1961, 7, 1962, 7, 1963, 9, 1964, 10, 1965, 10, 1967, 10, 1968, 10, 1970, 12, 1971, 10, 1974, 11, 1976, 13, 1994, 13, 2007, 12, 2015, 10, 2018, 10, 2022, 8, 2026, 6, 2027, 6, 2030, 6, 2032, 7, 2033, 7, 2042, 9, 2043, 9, 2044, 12, 2045, 12, 2047, 11, 2048, 12, 2050, 10, 2051, 10, 2052, 7, 2053, 7, 2054, 14, 2055, 16, 2056, 20, 2057, 22, 2065, 22, 2091, 22, 2097, 15, 2100, 12, 2104, 8, 2106, 6, 2108, 7, 2111, 8, 2112, 8, 2114, 8, 2117, 8, 2118, 8, 2119, 8, 2121, 8, 2123, 9, 2125, 9, 2126, 9, 2128, 9, 2130, 9, 2131, 10, 2132, 11, 2133, 12, 2134, 12, 2136, 13, 2137, 12, 2138, 13, 2139, 13, 2140, 15, 2142, 15, 2156, 14, 2184, 14, 2188, 11, 2192, 9, 2193, 6, 2194, 7, 2197, 7, 2200, 7, 2201, 7, 2205, 7, 2206, 9, 2207, 8, 2208, 10, 2209, 10, 2210, 10, 2211, 12, 2212, 11, 2213, 12, 2214, 11, 2215, 11, 2216, 11, 2220, 12, 2221, 11, 2222, 11, 2223, 10, 2224, 15, 2225, 15, 2226, 12, 2258, 12, 2305, 11, 2323, 11, 2337, 6, 2338, 6, 2357, 6, 2374, 6, 2394, 6, 2397, 6, 2399, 6, 2419, 6, 2449, 6, 2452, 6, 2461, 6, 2477, 6, 2485, 6, 2486, 6, 2487, 6, 2493, 6, 2511, 6]
 7 | 
 8 | fig = pyplot.figure()
 9 | ax = fig.add_subplot(111)
10 | 
11 | ax.plot(cum_summed)
12 | 
13 | ax.set_xlabel('y value')
14 | ax.set_ylabel('cum_sum_count')
15 | 
16 | ax.grid(True)
17 | 
18 | fig.savefig('image/y_value_hist.png')
19 | 


--------------------------------------------------------------------------------
/legacy_functions/numpy1.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | import opencv, numpy
 4 | from opencv import highgui as hg
 5 | 
 6 | def run():
 7 |     filename = 'C:\\Documents and Settings\\rmccormack\\Desktop\\work_projects\\openCV\\test\\test1.jpg'
 8 |     im = hg.cvLoadImage(filename)
 9 |     if not im:
10 |         print "Error opening %s" % filename
11 |         sys.exit(-1)
12 |     im2 = opencv.cvCreateImage(opencv.cvGetSize(im),8, 4)
13 |     opencv.cvCvtColor(im,im2,opencv.CV_BGR2BGRA)
14 |     buffer = numpy.fromstring(im2.imageData,dtype=numpy.uint32).astype(numpy.float32)
15 |     buffer.shape=(im2.width, im2.height)
16 |     return buffer
17 | 


--------------------------------------------------------------------------------
/legacy_functions/ocr.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | import sys
  4 | import os
  5 | import operator
  6 | import time
  7 | import datetime
  8 | 
  9 | from PIL import Image
 10 | 
 11 | from opencv.cv import *
 12 | from opencv.highgui import *
 13 | 
 14 | from pyblobs.BlobResult import CBlobResult
 15 | from pyblobs.Blob import CBlob # Note: This must be imported in order to destroy blobs and use other methods
 16 | 
 17 | #num_blobs_found = True
 18 | 
 19 | def output_blob(blob,output_dir="/home/ryan/openCV/b/"):
 20 |     blob_height = int(blob.maxy-blob.miny)
 21 |     blob_width = int(blob.maxx-blob.minx)
 22 |     blob_size = cvSize(blob_width,blob_height)
 23 |     blob_image = cvCreateImage(blob_size, 8, 1)
 24 |     cvZero(blob_image)
 25 |     blob.FillBlob(blob_image, CV_RGB(255,255,255), -1*int(blob.minx), -1*int(blob.miny))
 26 |     info_string = "w_%04d__h_%04d__miny_%04d.png"
 27 |     info = info_string % (blob_width, blob_height, int(blob.miny))
 28 |     cvSaveImage(output_dir+info, blob_image)
 29 |     
 30 | def vp(blob, min_char_width, show_blobs=True):
 31 |     def get_col_miny_maxy(column):
 32 |         miny = 0
 33 |         maxy = 0
 34 |         for i,pixel_value in enumerate(column):
 35 |             if pixel_value == 255:
 36 |                 maxy = i
 37 |                 if not miny:
 38 |                     miny = i
 39 |         return miny,maxy
 40 | 
 41 |     colsum_colnum_col_tuples = []
 42 |     vp_point_pairs = []
 43 | 
 44 |     blob_height = int(blob.maxy-blob.miny)
 45 |     blob_width = int(blob.maxx-blob.minx)
 46 |     blob_size = cvSize(blob_width,blob_height)
 47 |     blob_image = cvCreateImage(blob_size, 8, 1)
 48 |     cvZero(blob_image)
 49 |     blob.FillBlob(blob_image, CV_RGB(255,255,255), -1*int(blob.minx), -1*int(blob.miny))
 50 | 
 51 |     if show_blobs:
 52 |         blob_image_2 = cvCreateImage(blob_size, 8, 3)    
 53 |         cvRectangle(blob_image_2, cvPoint(0,0), cvPoint(blob_width, blob_height), CV_RGB(255,255,255), CV_FILLED, 8, 0)
 54 |         blob.FillBlob(blob_image_2, CV_RGB(0,255,0), -1*int(blob.minx), -1*int(blob.miny))        
 55 | 
 56 | ## TO SKIP PIXELS AT LEFT AND RIGHT MARGIN
 57 |     num_cols = int(blob_image.cols)
 58 |     num_cols_minus_min_char_width = num_cols - min_char_width
 59 | 
 60 |     for (i, col) in enumerate(blob_image.colrange()):
 61 | ## TO SKIP PIXELS AT LEFT AND RIGHT MARGIN
 62 |         if (i+1 < min_char_width) or (i > num_cols_minus_min_char_width):
 63 |             continue
 64 |         col_sum = cvSum(col)
 65 |         colsum_colnum_col_tuples.append((int(col_sum[0]),i,col))
 66 | 
 67 |     colsum_colnum_col_tuples.sort()
 68 | #    print "colsum_colnum_col_tuples", colsum_colnum_col_tuples
 69 | 
 70 |     new_vp_colsum_colnum_col = colsum_colnum_col_tuples[0]
 71 |     new_vp_col = new_vp_colsum_colnum_col[1]
 72 |     delta_miny, delta_maxy = get_col_miny_maxy(new_vp_colsum_colnum_col[2])
 73 |     point1 = cvPoint(int(blob.minx)+new_vp_col, int(blob.miny)+delta_miny)
 74 |     point2 = cvPoint(int(blob.minx)+new_vp_col, int(blob.miny)+delta_maxy)
 75 |     vp_point_pairs = (point1,point2)
 76 | 
 77 |     if show_blobs:
 78 |         point1 = cvPoint(0+new_vp_col, 0+delta_miny)
 79 |         point2 = cvPoint(0+new_vp_col, 0+delta_maxy)
 80 |         cvDrawLine(blob_image_2, point1, point2, CV_RGB(255,0,255), 2, 8, 0)
 81 |         info_string = "%s__%04d__%04d.png"
 82 |         info = info_string % (str(datetime.datetime.now()), blob_width, blob_height)
 83 |         cvSaveImage("/home/ryan/openCV/vp_blobs_time/"+info, blob_image_2)
 84 |         info_string = "%04d__%04d__%s.png"
 85 |         info = info_string % (blob_width, blob_height, str(datetime.datetime.now())) 
 86 |         cvSaveImage("/home/ryan/openCV/vp_blobs_dimensions/"+info, blob_image_2)        
 87 | #    cvReleaseImage(blob_image)    
 88 |     return vp_point_pairs
 89 | 
 90 | def hp(blob, acceptable_row_ranges, min_char_height=27, show_blobs=True):
 91 |     print "NEW HP BLOB"
 92 |     def get_row_minx_maxx(row):
 93 |         minx = 0
 94 |         maxx = 0
 95 |         for i,pixel_value in enumerate(row):
 96 |             if pixel_value == 255:
 97 |                 maxx = i
 98 |                 if not minx:
 99 |                     minx = i
100 |         return minx,maxx
101 | 
102 |     hp_point_pairs = []
103 | 
104 |     blob_height = int(blob.maxy-blob.miny)
105 |     blob_width = int(blob.maxx-blob.minx)
106 |     blob_size = cvSize(blob_width,blob_height)
107 |     blob_image = cvCreateImage(blob_size, 8, 1)
108 |     cvZero(blob_image)
109 |     blob.FillBlob(blob_image, CV_RGB(255,255,255), -1*int(blob.minx), -1*int(blob.miny))
110 | 
111 |     if show_blobs:
112 |         hp_point_pairs_2 = []        
113 |         blob_image_2 = cvCreateImage(blob_size, 8, 3)    
114 |         cvRectangle(blob_image_2, cvPoint(0,0), cvPoint(blob_width, blob_height), CV_RGB(255,255,255), CV_FILLED, 8, 0)
115 |         blob.FillBlob(blob_image_2, CV_RGB(0,255,0), -1*int(blob.minx), -1*int(blob.miny))
116 |     
117 |     for acceptable_row_range in acceptable_row_ranges:
118 |         rowsum_rownum_row_tuples = []
119 |         hp_rows = []
120 | 
121 |         for (i, row) in enumerate(blob_image.rowrange()):
122 |             if acceptable_row_range[0] <= i <= acceptable_row_range[1]:
123 |                 row_sum = cvSum(row)
124 |                 rowsum_rownum_row_tuples.append((int(row_sum[0]),i,row))
125 |         rowsum_rownum_row_tuples.sort()
126 |         lowest_rowsum_tuple = rowsum_rownum_row_tuples[0]
127 |         new_hp_row = lowest_rowsum_tuple[1]
128 |         delta_minx, delta_maxx = get_row_minx_maxx(lowest_rowsum_tuple[2])
129 |         point1 = cvPoint(int(blob.minx)+delta_minx, int(blob.miny)+new_hp_row)
130 |         point2 = cvPoint(int(blob.minx)+delta_maxx, int(blob.miny)+new_hp_row)
131 |         hp_point_pairs.append([point1,point2])
132 |     if show_blobs:
133 |         for acceptable_row_range in acceptable_row_ranges:
134 |             rowsum_rownum_row_tuples = []
135 |             hp_rows = []
136 | 
137 |             for (i, row) in enumerate(blob_image.rowrange()):
138 |                 if acceptable_row_range[0] <= i <= acceptable_row_range[1]:
139 |                     row_sum = cvSum(row)
140 |                     rowsum_rownum_row_tuples.append((int(row_sum[0]),i,row))
141 |             rowsum_rownum_row_tuples.sort()
142 |             lowest_rowsum_tuple = rowsum_rownum_row_tuples[0]
143 |             new_hp_row = lowest_rowsum_tuple[1]
144 |             delta_minx, delta_maxx = get_row_minx_maxx(lowest_rowsum_tuple[2])
145 |             point1 = cvPoint(0+delta_minx, 0+new_hp_row)
146 |             point2 = cvPoint(0+delta_maxx, 0+new_hp_row)
147 |             hp_point_pairs_2.append([point1,point2])        
148 |         for point_pair in hp_point_pairs_2:
149 |             cvDrawLine(blob_image_2, point_pair[0], point_pair[1], CV_RGB(255,0,0), 2, 8, 0)
150 |         info_string = "w_%04d__h_%04d__rowranges_%02d.png"
151 |         info = info_string % (blob_width, blob_height, len(acceptable_row_ranges))        
152 |         cvSaveImage("/home/ryan/openCV/hp_blobs/"+info, blob_image_2)            
153 |     return hp_point_pairs
154 | 
def process_image(binary_image, min_char_width=20, min_char_height=27, max_width_b4_cutoff=80, max_height_b4_cutoff=120, max_blob_area=2800, min_sub_super_width = 21, min_sub_super_height = 14, max_sub_super_width_b4_cutoff = 66, max_sub_super_height_b4_cutoff = 38, miny_linetop_var=25, range_above_below_line=15, draw_lines=False, lines=None, left_margin=250, top_margin=80, num_lines_on_pecha=8):
    """Segment a binarized page image into per-character blob images.

    The function works in passes and calls itself again after each pass
    that changes the image or discovers the text lines:

    1. When `lines` is known, blobs taller than max_height_b4_cutoff that
       straddle a line are cut horizontally (see hp) by drawing a white
       line onto the image.
    2. Blobs wider than max_width_b4_cutoff are cut vertically (see vp) the
       same way.
    3. If anything was cut, the image is processed again.
    4. Otherwise, if `lines` is not known yet, the top y coordinates of all
       character-sized blobs inside the margins are handed to
       cum_sum.get_lines and the image is processed again with the result.
    5. Otherwise the blobs are assigned to lines and written out as
       individual PNG files.

    binary_image -- image to segment; it is drawn on in place.
    min_char_width, min_char_height, max_width_b4_cutoff,
    max_height_b4_cutoff -- size limits of a regular ("anchor") character.
    max_blob_area -- blobs with at least this area are never cut.
    min_sub_super_width, min_sub_super_height,
    max_sub_super_width_b4_cutoff, max_sub_super_height_b4_cutoff -- size
        limits of a potential sub/superscript or vowel blob.
    miny_linetop_var, draw_lines -- accepted for interface compatibility;
        not used.
    range_above_below_line -- number of rows above and below a line in
        which a horizontal cut may be placed.
    lines -- y coordinates of the text lines; None or empty means "not
        determined yet".
    left_margin, top_margin -- width of the border (applied on both sides)
        in which blobs are ignored.
    num_lines_on_pecha -- number of text lines expected on the page.

    Returns None.  Raises ValueError when no text line can be determined.
    """
    # Every tuning parameter, bundled so that the recursive reruns keep
    # using the caller's values instead of falling back to the defaults.
    params = dict(
        min_char_width=min_char_width,
        min_char_height=min_char_height,
        max_width_b4_cutoff=max_width_b4_cutoff,
        max_height_b4_cutoff=max_height_b4_cutoff,
        max_blob_area=max_blob_area,
        min_sub_super_width=min_sub_super_width,
        min_sub_super_height=min_sub_super_height,
        max_sub_super_width_b4_cutoff=max_sub_super_width_b4_cutoff,
        max_sub_super_height_b4_cutoff=max_sub_super_height_b4_cutoff,
        miny_linetop_var=miny_linetop_var,
        range_above_below_line=range_above_below_line,
        draw_lines=draw_lines,
        left_margin=left_margin,
        top_margin=top_margin,
        num_lines_on_pecha=num_lines_on_pecha,
    )

    bi_image  = binary_image
    image_width = bi_image.width
    image_height = bi_image.height
    right_margin = image_width - left_margin
    bottom_margin = image_height - top_margin

    def is_anchor_sized(width, height):
        # True when the bounding box has the size of a regular character.
        return (min_char_width <= width <= max_width_b4_cutoff) and (min_char_height <= height <= max_height_b4_cutoff)

    def is_inside_margins(minx, maxx, miny, maxy):
        # True when the bounding box lies strictly inside the page margins.
        return (minx > left_margin) and (maxx < right_margin) and (miny > top_margin) and (maxy < bottom_margin)

    gray_image = cvCreateImage(cvGetSize(bi_image), 8, 1)
    cvCvtColor(bi_image, gray_image, CV_BGR2GRAY)
    mask = cvCreateImage(cvGetSize(bi_image), 8, 1)
    cvSet(mask,1)
    initial_blobs = CBlobResult(gray_image, mask, 100, False)
    initial_blob_count = initial_blobs.GetNumBlobs()
    # Single-argument print calls behave the same as the print statement
    # under Python 2; the double space reproduces the original output.
    print("initial_blob_count:  %s" % initial_blob_count)

    need_to_rerun = False

    ## HORIZONTAL CUTS: blobs spanning more than one topline
    hp_blobs = []
    if lines:
        # Blob 0 is skipped in every loop over the blobs.
        # NOTE(review): presumably blob 0 is the whole-image blob -- confirm.
        for i in range(1,initial_blob_count):
            blob = initial_blobs.GetBlob(i)
            width = int(blob.maxx-blob.minx)
            height = int(blob.maxy-blob.miny)
            blob_miny = int(blob.miny)
            blob_maxy = int(blob.maxy)
            area = int(blob.area)
            if area >= max_blob_area:
                continue
            if not ((height > max_height_b4_cutoff) and (width >= min_char_width)):
                continue
            # One blob-local row range around every line crossing the blob.
            acceptable_row_ranges = []
            for line in lines:
                if blob_miny <= line <= blob_maxy:
                    line_dist_from_miny = line-blob_miny
                    acceptable_row_ranges.append((line_dist_from_miny-range_above_below_line, line_dist_from_miny+range_above_below_line))
            if acceptable_row_ranges:
                hp_blobs.append((blob,acceptable_row_ranges))
    if hp_blobs:
        need_to_rerun = True
        for blob,acceptable_row_ranges in hp_blobs:
            for point_pair in hp(blob,acceptable_row_ranges):
                cvDrawLine(bi_image, point_pair[0], point_pair[1], CV_RGB(255,255,255), 2, 8, 0)

    ## VERTICAL CUTS: blobs wider than a single character
    vp_blobs = []
    for i in range(1,initial_blob_count):
        blob = initial_blobs.GetBlob(i)
        width = int(blob.maxx-blob.minx)
        height = int(blob.maxy-blob.miny)
        area = int(blob.area)
        if area < max_blob_area:
            if (width > max_width_b4_cutoff) and (min_char_height <= height <= max_height_b4_cutoff):
                vp_blobs.append(blob)
    if vp_blobs:
        need_to_rerun = True
        for blob in vp_blobs:
            vp_point_0, vp_point_1 = vp(blob, min_char_width)
            cvDrawLine(bi_image, vp_point_0, vp_point_1, CV_RGB(255,255,255), 2, 8, 0)

    if need_to_rerun:
        # The image changed, so the blobs have to be detected again.
        process_image(bi_image, lines=lines, **params)
        return

    if not lines:
        ## FIND THE TEXT LINES, THEN START OVER WITH THEM
        from cum_sum import get_lines
        y1_array = []
        for i in range(1,initial_blob_count):
            blob = initial_blobs.GetBlob(i)
            minx = int(blob.minx)
            maxx = int(blob.maxx)
            miny = int(blob.miny)
            maxy = int(blob.maxy)
            if is_anchor_sized(maxx - minx, maxy - miny) and is_inside_margins(minx, maxx, miny, maxy):
                y1_array.append(miny)
        lines = get_lines(y1_array, bi_image, num_of_lines=num_lines_on_pecha)
        if not lines:
            # Without this guard the call below would recurse forever.
            raise ValueError("get_lines did not find any text line")
        process_image(bi_image, lines=lines, **params)
        return

    ## IF WE GET HERE, WE HAVE FINISHED CHOPPING BLOBS
    print("FINISHED CHOPPING BLOBS")
    cvSaveImage("/home/ryan/openCV/bi_image.png", bi_image)
    output_dir = "/home/ryan/openCV/final_output/"
    char_string = "text_%02d__folio_%02d__line_%02d__char_%02d.png"
    # Anchor blobs per 1-based line number.
    line_anchors = dict((line_num, []) for line_num in range(1, num_lines_on_pecha+1))
    for i in range(1,initial_blob_count):
        blob = initial_blobs.GetBlob(i)
        minx = int(blob.minx)
        maxx = int(blob.maxx)
        miny = int(blob.miny)
        maxy = int(blob.maxy)
        width = maxx - minx
        height = maxy - miny
        if is_anchor_sized(width, height):
            if is_inside_margins(minx, maxx, miny, maxy):
                ## ANCHOR: assign to the first line crossing the blob
                for line_index, line_value in enumerate(lines):
                    if (miny <= line_value <= maxy): # or (miny-line_value <= miny_linetop_var):
                        line_anchors[line_index+1].append(blob)
                        break
            ## POTENTIAL SUB, SUPER, OR VOWEL
            # NOTE(review): this branch is only reached for anchor-sized
            # blobs outside the margins; it may have been meant to pair
            # with the size test above -- confirm before changing.
            elif (min_sub_super_width <= width <= max_sub_super_width_b4_cutoff) and (min_sub_super_height <= height <= max_sub_super_height_b4_cutoff):
                pass
            else:
                output_blob(blob)

    ## DRAW POLYGONS
    from cum_sum import draw_lines_2
    # Top-left and top-right corner of every anchor, left to right, per
    # line.  Anchors already passed the size and margin tests above, so
    # they are not filtered again.
    d = dict((line_num, []) for line_num in line_anchors.keys())
    for line_num, anchors in line_anchors.items():
        for blob in sorted(anchors,key=operator.attrgetter('minx')):
            d[line_num].append((blob.minx, blob.miny))
            d[line_num].append((blob.maxx, blob.miny))

    print("draw lines")
    draw_lines_2(d)
    print('draw lines done')

    ## FINAL OUTPUT
    for line_num, anchors in line_anchors.items():
        for i,blob in enumerate(sorted(anchors,key=operator.attrgetter('minx'))):
            if line_num == 7:
                # Debug aid: outline the characters of line 7 on the page
                # image that is saved as SEVEN.png below.
                cvRectangle(bi_image,
                        cvPoint(int(blob.minx),int(blob.miny)),
                        cvPoint(int(blob.maxx),int(blob.maxy)),
                        CV_RGB(0,255,0), 1, 8, 0
                        )
            char_image_name = char_string % (0,0,line_num,i)
            blob_height = int(blob.maxy-blob.miny)
            blob_width = int(blob.maxx-blob.minx)
            blob_size = cvSize(blob_width,blob_height)
            blob_image = cvCreateImage(blob_size, 8, 1)
            cvZero(blob_image)
            blob.FillBlob(blob_image, CV_RGB(255,255,255), -1*int(blob.minx), -1*int(blob.miny))
            cvSaveImage(os.path.join(output_dir,char_image_name), blob_image)
        print("line %d completed" % line_num)
        cvSaveImage(os.path.join(output_dir,"SEVEN.png"), bi_image)
322 |     
if __name__ == "__main__":
    # Script entry point: segment one hard-coded, already binarized page.
#    f = open("/home/ryan/openCV/vars","a")
    input_image_path = "/home/ryan/ocr/test/input_image/2009_10_19/thresh48_BUM_BA_0010_01.png"
    binary_image = cvLoadImage(input_image_path)
    # Stop with a clear message instead of failing later inside
    # process_image when the image could not be loaded.
    if not binary_image:
        print("Error opening %s" % input_image_path)
        sys.exit(-1)
#    print>>f, "\nDIR\n", dir(), "\nGLOBALS\n", globals(), "\nLOCALS\n", locals(), "\nTIME\n", datetime.datetime.now()    
    process_image(binary_image)
329 | #    print>>f, "\nDIR\n", dir(), "\nGLOBALS\n", globals(), "\nLOCALS\n", locals(), "\nTIME\n", datetime.datetime.now()
330 | #    f.close()
331 | #    print>>f, "\nDIR\n", dir(), "\nGLOBALS\n", globals(), "\nLOCALS\n", locals(), "\nTIME\n", datetime.datetime.now()    
332 | ##     command_name = sys.argv[1]
333 | ##     if command_name=="output_images":
334 | ##         input_image_filename = sys.argv[2]
335 | ##         output_image_directory  = sys.argv[3]
336 | ##         output_images(input_image_filename, output_image_directory)
337 | ##     elif command_name=="draw_bounding_boxes":
338 | ##         input_image_filename = sys.argv[2]
339 | ##         output_image_filename  = sys.argv[3]
340 | ##         draw_bounding_boxes(input_image_filename, output_image_filename)        
341 | ##     elif command_name=="create_y1_histogram":
342 | ##         input_image_filename = sys.argv[2]
343 | ##         create_y1_histogram(input_image_filename)
344 | ##     elif command_name=="draw_top_of_lines":
345 | ##         input_image_filename = sys.argv[2]
346 | ##         output_image_filename  = sys.argv[3]
347 | ##         draw_top_of_lines(input_image_filename, output_image_filename)    
348 | 
349 | ##     else:
350 | ##         print """
351 | ##         Options:
352 | ##         1. output_images [input_image_filename] [output_image_directory]
353 | ##         2. draw_bounding_boxes [input_image_filename] [output_image_filename]
354 | ##         3. create_y1_histogram [input_image_filename]
355 | ##         4. draw_top_of_lines [input_image_filename] [output_image_filename]
356 | 
357 | ##         """
358 |         
359 | ### Get the size of the contour
360 | ##size = abs(cvContourArea(contour))
361 | 
362 | ### Is convex
363 | ##is_convex = cvCheckContourConvexity(contour)
364 | ##             cvRectangle(output_image,
365 | ##                     cvPoint(int(blob.minx),int(blob.miny)),
366 | ##                     cvPoint(int(blob.maxx),int(blob.maxy)),
367 | ##                     CV_RGB(0,255,0), 1, 8, 0 
368 | ##                     )
369 | # #sorted(blobs_not_too_big_small,key=operator.attrgetter('area'))]
370 | 
371 | ## USES PILL AND BOXES RATHER THAN BLOBS 
372 | ## def output_images(input_image_filename, output_image_directory):
373 | ##     global rect_dims
374 | ##     bi_image  = cvLoadImage(input_image_filename)
375 | ##     pill_image = Image.open(input_image_filename)
376 | ##     gray_image = cvCreateImage(cvGetSize(bi_image), 8, 1)
377 | ##     cvCvtColor(bi_image, gray_image, CV_BGR2GRAY)
378 |     
379 | ##     blobs = process_image(bi_image,gray_image)
380 | 
381 | ##     for (img_num,blob) in enumerate(blobs):
382 | ##         if is_box_within_box(blob):
383 | ##             pass
384 | ##         else:
385 | ##             blob_width = blob.maxx-blob.minx
386 | ##             blob_height = blob.maxy-blob.miny
387 | ##             blob_size = (blob_width,blob_height)
388 | ##             blob_box = (blob.minx,blob.miny,blob.maxx,blob.maxy)
389 | ##             blob_region = pill_image.crop(blob_box)
390 | ##             character_image = Image.new("RGB",blob_size,color=None)
391 | ## ## im.paste(image, box) -- box argument is either a 2-tuple giving the upper left corner, a 4-tuple defining the left, upper, right, and lower pixel coordinate, or None (same as (0, 0)). If a 4-tuple is given, the size of the pasted image must match the size of the region.
392 | ##             character_image.paste(blob_region,box=None)
393 | ##             character_image.save(output_image_directory+'_'+str(img_num)+'.png')
394 | ##     print 'done'
395 | 
396 | ## DRAW LINES VERSION 1 PIL
397 | ##             from cum_sum import draw_lines
398 | ##             y1_array = []
399 |             
400 | ##             for i in range(1,initial_blob_count): # if checking area, don't need to ignore 0
401 | ##                 blob = initial_blobs.GetBlob(i)
402 | ##                 width = int(blob.maxx-blob.minx)
403 | ##                 height = int(blob.maxy-blob.miny)
404 | ##                 area = int(blob.area)
405 | ##                 if area < max_blob_area:
406 | ##                     if (min_char_width <= width <= max_width_b4_cutoff) and (min_char_height <= height <= max_height_b4_cutoff):
407 | ##                         y1_array.append(blob.miny)
408 | ##             print "len(y1_array): ",len(y1_array)
409 | ##             draw_lines(y1_array)
410 | ##             print 'done'
411 | 
412 | ## def create_y1_histogram(input_image_filename):
413 | ##     from cum_sum import cum_sum
414 | ##     y1_array = []
415 | 
416 | ##     global rect_dims
417 | ##     bi_image  = cvLoadImage(input_image_filename)
418 | ##     output_image = cvCloneImage(bi_image)
419 | ##     pill_image = Image.open(input_image_filename)
420 | ##     gray_image = cvCreateImage(cvGetSize(bi_image), 8, 1)
421 | ##     cvCvtColor(bi_image, gray_image, CV_BGR2GRAY)
422 |     
423 | ##     blobs = process_image(bi_image,gray_image)
424 | 
425 | ##     for (img_num,blob) in enumerate(blobs):
426 | ##         if is_box_within_box(blob):
427 | ##             pass
428 | ##         else:
429 | ##             y1_array.append(blob.miny)
430 | ##     print "len(y1_array): ",len(y1_array)
431 | ##     cum_sum(y1_array)
432 | ##     print 'done'
433 | 
434 | ## def draw_bounding_boxes(input_image_filename, output_image_filename):
435 | ##     global rect_dims
436 | ##     bi_image  = cvLoadImage(input_image_filename)
437 | ##     output_image = cvLoadImage(output_image_filename)
438 | ##     gray_image = cvCreateImage(cvGetSize(bi_image), 8, 1)
439 | ##     cvCvtColor(bi_image, gray_image, CV_BGR2GRAY)
440 |     
441 | ##     blobs = process_image(bi_image,gray_image)
442 | 
443 | ##     for (img_num,blob) in enumerate(blobs):
444 | ##         if is_box_within_box(blob):
445 | ##             pass
446 | ## ##             blobs.pop(img_num)
447 | ## ##             print "len(blobs): ", len(blobs)
448 | ##         else:
449 | ##             cvRectangle(output_image,
450 | ##                     cvPoint(int(blob.minx),int(blob.miny)),
451 | ##                     cvPoint(int(blob.maxx),int(blob.maxy)),
452 | ##                     CV_RGB(0,255,0), 1, 8, 0 
453 | ##                     )            
454 | 
455 | ##     cvSaveImage("/home/ryan/ocr/openCV/test/image/input.png", gray_image)
456 | ##     cvSaveImage("/home/ryan/ocr/openCV/test/image/output.png", output_image)
457 | ##     print 'done'
458 | 


--------------------------------------------------------------------------------
/legacy_functions/opencv_drawing.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python
  2 | 
  3 | # Originally distributed with OpenCV source code
  4 | 
  5 | from random import Random
  6 | import colorsys
  7 | 
  8 | print "OpenCV Python version of drawing"
  9 | 
 10 | import cv
 11 | 
 12 | def random_color(random):
 13 |     """
 14 |     Return a random color
 15 |     """
 16 |     icolor = random.randint(0, 0xFFFFFF)
 17 |     return cv.Scalar(icolor & 0xff, (icolor >> 8) & 0xff, (icolor >> 16) & 0xff)
 18 | 
 19 | if __name__ == '__main__':
 20 | 
 21 |     # some "constants"
 22 |     width = 1000
 23 |     height = 700
 24 |     window_name = "Drawing Demo"
 25 |     number = 100
 26 |     delay = 5
 27 |     line_type = cv.CV_AA  # change it to 8 to see non-antialiased graphics
 28 |     
 29 |     # create the source image
 30 |     image = cv.CreateImage( (width, height), 8, 3)
 31 | 
 32 |     # create window and display the original picture in it
 33 |     cv.NamedWindow(window_name, 1)
 34 |     cv.SetZero(image)
 35 |     cv.ShowImage(window_name, image)
 36 | 
 37 |     # create the random number
 38 |     random = Random()
 39 | 
 40 |     # draw some lines
 41 |     for i in range(number):
 42 |         pt1 =  (random.randrange(-width, 2 * width),
 43 |                           random.randrange(-height, 2 * height))
 44 |         pt2 =  (random.randrange(-width, 2 * width),
 45 |                           random.randrange(-height, 2 * height))
 46 |         cv.Line(image, pt1, pt2,
 47 |                    random_color(random),
 48 |                    random.randrange(0, 10),
 49 |                    line_type, 0)
 50 |         
 51 |         cv.ShowImage(window_name, image)
 52 |         cv.WaitKey(delay)
 53 | 
 54 |     # draw some rectangles
 55 |     for i in range(number):
 56 |         pt1 =  (random.randrange(-width, 2 * width),
 57 |                           random.randrange(-height, 2 * height))
 58 |         pt2 =  (random.randrange(-width, 2 * width),
 59 |                           random.randrange(-height, 2 * height))
 60 |         cv.Rectangle(image, pt1, pt2,
 61 |                         random_color(random),
 62 |                         random.randrange(-1, 9),
 63 |                         line_type, 0)
 64 |         
 65 |         cv.ShowImage(window_name, image)
 66 |         cv.WaitKey(delay)
 67 | 
 68 |     # draw some ellipes
 69 |     for i in range(number):
 70 |         pt1 =  (random.randrange(-width, 2 * width),
 71 |                           random.randrange(-height, 2 * height))
 72 |         sz =  (random.randrange(0, 200),
 73 |                         random.randrange(0, 200))
 74 |         angle = random.randrange(0, 1000) * 0.180
 75 |         cv.Ellipse(image, pt1, sz, angle, angle - 100, angle + 200,
 76 |                         random_color(random),
 77 |                         random.randrange(-1, 9),
 78 |                         line_type, 0)
 79 |         
 80 |         cv.ShowImage(window_name, image)
 81 |         cv.WaitKey(delay)
 82 | 
 83 |     # init the list of polylines
 84 |     nb_polylines = 2
 85 |     polylines_size = 3
 86 |     pt = [0,] * nb_polylines
 87 |     for a in range(nb_polylines):
 88 |         pt [a] = [0,] * polylines_size
 89 | 
 90 |     # draw some polylines
 91 |     for i in range(number):
 92 |         for a in range(nb_polylines):
 93 |             for b in range(polylines_size):
 94 |                 pt [a][b] =  (random.randrange(-width, 2 * width),
 95 |                                      random.randrange(-height, 2 * height))
 96 |         cv.PolyLine(image, pt, 1,
 97 |                        random_color(random),
 98 |                        random.randrange(1, 9),
 99 |                        line_type, 0)
100 | 
101 |         cv.ShowImage(window_name, image)
102 |         cv.WaitKey(delay)
103 | 
104 |     # draw some filled polylines
105 |     for i in range(number):
106 |         for a in range(nb_polylines):
107 |             for b in range(polylines_size):
108 |                 pt [a][b] =  (random.randrange(-width, 2 * width),
109 |                                      random.randrange(-height, 2 * height))
110 |         cv.FillPoly(image, pt,
111 |                        random_color(random),
112 |                        line_type, 0)
113 | 
114 |         cv.ShowImage(window_name, image)
115 |         cv.WaitKey(delay)
116 | 
117 |     # draw some circles
118 |     for i in range(number):
119 |         pt1 =  (random.randrange(-width, 2 * width),
120 |                           random.randrange(-height, 2 * height))
121 |         cv.Circle(image, pt1, random.randrange(0, 300),
122 |                      random_color(random),
123 |                      random.randrange(-1, 9),
124 |                      line_type, 0)
125 |         
126 |         cv.ShowImage(window_name, image)
127 |         cv.WaitKey(delay)
128 | 
129 |     # draw some text
130 |     for i in range(number):
131 |         pt1 =  (random.randrange(-width, 2 * width),
132 |                           random.randrange(-height, 2 * height))
133 |         font = cv.InitFont(random.randrange(0, 8),
134 |                               random.randrange(0, 100) * 0.05 + 0.01,
135 |                               random.randrange(0, 100) * 0.05 + 0.01,
136 |                               random.randrange(0, 5) * 0.1,
137 |                               random.randrange(0, 10),
138 |                               line_type)
139 | 
140 |         cv.PutText(image, "Testing text rendering!",
141 |                       pt1, font,
142 |                       random_color(random))
143 |         
144 |         cv.ShowImage(window_name, image)
145 |         cv.WaitKey(delay)
146 | 
147 |     # prepare a text, and get it's properties
148 |     font = cv.InitFont(cv.CV_FONT_HERSHEY_COMPLEX,
149 |                           3, 3, 0.0, 5, line_type)
150 |     text_size, ymin = cv.GetTextSize("OpenCV forever!", font)
151 |     pt1 = ((width - text_size[0]) / 2, (height + text_size[1]) / 2)
152 |     image2 = cv.CloneImage(image)
153 | 
154 |     # now, draw some OpenCV pub ;-)
155 |     for i in range(0, 512, 2):
156 |         cv.SubS(image2, cv.ScalarAll(i), image)
157 |         (r, g, b) = colorsys.hsv_to_rgb((i % 100) / 100., 1, 1)
158 |         cv.PutText(image, "OpenCV forever!",
159 |                       pt1, font, cv.RGB(255 * r, 255 * g, 255 * b))
160 |         cv.ShowImage(window_name, image)
161 |         cv.WaitKey(delay)
162 | 
163 |     # wait some key to end
164 |     cv.WaitKey(0)
165 | 


--------------------------------------------------------------------------------
/legacy_functions/plot.py:
--------------------------------------------------------------------------------
 1 | from PIL import Image, ImageDraw
 2 | 
 3 | import numpy as np
 4 | import matplotlib
 5 | matplotlib.use('Agg')
 6 | from matplotlib import pyplot
 7 | 
 8 | def plot_y(x,y):
 9 |     fig = pyplot.figure()
10 |     ax = fig.add_subplot(111)
11 |     ax.plot(x,y)
12 | 
13 |     ax.set_xlabel('y value')
14 |     ax.set_ylabel('cum_sum_count')
15 |     ax.grid(True)
16 | 
17 |     fig.savefig('/home/ryan/ocr/openCV/test/image/y_value_hist_02.png')
18 | 
def get_counts(array):
    """
    Return the distinct values of ``array`` with their occurrence counts.

    The result is a list of ``(value, count)`` tuples sorted ascending.
    An empty input yields an empty list.

    Counting is done in a single pass with a dict instead of calling
    ``array.count`` once per element, which was quadratic in the input size.
    """
    counts = {}
    for value in array:
        counts[value] = counts.get(value, 0) + 1
    return sorted(counts.items())
25 | 
def cum_sum(array):
    """
    Plot a trailing windowed count over the distinct values of ``array``.

    Distinct values are visited in ascending order (as returned by
    ``get_counts``). For each one, the plotted number is its own count plus
    the counts of the five distinct values visited just before it. The two
    resulting series are passed to ``plot_y``.
    """
    xs = []
    ys = []
    window = [0] * 5  # counts of the five previously visited values
    for (value, count) in get_counts(array):
        assert len(window) == 5
        xs.append(value)
        ys.append(sum(window) + count)
        # slide the window: drop the oldest count, add the current one
        window = window[1:] + [count]
    plot_y(xs, ys)
41 | 
def draw_lines(array,image):
    """
    Draw red horizontal lines at the 16 highest-scoring y values.

    Each distinct value in ``array`` (ascending, via ``get_counts``) is scored
    as its own count plus the counts of the five distinct values before it.
    The y values of the 16 largest scores are drawn as full-width red lines
    and the result is saved to a hard-coded PNG path.

    NOTE(review): the ``image`` parameter is not used; the picture is always
    opened from the hard-coded "lena.pgm". Confirm whether callers expect
    ``image`` to be drawn on instead.

    Fixes relative to the previous version:
    * ``list.append`` was called with two arguments (TypeError); the
      (score, y) pair is now appended as one tuple.
    * the comprehension variable was named ``sum``; in Python 2 that name
      leaks into the function scope and made the earlier ``sum(...)`` call
      fail with UnboundLocalError.
    """
    window = [0] * 5  # counts of the five previously visited values
    scored = []
    for (y, y_val_count) in get_counts(array):
        scored.append((sum(window) + y_val_count, y))
        window = window[1:] + [y_val_count]
    scored.sort(reverse=True)
    y1_vals = [y for (_score, y) in scored[:16]]

    im = Image.open("lena.pgm")
    width = im.size[0]
    draw = ImageDraw.Draw(im)
    for y1 in y1_vals:
        line_dims = [0,y1,width,y1]
        draw.line(line_dims, fill="red")
    del draw
    im.save("/home/ryan/ocr/openCV/test/image/image_with_lines.png", "PNG")
64 | 


--------------------------------------------------------------------------------
/legacy_functions/print_w_h.py:
--------------------------------------------------------------------------------
1 | from PIL import Image
2 | 
3 | for f in os.listdir('.'):
4 |     im = Image.open(os.path.join(os.getcwd(),f))
5 |     width,height = im.size
6 |     print "w:",width,"h:",height
7 | 
8 | 


--------------------------------------------------------------------------------
/legacy_functions/process_image_dimensions.txt:
--------------------------------------------------------------------------------
1 | min_char_width=20, min_char_height=27, max_width_b4_cutoff=80, max_height_b4_cutoff=120, max_blob_area=2800, min_sub_super_width = 21, min_sub_super_height = 14, max_sub_super_width_b4_cutoff = 66, max_sub_super_height_b4_cutoff = 38, miny_linetop_var=25, range_above_below_line=15, draw_lines=False, lines=[], margin=50, num_lines_on_pecha=8
2 | 


--------------------------------------------------------------------------------
/legacy_functions/sequential_finish.py:
--------------------------------------------------------------------------------
 1 |     def finish(blobs):
 2 |         need_to_find_first_character = True
 3 |         
 4 |         while blobs:
 5 |             if need_to_find_first_character: # First time around - point is (0,0)
 6 |                 char_num = 0
 7 |                 previous_blob = FakeBlob()
 8 |                 need_to_find_first_character = False
 9 |                 new_line = True
10 | 
11 |             blobs = get_blobs_sorted_by_distance_from_previous_blob(blobs, previous_blob)
12 |             if len(blobs) == 1:
13 |                 current_blob = blobs.pop(0)
14 |                 last_iteration = True
15 |             else:
16 |                 if not new_line:
17 |                     for i, blob in enumerate(blobs):
18 |                         if ( int(blob.maxx) > int(previous_blob.minx) ) and ( abs(int(blob.miny)-int(previous_blob.miny)) < 30 ):
19 |                             try:
20 |                                 current_blob = blobs.pop(i)
21 |                                 break
22 |                             except:
23 |                                 print "i: ", i
24 |                 else:
25 |                     current_blob = blobs.pop(0)
26 |                     new_line_miny = int(current_blob.miny)
27 |             char_image_name = char_string % (0,0,line,char_num,int(current_blob.miny))
28 |             blob_height = int(current_blob.maxy-current_blob.miny)
29 |             blob_width = int(current_blob.maxx-current_blob.minx)
30 |             blob_size = cvSize(blob_width,blob_height)
31 |             blob_image = cvCreateImage(blob_size, 8, 1)
32 |             cvZero(blob_image)
33 |             blob.FillBlob(blob_image, CV_RGB(255,255,255), -1*int(current_blob.minx), -1*int(current_blob.miny))
34 |             cvSaveImage(os.path.join(folio_characters_directory,char_image_name), blob_image)            
35 |             cvRectangle(bi_image,
36 |                         cvPoint(int(current_blob.minx),int(current_blob.miny)),
37 |                         cvPoint(int(current_blob.maxx),int(current_blob.maxy)),
38 |                         CV_RGB(*(line_color_dict[line])), 1, 8, 0 
39 |                         )            
40 |             cvSaveImage(os.path.join(folio_intermediate_directory,char_image_name), bi_image)
41 |             if not last_iteration:
42 |                 blobs_for_checking_EOL = blobs[:]
43 |                 continue_on_same_line = False
44 |                 for blob in blobs_for_checking_EOL:
45 |                     if ( int(blob.maxx) > int(current_blob.minx) ) and ( abs(int(blob.miny)-int(current_blob.miny)) < 30 ):                    
46 | #                    if int(blob.maxx) > int(current_blob.minx) and int(blob.miny-35) <= int(current_blob.miny):
47 |                         previous_blob = current_blob
48 |                         char_num += 1
49 |                         continue_on_same_line = True
50 |                         new_line = False
51 |                         break
52 |                 if not continue_on_same_line:
53 |                     print "FINISHED Line %d of %s" % (line, folio_name)
54 |                     line +=1
55 |                     if line > num_lines:
56 |                         print "Excessive number of lines (%d)for %s" % (line, folio_name)
57 |                         break
58 |                     char_num = 0
59 |                     next_line_y_guess = new_line_miny+10
60 |                     if next_line_y_guess < 0:
61 |                         next_line_y_guess = 0
62 |                     previous_blob = FakeBlob(miny=next_line_y_guess)
63 | 


--------------------------------------------------------------------------------
/legacy_functions/vp.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import operator
 3 | import time
 4 | 
 5 | from PIL import Image
 6 | 
 7 | from opencv.cv import *
 8 | from opencv.highgui import *
 9 | 
10 | from pyblobs.BlobResult import CBlobResult
11 | from pyblobs.Blob import CBlob
12 | 
# Load the source image and build an 8-bit single-channel copy of it.
input_image_filename = "/home/ryan/ocr/test/image/thresh48.jpg"
bi_image  = cvLoadImage(input_image_filename)
gray_image = cvCreateImage(cvGetSize(bi_image), 8, 1)
cvCvtColor(bi_image, gray_image, CV_BGR2GRAY)

# Mask of the same size with every element set to 1.
mask = cvCreateImage(cvGetSize(bi_image), 8, 1)
cvSet(mask,1)

initial_blobs = CBlobResult(gray_image, mask, 100, False)
initial_blob_count = initial_blobs.GetNumBlobs()

# Render every blob except index 0 into its own image and save it twice:
# once named after its zero-padded width, once after its zero-padded height.
for i in range(1,initial_blob_count):
    blob = initial_blobs.GetBlob(i)
    area = blob.area
    blob_width = int(blob.maxx-blob.minx)
    blob_height = int(blob.maxy-blob.miny)
    blob_size = cvSize(blob_width,blob_height)
    blob_image = cvCreateImage(blob_size, 8, 1)
    cvZero(blob_image)
    # Negative offsets move the blob's bounding box to the image origin.
    blob.FillBlob(blob_image, CV_RGB(255,255,255), -int(blob.minx), -int(blob.miny))
    zf_num = "%04d"
    width_info = '__'.join([zf_num % blob_width, str(i)])
    cvSaveImage("/home/ryan/openCV/blob_width/" + width_info + ".jpg", blob_image)
    height_info = '__'.join([zf_num % blob_height, str(i)])
    cvSaveImage("/home/ryan/openCV/blob_height/" + height_info + ".jpg", blob_image)
38 | ## vp_point_pairs = vp(initial_blobs.GetBlob(567), 3)
39 | ## drawing_image = cvCloneImage(bi_image)
40 | 
41 | ## for point_pair in vp_point_pairs:
42 | ##     cvDrawLine(drawing_image, point_pair[0], point_pair[1], CV_RGB(0,245,0), 2, 8, 0)
43 | 
44 | ## cvSaveImage("/home/ryan/openCV/output_images/5.jpg", bi_image)
45 | ## cvSaveImage("/home/ryan/openCV/output_images/6.jpg", drawing_image)
46 | 
47 | 


--------------------------------------------------------------------------------
/legacy_functions/wh.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import operator
 3 | import time
 4 | 
 5 | from PIL import Image
 6 | 
 7 | from opencv.cv import *
 8 | from opencv.highgui import *
 9 | 
10 | from pyblobs.BlobResult import CBlobResult
11 | from pyblobs.Blob import CBlob
12 | 
# Load the source image.
input_image_filename = "/home/ryan/ocr/test/image/thresh48.jpg"
bi_image  = cvLoadImage(input_image_filename)

# 8-bit single-channel copy of the source image.
gray_image = cvCreateImage(cvGetSize(bi_image), 8, 1)
cvCvtColor(bi_image, gray_image, CV_BGR2GRAY)

# Mask of the same size with every element set to 1.
mask = cvCreateImage(cvGetSize(bi_image), 8, 1)
cvSet(mask,1)

initial_blobs = CBlobResult(gray_image, mask, 100, False)
initial_blob_count = initial_blobs.GetNumBlobs()

# Size limits; only referenced by the commented-out W_GT_45 section below.
(min_char_width, min_char_height) = (20, 27)
(max_width_b4_cutoff, max_height_b4_cutoff) = (45, 112)
max_blob_area = 2800

need_to_rerun = False
vp_blobs = []
hp_blobs = []
31 | 
32 | ## W_GT_45
33 | ## for i in range(1,initial_blob_count): # if checking area, don't need to ignore 0
34 | ##     blob = initial_blobs.GetBlob(i)
35 | ##     width = blob.maxx-blob.minx
36 | ##     height = blob.maxy-blob.miny
37 | ##     area = blob.area
38 | ##     if area < max_blob_area:
39 | ##         if (width > max_width_b4_cutoff) and (height >= min_char_height):
40 | ##             blob_height = int(blob.maxy-blob.miny)
41 | ##             blob_width = int(blob.maxx-blob.minx)        
42 | ##             blob_size = cvSize(blob_width,blob_height)
43 | ##             blob_image = cvCreateImage(blob_size, 8, 1)
44 | ##             cvZero(blob_image)
45 | ##             blob.FillBlob(blob_image, CV_RGB(255,255,255), -1*int(blob.minx), -1*int(blob.miny))
46 | ##             zf_num = "%04d"
47 | ##             width_info = zf_num % blob_width
48 | ##             height_info = zf_num % blob_height
49 | ##             info = width_info + '__' + height_info + '__' + str(i)
50 | ##             cvSaveImage("/home/ryan/openCV/w/"+info+".jpg", blob_image)
51 |             
52 | 
53 | ## WH
# Render every blob except index 0 into its own image, named by blob index.
for i in range(1,initial_blob_count):
    blob = initial_blobs.GetBlob(i)
    area = blob.area
    blob_width = int(blob.maxx-blob.minx)
    blob_height = int(blob.maxy-blob.miny)
    blob_size = cvSize(blob_width,blob_height)
    blob_image = cvCreateImage(blob_size, 8, 1)
    cvZero(blob_image)
    # Negative offsets move the blob's bounding box to the image origin.
    blob.FillBlob(blob_image, CV_RGB(255,255,255), -int(blob.minx), -int(blob.miny))
    target_path = "/home/ryan/openCV/blob_images/" + str(i) + ".jpg"
    cvSaveImage(target_path, blob_image)
64 | 
65 | 
66 | 
67 | ## vp_point_pairs = vp(initial_blobs.GetBlob(567), 3)
68 | ## drawing_image = cvCloneImage(bi_image)
69 | 
70 | ## for point_pair in vp_point_pairs:
71 | ##     cvDrawLine(drawing_image, point_pair[0], point_pair[1], CV_RGB(0,245,0), 2, 8, 0)
72 | 
73 | ## cvSaveImage("/home/ryan/openCV/output_images/5.jpg", bi_image)
74 | ## cvSaveImage("/home/ryan/openCV/output_images/6.jpg", drawing_image)
75 | 
76 | 


--------------------------------------------------------------------------------
/line_drawing.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from PIL import Image, ImageDraw
  3 | 
  4 | import numpy as np
  5 | import matplotlib
  6 | matplotlib.use('Agg')
  7 | from matplotlib import pyplot
  8 | 
  9 | from opencv.cv import *
 10 | from opencv.highgui import *
 11 | 
 12 | def create_line_drawing_from_black_squares(bi_image, black_squares_image, output_dir, y_values):
 13 |     filename = os.path.join(output_dir, "blob_line_drawing_from_black_squares_image.png")
 14 |     y_bottom_values = []
 15 |     
 16 |     for i, y_value1 in enumerate(y_values):
 17 |         if i+1 == len(y_values):
 18 |             break
 19 |         else:
 20 |             rowsum_rownum_tuples = []
 21 |             y_value2 = y_values[i+1]
 22 |             for (i, row) in enumerate( black_squares_image.rowrange() ):
 23 |                 if y_value1 < i < y_value2:
 24 |                     row_sum = cvSum(row)
 25 |                     rowsum_rownum_tuples.append( (int(row_sum[0]), i) )        
 26 |             rowsum_rownum_tuples.sort()
 27 |             rowsum_rownum_tuples.reverse()
 28 |             row_with_most_whitespace = rowsum_rownum_tuples[0][1]
 29 |             point1 = cvPoint(0, row_with_most_whitespace)
 30 |             point2 = cvPoint(bi_image.width, row_with_most_whitespace)
 31 |             cvDrawLine(bi_image, point1, point2, CV_RGB(255,0,0), 1, 8, 0)
 32 |             y_bottom_values.append(row_with_most_whitespace)
 33 | 
 34 |     cvSaveImage(filename, bi_image)
 35 |     y_bottom_values.sort()
 36 |     return y_bottom_values
 37 | 
 38 | def random_color(random):
 39 |     """
 40 |     Return a random color
 41 |     """
 42 |     icolor = random.randint(0, 0xFFFFFF)
 43 |     return cvScalar(icolor & 0xff, (icolor >> 8) & 0xff, (icolor >> 16) & 0xff)
 44 | 
def create_curved_bottom_line_drawing_image(bi_image, output_dir, y_bottom_values):
    """
    Paint, for each starting row, one red pixel per image column and save.

    For every value in ``y_bottom_values`` and every column ``x`` of
    ``bi_image``, rows are probed at growing distance from the starting row
    (the row above first, then the row below) until a pixel whose repr equals
    that of ``cvScalar(255, 255, 255, 0)`` is found. That pixel is set to
    red. The modified ``bi_image`` is saved into ``output_dir``; nothing is
    returned.

    NOTE(review): the loop only ends for a column once a matching pixel is
    found, and the lower probe row has no upper bound check -- presumably
    every column is expected to contain a white pixel near the starting row;
    confirm.
    """
    # NOTE(review): `random` is imported but not referenced in this function.
    import random
    filename = os.path.join(output_dir, "blob_curved_bottom_line_drawing.png")
    # Pixels are compared through their repr strings.
    white_scalar = repr(cvScalar(255, 255, 255, 0))    
    # Never appended to; its only consumer is the commented-out cvPolyLine call.
    lines_to_draw = []

    for y_bottom_value in y_bottom_values:
        previous_successful_y_value = int(y_bottom_value)
        previous_tried_y_value_init = int(y_bottom_value)        
        previous_tried_y_value_neg = int(y_bottom_value)
        previous_tried_y_value_pos = int(y_bottom_value)                
        point_of_equilibrium = int(y_bottom_value)
        # Collects the (x, y) of each painted pixel; not read afterwards.
        line_to_draw = []

        for x in range(bi_image.width):
            # Every column restarts its search from previous_successful_y_value.
            # That variable is not reassigned anywhere in this function, so it
            # always equals point_of_equilibrium and hit_equilibrium starts
            # out True for every column.
            previous_tried_y_value_init = previous_successful_y_value
            previous_tried_y_value_neg = previous_successful_y_value
            previous_tried_y_value_pos = previous_successful_y_value                                     
            not_determined_point = True
            if previous_tried_y_value_init == point_of_equilibrium:
                hit_equilibrium = True            
            else:
                hit_equilibrium = False
            while not_determined_point:
                if hit_equilibrium:
                    # Widen the search by one row in each direction; the
                    # starting row itself is not probed.
                    previous_tried_y_value_neg = previous_tried_y_value_neg - 1
                    previous_tried_y_value_pos = previous_tried_y_value_pos + 1                    
                    y1 = previous_tried_y_value_neg
                    y2 = previous_tried_y_value_pos
                    # The upper candidate is checked first, so it wins when
                    # both rows are white.
                    if y1 >= 0:
                        #print x, y1
                        point1 =  repr(bi_image[y1][x])
                        if point1 == white_scalar:
                            bi_image[y1][x] = CV_RGB(255,0,0)
                            line_to_draw.append( (x, y1) )
                            not_determined_point = False
                            continue
                    if y2 >= 0:
                        point2 = repr(bi_image[y2][x])                    
                        if point2 == white_scalar:
                            bi_image[y2][x] = CV_RGB(255,0,0)
                            line_to_draw.append( (x, y2) )
                            not_determined_point = False
                            continue
                    continue
                else:
                    # Step one row toward the equilibrium row and probe there.
                    if previous_tried_y_value_init > point_of_equilibrium:
                        previous_tried_y_value_init = previous_tried_y_value_init - 1
                    else:
                        previous_tried_y_value_init = previous_tried_y_value_init + 1                        
                    if previous_tried_y_value_init == point_of_equilibrium:
                        hit_equilibrium = True
                    y = previous_tried_y_value_init
                    point = repr(bi_image[y][x])                    
                    if point == white_scalar:
                        bi_image[y][x] = CV_RGB(255,0,0)
                        line_to_draw.append( (x, y) )
                        not_determined_point = False
                        continue
                    else:
                        continue
    #cvPolyLine(bi_image, lines_to_draw, 0, 0, 1, CV_RGB(255,0,0), 1, 8, 0)
    cvSaveImage(filename, bi_image)
108 | 
def create_text_image(blobs, output_dir, bi_image):
    """
    Write the text "pa" at the (min_x, min_y) corner of every blob and save.

    The text is drawn in black on ``bi_image`` with a Hershey simplex font,
    and the annotated image is saved as "blob_text_image.png" inside
    ``output_dir``. Nothing is returned.

    The previous version also built a ``middle_point`` for each blob that was
    never used (and whose formula reduced to the blob's max corner); that
    dead computation has been removed.
    """
    line_type = CV_AA
    filename = os.path.join(output_dir, "blob_text_image.png")
    #font = cvInitFont(CV_FONT_VECTOR0, 0.4, 0.4, 0.0, 2, line_type)
    font = cvInitFont(CV_FONT_HERSHEY_SIMPLEX, 1, 1, 0, 2, line_type)

    for blob in blobs:
        cvPutText(bi_image, "pa", cvPoint(blob.min_x, blob.min_y), font, CV_RGB(0,0,0) )
    cvSaveImage(filename, bi_image)
119 | 
def create_black_square_image(blobs, output_dir, bi_image):
    """
    Cover every blob's bounding box with a filled black rectangle and save.

    Rectangles are drawn directly on ``bi_image``; the result is written to
    "blob_black_squares.png" inside ``output_dir``. Nothing is returned.
    """
    target_path = os.path.join(output_dir, "blob_black_squares.png")
    for blob in blobs:
        top_left = cvPoint(blob.min_x, blob.min_y)
        bottom_right = cvPoint(blob.max_x, blob.max_y)
        cvRectangle(bi_image, top_left, bottom_right,
                    CV_RGB(0,0,0), CV_FILLED, 8, 0)
    cvSaveImage(target_path, bi_image)
126 |     
def create_bounding_box_image(blobs, output_dir, bi_image):
    """
    Outline every blob's bounding box in green (thickness 1) and save.

    Rectangles are drawn directly on ``bi_image``; the result is written to
    "blob_bounding_boxes.png" inside ``output_dir``. Nothing is returned.
    """
    target_path = os.path.join(output_dir, "blob_bounding_boxes.png")
    for blob in blobs:
        top_left = cvPoint(blob.min_x, blob.min_y)
        bottom_right = cvPoint(blob.max_x, blob.max_y)
        cvRectangle(bi_image, top_left, bottom_right,
                    CV_RGB(0,255,0), 1, 8, 0)
    cvSaveImage(target_path, bi_image)
133 |     
def create_histogram(blobs, output_dir):
    """
    Plot a width-weighted, windowed histogram of the blobs' min_y values.

    Each blob contributes its ``min_y`` once per unit of its ``width``. The
    distinct y values are then visited in the order ``get_counts`` returns
    them, and for each one the plotted number is its own count plus the
    counts of the five distinct values visited just before it. The two
    series are passed to ``plot_y`` together with ``output_dir``.
    """
    weighted_tops = []
    for blob in blobs:
        weighted_tops += [blob.min_y] * blob.width

    xs = []
    ys = []
    window = [0] * 5  # counts of the five previously visited values
    for (y, y_val_count) in get_counts(weighted_tops):
        assert len(window) == 5
        xs.append(y)
        ys.append(sum(window) + y_val_count)
        # slide the window: drop the oldest count, add the current one
        window = window[1:] + [y_val_count]

    plot_y(xs, ys, output_dir)
151 | 
def get_y_values_and_create_multiline_drawing(blobs, output_dir, bi_image, num_lines=8, min_dist_betw_lines=50):
    """Find candidate line positions in each third of the image and draw them.

    The image is split into three vertical sections by x coordinate.  For
    each section, every blob lying entirely inside it contributes its
    ``min_y`` once per pixel of ``width``.  Each distinct y (ascending) is
    scored with its own count plus the counts of the five distinct y values
    before it.  Candidates are visited from highest to lowest score and
    drawn as a horizontal segment at ``y - 3`` spanning that section only.

    Args:
        blobs: sequence of objects exposing ``min_x``, ``max_x``, ``min_y``
            and ``width``; it is iterated once per section.
        output_dir: directory that receives ``blob_line_drawing.png``.
        bi_image: image to draw on (modified in place); must expose
            ``width``.
        num_lines: maximum number of lines drawn per section.
        min_dist_betw_lines: a candidate is skipped unless it is strictly
            farther than this from every y already drawn in its section.

    Returns:
        List of ``(point1, point2)`` segment end points for all sections,
        after ``list.sort()``.
    """
    filename = os.path.join(output_dir, "blob_line_drawing.png")
    drawn_points = []

    # "//" makes the integer division explicit so the section bounds stay
    # whole pixel columns; on an integer width this equals the Python 2 "/".
    third = bi_image.width // 3
    sections = [
        (0, third),
        (third + 1, third * 2),
        (third * 2 + 1, bi_image.width),
    ]

    for left, right in sections:
        # Width-weighted votes for the top edge of every blob in the section.
        y_array = []
        for blob in blobs:
            if blob.min_x >= left and blob.max_x <= right:
                y_array.extend([blob.min_y] * blob.width)

        # Sliding window over the five previous distinct y values.
        window = [0, 0, 0, 0, 0]
        scored = []
        for y, y_count in get_counts(y_array):
            scored.append((sum(window) + y_count, y))
            window = window[1:] + [y_count]
        # Highest score first; ties resolve to the larger y first.
        scored.sort(reverse=True)

        drawn_ys = []
        for _score, y1 in scored:
            if len(drawn_ys) >= num_lines:
                break
            if all(abs(drawn_y - y1) > min_dist_betw_lines for drawn_y in drawn_ys):
                point1 = cvPoint(left, y1 - 3)
                point2 = cvPoint(right, y1 - 3)
                cvDrawLine(bi_image, point1, point2, CV_RGB(255, 0, 0), 1, 8, 0)
                drawn_ys.append(y1)
                drawn_points.append((point1, point2))
        drawn_ys.sort()
        print("drawn_ys:  %s" % (drawn_ys,))

    cvSaveImage(filename, bi_image)
    # NOTE(review): the resulting order depends on how the point objects
    # compare with each other -- confirm what callers expect.
    drawn_points.sort()
    print("drawn_points:  %s" % (drawn_points,))
    return drawn_points
200 | 
def get_y_values_and_create_line_drawing(blobs, output_dir, bi_image, num_lines=8, min_dist_betw_lines=30):
    """Pick the best-scoring line positions and draw them across the image.

    Every blob contributes its ``min_y`` once per pixel of ``width``.  Each
    distinct y (ascending) is scored with its own count plus the counts of
    the five distinct y values before it.  Candidates are visited from
    highest to lowest score and drawn as a full-width horizontal line at
    ``y - 3``.

    Args:
        blobs: iterable of objects exposing ``min_y`` and ``width``.
        output_dir: directory that receives ``blob_line_drawing.png``.
        bi_image: image to draw on (modified in place); must expose
            ``width``.
        num_lines: maximum number of lines to draw.
        min_dist_betw_lines: a candidate is skipped unless it is strictly
            farther than this from every y already drawn.

    Returns:
        The chosen y values in ascending order.
    """
    filename = os.path.join(output_dir, "blob_line_drawing.png")

    # Width-weighted votes for the top edge of every blob.
    y_array = []
    for blob in blobs:
        y_array.extend([blob.min_y] * blob.width)

    # Sliding window over the five previous distinct y values.
    window = [0, 0, 0, 0, 0]
    scored = []
    for y, y_count in get_counts(y_array):
        scored.append((sum(window) + y_count, y))
        window = window[1:] + [y_count]
    # Highest score first; ties resolve to the larger y first.
    scored.sort(reverse=True)

    width = int(bi_image.width)
    drawn_ys = []
    for _score, y1 in scored:
        if len(drawn_ys) >= num_lines:
            break
        if all(abs(drawn_y - y1) > min_dist_betw_lines for drawn_y in drawn_ys):
            point1 = cvPoint(0, y1 - 3)
            point2 = cvPoint(width, y1 - 3)
            cvDrawLine(bi_image, point1, point2, CV_RGB(255, 0, 0), 1, 8, 0)
            drawn_ys.append(y1)

    cvSaveImage(filename, bi_image)
    drawn_ys.sort()
    print("drawn_ys:  %s" % (drawn_ys,))
    return drawn_ys
240 |     
def plot_y(x, y, output_dir):
    """Plot ``y`` against ``x`` and save the chart as ``blob_histogram.png``.

    Args:
        x: values for the horizontal axis (labelled 'y value').
        y: values for the vertical axis (labelled 'cum_sum_count').
        output_dir: directory that receives ``blob_histogram.png``.
    """
    fig = pyplot.figure()
    try:
        ax = fig.add_subplot(111)
        ax.plot(x, y)

        ax.set_xlabel('y value')
        ax.set_ylabel('cum_sum_count')
        ax.grid(True)

        filename = os.path.join(output_dir, "blob_histogram.png")
        fig.savefig(filename)
    finally:
        # Figures created through pyplot stay registered until they are
        # closed; release this one so repeated calls do not pile them up.
        pyplot.close(fig)
252 | 
def get_counts(y_array):
    """Return the distinct values of ``y_array`` with their occurrence counts.

    Args:
        y_array: list of hashable, mutually comparable values.

    Returns:
        A list of ``(value, count)`` tuples sorted by value; empty when
        ``y_array`` is empty.
    """
    # Single counting pass; calling list.count() per element would rescan
    # the whole list every time.
    counts = {}
    for y in y_array:
        counts[y] = counts.get(y, 0) + 1
    return sorted(counts.items())
259 | 
260 | ## def get_lines(array, bi_image, min_dist_betw_lines=70, num_of_lines=8):
261 | ##     drawn_ys = []
262 | ##     def should_draw_line(y1):
263 | ##         for drawn_y in drawn_ys:
264 | ##             if not abs(drawn_y-y1) > min_dist_betw_lines:
265 | ##                 return False
266 | ##         return True
267 |     
268 | ##     set_with_counts = get_counts(array)
269 | ##     last_five = [0,0,0,0,0]
270 | ##     cum_sum_with_y = []
271 | ##     for (y,y_val_count) in set_with_counts:
272 | ##         assert len(last_five) == 5
273 | ## 	new_sum = sum(last_five)+y_val_count
274 | ## 	cum_sum_with_y.append((new_sum,y))
275 | ## 	last_five.append(y_val_count)
276 | ## 	last_five.pop(0)
277 | ##     cum_sum_with_y.sort()
278 | ##     cum_sum_with_y.reverse()
279 | 
280 | ##     width = int(bi_image.width)
281 | ##     for (count,y1) in cum_sum_with_y:
282 | ##         if len(drawn_ys) >= num_of_lines:
283 | ##             break
284 | ##         if should_draw_line(y1):
285 | ##             point1 = cvPoint(0,y1-3)
286 | ##             point2 = cvPoint(width,y1-3)                
287 | ##             drawn_ys.append(y1)
288 | ##     drawn_ys.sort()
289 | ##     print "drawn_ys: ",drawn_ys
290 | ##     return drawn_ys
291 | 
292 | ##     lines_dict = dict([(i+1,y) for i,y in enumerate(drawn_ys)])
293 | ##     print "lines_dict: ", lines_dict
294 | ##     return lines_dict
295 | 
296 | 
297 | ## def draw_lines(array, path_to_image_to_draw_on="/home/ryan/ocr/test/image/test_raw.png", min_dist_betw_lines=70):
298 | ##     drawn_ys = []
299 | ##     def should_draw_line(y1):
300 | ##         for drawn_y in drawn_ys:
301 | ##             if not abs(drawn_y-y1) > min_dist_betw_lines:
302 | ##                 return False
303 | ##         return True
304 |     
305 | ##     set_with_counts = get_counts(array)
306 | ##     last_five = [0,0,0,0,0]
307 | ##     cum_sum_with_y = []
308 | ##     for (y,y_val_count) in set_with_counts:
309 | ##         assert len(last_five) == 5
310 | ## 	new_sum = sum(last_five)+y_val_count
311 | ## 	cum_sum_with_y.append((new_sum,y))
312 | ## 	last_five.append(y_val_count)
313 | ## 	last_five.pop(0)
314 | ##     cum_sum_with_y.sort()
315 | ##     cum_sum_with_y.reverse()
316 | 
317 | ##     im = Image.open(path_to_image_to_draw_on)
318 | ##     width = im.size[0]
319 | ##     draw = ImageDraw.Draw(im)    
320 | ##     for (count,y1) in cum_sum_with_y:
321 | ##         if len(drawn_ys) == 16:
322 | ##             break
323 | ##         if should_draw_line(y1):
324 | ##             line_dims = [0,y1-3,width,y1-3]
325 | ##             draw.line(line_dims, fill="red")
326 | ##             drawn_ys.append(y1)
327 | ##     del draw 
328 | ##     im.save("/home/ryan/ocr/test/output_image/image_with_lines_raw.png", "PNG")
329 | ##     print "drawn_ys: ",drawn_ys
330 | 
331 | ## def draw_lines_2(array, bi_image, min_dist_betw_lines=70, num_of_lines=8):
332 | ##     drawn_ys = []
333 | ##     def should_draw_line(y1):
334 | ##         for drawn_y in drawn_ys:
335 | ##             if not abs(drawn_y-y1) > min_dist_betw_lines:
336 | ##                 return False
337 | ##         return True
338 |     
339 | ##     set_with_counts = get_counts(array)
340 | ##     last_five = [0,0,0,0,0]
341 | ##     cum_sum_with_y = []
342 | ##     for (y,y_val_count) in set_with_counts:
343 | ##         assert len(last_five) == 5
344 | ## 	new_sum = sum(last_five)+y_val_count
345 | ## 	cum_sum_with_y.append((new_sum,y))
346 | ## 	last_five.append(y_val_count)
347 | ## 	last_five.pop(0)
348 | ##     cum_sum_with_y.sort()
349 | ##     cum_sum_with_y.reverse()
350 | 
351 | ##     width = int(bi_image.width)
352 | ##     for (count,y1) in cum_sum_with_y:
353 | ##         if len(drawn_ys) == num_of_lines:
354 | ##             break
355 | ##         if should_draw_line(y1):
356 | ##             point1 = cvPoint(0,y1-3)
357 | ##             point2 = cvPoint(width,y1-3)                
358 | ##             cvDrawLine(bi_image, point1, point2, CV_RGB(255,0,0), 1, 8, 0)
359 | ##             drawn_ys.append(y1)
360 | ##     cvSaveImage("/home/ryan/ocr/test/output_image/vp_lines_boxes.png", bi_image)
361 | ##     print "drawn_ys: ",drawn_ys
362 | 


--------------------------------------------------------------------------------
/make_char_dirs.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | def make_char_dir(initial_char_dir):
 4 |     char_dir = initial_char_dir
 5 |     while 1:
 6 |         if os.path.exists(char_dir):
 7 |             char_dir = char_dir+'_'
 8 |         else:
 9 |             os.mkdir(char_dir)
10 |             return char_dir
11 |         
# Names of the character classes (romanized Tibetan letters, judging by the
# project README); one sub-directory is created for each entry.
char_list = ['ka','kha','ga','nga',
             'ca','cha','ja','nya',
             'ta','tha','da','na',
             'pa','pha','ba','ma',
             'tsa','tsha','dza','wa',
             'zha','za','a','ya',
             'ra','la','sha','sa',
             'ha','aa']
cwd = os.getcwd()
# Build paths with os.path.join and no trailing separator, so any suffix
# make_char_dir adds ends up on the directory name itself and the
# per-character directories are created inside the directory it returns.
char_dir = make_char_dir(os.path.join(cwd, 'characters'))
for char in char_list:
    os.mkdir(os.path.join(char_dir, char))
print('dirs created')
25 | 
26 |     
27 | 


--------------------------------------------------------------------------------