├── CookingAtItsBest.png ├── README.md ├── journal1.jpg ├── main.py ├── news1.jpg └── output ├── letter ├── output1_letter.jpg ├── output2_letter.jpg └── output3_letter.jpg ├── line ├── output1_line.jpg ├── output2_line.jpg └── output3_line.jpg ├── margin ├── output1_margin.jpg ├── output2_margin.jpg └── output3_margin.jpg ├── par ├── output1_par.jpg ├── output2_par.jpg └── output3_par.jpg └── word ├── output1_word.jpg ├── output2_word.jpg └── output3_word.jpg /CookingAtItsBest.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rbaguila/document-layout-analysis/5dd19921334bc8abaeea13d5f46cdcd945e30b32/CookingAtItsBest.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Run the application: python main.py 2 | 3 | *Note: Before running the application for the first time, create a folder named "output" containing the subfolders "letter", "word", "line", "par" and "margin". 4 | 5 | The application is a simple document layout analysis using Python-OpenCV. Using three images, the program needs to do the following: 6 | 7 | 1. Individual characters are boxed 8 | 2. Individual words are boxed 9 | 3. Lines are boxed 10 | 4. Paragraphs are boxed 11 | 5. 
The paragraphs with margins 12 | 13 | 14 | Here is a blog for a short description: https://warkyou-code.blogspot.com/2016/03/document-layout-analysis.html 15 | -------------------------------------------------------------------------------- /journal1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rbaguila/document-layout-analysis/5dd19921334bc8abaeea13d5f46cdcd945e30b32/journal1.jpg -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # import necessary packages 2 | import numpy as np 3 | import cv2 4 | 5 | # loading images 6 | image1 = cv2.imread("journal1.jpg") 7 | image2 = cv2.imread("news1.jpg") 8 | image3 = cv2.imread("CookingAtItsBest.png") 9 | 10 | # hardcoded assigning of output images for the 3 input images 11 | output1_letter = image1.copy() 12 | output1_word = image1.copy() 13 | output1_line = image1.copy() 14 | output1_par = image1.copy() 15 | output1_margin = image1.copy() 16 | 17 | output2_letter = image2.copy() 18 | output2_word = image2.copy() 19 | output2_line = image2.copy() 20 | output2_par = image2.copy() 21 | output2_margin = image2.copy() 22 | 23 | 24 | output3_letter = image3.copy() 25 | output3_word = image3.copy() 26 | output3_line = image3.copy() 27 | output3_par = image3.copy() 28 | output3_margin = image3.copy() 29 | 30 | gray1 = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY) 31 | gray2 = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY) 32 | gray3 = cv2.cvtColor(image3, cv2.COLOR_BGR2GRAY) 33 | 34 | # clean the image using otsu method with the inversed binarized image 35 | ret1,th1 = cv2.threshold(gray1,0,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU) 36 | ret2,th2 = cv2.threshold(gray2,0,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU) 37 | ret3,th3 = cv2.threshold(gray3,0,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU) 38 | 39 | #processing letter by letter boxing 40 | 
def process_letter(thresh,output): 41 | # assign the kernel size 42 | kernel = np.ones((2,1), np.uint8) # vertical 43 | # use closing morph operation then erode to narrow the image 44 | temp_img = cv2.morphologyEx(thresh,cv2.MORPH_CLOSE,kernel,iterations=3) 45 | # temp_img = cv2.erode(thresh,kernel,iterations=2) 46 | letter_img = cv2.erode(temp_img,kernel,iterations=1) 47 | 48 | # find contours 49 | (contours, _) = cv2.findContours(letter_img.copy(), cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE) 50 | 51 | # loop in all the contour areas 52 | for cnt in contours: 53 | x,y,w,h = cv2.boundingRect(cnt) 54 | cv2.rectangle(output,(x-1,y-5),(x+w,y+h),(0,255,0),1) 55 | 56 | return output 57 | 58 | 59 | #processing letter by letter boxing 60 | def process_word(thresh,output): 61 | # assign 2 rectangle kernel size 1 vertical and the other will be horizontal 62 | kernel = np.ones((2,1), np.uint8) 63 | kernel2 = np.ones((1,4), np.uint8) 64 | # use closing morph operation but fewer iterations than the letter then erode to narrow the image 65 | temp_img = cv2.morphologyEx(thresh,cv2.MORPH_CLOSE,kernel,iterations=2) 66 | #temp_img = cv2.erode(thresh,kernel,iterations=2) 67 | word_img = cv2.dilate(temp_img,kernel2,iterations=1) 68 | 69 | (contours, _) = cv2.findContours(word_img.copy(), cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE) 70 | 71 | for cnt in contours: 72 | x,y,w,h = cv2.boundingRect(cnt) 73 | cv2.rectangle(output,(x-1,y-5),(x+w,y+h),(0,255,0),1) 74 | 75 | return output 76 | 77 | #processing line by line boxing 78 | def process_line(thresh,output): 79 | # assign a rectangle kernel size 1 vertical and the other will be horizontal 80 | kernel = np.ones((1,5), np.uint8) 81 | kernel2 = np.ones((2,4), np.uint8) 82 | # use closing morph operation but fewer iterations than the letter then erode to narrow the image 83 | temp_img = cv2.morphologyEx(thresh,cv2.MORPH_CLOSE,kernel2,iterations=2) 84 | #temp_img = cv2.erode(thresh,kernel,iterations=2) 85 | line_img = 
cv2.dilate(temp_img,kernel,iterations=5) 86 | 87 | (contours, _) = cv2.findContours(line_img.copy(), cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE) 88 | 89 | for cnt in contours: 90 | x,y,w,h = cv2.boundingRect(cnt) 91 | cv2.rectangle(output,(x-1,y-5),(x+w,y+h),(0,255,0),1) 92 | 93 | return output 94 | 95 | #processing par by par boxing 96 | def process_par(thresh,output): 97 | # assign a rectangle kernel size 98 | kernel = np.ones((5,5), 'uint8') 99 | par_img = cv2.dilate(thresh,kernel,iterations=3) 100 | 101 | (contours, _) = cv2.findContours(par_img.copy(), cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE) 102 | 103 | for cnt in contours: 104 | x,y,w,h = cv2.boundingRect(cnt) 105 | cv2.rectangle(output,(x,y),(x+w,y+h),(0,255,0),1) 106 | 107 | return output 108 | 109 | #processing margin with paragraph boxing 110 | def process_margin(thresh,output): 111 | # assign a rectangle kernel size 112 | kernel = np.ones((20,5), 'uint8') 113 | margin_img = cv2.dilate(thresh,kernel,iterations=5) 114 | 115 | (contours, _) = cv2.findContours(margin_img.copy(), cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE) 116 | 117 | for cnt in contours: 118 | x,y,w,h = cv2.boundingRect(cnt) 119 | cv2.rectangle(output,(x,y),(x+w,y+h),(0,255,0),1) 120 | 121 | return output 122 | 123 | 124 | # processing and writing the output 125 | output1_letter = process_letter(th1,output1_letter) 126 | output1_word = process_word(th1,output1_word) 127 | output1_line = process_line(th1,output1_line) 128 | # special case for the 5th output because margin with paragraph is just the 4th output with margin 129 | cv2.imwrite("output/letter/output1_letter.jpg", output1_letter) 130 | cv2.imwrite("output/word/output1_word.jpg", output1_word) 131 | cv2.imwrite("output/line/output1_line.jpg", output1_line) 132 | output1_par = process_par(th1,output1_par) 133 | cv2.imwrite("output/par/output1_par.jpg", output1_par) 134 | output1_margin = process_margin(th1,output1_par) 135 | cv2.imwrite("output/margin/output1_margin.jpg", 
output1_par) 136 | 137 | output2_letter = process_letter(th2,output2_letter) 138 | output2_word = process_word(th2,output2_word) 139 | output2_line = process_line(th2,output2_line) 140 | 141 | cv2.imwrite("output/letter/output2_letter.jpg", output2_letter) 142 | cv2.imwrite("output/word/output2_word.jpg", output2_word) 143 | cv2.imwrite("output/line/output2_line.jpg", output2_line) 144 | output2_par = process_par(th2,output2_par) 145 | cv2.imwrite("output/par/output2_par.jpg", output2_par) 146 | output2_margin = process_margin(th2,output2_par) 147 | cv2.imwrite("output/margin/output2_margin.jpg", output2_par) 148 | 149 | output3_letter = process_letter(th3,output3_letter) 150 | output3_word = process_word(th3,output3_word) 151 | output3_line = process_line(th3,output3_line) 152 | 153 | cv2.imwrite("output/letter/output3_letter.jpg", output3_letter) 154 | cv2.imwrite("output/word/output3_word.jpg", output3_word) 155 | cv2.imwrite("output/line/output3_line.jpg", output3_line) 156 | output3_par = process_par(th3,output3_par) 157 | cv2.imwrite("output/par/output3_par.jpg", output3_par) 158 | output3_margin = process_margin(th3,output3_par) 159 | cv2.imwrite("output/margin/output3_margin.jpg", output3_par) 160 | 161 | #cv2.imshow("output letter", output1_letter) 162 | #cv2.imshow("output word", output1_word) 163 | #cv2.imshow("output line", output1_line) 164 | #cv2.imshow("output par", output1_par) 165 | #cv2.imshow("output margin", output1_par) 166 | 167 | cv2.waitKey(0) 168 | -------------------------------------------------------------------------------- /news1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rbaguila/document-layout-analysis/5dd19921334bc8abaeea13d5f46cdcd945e30b32/news1.jpg -------------------------------------------------------------------------------- /output/letter/output1_letter.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rbaguila/document-layout-analysis/5dd19921334bc8abaeea13d5f46cdcd945e30b32/output/letter/output1_letter.jpg -------------------------------------------------------------------------------- /output/letter/output2_letter.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rbaguila/document-layout-analysis/5dd19921334bc8abaeea13d5f46cdcd945e30b32/output/letter/output2_letter.jpg -------------------------------------------------------------------------------- /output/letter/output3_letter.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rbaguila/document-layout-analysis/5dd19921334bc8abaeea13d5f46cdcd945e30b32/output/letter/output3_letter.jpg -------------------------------------------------------------------------------- /output/line/output1_line.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rbaguila/document-layout-analysis/5dd19921334bc8abaeea13d5f46cdcd945e30b32/output/line/output1_line.jpg -------------------------------------------------------------------------------- /output/line/output2_line.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rbaguila/document-layout-analysis/5dd19921334bc8abaeea13d5f46cdcd945e30b32/output/line/output2_line.jpg -------------------------------------------------------------------------------- /output/line/output3_line.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rbaguila/document-layout-analysis/5dd19921334bc8abaeea13d5f46cdcd945e30b32/output/line/output3_line.jpg -------------------------------------------------------------------------------- /output/margin/output1_margin.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rbaguila/document-layout-analysis/5dd19921334bc8abaeea13d5f46cdcd945e30b32/output/margin/output1_margin.jpg -------------------------------------------------------------------------------- /output/margin/output2_margin.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rbaguila/document-layout-analysis/5dd19921334bc8abaeea13d5f46cdcd945e30b32/output/margin/output2_margin.jpg -------------------------------------------------------------------------------- /output/margin/output3_margin.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rbaguila/document-layout-analysis/5dd19921334bc8abaeea13d5f46cdcd945e30b32/output/margin/output3_margin.jpg -------------------------------------------------------------------------------- /output/par/output1_par.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rbaguila/document-layout-analysis/5dd19921334bc8abaeea13d5f46cdcd945e30b32/output/par/output1_par.jpg -------------------------------------------------------------------------------- /output/par/output2_par.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rbaguila/document-layout-analysis/5dd19921334bc8abaeea13d5f46cdcd945e30b32/output/par/output2_par.jpg -------------------------------------------------------------------------------- /output/par/output3_par.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rbaguila/document-layout-analysis/5dd19921334bc8abaeea13d5f46cdcd945e30b32/output/par/output3_par.jpg -------------------------------------------------------------------------------- /output/word/output1_word.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rbaguila/document-layout-analysis/5dd19921334bc8abaeea13d5f46cdcd945e30b32/output/word/output1_word.jpg -------------------------------------------------------------------------------- /output/word/output2_word.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rbaguila/document-layout-analysis/5dd19921334bc8abaeea13d5f46cdcd945e30b32/output/word/output2_word.jpg -------------------------------------------------------------------------------- /output/word/output3_word.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rbaguila/document-layout-analysis/5dd19921334bc8abaeea13d5f46cdcd945e30b32/output/word/output3_word.jpg --------------------------------------------------------------------------------