├── images
    ├── grid.png
    ├── img.png
    ├── img1.jpeg
    ├── index.jpeg
    ├── sample_DL.jpg
    └── removed_grids.jpg
├── detected_text.jpg
├── README.md
├── extract_text.py
├── remove_grid.py
├── LICENSE
├── remove_background.py
└── text_detect.py


/images/grid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DivyaKrishnani/Working-with-Text-on-Images/HEAD/images/grid.png


--------------------------------------------------------------------------------
/images/img.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DivyaKrishnani/Working-with-Text-on-Images/HEAD/images/img.png


--------------------------------------------------------------------------------
/detected_text.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DivyaKrishnani/Working-with-Text-on-Images/HEAD/detected_text.jpg


--------------------------------------------------------------------------------
/images/img1.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DivyaKrishnani/Working-with-Text-on-Images/HEAD/images/img1.jpeg


--------------------------------------------------------------------------------
/images/index.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DivyaKrishnani/Working-with-Text-on-Images/HEAD/images/index.jpeg


--------------------------------------------------------------------------------
/images/sample_DL.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DivyaKrishnani/Working-with-Text-on-Images/HEAD/images/sample_DL.jpg


--------------------------------------------------------------------------------
/images/removed_grids.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DivyaKrishnani/Working-with-Text-on-Images/HEAD/images/removed_grids.jpg


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Working-with-Text-on-Images
 2 | 
 3 | Requirements:
 4 | Python,
 5 | OpenCV,
 6 | pytesseract,
 7 | and scipy
 8 | 
 9 | Extracting text from images, removing grids from images, removing background and extracting useful text using OpenCV.
10 | 


--------------------------------------------------------------------------------
/extract_text.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | from pytesseract import image_to_string
 4 | from PIL import Image
 5 | 
 6 | im = Image.open('images/img1.jpeg')
 7 | print(im)
 8 | 
 9 | print(image_to_string(im))  
10 | #fw = open("out1.txt" , "w")
11 | #fw.write(image_to_string(im))
12 | 
13 | 
14 | 


--------------------------------------------------------------------------------
/remove_grid.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import numpy as np
 3 | 
 4 | img = cv2.imread('images/grid.png')
 5 | gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
 6 | edges = cv2.Canny(gray,50,150,apertureSize = 3)
 7 | minLineLength = 400
 8 | maxLineGap = 15
 9 | lines = cv2.HoughLinesP(edges,1,np.pi/180,100,minLineLength,maxLineGap)
10 | for x1,y1,x2,y2 in lines[0]:
11 |     cv2.line(img,(x1,y1),(x2,y2),(255,255,255),2)
12 | 
13 | cv2.imshow('GRIDS_REMOVED', img)
14 | k = cv2.waitKey(0) & 0xFF
15 | if k == 27:
16 |     cv2.destroyAllWindows()
17 | elif k == ord('s'):
18 |     cv2.imwrite('images/removed_grids.jpg', img)
19 |     cv2.destroyAllWindows()
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Divya Krishnani
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/remove_background.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import numpy as np
 3 | from scipy import signal
 4 | from PIL import Image
 5 | 
 6 | 
 7 | def load_image(path):
 8 |     return np.asarray(Image.open(path))/255.0
 9 | 
def save(path, img):
    """Write ``img`` to ``path`` as an 8-bit image.

    ``img`` is multiplied by 255.0 and cast to ``uint8`` (fractional parts
    are dropped by the cast) before PIL writes it out.

    Args:
        path: Destination file; passed to the PIL image's ``save`` method.
        img: Array to store; boolean arrays work too, since they are
            multiplied by 255.0 like any other input.
    """
    scaled = img * 255.0
    pixels = np.asarray(scaled, dtype=np.uint8)
    picture = Image.fromarray(pixels)
    picture.save(path)
13 | 
def denoise_image(inp, kernel_size=11, threshold=0.1):
    """Keep only pixels clearly darker than their local background.

    The background is estimated with a 2-D median filter; every pixel that
    is darker than that estimate by more than ``threshold`` is treated as
    foreground and kept, all other pixels are replaced by 1.0.

    As a side effect the background estimate and the foreground mask are
    written to ``images/background.png`` and ``images/foreground_mask.png``.

    Args:
        inp: 2-D float array (``signal.medfilt2d`` only accepts 2-D input).
            Presumably scaled to [0, 1] with 1.0 meaning white -- confirm
            against the caller.
        kernel_size: Window size handed to ``signal.medfilt2d``; SciPy
            requires odd values. Defaults to 11.
        threshold: How much darker than the background a pixel must be to
            count as foreground. Defaults to 0.1.

    Returns:
        Array shaped like ``inp`` holding the input value where the mask is
        set and 1.0 everywhere else.
    """
    # estimate 'background' color by a median filter
    bg = signal.medfilt2d(inp, kernel_size)
    save('images/background.png', bg)

    # compute 'foreground' mask as anything that is significantly darker than
    # the background
    mask = inp < bg - threshold
    save('images/foreground_mask.png', mask)

    # return the input value for all pixels in the mask or 1.0 otherwise
    return np.where(mask, inp, 1.0)
26 | 
27 | 
# Intermediate (grayscale, resized) input and final output locations.
inp_path = 'images/sample_DL_1.jpg'
out_path = 'images/output.png'

# Prepare the working copy: grayscale version of the sample, resized to
# 832x536 and written to disk so it can be reloaded as a float array.
image = Image.open('images/sample_DL.jpg').convert('L')
new_image = image.resize((832, 536))
new_image.save(inp_path)

# Reload the working copy, strip the background and store the result.
inp = load_image(inp_path)
out = denoise_image(inp)
save(out_path, out)
39 | 


--------------------------------------------------------------------------------
/text_detect.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import numpy as np
 3 | 
 4 | large = cv2.imread('images/img.png')
 5 | 
 6 | small = cv2.cvtColor(large, cv2.COLOR_BGR2GRAY)
 7 | 
 8 | kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
 9 | grad = cv2.morphologyEx(small, cv2.MORPH_GRADIENT, kernel)
10 | 
11 | _, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
12 | 
13 | kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 1))
14 | connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, kernel)
15 | 
16 | contours, hierarchy = cv2.findContours(connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
17 | 
18 | mask = np.zeros(bw.shape, dtype=np.uint8)
19 | 
20 | for idx in range(len(contours)):
21 |     x, y, w, h = cv2.boundingRect(contours[idx])
22 |     mask[y:y+h, x:x+w] = 0
23 |     cv2.drawContours(mask, contours, idx, (255, 255, 255), -1)
24 |     r = float(cv2.countNonZero(mask[y:y+h, x:x+w])) / (w * h)
25 | 
26 |     if r > 0.45 and w > 8 and h > 8:
27 |         cv2.rectangle(large, (x, y), (x+w-1, y+h-1), (0, 255, 0), 2)
28 | 
29 | cv2.imshow('rects', large)
30 | k = cv2.waitKey(0) & 0xFF
31 | if k == 27:
32 |     cv2.destroyAllWindows()
33 | elif k == ord('s'):
34 |     cv2.imwrite('images/detected_text.jpg', large)
35 |     cv2.destroyAllWindows()
36 | 


--------------------------------------------------------------------------------