├── _config.yml
├── images
    ├── 14.jpg
    ├── 38.jpg
    ├── mask.jpg
    ├── 14mask.jpg
    ├── joints.jpg
    └── 14joints.jpg
├── README.md
├── LICENSE
├── .gitignore
└── detectTable.py


/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-cayman


--------------------------------------------------------------------------------
/images/14.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cuppersd/table_recognition/HEAD/images/14.jpg


--------------------------------------------------------------------------------
/images/38.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cuppersd/table_recognition/HEAD/images/38.jpg


--------------------------------------------------------------------------------
/images/mask.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cuppersd/table_recognition/HEAD/images/mask.jpg


--------------------------------------------------------------------------------
/images/14mask.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cuppersd/table_recognition/HEAD/images/14mask.jpg


--------------------------------------------------------------------------------
/images/joints.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cuppersd/table_recognition/HEAD/images/joints.jpg


--------------------------------------------------------------------------------
/images/14joints.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cuppersd/table_recognition/HEAD/images/14joints.jpg


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # 表格线提取
 2 | ## 实例1
 3 | ### 1. 识别前的图片
 4 | ![识别前的图片](images/14.jpg)
 5 | 
 6 | ### 2. 点提取
 7 | ![点提取](images/14joints.jpg)
 8 | 
 9 | ### 3. 线提取
10 | ![线提取](images/14mask.jpg)
11 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 LICHUAN
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 


--------------------------------------------------------------------------------
/detectTable.py:
--------------------------------------------------------------------------------
  1 | import cv2
  2 | import numpy as np
  3 | import json
  4 | import os
  5 | 
  6 | 
  7 | # 图像切割模块
  8 | class cutImage(object):
  9 |     def __init__(self, img, bin_threshold, kernel, iterations, areaRange, filename, border=10, show=True, write=True, ):
 10 |         '''
 11 |         :param img: 输入图像
 12 |         :param bin_threshold: 二值化的阈值大小
 13 |         :param kernel: 形态学kernel
 14 |         :param iterations: 迭代次数
 15 |         :param areaRange: 面积范围
 16 |         :param filename:保留json数据的文件名称
 17 |         :param border: 留边大小
 18 |         :param show: 是否显示结果图，默认是显示
 19 |         :param write: 是否把结果写到文件，默认是写入
 20 |         '''
 21 |         self.img = img
 22 |         self.bin_threshold = bin_threshold
 23 |         self.kernel = kernel
 24 |         self.iterations = iterations
 25 |         self.areaRange = areaRange
 26 |         self.border = border
 27 |         self.show = show
 28 |         self.write = write
 29 |         self.filename = filename
 30 | 
 31 |     def getRes(self):
 32 |         fl = open(self.filename, 'w')
 33 |         if self.img.shape[2] == 1:  # 灰度图
 34 |             img_gray = self.img
 35 |         elif self.img.shape[2] == 3:
 36 |             img_gray = cv2.cvtColor(self.img, cv2.COLOR_BGR2GRAY)
 37 |         ret, thresh = cv2.threshold(img_gray, self.bin_threshold, 255, cv2.THRESH_BINARY_INV)  # 二值化
 38 |         img_erode = cv2.dilate(thresh, self.kernel, iterations=self.iterations)
 39 | 
 40 |         cv2.imshow('thresh', thresh)
 41 |         cv2.imshow('erode', img_erode)
 42 |         image, contours, hierarchy = cv2.findContours(img_erode, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
 43 |         roiList = []
 44 |         res = []
 45 |         result = {}
 46 |         area_coord_roi = []
 47 |         for i in range(len(contours)):
 48 |             cnt = contours[i]
 49 |             area = cv2.contourArea(cnt)
 50 |             if area > self.areaRange[0] and area < self.areaRange[1]:
 51 |                 x, y, w, h = cv2.boundingRect(cnt)
 52 |                 roi = self.img[y + self.border:(y + h) - self.border, x + self.border:(x + w) - self.border]
 53 |                 area_coord_roi.append((area, (x, y, w, h), roi))
 54 |         max_area = max([info[0] for info in area_coord_roi])
 55 | 
 56 |         for info in area_coord_roi:
 57 |             if info[0] == max_area:
 58 |                 max_rect = info[1]
 59 |         for each in area_coord_roi:
 60 |             x, y, w, h = each[1]
 61 |             if x > max_rect[0] and y > max_rect[1] and (x + w) < (max_rect[0] + max_rect[2]) and (y + h) < (
 62 |                     max_rect[1] + max_rect[3]):
 63 |                 pass
 64 |             else:
 65 |                 tmp_ = each[1]
 66 |                 cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
 67 | 
 68 |                 tmp = []
 69 |                 name = "tmp.jpg"
 70 |                 cv2.imwrite(name, each[2])
 71 |                 # text = image_to_string(name,False,'-l chi_sim')
 72 |                 # tmp.append(text)
 73 |                 tmp.append(" ")
 74 |                 tmp.extend(list(tmp_))
 75 |                 tmp.append("0 0 0")
 76 |                 res.append(tmp)
 77 |                 os.remove(name)
 78 |         cv2.imshow("yyy", img)
 79 | 
 80 |         result['1'] = [res]
 81 |         fl.write(json.dumps(result))
 82 |         return roiList
 83 | 
 84 | 
 85 | # 检测表格，使用形态学
 86 | # 返回是表格图以及表格中交叉点的图
 87 | class detectTable(object):
 88 |     def __init__(self, src_img):
 89 |         self.src_img = src_img
 90 | 
 91 |     def run(self):
 92 |         if len(self.src_img.shape) == 2:  # 灰度图
 93 |             gray_img = self.src_img
 94 |         elif len(self.src_img.shape) == 3:
 95 |             gray_img = cv2.cvtColor(self.src_img, cv2.COLOR_BGR2GRAY)
 96 | 
 97 |         thresh_img = cv2.adaptiveThreshold(~gray_img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 15, -2)
 98 |         h_img = thresh_img.copy()
 99 |         v_img = thresh_img.copy()
100 |         scale = 15
101 |         h_size = int(h_img.shape[1] / scale)
102 | 
103 |         h_structure = cv2.getStructuringElement(cv2.MORPH_RECT, (h_size, 1))  # 形态学因子
104 |         h_erode_img = cv2.erode(h_img, h_structure, 1)
105 | 
106 |         h_dilate_img = cv2.dilate(h_erode_img, h_structure, 1)
107 |         # cv2.imshow("h_erode",h_dilate_img)
108 |         v_size = int(v_img.shape[0] / scale)
109 | 
110 |         v_structure = cv2.getStructuringElement(cv2.MORPH_RECT, (1, v_size))  # 形态学因子
111 |         v_erode_img = cv2.erode(v_img, v_structure, 1)
112 |         v_dilate_img = cv2.dilate(v_erode_img, v_structure, 1)
113 | 
114 |         mask_img = h_dilate_img + v_dilate_img
115 |         joints_img = cv2.bitwise_and(h_dilate_img, v_dilate_img)
116 |         cv2.imshow("joints", joints_img)
117 |         cv2.imshow("mask", mask_img)
118 |         cv2.imwrite('joints.jpg', joints_img)
119 |         cv2.imwrite('mask.jpg', mask_img)
120 | 
121 |         return mask_img, joints_img
122 | 
123 |         # 将生成的json数据显示在图像上
124 | 
125 | 
def drawLine(all_lines, height=841, width=595):
    '''
    Draw the boxes described by the generated JSON data on a blank image.

    Each row is indexed as ``row[1]``..``row[4]`` = x, y, w, h (the layout
    produced by ``cutImage.getRes``); negative values are treated as 0.
    The outlines are drawn on a black canvas which is shown in the window
    "img" until a key is pressed. The input rows are not modified.

    :param all_lines: iterable of groups, each group an iterable of rows
    :param height: canvas height in pixels
    :param width: canvas width in pixels
    '''
    # uint8, not int8: the colour value 255 does not fit into a signed byte.
    blank_image = np.zeros((height, width, 3), np.uint8)
    for group in all_lines:
        for row in group:
            # Round, then clamp to zero, working on local copies only.
            x, y, w, h = (max(0, int(np.round(value))) for value in row[1:5])
            cv2.rectangle(blank_image, (x, y), (x + w, y + h), (255, 0, 0), 1)

    cv2.imshow("img", blank_image)
    cv2.waitKey()
152 | 
153 | 
if __name__ == '__main__':
    # Demo: extract the table lines of a sample image and display the results.
    image_path = './images/14.jpg'
    img = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise SystemExit("cannot read image: " + image_path)
    cv2.imshow("img", img)
    mask, joint = detectTable(img).run()
    # Keep the windows open until a key is pressed, then clean up.
    cv2.waitKey()
    cv2.destroyAllWindows()
161 | 


--------------------------------------------------------------------------------