├── _config.yml ├── images ├── 14.jpg ├── 38.jpg ├── mask.jpg ├── 14mask.jpg ├── joints.jpg └── 14joints.jpg ├── README.md ├── LICENSE ├── .gitignore └── detectTable.py /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /images/14.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cuppersd/table_recognition/HEAD/images/14.jpg -------------------------------------------------------------------------------- /images/38.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cuppersd/table_recognition/HEAD/images/38.jpg -------------------------------------------------------------------------------- /images/mask.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cuppersd/table_recognition/HEAD/images/mask.jpg -------------------------------------------------------------------------------- /images/14mask.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cuppersd/table_recognition/HEAD/images/14mask.jpg -------------------------------------------------------------------------------- /images/joints.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cuppersd/table_recognition/HEAD/images/joints.jpg -------------------------------------------------------------------------------- /images/14joints.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cuppersd/table_recognition/HEAD/images/14joints.jpg -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # 表格线提取 2 | ## 实例1 3 | ### 1. 识别前的图片 4 | ![识别前的图片](images/14.jpg) 5 | 6 | ### 2. 点提取 7 | ![点提取](images/14joints.jpg) 8 | 9 | ### 3. 线提取 10 | ![线提取](images/14mask.jpg) 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 LICHUAN 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /detectTable.py: 
"""Table-line extraction helpers built on OpenCV morphology.

``cutImage`` segments an image into candidate regions and dumps their
bounding boxes as JSON, ``detectTable`` extracts the horizontal/vertical
ruling lines of a table and their intersections, and ``drawLine`` renders
the JSON boxes produced by ``cutImage`` onto a blank canvas.
"""
import json
import os

import cv2
import numpy as np


def _to_gray(img):
    """Return *img* as a single-channel array.

    Accepts a 2-D grayscale array or a 3-D array with 1, 3 (BGR) or
    4 (BGRA) channels.

    :raises ValueError: if *img* is ``None`` or has an unsupported shape.
    """
    if img is None:
        # cv2.imread signals an unreadable file by returning None.
        raise ValueError("image is None (was the file read correctly?)")
    if img.ndim == 2:
        return img
    if img.ndim == 3:
        channels = img.shape[2]
        if channels == 1:
            # OpenCV wants contiguous memory; a plain slice is strided.
            return np.ascontiguousarray(img[:, :, 0])
        if channels == 3:
            return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        if channels == 4:
            return cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
    raise ValueError("unsupported image shape: {!r}".format(img.shape))


# Image cutting module
class cutImage(object):
    def __init__(self, img, bin_threshold, kernel, iterations, areaRange, filename, border=10, show=True, write=True, ):
        '''
        :param img: input image
        :param bin_threshold: threshold used for binarization
        :param kernel: morphological kernel
        :param iterations: number of dilation iterations
        :param areaRange: (min, max) contour area, both bounds exclusive
        :param filename: name of the file that receives the JSON data
        :param border: margin trimmed from every side of each region
        :param show: whether to display the result images (default: yes)
        :param write: whether to write the result to file (default: yes)
        '''
        self.img = img
        self.bin_threshold = bin_threshold
        self.kernel = kernel
        self.iterations = iterations
        self.areaRange = areaRange
        self.border = border
        self.show = show
        self.write = write
        self.filename = filename

    def getRes(self):
        '''
        Segment the image and record the bounding box of each region.

        The image is binarized (inverted), dilated, and its contours are
        filtered by ``areaRange``.  Regions lying strictly inside the
        largest accepted bounding box are skipped; every other region is
        recorded as ``[" ", x, y, w, h, "0 0 0"]``.  The leading ``" "`` is
        a placeholder for recognized text.

        When ``write`` is true the records are stored in ``filename`` as
        ``{"1": [records]}``.  When ``show`` is true the intermediate
        images and the annotated result are displayed.

        :return: list of ROIs (views into ``img``, trimmed by ``border``)
                 of the recorded regions; empty when nothing matches.
                 A ROI may be empty if the region is smaller than twice
                 the border.
        '''
        gray = _to_gray(self.img)
        _, thresh = cv2.threshold(gray, self.bin_threshold, 255, cv2.THRESH_BINARY_INV)  # binarize
        dilated = cv2.dilate(thresh, self.kernel, iterations=self.iterations)

        if self.show:
            cv2.imshow('thresh', thresh)
            cv2.imshow('erode', dilated)

        # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
        # (contours, hierarchy) on 2.x/4.x; contours is always second to last.
        contours = cv2.findContours(dilated, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

        min_area, max_area = self.areaRange[0], self.areaRange[1]
        border = self.border
        candidates = []
        for cnt in contours:
            area = cv2.contourArea(cnt)
            if min_area < area < max_area:
                x, y, w, h = cv2.boundingRect(cnt)
                roi = self.img[y + border:(y + h) - border, x + border:(x + w) - border]
                candidates.append((area, (x, y, w, h), roi))

        res = []
        roiList = []
        if candidates:
            # Iterating in reverse makes the last of several equally large
            # regions win.
            mx, my, mw, mh = max(reversed(candidates), key=lambda c: c[0])[1]

            # Annotate a copy so the caller's image and the ROI views into it
            # stay untouched.
            annotated = self.img.copy() if self.show else None
            for _, (x, y, w, h), roi in candidates:
                strictly_inside = (x > mx and y > my
                                   and (x + w) < (mx + mw)
                                   and (y + h) < (my + mh))
                if strictly_inside:
                    continue
                if annotated is not None:
                    cv2.rectangle(annotated, (x, y), (x + w, y + h), (0, 255, 0), 2)
                res.append([" ", int(x), int(y), int(w), int(h), "0 0 0"])
                roiList.append(roi)
            if annotated is not None:
                cv2.imshow("yyy", annotated)

        if self.write:
            # Open the file only once there is something to write, and make
            # sure it is closed even if serialization fails.
            with open(self.filename, 'w') as fl:
                fl.write(json.dumps({'1': [res]}))
        return roiList


# Detect a table using morphology.
# Returns the table-line image and the image of the line intersections.
class detectTable(object):
    def __init__(self, src_img, show=True, write=True, scale=15):
        '''
        :param src_img: input image (grayscale, BGR or BGRA, 8-bit)
        :param show: whether to display the "joints" and "mask" windows
        :param write: whether to save ``joints.jpg`` and ``mask.jpg``
        :param scale: the structuring element is 1/scale of the image
                      width (horizontal) or height (vertical)
        '''
        self.src_img = src_img
        self.show = show
        self.write = write
        self.scale = scale

    @staticmethod
    def _keep_lines(binary, ksize):
        '''Erode then dilate *binary* with a rectangular element of *ksize*.'''
        structure = cv2.getStructuringElement(cv2.MORPH_RECT, ksize)
        # iterations must be passed by keyword: the third positional
        # parameter of cv2.erode / cv2.dilate is the output array.
        eroded = cv2.erode(binary, structure, iterations=1)
        return cv2.dilate(eroded, structure, iterations=1)

    def run(self):
        '''
        Extract the table lines and their intersections.

        :return: ``(mask_img, joints_img)`` where ``mask_img`` contains the
                 horizontal and vertical lines and ``joints_img`` only the
                 pixels where both meet.
        '''
        gray_img = _to_gray(self.src_img)
        thresh_img = cv2.adaptiveThreshold(~gray_img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 15, -2)

        # Clamp to 1 so tiny images do not request a zero-sized element.
        h_size = max(1, int(thresh_img.shape[1] / self.scale))
        v_size = max(1, int(thresh_img.shape[0] / self.scale))

        h_dilate_img = self._keep_lines(thresh_img, (h_size, 1))
        v_dilate_img = self._keep_lines(thresh_img, (1, v_size))

        # uint8 "+" wraps around (255 + 255 == 254); OR keeps crossings at 255.
        mask_img = cv2.bitwise_or(h_dilate_img, v_dilate_img)
        joints_img = cv2.bitwise_and(h_dilate_img, v_dilate_img)

        if self.show:
            cv2.imshow("joints", joints_img)
            cv2.imshow("mask", mask_img)
        if self.write:
            cv2.imwrite('joints.jpg', joints_img)
            cv2.imwrite('mask.jpg', mask_img)

        return mask_img, joints_img


# Draw the generated JSON data on an image.
def drawLine(all_lines, height=841, width=595, show=True):
    '''
    Draw the bounding boxes stored in *all_lines* on a black canvas.

    :param all_lines: list of groups; each group is a list of entries
                      whose items 1..4 are ``x, y, w, h``
    :param height: canvas height in pixels
    :param width: canvas width in pixels
    :param show: display the canvas and block until a key is pressed
    :return: the canvas as a ``(height, width, 3)`` uint8 array
    '''
    # uint8 is required for an 8-bit image; int8 cannot even hold 255.
    blank_image = np.zeros((height, width, 3), np.uint8)
    for group in all_lines:
        for entry in group:
            # Clamp negatives on local copies; the caller's data is not modified.
            x, y, w, h = (int(np.round(max(0, value))) for value in entry[1:5])
            cv2.rectangle(blank_image, (x, y), (x + w, y + h), (255, 0, 0), 1)

    if show:
        cv2.imshow("img", blank_image)
        cv2.waitKey()
    return blank_image


if __name__ == '__main__':
    # image
    img = cv2.imread('./images/14.jpg')
    if img is None:
        raise SystemExit("could not read ./images/14.jpg")
    cv2.imshow("img", img)
    mask, joint = detectTable(img).run()
    # Keep the windows open until a key is pressed.
    cv2.waitKey()