├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── config.py ├── image.py ├── img ├── __init__.py ├── table-ceil.png ├── table-detect.jpg ├── table-detect.png ├── table-detect.xlsx ├── table-detectceil.png └── table-line.png ├── models └── table-detect.cfg ├── requirements.txt ├── table_build.py ├── table_ceil.py ├── table_detect.py ├── table_line.py ├── train ├── __init__.py ├── dataset-line │ └── 0 │ │ ├── 0.jpg │ │ ├── 0.json │ │ ├── 1.jpg │ │ ├── 1.json │ │ ├── 13.jpg │ │ ├── 13.json │ │ ├── 2.jpg │ │ ├── 2.json │ │ ├── 5.jpg │ │ └── 5.json └── train.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Python: 2 | *.py[cod] 3 | *.so 4 | *.egg 5 | *.egg-info 6 | *.pth 7 | *.pb 8 | *.h5 9 | *.pbtxt 10 | *.hdf5 11 | *.weights 12 | *.traineddata 13 | dist 14 | buil 15 | .DS_Store* 16 | .ipynb_checkpoints 17 | __pycache__ 18 | dump.rdb 19 | define 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 chineseocr 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # table-detect 2 | 3 | ## table detect(yolo) , table line(unet) (表格检测/表格单元格定位) 4 | 5 | links(下载链接): http://gofile.me/4Nlqh/fNHlWzVWo 6 | download models weights and move to ./modes 7 | 8 | ### test table detect(表格检测) 9 | 10 | ` 11 | python table_detect.py --jpgPath img/table-detect.jpg 12 | ` 13 | 14 | ### test table ceil detect with unet(表格识别输出到excel) 15 | 16 | ` 17 | python table_ceil.py --isToExcel True --jpgPath img/table-detect.jpg 18 | ` 19 | 20 | 21 | ## train table line(训练表格) 22 | ### label table with labelme(https://github.com/wkentaro/labelme) 23 | ` 24 | python train/train.py 25 | ` 26 | 27 | 28 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Sep 9 23:11:51 2020 5 | @author: chineseocr 6 | """ 7 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Sep 9 23:11:51 2020 5 | 6 | @author: chineseocr 7 | """ 8 | 9 | tableModelDetectPath = 'models/table-detect.weights' 10 | tableModeLinePath = "models/table-line.h5" 11 | -------------------------------------------------------------------------------- /image.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Sep 9 23:11:51 2020 5 | image 6 | @author: chineseocr 7 | """ 8 | 9 | import base64 10 | import json 11 | 12 | import cv2 13 | import numpy as np 14 | import six 15 | from PIL import Image 16 | 17 | 18 | def plot_lines(img, lines, linetype=2): 19 | tmp = np.copy(img) 20 | for line in lines: 21 | p1, p2 = line 22 | cv2.line(tmp, (int(p1[0]), int(p1[1])), (int(p2[0]), int(p2[1])), (0, 0, 0), linetype, lineType=cv2.LINE_AA) 23 | 24 | return Image.fromarray(tmp) 25 | 26 | 27 | def base64_to_PIL(string): 28 | try: 29 | 30 | base64_data = base64.b64decode(string) 31 | buf = six.BytesIO() 32 | buf.write(base64_data) 33 | buf.seek(0) 34 | img = Image.open(buf).convert('RGB') 35 | return img 36 | except: 37 | return None 38 | 39 | 40 | def read_json(p): 41 | with open(p) as f: 42 | jsonData = json.loads(f.read()) 43 | shapes = jsonData.get('shapes') 44 | imageData = jsonData.get('imageData') 45 | lines = [] 46 | labels = [] 47 | for shape in shapes: 48 | lines.append(shape['points']) 49 | [x0, y0], [x1, y1] = shape['points'] 50 | label = shape['label'] 51 | if label == '0': 52 | if abs(y1 - y0) > 500: 53 | label = '1' 54 | elif label == '1': 55 | if abs(x1 - x0) > 500: 56 | label = '0' 57 | 58 | labels.append(label) 59 | img = base64_to_PIL(imageData) 60 | return img, lines, labels 61 | 62 | 63 | from numpy import cos, sin, pi 64 | 65 | 66 | def rotate(x, y, angle, cx, cy): 67 | """ 68 | 点(x,y) 绕(cx,cy)点旋转 69 | """ 70 | angle = angle * pi / 180 71 | x_new = (x - cx) * cos(angle) - (y - cy) * sin(angle) + cx 72 | y_new = (x - cx) * sin(angle) + (y - cy) * cos(angle) + cy 73 | return x_new, y_new 74 | 75 | 76 | def box_rotate(box, angle=0, imgH=0, imgW=0): 77 | """ 78 | 对坐标进行旋转 逆时针方向 0\90\180\270, 79 | """ 80 | x1, y1, x2, y2, x3, y3, x4, y4 = box[:8] 81 | if angle == 90: 82 | x1_, y1_ = y2, imgW - x2 83 | x2_, y2_ = y3, imgW - x3 84 | x3_, y3_ = y4, imgW - x4 85 | x4_, y4_ = y1, imgW - x1 86 | 87 | elif angle == 180: 88 | x1_, y1_ = imgW - x3, imgH - y3 89 | x2_, y2_ = imgW - x4, imgH - y4 90 | x3_, y3_ = imgW - x1, imgH - y1 91 | x4_, y4_ = imgW - x2, imgH - y2 92 | 93 | elif angle == 270: 94 | x1_, y1_ = imgH - y4, x4 95 | x2_, y2_ = imgH - y1, x1 96 | x3_, y3_ = imgH - y2, x2 97 | x4_, y4_ = imgH - y3, x3 98 | else: 99 | x1_, y1_, x2_, y2_, x3_, y3_, x4_, y4_ = x1, y1, x2, y2, x3, y3, x4, y4 100 | 101 | return (x1_, y1_, x2_, y2_, x3_, y3_, x4_, y4_) 102 | 103 | 104 | def angle_transpose(p, angle, w, h): 105 | x, y = p 106 | if angle == 90: 107 | x, y = y, w - x 108 | elif angle == 180: 109 | x, y = w - x, h - y 110 | elif angle == 270: 111 | x, y = h - y, x 112 | return x, y 113 | 114 | 115 | def img_argument(img, lines, labels, size=(512, 512)): 116 | w, h = img.size 117 | if np.random.randint(0, 100) > 80: 118 | degree = np.random.uniform(-5, 5) 119 | else: 120 | degree = 0 121 | # degree = np.random.uniform(-5,5) 122 | newlines = [] 123 | for line in lines: 124 | p1, p2 = line 125 | p1 = rotate(p1[0], p1[1], degree, w / 2, h / 2) 126 | p2 = rotate(p2[0], p2[1], degree, w / 2, h / 2) 127 | newlines.append([p1, p2]) 128 | # img = img.rotate(-degree,center=(w/2,h/2),resample=Image.BILINEAR,fillcolor=(128,128,128)) 129 | img = img.rotate(-degree, center=(w / 2, h / 2), resample=Image.BILINEAR) 130 | angle = np.random.choice([0, 90, 180, 270], 1)[0] 131 | newlables = [] 132 | for i in range(len(newlines)): 133 | p1, p2 = newlines[i] 134 | p1 = angle_transpose(p1, angle, w, h) 135 | p2 = angle_transpose(p2, angle, w, h) 136 | newlines[i] = [p1, p2] 137 | if angle in [90, 270]: 138 | if labels[i] == '0': 139 | newlables.append('1') 140 | else: 141 | newlables.append('0') 142 | else: 143 | newlables.append(labels[i]) 144 | 145 | if angle == 90: 146 | img = img.transpose(Image.ROTATE_90) 147 | elif angle == 180: 148 | img = img.transpose(Image.ROTATE_180) 149 | elif angle == 270: 150 | img = img.transpose(Image.ROTATE_270) 151 | 152 | return img, newlines, newlables 153 | 154 | 155 | def fill_lines(img, lines, linetype=2): 156 | tmp = np.copy(img) 157 | for line in lines: 158 | p1, p2 = line 159 | cv2.line(tmp, (int(p1[0]), int(p1[1])), (int(p2[0]), int(p2[1])), 255, linetype, lineType=cv2.LINE_AA) 160 | 161 | return tmp 162 | 163 | 164 | def get_img_label(p, size, linetype=1): 165 | img, lines, labels = read_json(p) 166 | img, lines = img_resize(img, lines, target_size=512, max_size=1024) 167 | img, lines, labels = img_argument(img, lines, labels, size) 168 | img, lines, labels = get_random_data(img, lines, labels, size=size) 169 | 170 | lines = np.array(lines) 171 | labels = np.array(labels) 172 | labelImg0 = np.zeros(size[::-1], dtype='uint8') 173 | labelImg1 = np.zeros(size[::-1], dtype='uint8') 174 | 175 | ind = np.where(labels == '0')[0] 176 | labelImg0 = fill_lines(labelImg0, lines[ind], linetype=linetype) 177 | ind = np.where(labels == '1')[0] 178 | labelImg1 = fill_lines(labelImg1, lines[ind], linetype=linetype) 179 | 180 | labelY = np.zeros((size[1], size[0], 2), dtype='uint8') 181 | labelY[:, :, 0] = labelImg0 182 | labelY[:, :, 1] = labelImg1 183 | labelY = labelY > 0 184 | return np.array(img), lines, labelY 185 | 186 | 187 | from matplotlib.colors import rgb_to_hsv, hsv_to_rgb 188 | 189 | 190 | def rand(a=0, b=1): 191 | return np.random.rand() * (b - a) + a 192 | 193 | 194 | def get_random_data(image, lines, labels, size=(1024, 1024), jitter=.3, hue=.1, sat=1.5, val=1.5): 195 | '''random preprocessing for real-time data augmentation''' 196 | 197 | iw, ih = image.size 198 | 199 | # resize image 200 | w, h = size 201 | new_ar = w / h * rand(1 - jitter, 1 + jitter) / rand(1 - jitter, 1 + jitter) 202 | # scale = rand(.2, 2) 203 | scale = rand(0.2, 3) 204 | if new_ar < 1: 205 | nh = int(scale * h) 206 | nw = int(nh * new_ar) 207 | else: 208 | nw = int(scale * w) 209 | nh = int(nw / new_ar) 210 | image = image.resize((nw, nh), Image.BICUBIC) 211 | 212 | # place image 213 | dx = int(rand(0, w - nw)) 214 | dy = int(rand(0, h - nh)) 215 | new_image = Image.new('RGB', (w, h), (128, 128, 128)) 216 | new_image.paste(image, (dx, dy)) 217 | image = new_image 218 | 219 | # distort image 220 | hue = rand(-hue, hue) 221 | sat = rand(1, sat) if rand() < .5 else 1 / rand(1, sat) 222 | val = rand(1, val) if rand() < .5 else 1 / rand(1, val) 223 | x = rgb_to_hsv(np.array(image) / 255.) 224 | x[..., 0] += hue 225 | x[..., 0][x[..., 0] > 1] -= 1 226 | x[..., 0][x[..., 0] < 0] += 1 227 | x[..., 1] *= sat 228 | x[..., 2] *= val 229 | x[x > 1] = 1 230 | x[x < 0] = 0 231 | image_data = hsv_to_rgb(x) # numpy array, 0 to 1 232 | N = len(lines) 233 | for i in range(N): 234 | p1, p2 = lines[i] 235 | p1 = p1[0] * nw / iw + dx, p1[1] * nh / ih + dy 236 | p2 = p2[0] * nw / iw + dx, p2[1] * nh / ih + dy 237 | lines[i] = [p1, p2] 238 | return image_data, lines, labels 239 | 240 | 241 | def gen(paths, batchsize=2, linetype=2): 242 | num = len(paths) 243 | i = 0 244 | while True: 245 | # sizes = [512,512,512,512,640,1024] ##多尺度训练 246 | # size = np.random.choice(sizes,1)[0] 247 | size = 640 248 | 249 | X = np.zeros((batchsize, size, size, 3)) 250 | Y = np.zeros((batchsize, size, size, 2)) 251 | for j in range(batchsize): 252 | if i >= num: 253 | i = 0 254 | np.random.shuffle(paths) 255 | p = paths[i] 256 | i += 1 257 | 258 | # linetype=2 259 | img, lines, labelImg = get_img_label(p, size=(size, size), linetype=linetype) 260 | X[j] = img 261 | Y[j] = labelImg 262 | 263 | yield X, Y 264 | 265 | 266 | def img_resize(im, lines, target_size=600, max_size=1500): 267 | w, h = im.size 268 | im_size_min = np.min(im.size) 269 | im_size_max = np.max(im.size) 270 | 271 | im_scale = float(target_size) / float(im_size_min) 272 | if max_size is not None: 273 | if np.round(im_scale * im_size_max) > max_size: 274 | im_scale = float(max_size) / float(im_size_max) 275 | 276 | im = im.resize((int(w * im_scale), int(h * im_scale)), Image.BICUBIC) 277 | N = len(lines) 278 | for i in range(N): 279 | p1, p2 = lines[i] 280 | p1 = p1[0] * im_scale, p1[1] * im_scale 281 | p2 = p2[0] * im_scale, p2[1] * im_scale 282 | lines[i] = [p1, p2] 283 | 284 | return im, lines 285 | -------------------------------------------------------------------------------- /img/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Sep 10 01:33:19 2020 5 | 6 | @author: chineseocr 7 | """ 8 | -------------------------------------------------------------------------------- /img/table-ceil.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chineseocr/table-detect/92488f30ffaf486d29791aab63802beeb1eaca32/img/table-ceil.png -------------------------------------------------------------------------------- /img/table-detect.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chineseocr/table-detect/92488f30ffaf486d29791aab63802beeb1eaca32/img/table-detect.jpg -------------------------------------------------------------------------------- /img/table-detect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chineseocr/table-detect/92488f30ffaf486d29791aab63802beeb1eaca32/img/table-detect.png -------------------------------------------------------------------------------- /img/table-detect.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chineseocr/table-detect/92488f30ffaf486d29791aab63802beeb1eaca32/img/table-detect.xlsx -------------------------------------------------------------------------------- /img/table-detectceil.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chineseocr/table-detect/92488f30ffaf486d29791aab63802beeb1eaca32/img/table-detectceil.png -------------------------------------------------------------------------------- /img/table-line.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chineseocr/table-detect/92488f30ffaf486d29791aab63802beeb1eaca32/img/table-line.png -------------------------------------------------------------------------------- /models/table-detect.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=32 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.0001 19 | burn_in=1000 20 | max_batches = 50200 21 | policy=steps 22 | steps=40000,45000 23 | scales=.1,.1 24 | 25 | 26 | 27 | [convolutional] 28 | batch_normalize=1 29 | filters=32 30 | size=3 31 | stride=1 32 | pad=1 33 | activation=leaky 34 | 35 | # Downsample 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=2 42 | pad=1 43 | activation=leaky 44 | 45 | [convolutional] 46 | batch_normalize=1 47 | filters=32 48 | size=1 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [convolutional] 54 | batch_normalize=1 55 | filters=64 56 | size=3 57 | stride=1 58 | pad=1 59 | activation=leaky 60 | 61 | [shortcut] 62 | from=-3 63 | activation=linear 64 | 65 | # Downsample 66 | 67 | [convolutional] 68 | batch_normalize=1 69 | filters=128 70 | size=3 71 | stride=2 72 | pad=1 73 | activation=leaky 74 | 75 | [convolutional] 76 | batch_normalize=1 77 | filters=64 78 | size=1 79 | stride=1 80 | pad=1 81 | activation=leaky 82 | 83 | [convolutional] 84 | batch_normalize=1 85 | filters=128 86 | size=3 87 | stride=1 88 | pad=1 89 | activation=leaky 90 | 91 | [shortcut] 92 | from=-3 93 | activation=linear 94 | 95 | [convolutional] 96 | batch_normalize=1 97 | filters=64 98 | size=1 99 | stride=1 100 | pad=1 101 | activation=leaky 102 | 103 | [convolutional] 104 | batch_normalize=1 105 | filters=128 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [shortcut] 112 | from=-3 113 | activation=linear 114 | 115 | # Downsample 116 | 117 | [convolutional] 118 | batch_normalize=1 119 | filters=256 120 | size=3 121 | stride=2 122 | pad=1 123 | activation=leaky 124 | 125 | [convolutional] 126 | batch_normalize=1 127 | filters=128 128 | size=1 129 | stride=1 130 | pad=1 131 | activation=leaky 132 | 133 | [convolutional] 134 | batch_normalize=1 135 | filters=256 136 | size=3 137 | stride=1 138 | pad=1 139 | activation=leaky 140 | 141 | [shortcut] 142 | from=-3 143 | activation=linear 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [convolutional] 154 | batch_normalize=1 155 | filters=256 156 | size=3 157 | stride=1 158 | pad=1 159 | activation=leaky 160 | 161 | [shortcut] 162 | from=-3 163 | activation=linear 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=128 168 | size=1 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=256 176 | size=3 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [shortcut] 182 | from=-3 183 | activation=linear 184 | 185 | [convolutional] 186 | batch_normalize=1 187 | filters=128 188 | size=1 189 | stride=1 190 | pad=1 191 | activation=leaky 192 | 193 | [convolutional] 194 | batch_normalize=1 195 | filters=256 196 | size=3 197 | stride=1 198 | pad=1 199 | activation=leaky 200 | 201 | [shortcut] 202 | from=-3 203 | activation=linear 204 | 205 | 206 | [convolutional] 207 | batch_normalize=1 208 | filters=128 209 | size=1 210 | stride=1 211 | pad=1 212 | activation=leaky 213 | 214 | [convolutional] 215 | batch_normalize=1 216 | filters=256 217 | size=3 218 | stride=1 219 | pad=1 220 | activation=leaky 221 | 222 | [shortcut] 223 | from=-3 224 | activation=linear 225 | 226 | [convolutional] 227 | batch_normalize=1 228 | filters=128 229 | size=1 230 | stride=1 231 | pad=1 232 | activation=leaky 233 | 234 | [convolutional] 235 | batch_normalize=1 236 | filters=256 237 | size=3 238 | stride=1 239 | pad=1 240 | activation=leaky 241 | 242 | [shortcut] 243 | from=-3 244 | activation=linear 245 | 246 | [convolutional] 247 | batch_normalize=1 248 | filters=128 249 | size=1 250 | stride=1 251 | pad=1 252 | activation=leaky 253 | 254 | [convolutional] 255 | batch_normalize=1 256 | filters=256 257 | size=3 258 | stride=1 259 | pad=1 260 | activation=leaky 261 | 262 | [shortcut] 263 | from=-3 264 | activation=linear 265 | 266 | [convolutional] 267 | batch_normalize=1 268 | filters=128 269 | size=1 270 | stride=1 271 | pad=1 272 | activation=leaky 273 | 274 | [convolutional] 275 | batch_normalize=1 276 | filters=256 277 | size=3 278 | stride=1 279 | pad=1 280 | activation=leaky 281 | 282 | [shortcut] 283 | from=-3 284 | activation=linear 285 | 286 | # Downsample 287 | 288 | [convolutional] 289 | batch_normalize=1 290 | filters=512 291 | size=3 292 | stride=2 293 | pad=1 294 | activation=leaky 295 | 296 | [convolutional] 297 | batch_normalize=1 298 | filters=256 299 | size=1 300 | stride=1 301 | pad=1 302 | activation=leaky 303 | 304 | [convolutional] 305 | batch_normalize=1 306 | filters=512 307 | size=3 308 | stride=1 309 | pad=1 310 | activation=leaky 311 | 312 | [shortcut] 313 | from=-3 314 | activation=linear 315 | 316 | 317 | [convolutional] 318 | batch_normalize=1 319 | filters=256 320 | size=1 321 | stride=1 322 | pad=1 323 | activation=leaky 324 | 325 | [convolutional] 326 | batch_normalize=1 327 | filters=512 328 | size=3 329 | stride=1 330 | pad=1 331 | activation=leaky 332 | 333 | [shortcut] 334 | from=-3 335 | activation=linear 336 | 337 | 338 | [convolutional] 339 | batch_normalize=1 340 | filters=256 341 | size=1 342 | stride=1 343 | pad=1 344 | activation=leaky 345 | 346 | [convolutional] 347 | batch_normalize=1 348 | filters=512 349 | size=3 350 | stride=1 351 | pad=1 352 | activation=leaky 353 | 354 | [shortcut] 355 | from=-3 356 | activation=linear 357 | 358 | 359 | [convolutional] 360 | batch_normalize=1 361 | filters=256 362 | size=1 363 | stride=1 364 | pad=1 365 | activation=leaky 366 | 367 | [convolutional] 368 | batch_normalize=1 369 | filters=512 370 | size=3 371 | stride=1 372 | pad=1 373 | activation=leaky 374 | 375 | [shortcut] 376 | from=-3 377 | activation=linear 378 | 379 | [convolutional] 380 | batch_normalize=1 381 | filters=256 382 | size=1 383 | stride=1 384 | pad=1 385 | activation=leaky 386 | 387 | [convolutional] 388 | batch_normalize=1 389 | filters=512 390 | size=3 391 | stride=1 392 | pad=1 393 | activation=leaky 394 | 395 | [shortcut] 396 | from=-3 397 | activation=linear 398 | 399 | 400 | [convolutional] 401 | batch_normalize=1 402 | filters=256 403 | size=1 404 | stride=1 405 | pad=1 406 | activation=leaky 407 | 408 | [convolutional] 409 | batch_normalize=1 410 | filters=512 411 | size=3 412 | stride=1 413 | pad=1 414 | activation=leaky 415 | 416 | [shortcut] 417 | from=-3 418 | activation=linear 419 | 420 | 421 | [convolutional] 422 | batch_normalize=1 423 | filters=256 424 | size=1 425 | stride=1 426 | pad=1 427 | activation=leaky 428 | 429 | [convolutional] 430 | batch_normalize=1 431 | filters=512 432 | size=3 433 | stride=1 434 | pad=1 435 | activation=leaky 436 | 437 | [shortcut] 438 | from=-3 439 | activation=linear 440 | 441 | [convolutional] 442 | batch_normalize=1 443 | filters=256 444 | size=1 445 | stride=1 446 | pad=1 447 | activation=leaky 448 | 449 | [convolutional] 450 | batch_normalize=1 451 | filters=512 452 | size=3 453 | stride=1 454 | pad=1 455 | activation=leaky 456 | 457 | [shortcut] 458 | from=-3 459 | activation=linear 460 | 461 | # Downsample 462 | 463 | [convolutional] 464 | batch_normalize=1 465 | filters=1024 466 | size=3 467 | stride=2 468 | pad=1 469 | activation=leaky 470 | 471 | [convolutional] 472 | batch_normalize=1 473 | filters=512 474 | size=1 475 | stride=1 476 | pad=1 477 | activation=leaky 478 | 479 | [convolutional] 480 | batch_normalize=1 481 | filters=1024 482 | size=3 483 | stride=1 484 | pad=1 485 | activation=leaky 486 | 487 | [shortcut] 488 | from=-3 489 | activation=linear 490 | 491 | [convolutional] 492 | batch_normalize=1 493 | filters=512 494 | size=1 495 | stride=1 496 | pad=1 497 | activation=leaky 498 | 499 | [convolutional] 500 | batch_normalize=1 501 | filters=1024 502 | size=3 503 | stride=1 504 | pad=1 505 | activation=leaky 506 | 507 | [shortcut] 508 | from=-3 509 | activation=linear 510 | 511 | [convolutional] 512 | batch_normalize=1 513 | filters=512 514 | size=1 515 | stride=1 516 | pad=1 517 | activation=leaky 518 | 519 | [convolutional] 520 | batch_normalize=1 521 | filters=1024 522 | size=3 523 | stride=1 524 | pad=1 525 | activation=leaky 526 | 527 | [shortcut] 528 | from=-3 529 | activation=linear 530 | 531 | [convolutional] 532 | batch_normalize=1 533 | filters=512 534 | size=1 535 | stride=1 536 | pad=1 537 | activation=leaky 538 | 539 | [convolutional] 540 | batch_normalize=1 541 | filters=1024 542 | size=3 543 | stride=1 544 | pad=1 545 | activation=leaky 546 | 547 | [shortcut] 548 | from=-3 549 | activation=linear 550 | 551 | ###################### 552 | 553 | [convolutional] 554 | batch_normalize=1 555 | filters=512 556 | size=1 557 | stride=1 558 | pad=1 559 | activation=leaky 560 | 561 | [convolutional] 562 | batch_normalize=1 563 | size=3 564 | stride=1 565 | pad=1 566 | filters=1024 567 | activation=leaky 568 | 569 | [convolutional] 570 | batch_normalize=1 571 | filters=512 572 | size=1 573 | stride=1 574 | pad=1 575 | activation=leaky 576 | 577 | [convolutional] 578 | batch_normalize=1 579 | size=3 580 | stride=1 581 | pad=1 582 | filters=1024 583 | activation=leaky 584 | 585 | [convolutional] 586 | batch_normalize=1 587 | filters=512 588 | size=1 589 | stride=1 590 | pad=1 591 | activation=leaky 592 | 593 | [convolutional] 594 | batch_normalize=1 595 | size=3 596 | stride=1 597 | pad=1 598 | filters=1024 599 | activation=leaky 600 | 601 | [convolutional] 602 | size=1 603 | stride=1 604 | pad=1 605 | filters=21 606 | activation=linear 607 | 608 | [yolo] 609 | mask = 6,7,8 610 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 611 | classes=2 612 | num=9 613 | jitter=.3 614 | ignore_thresh = .5 615 | truth_thresh = 1 616 | random=1 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=21 690 | activation=linear 691 | 692 | [yolo] 693 | mask = 3,4,5 694 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 695 | classes=2 696 | num=9 697 | jitter=.3 698 | ignore_thresh = .5 699 | truth_thresh = 1 700 | random=1 701 | 702 | [route] 703 | layers = -4 704 | 705 | [convolutional] 706 | batch_normalize=1 707 | filters=128 708 | size=1 709 | stride=1 710 | pad=1 711 | activation=leaky 712 | 713 | [upsample] 714 | stride=2 715 | 716 | [route] 717 | layers = -1, 36 718 | 719 | 720 | 721 | [convolutional] 722 | batch_normalize=1 723 | filters=128 724 | size=1 725 | stride=1 726 | pad=1 727 | activation=leaky 728 | 729 | [convolutional] 730 | batch_normalize=1 731 | size=3 732 | stride=1 733 | pad=1 734 | filters=256 735 | activation=leaky 736 | 737 | [convolutional] 738 | batch_normalize=1 739 | filters=128 740 | size=1 741 | stride=1 742 | pad=1 743 | activation=leaky 744 | 745 | [convolutional] 746 | batch_normalize=1 747 | size=3 748 | stride=1 749 | pad=1 750 | filters=256 751 | activation=leaky 752 | 753 | [convolutional] 754 | batch_normalize=1 755 | filters=128 756 | size=1 757 | stride=1 758 | pad=1 759 | activation=leaky 760 | 761 | [convolutional] 762 | batch_normalize=1 763 | size=3 764 | stride=1 765 | pad=1 766 | filters=256 767 | activation=leaky 768 | 769 | [convolutional] 770 | size=1 771 | stride=1 772 | pad=1 773 | filters=21 774 | activation=linear 775 | 776 | [yolo] 777 | mask = 0,1,2 778 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 779 | classes=2 780 | num=9 781 | jitter=.3 782 | ignore_thresh = .5 783 | truth_thresh = 1 784 | random=1 785 | 786 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-contrib-python==4.0.0.21 2 | pillow 3 | h5py 4 | scipy 5 | numpy 6 | tensorflow-gpu==2.5 7 | scikit-learn 8 | matplotlib 9 | scikit-image 10 | xlwt -------------------------------------------------------------------------------- /table_build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Jan 13 17:31:37 2021 5 | 6 | @author: lywen 7 | """ 8 | 9 | 10 | class tableBuid: 11 | ##表格重建 12 | def __init__(self, ceilbox, interval=10): 13 | """ 14 | ceilboxes:[[x0,y0,x1,y1,x2,y2,x3,y3,x4,y4]] 15 | """ 16 | diagBoxes =[[int(x[0]), int(x[1]), int(x[4]), int(x[5])] for x in ceilbox] 17 | 18 | self.diagBoxes = diagBoxes 19 | self.interval = interval 20 | self.batch() 21 | 22 | def batch(self): 23 | self.cor = [] 24 | rowcor = self.table_line_cor(self.diagBoxes, axis='row', interval=self.interval) 25 | colcor = self.table_line_cor(self.diagBoxes, axis='col', interval=self.interval) 26 | cor = [{'row': line[1], 'col': line[0]} for line in zip(rowcor, colcor)] 27 | self.cor = cor 28 | 29 | def table_line_cor(self, lines, axis='col', interval=10): 30 | 31 | if axis == 'col': 32 | edges = [[line[1], line[3]] for line in lines] 33 | else: 34 | edges = [[line[0], line[2]] for line in lines] 35 | 36 | edges = sum(edges, []) 37 | edges = sorted(edges) 38 | 39 | nedges = len(edges) 40 | edgesMap = {} 41 | for i in range(nedges): 42 | if i == 0: 43 | edgesMap[edges[i]] = edges[i] 44 | continue 45 | else: 46 | if edges[i] - edgesMap[edges[i - 1]] < interval: 47 | edgesMap[edges[i]] = edgesMap[edges[i - 1]] 48 | else: 49 | edgesMap[edges[i]] = edges[i] 50 | 51 | edgesMapList = [[key, edgesMap[key]] for key in edgesMap] 52 | edgesMapIndex = [line[1] for line in edgesMapList] 53 | edgesMapIndex = list(set(edgesMapIndex)) 54 | edgesMapIndex = {x: ind for ind, x in enumerate(sorted(edgesMapIndex))} 55 | 56 | if axis == 'col': 57 | cor = [[edgesMapIndex[edgesMap[line[1]]], edgesMapIndex[edgesMap[line[3]]]] for line in lines] 58 | else: 59 | cor = [[edgesMapIndex[edgesMap[line[0]]], edgesMapIndex[edgesMap[line[2]]]] for line in lines] 60 | return cor 61 | 62 | import xlwt 63 | def to_excel(res, workbook=None): 64 | ##res:[{'text': '购 买 方', 'cx': 192.0, 'w': 58.0, 'h': 169.0, 'cy': 325.5, 'angle': 0.0, 'row': [0, 1], 'col': [0, 1]}] 65 | row = 0 66 | if workbook is None: 67 | workbook = xlwt.Workbook() 68 | if len(res) == 0: 69 | worksheet = workbook.add_sheet('table') 70 | worksheet.write_merge(0, 0, 0, 0, "无数据") 71 | else: 72 | worksheet = workbook.add_sheet('page') 73 | pageRow = 0 74 | for line in res: 75 | row0, row1 = line['row'] 76 | col0, col1 = line['col'] 77 | text = line.get('text','') 78 | try: 79 | pageRow = max(row1 - 1, pageRow) 80 | worksheet.write_merge(row + row0, row + row1 - 1, col0, col1 - 1, text) 81 | except: 82 | pass 83 | return workbook 84 | 85 | 86 | if __name__=='__main__': 87 | pass 88 | -------------------------------------------------------------------------------- /table_ceil.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Sep 9 23:11:51 2020 5 | table ceil 6 | @author: chineseocr 7 | """ 8 | 9 | import cv2 10 | import numpy as np 11 | from table_detect import table_detect 12 | from table_line import table_line 13 | from table_build import tableBuid,to_excel 14 | from utils import minAreaRectbox, measure, eval_angle, draw_lines 15 | 16 | class table: 17 | def __init__(self, img, tableSize=(416, 416), tableLineSize=(1024, 1024), isTableDetect=False, isToExcel=False): 18 | self.img = img 19 | self.tableSize = tableSize 20 | self.tableLineSize = tableLineSize 21 | self.isTableDetect = isTableDetect 22 | self.isToExcel = isToExcel 23 | self.img_degree() 24 | self.table_boxes_detect() ##表格定位 25 | self.table_ceil() ##表格单元格定位 26 | 27 | self.table_build() 28 | 29 | def img_degree(self): 30 | img, degree = eval_angle(self.img, angleRange=[-15, 15]) 31 | self.img = img 32 | self.degree = degree 33 | 34 | def table_boxes_detect(self): 35 | h, w = self.img.shape[:2] 36 | 37 | if self.isTableDetect: 38 | boxes, adBoxes, scores = table_detect(self.img, sc=self.tableSize, thresh=0.2, NMSthresh=0.3) 39 | if len(boxes) == 0: 40 | boxes = [[0, 0, w, h]] 41 | adBoxes = [[0, 0, w, h]] 42 | scores = [0] 43 | else: 44 | boxes = [[0, 0, w, h]] 45 | adBoxes = [[0, 0, w, h]] 46 | scores = [0] 47 | 48 | self.boxes = boxes 49 | self.adBoxes = adBoxes 50 | self.scores = scores 51 | 52 | def table_ceil(self): 53 | ###表格单元格 54 | n = len(self.adBoxes) 55 | self.tableCeilBoxes = [] 56 | self.childImgs = [] 57 | for i in range(n): 58 | xmin, ymin, xmax, ymax = [int(x) for x in self.adBoxes[i]] 59 | 60 | childImg = self.img[ymin:ymax, xmin:xmax] 61 | rowboxes, colboxes = table_line(childImg[..., ::-1], size=self.tableLineSize, hprob=0.5, vprob=0.5) 62 | tmp = np.zeros(self.img.shape[:2], dtype='uint8') 63 | tmp = draw_lines(tmp, rowboxes + colboxes, color=255, lineW=2) 64 | labels = measure.label(tmp < 255, connectivity=2) # 8连通区域标记 65 | regions = measure.regionprops(labels) 66 | ceilboxes = minAreaRectbox(regions, False, tmp.shape[1], tmp.shape[0], True, True) 67 | ceilboxes = np.array(ceilboxes) 68 | ceilboxes[:, [0, 2, 4, 6]] += xmin 69 | ceilboxes[:, [1, 3, 5, 7]] += ymin 70 | self.tableCeilBoxes.extend(ceilboxes) 71 | self.childImgs.append(childImg) 72 | 73 | def table_build(self): 74 | tablebuild = tableBuid(self.tableCeilBoxes) 75 | cor = tablebuild.cor 76 | for line in cor: 77 | line['text'] = 'table-test'##ocr 78 | if self.isToExcel: 79 | workbook = to_excel(cor, workbook=None) 80 | else: 81 | workbook=None 82 | self.res = cor 83 | self.workbook = workbook 84 | 85 | 86 | def table_ocr(self): 87 | """use ocr and match ceil""" 88 | pass 89 | 90 | 91 | 92 | if __name__ == '__main__': 93 | import argparse 94 | import os 95 | import time 96 | from utils import draw_boxes 97 | 98 | parser = argparse.ArgumentParser(description='tabel to excel demo') 99 | parser.add_argument('--isTableDetect', default=False, type=bool, help="是否先进行表格检测") 100 | parser.add_argument('--tableSize', default='416,416', type=str, help="表格检测输入size") 101 | parser.add_argument('--tableLineSize', default='1024,1024', type=str, help="表格直线输入size") 102 | parser.add_argument('--isToExcel', default=False, type=bool, help="是否输出到excel") 103 | parser.add_argument('--jpgPath', default='img/table-detect.jpg',type=str, help="测试图像地址") 104 | args = parser.parse_args() 105 | args.tableSize = [int(x) for x in args.tableSize.split(',')] 106 | args.tableLineSize = [int(x) for x in args.tableLineSize.split(',')] 107 | print(args) 108 | img = cv2.imread(args.jpgPath) 109 | t = time.time() 110 | tableDetect = table(img,tableSize=args.tableSize, 111 | tableLineSize=args.tableLineSize, 112 | isTableDetect=args.isTableDetect, 113 | isToExcel=args.isToExcel 114 | ) 115 | tableCeilBoxes = tableDetect.tableCeilBoxes 116 | tableJson = tableDetect.res 117 | workbook = tableDetect.workbook 118 | img = tableDetect.img 119 | tmp = np.zeros_like(img) 120 | img = draw_boxes(tmp, tableDetect.tableCeilBoxes, color=(255, 255, 255)) 121 | print(time.time() - t) 122 | pngP = os.path.splitext(args.jpgPath)[0]+'ceil.png' 123 | cv2.imwrite(pngP, img) 124 | if workbook is not None: 125 | workbook.save(os.path.splitext(args.jpgPath)[0]+'.xlsx') 126 | -------------------------------------------------------------------------------- /table_detect.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Sep 9 23:11:51 2020 5 | table detect with yolo 6 | @author: chineseocr 7 | """ 8 | import cv2 9 | import numpy as np 10 | 11 | from config import tableModelDetectPath 12 | from utils import nms_box, letterbox_image, rectangle 13 | 14 | tableDetectNet = cv2.dnn.readNetFromDarknet(tableModelDetectPath.replace('.weights', '.cfg'), tableModelDetectPath) # 15 | 16 | 17 | def table_detect(img, sc=(416, 416), thresh=0.5, NMSthresh=0.3): 18 | """ 19 | 表格检测 20 | img:GBR 21 | 22 | """ 23 | scale = sc[0] 24 | img_height, img_width = img.shape[:2] 25 | inputBlob, fx, fy = letterbox_image(img[..., ::-1], (scale, scale)) 26 | inputBlob = cv2.dnn.blobFromImage(inputBlob, scalefactor=1.0, size=(scale, scale), swapRB=True, crop=False); 27 | tableDetectNet.setInput(inputBlob / 255.0) 28 | outputName = tableDetectNet.getUnconnectedOutLayersNames() 29 | outputs = tableDetectNet.forward(outputName) 30 | class_ids = [] 31 | confidences = [] 32 | boxes = [] 33 | for output in outputs: 34 | for detection in output: 35 | scores = detection[5:] 36 | class_id = np.argmax(scores) 37 | confidence = scores[class_id] 38 | if confidence > thresh: 39 | center_x = int(detection[0] * scale / fx) 40 | center_y = int(detection[1] * scale / fy) 41 | width = int(detection[2] * scale / fx) 42 | height = int(detection[3] * scale / fy) 43 | left = int(center_x - width / 2) 44 | top = int(center_y - height / 2) 45 | if class_id == 1: 46 | class_ids.append(class_id) 47 | confidences.append(float(confidence)) 48 | xmin, ymin, xmax, ymax = left, top, left + width, top + height 49 | xmin = max(xmin, 1) 50 | ymin = max(ymin, 1) 51 | xmax = min(xmax, img_width - 1) 52 | ymax = min(ymax, img_height - 1) 53 | boxes.append([xmin, ymin, xmax, ymax]) 54 | 55 | boxes = np.array(boxes) 56 | 57 | confidences = np.array(confidences) 58 | if len(boxes) > 0: 59 | boxes, confidences = nms_box(boxes, confidences, score_threshold=thresh, nms_threshold=NMSthresh) 60 | 61 | boxes, adBoxes = fix_table_box_for_table_line(boxes, confidences, img) 62 | return boxes, adBoxes, confidences 63 | 64 | 65 | def point_in_box(p, box): 66 | x, y = p 67 | xmin, ymin, xmax, ymax = box 68 | if xmin <= x <= xmin and ymin <= y <= ymax: 69 | return True 70 | else: 71 | return False 72 | 73 | 74 | def fix_table_box_for_table_line(boxes, confidences, img): 75 | ### 修正表格用于表格线检测 76 | h, w = img.shape[:2] 77 | n = len(boxes) 78 | adBoxes = [] 79 | 80 | for i in range(n): 81 | prob = confidences[i] 82 | 83 | xmin, ymin, xmax, ymax = boxes[i] 84 | padx = (xmax - xmin) * (1 - prob) 85 | padx = padx 86 | 87 | pady = (ymax - ymin) * (1 - prob) 88 | pady = pady 89 | xminNew = max(xmin - padx, 1) 90 | yminNew = max(ymin - pady, 1) 91 | xmaxNew = min(xmax + padx, w) 92 | ymaxNew = min(ymax + pady, h) 93 | 94 | adBoxes.append([xminNew, yminNew, xmaxNew, ymaxNew]) 95 | 96 | return boxes, adBoxes 97 | 98 | 99 | if __name__ == '__main__': 100 | import time 101 | import argparse 102 | parser = argparse.ArgumentParser(description='tabel to excel demo') 103 | parser.add_argument('--tableSize', default='416,416', type=str, help="表格检测输入size") 104 | parser.add_argument('--jpgPath', default='img/table-detect.jpg', type=str, help="测试图像地址") 105 | args = parser.parse_args() 106 | args.tableSize = [int(x) for x in args.tableSize.split(',')] 107 | 108 | p = 'img/table-detect.jpg' 109 | img = cv2.imread(args.jpgPath) 110 | t = time.time() 111 | boxes, adBoxes, scores = table_detect(img, sc=(416, 416), thresh=0.5, NMSthresh=0.3) 112 | print(time.time() - t, boxes, adBoxes, scores) 113 | img = rectangle(img, adBoxes) 114 | img.save('img/table-detect.png') 115 | -------------------------------------------------------------------------------- /table_line.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Sep 9 23:11:51 2020 5 | table line detect 6 | @author: chineseocr 7 | """ 8 | 9 | from tensorflow.keras.layers import Input, concatenate, Conv2D, MaxPooling2D, BatchNormalization, UpSampling2D 10 | from tensorflow.keras.layers import LeakyReLU 11 | from tensorflow.keras.models import Model 12 | 13 | 14 | def table_net(input_shape=(512, 512, 3), num_classes=1): 15 | inputs = Input(shape=input_shape) 16 | # 512 17 | use_bias = False 18 | down0a = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(inputs) 19 | down0a = BatchNormalization()(down0a) 20 | down0a = LeakyReLU(alpha=0.1)(down0a) 21 | down0a = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(down0a) 22 | down0a = BatchNormalization()(down0a) 23 | down0a = LeakyReLU(alpha=0.1)(down0a) 24 | down0a_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0a) 25 | # 256 26 | 27 | down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0a_pool) 28 | down0 = BatchNormalization()(down0) 29 | 30 | down0 = LeakyReLU(alpha=0.1)(down0) 31 | down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0) 32 | down0 = BatchNormalization()(down0) 33 | down0 = LeakyReLU(alpha=0.1)(down0) 34 | down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0) 35 | # 128 36 | 37 | down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down0_pool) 38 | down1 = BatchNormalization()(down1) 39 | down1 = LeakyReLU(alpha=0.1)(down1) 40 | down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1) 41 | down1 = BatchNormalization()(down1) 42 | down1 = LeakyReLU(alpha=0.1)(down1) 43 | down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1) 44 | # 64 45 | 46 | down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down1_pool) 47 | down2 = BatchNormalization()(down2) 48 | down2 = LeakyReLU(alpha=0.1)(down2) 49 | down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down2) 50 | down2 = BatchNormalization()(down2) 51 | down2 = LeakyReLU(alpha=0.1)(down2) 52 | down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2) 53 | # 32 54 | 55 | down3 = Conv2D(256, (3, 3), padding='same', use_bias=use_bias)(down2_pool) 56 | down3 = BatchNormalization()(down3) 57 | down3 = LeakyReLU(alpha=0.1)(down3) 58 | down3 = Conv2D(256, (3, 3), padding='same', use_bias=use_bias)(down3) 59 | down3 = BatchNormalization()(down3) 60 | down3 = LeakyReLU(alpha=0.1)(down3) 61 | down3_pool = MaxPooling2D((2, 2), strides=(2, 2))(down3) 62 | # 16 63 | 64 | down4 = Conv2D(512, (3, 3), padding='same', use_bias=use_bias)(down3_pool) 65 | down4 = BatchNormalization()(down4) 66 | down4 = LeakyReLU(alpha=0.1)(down4) 67 | down4 = Conv2D(512, (3, 3), padding='same', use_bias=use_bias)(down4) 68 | down4 = BatchNormalization()(down4) 69 | down4 = LeakyReLU(alpha=0.1)(down4) 70 | down4_pool = MaxPooling2D((2, 2), strides=(2, 2))(down4) 71 | # 8 72 | 73 | center = Conv2D(1024, (3, 3), padding='same', use_bias=use_bias)(down4_pool) 74 | center = BatchNormalization()(center) 75 | center = LeakyReLU(alpha=0.1)(center) 76 | center = Conv2D(1024, (3, 3), padding='same', use_bias=use_bias)(center) 77 | center = BatchNormalization()(center) 78 | center = LeakyReLU(alpha=0.1)(center) 79 | # center 80 | 81 | up4 = UpSampling2D((2, 2))(center) 82 | up4 = concatenate([down4, up4], axis=3) 83 | up4 = Conv2D(512, (3, 3), padding='same', use_bias=use_bias)(up4) 84 | up4 = BatchNormalization()(up4) 85 | up4 = LeakyReLU(alpha=0.1)(up4) 86 | up4 = Conv2D(512, (3, 3), padding='same', use_bias=use_bias)(up4) 87 | up4 = BatchNormalization()(up4) 88 | up4 = LeakyReLU(alpha=0.1)(up4) 89 | up4 = Conv2D(512, (3, 3), padding='same', use_bias=use_bias)(up4) 90 | up4 = BatchNormalization()(up4) 91 | up4 = LeakyReLU(alpha=0.1)(up4) 92 | # 16 93 | 94 | up3 = UpSampling2D((2, 2))(up4) 95 | up3 = concatenate([down3, up3], axis=3) 96 | up3 = Conv2D(256, (3, 3), padding='same', use_bias=use_bias)(up3) 97 | up3 = BatchNormalization()(up3) 98 | up3 = LeakyReLU(alpha=0.1)(up3) 99 | up3 = Conv2D(256, (3, 3), padding='same', use_bias=use_bias)(up3) 100 | up3 = BatchNormalization()(up3) 101 | up3 = LeakyReLU(alpha=0.1)(up3) 102 | up3 = Conv2D(256, (3, 3), padding='same', use_bias=use_bias)(up3) 103 | up3 = BatchNormalization()(up3) 104 | up3 = LeakyReLU(alpha=0.1)(up3) 105 | # 32 106 | 107 | up2 = UpSampling2D((2, 2))(up3) 108 | up2 = concatenate([down2, up2], axis=3) 109 | up2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(up2) 110 | up2 = BatchNormalization()(up2) 111 | up2 = LeakyReLU(alpha=0.1)(up2) 112 | up2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(up2) 113 | up2 = BatchNormalization()(up2) 114 | up2 = LeakyReLU(alpha=0.1)(up2) 115 | up2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(up2) 116 | up2 = BatchNormalization()(up2) 117 | up2 = LeakyReLU(alpha=0.1)(up2) 118 | # 64 119 | 120 | up1 = UpSampling2D((2, 2))(up2) 121 | up1 = concatenate([down1, up1], axis=3) 122 | up1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(up1) 123 | up1 = BatchNormalization()(up1) 124 | up1 = LeakyReLU(alpha=0.1)(up1) 125 | up1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(up1) 126 | up1 = BatchNormalization()(up1) 127 | up1 = LeakyReLU(alpha=0.1)(up1) 128 | up1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(up1) 129 | up1 = BatchNormalization()(up1) 130 | up1 = LeakyReLU(alpha=0.1)(up1) 131 | # 128 132 | 133 | up0 = UpSampling2D((2, 2))(up1) 134 | up0 = concatenate([down0, up0], axis=3) 135 | up0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(up0) 136 | up0 = BatchNormalization()(up0) 137 | up0 = LeakyReLU(alpha=0.1)(up0) 138 | up0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(up0) 139 | up0 = BatchNormalization()(up0) 140 | up0 = LeakyReLU(alpha=0.1)(up0) 141 | up0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(up0) 142 | up0 = BatchNormalization()(up0) 143 | up0 = LeakyReLU(alpha=0.1)(up0) 144 | # 256 145 | 146 | up0a = UpSampling2D((2, 2))(up0) 147 | up0a = concatenate([down0a, up0a], axis=3) 148 | up0a = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(up0a) 149 | up0a = BatchNormalization()(up0a) 150 | up0a = LeakyReLU(alpha=0.1)(up0a) 151 | up0a = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(up0a) 152 | up0a = BatchNormalization()(up0a) 153 | up0a = LeakyReLU(alpha=0.1)(up0a) 154 | up0a = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(up0a) 155 | up0a = BatchNormalization()(up0a) 156 | up0a = LeakyReLU(alpha=0.1)(up0a) 157 | # 512 158 | 159 | classify = Conv2D(num_classes, (1, 1), activation='sigmoid')(up0a) 160 | 161 | model = Model(inputs=inputs, outputs=classify) 162 | 163 | return model 164 | 165 | 166 | from config import tableModeLinePath 167 | from utils import letterbox_image, get_table_line, adjust_lines, line_to_line 168 | import numpy as np 169 | import cv2 170 | 171 | model = table_net((None, None, 3), 2) 172 | model.load_weights(tableModeLinePath) 173 | 174 | 175 | def table_line(img, size=(512, 512), hprob=0.5, vprob=0.5, row=50, col=30, alph=15): 176 | sizew, sizeh = size 177 | inputBlob, fx, fy = letterbox_image(img[..., ::-1], (sizew, sizeh)) 178 | pred = model.predict(np.array([np.array(inputBlob) / 255.0])) 179 | pred = pred[0] 180 | vpred = pred[..., 1] > vprob ##竖线 181 | hpred = pred[..., 0] > hprob ##横线 182 | vpred = vpred.astype(int) 183 | hpred = hpred.astype(int) 184 | colboxes = get_table_line(vpred, axis=1, lineW=col) 185 | rowboxes = get_table_line(hpred, axis=0, lineW=row) 186 | ccolbox = [] 187 | crowlbox = [] 188 | if len(rowboxes) > 0: 189 | rowboxes = np.array(rowboxes) 190 | rowboxes[:, [0, 2]] = rowboxes[:, [0, 2]] / fx 191 | rowboxes[:, [1, 3]] = rowboxes[:, [1, 3]] / fy 192 | xmin = rowboxes[:, [0, 2]].min() 193 | xmax = rowboxes[:, [0, 2]].max() 194 | ymin = rowboxes[:, [1, 3]].min() 195 | ymax = rowboxes[:, [1, 3]].max() 196 | ccolbox = [[xmin, ymin, xmin, ymax], [xmax, ymin, xmax, ymax]] 197 | rowboxes = rowboxes.tolist() 198 | 199 | if len(colboxes) > 0: 200 | colboxes = np.array(colboxes) 201 | colboxes[:, [0, 2]] = colboxes[:, [0, 2]] / fx 202 | colboxes[:, [1, 3]] = colboxes[:, [1, 3]] / fy 203 | 204 | xmin = colboxes[:, [0, 2]].min() 205 | xmax = colboxes[:, [0, 2]].max() 206 | ymin = colboxes[:, [1, 3]].min() 207 | ymax = colboxes[:, [1, 3]].max() 208 | colboxes = colboxes.tolist() 209 | crowlbox = [[xmin, ymin, xmax, ymin], [xmin, ymax, xmax, ymax]] 210 | 211 | rowboxes += crowlbox 212 | colboxes += ccolbox 213 | 214 | rboxes_row_, rboxes_col_ = adjust_lines(rowboxes, colboxes, alph=alph) 215 | rowboxes += rboxes_row_ 216 | colboxes += rboxes_col_ 217 | nrow = len(rowboxes) 218 | ncol = len(colboxes) 219 | for i in range(nrow): 220 | for j in range(ncol): 221 | rowboxes[i] = line_to_line(rowboxes[i], colboxes[j], 10) 222 | colboxes[j] = line_to_line(colboxes[j], rowboxes[i], 10) 223 | 224 | return rowboxes, colboxes 225 | 226 | 227 | if __name__ == '__main__': 228 | import time 229 | 230 | p = 'img/table-detect.jpg' 231 | from utils import draw_lines 232 | 233 | img = cv2.imread(p) 234 | t = time.time() 235 | rowboxes, colboxes = table_line(img[..., ::-1], size=(512, 512), hprob=0.5, vprob=0.5) 236 | img = draw_lines(img, rowboxes + colboxes, color=(255, 0, 0), lineW=2) 237 | 238 | print(time.time() - t, len(rowboxes), len(colboxes)) 239 | cv2.imwrite('img/table-line.png', img) 240 | -------------------------------------------------------------------------------- /train/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Sep 10 02:52:45 2020 5 | @author: chineseocr 6 | """ 7 | import sys 8 | 9 | sys.path.append('.') 10 | -------------------------------------------------------------------------------- /train/dataset-line/0/0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chineseocr/table-detect/92488f30ffaf486d29791aab63802beeb1eaca32/train/dataset-line/0/0.jpg -------------------------------------------------------------------------------- /train/dataset-line/0/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "3.16.7", 3 | "flags": {}, 4 | "shapes": [ 5 | { 6 | "label": "0", 7 | "line_color": [ 8 | 0, 9 | 0, 10 | 128 11 | ], 12 | "fill_color": [ 13 | 0, 14 | 0, 15 | 128 16 | ], 17 | "points": [ 18 | [ 19 | 62.0, 20 | 166.5 21 | ], 22 | [ 23 | 760.0, 24 | 166.5 25 | ] 26 | ], 27 | "shape_type": "line", 28 | "flags": {} 29 | }, 30 | { 31 | "label": "0", 32 | "line_color": [ 33 | 0, 34 | 0, 35 | 128 36 | ], 37 | "fill_color": [ 38 | 0, 39 | 0, 40 | 128 41 | ], 42 | "points": [ 43 | [ 44 | 62.0, 45 | 201.0 46 | ], 47 | [ 48 | 760.0, 49 | 201.0 50 | ] 51 | ], 52 | "shape_type": "line", 53 | "flags": {} 54 | }, 55 | { 56 | "label": "0", 57 | "line_color": [ 58 | 0, 59 | 0, 60 | 128 61 | ], 62 | "fill_color": [ 63 | 0, 64 | 0, 65 | 128 66 | ], 67 | "points": [ 68 | [ 69 | 62.0, 70 | 231.5 71 | ], 72 | [ 73 | 760.0, 74 | 231.5 75 | ] 76 | ], 77 | "shape_type": "line", 78 | "flags": {} 79 | }, 80 | { 81 | "label": "0", 82 | "line_color": [ 83 | 0, 84 | 0, 85 | 128 86 | ], 87 | "fill_color": [ 88 | 0, 89 | 0, 90 | 128 91 | ], 92 | "points": [ 93 | [ 94 | 62.0, 95 | 286.5 96 | ], 97 | [ 98 | 760.0, 99 | 286.5 100 | ] 101 | ], 102 | "shape_type": "line", 103 | "flags": {} 104 | }, 105 | { 106 | "label": "0", 107 | "line_color": [ 108 | 0, 109 | 0, 110 | 128 111 | ], 112 | "fill_color": [ 113 | 0, 114 | 0, 115 | 128 116 | ], 117 | "points": [ 118 | [ 119 | 62.0, 120 | 304.5 121 | ], 122 | [ 123 | 760.0, 124 | 304.5 125 | ] 126 | ], 127 | "shape_type": "line", 128 | "flags": {} 129 | }, 130 | { 131 | "label": "0", 132 | "line_color": [ 133 | 0, 134 | 0, 135 | 128 136 | ], 137 | "fill_color": [ 138 | 0, 139 | 0, 140 | 128 141 | ], 142 | "points": [ 143 | [ 144 | 62.0, 145 | 335.0 146 | ], 147 | [ 148 | 760.0, 149 | 335.0 150 | ] 151 | ], 152 | "shape_type": "line", 153 | "flags": {} 154 | }, 155 | { 156 | "label": "0", 157 | "line_color": [ 158 | 0, 159 | 0, 160 | 128 161 | ], 162 | "fill_color": [ 163 | 0, 164 | 0, 165 | 128 166 | ], 167 | "points": [ 168 | [ 169 | 62.0, 170 | 389.5 171 | ], 172 | [ 173 | 760.0, 174 | 389.5 175 | ] 176 | ], 177 | "shape_type": "line", 178 | "flags": {} 179 | }, 180 | { 181 | "label": "0", 182 | "line_color": [ 183 | 0, 184 | 0, 185 | 128 186 | ], 187 | "fill_color": [ 188 | 0, 189 | 0, 190 | 128 191 | ], 192 | "points": [ 193 | [ 194 | 62.0, 195 | 432.5 196 | ], 197 | [ 198 | 760.0, 199 | 432.5 200 | ] 201 | ], 202 | "shape_type": "line", 203 | "flags": {} 204 | }, 205 | { 206 | "label": "0", 207 | "line_color": [ 208 | 0, 209 | 0, 210 | 128 211 | ], 212 | "fill_color": [ 213 | 0, 214 | 0, 215 | 128 216 | ], 217 | "points": [ 218 | [ 219 | 62.0, 220 | 550.5 221 | ], 222 | [ 223 | 760.0, 224 | 550.5 225 | ] 226 | ], 227 | "shape_type": "line", 228 | "flags": {} 229 | }, 230 | { 231 | "label": "0", 232 | "line_color": [ 233 | 0, 234 | 0, 235 | 128 236 | ], 237 | "fill_color": [ 238 | 0, 239 | 0, 240 | 128 241 | ], 242 | "points": [ 243 | [ 244 | 62.0, 245 | 581.0 246 | ], 247 | [ 248 | 760.0, 249 | 581.0 250 | ] 251 | ], 252 | "shape_type": "line", 253 | "flags": {} 254 | }, 255 | { 256 | "label": "0", 257 | "line_color": [ 258 | 0, 259 | 0, 260 | 128 261 | ], 262 | "fill_color": [ 263 | 0, 264 | 0, 265 | 128 266 | ], 267 | "points": [ 268 | [ 269 | 62.0, 270 | 624.0 271 | ], 272 | [ 273 | 760.0, 274 | 624.0 275 | ] 276 | ], 277 | "shape_type": "line", 278 | "flags": {} 279 | }, 280 | { 281 | "label": "0", 282 | "line_color": [ 283 | 0, 284 | 0, 285 | 128 286 | ], 287 | "fill_color": [ 288 | 0, 289 | 0, 290 | 128 291 | ], 292 | "points": [ 293 | [ 294 | 62.0, 295 | 653.5 296 | ], 297 | [ 298 | 760.0, 299 | 653.5 300 | ] 301 | ], 302 | "shape_type": "line", 303 | "flags": {} 304 | }, 305 | { 306 | "label": "1", 307 | "line_color": [ 308 | 0, 309 | 0, 310 | 0, 311 | 128 312 | ], 313 | "fill_color": [ 314 | 0, 315 | 0, 316 | 0, 317 | 128 318 | ], 319 | "points": [ 320 | [ 321 | 62.5, 322 | 166.0 323 | ], 324 | [ 325 | 62.5, 326 | 653.9999389648438 327 | ] 328 | ], 329 | "shape_type": "line", 330 | "flags": {} 331 | }, 332 | { 333 | "label": "1", 334 | "line_color": [ 335 | 0, 336 | 0, 337 | 0, 338 | 128 339 | ], 340 | "fill_color": [ 341 | 0, 342 | 0, 343 | 0, 344 | 128 345 | ], 346 | "points": [ 347 | [ 348 | 187.49998474121094, 349 | 166.0 350 | ], 351 | [ 352 | 187.49998474121094, 353 | 653.9999389648438 354 | ] 355 | ], 356 | "shape_type": "line", 357 | "flags": {} 358 | }, 359 | { 360 | "label": "1", 361 | "line_color": [ 362 | 0, 363 | 0, 364 | 0, 365 | 128 366 | ], 367 | "fill_color": [ 368 | 0, 369 | 0, 370 | 0, 371 | 128 372 | ], 373 | "points": [ 374 | [ 375 | 460.5, 376 | 166.0 377 | ], 378 | [ 379 | 460.5, 380 | 653.9999389648438 381 | ] 382 | ], 383 | "shape_type": "line", 384 | "flags": {} 385 | }, 386 | { 387 | "label": "1", 388 | "line_color": [ 389 | 0, 390 | 0, 391 | 0, 392 | 128 393 | ], 394 | "fill_color": [ 395 | 0, 396 | 0, 397 | 0, 398 | 128 399 | ], 400 | "points": [ 401 | [ 402 | 497.5, 403 | 166.0 404 | ], 405 | [ 406 | 497.5, 407 | 653.9999389648438 408 | ] 409 | ], 410 | "shape_type": "line", 411 | "flags": {} 412 | }, 413 | { 414 | "label": "1", 415 | "line_color": [ 416 | 0, 417 | 0, 418 | 0, 419 | 128 420 | ], 421 | "fill_color": [ 422 | 0, 423 | 0, 424 | 0, 425 | 128 426 | ], 427 | "points": [ 428 | [ 429 | 539.4999389648438, 430 | 166.0 431 | ], 432 | [ 433 | 539.4999389648438, 434 | 653.9999389648438 435 | ] 436 | ], 437 | "shape_type": "line", 438 | "flags": {} 439 | }, 440 | { 441 | "label": "1", 442 | "line_color": [ 443 | 0, 444 | 0, 445 | 0, 446 | 128 447 | ], 448 | "fill_color": [ 449 | 0, 450 | 0, 451 | 0, 452 | 128 453 | ], 454 | "points": [ 455 | [ 456 | 593.4999389648438, 457 | 166.0 458 | ], 459 | [ 460 | 593.4999389648438, 461 | 653.9999389648438 462 | ] 463 | ], 464 | "shape_type": "line", 465 | "flags": {} 466 | }, 467 | { 468 | "label": "1", 469 | "line_color": [ 470 | 0, 471 | 0, 472 | 0, 473 | 128 474 | ], 475 | "fill_color": [ 476 | 0, 477 | 0, 478 | 0, 479 | 128 480 | ], 481 | "points": [ 482 | [ 483 | 626.4999389648438, 484 | 166.0 485 | ], 486 | [ 487 | 626.4999389648438, 488 | 653.9999389648438 489 | ] 490 | ], 491 | "shape_type": "line", 492 | "flags": {} 493 | }, 494 | { 495 | "label": "1", 496 | "line_color": [ 497 | 0, 498 | 0, 499 | 0, 500 | 128 501 | ], 502 | "fill_color": [ 503 | 0, 504 | 0, 505 | 0, 506 | 128 507 | ], 508 | "points": [ 509 | [ 510 | 676.4999389648438, 511 | 166.0 512 | ], 513 | [ 514 | 676.4999389648438, 515 | 653.9999389648438 516 | ] 517 | ], 518 | "shape_type": "line", 519 | "flags": {} 520 | }, 521 | { 522 | "label": "1", 523 | "line_color": [ 524 | 0, 525 | 0, 526 | 0, 527 | 128 528 | ], 529 | "fill_color": [ 530 | 0, 531 | 0, 532 | 0, 533 | 128 534 | ], 535 | "points": [ 536 | [ 537 | 759.4999389648438, 538 | 166.0 539 | ], 540 | [ 541 | 759.4999389648438, 542 | 653.9999389648438 543 | ] 544 | ], 545 | "shape_type": "line", 546 | "flags": {} 547 | }, 548 | { 549 | "label": "1", 550 | "line_color": [ 551 | 0, 552 | 0, 553 | 0, 554 | 128 555 | ], 556 | "fill_color": [ 557 | 0, 558 | 0, 559 | 0, 560 | 128 561 | ], 562 | "points": [ 563 | [ 564 | 150.0, 565 | 183.0 566 | ], 567 | [ 568 | 150.0, 569 | 654.0 570 | ] 571 | ], 572 | "shape_type": "line", 573 | "flags": {} 574 | } 575 | ], 576 | "imageData": "| "lineColor": [ 578 | 0, 579 | 255, 580 | 0, 581 | 128 582 | ], 583 | "fillColor": [ 584 | 255, 585 | 0, 586 | 0, 587 | 128 588 | ], 589 | "imagePath": "/Users/lywen/Desktop/dataset/table-line/opencv/01/0.jpg" 590 | } -------------------------------------------------------------------------------- /train/dataset-line/0/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chineseocr/table-detect/92488f30ffaf486d29791aab63802beeb1eaca32/train/dataset-line/0/1.jpg -------------------------------------------------------------------------------- /train/dataset-line/0/13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chineseocr/table-detect/92488f30ffaf486d29791aab63802beeb1eaca32/train/dataset-line/0/13.jpg -------------------------------------------------------------------------------- /train/dataset-line/0/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chineseocr/table-detect/92488f30ffaf486d29791aab63802beeb1eaca32/train/dataset-line/0/2.jpg -------------------------------------------------------------------------------- /train/dataset-line/0/2.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "3.16.7", 3 | "flags": {}, 4 | "shapes": [ 5 | { 6 | "label": "0", 7 | "line_color": [ 8 | 0, 9 | 0, 10 | 128, 11 | 255 12 | ], 13 | "fill_color": [ 14 | 0, 15 | 0, 16 | 128, 17 | 255 18 | ], 19 | "points": [ 20 | [ 21 | 0.0, 22 | 1.0 23 | ], 24 | [ 25 | 618.0, 26 | 1.0 27 | ] 28 | ], 29 | "shape_type": "line", 30 | "flags": {} 31 | }, 32 | { 33 | "label": "0", 34 | "line_color": [ 35 | 0, 36 | 0, 37 | 128, 38 | 255 39 | ], 40 | "fill_color": [ 41 | 0, 42 | 0, 43 | 128, 44 | 255 45 | ], 46 | "points": [ 47 | [ 48 | 0.0, 49 | 36.499996185302734 50 | ], 51 | [ 52 | 617.9998779296875, 53 | 36.499996185302734 54 | ] 55 | ], 56 | "shape_type": "line", 57 | "flags": {} 58 | }, 59 | { 60 | "label": "0", 61 | "line_color": [ 62 | 0, 63 | 0, 64 | 128, 65 | 255 66 | ], 67 | "fill_color": [ 68 | 0, 69 | 0, 70 | 128, 71 | 255 72 | ], 73 | "points": [ 74 | [ 75 | -1.5308085657314598e-17, 76 | 94.5 77 | ], 78 | [ 79 | 618.0, 80 | 94.5 81 | ] 82 | ], 83 | "shape_type": "line", 84 | "flags": {} 85 | }, 86 | { 87 | "label": "0", 88 | "line_color": [ 89 | 0, 90 | 0, 91 | 128, 92 | 255 93 | ], 94 | "fill_color": [ 95 | 0, 96 | 0, 97 | 128, 98 | 255 99 | ], 100 | "points": [ 101 | [ 102 | -1.5308085657314598e-17, 103 | 135.5 104 | ], 105 | [ 106 | 618.0, 107 | 135.5 108 | ] 109 | ], 110 | "shape_type": "line", 111 | "flags": {} 112 | }, 113 | { 114 | "label": "0", 115 | "line_color": [ 116 | 0, 117 | 0, 118 | 128, 119 | 255 120 | ], 121 | "fill_color": [ 122 | 0, 123 | 0, 124 | 128, 125 | 255 126 | ], 127 | "points": [ 128 | [ 129 | 0.0, 130 | 176.0 131 | ], 132 | [ 133 | 618.0, 134 | 176.0 135 | ] 136 | ], 137 | "shape_type": "line", 138 | "flags": {} 139 | }, 140 | { 141 | "label": "0", 142 | "line_color": [ 143 | 0, 144 | 0, 145 | 128, 146 | 255 147 | ], 148 | "fill_color": [ 149 | 0, 150 | 0, 151 | 128, 152 | 255 153 | ], 154 | "points": [ 155 | [ 156 | 0.0, 157 | 216.5 158 | ], 159 | [ 160 | 618.0, 161 | 216.5 162 | ] 163 | ], 164 | "shape_type": "line", 165 | "flags": {} 166 | }, 167 | { 168 | "label": "0", 169 | "line_color": [ 170 | 0, 171 | 0, 172 | 128, 173 | 255 174 | ], 175 | "fill_color": [ 176 | 0, 177 | 0, 178 | 128, 179 | 255 180 | ], 181 | "points": [ 182 | [ 183 | 0.0, 184 | 258.0 185 | ], 186 | [ 187 | 616.0, 188 | 258.0 189 | ] 190 | ], 191 | "shape_type": "line", 192 | "flags": {} 193 | }, 194 | { 195 | "label": "0", 196 | "line_color": [ 197 | 0, 198 | 0, 199 | 128, 200 | 255 201 | ], 202 | "fill_color": [ 203 | 0, 204 | 0, 205 | 128, 206 | 255 207 | ], 208 | "points": [ 209 | [ 210 | 3.0, 211 | 299.0 212 | ], 213 | [ 214 | 615.0, 215 | 299.0 216 | ] 217 | ], 218 | "shape_type": "line", 219 | "flags": {} 220 | }, 221 | { 222 | "label": "1", 223 | "line_color": [ 224 | 0, 225 | 0, 226 | 0, 227 | 128 228 | ], 229 | "fill_color": [ 230 | 0, 231 | 0, 232 | 0, 233 | 128 234 | ], 235 | "points": [ 236 | [ 237 | 0.5, 238 | 1.0 239 | ], 240 | [ 241 | 0.5, 242 | 299.0 243 | ] 244 | ], 245 | "shape_type": "line", 246 | "flags": {} 247 | }, 248 | { 249 | "label": "1", 250 | "line_color": [ 251 | 0, 252 | 0, 253 | 0, 254 | 128 255 | ], 256 | "fill_color": [ 257 | 0, 258 | 0, 259 | 0, 260 | 128 261 | ], 262 | "points": [ 263 | [ 264 | 17.5, 265 | 1.0 266 | ], 267 | [ 268 | 17.5, 269 | 299.0 270 | ] 271 | ], 272 | "shape_type": "line", 273 | "flags": {} 274 | }, 275 | { 276 | "label": "1", 277 | "line_color": [ 278 | 0, 279 | 0, 280 | 0, 281 | 128 282 | ], 283 | "fill_color": [ 284 | 0, 285 | 0, 286 | 0, 287 | 128 288 | ], 289 | "points": [ 290 | [ 291 | 63.5, 292 | 1.0 293 | ], 294 | [ 295 | 63.5, 296 | 299.0 297 | ] 298 | ], 299 | "shape_type": "line", 300 | "flags": {} 301 | }, 302 | { 303 | "label": "1", 304 | "line_color": [ 305 | 0, 306 | 0, 307 | 0, 308 | 128 309 | ], 310 | "fill_color": [ 311 | 0, 312 | 0, 313 | 0, 314 | 128 315 | ], 316 | "points": [ 317 | [ 318 | 95.5, 319 | 1.0 320 | ], 321 | [ 322 | 95.5, 323 | 299.0 324 | ] 325 | ], 326 | "shape_type": "line", 327 | "flags": {} 328 | }, 329 | { 330 | "label": "1", 331 | "line_color": [ 332 | 0, 333 | 0, 334 | 0, 335 | 128 336 | ], 337 | "fill_color": [ 338 | 0, 339 | 0, 340 | 0, 341 | 128 342 | ], 343 | "points": [ 344 | [ 345 | 136.5, 346 | 1.0 347 | ], 348 | [ 349 | 136.5, 350 | 299.0 351 | ] 352 | ], 353 | "shape_type": "line", 354 | "flags": {} 355 | }, 356 | { 357 | "label": "1", 358 | "line_color": [ 359 | 0, 360 | 0, 361 | 0, 362 | 128 363 | ], 364 | "fill_color": [ 365 | 0, 366 | 0, 367 | 0, 368 | 128 369 | ], 370 | "points": [ 371 | [ 372 | 262.0, 373 | 1.0 374 | ], 375 | [ 376 | 262.0, 377 | 299.0 378 | ] 379 | ], 380 | "shape_type": "line", 381 | "flags": {} 382 | }, 383 | { 384 | "label": "1", 385 | "line_color": [ 386 | 0, 387 | 0, 388 | 0, 389 | 128 390 | ], 391 | "fill_color": [ 392 | 0, 393 | 0, 394 | 0, 395 | 128 396 | ], 397 | "points": [ 398 | [ 399 | 387.0, 400 | 1.0 401 | ], 402 | [ 403 | 387.0, 404 | 299.0 405 | ] 406 | ], 407 | "shape_type": "line", 408 | "flags": {} 409 | }, 410 | { 411 | "label": "1", 412 | "line_color": [ 413 | 0, 414 | 0, 415 | 0, 416 | 128 417 | ], 418 | "fill_color": [ 419 | 0, 420 | 0, 421 | 0, 422 | 128 423 | ], 424 | "points": [ 425 | [ 426 | 481.5, 427 | 1.0 428 | ], 429 | [ 430 | 481.5, 431 | 299.0 432 | ] 433 | ], 434 | "shape_type": "line", 435 | "flags": {} 436 | }, 437 | { 438 | "label": "1", 439 | "line_color": [ 440 | 0, 441 | 0, 442 | 0, 443 | 128 444 | ], 445 | "fill_color": [ 446 | 0, 447 | 0, 448 | 0, 449 | 128 450 | ], 451 | "points": [ 452 | [ 453 | 518.5, 454 | 1.0 455 | ], 456 | [ 457 | 518.5, 458 | 299.0 459 | ] 460 | ], 461 | "shape_type": "line", 462 | "flags": {} 463 | }, 464 | { 465 | "label": "1", 466 | "line_color": [ 467 | 0, 468 | 0, 469 | 0, 470 | 128 471 | ], 472 | "fill_color": [ 473 | 0, 474 | 0, 475 | 0, 476 | 128 477 | ], 478 | "points": [ 479 | [ 480 | 555.5, 481 | 1.0 482 | ], 483 | [ 484 | 555.5, 485 | 299.0 486 | ] 487 | ], 488 | "shape_type": "line", 489 | "flags": {} 490 | }, 491 | { 492 | "label": "1", 493 | "line_color": [ 494 | 0, 495 | 0, 496 | 0, 497 | 128 498 | ], 499 | "fill_color": [ 500 | 0, 501 | 0, 502 | 0, 503 | 128 504 | ], 505 | "points": [ 506 | [ 507 | 591.0, 508 | 1.0 509 | ], 510 | [ 511 | 591.0, 512 | 299.0 513 | ] 514 | ], 515 | "shape_type": "line", 516 | "flags": {} 517 | }, 518 | { 519 | "label": "1", 520 | "line_color": [ 521 | 0, 522 | 0, 523 | 0, 524 | 128 525 | ], 526 | "fill_color": [ 527 | 0, 528 | 0, 529 | 0, 530 | 128 531 | ], 532 | "points": [ 533 | [ 534 | 618.0, 535 | 1.0 536 | ], 537 | [ 538 | 618.0, 539 | 298.0 540 | ] 541 | ], 542 | "shape_type": "line", 543 | "flags": {} 544 | }, 545 | { 546 | "label": "1", 547 | "line_color": [ 548 | 0, 549 | 0, 550 | 0, 551 | 128 552 | ], 553 | "fill_color": [ 554 | 0, 555 | 0, 556 | 0, 557 | 128 558 | ], 559 | "points": [ 560 | [ 561 | 421.5, 562 | 17.000030517578125 563 | ], 564 | [ 565 | 421.5, 566 | 298.9999694824219 567 | ] 568 | ], 569 | "shape_type": "line", 570 | "flags": {} 571 | }, 572 | { 573 | "label": "1", 574 | "line_color": [ 575 | 0, 576 | 0, 577 | 0, 578 | 128 579 | ], 580 | "fill_color": [ 581 | 0, 582 | 0, 583 | 0, 584 | 128 585 | ], 586 | "points": [ 587 | [ 588 | 456.5, 589 | 17.0 590 | ], 591 | [ 592 | 456.5, 593 | 298.99993896484375 594 | ] 595 | ], 596 | "shape_type": "line", 597 | "flags": {} 598 | }, 599 | { 600 | "label": "0", 601 | "line_color": [ 602 | 0, 603 | 0, 604 | 0, 605 | 128 606 | ], 607 | "fill_color": [ 608 | 0, 609 | 0, 610 | 0, 611 | 128 612 | ], 613 | "points": [ 614 | [ 615 | 386.35632183908046, 616 | 18.80459770114942 617 | ], 618 | [ 619 | 481.7586206896552, 620 | 18.229885057471265 621 | ] 622 | ], 623 | "shape_type": "line", 624 | "flags": {} 625 | } 626 | ], 627 | "lineColor": [ 628 | 0, 629 | 255, 630 | 0, 631 | 128 632 | ], 633 | "fillColor": [ 634 | 255, 635 | 0, 636 | 0, 637 | 128 638 | ], 639 | "imagePath": "2.jpg", 640 | "imageData": "", 641 | "imageHeight": 300, 642 | "imageWidth": 619 643 | } -------------------------------------------------------------------------------- /train/dataset-line/0/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chineseocr/table-detect/92488f30ffaf486d29791aab63802beeb1eaca32/train/dataset-line/0/5.jpg -------------------------------------------------------------------------------- /train/train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Sep 9 23:11:51 2020 5 | 6 | @author: chineseocr 7 | """ 8 | import sys 9 | 10 | sys.path.append('.') 11 | from table_line import model 12 | from tensorflow.keras.optimizers import Adam 13 | from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau 14 | from sklearn.model_selection import train_test_split 15 | from glob import glob 16 | from image import gen 17 | 18 | if __name__ == '__main__': 19 | filepath = './models/table-line-fine.h5' ##模型权重存放位置 20 | 21 | checkpointer = ModelCheckpoint(filepath=filepath, monitor='loss', verbose=0, save_weights_only=True, 22 | save_best_only=True) 23 | rlu = ReduceLROnPlateau(monitor='loss', factor=0.1, patience=5, verbose=0, mode='auto', cooldown=0, min_lr=0) 24 | model.compile(optimizer=Adam(lr=0.0001), loss='binary_crossentropy', metrics=['acc']) 25 | 26 | paths = glob('./train/dataset-line/*/*.json') ##table line dataset label with labelme 27 | trainP, testP = train_test_split(paths, test_size=0.1) 28 | print('total:', len(paths), 'train:', len(trainP), 'test:', len(testP)) 29 | batchsize = 4 30 | trainloader = gen(trainP, batchsize=batchsize, linetype=1) 31 | testloader = gen(testP, batchsize=batchsize, linetype=1) 32 | model.fit_generator(trainloader, 33 | steps_per_epoch=max(1, len(trainP) // batchsize), 34 | callbacks=[checkpointer], 35 | validation_data=testloader, 36 | validation_steps=max(1, len(testP) // batchsize), 37 | epochs=30) 38 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Sep 9 23:11:51 2020 5 | utils 6 | @author: chineseocr 7 | """ 8 | import cv2 9 | import numpy as np 10 | 11 | 12 | def nms_box(boxes, scores, score_threshold=0.5, nms_threshold=0.3): 13 | ##nms box 14 | boxes = np.array(boxes) 15 | scores = np.array(scores) 16 | ind = scores > score_threshold 17 | boxes = boxes[ind] 18 | scores = scores[ind] 19 | 20 | def box_to_center(box): 21 | xmin, ymin, xmax, ymax = [round(float(x), 4) for x in box] 22 | w = xmax - xmin 23 | h = ymax - ymin 24 | return [round(xmin, 4), round(ymin, 4), round(w, 4), round(h, 4)] 25 | 26 | newBoxes = [box_to_center(box) for box in boxes] 27 | newscores = [round(float(x), 6) for x in scores] 28 | 29 | index = cv2.dnn.NMSBoxes(newBoxes, newscores, score_threshold=score_threshold, nms_threshold=nms_threshold) 30 | if len(index) > 0: 31 | index = index.reshape((-1,)) 32 | return boxes[index], scores[index] 33 | else: 34 | return np.array([]), np.array([]) 35 | 36 | 37 | from scipy.ndimage import filters, interpolation 38 | from numpy import amin, amax 39 | 40 | 41 | def resize_im(im, scale, max_scale=None): 42 | f = float(scale) / min(im.shape[0], im.shape[1]) 43 | if max_scale != None and f * max(im.shape[0], im.shape[1]) > max_scale: 44 | f = float(max_scale) / max(im.shape[0], im.shape[1]) 45 | return cv2.resize(im, (0, 0), fx=f, fy=f) 46 | 47 | 48 | def estimate_skew_angle(raw, angleRange=[-15, 15]): 49 | """ 50 | 估计图像文字偏转角度, 51 | angleRange:角度估计区间 52 | """ 53 | raw = resize_im(raw, scale=600, max_scale=900) 54 | image = raw - amin(raw) 55 | image = image / amax(image) 56 | m = interpolation.zoom(image, 0.5) 57 | m = filters.percentile_filter(m, 80, size=(20, 2)) 58 | m = filters.percentile_filter(m, 80, size=(2, 20)) 59 | m = interpolation.zoom(m, 1.0 / 0.5) 60 | # w,h = image.shape[1],image.shape[0] 61 | w, h = min(image.shape[1], m.shape[1]), min(image.shape[0], m.shape[0]) 62 | flat = np.clip(image[:h, :w] - m[:h, :w] + 1, 0, 1) 63 | d0, d1 = flat.shape 64 | o0, o1 = int(0.1 * d0), int(0.1 * d1) 65 | flat = amax(flat) - flat 66 | flat -= amin(flat) 67 | est = flat[o0:d0 - o0, o1:d1 - o1] 68 | angles = range(angleRange[0], angleRange[1]) 69 | estimates = [] 70 | for a in angles: 71 | roest = interpolation.rotate(est, a, order=0, mode='constant') 72 | v = np.mean(roest, axis=1) 73 | v = np.var(v) 74 | estimates.append((v, a)) 75 | 76 | _, a = max(estimates) 77 | return a 78 | 79 | 80 | def eval_angle(img, angleRange=[-5, 5]): 81 | """ 82 | 估计图片文字的偏移角度 83 | """ 84 | im = Image.fromarray(img) 85 | degree = estimate_skew_angle(np.array(im.convert('L')), angleRange=angleRange) 86 | im = im.rotate(degree, center=(im.size[0] / 2, im.size[1] / 2), expand=1, fillcolor=(255, 255, 255)) 87 | img = np.array(im) 88 | return img, degree 89 | 90 | 91 | def letterbox_image(image, size, fillValue=[128, 128, 128]): 92 | ''' 93 | resize image with unchanged aspect ratio using padding 94 | ''' 95 | image_h, image_w = image.shape[:2] 96 | w, h = size 97 | new_w = int(image_w * min(w * 1.0 / image_w, h * 1.0 / image_h)) 98 | new_h = int(image_h * min(w * 1.0 / image_w, h * 1.0 / image_h)) 99 | 100 | resized_image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_CUBIC) 101 | # cv2.imwrite('tmp/test.png', resized_image[...,::-1]) 102 | if fillValue is None: 103 | fillValue = [int(x.mean()) for x in cv2.split(np.array(image))] 104 | boxed_image = np.zeros((size[1], size[0], 3), dtype=np.uint8) 105 | boxed_image[:] = fillValue 106 | boxed_image[:new_h, :new_w, :] = resized_image 107 | 108 | return boxed_image, new_w / image_w, new_h / image_h 109 | 110 | 111 | from skimage import measure 112 | 113 | 114 | def get_table_line(binimg, axis=0, lineW=10): 115 | ##获取表格线 116 | ##axis=0 横线 117 | ##axis=1 竖线 118 | labels = measure.label(binimg > 0, connectivity=2) # 8连通区域标记 119 | regions = measure.regionprops(labels) 120 | if axis == 1: 121 | lineboxes = [minAreaRect(line.coords) for line in regions if line.bbox[2] - line.bbox[0] > lineW] 122 | else: 123 | lineboxes = [minAreaRect(line.coords) for line in regions if line.bbox[3] - line.bbox[1] > lineW] 124 | return lineboxes 125 | 126 | 127 | def sqrt(p1, p2): 128 | return np.sqrt((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2) 129 | 130 | 131 | def adjust_lines(RowsLines, ColsLines, alph=50): 132 | ##调整line 133 | 134 | nrow = len(RowsLines) 135 | ncol = len(ColsLines) 136 | newRowsLines = [] 137 | newColsLines = [] 138 | for i in range(nrow): 139 | 140 | x1, y1, x2, y2 = RowsLines[i] 141 | cx1, cy1 = (x1 + x2) / 2, (y1 + y2) / 2 142 | for j in range(nrow): 143 | if i != j: 144 | x3, y3, x4, y4 = RowsLines[j] 145 | cx2, cy2 = (x3 + x4) / 2, (y3 + y4) / 2 146 | if (x3 < cx1 < x4 or y3 < cy1 < y4) or (x1 < cx2 < x2 or y1 < cy2 < y2): 147 | continue 148 | else: 149 | r = sqrt((x1, y1), (x3, y3)) 150 | if r < alph: 151 | newRowsLines.append([x1, y1, x3, y3]) 152 | r = sqrt((x1, y1), (x4, y4)) 153 | if r < alph: 154 | newRowsLines.append([x1, y1, x4, y4]) 155 | 156 | r = sqrt((x2, y2), (x3, y3)) 157 | if r < alph: 158 | newRowsLines.append([x2, y2, x3, y3]) 159 | r = sqrt((x2, y2), (x4, y4)) 160 | if r < alph: 161 | newRowsLines.append([x2, y2, x4, y4]) 162 | 163 | for i in range(ncol): 164 | x1, y1, x2, y2 = ColsLines[i] 165 | cx1, cy1 = (x1 + x2) / 2, (y1 + y2) / 2 166 | for j in range(ncol): 167 | if i != j: 168 | x3, y3, x4, y4 = ColsLines[j] 169 | cx2, cy2 = (x3 + x4) / 2, (y3 + y4) / 2 170 | if (x3 < cx1 < x4 or y3 < cy1 < y4) or (x1 < cx2 < x2 or y1 < cy2 < y2): 171 | continue 172 | else: 173 | r = sqrt((x1, y1), (x3, y3)) 174 | if r < alph: 175 | newColsLines.append([x1, y1, x3, y3]) 176 | r = sqrt((x1, y1), (x4, y4)) 177 | if r < alph: 178 | newColsLines.append([x1, y1, x4, y4]) 179 | 180 | r = sqrt((x2, y2), (x3, y3)) 181 | if r < alph: 182 | newColsLines.append([x2, y2, x3, y3]) 183 | r = sqrt((x2, y2), (x4, y4)) 184 | if r < alph: 185 | newColsLines.append([x2, y2, x4, y4]) 186 | 187 | return newRowsLines, newColsLines 188 | 189 | 190 | def minAreaRect(coords): 191 | """ 192 | 多边形外接矩形 193 | """ 194 | rect = cv2.minAreaRect(coords[:, ::-1]) 195 | box = cv2.boxPoints(rect) 196 | box = box.reshape((8,)).tolist() 197 | 198 | box = image_location_sort_box(box) 199 | 200 | x1, y1, x2, y2, x3, y3, x4, y4 = box 201 | degree, w, h, cx, cy = solve(box) 202 | if w < h: 203 | xmin = (x1 + x2) / 2 204 | xmax = (x3 + x4) / 2 205 | ymin = (y1 + y2) / 2 206 | ymax = (y3 + y4) / 2 207 | 208 | else: 209 | xmin = (x1 + x4) / 2 210 | xmax = (x2 + x3) / 2 211 | ymin = (y1 + y4) / 2 212 | ymax = (y2 + y3) / 2 213 | # degree,w,h,cx,cy = solve(box) 214 | # x1,y1,x2,y2,x3,y3,x4,y4 = box 215 | # return {'degree':degree,'w':w,'h':h,'cx':cx,'cy':cy} 216 | return [xmin, ymin, xmax, ymax] 217 | 218 | 219 | def fit_line(p1, p2): 220 | """A = Y2 - Y1 221 | B = X1 - X2 222 | C = X2*Y1 - X1*Y2 223 | AX+BY+C=0 224 | 直线一般方程 225 | """ 226 | x1, y1 = p1 227 | x2, y2 = p2 228 | A = y2 - y1 229 | B = x1 - x2 230 | C = x2 * y1 - x1 * y2 231 | return A, B, C 232 | 233 | 234 | def point_line_cor(p, A, B, C): 235 | ##判断点与之间的位置关系 236 | # 一般式直线方程(Ax+By+c)=0 237 | x, y = p 238 | r = A * x + B * y + C 239 | return r 240 | 241 | 242 | def line_to_line(points1, points2, alpha=10): 243 | """ 244 | 线段之间的距离 245 | """ 246 | x1, y1, x2, y2 = points1 247 | ox1, oy1, ox2, oy2 = points2 248 | A1, B1, C1 = fit_line((x1, y1), (x2, y2)) 249 | A2, B2, C2 = fit_line((ox1, oy1), (ox2, oy2)) 250 | flag1 = point_line_cor([x1, y1], A2, B2, C2) 251 | flag2 = point_line_cor([x2, y2], A2, B2, C2) 252 | 253 | if (flag1 > 0 and flag2 > 0) or (flag1 < 0 and flag2 < 0): 254 | 255 | x = (B1 * C2 - B2 * C1) / (A1 * B2 - A2 * B1) 256 | y = (A2 * C1 - A1 * C2) / (A1 * B2 - A2 * B1) 257 | p = (x, y) 258 | r0 = sqrt(p, (x1, y1)) 259 | r1 = sqrt(p, (x2, y2)) 260 | 261 | if min(r0, r1) < alpha: 262 | 263 | if r0 < r1: 264 | points1 = [p[0], p[1], x2, y2] 265 | else: 266 | points1 = [x1, y1, p[0], p[1]] 267 | 268 | return points1 269 | 270 | 271 | from scipy.spatial import distance as dist 272 | 273 | 274 | def _order_points(pts): 275 | # 根据x坐标对点进行排序 276 | """ 277 | --------------------- 278 | 作者:Tong_T 279 | 来源:CSDN 280 | 原文:https://blog.csdn.net/Tong_T/article/details/81907132 281 | 版权声明:本文为博主原创文章,转载请附上博文链接! 282 | """ 283 | x_sorted = pts[np.argsort(pts[:, 0]), :] 284 | 285 | left_most = x_sorted[:2, :] 286 | right_most = x_sorted[2:, :] 287 | left_most = left_most[np.argsort(left_most[:, 1]), :] 288 | (tl, bl) = left_most 289 | 290 | distance = dist.cdist(tl[np.newaxis], right_most, "euclidean")[0] 291 | (br, tr) = right_most[np.argsort(distance)[::-1], :] 292 | 293 | return np.array([tl, tr, br, bl], dtype="float32") 294 | 295 | 296 | def image_location_sort_box(box): 297 | x1, y1, x2, y2, x3, y3, x4, y4 = box[:8] 298 | pts = (x1, y1), (x2, y2), (x3, y3), (x4, y4) 299 | pts = np.array(pts, dtype="float32") 300 | (x1, y1), (x2, y2), (x3, y3), (x4, y4) = _order_points(pts) 301 | return [x1, y1, x2, y2, x3, y3, x4, y4] 302 | 303 | 304 | def solve(box): 305 | """ 306 | 绕 cx,cy点 w,h 旋转 angle 的坐标 307 | x = cx-w/2 308 | y = cy-h/2 309 | x1-cx = -w/2*cos(angle) +h/2*sin(angle) 310 | y1 -cy= -w/2*sin(angle) -h/2*cos(angle) 311 | 312 | h(x1-cx) = -wh/2*cos(angle) +hh/2*sin(angle) 313 | w(y1 -cy)= -ww/2*sin(angle) -hw/2*cos(angle) 314 | (hh+ww)/2sin(angle) = h(x1-cx)-w(y1 -cy) 315 | 316 | """ 317 | x1, y1, x2, y2, x3, y3, x4, y4 = box[:8] 318 | cx = (x1 + x3 + x2 + x4) / 4.0 319 | cy = (y1 + y3 + y4 + y2) / 4.0 320 | w = (np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2) + np.sqrt((x3 - x4) ** 2 + (y3 - y4) ** 2)) / 2 321 | h = (np.sqrt((x2 - x3) ** 2 + (y2 - y3) ** 2) + np.sqrt((x1 - x4) ** 2 + (y1 - y4) ** 2)) / 2 322 | # x = cx-w/2 323 | # y = cy-h/2 324 | sinA = (h * (x1 - cx) - w * (y1 - cy)) * 1.0 / (h * h + w * w) * 2 325 | angle = np.arcsin(sinA) 326 | return angle, w, h, cx, cy 327 | 328 | 329 | def xy_rotate_box(cx, cy, w, h, angle=0, degree=None, **args): 330 | """ 331 | 绕 cx,cy点 w,h 旋转 angle 的坐标 332 | x_new = (x-cx)*cos(angle) - (y-cy)*sin(angle)+cx 333 | y_new = (x-cx)*sin(angle) + (y-cy)*sin(angle)+cy 334 | """ 335 | if degree is not None: 336 | angle = degree 337 | cx = float(cx) 338 | cy = float(cy) 339 | w = float(w) 340 | h = float(h) 341 | angle = float(angle) 342 | x1, y1 = rotate(cx - w / 2, cy - h / 2, angle, cx, cy) 343 | x2, y2 = rotate(cx + w / 2, cy - h / 2, angle, cx, cy) 344 | x3, y3 = rotate(cx + w / 2, cy + h / 2, angle, cx, cy) 345 | x4, y4 = rotate(cx - w / 2, cy + h / 2, angle, cx, cy) 346 | return x1, y1, x2, y2, x3, y3, x4, y4 347 | 348 | 349 | from numpy import cos, sin 350 | 351 | 352 | def rotate(x, y, angle, cx, cy): 353 | angle = angle # *pi/180 354 | x_new = (x - cx) * cos(angle) - (y - cy) * sin(angle) + cx 355 | y_new = (x - cx) * sin(angle) + (y - cy) * cos(angle) + cy 356 | return x_new, y_new 357 | 358 | 359 | def minAreaRectbox(regions, flag=True, W=0, H=0, filtersmall=False, adjustBox=False): 360 | """ 361 | 多边形外接矩形 362 | """ 363 | boxes = [] 364 | for region in regions: 365 | rect = cv2.minAreaRect(region.coords[:, ::-1]) 366 | 367 | box = cv2.boxPoints(rect) 368 | box = box.reshape((8,)).tolist() 369 | box = image_location_sort_box(box) 370 | x1, y1, x2, y2, x3, y3, x4, y4 = box 371 | angle, w, h, cx, cy = solve(box) 372 | if adjustBox: 373 | x1, y1, x2, y2, x3, y3, x4, y4 = xy_rotate_box(cx, cy, w + 5, h + 5, angle=0, degree=None) 374 | 375 | if w > 32 and h > 32 and flag: 376 | if abs(angle / np.pi * 180) < 20: 377 | if filtersmall and w < 10 or h < 10: 378 | continue 379 | boxes.append([x1, y1, x2, y2, x3, y3, x4, y4]) 380 | else: 381 | if w * h < 0.5 * W * H: 382 | if filtersmall and w < 8 or h < 8: 383 | continue 384 | boxes.append([x1, y1, x2, y2, x3, y3, x4, y4]) 385 | return boxes 386 | 387 | 388 | from PIL import Image 389 | 390 | 391 | def rectangle(img, boxes): 392 | tmp = np.copy(img) 393 | for box in boxes: 394 | xmin, ymin, xmax, ymax = box[:4] 395 | cv2.rectangle(tmp, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 0, 0), 1, lineType=cv2.LINE_AA) 396 | return Image.fromarray(tmp) 397 | 398 | 399 | def draw_lines(im, bboxes, color=(0, 0, 0), lineW=3): 400 | """ 401 | boxes: bounding boxes 402 | """ 403 | tmp = np.copy(im) 404 | c = color 405 | h, w = im.shape[:2] 406 | 407 | for box in bboxes: 408 | x1, y1, x2, y2 = box[:4] 409 | cv2.line(tmp, (int(x1), int(y1)), (int(x2), int(y2)), c, lineW, lineType=cv2.LINE_AA) 410 | 411 | return tmp 412 | 413 | 414 | def draw_boxes(im, bboxes, color=(0, 0, 0)): 415 | """ 416 | boxes: bounding boxes 417 | """ 418 | tmp = np.copy(im) 419 | c = color 420 | h, w, _ = im.shape 421 | 422 | for box in bboxes: 423 | if type(box) is dict: 424 | x1, y1, x2, y2, x3, y3, x4, y4 = xy_rotate_box(**box) 425 | else: 426 | x1, y1, x2, y2, x3, y3, x4, y4 = box[:8] 427 | 428 | cv2.line(tmp, (int(x1), int(y1)), (int(x2), int(y2)), c, 1, lineType=cv2.LINE_AA) 429 | cv2.line(tmp, (int(x2), int(y2)), (int(x3), int(y3)), c, 1, lineType=cv2.LINE_AA) 430 | cv2.line(tmp, (int(x3), int(y3)), (int(x4), int(y4)), c, 1, lineType=cv2.LINE_AA) 431 | cv2.line(tmp, (int(x4), int(y4)), (int(x1), int(y1)), c, 1, lineType=cv2.LINE_AA) 432 | 433 | return tmp 434 | --------------------------------------------------------------------------------