├── .gitignore ├── README.md ├── __pycache__ ├── corner_detection.cpython-37.pyc ├── corner_detection.cpython-38.pyc ├── transform.cpython-37.pyc ├── transform.cpython-38.pyc ├── utils.cpython-37.pyc └── utils.cpython-38.pyc ├── a.out ├── compile.sh ├── conv2d.cpp ├── corner_detection.py ├── data ├── 000026.jpg ├── 000029.jpg ├── 000872.jpg ├── 001201.jpg ├── 001402.jpg └── 001552.jpg ├── example.cpython-38-x86_64-linux-gnu.so ├── main.py ├── transform.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | result/*.jpg -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 简介 2 | hahaha~~ 很开心又开始做新项目了,是《数字图像分析》大作业。实现文档提取与矫正。 3 | *整个项目只用到了opencv的IO操作*(卷积、生成高斯滤波器等等均为自己实现) 4 | 5 | # 更新 6 | - 2020.07.10 7 | - 使用 [NumCpp](https://github.com/dpilger26/NumCpp) / [pybind11](https://github.com/pybind/pybind11) 与C++混合编程, 加速卷积过程 8 | ![affQLq.png](https://s1.ax1x.com/2020/08/07/affQLq.png) 9 | - 2020.07.07 10 | - 使用im2col代替原来三重for循环的卷积形式,整体运行时间能减少一半 11 | [![UFaXB8.png](https://s1.ax1x.com/2020/07/07/UFaXB8.png)](https://imgchr.com/i/UFaXB8) 12 | 13 | 14 | # 运行纯python 15 | > 默认. 有时间我添加下参数选项, 选择python还是c++版本 16 | 17 | ## 环境 18 | - `pip install opencv-python numpy` 19 | ## run 20 | - `python main.py` 21 | - 在`main.py`中可以修改需要提取的文件,结果保存在`result`文件夹中 22 | 23 | # 运行cpp混编 24 | > 非默认. 有时间我添加下参数选项, 选择python还是c++版本 25 | 26 | - 配置好[NumCpp](https://github.com/dpilger26/NumCpp) / [pybind11](https://github.com/pybind/pybind11) 27 | - 运行`compile.sh`脚本 28 | - 在`corner_detection.py`文件中修改c++版本卷积 29 | 30 | # 技术栈 31 | ## S&G ? 32 | SG是自己乱起的,包含了两个技术栈: 33 | - S: Susan角点检测 34 | - G: Geometric几何校正 35 | 36 | ## H&G ! 
37 | > 通过实验发现直接检测角点根本实现不了,图片中角点太多,且不能加入先验进行过滤。遂通过 38 | 检测直线,求角点实现 39 | 40 | - H: Hough哈夫变换检测直线,得到角点 41 | - G: Geometic几何校正 42 | 43 | # 结果 44 | ![000026.jpg](https://i.loli.net/2019/01/05/5c2ffcc192ae4.jpg) 45 | ![000026.jpg](https://i.loli.net/2019/01/05/5c2ffcfe5021c.jpg) 46 | ![000872.jpg](https://i.loli.net/2019/01/05/5c2ffcc194ee2.jpg) 47 | ![000872.jpg](https://i.loli.net/2019/01/05/5c2ffcfe4e4c4.jpg) 48 | ![001402.jpg](https://i.loli.net/2019/01/05/5c2ffcc1a6844.jpg) 49 | ![001402.jpg](https://i.loli.net/2019/01/05/5c2ffcfe51d36.jpg) 50 | ![001552.jpg](https://i.loli.net/2019/01/05/5c2ffcc1a7a7a.jpg) 51 | ![001552.jpg](https://i.loli.net/2019/01/05/5c2ffcfe53f1c.jpg) 52 | ![001201.jpg](https://i.loli.net/2019/01/05/5c2ffcc1a78a6.jpg) 53 | ![001201.jpg](https://i.loli.net/2019/01/05/5c2ffcfe55b12.jpg) 54 | -------------------------------------------------------------------------------- /__pycache__/corner_detection.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FantDing/Image-document-extract-and-correction/634f8e4e52bb5345725453fa5e8d5b31844ea364/__pycache__/corner_detection.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/corner_detection.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FantDing/Image-document-extract-and-correction/634f8e4e52bb5345725453fa5e8d5b31844ea364/__pycache__/corner_detection.cpython-38.pyc -------------------------------------------------------------------------------- /__pycache__/transform.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FantDing/Image-document-extract-and-correction/634f8e4e52bb5345725453fa5e8d5b31844ea364/__pycache__/transform.cpython-37.pyc 
-------------------------------------------------------------------------------- /__pycache__/transform.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FantDing/Image-document-extract-and-correction/634f8e4e52bb5345725453fa5e8d5b31844ea364/__pycache__/transform.cpython-38.pyc -------------------------------------------------------------------------------- /__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FantDing/Image-document-extract-and-correction/634f8e4e52bb5345725453fa5e8d5b31844ea364/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FantDing/Image-document-extract-and-correction/634f8e4e52bb5345725453fa5e8d5b31844ea364/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /a.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FantDing/Image-document-extract-and-correction/634f8e4e52bb5345725453fa5e8d5b31844ea364/a.out -------------------------------------------------------------------------------- /compile.sh: -------------------------------------------------------------------------------- 1 | #c++ -Wall -shared -std=c++14 -fPIC `python3 -m pybind11 --includes` conv2d.cpp -o example`python3-config --extension-suffix` 2 | c++ -O3 -Wall -shared -std=c++14 -fPIC `python3 -m pybind11 --includes` conv2d.cpp -o example`python3-config --extension-suffix` -------------------------------------------------------------------------------- /conv2d.cpp: -------------------------------------------------------------------------------- 1 | #define 
INCLUDE_PYTHON_INTERFACE 1 2 | #define INCLUDE_PYBIND_PYTHON_INTERFACE 1 3 | 4 | #include "pybind11/pybind11.h" 5 | #include "pybind11/numpy.h" 6 | #include "NumCpp.hpp" 7 | #include 8 | #include 9 | #include 10 | using namespace std; 11 | namespace py=pybind11; 12 | 13 | template 14 | py::array_t cpp_dot(py::array_t inArray1, py::array_t inArray2) 15 | { 16 | auto array1 = nc::pybindInterface::pybind2nc(inArray1); 17 | auto array2 = nc::pybindInterface::pybind2nc(inArray2); 18 | auto dotProduct = nc::dot(array1, array2); 19 | return nc::pybindInterface::nc2pybind(dotProduct); 20 | } 21 | 22 | template 23 | py::array_t conv2d_pure(int s,string mode, py::array_t kernel_py, py::array_t input_py) 24 | { 25 | auto kernel = nc::pybindInterface::pybind2nc(kernel_py); 26 | auto image = nc::pybindInterface::pybind2nc(input_py); 27 | int k=kernel.shape().rows; 28 | int h=image.shape().rows; 29 | int w=image.shape().cols; 30 | int p_h,p_w; 31 | if(mode=="same"){ 32 | p_h=(s*(h-1)+k-h)/2; 33 | p_w=(s*(w-1)+k-w)/2; 34 | } 35 | else if(mode=="valid"){ 36 | p_h=0; 37 | p_w=0; 38 | } 39 | else if(mode=="full"){ 40 | p_h=k-1; 41 | p_w=k-1; 42 | } 43 | else{ 44 | assert(false); 45 | } 46 | int out_h = (h + 2 * p_h - k) /s + 1; 47 | int out_w = (w + 2 * p_w - k) /s + 1; 48 | //填充后的image 49 | auto padded_img = nc::zeros(h + 2 * p_h, w + 2 * p_w); 50 | padded_img.put(nc::Slice(p_h,p_h + h), nc::Slice(p_w,p_w + w),image) ; 51 | auto image_mat=nc::zeros(out_h ,out_w); 52 | // nc::NdArray shapedKernel=kernel.reshape(k*k,1); 53 | for(int i=0;i(padded_img(x,y)*kernel(x-i*s,y-j*s)); 59 | } 60 | } 61 | image_mat.put(i,j,sum); 62 | // auto window=padded_img(nc::Slice(i * s,(i * s +k)),nc::Slice(j*s,j*s+k)); 63 | // image_mat.put(i,j,nc::dot(window.reshape(1,k*k),shapedKernel).item()); 64 | } 65 | } 66 | return nc::pybindInterface::nc2pybind(image_mat); 67 | } 68 | 69 | 70 | 71 | template 72 | py::array_t conv2d(int s,string mode, py::array_t kernel_py, py::array_t input_py) 73 | { 74 | auto 
kernel = nc::pybindInterface::pybind2nc(kernel_py); 75 | auto image = nc::pybindInterface::pybind2nc(input_py); 76 | int k=kernel.shape().rows; 77 | int h=image.shape().rows; 78 | int w=image.shape().cols; 79 | int p_h,p_w; 80 | if(mode=="same"){ 81 | p_h=(s*(h-1)+k-h)/2; 82 | p_w=(s*(w-1)+k-w)/2; 83 | } 84 | else if(mode=="valid"){ 85 | p_h=0; 86 | p_w=0; 87 | } 88 | else if(mode=="full"){ 89 | p_h=k-1; 90 | p_w=k-1; 91 | } 92 | else{ 93 | assert(false); 94 | } 95 | int out_h = (h + 2 * p_h - k) /s + 1; 96 | int out_w = (w + 2 * p_w - k) /s + 1; 97 | //填充后的image 98 | auto padded_img = nc::zeros(h + 2 * p_h, w + 2 * p_w); 99 | padded_img.put(nc::Slice(p_h,p_h + h), nc::Slice(p_w,p_w + w),image) ; 100 | auto image_mat=nc::zeros(out_h * out_w, k*k); 101 | int row=0; 102 | nc::NdArray window; 103 | for(int i=0;i(image_mat, kernel.reshape(k*k,1)).reshape(out_h,out_w); 111 | return nc::pybindInterface::nc2pybind(dotProduct); 112 | } 113 | 114 | 115 | template 116 | void f(nc::NdArray* image_mat,nc::NdArray * padded_img,int out_h_start,int out_h,int out_w,int row,int s,int k){ 117 | nc::NdArray window; 118 | 119 | for(int i=out_h_start;iput(row,image_mat->cSlice(),window.flatten()); 123 | ++row; 124 | } 125 | } 126 | } 127 | 128 | template 129 | py::array_t conv2d_multi(int s,string mode, py::array_t kernel_py, py::array_t input_py) 130 | { 131 | auto kernel = nc::pybindInterface::pybind2nc(kernel_py); 132 | auto image = nc::pybindInterface::pybind2nc(input_py); 133 | int k=kernel.shape().rows; 134 | int h=image.shape().rows; 135 | int w=image.shape().cols; 136 | int p_h,p_w; 137 | if(mode=="same"){ 138 | p_h=(s*(h-1)+k-h)/2; 139 | p_w=(s*(w-1)+k-w)/2; 140 | } 141 | else if(mode=="valid"){ 142 | p_h=0; 143 | p_w=0; 144 | } 145 | else if(mode=="full"){ 146 | p_h=k-1; 147 | p_w=k-1; 148 | } 149 | else{ 150 | assert(false); 151 | } 152 | int out_h = (h + 2 * p_h - k) /s + 1; 153 | int out_w = (w + 2 * p_w - k) /s + 1; 154 | //填充后的image 155 | auto padded_img = nc::zeros(h + 
2 * p_h, w + 2 * p_w); 156 | padded_img.put(nc::Slice(p_h,p_h + h), nc::Slice(p_w,p_w + w),image) ; 157 | auto image_mat=nc::zeros(out_h * out_w, k*k); 158 | 159 | thread t1(f,&image_mat,&padded_img,0,out_h/2,out_w,0,s,k); 160 | thread t2(f,&image_mat,&padded_img,out_h/2,out_h,out_w,out_h * out_w/2,s,k); 161 | t1.join(); 162 | t2.join(); 163 | auto dotProduct = nc::dot(image_mat, kernel.reshape(k*k,1)).reshape(out_h,out_w); 164 | return nc::pybindInterface::nc2pybind(dotProduct); 165 | } 166 | 167 | PYBIND11_MODULE(example, m) 168 | { 169 | m.doc() = "This is an example of using NumCpp with python and NumPy."; 170 | 171 | m.def("cpp_dot_double", &cpp_dot, 172 | pybind11::arg("inArray1"), 173 | pybind11::arg("inArray2"), 174 | "Returns the dot project of the two arrays."); 175 | 176 | m.def("conv2d", &conv2d, 177 | pybind11::arg("s"), 178 | pybind11::arg("mode"), 179 | pybind11::arg("kernel_py"), 180 | pybind11::arg("input_py"), 181 | "Returns the dot project of the two arrays."); 182 | 183 | m.def("conv2d_multi", &conv2d_multi, 184 | pybind11::arg("s"), 185 | pybind11::arg("mode"), 186 | pybind11::arg("kernel_py"), 187 | pybind11::arg("input_py"), 188 | "Returns the dot project of the two arrays."); 189 | 190 | m.def("conv2d_pure", &conv2d_pure, 191 | pybind11::arg("s"), 192 | pybind11::arg("mode"), 193 | pybind11::arg("kernel_py"), 194 | pybind11::arg("input_py"), 195 | "Returns the dot project of the two arrays."); 196 | } 197 | 198 | //#include 199 | // 200 | //int add(int i, int j) { 201 | // return i + j; 202 | //} 203 | // 204 | //PYBIND11_MODULE(example, m) { 205 | // m.doc() = "pybind11 example plugin"; // optional module docstring 206 | // 207 | // m.def("add", &add, "A function which adds two numbers"); 208 | //} -------------------------------------------------------------------------------- /corner_detection.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import math 4 | 
import matplotlib.pyplot as plt 5 | import example 6 | ''' 7 | 可调参数 8 | @get_grad_img: grad阈值 9 | ''' 10 | 11 | 12 | def filter2D(img, kernel): 13 | # 计算需要padding的大小 14 | assert kernel.shape[0] == kernel.shape[1] 15 | pad_size = int(kernel.shape[0] / 2) # 每个边应该padding的厚度 16 | kernel_size = kernel.shape[0] 17 | row, col = img.shape 18 | 19 | # TODO: 用边缘像素点填充 20 | # top_padding_content = np.tile(img[0, :], (pad_size, 1)) 21 | # bottom_padding_content = np.tile(img[-1, :], (pad_size, 1)) 22 | # print(top_padding_content.shape) 23 | # left_padding_content = np.tile(img[:, 0], (1, pad_size )) 24 | # right_padding_content = np.tile(img[:, -1], (1, pad_size )) 25 | # img = np.vstack((top_padding_content, img, bottom_padding_content)) 26 | # img = np.hstack((left_padding_content, img, right_padding_content)) 27 | 28 | # 用0填充 29 | top_padding_content = np.zeros((pad_size, col)) 30 | left_padding_content = np.zeros((row + 2 * pad_size, pad_size)) 31 | img = np.vstack((top_padding_content, img, top_padding_content)) 32 | img = np.hstack((left_padding_content, img, left_padding_content)) 33 | grad_img = np.zeros((row, col)) 34 | for i in range(img.shape[0] - kernel_size + 1): # new_size- filter_size+1 @important 35 | for j in range(img.shape[1] - kernel_size + 1): 36 | img_part = img[i:i + kernel_size, j:j + kernel_size] 37 | result = np.sum( 38 | img_part * kernel 39 | ) 40 | grad_img[i][j] = result 41 | # 边界平滑 42 | # done: 能够适应任何size的kernel 43 | top_padding_content = np.tile(grad_img[pad_size, :], (pad_size, 1)) 44 | grad_img[0:pad_size, :] = top_padding_content 45 | 46 | bottom_padding_content = np.tile(grad_img[-(pad_size + 1), :], (pad_size, 1)) 47 | grad_img[-pad_size:, :] = bottom_padding_content 48 | 49 | left_padding_content = np.tile(np.expand_dims(grad_img[:, pad_size], axis=1), (1, pad_size)) 50 | grad_img[:, 0:pad_size] = left_padding_content 51 | 52 | top_padding_content = np.tile(np.expand_dims(grad_img[:, -(pad_size + 1)], axis=1), (1, pad_size)) 53 | 
def make_gauss_filter(size, std_D):
    """
    Build a normalised ``size`` x ``size`` Gaussian smoothing kernel.

    The kernel is sampled on an integer grid centred on the middle cell and
    rescaled so that all of its weights add up to 1.

    :param size: kernel width and height; must be odd so a centre cell exists
    :param std_D: standard deviation of the Gaussian
    :return: array of shape (size, size) whose entries sum to 1
    """
    assert size % 2 == 1
    half = int(size / 2)
    # Horizontal offsets cycle -half..half within every row of the kernel.
    col_offsets = np.tile(np.arange(-half, half + 1), size)
    # Vertical offsets are constant along a row and run from +half down to -half.
    row_offsets = np.repeat(np.arange(half, -half - 1, step=-1), size)
    exponent = -(col_offsets * col_offsets + row_offsets * row_offsets) / (2 * std_D * std_D)
    weights = np.exp(exponent) / (2 * np.pi * std_D * std_D)
    # Renormalise: the sampled, truncated Gaussian does not sum to exactly 1.
    weights = weights / np.sum(weights)
    return weights.reshape(size, size)
117 | # grad_img = filter2D(smoothed_img, kernel=laplace) 118 | 119 | # 2. 120 | filter2D = Conv2d(3, 1, 1, 1, weight=laplace[np.newaxis, :],mode='valid') 121 | grad_img=filter2D(smoothed_img[np.newaxis,:])[0] 122 | 123 | # 3. 124 | # grad_img=example.conv2d(1,"valid",laplace,smoothed_img) 125 | 126 | # 4. 127 | # grad_img = example.conv2d_multi(1, "valid", laplace, smoothed_img) 128 | 129 | # 5. 130 | # grad_img = example.conv2d_pure(1, "valid", laplace, smoothed_img) 131 | 132 | # plt.imshow(grad_img) 133 | # plt.show() 134 | grad_img = np.where(grad_img > 7, grad_img, 0) 135 | # plt.imshow(grad_img,cmap="gray") 136 | # plt.show() 137 | return grad_img 138 | 139 | 140 | def houghLines(grad_img): 141 | """ 142 | 从梯度图进行hough变换,检测直线 143 | :param grad_img: 梯度图 144 | :return: 检测出来的直线的极坐标表示 145 | """ 146 | # --------------------------------------投票------------------------------------------ 147 | rho_max = math.sqrt(grad_img.shape[0] * grad_img.shape[0] + grad_img.shape[1] * grad_img.shape[1]) 148 | m, n = 180, 2000 149 | theta_range = np.linspace(0, np.pi, m) 150 | rho_range = np.linspace(-rho_max, rho_max, n) 151 | # 投票的表格 152 | vote_table = np.zeros(shape=(m, n)) 153 | 154 | row_cor, col_cor = np.where(grad_img > 0) # 挑出有选举权的点,假设有K个 155 | cor_mat = np.stack((row_cor, col_cor), axis=1) # K*2 156 | K = cor_mat.shape[0] 157 | 158 | cos_theta = np.cos(theta_range) 159 | sin_theta = np.sin(theta_range) 160 | # 这是一个大坑,row实际对应的是y 161 | # theta_mat = np.stack((cos_theta, sin_theta), axis=0) # 2*m 162 | theta_mat = np.stack((sin_theta, cos_theta), axis=0) # 2*m 163 | 164 | y_mat = np.matmul(cor_mat, theta_mat) # K*m 165 | 166 | rho_ind = ( 167 | (y_mat - (-rho_max)) * (n - 1) / (rho_max - (-rho_max)) 168 | ).astype(np.int32) # K*m 169 | rho_ind = np.ravel(rho_ind, order='F') # 在列方向stack 170 | 171 | theta_ind = np.arange(0, m)[:, np.newaxis] 172 | theta_ind = np.repeat(theta_ind, K) 173 | 174 | np.add.at(vote_table, (theta_ind, rho_ind), 1) # 在vote_table中投票 175 | # 
def is_too_small(area, shape, min_rate=1 / 3):
    """
    Tell whether a detected region covers too little of the image.

    :param area: area of the candidate region, in pixels
    :param shape: image shape; only ``shape[0]`` (rows) and ``shape[1]``
        (columns) are read, so a trailing channel entry is ignored
    :param min_rate: smallest acceptable ratio ``area / image_area``;
        defaults to one third
    :return: ``True`` when the ratio is strictly below ``min_rate``,
        otherwise ``False``
    """
    img_area = shape[0] * shape[1]
    # Always return a real bool; the comparison may yield a NumPy bool when
    # ``area`` is a NumPy scalar.
    return bool(area / img_area < min_rate)
近似面积最大的为角点 233 | points = np.array(points).reshape(num_vert_lines, num_hori_lines, 2) 234 | max_area = 0 235 | for i in range(num_vert_lines - 1): 236 | for j in range(num_hori_lines - 1): 237 | left_top = points[i][j] 238 | left_bottom = points[i][j + 1] 239 | right_top = points[i + 1][j] 240 | right_bottom = points[i + 1][j + 1] 241 | area = get_approx_area(left_top, left_bottom, right_top, right_bottom) 242 | if area > max_area: 243 | max_area = area 244 | point_seq = (left_top, right_top, right_bottom, left_bottom) 245 | return max_area, point_seq 246 | 247 | 248 | def detect_corners(gray_img): 249 | grad_img = get_grad_img(gray_img) 250 | polar_lines = houghLines(grad_img) 251 | # 绘制检测到的直线 252 | for i in range(polar_lines.shape[0]): 253 | theta, rho = tuple(polar_lines[i]) 254 | # print(theta, rho) 255 | a = np.cos(theta) 256 | b = np.sin(theta) 257 | x0 = a * rho 258 | y0 = b * rho 259 | x1 = int(x0 + 1000 * (-b)) 260 | y1 = int(y0 + 1000 * (a)) 261 | x2 = int(x0 - 1000 * (-b)) 262 | y2 = int(y0 - 1000 * (a)) 263 | # 逐条显示画出来的线 264 | # cv2.line(grad_img, (x1, y1), (x2, y2), (255, 255, 0), 2) 265 | # cv2.imshow('windows', grad_img) 266 | # cv2.waitKey(0) 267 | # -------------------------------计算交点---------------------------------- 268 | # 1. 为了化简计算,把直线分成接近水平/垂直, 两种直线 269 | vert_ind = np.abs(polar_lines[:, 0] - 1.5) > 0.5 270 | vert_lines = polar_lines[vert_ind, :] # 接近垂直的直线 271 | hori_lines = polar_lines[np.logical_not(vert_ind), :] # 接近水平的直线 272 | 273 | # 排序: 为了能够组成正方形,先进行排序 274 | test = np.argsort(np.abs(vert_lines[:, 1])) 275 | vert_lines = vert_lines[test, :] 276 | 277 | test = np.argsort(np.abs(hori_lines[:, 1])) 278 | hori_lines = hori_lines[test, :] 279 | # 2. 
def is_new_line(theta, rho, valid_data, exist_num, angle_tol=0.2, rho_sq_tol=1000):
    """
    Decide whether the polar line (theta, rho) should be added to the accepted set.

    A candidate is rejected when
      * two lines of its orientation class are already accepted (the detector
        wants two near-vertical and two near-horizontal lines), or
      * it nearly coincides with an accepted line: similar direction and
        similar ``|rho|``.

    The direction comparison uses the angular distance modulo pi, so lines at
    theta close to 0 and theta close to pi count as the same direction, and a
    candidate is never rejected merely because its theta is smaller than an
    accepted one.

    :param theta: candidate angle in radians (expected in [0, pi])
    :param rho: candidate signed distance from the origin
    :param valid_data: 2-D array whose first ``exist_num`` rows are the
        accepted lines as ``[theta, rho]``
    :param exist_num: number of rows of ``valid_data`` currently in use
    :param angle_tol: directions closer than this (radians) count as similar
    :param rho_sq_tol: squared ``|rho|`` differences below this count as similar
    :return: ``True`` if the candidate is a new line, ``False`` otherwise
    """
    accepted = valid_data[:exist_num]

    # A line is "vertical" when theta is far from ~pi/2 (1.5 rad), else "horizontal".
    accepted_is_vertical = np.abs(accepted[:, 0] - 1.5) > 0.5
    candidate_is_vertical = np.abs(theta - 1.5) > 0.5
    if candidate_is_vertical:
        if np.sum(accepted_is_vertical) >= 2:
            return False
    elif np.sum(np.abs(accepted[:, 0] - 1.5) <= 0.5) >= 2:
        return False

    for line in accepted:
        # Undirected lines repeat every pi, so fold the difference into [0, pi/2].
        diff = abs(theta - line[0]) % np.pi
        angle_gap = min(diff, np.pi - diff)
        # Compare |rho| because (theta, rho) and (theta + pi, -rho) are the same line.
        rho_gap_sq = np.square(np.abs(rho) - np.abs(line[1]))
        if angle_gap < angle_tol and rho_gap_sq < rho_sq_tol:
            return False
    return True
https://raw.githubusercontent.com/FantDing/Image-document-extract-and-correction/634f8e4e52bb5345725453fa5e8d5b31844ea364/data/000026.jpg -------------------------------------------------------------------------------- /data/000029.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FantDing/Image-document-extract-and-correction/634f8e4e52bb5345725453fa5e8d5b31844ea364/data/000029.jpg -------------------------------------------------------------------------------- /data/000872.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FantDing/Image-document-extract-and-correction/634f8e4e52bb5345725453fa5e8d5b31844ea364/data/000872.jpg -------------------------------------------------------------------------------- /data/001201.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FantDing/Image-document-extract-and-correction/634f8e4e52bb5345725453fa5e8d5b31844ea364/data/001201.jpg -------------------------------------------------------------------------------- /data/001402.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FantDing/Image-document-extract-and-correction/634f8e4e52bb5345725453fa5e8d5b31844ea364/data/001402.jpg -------------------------------------------------------------------------------- /data/001552.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FantDing/Image-document-extract-and-correction/634f8e4e52bb5345725453fa5e8d5b31844ea364/data/001552.jpg -------------------------------------------------------------------------------- /example.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- 
def build_equ(four_corners):
    """
    Assemble the 4x4 coefficient matrix of the bilinear corner mapping.

    Each corner ``(x0, y0)`` contributes one row ``[x0, y0, x0 * y0, 1]``,
    in the order the corners are given.

    :param four_corners: iterable of four ``(x0, y0)`` pairs
    :return: float array of shape (4, 4)
    """
    coeffs = np.zeros(shape=(4, 4))
    row = 0
    for corner in four_corners:
        x0, y0 = corner
        coeffs[row, :] = (x0, y0, x0 * y0, 1)
        row += 1
    return coeffs
def fast_bi_inter(src_img, height, width, T_x, T_y):
    """
    Resample ``src_img`` onto a ``height`` x ``width`` grid with vectorised
    bilinear interpolation.

    For every target pixel ``(i, j)`` the source position is
    ``x = [i, j, i * j, 1] . T_x`` (row) and ``y = [i, j, i * j, 1] . T_y``
    (column). The four neighbouring source pixels are blended with bilinear
    weights. Neighbours that fall outside the source image are clamped to the
    nearest edge pixel, so out-of-range coordinates neither raise IndexError
    nor wrap around to the opposite border.

    :param src_img: source image, array of shape (rows, cols, channels)
    :param height: target height
    :param width: target width
    :param T_x: length-4 coefficients mapping a target pixel to a source row
    :param T_y: length-4 coefficients mapping a target pixel to a source column
    :return: float array of shape (height, width, channels)
    """
    # One [i, j, i*j, 1] coefficient row per target pixel, in row-major order.
    rows, cols = np.indices((height, width))
    paras = np.stack((rows, cols, rows * cols, np.ones(shape=(height, width))), axis=2)
    paras = np.reshape(paras, (height * width, 4))

    # Source coordinates of every target pixel.
    x = np.matmul(paras, T_x)
    y = np.matmul(paras, T_y)

    x_floor = np.floor(x).astype(int)
    x_ceil = np.ceil(x).astype(int)
    y_floor = np.floor(y).astype(int)
    y_ceil = np.ceil(y).astype(int)

    # Weights come from the unclamped floor so in-range results are unchanged.
    p1 = x - x_floor
    p2 = 1 - p1
    q1 = y - y_floor
    q2 = 1 - q1

    # Clamp sample indices to the image (border replicate).
    last_row = src_img.shape[0] - 1
    last_col = src_img.shape[1] - 1
    x_floor = np.clip(x_floor, 0, last_row)
    x_ceil = np.clip(x_ceil, 0, last_row)
    y_floor = np.clip(y_floor, 0, last_col)
    y_ceil = np.clip(y_ceil, 0, last_col)

    tar_img = np.expand_dims(p2 * q2, axis=1) * src_img[x_floor, y_floor, :] + \
              np.expand_dims(p1 * q2, axis=1) * src_img[x_ceil, y_floor, :] + \
              np.expand_dims(q1 * p1, axis=1) * src_img[x_ceil, y_ceil, :] + \
              np.expand_dims(q1 * p2, axis=1) * src_img[x_floor, y_ceil, :]

    return np.reshape(tar_img, (height, width, src_img.shape[2]))
def cvtColor(src_img):
    """
    Convert a colour image to grayscale (named after the OpenCV function).

    The gray value is the sum over the channel axis divided by 3, truncated
    to an unsigned 8-bit integer.

    :param src_img: colour image of shape (rows, cols, channels)
    :return: grayscale image of shape (rows, cols), dtype uint8
    """
    channel_total = np.sum(src_img, axis=2)
    channel_mean = channel_total / 3
    return channel_mean.astype(np.uint8)
import multiprocessing

import numpy as np

# Padding modes accepted by both convolution classes.
_MODES = ('same', 'full', 'valid')


def _resolve_padding(mode, k, s, size):
    """Return the ``(before, after)`` zero padding for one spatial axis.

    :param mode: one of ``'same'``, ``'valid'``, ``'full'``
    :param k: kernel size
    :param s: stride
    :param size: length of the axis before padding
    :raises ValueError: if ``mode`` is not a known padding mode
    """
    if mode == 'valid':
        return 0, 0
    if mode == 'full':
        return k - 1, k - 1
    if mode == 'same':
        # Total padding that makes the output length equal to ``size``.
        total = s * (size - 1) + k - size
        before = total // 2
        # When ``total`` is odd the extra cell goes after the data; padding
        # ``total // 2`` on both sides would leave the output one short.
        return before, total - before
    raise ValueError(f"mode must be one of {_MODES}, got {mode!r}")


def _pad_image(image, mode, k, s):
    """Zero-pad the last two axes of ``image``.

    :return: ``(padded, out_h, out_w)`` where ``padded`` is a new float array
             and ``out_h``/``out_w`` are the output sizes of the convolution
    """
    h, w = image.shape[-2:]
    top, bottom = _resolve_padding(mode, k, s, h)
    left, right = _resolve_padding(mode, k, s, w)
    padded = np.zeros(image.shape[:-2] + (h + top + bottom, w + left + right))
    padded[..., top:top + h, left:left + w] = image
    out_h = (h + top + bottom - k) // s + 1
    out_w = (w + left + right - k) // s + 1
    return padded, out_h, out_w


def _im2col(padded, k, s, out_h, out_w):
    """Unfold a ``(c, H, W)`` array into an ``(out_h * out_w, c * k * k)`` matrix.

    Row ``i * out_w + j`` holds the flattened ``(c, k, k)`` window whose top
    left corner is at ``(i * s, j * s)``. The loop runs over the k*k kernel
    offsets, each handled with one strided slice copy.
    """
    c = padded.shape[0]
    cols = np.empty((c, k, k, out_h, out_w), dtype=padded.dtype)
    for di in range(k):
        for dj in range(k):
            cols[:, di, dj] = padded[:, di:di + s * out_h:s, dj:dj + s * out_w:s]
    return cols.transpose(3, 4, 0, 1, 2).reshape(out_h * out_w, c * k * k)


class _Conv2dBase:
    """Parameters, validation and the final matrix product shared by both classes."""

    def __init__(self, k, s, c_in, c_out, mode='same', weight=None):
        '''

        :param k: kernel size
        :param s: stride
        :param c_in: in feature maps
        :param c_out: out feature maps
        :param mode: choose in ['same','valid','full']
        :param weight: kernel data; random values of shape
                       (c_in, k, k, c_out) are drawn when omitted
        :raises ValueError: on an unknown mode or a non-positive k / s
        '''
        # Explicit raises instead of ``assert`` so the checks survive ``python -O``.
        if mode not in _MODES:
            raise ValueError(f"mode must be one of {_MODES}, got {mode!r}")
        if k < 1 or s < 1:
            raise ValueError(f"kernel size and stride must be >= 1, got k={k}, s={s}")
        self.k, self.s, self.c_in, self.c_out, self.mode = k, s, c_in, c_out, mode
        if weight is None:
            self.weight = np.random.random((c_in, k, k, c_out))
        else:
            self.weight = np.asarray(weight)

    def _as_chw(self, image):
        """Return ``image`` as an array after checking it is ``(c_in, h, w)``."""
        image = np.asarray(image)
        if image.ndim != 3:
            raise ValueError(f"expected a (c, h, w) image, got shape {image.shape}")
        if image.shape[0] != self.c_in:
            raise ValueError(
                f"image in channel {image.shape[0]} not equal to weight channel {self.c_in}")
        return image

    def _project(self, image_mat, out_h, out_w):
        """Multiply the patch matrix by the kernel and return ``(c_out, out_h, out_w)``."""
        res = np.dot(image_mat, self.weight.reshape(-1, self.c_out))
        return res.reshape(out_h, out_w, self.c_out).transpose(2, 0, 1)


class Conv2d_MULTITHREADS(_Conv2dBase):
    '''
    Convolution implemented as a matrix multiplication, with the patch
    matrix built by two worker processes (despite the class name these are
    ``multiprocessing`` processes, not threads).
    '''

    def workers(self, out_h_start, out_h_end, out_w, c, padded_img, RES, ind=0, PID=0):
        '''
        Build the patch matrix for output rows [out_h_start, out_h_end).

        :param out_h_start: first output row handled by this worker
        :param out_h_end: one past the last output row handled
        :param out_w: number of output columns
        :param c: number of channels in ``padded_img``
        :param padded_img: zero-padded image indexed as (channel, row, column)
        :param RES: mapping that receives the result under key ``PID``
        :param ind: row of the local patch matrix at which writing starts
        :param PID: key under which the patch matrix is stored in ``RES``
        '''
        image_mat = np.zeros(((out_h_end - out_h_start) * out_w, self.k * self.k * c))
        # Kept as an explicit per-window loop so ``ind`` keeps its meaning.
        for i in range(out_h_start, out_h_end):
            for j in range(out_w):
                window = padded_img[:, i * self.s:(i * self.s + self.k), j * self.s:(j * self.s + self.k)]
                image_mat[ind] = window.flatten()
                ind += 1
        RES[PID] = image_mat

    def __call__(self, image):
        '''
        Convolve a (c_in, h, w) image; the kernel is applied without flipping.

        :param image: array indexed as (channel, row, column)
        :return: array of shape (c_out, out_h, out_w)
        :raises ValueError: if the image is not 3-D or has the wrong channel count
        :raises RuntimeError: if a worker process exits with a non-zero code
        '''
        image = self._as_chw(image)
        c = image.shape[0]
        padded_img, out_h, out_w = _pad_image(image, self.mode, self.k, self.s)

        # Split the output rows into two halves, one per process.
        half = out_h // 2
        jobs = ((0, half, 0), (half, out_h, 1))
        # The ``with`` block shuts the manager's server process down even on error.
        with multiprocessing.Manager() as manager:
            RES = manager.dict()
            procs = [
                multiprocessing.Process(
                    target=self.workers,
                    args=(start, stop, out_w, c, padded_img, RES, 0, pid))
                for start, stop, pid in jobs
            ]
            for proc in procs:
                proc.start()
            for proc in procs:
                proc.join()
            failed = [proc.exitcode for proc in procs if proc.exitcode != 0]
            if failed:
                # Otherwise a crashed worker would only show up as a KeyError below.
                raise RuntimeError(f"convolution worker(s) failed with exit codes {failed}")
            # Read the results while the manager is still alive.
            image_mat = np.concatenate([RES[pid] for _, _, pid in jobs])

        # Matrix multiplication.
        return self._project(image_mat, out_h, out_w)


class Conv2d(_Conv2dBase):
    '''
    Convolution implemented as a matrix multiplication (im2col). The kernel
    is applied without flipping.
    '''

    def filter(self, image):
        '''
        Filter a single-channel 2-D image with the kernel.

        ``self.weight`` is flattened for the product, so it must hold exactly
        k * k values.

        :param image: 2-D array indexed as (row, column)
        :return: 2-D array of shape (out_h, out_w)
        :raises ValueError: if the image is not 2-D
        '''
        image = np.asarray(image)
        if image.ndim != 2:
            raise ValueError(f"expected a 2-D image, got shape {image.shape}")
        padded_img, out_h, out_w = _pad_image(image, self.mode, self.k, self.s)
        # Add a channel axis of length 1 so the shared unfolding can be reused.
        image_mat = _im2col(padded_img[np.newaxis], self.k, self.s, out_h, out_w)
        # Matrix multiplication.
        res = np.dot(image_mat, self.weight.flatten())
        return np.reshape(res, (out_h, out_w))

    def __call__(self, image):
        '''
        Convolve a (c_in, h, w) image.

        :param image: array indexed as (channel, row, column)
        :return: array of shape (c_out, out_h, out_w)
        :raises ValueError: if the image is not 3-D or has the wrong channel count
        '''
        image = self._as_chw(image)
        padded_img, out_h, out_w = _pad_image(image, self.mode, self.k, self.s)
        image_mat = _im2col(padded_img, self.k, self.s, out_h, out_w)
        # Matrix multiplication.
        return self._project(image_mat, out_h, out_w)


if __name__ == "__main__":
    image = np.ones((3, 10, 10))
    conv2d = Conv2d(3, 1, 3, 16, 'valid')
    res = conv2d(image)
    print(res.shape)