├── .gitignore
├── Dockerfile
├── README.md
├── answer_card.jpg
├── app.py
├── detection_choice_question.py
├── detection_exam_num.py
├── main.py
├── notebook
    ├── answer_card_detection.ipynb
    └── choice_option_detection.ipynb
├── pic
    ├── answer_card.jpg
    ├── choice_question_detection_1.jpg
    ├── choice_question_detection_2.png
    ├── choice_question_detection_3.png
    ├── choice_question_detection_4.png
    ├── choice_question_detection_5.png
    ├── num_card_detection_1.png
    ├── num_card_detection_2.png
    ├── num_card_detection_3.png
    ├── special_choice_question.png
    ├── special_num_card_1.png
    ├── special_num_card_2.png
    ├── sub_answer_card_1.jpg
    ├── sub_answer_card_2.jpg
    ├── sub_answer_card_3.jpg
    ├── sub_answer_card_4.jpg
    ├── sub_answer_card_5.jpg
    ├── sub_answer_card_6.jpg
    ├── sub_answer_card_7.jpg
    └── sub_answer_card_8.jpg
├── requirements.txt
├── settings.py
├── test
    ├── test_cnocr.py
    └── test_pytesseract.py
└── utils.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *.jpg
2 | *.png
3 | *.pyc
4 | *.vscode
5 | *.ipynb_checkpoints
6 | *.idea


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM registry-vpc.cn-shenzhen.aliyuncs.com/spark-base/anaconda3:5.2.0
 2 | 
 3 | WORKDIR /app
 4 | 
 5 | COPY requirements.txt requirements.txt
 6 | 
 7 | RUN set -x \
 8 |     && pip install --no-cache-dir -r requirements.txt
 9 | 
10 | COPY . .
11 | 
12 | CMD ["tail","-f","/dev/stderr"]
13 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # teaching-answer-card-tool
 2 | 
 3 | 答题卡检测
 4 | 
 5 | ### 1、多张图连续检测
 6 | 功能：如果一套试卷的答题卡数量大于1，也支持检测，并且按试题顺序拼凑检测结果（即下面这套试卷假设存在位于第二张答题卡上的第八题，支持连续检测）
 7 | 
 8 | ![](pic/answer_card.jpg)
 9 | 
10 | ### 2、试题切割
11 | 功能：
12 | - 将答题卡按试题切成一张张图片，并将图片与题序一一对应
13 | - 程序能知道第六大题存在2张图片
14 | 
15 | 程序执行结果：
16 | 第一题
17 | ![](pic/sub_answer_card_1.jpg)
18 | 
19 | 第二题
20 | ![](pic/sub_answer_card_2.jpg)
21 | 
22 | 第三题
23 | ![](pic/sub_answer_card_3.jpg)
24 | 
25 | 第四题
26 | ![](pic/sub_answer_card_4.jpg)
27 | 
28 | 第五题
29 | ![](pic/sub_answer_card_5.jpg)
30 | 
31 | 第六题（程序能知道第六大题存在2张图片）
32 | ![](pic/sub_answer_card_6.jpg)
33 | 
34 | ![](pic/sub_answer_card_7.jpg)
35 | 
36 | 第七题：
37 | 
38 | ![](pic/sub_answer_card_8.jpg)
39 | 
40 | 
41 | ### 3、学生考号识别
42 | #### 1）切割出学生手写的准考证号
43 | ![](pic/num_card_detection_1.png)
44 | 
45 | #### 2）识别出学生填充的准考证号
46 | 
47 | 识别结果：53311
48 | 
49 | ![](pic/num_card_detection_2.png)
50 | 
51 | ![](pic/num_card_detection_3.png)
52 | 
53 | 
54 | ### 4、选择题自动批改检测
55 | 识别结果（key：题序，value：答案的索引）：
56 | 
57 |  {1: [2], 2: [1], 3: [3], 4: [3], 5: [1], 6: [1], 7: [4], 8: [2], 9: [4], 10: [1], 11: [3], 12: [1], 13: [1], 14: [1], 15: [1]}
58 | 
59 | 选择题自动批改支持以下变化
60 | - 无论试题的候选项有四个还是五六七八个，都支持识别
61 | - 答案无论是一个还是多个，都支持识别
62 | - 选择题无论是竖着排列、横着排列，还是乱序排列，都支持识别
63 | 
64 | 
65 | 原题：
66 | ![](pic/choice_question_detection_1.jpg)
67 | 
68 | 识别出所有的轮廓：
69 | 
70 | ![](pic/choice_question_detection_2.png)
71 | 
72 | 筛选出答案：
73 | 
74 | ![](pic/choice_question_detection_3.png)
75 | 
76 | 筛选出候选框：
77 | 
78 | ![](pic/choice_question_detection_4.png)
79 | 
80 | 筛选出题序：
81 | 
82 | ![](pic/choice_question_detection_5.png)
83 | 
84 | 根据题序及其坐标，寻找到对应题序的答案与候选框，进行自动批改
85 | 
86 | 
87 | ### 5、特殊逻辑处理
88 | （1）填充区域出现连接的情况，只要连接得不是太过分，都支持识别
89 | 
90 |  ![](pic/special_num_card_1.png)
91 | 
92 |  ![](pic/special_num_card_2.png)
93 | 
94 | （2）填充区域，先填充，再擦除，存在曾经填充过的痕迹，目前已经优化，支持准确识别
95 | 
96 |  ![](pic/special_choice_question.png)
97 | 
98 | 
99 | 


--------------------------------------------------------------------------------
/answer_card.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wasim37/answer_card_detection/b45b2c4fc06f757a78eb0b16188b939bdb17267f/answer_card.jpg


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Author: wangxin
 3 | Date: 2021-05-25 12:12:52
 4 | LastEditTime: 2021-07-01 14:00:58
 5 | LastEditors: Please set LastEditors
 6 | Description: In User Settings Edit
 7 | '''
 8 | 
 9 | from flask import Flask
10 | 
11 | 
app = Flask(__name__)


@app.route('/detection', methods=["POST"])
def answer_card_detection():
    """Placeholder handler for the answer-card detection endpoint.

    Returns:
        str: A fixed placeholder message; no detection is performed here.
    """
    return "我是一个接口"


# Can also be started with: python3 -m flask run
if __name__ == '__main__':
    import os

    # The Werkzeug debugger lets a client execute arbitrary Python code, so it
    # must not be on by default for a server bound to every interface.
    # Opt in explicitly (e.g. FLASK_DEBUG=1) for local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(host='0.0.0.0', port=8890, debug=debug_enabled)
23 | 


--------------------------------------------------------------------------------
/detection_choice_question.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | Author: wangxin
  3 | Date: 2021-05-25 10:31:01
  4 | LastEditTime: 2021-07-01 14:11:58
  5 | LastEditors: Please set LastEditors
  6 | Description: 选择题自动识别与批改
  7 | '''
  8 | 
  9 | # coding=utf-8
 10 | import cv2
 11 | import numpy as np
 12 | import pytesseract
 13 | from pytesseract import Output
 14 | from settings import ANSWER_CARD_MIN_WIDTH, ANSWER_CARD_SIZE
 15 | from utils import sort_contours, save_img_by_cnts
 16 | 
 17 | 
 18 | def detection_choice_question(images_path, ocr):
 19 |     """ 选择题自动识别与批改
 20 | 
 21 |     Args:
 22 |         images_path (list): 图片地址列表
 23 |     Returns:
 24 |         [list]: 每张图片的识别结果
 25 |     """
 26 | 
 27 |     sub_answer_cnt_szie = 0
 28 |     question_answers = []
 29 |     for img_path in images_path:
 30 |         image = cv2.imread(img_path)
 31 |         if not is_choice_question(image):
 32 |             continue
 33 |         
 34 |         # 获取图片中填充的全部答案轮廓
 35 |         answer_option_cnts = get_answer_option_cnts(image)
 36 |         if len(answer_option_cnts) > 0:
 37 |             save_img_by_cnts('out/answer_cnt_' + str(sub_answer_cnt_szie) + '.png', image.shape[:2], answer_option_cnts)
 38 | 
 39 |         # 所有被填充的选择项的中心的x坐标
 40 |         answer_options_center_x = get_cnt_center_x(answer_option_cnts)
 41 |         # 所有未被填充的选择项的中心的x坐标
 42 |         choice_options_center_x = get_choice_option_center_x(img_path)
 43 |         # 所有选择项的中心的x坐标
 44 |         all_options_center_x = answer_options_center_x + choice_options_center_x
 45 | 
 46 |         # 获取所有选择项的轮廓及其题序轮廓
 47 |         all_choice_option_cnts, question_number_cnts = get_choice_option_cnts(image, all_options_center_x)
 48 |         if len(all_choice_option_cnts) > 0:
 49 |             save_img_by_cnts('out/choice_cnt_' + str(sub_answer_cnt_szie) + '.png', image.shape[:2], all_choice_option_cnts)
 50 |             save_img_by_cnts('out/ques_num_' + str(sub_answer_cnt_szie) + '.png', image.shape[:2], question_number_cnts)
 51 | 
 52 |         sub_answer_cnt_szie = sub_answer_cnt_szie + 1
 53 | 
 54 |         # 选择题自动批改
 55 |         if len(all_choice_option_cnts) > 0:
 56 |             question_answer_dict = get_choice_question_answer_index(image, all_choice_option_cnts, answer_option_cnts, question_number_cnts, ocr)
 57 |             question_answers.append(question_answer_dict)
 58 |     return question_answers
 59 | 
 60 | 
 61 | def get_answer_option_cnts(img):
 62 |     """ 识别图片中的填充的全部答案轮廓
 63 | 
 64 |     Args:
 65 |         img_path (String): 图片
 66 | 
 67 |     Returns:
 68 |         [list]: 候选项轮廓
 69 |     """
 70 |     
 71 |     # 转灰度
 72 |     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
 73 |     # OTSU二值化（黑底白字）
 74 |     thresh_img = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
 75 | 
 76 |     # 腐蚀
 77 |     kernel = np.ones((5, 5), np.uint8)
 78 |     erode_img = cv2.erode(thresh_img, kernel, iterations=1)
 79 |     # 膨胀
 80 |     kernel = np.ones((9, 9), np.uint8)
 81 |     dilate_img = cv2.dilate(erode_img, kernel, iterations=1)
 82 | 
 83 |     # 提取答案的轮廓
 84 |     answer_cnts, _ = cv2.findContours(dilate_img.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
 85 | 
 86 |     # 减少答案轮廓的边数
 87 |     answer_option_cnts = []
 88 |     for cnt in answer_cnts:
 89 |         peri = cv2.arcLength(cnt, True)
 90 |         approx = cv2.approxPolyDP(cnt, 0.06 * peri, True)
 91 |         answer_option_cnts.append(approx)
 92 | 
 93 |     # self.assertTrue(choiceAnswerCnts % 4 == 0, "候选框提取异常, 提取的数量不是4的整数")
 94 |     return answer_option_cnts
 95 | 
 96 | 
 97 | def get_choice_option_cnts(img, all_options_center_x):
 98 |     """识别图片中的所有的选择项轮廓与题序轮廓
 99 | 
100 |     Args:
101 |         img ([type]): [description]
102 |         all_option_center_x ([type]): [description]
103 | 
104 |     Returns:
105 |         [type]: [description]
106 |     """
107 |     # 灰度
108 |     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
109 | 
110 |     # 二值化（黑底白字）
111 |     thresh_img = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
112 | 
113 |     # 对高亮部分膨胀
114 |     # 因为候选区域由三部分组成（左括号、右括号、大写的英文字母），通过膨胀将三个区域连成一片
115 |     kernel = np.ones((11, 11), np.uint8)
116 |     dilate_img = cv2.dilate(thresh_img, kernel, iterations=1)
117 | 
118 |     # 提取膨胀后的轮廓
119 |     option_cnts, _ = cv2.findContours(dilate_img.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
120 | 
121 |     # 所有候选框的轮廓
122 |     choice_option_cnts = []
123 |     # 每道选择题的题序
124 |     question_number_cnts = []
125 |     for c in option_cnts:
126 |         peri = cv2.arcLength(c, True)
127 |         area = cv2.contourArea(c)
128 |         approx = cv2.approxPolyDP(c, 0.1 * peri, True)
129 |         (x, y, w, h) = cv2.boundingRect(approx)
130 |         ar = h / float(w)
131 | 
132 |         # 筛选轮廓为四边形的目前轮廓
133 |         #     if y >= 60 and w >= 20 and w <= 60 and ar >= 1 and ar <= 2 and area > 700:
134 |         if y >= 60 and ar > 0.5 and ar < 2:
135 |             if is_choice_option(x, w, all_options_center_x) and area > 400:
136 |                 choice_option_cnts.append(c)
137 |             elif not is_choice_option(x, w, all_options_center_x) and area > 100:
138 |                 question_number_cnts.append(c)
139 |     return choice_option_cnts, question_number_cnts
140 | 
141 | 
def is_choice_option(x, w, all_option_center_x):
    """Tell whether any known option centre lies strictly inside ``(x, x + w)``.

    Args:
        x: Left edge of the candidate box.
        w: Width of the candidate box.
        all_option_center_x: Iterable of option centre x coordinates.

    Returns:
        bool: True if at least one centre is strictly between the box edges.
    """
    right_edge = x + w
    return any(x < center < right_edge for center in all_option_center_x)
147 | 
148 | 
def get_cnt_center_x(cnts):
    """Return the centre x coordinate of each contour's bounding box.

    Args:
        cnts (list): Contours.

    Returns:
        list: One centre x value per contour, in input order.
    """
    boxes = (cv2.boundingRect(contour) for contour in cnts)
    return [(2 * left + width) / 2 for left, _, width, _ in boxes]
163 | 
164 | 
def get_choice_option_center_x(img):
    """Return the centre x of every OCR word that looks like an unfilled option.

    Args:
        img: Path of the image file (it is loaded with ``cv2.imread``).

    Returns:
        list: Centre x of each recognised word located below y = 60 whose
            text contains one of the letters A, B, C or D.
    """
    picture = cv2.imread(img)
    ocr_data = pytesseract.image_to_data(picture, output_type=Output.DICT, lang='chi_sim')

    words = zip(ocr_data['text'], ocr_data['left'], ocr_data['top'], ocr_data['width'])
    return [
        (2 * left + width) / 2
        for text, left, top, width in words
        if top > 60 and any(letter in text for letter in 'ABCD')
    ]
184 | 
185 | 
def get_answer_card_cnts(img):
    """Locate the large rectangular answer regions of an answer card.

    The external contours are visited from largest to smallest area; every
    contour whose polygon approximation has four vertices is kept until
    ``ANSWER_CARD_SIZE`` of them have been collected.

    Args:
        img: Single-channel image passed directly to ``cv2.findContours``
            (presumably already binarised by the caller - TODO confirm).

    Returns:
        The four-vertex approximations ordered left to right, or an empty
        list when no contour or no quadrilateral is found. An empty list is
        returned instead of None so that callers can safely use ``len()``.
    """
    cnts, _ = cv2.findContours(img.copy(), cv2.RETR_EXTERNAL,
                               cv2.CHAIN_APPROX_SIMPLE)

    answer_cnts = []
    for c in sorted(cnts, key=cv2.contourArea, reverse=True):
        # The raw contour may have many vertices; approximate it so that a
        # rectangular region collapses to exactly four points.
        approx = cv2.approxPolyDP(c, 0.02 * cv2.arcLength(c, True), True)
        if len(approx) == 4:
            answer_cnts.append(approx)

        # Stop as soon as the configured number of regions has been found.
        if len(answer_cnts) == ANSWER_CARD_SIZE:
            break

    # Nothing to sort: avoid handing an empty list to sort_contours.
    if not answer_cnts:
        return []
    return sort_contours(answer_cnts, method="left-to-right")[0]
231 | 
232 | 
def get_sub_answer_card_cnts(img_path):
    """Find the wide sub-regions of an answer-card image.

    Args:
        img_path: Path of the image file (it is loaded with ``cv2.imread``).

    Returns:
        Polygon approximations of the contours, among the 20 largest by area,
        whose bounding box is wider than ``ANSWER_CARD_MIN_WIDTH``, ordered
        top to bottom by ``sort_contours``.
    """
    picture = cv2.imread(img_path)
    grayscale = cv2.cvtColor(picture, cv2.COLOR_BGR2GRAY)

    # Inverted Otsu threshold.
    _, binary = cv2.threshold(grayscale, 0, 255,
                              cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
    # RETR_LIST: inner and outer contours alike, without any hierarchy.
    contours, _ = cv2.findContours(binary.copy(), cv2.RETR_LIST,
                                   cv2.CHAIN_APPROX_SIMPLE)

    # Only the 20 largest contours are considered.
    largest = sorted(contours, key=cv2.contourArea, reverse=True)[:20]
    wide_regions = [
        cv2.approxPolyDP(contour, 0.02 * cv2.arcLength(contour, True), True)
        for contour in largest
        if cv2.boundingRect(contour)[2] > ANSWER_CARD_MIN_WIDTH
    ]

    # Order the regions from top to bottom.
    return sort_contours(wide_regions, method="top-to-bottom")[0]
286 | 
287 | 
def get_question_num_dict(image, question_number_cnts, ocr):
    """Read the question number inside each candidate contour.

    Args:
        image: Image the contours were extracted from; it is cropped with
            array slicing before being passed to the OCR engine.
        question_number_cnts: Candidate question-number contours.
        ocr: OCR engine exposing ``ocr_for_single_line``.

    Returns:
        dict: Question number -> ``(x, y, w, h)`` bounding box of its
            contour, with keys in ascending order. If two contours yield the
            same number, the later one wins.
    """
    found = {}
    for cnt in question_number_cnts:
        # The crop handed to OCR uses the bounding box of the simplified outline.
        approx = cv2.approxPolyDP(cnt, 0.1 * cv2.arcLength(cnt, True), True)
        (x, y, w, h) = cv2.boundingRect(approx)

        text = ocr.ocr_for_single_line(image[y:y + h, x:x + w])
        question_num = ''.join(text).replace('.', '')

        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits, which int() rejects with ValueError.
        if question_num.isdecimal():
            # The stored box is that of the full contour, not of the outline.
            found[int(question_num)] = cv2.boundingRect(cnt)

    # Return the entries ordered by question number.
    return {num: found[num] for num in sorted(found)}
318 | 
319 | 
def _rect_center_x(rect):
    """Return the horizontal centre of an ``(x, y, w, h)`` bounding box."""
    x, _, w, _ = rect
    return (2 * x + w) / 2


def _in_question_span(rect, row_y, left_x, right_x):
    """Return True if ``rect`` crosses the row ``row_y`` and its centre x lies strictly between ``left_x`` and ``right_x``."""
    _, y, _, h = rect
    return y < row_y < y + h and left_x < _rect_center_x(rect) < right_x


def get_choice_question_answer_index(image, choice_option_cnts, answer_option_cnts, question_number_cnts, ocr):
    """Grade the multiple-choice questions of one image.

    For every recognised question number, the options and filled marks that
    belong to it are those on the same row, to the right of the number and
    to the left of the next question-number contour on that row. A question
    may have no filled option or several.

    Args:
        image: Image the contours were extracted from.
        choice_option_cnts (list): Contours of all option boxes.
        answer_option_cnts (list): Contours of the marks filled in.
        question_number_cnts (list): Contours of the question numbers.
        ocr: OCR engine forwarded to ``get_question_num_dict``.

    Returns:
        dict: Question number -> list of 1-based indexes (counted left to
            right among that question's options) of the filled options,
            with keys in ascending order.
    """
    question_num_dict = get_question_num_dict(image, question_number_cnts, ocr)

    # Bounding boxes do not depend on the question, so compute them once.
    number_rects = [cv2.boundingRect(c) for c in question_number_cnts]
    answer_rects = [cv2.boundingRect(c) for c in answer_option_cnts]
    option_rects = [cv2.boundingRect(c) for c in choice_option_cnts]

    question_answer_dict = {}
    for question_num, (num_x, num_y, num_w, num_h) in question_num_dict.items():
        num_center_x = (2 * num_x + num_w) / 2
        num_center_y = (2 * num_y + num_h) / 2

        # Centre x of the nearest question number to the right on the same
        # row; infinity when this is the last question of the row.
        right_bound_x = min(
            (
                _rect_center_x(rect)
                for rect in number_rects
                if _in_question_span(rect, num_center_y, num_center_x, float("inf"))
            ),
            default=float("inf"),
        )

        # Centre x of every filled mark that belongs to this question.
        answers_center_x = [
            _rect_center_x(rect)
            for rect in answer_rects
            if _in_question_span(rect, num_center_y, num_center_x, right_bound_x)
        ]

        # Option boxes that belong to this question.
        question_option_cnts = [
            cnt
            for cnt, rect in zip(choice_option_cnts, option_rects)
            if _in_question_span(rect, num_center_y, num_center_x, right_bound_x)
        ]

        answer_indexes = []
        # Skip sorting when no option was found for this question, so that
        # sort_contours is never called with an empty list.
        if question_option_cnts:
            question_option_cnts, _ = sort_contours(question_option_cnts, 'left-to-right')
            for answer_index, option_cnt in enumerate(question_option_cnts, start=1):
                (x, _, w, _) = cv2.boundingRect(option_cnt)
                # The option is selected if any mark centre falls inside it.
                if any(x < center_x < x + w for center_x in answers_center_x):
                    answer_indexes.append(answer_index)
        question_answer_dict[question_num] = answer_indexes

    return dict(sorted(question_answer_dict.items(), key=lambda item: item[0]))
389 | 
390 | 
def is_choice_question(img):
    """Tell whether the image contains a multiple-choice section.

    Args:
        img: Image handed to ``pytesseract.image_to_data``.

    Returns:
        bool: True if one of the recognised words is exactly ``[A]``,
            ``[B]``, ``[C]`` or ``[D]``; False otherwise.
    """
    recognised_words = pytesseract.image_to_data(img, output_type=Output.DICT, lang='chi_sim')['text']
    option_markers = ('[A]', '[B]', '[C]', '[D]')
    return any(marker in recognised_words for marker in option_markers)
403 | 


--------------------------------------------------------------------------------
/detection_exam_num.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | Author: wangxin
  3 | Date: 2021-05-25 10:31:01
  4 | LastEditTime: 2021-07-01 13:55:35
  5 | LastEditors: Please set LastEditors
  6 | Description: 检测考号
  7 | '''
  8 | 
  9 | # coding=utf-8
 10 | import cv2
 11 | import numpy as np
 12 | from PIL import Image
 13 | from utils import sort_contours
 14 | 
 15 | 
 16 | def get_exam_num_area(image_path):
 17 |     """ 获取图片中待检测的考号填充区域
 18 | 
 19 |     Args:
 20 |         image_path (String): 图片地址
 21 | 
 22 |     Returns:
 23 |         [type]: [description]
 24 |     """
 25 |     image = Image.open(image_path)
 26 |     image_width = image.width
 27 | 
 28 |     img = cv2.imread(image_path)
 29 |     gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
 30 |     threshold_img = cv2.threshold(gray_img, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
 31 | 
 32 |     # 先腐蚀与膨胀, 高亮化学生填充的考号
 33 |     kernel = np.ones((9, 9), np.uint8)
 34 |     erode_img = cv2.erode(threshold_img, kernel, iterations=1)
 35 |     kernel = np.ones((9, 9), np.uint8)
 36 |     dilate_img = cv2.dilate(erode_img, kernel, iterations=1)
 37 | 
 38 |     # 学生填充的考号，最左边边缘的x轴坐标
 39 |     exam_number_left_x = float("inf")
 40 |     # 学生填充的考号，最右边边缘的x轴坐标
 41 |     exam_number_right_x = 0
 42 |     cnts, _ = cv2.findContours(dilate_img.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
 43 |     for c in cnts:
 44 |         (x, y, w, h) = cv2.boundingRect(c)
 45 |         if x > image_width / 2:
 46 |             if x < exam_number_left_x:
 47 |                 exam_number_left_x = x
 48 |             if x + w > exam_number_right_x:
 49 |                 exam_number_right_x = x + w
 50 | 
 51 |     # 通过x轴坐标，缩小待检测区域的范围
 52 |     threshold_img = threshold_img[:, exam_number_left_x - 15:exam_number_right_x + 15]
 53 | 
 54 |     # 再通过检测图片中面积最大的轮廓（考号手写区域, 而不是填充区域）, 进一步缩小范围
 55 |     cnts, _ = cv2.findContours(threshold_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
 56 |     cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
 57 |     mix_y = None
 58 |     num_card_cnt = None
 59 |     for c in cnts:
 60 |         peri = cv2.arcLength(c, True)
 61 |         approx = cv2.approxPolyDP(c, 0.06 * peri, True)
 62 |         (x, y, w, h) = cv2.boundingRect(approx)
 63 |         if len(approx) == 4:
 64 |             cv2.imwrite('out/num_card.jpg', threshold_img[y:y + h, x:x + w])
 65 |             num_card_cnt = c
 66 |             mix_y = y + h
 67 |             break
 68 | 
 69 |     threshold_img = threshold_img[mix_y:, :]
 70 |     return threshold_img, num_card_cnt
 71 | 
 72 | 
 73 | def get_exam_num_height(img):
 74 |     """ 获取考号填充区域, 行中心与行中心的y轴坐标间隔
 75 | 
 76 |     Args:
 77 |         img ([type]): 图片
 78 | 
 79 |     Returns:
 80 |         [float]: 行中心与行中心的y轴坐标间隔
 81 |     """
 82 | 
 83 |     # 膨胀
 84 |     kernel = np.ones((5, 5), np.uint8)
 85 |     dilate_img = cv2.dilate(img, kernel, iterations=1)
 86 | 
 87 |     # 第一行待填充考号的中心的x轴坐标
 88 |     first_line_center_y = None
 89 |     # 第二行待填充考号的中心的x轴坐标
 90 |     second_line_center_y = None
 91 |     # 第一行待填充考号的底部边缘的y坐标
 92 |     first_line_bottom_y = None
 93 |     cnts, _ = cv2.findContours(dilate_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
 94 |     cnts, _ = sort_contours(cnts, 'top-to-bottom')
 95 |     for c in cnts:
 96 |         peri = cv2.arcLength(c, True)
 97 |         approx = cv2.approxPolyDP(c, 0.06 * peri, True)
 98 |         (x, y, w, h) = cv2.boundingRect(approx)
 99 |         center_y = (2 * y + h) / 2
100 | 
101 |         if h > 10:
102 |             if first_line_center_y is None:
103 |                 first_line_center_y = center_y
104 |                 first_line_bottom_y = y + h
105 | 
106 |             if center_y > first_line_bottom_y and second_line_center_y is None:
107 |                 second_line_center_y = center_y
108 |                 break
109 | 
110 |     print(type(second_line_center_y))
111 |     return second_line_center_y - first_line_center_y
112 | 
113 | 
def detection_exam_num(image_path):
    """Read the exam number a student filled in on the image.

    Args:
        image_path (String): Path of the image file.

    Returns:
        list: One integer per filled mark, ordered left to right; each value
            is the mark's top y divided by the row spacing, truncated.
    """
    # Bubble area and the vertical spacing between two consecutive rows.
    bubble_area, _ = get_exam_num_area(image_path)
    row_spacing = get_exam_num_height(bubble_area)

    # Erode then dilate with a 9x9 kernel so that only filled marks remain.
    square = np.ones((9, 9), np.uint8)
    cleaned = cv2.dilate(cv2.erode(bubble_area, square, iterations=1), square, iterations=1)

    contours, _ = cv2.findContours(cleaned, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    ordered, _ = sort_contours(contours, 'left-to-right')

    digits = []
    for contour in ordered:
        outline = cv2.approxPolyDP(contour, 0.06 * cv2.arcLength(contour, True), True)
        top = cv2.boundingRect(outline)[1]
        digits.append(int(top / row_spacing))
    return digits
145 | 
146 | 
if __name__ == '__main__':
    # Run the detection on a previously generated sub-card image.
    print('num_card: ', detection_exam_num('out/sub_answer_card_0.jpg'))
150 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Author: wangxin
 3 | Date: 2021-05-25 10:29:58
 4 | LastEditTime: 2021-07-01 14:12:15
 5 | LastEditors: Please set LastEditors
 6 | Description: In User Settings Edit
 7 | '''
 8 | 
 9 | from detection_choice_question import get_answer_card_cnts, get_sub_answer_card_cnts, detection_choice_question
10 | from detection_exam_num import detection_exam_num
11 | from settings import TITLE_NUM
12 | from utils import get_init_process_img, capture_img, ocr_single_line_img
13 | from cnocr import CnOcr
14 | 
15 | 
def demo(origin_image_path):
    """Run the whole pipeline on one answer-card image and print each result.

    Steps: split the card into its answer regions, split each region into
    per-question images, find the images that start a new section, read the
    exam number and grade the multiple-choice questions. Intermediate images
    are written under ``out/``.

    Args:
        origin_image_path: Path of the answer-card image.
    """
    # Split the card into its large answer regions.
    image = get_init_process_img(origin_image_path)
    answer_cnts = get_answer_card_cnts(image)
    answer_card_images_path = []
    # get_answer_card_cnts may return None when nothing is detected.
    if answer_cnts is not None:
        for number, c in enumerate(answer_cnts, start=1):
            answer_card_image_path = 'out/answer_card_' + str(number) + '.jpg'
            answer_card_images_path.append(answer_card_image_path)
            capture_img(origin_image_path, answer_card_image_path, c)
    print('答题卡左右区域切分结果：', answer_card_images_path)

    # Split each region into one image per question block.
    sub_answer_card_images_path = []
    for answer_card_image in answer_card_images_path:
        sub_answer_cnts = get_sub_answer_card_cnts(answer_card_image)
        # When several contours are found the first one is dropped
        # (presumably the frame enclosing the others - TODO confirm).
        if len(sub_answer_cnts) > 1:
            sub_answer_cnts = sub_answer_cnts[1:]

        for c in sub_answer_cnts:
            # Sub-images are numbered from 0 across all regions.
            sub_answer_card_image_path = 'out/sub_answer_card_' + str(len(sub_answer_card_images_path)) + '.jpg'
            sub_answer_card_images_path.append(sub_answer_card_image_path)
            capture_img(answer_card_image, sub_answer_card_image_path, c)
    print('试题切分结果：', sub_answer_card_images_path)

    # Nothing was split out, so there is nothing left to recognise.
    if not sub_answer_card_images_path:
        return

    # Index of every image whose first recognised item is in TITLE_NUM.
    ocr = CnOcr()
    title_index = []
    for index, img in enumerate(sub_answer_card_images_path):
        res = ocr_single_line_img(img, ocr)
        if len(res) > 0 and res[0] in TITLE_NUM:
            title_index.append(index)
    print('每道大题的起始图片索引: ', title_index)

    # The exam number is read from the first sub-image.
    num_card = detection_exam_num(sub_answer_card_images_path[0])
    print('学生考号: ', num_card)

    # Detect and grade the multiple-choice questions.
    question_answer_dict = detection_choice_question(sub_answer_card_images_path, ocr)
    print('每道选择题答案（key 题序, value: 对应题序的答案列表）：', question_answer_dict)
62 |     
63 | 
if __name__ == '__main__':
    # The sample sheet shipped with the repository is pic/answer_card.jpg;
    # the previously referenced pic/answer_card1.jpg is not part of the tree.
    demo('pic/answer_card.jpg')
66 | 


--------------------------------------------------------------------------------
/pic/answer_card.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wasim37/answer_card_detection/b45b2c4fc06f757a78eb0b16188b939bdb17267f/pic/answer_card.jpg


--------------------------------------------------------------------------------
/pic/choice_question_detection_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wasim37/answer_card_detection/b45b2c4fc06f757a78eb0b16188b939bdb17267f/pic/choice_question_detection_1.jpg


--------------------------------------------------------------------------------
/pic/choice_question_detection_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wasim37/answer_card_detection/b45b2c4fc06f757a78eb0b16188b939bdb17267f/pic/choice_question_detection_2.png


--------------------------------------------------------------------------------
/pic/choice_question_detection_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wasim37/answer_card_detection/b45b2c4fc06f757a78eb0b16188b939bdb17267f/pic/choice_question_detection_3.png


--------------------------------------------------------------------------------
/pic/choice_question_detection_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wasim37/answer_card_detection/b45b2c4fc06f757a78eb0b16188b939bdb17267f/pic/choice_question_detection_4.png


--------------------------------------------------------------------------------
/pic/choice_question_detection_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wasim37/answer_card_detection/b45b2c4fc06f757a78eb0b16188b939bdb17267f/pic/choice_question_detection_5.png


--------------------------------------------------------------------------------
/pic/num_card_detection_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wasim37/answer_card_detection/b45b2c4fc06f757a78eb0b16188b939bdb17267f/pic/num_card_detection_1.png


--------------------------------------------------------------------------------
/pic/num_card_detection_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wasim37/answer_card_detection/b45b2c4fc06f757a78eb0b16188b939bdb17267f/pic/num_card_detection_2.png


--------------------------------------------------------------------------------
/pic/num_card_detection_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wasim37/answer_card_detection/b45b2c4fc06f757a78eb0b16188b939bdb17267f/pic/num_card_detection_3.png


--------------------------------------------------------------------------------
/pic/special_choice_question.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wasim37/answer_card_detection/b45b2c4fc06f757a78eb0b16188b939bdb17267f/pic/special_choice_question.png


--------------------------------------------------------------------------------
/pic/special_num_card_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wasim37/answer_card_detection/b45b2c4fc06f757a78eb0b16188b939bdb17267f/pic/special_num_card_1.png


--------------------------------------------------------------------------------
/pic/special_num_card_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wasim37/answer_card_detection/b45b2c4fc06f757a78eb0b16188b939bdb17267f/pic/special_num_card_2.png


--------------------------------------------------------------------------------
/pic/sub_answer_card_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wasim37/answer_card_detection/b45b2c4fc06f757a78eb0b16188b939bdb17267f/pic/sub_answer_card_1.jpg


--------------------------------------------------------------------------------
/pic/sub_answer_card_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wasim37/answer_card_detection/b45b2c4fc06f757a78eb0b16188b939bdb17267f/pic/sub_answer_card_2.jpg


--------------------------------------------------------------------------------
/pic/sub_answer_card_3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wasim37/answer_card_detection/b45b2c4fc06f757a78eb0b16188b939bdb17267f/pic/sub_answer_card_3.jpg


--------------------------------------------------------------------------------
/pic/sub_answer_card_4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wasim37/answer_card_detection/b45b2c4fc06f757a78eb0b16188b939bdb17267f/pic/sub_answer_card_4.jpg


--------------------------------------------------------------------------------
/pic/sub_answer_card_5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wasim37/answer_card_detection/b45b2c4fc06f757a78eb0b16188b939bdb17267f/pic/sub_answer_card_5.jpg


--------------------------------------------------------------------------------
/pic/sub_answer_card_6.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wasim37/answer_card_detection/b45b2c4fc06f757a78eb0b16188b939bdb17267f/pic/sub_answer_card_6.jpg


--------------------------------------------------------------------------------
/pic/sub_answer_card_7.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wasim37/answer_card_detection/b45b2c4fc06f757a78eb0b16188b939bdb17267f/pic/sub_answer_card_7.jpg


--------------------------------------------------------------------------------
/pic/sub_answer_card_8.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wasim37/answer_card_detection/b45b2c4fc06f757a78eb0b16188b939bdb17267f/pic/sub_answer_card_8.jpg


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | opencv-python==4.5.2.52
2 | imutils==0.5.4  
3 | pytesseract==0.3.7
4 | 


--------------------------------------------------------------------------------
/settings.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Author: wangxin
 3 | Date: 2021-05-25 10:31:52
 4 | LastEditTime: 2021-07-01 17:04:28
 5 | LastEditors: Please set LastEditors
 6 | Description: In User Settings Edit
 7 | '''
 8 | 
 9 | # coding=utf-8
10 | 
# Number of answer-card frames contained in one image, e.g. a single image
# may be divided into a left and a right answer-card frame.
ANSWER_CARD_SIZE = 2

# Minimum width of an answer-card frame (presumably in pixels -- confirm
# against the detection code that consumes it).
ANSWER_CARD_MIN_WIDTH = 1200

# Ordinals used for the major section titles (Chinese numerals 1-17).
TITLE_NUM = ['一', '二', '三', '四', '五', '六', '七', '八', '九', '十', '十一', '十二', '十三', '十四', '十五', '十六', '十七']

# Width of the top-left region scanned when recognizing the section-title ordinal.
TITLE_TOP_LEFT_CORNER_WIDTH = 50

# Height of the top-left region scanned when recognizing the section-title ordinal.
# NOTE: the name is misspelled ("HEIGTH") but utils.py imports it under this name.
TITLE_TOP_LEFT_CORNER_HEIGTH = 65
25 | 


--------------------------------------------------------------------------------
/test/test_cnocr.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Author: your name
 3 | Date: 2021-06-02 17:55:10
 4 | LastEditTime: 2021-06-03 15:40:43
 5 | LastEditors: Please set LastEditors
 6 | Description: In User Settings Edit
 7 | FilePath: \teaching-answer-card-tool\test.py
 8 | '''
 9 | import cv2
10 | from cnocr import CnOcr
11 |  
12 | 
if __name__ == '__main__':
    # Manual smoke test: run cnocr single-line recognition on one cropped
    # sub answer card and print whatever it returns.
    card_img = cv2.imread('out/sub_answer_card_1.jpg')
    recognizer = CnOcr()
    print(recognizer.ocr_for_single_line(card_img))
18 | 


--------------------------------------------------------------------------------
/test/test_pytesseract.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | Author: your name
  3 | Date: 2021-06-02 17:55:10
  4 | LastEditTime: 2021-06-03 15:40:43
  5 | LastEditors: Please set LastEditors
  6 | Description: In User Settings Edit
  7 | FilePath: \teaching-answer-card-tool\test.py
  8 | '''
  9 | import cv2
 10 | import pytesseract
 11 | from utils import get_init_process_img
 12 | from pytesseract import Output
 13 | from PIL import Image
 14 | from PIL import ImageDraw
 15 | from PIL import ImageFont
 16 | import numpy as np
 17 |  
 18 | # https://livezingy.com/pytesseract-image_to_data_locate_text/
 19 | # pytesseract.pytesseract.tesseract_cmd = r'D:\ProgramData\Tesseract-OCR\tesseract.exe'
 20 | 
 21 | 
 22 | def recoText(im):
 23 |     """
 24 |     识别字符并返回所识别的字符及它们的坐标
 25 |     :param im: 需要识别的图片
 26 |     :return data: 字符及它们在图片的位置
 27 |     """
 28 |     data = {}
 29 |     # im = get_init_process_img(im)
 30 |     # d = pytesseract.image_to_string(im, output_type=Output.DICT, lang='chi_sim')
 31 |     # print(d['text'])
 32 |     
 33 |     dd = pytesseract.image_to_string(im, output_type=Output.DICT, lang="eng")
 34 |     print(dd['text'])
 35 |     # print(d['text'])
 36 |     # for i in range(len(d['text'])):
 37 |     #     print(d['text'][i])
 38 |     #     if 0 < len(d['text'][i]):
 39 |     #         (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
 40 |     #         data[d['text'][i]] = ([d['left'][i], d['top'][i], d['width'][i], d['height'][i]])
 41 |  
 42 |     #         cv2.rectangle(im, (x, y), (x + w, y + h), (255, 0, 0), 1)
 43 |     #         # 使用cv2.putText不能显示中文，需要使用下面的代码代替
 44 |     #         # cv2.putText(im, d['text'][i], (x, y-8), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255, 0, 0), 1)
 45 |  
 46 |     #         pilimg = Image.fromarray(im)
 47 |     #         draw = ImageDraw.Draw(pilimg)
 48 |     #         # 参数1：字体文件路径，参数2：字体大小
 49 |     #         # Hiragino Sans GB.ttc 为mac下的简体中文
 50 |     #         font = ImageFont.truetype("Hiragino Sans GB.ttc", 15, encoding="utf-8")
 51 |     #         # 参数1：打印坐标，参数2：文本，参数3：字体颜色，参数4：字体
 52 |     #         draw.text((x, y - 10), d['text'][i], (255, 0, 0), font=font)
 53 |     #         im = cv2.cvtColor(np.array(pilimg), cv2.COLOR_RGB2BGR)
 54 |  
 55 |     # cv2.imshow("recoText", im)
 56 |     return data
 57 | 
 58 | 
 59 | def recognize_text(image):
 60 |     # 边缘保留滤波  去噪
 61 |     blur = cv2.pyrMeanShiftFiltering(image, sp=8, sr=60)
 62 |     cv2.imshow('dst', blur)
 63 |     # 灰度图像
 64 |     gray = cv2.cvtColor(blur, cv2.COLOR_BGR2GRAY)
 65 |     # 二值化  设置阈值  自适应阈值的话 黄色的4会提取不出来
 66 |     ret, binary = cv2.threshold(gray, 185, 255, cv2.THRESH_BINARY_INV)
 67 |     print(f'二值化设置的阈值：{ret}')
 68 |     cv2.imshow('binary', binary)
 69 |     # 逻辑运算  让背景为白色  字体为黑  便于识别
 70 |     cv2.bitwise_not(binary, binary)
 71 |     cv2.imshow('bg_image', binary)
 72 |     # 识别
 73 |     test_message = Image.fromarray(binary)
 74 |     text = pytesseract.image_to_string(test_message)
 75 |     print(f'识别结果：{text}')
 76 |  
 77 |  
 78 | if __name__ == '__main__':
 79 |     # img = cv2.imread('out/sub_answer_card_3.jpg')
 80 |     
 81 |     img = cv2.imread('20210623184007.jpg')
 82 |     
 83 |     # 转灰度
 84 |     # img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
 85 |     # ret, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)   # 二值化
 86 |     
 87 |     # height, width, deep = img.shape                 # cropImg是从图片里截取的,只包含一行数字
 88 |     # gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)      # 转灰度图
 89 |     # dst = np.zeros((height, width, 1), np.uint8)        
 90 |     # for i in range(0, height):                          # 反相 转白底黑字
 91 |     #     for j in range(0, width):
 92 |     #         grayPixel = gray[i, j]
 93 |     #         dst[i, j] = 255 - grayPixel
 94 |     # ret, canny = cv2.threshold(dst, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)   # 二值化
 95 | 
 96 |     
 97 |     # img = cv2.imread('out/test.png')
 98 |     # cv2.imshow("src", img)
 99 |     # data = recoText(img)
100 |     recognize_text(img)
101 |  
102 |     cv2.waitKey(0)
103 |     cv2.destroyAllWindows()
104 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import cv2
  3 | import matplotlib.pyplot as plt
  4 | from settings import TITLE_TOP_LEFT_CORNER_WIDTH, TITLE_TOP_LEFT_CORNER_HEIGTH
  5 | from imutils import auto_canny
  6 | 
  7 | 
  8 | def order_points(pts):
  9 |     """4边形4点排序函数
 10 | 
 11 |     Args:
 12 |         pts ([type]): 4边形任意顺序的4个顶点
 13 | 
 14 |     Returns:
 15 |         [type]: 按照一定顺序的4个顶点
 16 |     """
 17 |     
 18 |     rect = np.zeros((4, 2), dtype="float32")  # 按照左上、右上、右下、左下顺序初始化坐标
 19 | 
 20 |     s = pts.sum(axis=1)  # 计算点xy的和
 21 |     rect[0] = pts[np.argmin(s)]  # 左上角的点的和最小
 22 |     rect[2] = pts[np.argmax(s)]  # 右下角的点的和最大
 23 | 
 24 |     diff = np.diff(pts, axis=1)  # 计算点xy之间的差
 25 |     rect[1] = pts[np.argmin(diff)]  # 右上角的差最小
 26 |     rect[3] = pts[np.argmax(diff)]  # 左下角的差最小
 27 |     return rect  # 返回4个顶点的顺序
 28 | 
 29 | 
 30 | def four_point_transform(image, pts):
 31 |     """4点变换
 32 | 
 33 |     Args:
 34 |         image ([type]): 原始图像
 35 |         pts ([type]): 4个顶点
 36 | 
 37 |     Returns:
 38 |         [type]: 变换后的图像
 39 |     """
 40 |     
 41 |     rect = order_points(pts)  # 获得一致的顺序的点并分别解包他们
 42 |     (tl, tr, br, bl) = rect
 43 | 
 44 |     # 计算新图像的宽度(x)
 45 |     widthA = np.sqrt(((br[0] - bl[0])**2) + ((br[1] - bl[1])**2))  # 右下和左下之间距离
 46 |     widthB = np.sqrt(((tr[0] - tl[0])**2) + ((tr[1] - tl[1])**2))  # 右上和左上之间距离
 47 |     maxWidth = max(int(widthA), int(widthB))  # 取大者
 48 | 
 49 |     # 计算新图像的高度(y)
 50 |     heightA = np.sqrt(((tr[0] - br[0])**2) + ((tr[1] - br[1])**2))  # 右上和右下之间距离
 51 |     heightB = np.sqrt(((tl[0] - bl[0])**2) + ((tl[1] - bl[1])**2))  # 左上和左下之间距离
 52 |     maxHeight = max(int(heightA), int(heightB))
 53 | 
 54 |     # 有了新图像的尺寸, 构造透视变换后的顶点集合
 55 |     dst = np.array(
 56 |         [
 57 |             [0, 0],  # -------------------------左上
 58 |             [maxWidth - 1, 0],  # --------------右上
 59 |             [maxWidth - 1, maxHeight - 1],  # --右下
 60 |             [0, maxHeight - 1]
 61 |         ],  # ------------左下
 62 |         dtype="float32")
 63 | 
 64 |     M = cv2.getPerspectiveTransform(rect, dst)  # 计算透视变换矩阵
 65 |     warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))  # 执行透视变换
 66 | 
 67 |     return warped  # 返回透视变换后的图像
 68 | 
 69 | 
 70 | def sort_contours(cnts, method="left-to-right"):
 71 |     """轮廓排序
 72 | 
 73 |     Args:
 74 |         cnts ([type]): 轮廓
 75 |         method (str, optional): 排序方式. Defaults to "left-to-right".
 76 | 
 77 |     Returns:
 78 |         [type]: 排序好的轮廓
 79 |     """
 80 |     
 81 |     if cnts is None or len(cnts) == 0:
 82 |         return [], []
 83 |     
 84 |     # 初始化逆序标志和排序索引
 85 |     reverse = False
 86 |     i = 0
 87 | 
 88 |     # 是否需逆序处理
 89 |     if method == "right-to-left" or method == "bottom-to-top":
 90 |         reverse = True
 91 | 
 92 |     # 是否需要按照y坐标函数
 93 |     if method == "top-to-bottom" or method == "bottom-to-top":
 94 |         i = 1
 95 | 
 96 |     # 构造包围框列表，并从上到下对它们进行排序
 97 |     boundingBoxes = [cv2.boundingRect(c) for c in cnts]
 98 |     (cnts, boundingBoxes) = zip(*sorted(
 99 |         zip(cnts, boundingBoxes), key=lambda b: b[1][i], reverse=reverse))
100 | 
101 |     # 返回已排序的轮廓线和边框列表
102 |     return cnts, boundingBoxes
103 | 
104 | 
def get_init_process_img(img_path):
    """Load an image from disk and return its edge map.

    The image is converted to grayscale, smoothed with a 5x5 Gaussian blur
    and then passed to ``imutils.auto_canny`` for edge detection.

    Args:
        img_path: Path of the image file to load.

    Returns:
        The edge image produced by ``auto_canny``.

    Raises:
        OSError: If ``cv2.imread`` cannot load ``img_path``.
    """
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        raise OSError("Failed to read image: {}".format(img_path))

    # Grayscale, then Gaussian blur to reduce noise before edge detection.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    # NOTE: erode/dilate passes and a fixed cv2.Canny(blurred, 75, 200) were
    # tried here previously and left disabled; auto_canny is used instead.
    return auto_canny(blurred)
128 | 
129 | 
def capture_img(origin_image_path, target_image_path, contour):
    """Crop the bounding rectangle of a contour out of an image file.

    Args:
        origin_image_path: Path of the source image.
        target_image_path: Path the cropped image is written to.
        contour: Contour whose bounding rectangle defines the crop.

    Returns:
        None.

    Raises:
        OSError: If ``cv2.imread`` cannot load ``origin_image_path``.
    """
    # Axis-aligned bounding rectangle of the contour.
    x, y, w, h = cv2.boundingRect(contour)

    image = cv2.imread(origin_image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising;
        # without this check the slice below fails with an opaque TypeError.
        raise OSError("Failed to read image: {}".format(origin_image_path))

    # NumPy images are indexed [row (y), column (x)].
    cv2.imwrite(target_image_path, image[y:y + h, x:x + w])
146 | 
147 | 
def save_img_by_cnts(save_image_path, image_size, cnts):
    """Draw contours on a black canvas and save the rendering with matplotlib.

    Args:
        save_image_path: Path the figure is saved to.
        image_size: Shape of the canvas array to draw on.
        cnts: List of contours to draw.
    """
    # Black canvas; contours are drawn in white with a 2-pixel stroke.
    black_background = np.zeros(image_size, np.uint8)
    cv2.drawContours(black_background, cnts, -1, (255, 255, 255), 2)

    fig = plt.figure(figsize=(10, 5))
    try:
        plt.imshow(black_background)
        plt.axis('off')
        plt.savefig(save_image_path)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # close it so repeated calls do not accumulate open figures.
        plt.close(fig)
162 | 
163 | 
def ocr_single_line_img(image_path, ocr):
    """Run single-line OCR on the top-left corner of an image.

    Args:
        image_path: Path of the image to recognize.
        ocr: OCR object exposing ``ocr_for_single_line``.

    Returns:
        The result of ``ocr.ocr_for_single_line``; when its first element is
        the hyphen '-', that element is replaced by the Chinese numeral '一'.

    Raises:
        OSError: If ``cv2.imread`` cannot load ``image_path``.
    """

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising;
        # without this check the slice below fails with an opaque TypeError.
        raise OSError("Failed to read image: {}".format(image_path))

    # NOTE(review): NumPy images are indexed [row, column], so the WIDTH
    # constant bounds the rows and the HEIGTH constant bounds the columns
    # here. This looks swapped relative to the constant names; it is kept
    # as-is because the values may have been tuned against this behavior.
    corner = image[0:TITLE_TOP_LEFT_CORNER_WIDTH, 0:TITLE_TOP_LEFT_CORNER_HEIGTH]
    res = ocr.ocr_for_single_line(corner)

    # The title ordinal '一' is visually a dash; map a leading '-' back to it.
    if len(res) > 0 and res[0] == '-':
        res[0] = '一'
    return res
180 | 
181 | 


--------------------------------------------------------------------------------