├── IDrecognition
    ├── 1.jpg
    ├── 10.jpg
    ├── 11.jpg
    ├── 2.jpg
    ├── 3.jpg
    ├── 3.png
    ├── 4.jpg
    ├── 5.jpg
    ├── 6.jpg
    ├── 7.jpg
    ├── video.py
    ├── readme.html
    └── IDrec.py
└── README.md


/IDrecognition/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lvxiaojie111/OCR/HEAD/IDrecognition/1.jpg


--------------------------------------------------------------------------------
/IDrecognition/10.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lvxiaojie111/OCR/HEAD/IDrecognition/10.jpg


--------------------------------------------------------------------------------
/IDrecognition/11.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lvxiaojie111/OCR/HEAD/IDrecognition/11.jpg


--------------------------------------------------------------------------------
/IDrecognition/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lvxiaojie111/OCR/HEAD/IDrecognition/2.jpg


--------------------------------------------------------------------------------
/IDrecognition/3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lvxiaojie111/OCR/HEAD/IDrecognition/3.jpg


--------------------------------------------------------------------------------
/IDrecognition/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lvxiaojie111/OCR/HEAD/IDrecognition/3.png


--------------------------------------------------------------------------------
/IDrecognition/4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lvxiaojie111/OCR/HEAD/IDrecognition/4.jpg


--------------------------------------------------------------------------------
/IDrecognition/5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lvxiaojie111/OCR/HEAD/IDrecognition/5.jpg


--------------------------------------------------------------------------------
/IDrecognition/6.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lvxiaojie111/OCR/HEAD/IDrecognition/6.jpg


--------------------------------------------------------------------------------
/IDrecognition/7.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lvxiaojie111/OCR/HEAD/IDrecognition/7.jpg


--------------------------------------------------------------------------------
/IDrecognition/video.py:
--------------------------------------------------------------------------------
 1 | ###############################################################################
 2 | #creator:      junjielv199108
 3 | #create time:  2020/10/01/15/59
 4 | ###############################################################################
 5 | import cv2
 6 | import numpy
 7 | cap = cv2.VideoCapture(0)# 调整参数实现读取视频或调用摄像头
 8 | while 1:
 9 |     ret, frame = cap.read()
10 |     try:
11 |         if ret:
12 |             cv2.imshow("cap", frame)
13 |             if cv2.waitKey(100) & 0xff == ord('q'):
14 |                 break;
15 |     except:
16 |         print(1)
17 | cap.release()
18 | cv2.destroyAllWindows()


--------------------------------------------------------------------------------
/IDrecognition/readme.html:
--------------------------------------------------------------------------------
 1 | creator:junjielv  2020/10/04
 2 | 2020/10/4  身份证识别项目 ----v1.0
 3 | 1、所需库：pytesseract.py
 4 |    安装：pip install pytesseract
 5 | 2、运行video.py代码，调试在线摄像头
 6 | 3、运行IDrec.py,进行身份证识别
 7 | 功能1：图片处理后识别
 8 | 功能2：图片直接识别
 9 | 功能3：视频在线识别
10 | 
11 | 2020/10/5
12 | 加：
13 | 1、中文文字识别（下载chi_sim.traineddata中文文字库：https://tesseract-ocr.github.io/tessdoc/Data-Files）
14 | github官方很慢，可参考：https://pan.baidu.com/s/1uuSTBNo3byJib4f8eRSIFw ,密：8v8u
15 | 可参考：https://blog.csdn.net/qq_38161040/article/details/90668765
16 | 
17 | 
18 | 常见错误：
19 | 1）tesseract is not installed or it's not in your path
20 | ①需要在 Windows 本地环境下安装：Tesseract-OCR  下载地址：https://github.com/tesseract-ocr/tesseract/wiki
21 | ②解压安装，并设置环境变量，可参考：https://blog.csdn.net/weixin_40569991/article/details/82082173
22 | ③打开anaconda\envs\yourvirenv\site-packages\pytesseract\pytesseract.py文件，将其中的tesseract_cmd改为本机Tesseract的安装路径，例如：tesseract_cmd = r'D:\Tesseract-OCR\tesseract.exe'
23 | ④重新在python中运行IDrec.py即可成功.
24 | 
25 | 优点：
26 | 1、对身份证件的识别要好一些
27 | 2、对标准的大字体识别好些
28 | 
29 | 缺点：
30 | 1、对广告牌字体的识别效果特别差
31 | 2、对车牌的识别也特别差
32 | 
33 | 
34 | 
35 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # OCR
 2 | 身份证文字识别项目
 3 | 
 4 | creator:junjielv  2020/10/04
 5 | 2020/10/4  身份证识别项目 ----v1.0
 6 | 
 7 | 1、所需库：pytesseract.py
 8 |    安装：pip install pytesseract
 9 |    
10 | 2、运行video.py代码，调试在线摄像头
11 | 
12 | 3、运行IDrec.py,进行身份证识别
13 | 
14 | 功能1：图片处理后识别
15 | 
16 | 功能2：图片直接识别
17 | 
18 | 功能3：视频在线识别
19 | 
20 | 2020/10/5
21 | 加：
22 | 
23 | 1、中文文字识别（下载chi_sim.traineddata中文文字库：https://tesseract-ocr.github.io/tessdoc/Data-Files）
24 | github官方很慢，可参考：https://pan.baidu.com/s/1uuSTBNo3byJib4f8eRSIFw ,密：8v8u
25 | 可参考：https://blog.csdn.net/qq_38161040/article/details/90668765
26 | 
27 | 
28 | 常见错误：
29 | 
30 | 1）tesseract is not installed or it's not in your path
31 | 
32 | ①需要在 Windows 本地环境下安装：Tesseract-OCR  下载地址：https://github.com/tesseract-ocr/tesseract/wiki
33 | ②解压安装，并设置环境变量，可参考：https://blog.csdn.net/weixin_40569991/article/details/82082173
34 | ③打开anaconda\envs\yourvirenv\site-packages\pytesseract\pytesseract.py文件，将其中的tesseract_cmd改为本机Tesseract的安装路径，例如：tesseract_cmd = r'D:\Tesseract-OCR\tesseract.exe'
35 | ④重新在python中运行IDrec.py即可成功.
36 | 
37 | 优点：
38 | 
39 | 1、对身份证件的识别要好一些
40 | 2、对标准的大字体识别好些
41 | 
42 | 缺点：
43 | 1、对广告牌字体的识别效果特别差
44 | 2、对车牌的识别也特别差
45 | 


--------------------------------------------------------------------------------
/IDrecognition/IDrec.py:
--------------------------------------------------------------------------------
  1 | # !/usr/bin/python
  2 | #-*-coding:utf-8-*-
  3 | import sys
  4 | # import importlib
  5 | # importlib.reload(sys)
  6 | # sys.setdefaultencoding('utf-8')
  7 | #creator:      junjielv199108
  8 | #create time:  2020/10/01/15/59
  9 | import time
 10 | time1 = time.time()
 11 | from PIL import Image
 12 | import pytesseract#ocr字符识别库
 13 | import cv2
 14 | ###########二值化算法
 15 | def binarizing(img,threshold):
 16 |     pixdata = img.load()
 17 |     w, h = img.size
 18 |     for y in range(h):
 19 |         for x in range(w):
 20 |             if pixdata[x, y] < threshold:
 21 |                 pixdata[x, y] = 0
 22 |             else:
 23 |                 pixdata[x, y] = 255
 24 |     return img
 25 | 
 26 | ###########去除干扰线算法
 27 | def depoint(img):   #input: gray image
 28 |     pixdata = img.load()
 29 |     w,h = img.size
 30 |     for y in range(1,h-1):
 31 |         for x in range(1,w-1):
 32 |             count = 0
 33 |             if pixdata[x,y-1] > 245:
 34 |                 count = count + 1
 35 |             if pixdata[x,y+1] > 245:
 36 |                 count = count + 1
 37 |             if pixdata[x-1,y] > 245:
 38 |                 count = count + 1
 39 |             if pixdata[x+1,y] > 245:
 40 |                 count = count + 1
 41 |             if count > 2:
 42 |                 pixdata[x,y] = 255
 43 |     return img
 44 | ########身份证号码识别
 45 | def identity_OCR(pic_path):
 46 |     #####身份证号码截图
 47 |     img1=Image.open(pic_path)
 48 |     w,h=img1.size
 49 |     ##将身份证放大3倍
 50 |     out=img1.resize((w*3,h*3),Image.ANTIALIAS)
 51 |     Image._show(out)
 52 |     cv2.waitKey(0)
 53 |     region = (125*3,200*3,370*3,250*3)
 54 |     #裁切身份证号码图片
 55 |     cropImg = out.crop(region)
 56 |     # print(cropImg)
 57 |     # Image._show(cropImg)
 58 |     # cv2.waitKey(0)
 59 |     # 转化为灰度图
 60 |     img= cropImg.convert('L')
 61 |     Image._show(img)
 62 |     cv2.waitKey(0)
 63 |     # 把图片变成二值图像。
 64 |     img1=binarizing(img,100)
 65 |     img2=depoint(img)
 66 |     code = pytesseract.image_to_string(img2)
 67 |     print("识别该身份证号码是:"+str(code))
 68 | '''
 69 | ########身份证号码识别###################
 70 | ##########英文图片识别####################   
 71 | '''
 72 | def identity_OCR_Nopro(pic_path):
 73 |     image = Image.open(pic_path)
 74 |     content = pytesseract.image_to_string(image)  # 解析图片
 75 |     print(content)
 76 | '''
 77 | #####################中文图片识别###############################
 78 | 要在pytesseract 库的 image_to_string() 方法里加个参数lang='chi_sim'，
 79 | 这个就是引用对应的中文语言包，中文语言包的全名是 chi_sim.traineddata
 80 | '''
 81 | def identity_OCR_Chine(pic_path):
 82 |     from PIL import Image
 83 |     import pytesseract
 84 |     image = Image.open(pic_path)
 85 |     content = pytesseract.image_to_string(image, lang='chi_sim')  # 解析图片
 86 |     print(content)
 87 | 
 88 | '''
 89 | ##############功能：video recognition#############
 90 | '''
 91 | def identity_OCR_Video(pic_path):
 92 |     vid = cv2.VideoCapture(pic_path)
 93 |     while True:
 94 |         try:
 95 |             return_value, frame = vid.read()
 96 |             if return_value:
 97 |                 cv2.imshow("result", frame)
 98 |                 frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
 99 | 
100 |                 # image = Image.fromarray(frame)
101 |                 # result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
102 |                 code = pytesseract.image_to_string(frame,lang='chi_sim')
103 |                 print("识别该身份证号码是:" + str(code))
104 |                 cv2.waitKey(100)
105 |             else:
106 |                 raise ValueError("No image!")
107 |         except:
108 |             print()
if __name__ == '__main__':
    # Sample image, resolved relative to the current working directory.
    pic_path = "./11.jpg"

    # Pick one recognition mode; the alternatives are kept for reference.
    # identity_OCR(pic_path)          # preprocess, then read the ID number
    # identity_OCR_Video(0)           # live recognition from camera 0
    # identity_OCR_Nopro(pic_path)    # direct recognition, default language
    identity_OCR_Chine(pic_path)      # direct recognition, Simplified Chinese

    # Report wall-clock time since time1 was taken at module start-up.
    elapsed = time.time() - time1
    print(u'总共耗时：' + str(elapsed) + 's')
118 | 
119 | 
120 | 


--------------------------------------------------------------------------------