├── .gitattributes
├── .gitignore
├── README.md
├── TencentAPI.py
├── TencentAPIMsg.py
├── __init__.py
└── ai-example.py


/.gitattributes:
--------------------------------------------------------------------------------
 1 | # Auto detect text files and perform LF normalization
 2 | * text=auto
 3 | 
 4 | # Custom for Visual Studio
 5 | *.cs     diff=csharp
 6 | 
 7 | # Standard to msysgit
 8 | *.doc	 diff=astextplain
 9 | *.DOC	 diff=astextplain
10 | *.docx diff=astextplain
11 | *.DOCX diff=astextplain
12 | *.dot  diff=astextplain
13 | *.DOT  diff=astextplain
14 | *.pdf  diff=astextplain
15 | *.PDF	 diff=astextplain
16 | *.rtf	 diff=astextplain
17 | *.RTF	 diff=astextplain
18 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | 
 27 | # PyInstaller
 28 | #  Usually these files are written by a python script from a template
 29 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 30 | *.manifest
 31 | *.spec
 32 | 
 33 | # Installer logs
 34 | pip-log.txt
 35 | pip-delete-this-directory.txt
 36 | 
 37 | # Unit test / coverage reports
 38 | htmlcov/
 39 | .tox/
 40 | .coverage
 41 | .coverage.*
 42 | .cache
 43 | nosetests.xml
 44 | coverage.xml
 45 | *,cover
 46 | .hypothesis/
 47 | 
 48 | # Translations
 49 | *.mo
 50 | *.pot
 51 | 
 52 | # Django stuff:
 53 | *.log
 54 | local_settings.py
 55 | 
 56 | # Flask instance folder
 57 | instance/
 58 | 
 59 | # Scrapy stuff:
 60 | .scrapy
 61 | 
 62 | # Sphinx documentation
 63 | docs/_build/
 64 | 
 65 | # PyBuilder
 66 | target/
 67 | 
 68 | # IPython Notebook
 69 | .ipynb_checkpoints
 70 | 
 71 | # pyenv
 72 | .python-version
 73 | 
 74 | # celery beat schedule file
 75 | celerybeat-schedule
 76 | 
 77 | # dotenv
 78 | .env
 79 | 
 80 | # virtualenv
 81 | venv/
 82 | ENV/
 83 | 
 84 | # Spyder project settings
 85 | .spyderproject
 86 | 
 87 | # Rope project settings
 88 | .ropeproject
 89 | 
 90 | # =========================
 91 | # Operating System Files
 92 | # =========================
 93 | 
 94 | # OSX
 95 | # =========================
 96 | 
 97 | .DS_Store
 98 | .AppleDouble
 99 | .LSOverride
100 | 
101 | # Thumbnails
102 | ._*
103 | 
104 | # Files that might appear in the root of a volume
105 | .DocumentRevisions-V100
106 | .fseventsd
107 | .Spotlight-V100
108 | .TemporaryItems
109 | .Trashes
110 | .VolumeIcon.icns
111 | 
112 | # Directories potentially created on remote AFP share
113 | .AppleDB
114 | .AppleDesktop
115 | Network Trash Folder
116 | Temporary Items
117 | .apdisk
118 | 
119 | # Windows
120 | # =========================
121 | 
122 | # Windows image file caches
123 | Thumbs.db
124 | ehthumbs.db
125 | 
126 | # Folder config file
127 | Desktop.ini
128 | 
129 | # Recycle Bin used on file shares
130 | $RECYCLE.BIN/
131 | 
132 | # Windows Installer files
133 | *.cab
134 | *.msi
135 | *.msm
136 | *.msp
137 | 
138 | # Windows shortcuts
139 | *.lnk
140 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PyTencentAI
2 | Python3 下实现 腾讯人工智能API 调用
3 | ======================================
4 | 
5 | a、鹅厂近期发布了自己的人工智能 api，包括身份证ocr、名片ocr、文本分析等一堆API，因为前期项目用到图形OCR，遂实现试用了一下，发现准确率还不错，放出来给大家共享一下。
6 | 
7 | b、基于python3，跟python2还是有些区别。
8 | 
9 | c、特别需要提到的就是签名生成这块，鹅厂的api说明里写的比较简单，一开始在sign的生成上卡了好几天，后来加的官方群，咨询之后才解决。


--------------------------------------------------------------------------------
/TencentAPI.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # 腾讯API字典
 3 | 
 4 | 
 5 | TencentAPI={
 6 |     #基本文本分析API
 7 |     "nlp_wordseg":    {
 8 |         'APINAME':'分词', #API中文简称
 9 |         'APIDESC': '对文本进行智能分词识别，支持基础词与混排词粒度', #API描述
10 |         'APIURL': 'https://api.ai.qq.com/fcgi-bin/nlp/nlp_wordseg', #API请求URL
11 |         'APIPARA': 'text' #API非公共参数
12 |     },
13 |     "nlp_wordpos":    {
14 |         'APINAME':'词性标注',
15 |         'APIDESC': '对文本进行分词，同时为每个分词标注正确的词性',
16 |         'APIURL': 'https://api.ai.qq.com/fcgi-bin/nlp/nlp_wordpos',
17 |         'APIPARA': 'text'
18 |     },
19 |     'nlp_wordner':    {
20 |         'APINAME':'专有名词识别',
21 |         'APIDESC': '对文本进行专有名词的分词识别，找出文本中的专有名词',
22 |         'APIURL': 'https://api.ai.qq.com/fcgi-bin/nlp/nlp_wordner',
23 |         'APIPARA': 'text'
24 |     },
25 |     'nlp_wordsyn':    {
26 |         'APINAME':'同义词识别',
27 |         'APIDESC': '识别文本中存在同义词的分词，并返回相应的同义词',
28 |         'APIURL': 'https://api.ai.qq.com/fcgi-bin/nlp/nlp_wordsyn',
29 |         'APIPARA': 'text'
30 |     },
31 |     
32 |     #计算机视觉--OCR识别API
33 |     "ocr_generalocr":    {
34 |         'APINAME':'通用OCR识别',
35 |         'APIDESC': '识别上传图像上面的字段信息',
36 |         'APIURL': 'https://api.ai.qq.com/fcgi-bin/ocr/ocr_generalocr',
37 |         'APIPARA': 'image'
38 |     },
39 |     "ocr_idcardocr":    {
40 |         'APINAME':'身份证OCR识别',
41 |         'APIDESC': '识别身份证图像上面的详细身份信息',
42 |         'APIURL': 'https://api.ai.qq.com/fcgi-bin/ocr/ocr_idcardocr',
43 |         'APIPARA': 'image,card_type'
44 |     },
45 |     "ocr_bcocr":    {
46 |         'APINAME':'名片OCR识别',
47 |         'APIDESC': '识别名片图像上面的字段信息',
48 |         'APIURL': 'https://api.ai.qq.com/fcgi-bin/ocr/ocr_bcocr',
49 |         'APIPARA': 'image'
50 |     },
51 |     "ocr_driverlicenseocr":{
52 |         'APINAME':'行驶证驾驶证OCR识别',
53 |         'APIDESC': '识别行驶证或驾驶证图像上面的字段信息',
54 |         'APIURL': 'https://api.ai.qq.com/fcgi-bin/ocr/ocr_driverlicenseocr',
55 |         'APIPARA': 'image,type'
56 |     },
57 |     "ocr_bizlicenseocr":{
58 |         'APINAME':'营业执照OCR识别',
59 |         'APIDESC': '识别营业执照上面的字段信息',
60 |         'APIURL': 'https://api.ai.qq.com/fcgi-bin/ocr/ocr_bizlicenseocr',
61 |         'APIPARA': 'image'
62 |     },
63 |     "ocr_creditcardocr":{
64 |         'APINAME':'银行卡OCR识别',
65 |         'APIDESC': '识别银行卡上面的字段信息',
66 |         'APIURL': 'https://api.ai.qq.com/fcgi-bin/ocr/ocr_creditcardocr',
67 |         'APIPARA': 'image'
68 |     },
69 | }
70 | 


--------------------------------------------------------------------------------
/TencentAPIMsg.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | '''
  4 | create by : joshua zou
  5 | create date : 2017.11.28
  6 | Purpose: check Tencent AI API
  7 | '''
  8 | 
  9 | 
 10 | import requests
 11 | import base64
 12 | import hashlib
 13 | import time
 14 | import random
 15 | import os,string,glob
 16 | from PIL import Image 
 17 | from io import BytesIO
 18 | from urllib.parse import urlencode
 19 | from urllib import parse
 20 | import json
 21 | 
 22 | 
 23 | class TencentAPIMsg(object):
 24 |     def __init__(self,AppID=None,AppKey=None):
 25 |         '''
 26 |         图形OCR
 27 |         appid=000
 28 |         AppKey=0000
 29 |         '''
 30 |         if not AppID: AppID = '1100000000000'
 31 |         if not AppKey: AppKey = 'ZV1w0000000000'
 32 |         self.__app_id= AppID 
 33 |         self.__app_key= AppKey 
 34 |         self.__img_base64str=None
 35 |         
 36 |     def get_random_str(self):
 37 |         """ 随机生成16位字符串
 38 |         @return: 16位字符串
 39 |         """
 40 |         rule = string.ascii_lowercase + string.digits
 41 |         str = random.sample(rule, 32)
 42 |         return "".join(str)
 43 |     
 44 |     def get_time_stamp(self):
 45 |         return str(int(time.time()))
 46 |     
 47 |     def __get_image_base64str__(self,image):
 48 |         if not isinstance(image,Image):return None 
 49 |         outputBuffer = BytesIO()
 50 |         bg.save(outputBuffer, format='JPEG')
 51 |         imgbase64 = base64.b64encode(outputBuffer.getvalue())
 52 |         return imgbase64
 53 |     
 54 |     def __get_imgfile_base64str__(self,image):
 55 |         if not isinstance(image, str): return None
 56 |         if not os.path.isfile(image): return None
 57 | 
 58 |         with open(image,'rb') as fp:
 59 |             imgbase64 = base64.b64encode(fp.read())
 60 |             return imgbase64
 61 |         
 62 |     def get_img_base64str(self,image):
 63 |         if isinstance(image, str): 
 64 |             self.__img_base64str= self.__get_imgfile_base64str__(image)
 65 |         elif isinstance(image,Image):
 66 |             self.__img_base64str= self.__get_imgfile_base64str__(image)
 67 |         return self.__img_base64str.decode()
 68 |     
 69 |     # 生成签名相关算法
 70 |     def get_param_sign_str(self,param_dict):
 71 |         sb = '';
 72 |         for k in sorted(param_dict.keys()):
 73 |             if (0 < len(sb)):
 74 |                 sb += '&' + k + '=' + parse.quote_plus(param_dict[k]);
 75 |             else:
 76 |                 sb += k + '=' + parse.quote_plus(param_dict[k])
 77 |         sign_str = self.gen_str_md5(sb + '&app_key=' + self.__app_key)
 78 |         return sign_str
 79 | 
 80 |     # MD5加密方法
 81 |     def gen_str_md5(self,rawstr):
 82 |         hash = hashlib.md5()  # md5对象，md5不能反解，但是加密是固定的，就是关系是一一对应，所以有缺陷，可以被对撞出来
 83 |         hash.update(bytes(rawstr, encoding='utf-8'))  # 要对哪个字符串进行加密，就放这里
 84 |         return hash.hexdigest().upper();
 85 | 
 86 |     # 组装字典，MD5加密方法
 87 |     '''
 88 |     ======================================
 89 |     tencent获得参数对列表N（字典升级排序）
 90 |     ======================================
 91 |     1\依照算法第一步要求，对参数对进行排序，得到参数对列表N如下。
 92 |     参数名 	参数值
 93 |     app_id 	10000
 94 |     nonce_str 	20e3408a79
 95 |     text 	腾讯开放平台
 96 |     time_stamp 	1493449657
 97 |     
 98 |     2\按URL键值拼接字符串T
 99 |     依照算法第二步要求，将参数对列表N的参数对进行URL键值拼接，值使用URL编码，URL编码算法用大写字母，例如%E8，而不是小写%e8，得到字符串T如下：
100 |     app_id=10000&nonce_str=20e3408a79&text=%E8%85%BE%E8%AE%AF%E5%BC%80%E6%94%BE%E5%B9%B3%E5%8F%B0&time_stamp=1493449657
101 |     
102 |     3\拼接应用密钥，得到字符串S
103 |     依照算法第三步要求，将应用密钥拼接到字符串T的尾末，得到字符串S如下。
104 |     app_id=10000&nonce_str=20e3408a79&text=%E8%85%BE%E8%AE%AF%E5%BC%80%E6%94%BE%E5%B9%B3%E5%8F%B0&time_stamp=1493449657&app_key=a95eceb1ac8c24ee28b70f7dbba912bf
105 |     
106 |     4\计算MD5摘要，得到签名字符串
107 |     依照算法第四步要求，对字符串S进行MD5摘要计算得到签名字符串如。
108 |     e8f6f347d549fe514f0c9c452c95da9d
109 |     
110 |     5\转化md5签名值大写
111 |     对签名字符串所有字母进行大写转换，得到接口请求签名，结束算法。
112 |     E8F6F347D549FE514F0C9C452C95DA9D
113 |     
114 |     6\最终请求数据
115 |     在完成签名计算后，即可得到所有接口请求数据，进一步完成API的调用。
116 |     text 	腾讯开放平台 	接口请求数据，UTF-8编码
117 |     app_id 	10000 	应用标识
118 |     time_stamp 	1493449657 	请求时间戳（秒级），用于防止请求重放
119 |     nonce_str 	20e3408a79 	请求随机字符串，用于保证签名不可预测
120 |     sign 	E8F6F347D549FE514F0C9C452C95DA9D 	请求签名    
121 |     '''
122 |     def gen_dict_md5(self,req_dict,app_key):
123 |         if not isinstance(req_dict,dict) :return None 
124 |         if not isinstance(app_key,str) or not app_key:return None 
125 |         
126 |         try:
127 |             #方法1，自己写urlencode函数
128 |             #md5text =self.get_param_sign_str(req_dict)
129 |             
130 |             #方法2，先对字典排序，排序之后，写app_key，再urlencode
131 |             sort_dict= sorted(req_dict.items(), key=lambda item:item[0], reverse = False)
132 |             sort_dict.append(('app_key',app_key))
133 |             sha = hashlib.md5()
134 |             rawtext= urlencode(sort_dict).encode()
135 |             sha.update(rawtext)
136 |             md5text= sha.hexdigest().upper()
137 |             #print(1)
138 |             #字典可以在函数中改写
139 |             if md5text: req_dict['sign']=md5text
140 |             return md5text
141 |         except Exception as e:
142 |             return   None
143 | 
144 |     #生成字典
145 |     def init_req_dict(self, req_dict,app_id=None, app_key=None,time_stamp=None, nonce_str=None):
146 |         """用MD5算法生成安全签名"""
147 |         if not req_dict.get('app_id'): 
148 |             if not app_id: app_id= self.__app_id
149 |             req_dict['app_id']= app_id
150 |        
151 |         #nonce_str 字典无值
152 |         if not req_dict.get('time_stamp'): 
153 |             if not time_stamp: time_stamp= self.get_time_stamp()
154 |             req_dict['time_stamp']= time_stamp
155 |         
156 |         if not req_dict.get('nonce_str'): 
157 |             if not nonce_str: nonce_str= self.get_random_str()
158 |             req_dict['nonce_str']= nonce_str
159 |         #app_key 取系统参数。
160 |         if not app_key: app_key= self.__app_key        
161 |         md5key= self.gen_dict_md5(req_dict, app_key)
162 |         return md5key
163 |         
164 | 
165 | 
166 | 
167 | 


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 


--------------------------------------------------------------------------------
/ai-example.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | '''
 4 | create by : joshua zou
 5 | create date : 2017.11.28
 6 | Purpose: check Tencent AI API
 7 | '''
 8 | 
 9 | 
10 | from TencentAPI import * 
11 | from TencentAPIMsg import *
12 | 
13 | #通用api构造函数
def ExecTecentAPI(*arg,**kwds):
    """Call one of the APIs registered in ``TencentAPI`` and return its text.

    Keyword arguments:
        Apiname: key of the API in ``TencentAPI`` (required).
        others:  one keyword per name listed in the API's ``APIPARA`` entry,
                 e.g. ``image=...`` or ``text=...``. ``image`` is converted
                 to base64 text and ``text`` is encoded as GBK before sending.

    Positional arguments are accepted but ignored.

    Returns the ``itemstring`` values of ``data.item_list`` in the response,
    each prefixed with ``'/'``, or ``''`` when the response has no such list.

    Raises ValueError when ``Apiname`` or a required parameter is missing,
    KeyError when ``Apiname`` is not registered, and RuntimeError when the
    request could not be signed.
    """
    apiname = kwds.pop('Apiname', None)
    if not apiname:
        raise ValueError("the 'Apiname' keyword argument is required")

    api = TencentAPI[apiname]
    url = api['APIURL']
    name = api['APINAME']
    para = api['APIPARA']

    tx = TencentAPIMsg(APPID, APPKEY)

    Req_Dict = {}
    for key in para.split(','):
        # Test for presence rather than truthiness: 0 is a legitimate value
        # (e.g. card_type=0) and must not be dropped.
        value = kwds.pop(key, None)
        if value is None:
            raise ValueError('missing required parameter %r for API %r' % (key, apiname))
        if key == 'image':
            # Images are sent as base64 text.
            value = tx.get_img_base64str(value)
        elif key == 'text':
            # Text is sent GBK-encoded.
            value = value.encode('gbk')
        Req_Dict[key] = value

    # Add the common fields and the signature to the request.
    if tx.init_req_dict(req_dict=Req_Dict) is None:
        raise RuntimeError('could not sign the request for API %r' % (apiname,))

    # A timeout keeps an unresponsive server from blocking the caller forever.
    resp = requests.post(url, data=Req_Dict, timeout=30)
    result = resp.json()
    print (name+',API应答码:'+str(result['ret']))

    try:
        return ''.join('/' + each['itemstring'] for each in result['data']['item_list'])
    except (KeyError, TypeError):
        # The response carries no (usable) item list.
        return ''
50 |     
51 | '''
52 | 基本文本分析
53 | ===========
54 | 分词 	对文本进行智能分词识别，支持基础词与混排词粒度 	https://api.ai.qq.com/fcgi-bin/nlp/nlp_wordseg text
55 | 词性标注 	对文本进行分词，同时为每个分词标注正确的词性 	https://api.ai.qq.com/fcgi-bin/nlp/nlp_wordpos text
56 | 专有名词识别 	对文本进行专有名词的分词识别，找出文本中的专有名词 	https://api.ai.qq.com/fcgi-bin/nlp/nlp_wordner text
57 | 同义词识别 	识别文本中存在同义词的分词，并返回相应的同义词 	https://api.ai.qq.com/fcgi-bin/nlp/nlp_wordsyn text
58 | 
59 | 
60 | 计算机视觉--OCR识别
61 | ====================
62 | 通用OCR识别 	识别上传图像上面的字段信息 	https://api.ai.qq.com/fcgi-bin/ocr/ocr_generalocr image
63 | 身份证OCR识别 	识别身份证图像上面的详细身份信息 	https://api.ai.qq.com/fcgi-bin/ocr/ocr_idcardocr image,card_type(身份证，0-正面，1-反面)
64 | 名片OCR识别 	识别名片图像上面的字段信息 	https://api.ai.qq.com/fcgi-bin/ocr/ocr_bcocr image
65 | 行驶证驾驶证OCR识别 	识别行驶证或驾驶证图像上面的字段信息 	https://api.ai.qq.com/fcgi-bin/ocr/ocr_driverlicenseocr image,type(识别类型，0-行驶证识别，1-驾驶证识别)
66 | 营业执照OCR识别 	识别营业执照上面的字段信息 	https://api.ai.qq.com/fcgi-bin/ocr/ocr_bizlicenseocr image
67 | 银行卡OCR识别 	识别银行卡上面的字段信息 	https://api.ai.qq.com/fcgi-bin/ocr/ocr_creditcardocr image
68 | '''
# Replace these with your own Tencent APPID and APPKEY.
APPID='100000000'
APPKEY='ZV1w000000'

if __name__ == "__main__":
    # Import glob explicitly instead of relying on it leaking in through
    # ``from TencentAPIMsg import *``.
    import glob

    # Raw string: the backslashes of the Windows path are literal characters,
    # not (invalid) escape sequences.
    for file in glob.glob(r'D:\python\guoyaotang\*.jpg'):
        rest = ExecTecentAPI(Apiname='ocr_generalocr',image=file)
        print (file+rest)


--------------------------------------------------------------------------------