├── .gitattributes ├── .gitignore ├── README.md ├── TencentAPI.py ├── TencentAPIMsg.py ├── __init__.py └── ai-example.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask instance folder 57 | instance/ 58 | 59 | # Scrapy stuff: 60 | .scrapy 61 | 62 | # Sphinx documentation 63 | docs/_build/ 64 | 65 | # PyBuilder 66 | target/ 67 | 68 | # IPython Notebook 69 | .ipynb_checkpoints 70 | 71 | # pyenv 72 | .python-version 73 | 74 | # celery beat schedule file 75 | celerybeat-schedule 76 | 77 | # dotenv 78 | .env 79 | 80 | # virtualenv 81 | venv/ 82 | ENV/ 83 | 84 | # Spyder project settings 85 | .spyderproject 86 | 87 | # Rope project settings 88 | .ropeproject 89 | 90 | # ========================= 91 | # Operating System Files 92 | # ========================= 93 | 94 | # OSX 95 | # ========================= 96 | 97 | .DS_Store 98 | .AppleDouble 99 | .LSOverride 100 | 101 | # Thumbnails 102 | ._* 103 | 104 | # Files that might appear in the root of a volume 105 | .DocumentRevisions-V100 106 | .fseventsd 107 | .Spotlight-V100 108 | .TemporaryItems 109 | .Trashes 110 | .VolumeIcon.icns 111 | 112 | # Directories potentially created on remote AFP share 113 | .AppleDB 114 | .AppleDesktop 115 | Network Trash Folder 116 | Temporary Items 117 | .apdisk 118 | 119 | # Windows 120 | # ========================= 121 | 122 | # Windows image file caches 123 | Thumbs.db 124 | ehthumbs.db 125 | 126 | # Folder config file 127 | Desktop.ini 128 | 129 | # Recycle Bin used on file shares 130 | $RECYCLE.BIN/ 131 | 132 | # Windows Installer files 133 | *.cab 134 | *.msi 135 | *.msm 136 | *.msp 137 | 138 | # Windows shortcuts 139 | *.lnk 140 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyTencentAI 2 | Python3 下实现 腾讯人工智能API 调用 3 | ====================================== 4 | 5 | a、鹅厂近期发布了自己的人工智能 api,包括身份证ocr、名片ocr、文本分析等一堆API,因为前期项目用到图形OCR,遂实现试用了一下,发现准确率还不错,放出来给大家共享一下。 6 | 7 | b、基于python3,跟python2还是有些区别。 8 | 9 | c、特别需要提到的就是签名生成这块,鹅厂的api说明里写的比较简单,一开始在sign的生成上卡了好几天,后来加的官方群,咨询之后才解决。 -------------------------------------------------------------------------------- /TencentAPI.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 腾讯API字典 3 | 4 | 5 | TencentAPI={ 6 | #基本文本分析API 7 | "nlp_wordseg": { 8 | 'APINAME':'分词', #API中文简称 9 | 'APIDESC': '对文本进行智能分词识别,支持基础词与混排词粒度', #API描述 10 | 'APIURL': 'https://api.ai.qq.com/fcgi-bin/nlp/nlp_wordseg', #API请求URL 11 | 'APIPARA': 'text' #API非公共参数 12 | }, 13 | "nlp_wordpos": { 14 | 'APINAME':'词性标注', 15 | 'APIDESC': '对文本进行分词,同时为每个分词标注正确的词性', 16 | 'APIURL': 'https://api.ai.qq.com/fcgi-bin/nlp/nlp_wordpos', 17 | 'APIPARA': 'text' 18 | }, 19 | 'nlp_wordner': { 20 | 'APINAME':'专有名词识别', 21 | 'APIDESC': '对文本进行专有名词的分词识别,找出文本中的专有名词', 22 | 'APIURL': 'https://api.ai.qq.com/fcgi-bin/nlp/nlp_wordner', 23 | 'APIPARA': 'text' 24 | }, 25 | 'nlp_wordsyn': { 26 | 'APINAME':'同义词识别', 27 | 'APIDESC': '识别文本中存在同义词的分词,并返回相应的同义词', 28 | 'APIURL': 'https://api.ai.qq.com/fcgi-bin/nlp/nlp_wordsyn', 29 | 'APIPARA': 'text' 30 | }, 31 | 32 | #计算机视觉--OCR识别API 33 | "ocr_generalocr": { 34 | 'APINAME':'通用OCR识别', 35 | 'APIDESC': '识别上传图像上面的字段信息', 36 | 'APIURL': 'https://api.ai.qq.com/fcgi-bin/ocr/ocr_generalocr', 37 | 'APIPARA': 'image' 38 | }, 39 | "ocr_idcardocr": { 40 | 'APINAME':'身份证OCR识别', 41 | 'APIDESC': '识别身份证图像上面的详细身份信息', 42 | 'APIURL': 'https://api.ai.qq.com/fcgi-bin/ocr/ocr_idcardocr', 43 | 'APIPARA': 'image,card_type' 44 | }, 45 | "ocr_bcocr": { 46 | 'APINAME':'名片OCR识别', 47 | 'APIDESC': '识别名片图像上面的字段信息', 48 | 'APIURL': 'https://api.ai.qq.com/fcgi-bin/ocr/ocr_bcocr', 49 | 'APIPARA': 'image' 50 | }, 51 | "ocr_driverlicenseocr":{ 52 | 'APINAME':'行驶证驾驶证OCR识别', 53 | 'APIDESC': '识别行驶证或驾驶证图像上面的字段信息', 54 | 'APIURL': 'https://api.ai.qq.com/fcgi-bin/ocr/ocr_driverlicenseocr', 55 | 'APIPARA': 'image,type' 56 | }, 57 | "ocr_bizlicenseocr":{ 58 | 'APINAME':'营业执照OCR识别', 59 | 'APIDESC': '识别营业执照上面的字段信息', 60 | 'APIURL': 'https://api.ai.qq.com/fcgi-bin/ocr/ocr_bizlicenseocr', 61 | 'APIPARA': 'image' 62 | }, 63 | "ocr_creditcardocr":{ 64 | 'APINAME':'银行卡OCR识别', 65 | 'APIDESC': '识别银行卡上面的字段信息', 66 | 'APIURL': 'https://api.ai.qq.com/fcgi-bin/ocr/ocr_creditcardocr', 67 | 'APIPARA': 'image' 68 | }, 69 | } 70 | -------------------------------------------------------------------------------- /TencentAPIMsg.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | ''' 4 | create by : joshua zou 5 | create date : 2017.11.28 6 | Purpose: check tecent ai api 7 | ''' 8 | 9 | 10 | import requests 11 | import base64 12 | import hashlib 13 | import time 14 | import random 15 | import os,string,glob 16 | from PIL import Image 17 | from io import BytesIO 18 | from urllib.parse import urlencode 19 | from urllib import parse 20 | import json 21 | 22 | 23 | class TencentAPIMsg(object): 24 | def __init__(self,AppID=None,AppKey=None): 25 | ''' 26 | 图形OCR 27 | appid=000 28 | AppKey=0000 29 | ''' 30 | if not AppID: AppID = '1100000000000' 31 | if not AppKey: AppKey = 'ZV1w0000000000' 32 | self.__app_id= AppID 33 | self.__app_key= AppKey 34 | self.__img_base64str=None 35 | 36 | def get_random_str(self): 37 | """ 随机生成16位字符串 38 | @return: 16位字符串 39 | """ 40 | rule = string.ascii_lowercase + string.digits 41 | str = random.sample(rule, 32) 42 | return "".join(str) 43 | 44 | def get_time_stamp(self): 45 | return str(int(time.time())) 46 | 47 | def __get_image_base64str__(self,image): 48 | if not isinstance(image,Image):return None 49 | outputBuffer = BytesIO() 50 | bg.save(outputBuffer, format='JPEG') 51 | imgbase64 = base64.b64encode(outputBuffer.getvalue()) 52 | return imgbase64 53 | 54 | def __get_imgfile_base64str__(self,image): 55 | if not isinstance(image, str): return None 56 | if not os.path.isfile(image): return None 57 | 58 | with open(image,'rb') as fp: 59 | imgbase64 = base64.b64encode(fp.read()) 60 | return imgbase64 61 | 62 | def get_img_base64str(self,image): 63 | if isinstance(image, str): 64 | self.__img_base64str= self.__get_imgfile_base64str__(image) 65 | elif isinstance(image,Image): 66 | self.__img_base64str= self.__get_imgfile_base64str__(image) 67 | return self.__img_base64str.decode() 68 | 69 | # 生成签名相关算法 70 | def get_param_sign_str(self,param_dict): 71 | sb = ''; 72 | for k in sorted(param_dict.keys()): 73 | if (0 < len(sb)): 74 | sb += '&' + k + '=' + parse.quote_plus(param_dict[k]); 75 | else: 76 | sb += k + '=' + parse.quote_plus(param_dict[k]) 77 | sign_str = self.gen_str_md5(sb + '&app_key=' + self.__app_key) 78 | return sign_str 79 | 80 | # MD5加密方法 81 | def gen_str_md5(self,rawstr): 82 | hash = hashlib.md5() # md5对象,md5不能反解,但是加密是固定的,就是关系是一一对应,所以有缺陷,可以被对撞出来 83 | hash.update(bytes(rawstr, encoding='utf-8')) # 要对哪个字符串进行加密,就放这里 84 | return hash.hexdigest().upper(); 85 | 86 | # 组装字典,MD5加密方法 87 | ''' 88 | ====================================== 89 | tencent获得参数对列表N(字典升级排序) 90 | ====================================== 91 | 1\依照算法第一步要求,对参数对进行排序,得到参数对列表N如下。 92 | 参数名 参数值 93 | app_id 10000 94 | nonce_str 20e3408a79 95 | text 腾讯开放平台 96 | time_stamp 1493449657 97 | 98 | 2\按URL键值拼接字符串T 99 | 依照算法第二步要求,将参数对列表N的参数对进行URL键值拼接,值使用URL编码,URL编码算法用大写字母,例如%E8,而不是小写%e8,得到字符串T如下: 100 | app_id=10000&nonce_str=20e3408a79&text=%E8%85%BE%E8%AE%AF%E5%BC%80%E6%94%BE%E5%B9%B3%E5%8F%B0&time_stamp=1493449657 101 | 102 | 3\拼接应用密钥,得到字符串S 103 | 依照算法第三步要求,将应用密钥拼接到字符串T的尾末,得到字符串S如下。 104 | app_id=10000&nonce_str=20e3408a79&text=%E8%85%BE%E8%AE%AF%E5%BC%80%E6%94%BE%E5%B9%B3%E5%8F%B0&time_stamp=1493449657&app_key=a95eceb1ac8c24ee28b70f7dbba912bf 105 | 106 | 4\计算MD5摘要,得到签名字符串 107 | 依照算法第四步要求,对字符串S进行MD5摘要计算得到签名字符串如。 108 | e8f6f347d549fe514f0c9c452c95da9d 109 | 110 | 5\转化md5签名值大写 111 | 对签名字符串所有字母进行大写转换,得到接口请求签名,结束算法。 112 | E8F6F347D549FE514F0C9C452C95DA9D 113 | 114 | 6\最终请求数据 115 | 在完成签名计算后,即可得到所有接口请求数据,进一步完成API的调用。 116 | text 腾讯开放平台 接口请求数据,UTF-8编码 117 | app_id 10000 应用标识 118 | time_stamp 1493449657 请求时间戳(秒级),用于防止请求重放 119 | nonce_str 20e3408a79 请求随机字符串,用于保证签名不可预测 120 | sign E8F6F347D549FE514F0C9C452C95DA9D 请求签名 121 | ''' 122 | def gen_dict_md5(self,req_dict,app_key): 123 | if not isinstance(req_dict,dict) :return None 124 | if not isinstance(app_key,str) or not app_key:return None 125 | 126 | try: 127 | #方法1,自己写urlencode函数 128 | #md5text =self.get_param_sign_str(req_dict) 129 | 130 | #方法2,先对字典排序,排序之后,写app_key,再urlencode 131 | sort_dict= sorted(req_dict.items(), key=lambda item:item[0], reverse = False) 132 | sort_dict.append(('app_key',app_key)) 133 | sha = hashlib.md5() 134 | rawtext= urlencode(sort_dict).encode() 135 | sha.update(rawtext) 136 | md5text= sha.hexdigest().upper() 137 | #print(1) 138 | #字典可以在函数中改写 139 | if md5text: req_dict['sign']=md5text 140 | return md5text 141 | except Exception as e: 142 | return None 143 | 144 | #生成字典 145 | def init_req_dict(self, req_dict,app_id=None, app_key=None,time_stamp=None, nonce_str=None): 146 | """用MD5算法生成安全签名""" 147 | if not req_dict.get('app_id'): 148 | if not app_id: app_id= self.__app_id 149 | req_dict['app_id']= app_id 150 | 151 | #nonce_str 字典无值 152 | if not req_dict.get('time_stamp'): 153 | if not time_stamp: time_stamp= self.get_time_stamp() 154 | req_dict['time_stamp']= time_stamp 155 | 156 | if not req_dict.get('nonce_str'): 157 | if not nonce_str: nonce_str= self.get_random_str() 158 | req_dict['nonce_str']= nonce_str 159 | #app_key 取系统参数。 160 | if not app_key: app_key= self.__app_key 161 | md5key= self.gen_dict_md5(req_dict, app_key) 162 | return md5key 163 | 164 | 165 | 166 | 167 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /ai-example.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | ''' 4 | create by : joshua zou 5 | create date : 2017.11.28 6 | Purpose: check tecent ai api 7 | ''' 8 | 9 | 10 | from TencentAPI import * 11 | from TencentAPIMsg import * 12 | 13 | #通用api构造函数 14 | def ExecTecentAPI(*arg,**kwds): 15 | if kwds.get('Apiname'): apiname= kwds.pop('Apiname') 16 | 17 | url = TencentAPI[apiname]['APIURL'] 18 | name = TencentAPI[apiname]['APINAME'] 19 | desc= TencentAPI[apiname]['APIDESC'] 20 | para= TencentAPI[apiname]['APIPARA'] 21 | 22 | tx= TencentAPIMsg(APPID,APPKEY) 23 | 24 | Req_Dict={} 25 | for key in para.split(','): 26 | value=None 27 | #print (kwds) 28 | if kwds.get(key): value = kwds.pop(key) 29 | if key=='image': 30 | #图像获取base64 31 | value= tx.get_img_base64str(value) 32 | if key=='text': 33 | #文本进行GBK编码 34 | value= value.encode('gbk') 35 | 36 | Req_Dict[key]=value 37 | #print (key,value,Req_Dict[key]) 38 | 39 | #生成请求包 40 | sign= tx.init_req_dict(req_dict=Req_Dict) 41 | resp = requests.post(url,data=Req_Dict) 42 | print (name+',API应答码:'+str(resp.json()['ret'])) 43 | text = '' 44 | try : 45 | for each in resp.json()['data']['item_list']: 46 | text = text+'/'+ each['itemstring'] 47 | except : 48 | text = '' 49 | return text 50 | 51 | ''' 52 | 基本文本分析 53 | =========== 54 | 分词 对文本进行智能分词识别,支持基础词与混排词粒度 https://api.ai.qq.com/fcgi-bin/nlp/nlp_wordseg text 55 | 词性标注 对文本进行分词,同时为每个分词标注正确的词性 https://api.ai.qq.com/fcgi-bin/nlp/nlp_wordpos text 56 | 专有名词识别 对文本进行专有名词的分词识别,找出文本中的专有名词 https://api.ai.qq.com/fcgi-bin/nlp/nlp_wordner text 57 | 同义词识别 识别文本中存在同义词的分词,并返回相应的同义词 https://api.ai.qq.com/fcgi-bin/nlp/nlp_wordsyn text 58 | 59 | 60 | 计算机视觉--OCR识别 61 | ==================== 62 | 通用OCR识别 识别上传图像上面的字段信息 https://api.ai.qq.com/fcgi-bin/ocr/ocr_generalocr image 63 | 身份证OCR识别 识别身份证图像上面的详细身份信息 https://api.ai.qq.com/fcgi-bin/ocr/ocr_idcardocr image,card_type(身份证,0-正面,1-反面) 64 | 名片OCR识别 识别名片图像上面的字段信息 https://api.ai.qq.com/fcgi-bin/ocr/ocr_bcocr image 65 | 行驶证驾驶证OCR识别 识别行驶证或驾驶证图像上面的字段信息 https://api.ai.qq.com/fcgi-bin/ocr/ocr_driverlicenseocr image,type(识别类型,0-行驶证识别,1-驾驶证识别) 66 | 营业执照OCR识别 识别营业执照上面的字段信息 https://api.ai.qq.com/fcgi-bin/ocr/ocr_bizlicenseocr image 67 | 银行卡OCR识别 识别银行卡上面的字段信息 https://api.ai.qq.com/fcgi-bin/ocr/ocr_creditcardocr image 68 | ''' 69 | #改成你自己腾讯APPID及APPKEY 70 | APPID='100000000' 71 | APPKEY='ZV1w000000' 72 | 73 | if __name__ == "__main__": 74 | for file in glob.glob('D:\python\guoyaotang\*.jpg'): 75 | rest = ExecTecentAPI(Apiname='ocr_generalocr',image=file) 76 | print (file+rest) --------------------------------------------------------------------------------