├── asr ├── __init__.py ├── flash_recognizer.py └── speech_recognizer.py ├── soe ├── __init__.py └── speaking_assessment.py ├── tts ├── __init__.py ├── speech_synthesizer.py ├── speech_synthesizer_ws.py └── flowing_speech_synthesizer.py ├── vc ├── __init__.py └── speech_convertor_ws.py ├── __init__.py ├── common ├── __init__.py ├── utils.py ├── credential.py └── log.py ├── .gitignore ├── examples ├── asr │ ├── test.wav │ ├── flashexample.py │ └── asrexample.py ├── vc │ ├── test.wav │ └── vcexample.py ├── tts │ ├── tts_text.txt │ ├── ttsexample.py │ └── ttsexample_flowing.py └── soe │ └── soeexample.py ├── readme.md └── LICENSE /asr/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /soe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | -------------------------------------------------------------------------------- /common/__init__.py: -------------------------------------------------------------------------------- 1 | #!#-*-coding:utf-8 -*- 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .DS_Store 3 | .idea 4 | *.wav 5 | *.mp3 6 | *.pcm 7 | *.log -------------------------------------------------------------------------------- /examples/asr/test.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentCloud/tencentcloud-speech-sdk-python/HEAD/examples/asr/test.wav -------------------------------------------------------------------------------- /examples/vc/test.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentCloud/tencentcloud-speech-sdk-python/HEAD/examples/vc/test.wav -------------------------------------------------------------------------------- /common/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | 4 | def is_python3(): 5 | if sys.version > '3': 6 | return True 7 | return False -------------------------------------------------------------------------------- /common/credential.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | class Credential: 3 | def __init__(self, secret_id, secret_key, token=""): 4 | self.secret_id = secret_id 5 | self.secret_key = secret_key 6 | self.token = token 7 | -------------------------------------------------------------------------------- /common/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import logging.handlers 3 | 4 | 5 | 6 | FORMAT = '%(asctime)15s %(name)s-%(levelname)s %(funcName)s:%(lineno)s %(message)s' 7 | logging.basicConfig(level=logging.DEBUG, format=FORMAT) 8 | logger = logging.getLogger('tencent_speech.log') 9 | 10 | handler = logging.handlers.RotatingFileHandler('tencent_speech.log', maxBytes=1024 * 1024, 11 | backupCount=5, encoding='utf-8') 12 | handler.setLevel(logging.DEBUG) 13 | handler.setFormatter(logging.Formatter(FORMAT)) 14 | logger.addHandler(handler) 15 | logger.setLevel('INFO') 16 | 17 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # 简介 2 | 3 | 欢迎使用腾讯云语音SDK,腾讯云语音SDK为开发者提供了访问腾讯云语音识别、语音合成等语音服务的配套开发工具,简化腾讯云语音服务的接入流程。 4 | 5 | 本项目是腾讯云语音SDK的python语言版本。 6 | 7 | # 依赖环境 8 | 9 | 1. python 环境 10 | 2. pip命令行安装websocket, websocket-client, requests 11 | 注意:websocket-client必须是0.48版本 12 | 3. 在腾讯云控制台[账号信息](https://console.cloud.tencent.com/developer)页面查看账号APPID,[访问管理](https://console.cloud.tencent.com/cam/capi)页面获取 SecretID 和 SecretKey 。 13 | 14 | 注:TTS websocket,仅支持python3,使用前需安装websocket-client包,如下 15 | ``` 16 | pip3 install websocket-client 17 | ``` 18 | 19 | # 示例 20 | 21 | 参见 [examples](https://github.com/TencentCloud/tencentcloud-speech-sdk-python/tree/master/examples) 目录,该目录下包含各语音服务的示例代码。 22 | -------------------------------------------------------------------------------- /examples/tts/tts_text.txt: -------------------------------------------------------------------------------- 1 | 床前明月光,疑是地上霜。 2 | 举头望明月,低头思故乡。 3 | 红豆生南国,春来发几枝。 4 | 君自故乡来,应知故乡事。 5 | 春眠不觉晓,处处闻啼鸟。 6 | 欲穷千里目,更上一层楼。 7 | 黄河远上白云间,一片孤城万仞山。 8 | 两岸猿声啼不住,轻舟已过万重山。 9 | 独在异乡为异客,每逢佳节倍思亲。 10 | 春城无处不飞花,寒食东风御柳斜。 11 | 月落乌啼霜满天,江枫渔火对愁眠。 12 | 春潮带雨晚来急,野渡无人舟自横。 13 | 洛阳亲友如相问,一片冰心在玉壶。 14 | 空山新雨后,天气晚来秋。 15 | 千里莺啼绿映红,水村山郭酒旗风。 16 | 朝辞白帝彩云间,千里江陵一日还。 17 | 会当凌绝顶,一览众山小。 18 | 野旷天低树,江清月近人。 19 | 国破山河在,城春草木深。 20 | 感时花溅泪,恨别鸟惊心。 21 | 白日依山尽,黄河入海流。 22 | 红豆不堪看,满眼相思泪。 23 | 独怜幽草涧边生,上有黄鹂深树鸣。 24 | 云想衣裳花想容,春风拂槛露华浓。 25 | 夜来风雨声,花落知多少。 26 | 江水流春去欲尽,江潭落月复西斜。 27 | 独在异乡为异客,每逢佳节倍思亲。 28 | 春江潮水连海平,海上明月共潮生。 29 | 此夜曲中闻折柳,何人不起故园情。 30 | 无边落木萧萧下,不尽长江滚滚来。 31 | 春心莫共花争发,一寸相思一寸灰。 32 | 月出惊山鸟,时鸣春涧中。 33 | 江畔何人初见月,江月何年初照人。 34 | 谁家玉笛暗飞声,散入春风满洛城。 35 | 草长莺飞二月天,拂堤杨柳醉春烟。 36 | 春风又绿江南岸,明月何时照我还。 37 | 露从今夜白,月是故乡明。 38 | 君问归期未有期,巴山夜雨涨秋池。 39 | 秋水共长天一色,落霞与孤鹜齐飞。 40 | 月黑雁飞高,单于夜遁逃。 41 | 风急天高猿啸哀,渚清沙白鸟飞回。 42 | 星垂平野阔,月涌大江流。 43 | 秦时明月汉时关,万里长征人未还。 44 | 沧海月明珠有泪,蓝田日暖玉生烟。 45 | 独上江楼思渺然,月光如水水如天。 46 | 相看两不厌,只有敬亭山。 47 | 桃花流水窅然去,别有天地非人间。 48 | 黄鹤一去不复返,白云千载空悠悠。 49 | 朱雀桥边野草花,乌衣巷口夕阳斜。 50 | 绿蚁新醅酒,红泥小火炉。 51 | 姑苏城外寒山寺,夜半钟声到客船。 52 | 银烛秋光冷画屏,轻罗小扇扑流萤。 53 | 碧玉妆成一树高,万条垂下绿丝绦。 54 | 渭城朝雨浥轻尘,客舍青青柳色新。 55 | 忽如一夜春风来,千树万树梨花开。 56 | 故人西辞黄鹤楼,烟花三月下扬州。 57 | 洛阳城里春光好,洛阳才子他乡老。 58 | 江南好,风景旧曾谙。 59 | 江南可采莲,莲叶何田田。 60 | 采得百花成蜜后,为谁辛苦为谁甜。 -------------------------------------------------------------------------------- /examples/asr/flashexample.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import time 4 | import sys 5 | import threading 6 | from datetime import datetime 7 | import json 8 | sys.path.append("../..") 9 | from common import credential 10 | from asr import flash_recognizer 11 | 12 | # 注意:使用前务必先填写APPID、SECRET_ID、SECRET_KEY,否则会无法运行!!! 13 | APPID = "" 14 | SECRET_ID = "" 15 | SECRET_KEY = "" 16 | ENGINE_TYPE = "16k_zh" 17 | 18 | if __name__=="__main__": 19 | if APPID == "": 20 | print("Please set APPID!") 21 | exit(0) 22 | if SECRET_ID == "": 23 | print("Please set SECRET_ID!") 24 | exit(0) 25 | if SECRET_KEY == "": 26 | print("Please set SECRET_KEY!") 27 | exit(0) 28 | 29 | credential_var = credential.Credential(SECRET_ID, SECRET_KEY) 30 | # 新建FlashRecognizer,一个recognizer可以执行N次识别请求 31 | recognizer = flash_recognizer.FlashRecognizer(APPID, credential_var) 32 | 33 | # 新建识别请求 34 | req = flash_recognizer.FlashRecognitionRequest(ENGINE_TYPE) 35 | req.set_filter_modal(0) 36 | req.set_filter_punc(0) 37 | req.set_filter_dirty(0) 38 | req.set_voice_format("wav") 39 | req.set_word_info(0) 40 | req.set_convert_num_mode(1) 41 | 42 | # 音频路径 43 | audio = "./test.wav" 44 | with open(audio, 'rb') as f: 45 | #读取音频数据 46 | data = f.read() 47 | #执行识别 48 | resultData = recognizer.recognize(req, data) 49 | resp = json.loads(resultData) 50 | request_id = resp["request_id"] 51 | code = resp["code"] 52 | if code != 0: 53 | print("recognize faild! request_id: ", request_id, " code: ", code, ", message: ", resp["message"]) 54 | exit(0) 55 | 56 | print("request_id: ", request_id) 57 | #一个channl_result对应一个声道的识别结果 58 | #大多数音频是单声道,对应一个channl_result 59 | for channl_result in resp["flash_result"]: 60 | print("channel_id: ", channl_result["channel_id"]) 61 | print(channl_result["text"]) 62 | -------------------------------------------------------------------------------- /examples/asr/asrexample.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 引用 SDK 3 | 4 | import time 5 | import sys 6 | import threading 7 | from datetime import datetime 8 | import json 9 | sys.path.append("../..") 10 | from common import credential 11 | from asr import speech_recognizer 12 | 13 | APPID = "" 14 | SECRET_ID = "" 15 | SECRET_KEY = "" 16 | ENGINE_MODEL_TYPE = "16k_zh" 17 | SLICE_SIZE = 6400 18 | 19 | 20 | class MySpeechRecognitionListener(speech_recognizer.SpeechRecognitionListener): 21 | def __init__(self, id): 22 | self.id = id 23 | 24 | def on_recognition_start(self, response): 25 | print("%s|%s|OnRecognitionStart\n" % ( 26 | datetime.now().strftime("%Y-%m-%d %H:%M:%S"), response['voice_id'])) 27 | 28 | def on_sentence_begin(self, response): 29 | rsp_str = json.dumps(response, ensure_ascii=False) 30 | print("%s|%s|OnRecognitionSentenceBegin, rsp %s\n" % ( 31 | datetime.now().strftime("%Y-%m-%d %H:%M:%S"), response['voice_id'], rsp_str)) 32 | 33 | def on_recognition_result_change(self, response): 34 | rsp_str = json.dumps(response, ensure_ascii=False) 35 | print("%s|%s|OnResultChange, rsp %s\n" % (datetime.now().strftime( 36 | "%Y-%m-%d %H:%M:%S"), response['voice_id'], rsp_str)) 37 | 38 | def on_sentence_end(self, response): 39 | rsp_str = json.dumps(response, ensure_ascii=False) 40 | print("%s|%s|OnSentenceEnd, rsp %s\n" % (datetime.now().strftime( 41 | "%Y-%m-%d %H:%M:%S"), response['voice_id'], rsp_str)) 42 | 43 | def on_recognition_complete(self, response): 44 | print("%s|%s|OnRecognitionComplete\n" % ( 45 | datetime.now().strftime("%Y-%m-%d %H:%M:%S"), response['voice_id'])) 46 | 47 | def on_fail(self, response): 48 | rsp_str = json.dumps(response, ensure_ascii=False) 49 | print("%s|%s|OnFail,message %s\n" % (datetime.now().strftime( 50 | "%Y-%m-%d %H:%M:%S"), response['voice_id'], rsp_str)) 51 | 52 | 53 | def process(id): 54 | audio = "test.wav" 55 | listener = MySpeechRecognitionListener(id) 56 | credential_var = credential.Credential(SECRET_ID, SECRET_KEY) 57 | recognizer = speech_recognizer.SpeechRecognizer( 58 | APPID, credential_var, ENGINE_MODEL_TYPE, listener) 59 | recognizer.set_filter_modal(1) 60 | recognizer.set_filter_punc(1) 61 | recognizer.set_filter_dirty(1) 62 | recognizer.set_need_vad(1) 63 | #recognizer.set_vad_silence_time(600) 64 | recognizer.set_voice_format(1) 65 | recognizer.set_word_info(1) 66 | #recognizer.set_nonce("12345678") 67 | recognizer.set_convert_num_mode(1) 68 | try: 69 | recognizer.start() 70 | with open(audio, 'rb') as f: 71 | content = f.read(SLICE_SIZE) 72 | while content: 73 | recognizer.write(content) 74 | content = f.read(SLICE_SIZE) 75 | #sleep模拟实际实时语音发送间隔 76 | time.sleep(0.02) 77 | except Exception as e: 78 | print(e) 79 | finally: 80 | recognizer.stop() 81 | 82 | 83 | def process_multithread(number): 84 | thread_list = [] 85 | for i in range(0, number): 86 | thread = threading.Thread(target=process, args=(i,)) 87 | thread_list.append(thread) 88 | thread.start() 89 | 90 | for thread in thread_list: 91 | thread.join() 92 | 93 | 94 | if __name__ == "__main__": 95 | process(0) 96 | # process_multithread(20) 97 | -------------------------------------------------------------------------------- /examples/soe/soeexample.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 引用 SDK 3 | 4 | import time 5 | import sys 6 | import threading 7 | from datetime import datetime 8 | import json 9 | sys.path.append("../..") 10 | from common import credential 11 | from soe import speaking_assessment 12 | 13 | #TODO 补充账号信息 14 | APPID = "" 15 | SECRET_ID = "" 16 | SECRET_KEY = "" 17 | # 只有临时秘钥鉴权需要 18 | TOKEN = "" 19 | ENGINE_MODEL_TYPE = "16k_en" 20 | # 对应wav格式200ms音频 21 | SLICE_SIZE = 32000 22 | 23 | 24 | 25 | class MySpeechRecognitionListener(speaking_assessment.SpeakingAssessmentListener): 26 | def __init__(self, id): 27 | self.id = id 28 | 29 | def on_recognition_start(self, response): 30 | print("%s|%s|OnRecognitionStart\n" % ( 31 | datetime.now().strftime("%Y-%m-%d %H:%M:%S"), response['voice_id'])) 32 | 33 | def on_intermediate_result(self, response): 34 | rsp_str = json.dumps(response, ensure_ascii=False) 35 | print("%s|%s|OnIntermediateResults|%s\n" % ( 36 | datetime.now().strftime("%Y-%m-%d %H:%M:%S"), response['voice_id'], rsp_str)) 37 | 38 | def on_recognition_complete(self, response): 39 | rsp_str = json.dumps(response, ensure_ascii=False) 40 | print("%s|%s|OnRecognitionComplete| %s\n" % ( 41 | datetime.now().strftime("%Y-%m-%d %H:%M:%S"), response['voice_id'], rsp_str)) 42 | 43 | def on_fail(self, response): 44 | rsp_str = json.dumps(response, ensure_ascii=False) 45 | print("%s|%s|OnFail,message %s\n" % (datetime.now().strftime( 46 | "%Y-%m-%d %H:%M:%S"), response['voice_id'], rsp_str)) 47 | 48 | # 流式识别模式 49 | def process(id): 50 | audio = "english.wav" 51 | listener = MySpeechRecognitionListener(id) 52 | # 临时秘钥鉴权使用带token的方式 credential_var = credential.Credential(SECRET_ID, SECRET_KEY, TOKEN) 53 | credential_var = credential.Credential(SECRET_ID, SECRET_KEY) 54 | recognizer = speaking_assessment.SpeakingAssessment( 55 | APPID, credential_var, ENGINE_MODEL_TYPE, listener) 56 | recognizer.set_text_mode(0) 57 | recognizer.set_ref_text("beautiful") 58 | recognizer.set_eval_mode(0) 59 | recognizer.set_keyword("") 60 | recognizer.set_sentence_info_enabled(0) 61 | recognizer.set_voice_format(1) 62 | try: 63 | recognizer.start() 64 | with open(audio, 'rb') as f: 65 | content = f.read(SLICE_SIZE) 66 | while content: 67 | recognizer.write(content) 68 | content = f.read(SLICE_SIZE) 69 | #sleep模拟实际实时语音发送间隔 70 | # 注意:该行sleep代码用于模拟实时音频流1:1产生音频数据(每200ms产生200ms音频) 71 | # 实际音频流场景建议删除该行代码,或业务根据自己的需求情况自行调整 72 | time.sleep(0.2) 73 | except Exception as e: 74 | print(e) 75 | finally: 76 | recognizer.stop() 77 | 78 | # 录音识别模式 79 | def process_rec(id): 80 | audio = "english.wav" 81 | listener = MySpeechRecognitionListener(id) 82 | # 临时秘钥鉴权使用带token的方式 credential_var = credential.Credential(SECRET_ID, SECRET_KEY, TOKEN) 83 | credential_var = credential.Credential(SECRET_ID, SECRET_KEY) 84 | recognizer = speaking_assessment.SpeakingAssessment( 85 | APPID, credential_var, ENGINE_MODEL_TYPE, listener) 86 | recognizer.set_text_mode(0) 87 | recognizer.set_ref_text("beautiful") 88 | recognizer.set_eval_mode(0) 89 | recognizer.set_keyword("") 90 | recognizer.set_sentence_info_enabled(0) 91 | recognizer.set_voice_format(1) 92 | # 录音识别下可发送单个大长度分片(上限300s), 实时评测建议不要使用此模式 93 | # 单次连接只能发一个分片传送完全部音频数据 94 | recognizer.set_rec_mode(1) 95 | try: 96 | recognizer.start() 97 | with open(audio, 'rb') as f: 98 | content = f.read() 99 | recognizer.write(content) 100 | except Exception as e: 101 | print(e) 102 | finally: 103 | recognizer.stop() 104 | 105 | 106 | def process_multithread(number): 107 | thread_list = [] 108 | for i in range(0, number): 109 | thread = threading.Thread(target=process, args=(i,)) 110 | thread_list.append(thread) 111 | thread.start() 112 | 113 | for thread in thread_list: 114 | thread.join() 115 | 116 | 117 | if __name__ == "__main__": 118 | # 实时识别 119 | process(0) 120 | # 录音识别 121 | process_rec(0) 122 | # process_multithread(20) 123 | -------------------------------------------------------------------------------- /tts/speech_synthesizer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | import hmac 4 | import hashlib 5 | import base64 6 | import time 7 | import json 8 | import uuid 9 | import requests 10 | 11 | 12 | def is_python3(): 13 | if sys.version > '3': 14 | return True 15 | return False 16 | 17 | 18 | _PROTOCOL = "https://" 19 | _HOST = "tts.cloud.tencent.com" 20 | _PATH = "/stream" 21 | _ACTION = "TextToStreamAudio" 22 | 23 | 24 | class SpeechSynthesisListener: 25 | ''' 26 | reponse: 27 | 所有回调均包含session_id字段 28 | on_message与on_message包含data字段 29 | on_fail包含Code、Message字段。 30 | 31 | 字段名 类型 说明 32 | session_id String 本次请求id 33 | data String 语音数据 34 | Code String 错误码 35 | Message String 错误信息 36 | ''' 37 | 38 | def on_message(self, response): 39 | pass 40 | 41 | def on_complete(self, response): 42 | pass 43 | 44 | def on_fail(self, response): 45 | pass 46 | 47 | 48 | class SpeechSynthesizer: 49 | 50 | def __init__(self, appid, credential, voice_type, listener): 51 | self.appid = appid 52 | self.credential = credential 53 | self.voice_type = voice_type 54 | self.codec = "pcm" 55 | self.sample_rate = 16000 56 | self.volume = 0 57 | self.speed = 0 58 | self.listener = listener 59 | 60 | def set_voice_type(self, voice_type): 61 | self.voice_type = voice_type 62 | 63 | def set_codec(self, codec): 64 | self.codec = codec 65 | 66 | def set_sample_rate(self, sample_rate): 67 | self.sample_rate = sample_rate 68 | 69 | def set_speed(self, speed): 70 | self.speed = speed 71 | 72 | def set_volume(self, volume): 73 | self.volume = volume 74 | 75 | def synthesis(self, text): 76 | session_id = str(uuid.uuid1()) 77 | params = self.__gen_params(session_id, text) 78 | signature = self.__gen_signature(params) 79 | headers = { 80 | "Content-Type": "application/json", 81 | "Authorization": str(signature) 82 | } 83 | url = _PROTOCOL + _HOST + _PATH 84 | r = requests.post(url, headers=headers, 85 | data=json.dumps(params), stream=True) 86 | data = None 87 | response = dict() 88 | response["session_id"] = session_id 89 | for chunk in r.iter_content(None): 90 | if data is None: 91 | try: 92 | rsp = json.loads(chunk) 93 | response["Code"] = rsp["Response"]["Error"]["Code"] 94 | response["Message"] = rsp["Response"]["Error"]["Message"] 95 | self.listener.on_fail(response) 96 | return 97 | except: 98 | data = chunk 99 | response["data"] = data 100 | self.listener.on_message(response) 101 | continue 102 | data = data + chunk 103 | response["data"] = data 104 | self.listener.on_message(response) 105 | response["data"] = data 106 | self.listener.on_complete(response) 107 | 108 | def __gen_signature(self, params): 109 | sort_dict = sorted(params.keys()) 110 | sign_str = "POST" + _HOST + _PATH + "?" 111 | for key in sort_dict: 112 | sign_str = sign_str + key + "=" + str(params[key]) + '&' 113 | sign_str = sign_str[:-1] 114 | hmacstr = hmac.new(self.credential.secret_key.encode('utf-8'), 115 | sign_str.encode('utf-8'), hashlib.sha1).digest() 116 | s = base64.b64encode(hmacstr) 117 | s = s.decode('utf-8') 118 | return s 119 | 120 | def __sign(self, signstr, secret_key): 121 | hmacstr = hmac.new(secret_key.encode('utf-8'), 122 | signstr.encode('utf-8'), hashlib.sha1).digest() 123 | s = base64.b64encode(hmacstr) 124 | s = s.decode('utf-8') 125 | return s 126 | 127 | def __gen_params(self, session_id, text): 128 | params = dict() 129 | params['Action'] = _ACTION 130 | params['AppId'] = int(self.appid) 131 | params['SecretId'] = self.credential.secret_id 132 | params['ModelType'] = 1 133 | params['VoiceType'] = self.voice_type 134 | params['Codec'] = self.codec 135 | params['SampleRate'] = self.sample_rate 136 | params['Speed'] = self.speed 137 | params['Volume'] = self.volume 138 | params['SessionId'] = session_id 139 | params['Text'] = text 140 | 141 | timestamp = int(time.time()) 142 | params['Timestamp'] = timestamp 143 | params['Expired'] = timestamp + 24 * 60 * 60 144 | return params 145 | -------------------------------------------------------------------------------- /examples/vc/vcexample.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 引用 SDK 3 | 4 | import sys 5 | sys.path.append("../..") 6 | 7 | import wave 8 | import time 9 | import threading 10 | from common import credential 11 | from vc import speech_convertor_ws 12 | from common.log import logger 13 | from common.utils import is_python3 14 | 15 | 16 | APPID = 0 17 | SECRET_ID = '' 18 | SECRET_KEY = '' 19 | 20 | VOICETYPE = 301005 # 音色类型 21 | CODEC = "pcm" # 音频格式:pcm 22 | SAMPLE_RATE = 16000 # 音频采样率:16000 23 | 24 | 25 | class MySpeechConvertListener(speech_convertor_ws.SpeechConvertListener): 26 | 27 | def __init__(self, id, codec, sample_rate): 28 | self.start_time = time.time() 29 | self.id = id 30 | self.codec = codec.lower() 31 | self.sample_rate = sample_rate 32 | 33 | self.audio_file = "" 34 | self.audio_data = bytes() 35 | 36 | def set_output_file(self, filename): 37 | self.audio_file = filename 38 | 39 | def on_convert_start(self, voice_id): 40 | ''' 41 | voice_id: voice id,类型字符串 42 | ''' 43 | super().on_convert_start(voice_id) 44 | 45 | # TODO 音色变换开始,添加业务逻辑 46 | if not self.audio_file: 47 | self.audio_file = "speech_convert_output." + self.codec 48 | self.audio_data = bytes() 49 | 50 | def on_convert_end(self): 51 | super().on_convert_end() 52 | 53 | # TODO 音色变换结束,添加业务逻辑 54 | logger.info("write audio file, path={}, size={}".format( 55 | self.audio_file, len(self.audio_data) 56 | )) 57 | if self.codec == "pcm": 58 | wav_fp = wave.open(self.audio_file, "wb") 59 | wav_fp.setnchannels(1) 60 | wav_fp.setsampwidth(2) 61 | wav_fp.setframerate(self.sample_rate) 62 | wav_fp.writeframes(self.audio_data) 63 | wav_fp.close() 64 | else: 65 | logger.info("codec {}: service NOT support") 66 | 67 | def on_audio_result(self, audio_bytes): 68 | ''' 69 | audio_bytes: 二进制音频,类型 bytes 70 | ''' 71 | super().on_audio_result(audio_bytes) 72 | 73 | # TODO 接收到二进制音频数据,添加实时播放或保存逻辑 74 | self.audio_data += audio_bytes 75 | 76 | def on_convert_fail(self, response): 77 | ''' 78 | response: 错误,类型 dict,如下 79 | 字段名 类型 80 | Code int 错误码 81 | Message string 错误信息 82 | ''' 83 | super().on_convert_fail(response) 84 | 85 | # TODO 音色变换失败,添加错误处理逻辑 86 | err_code = response["Code"] 87 | err_msg = response["Message"] 88 | 89 | 90 | def process(id, input_audio_file): 91 | # 初始化音色变换监听器 MySpeechConvertListener 与变换服务 SpeechConvertor 92 | listener = MySpeechConvertListener(id, CODEC, SAMPLE_RATE) 93 | listener.set_output_file( 94 | '.'.join(input_audio_file.split('.')[:-1])+'_output_{}.wav'.format(id)) # set output file 95 | 96 | credential_var = credential.Credential(SECRET_ID, SECRET_KEY) 97 | convertor = speech_convertor_ws.SpeechConvertor( 98 | APPID, credential_var, listener) 99 | convertor.set_voice_type(VOICETYPE) 100 | convertor.set_codec(CODEC) 101 | convertor.set_sample_rate(SAMPLE_RATE) 102 | convertor.start() 103 | 104 | # 等待连接成功 105 | if not convertor.wait_to_send(): 106 | logger.error("wait to send failed") 107 | return 108 | 109 | # 音频要求:16k采样率,单声道,16bit 110 | wavfile = wave.open(input_audio_file, "rb") 111 | sample_rate = wavfile.getframerate() 112 | if sample_rate != 16000: 113 | logger.error("sample rate is not 16000, please resample to 16000") 114 | return 115 | channel_num = wavfile.getnchannels() 116 | if channel_num != 1: 117 | logger.error("channel num is not 1, please convert to mono") 118 | return 119 | sample_width = wavfile.getsampwidth() 120 | if sample_width != 2: 121 | logger.error("sample width is not 2, please convert to 16bit") 122 | return 123 | 124 | # 发送音频:每100ms发送100ms时长(即1:1实时率)的数据包,对应 pcm 大小为 16k采样率3200字节 125 | nframe_per_chunk = sample_rate // 10 # 100ms per chunk 126 | total_frame_num = wavfile.getnframes() 127 | chunk_num = total_frame_num // nframe_per_chunk 128 | logger.info("process send start: chunk_size={}, nframe_per_chunk={}, chunk_num={}".format( 129 | nframe_per_chunk*sample_width, nframe_per_chunk, chunk_num)) 130 | is_end = False 131 | for i in range(0, chunk_num): 132 | if i == chunk_num - 1: 133 | is_end = True 134 | 135 | data = wavfile.readframes(nframe_per_chunk) 136 | if not convertor.send(data, is_end): 137 | logger.error("process send failed, break") 138 | break 139 | time.sleep(0.1) # sleep 100ms 140 | wavfile.close() 141 | logger.info("process send done") 142 | 143 | # 等待接收音色变换完成 144 | convertor.wait() 145 | logger.info("process recv done") 146 | 147 | def process_multithread(number, input_audio_file): 148 | thread_list = [] 149 | for i in range(0, number): 150 | thread = threading.Thread(target=process, args=(i,input_audio_file,)) 151 | thread_list.append(thread) 152 | thread.start() 153 | print(i) 154 | 155 | for thread in thread_list: 156 | thread.join() 157 | 158 | 159 | if __name__ == "__main__": 160 | if not is_python3(): 161 | print("only support python3") 162 | sys.exit(0) 163 | input_audio_file = "test.wav" 164 | process_multithread(1, input_audio_file) 165 | -------------------------------------------------------------------------------- /asr/flash_recognizer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import requests 3 | import hmac 4 | import hashlib 5 | import base64 6 | import time 7 | import random 8 | import os 9 | import json 10 | from common import credential 11 | 12 | #录音识别极速版使用 13 | class FlashRecognitionRequest: 14 | def __init__(self, engine_type): 15 | self.engine_type = engine_type 16 | self.speaker_diarization = 0 17 | self.hotword_id = "" 18 | self.hotword_list = "" 19 | self.input_sample_rate = 0 20 | self.customization_id = "" 21 | self.filter_dirty = 0 22 | self.filter_modal = 0 23 | self.filter_punc = 0 24 | self.convert_num_mode = 1 25 | self.word_info = 0 26 | self.voice_format = "" 27 | self.first_channel_only = 1 28 | self.reinforce_hotword = 0 29 | self.sentence_max_length = 0 30 | 31 | def set_first_channel_only(self, first_channel_only): 32 | self.first_channel_only = first_channel_only 33 | 34 | def set_speaker_diarization(self, speaker_diarization): 35 | self.speaker_diarization = speaker_diarization 36 | 37 | def set_filter_dirty(self, filter_dirty): 38 | self.filter_dirty = filter_dirty 39 | 40 | def set_filter_modal(self, filter_modal): 41 | self.filter_modal = filter_modal 42 | 43 | def set_filter_punc(self, filter_punc): 44 | self.filter_punc = filter_punc 45 | 46 | def set_convert_num_mode(self, convert_num_mode): 47 | self.convert_num_mode = convert_num_mode 48 | 49 | def set_word_info(self, word_info): 50 | self.word_info = word_info 51 | 52 | def set_hotword_id(self, hotword_id): 53 | self.hotword_id = hotword_id 54 | 55 | def set_hotword_list(self, hotword_list): 56 | self.hotword_list = hotword_list 57 | 58 | def set_input_sample_rate(self, input_sample_rate): 59 | self.input_sample_rate = input_sample_rate 60 | 61 | def set_customization_id(self, customization_id): 62 | self.customization_id = customization_id 63 | 64 | def set_voice_format(self, voice_format): 65 | self.voice_format = voice_format 66 | 67 | def set_sentence_max_length(self, sentence_max_length): 68 | self.sentence_max_length = sentence_max_length 69 | 70 | def set_reinforce_hotword(self, reinforce_hotword): 71 | self.reinforce_hotword = reinforce_hotword 72 | 73 | 74 | 75 | class FlashRecognizer: 76 | ''' 77 | reponse: 78 | 字段名 类型 79 | request_id string 80 | status Integer 81 | message String 82 | audio_duration Integer 83 | flash_result Result Array 84 | 85 | Result的结构体格式为: 86 | text String 87 | channel_id Integer 88 | sentence_list Sentence Array 89 | 90 | Sentence的结构体格式为: 91 | text String 92 | start_time Integer 93 | end_time Integer 94 | speaker_id Integer 95 | word_list Word Array 96 | 97 | Word的类型为: 98 | word String 99 | start_time Integer 100 | end_time Integer 101 | stable_flag: Integer 102 | ''' 103 | 104 | def __init__(self, appid, credential): 105 | self.credential = credential 106 | self.appid = appid 107 | 108 | def _format_sign_string(self, param): 109 | signstr = "POSTasr.cloud.tencent.com/asr/flash/v1/" 110 | for t in param: 111 | if 'appid' in t: 112 | signstr += str(t[1]) 113 | break 114 | signstr += "?" 115 | for x in param: 116 | tmp = x 117 | if 'appid' in x: 118 | continue 119 | for t in tmp: 120 | signstr += str(t) 121 | signstr += "=" 122 | signstr = signstr[:-1] 123 | signstr += "&" 124 | signstr = signstr[:-1] 125 | return signstr 126 | 127 | def _build_header(self): 128 | header = dict() 129 | header["Host"] = "asr.cloud.tencent.com" 130 | return header 131 | 132 | def _sign(self, signstr, secret_key): 133 | hmacstr = hmac.new(secret_key.encode('utf-8'), 134 | signstr.encode('utf-8'), hashlib.sha1).digest() 135 | s = base64.b64encode(hmacstr) 136 | s = s.decode('utf-8') 137 | return s 138 | 139 | def _build_req_with_signature(self, secret_key, params, header): 140 | query = sorted(params.items(), key=lambda d: d[0]) 141 | signstr = self._format_sign_string(query) 142 | signature = self._sign(signstr, secret_key) 143 | header["Authorization"] = signature 144 | requrl = "https://" 145 | requrl += signstr[4::] 146 | return requrl 147 | 148 | def _create_query_arr(self, req): 149 | query_arr = dict() 150 | query_arr['appid'] = self.appid 151 | query_arr['secretid'] = self.credential.secret_id 152 | query_arr['timestamp'] = str(int(time.time())) 153 | query_arr['engine_type'] = req.engine_type 154 | query_arr['voice_format'] = req.voice_format 155 | query_arr['speaker_diarization'] = req.speaker_diarization 156 | if req.hotword_id != "": 157 | query_arr['hotword_id'] = req.hotword_id 158 | if req.hotword_list != "": 159 | query_arr['hotword_list'] = req.hotword_list 160 | if req.input_sample_rate != 0: 161 | query_arr['input_sample_rate'] = req.input_sample_rate 162 | query_arr['customization_id'] = req.customization_id 163 | query_arr['filter_dirty'] = req.filter_dirty 164 | query_arr['filter_modal'] = req.filter_modal 165 | query_arr['filter_punc'] = req.filter_punc 166 | query_arr['convert_num_mode'] = req.convert_num_mode 167 | query_arr['word_info'] = req.word_info 168 | query_arr['first_channel_only'] = req.first_channel_only 169 | query_arr['reinforce_hotword'] = req.reinforce_hotword 170 | query_arr['sentence_max_length'] = req.sentence_max_length 171 | return query_arr 172 | 173 | def recognize(self, req, data): 174 | header = self._build_header() 175 | query_arr = self._create_query_arr(req) 176 | req_url = self._build_req_with_signature(self.credential.secret_key, query_arr, header) 177 | r = requests.post(req_url, headers=header, data=data) 178 | return r.text 179 | -------------------------------------------------------------------------------- /examples/tts/ttsexample.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 引用 SDK 3 | 4 | import sys 5 | sys.path.append("../..") 6 | 7 | import wave 8 | import time 9 | from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, as_completed 10 | 11 | from common import credential 12 | from tts import speech_synthesizer_ws 13 | from common.log import logger 14 | from common.utils import is_python3 15 | 16 | 17 | APPID = 0 18 | SECRET_ID = '' 19 | SECRET_KEY = '' 20 | 21 | VOICETYPE = 101001 # 音色类型 22 | FASTVOICETYPE = "" 23 | CODEC = "pcm" # 音频格式:pcm/mp3 24 | SAMPLE_RATE = 16000 # 音频采样率:8000/16000 25 | ENABLE_SUBTITLE = True 26 | 27 | 28 | class MySpeechSynthesisListener(speech_synthesizer_ws.SpeechSynthesisListener): 29 | 30 | def __init__(self, id, codec, sample_rate): 31 | self.start_time = time.time() 32 | self.id = id 33 | self.codec = codec.lower() 34 | self.sample_rate = sample_rate 35 | 36 | self.audio_file = "" 37 | self.audio_data = bytes() 38 | 39 | def set_audio_file(self, filename): 40 | self.audio_file = filename 41 | 42 | def on_synthesis_start(self, session_id): 43 | ''' 44 | session_id: 请求session id,类型字符串 45 | ''' 46 | super().on_synthesis_start(session_id) 47 | 48 | # TODO 合成开始,添加业务逻辑 49 | if not self.audio_file: 50 | self.audio_file = "speech_synthesis_output_" + str(self.id) + "." + self.codec 51 | self.audio_data = bytes() 52 | 53 | def on_synthesis_end(self): 54 | super().on_synthesis_end() 55 | 56 | # TODO 合成结束,添加业务逻辑 57 | logger.info("write audio file, path={}, size={}".format( 58 | self.audio_file, len(self.audio_data) 59 | )) 60 | if self.codec == "pcm": 61 | wav_fp = wave.open(self.audio_file + ".wav", "wb") 62 | wav_fp.setnchannels(1) 63 | wav_fp.setsampwidth(2) 64 | wav_fp.setframerate(self.sample_rate) 65 | wav_fp.writeframes(self.audio_data) 66 | wav_fp.close() 67 | elif self.codec == "mp3": 68 | fp = open(self.audio_file, "wb") 69 | fp.write(self.audio_data) 70 | fp.close() 71 | else: 72 | logger.info("codec {}: sdk NOT implemented, please save the file yourself".format( 73 | self.codec 74 | )) 75 | 76 | def on_audio_result(self, audio_bytes): 77 | ''' 78 | audio_bytes: 二进制音频,类型 bytes 79 | ''' 80 | super().on_audio_result(audio_bytes) 81 | 82 | # TODO 接收到二进制音频数据,添加实时播放或保存逻辑 83 | self.audio_data += audio_bytes 84 | 85 | def on_text_result(self, response): 86 | ''' 87 | response: 文本结果,类型 dict,如下 88 | 字段名 类型 说明 89 | code int 错误码(无需处理,SpeechSynthesizer中已解析,错误消息路由至 on_synthesis_fail) 90 | message string 错误信息 91 | session_id string 回显客户端传入的 session id 92 | request_id string 请求 id,区分不同合成请求,一次 websocket 通信中,该字段相同 93 | message_id string 消息 id,区分不同 websocket 消息 94 | final bool 合成是否完成(无需处理,SpeechSynthesizer中已解析) 95 | result Result 文本结果结构体 96 | 97 | Result 结构体 98 | 字段名 类型 说明 99 | subtitles array of Subtitle 时间戳数组 100 | 101 | Subtitle 结构体 102 | 字段名 类型 说明 103 | Text string 合成文本 104 | BeginTime int 开始时间戳 105 | EndTime int 结束时间戳 106 | BeginIndex int 开始索引 107 | EndIndex int 结束索引 108 | Phoneme string 音素 109 | ''' 110 | super().on_text_result(response) 111 | 112 | # TODO 接收到文本数据,添加业务逻辑 113 | result = response["result"] 114 | subtitles = [] 115 | if "subtitles" in result and len(result["subtitles"]) > 0: 116 | subtitles = result["subtitles"] 117 | 118 | def on_synthesis_fail(self, response): 119 | ''' 120 | response: 文本结果,类型 dict,如下 121 | 字段名 类型 122 | code int 错误码 123 | message string 错误信息 124 | ''' 125 | super().on_synthesis_fail(response) 126 | 127 | # TODO 合成失败,添加错误处理逻辑 128 | err_code = response["code"] 129 | err_msg = response["message"] 130 | 131 | 132 | def process(id, text): 133 | logger.info("process start: idx={} text={}".format(id, text)) 134 | listener = MySpeechSynthesisListener(id, CODEC, SAMPLE_RATE) 135 | credential_var = credential.Credential(SECRET_ID, SECRET_KEY) 136 | synthesizer = speech_synthesizer_ws.SpeechSynthesizer( 137 | APPID, credential_var, listener) 138 | synthesizer.set_text(text) 139 | synthesizer.set_voice_type(VOICETYPE) 140 | synthesizer.set_codec(CODEC) 141 | synthesizer.set_sample_rate(SAMPLE_RATE) 142 | synthesizer.set_enable_subtitle(ENABLE_SUBTITLE) 143 | synthesizer.set_fast_voice_type(FASTVOICETYPE) 144 | 145 | synthesizer.start() 146 | # wait for processing complete 147 | synthesizer.wait() 148 | 149 | logger.info("process done: idx={} text={}".format(id, text)) 150 | return id 151 | 152 | def read_tts_text(): 153 | lines_list = [] 154 | with open('tts_text.txt', 'r', encoding='utf-8') as file: 155 | for line in file: 156 | lines_list.append(line.strip()) 157 | # print("total read {} lines".format(len(lines_list))) 158 | return lines_list 159 | 160 | if __name__ == "__main__": 161 | if not is_python3(): 162 | print("only support python3") 163 | sys.exit(0) 164 | 165 | # 读取示例文本 166 | lines = read_tts_text() 167 | 168 | #### 示例一:单线程串行调用 #### 169 | for idx, line in enumerate(lines): 170 | result = process(idx, line) 171 | print(f"\nTask {result} completed\n") 172 | 173 | #### 示例二:多线程调用 #### 174 | # thread_concurrency_num = 3 # 最大线程数 175 | # with ThreadPoolExecutor(max_workers=thread_concurrency_num) as executor: 176 | # futures = [executor.submit(process, idx, line) for idx, line in enumerate(lines)] 177 | # for future in as_completed(futures): 178 | # result = future.result() 179 | # print(f"\nTask {result} completed\n") 180 | 181 | #### 示例三:多进程调用(适用于高并发场景) #### 182 | # process_concurrency_num = 3 # 最大进程数 183 | # with ProcessPoolExecutor(max_workers=process_concurrency_num) as executor: 184 | # futures = [executor.submit(process, idx, line) for idx, line in enumerate(lines)] 185 | # for future in as_completed(futures): 186 | # result = future.result() 187 | # print(f"\nTask {result} completed\n") 188 | -------------------------------------------------------------------------------- /examples/tts/ttsexample_flowing.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 引用 SDK 3 | 4 | import sys 5 | sys.path.append("../..") 6 | 7 | import wave 8 | import time 9 | import threading 10 | from common import credential 11 | from tts import flowing_speech_synthesizer 12 | from common.log import logger 13 | from common.utils import is_python3 14 | 15 | APPID = 0 16 | SECRET_ID = '' 17 | SECRET_KEY = '' 18 | 19 | VOICETYPE = 101001 # 音色类型 20 | CODEC = "mp3" # 音频格式:pcm/mp3 21 | SAMPLE_RATE = 16000 # 音频采样率:8000/16000 22 | ENABLE_SUBTITLE = False 23 | 24 | 25 | class MySpeechSynthesisListener(flowing_speech_synthesizer.FlowingSpeechSynthesisListener): 26 | 27 | def __init__(self, id, codec, sample_rate): 28 | self.start_time = time.time() 29 | self.id = id 30 | self.codec = codec.lower() 31 | self.sample_rate = sample_rate 32 | 33 | self.audio_file = "" 34 | self.audio_data = bytes() 35 | 36 | def set_audio_file(self, filename): 37 | self.audio_file = filename 38 | 39 | def on_synthesis_start(self, session_id): 40 | ''' 41 | session_id: 请求session id,类型字符串 42 | ''' 43 | if is_python3(): 44 | super().on_synthesis_start(session_id) 45 | else: 46 | super(MySpeechSynthesisListener, self).on_synthesis_start(session_id) 47 | 48 | # TODO 合成开始,添加业务逻辑 49 | if not self.audio_file: 50 | self.audio_file = "speech_synthesis_output." + self.codec 51 | self.audio_data = bytes() 52 | 53 | def on_synthesis_end(self): 54 | if is_python3(): 55 | super().on_synthesis_end() 56 | else: 57 | super(MySpeechSynthesisListener, self).on_synthesis_end() 58 | 59 | # TODO 合成结束,添加业务逻辑 60 | logger.info("write audio file, path={}, size={}".format( 61 | self.audio_file, len(self.audio_data) 62 | )) 63 | if self.codec == "pcm": 64 | wav_fp = wave.open(self.audio_file + ".wav", "wb") 65 | wav_fp.setnchannels(1) 66 | wav_fp.setsampwidth(2) 67 | wav_fp.setframerate(self.sample_rate) 68 | wav_fp.writeframes(self.audio_data) 69 | wav_fp.close() 70 | elif self.codec == "mp3": 71 | fp = open(self.audio_file, "wb") 72 | fp.write(self.audio_data) 73 | fp.close() 74 | else: 75 | logger.info("codec {}: sdk NOT implemented, please save the file yourself".format( 76 | self.codec 77 | )) 78 | 79 | def on_audio_result(self, audio_bytes): 80 | ''' 81 | audio_bytes: 二进制音频,类型 bytes 82 | ''' 83 | if is_python3(): 84 | super().on_audio_result(audio_bytes) 85 | else: 86 | super(MySpeechSynthesisListener, self).on_audio_result(audio_bytes) 87 | 88 | # TODO 接收到二进制音频数据,添加实时播放或保存逻辑 89 | self.audio_data += audio_bytes 90 | 91 | def on_text_result(self, response): 92 | ''' 93 | response: 文本结果,类型 dict,如下 94 | 字段名 类型 说明 95 | code int 错误码(无需处理,FlowingSpeechSynthesizer中已解析,错误消息路由至 on_synthesis_fail) 96 | message string 错误信息 97 | session_id string 回显客户端传入的 session id 98 | request_id string 请求 id,区分不同合成请求,一次 websocket 通信中,该字段相同 99 | message_id string 消息 id,区分不同 websocket 消息 100 | final bool 合成是否完成(无需处理,FlowingSpeechSynthesizer中已解析) 101 | result Result 文本结果结构体 102 | 103 | Result 结构体 104 | 字段名 类型 说明 105 | subtitles array of Subtitle 时间戳数组 106 | 107 | Subtitle 结构体 108 | 字段名 类型 说明 109 | Text string 合成文本 110 | BeginTime int 开始时间戳 111 | EndTime int 结束时间戳 112 | BeginIndex int 开始索引 113 | EndIndex int 结束索引 114 | Phoneme string 音素 115 | ''' 116 | if is_python3(): 117 | super().on_text_result(response) 118 | else: 119 | super(MySpeechSynthesisListener, self).on_text_result(response) 120 | 121 | # TODO 接收到文本数据,添加业务逻辑 122 | result = response["result"] 123 | subtitles = [] 124 | if "subtitles" in result and len(result["subtitles"]) > 0: 125 | subtitles = result["subtitles"] 126 | 127 | def on_synthesis_fail(self, response): 128 | ''' 129 | response: 文本结果,类型 dict,如下 130 | 字段名 类型 131 | code int 错误码 132 | message string 错误信息 133 | ''' 134 | if is_python3(): 135 | super().on_synthesis_fail(response) 136 | else: 137 | super(MySpeechSynthesisListener, self).on_synthesis_fail(response) 138 | 139 | # TODO 合成失败,添加错误处理逻辑 140 | err_code = response["code"] 141 | err_msg = response["message"] 142 | 143 | 144 | def process(id): 145 | listener = MySpeechSynthesisListener(id, CODEC, SAMPLE_RATE) 146 | credential_var = credential.Credential(SECRET_ID, SECRET_KEY) 147 | synthesizer = flowing_speech_synthesizer.FlowingSpeechSynthesizer( 148 | APPID, credential_var, listener) 149 | synthesizer.set_voice_type(VOICETYPE) 150 | synthesizer.set_codec(CODEC) 151 | synthesizer.set_sample_rate(SAMPLE_RATE) 152 | synthesizer.set_enable_subtitle(ENABLE_SUBTITLE) 153 | 154 | synthesizer.start() 155 | ready = synthesizer.wait_ready(5000) 156 | if not ready: 157 | logger.error("wait ready timeout") 158 | return 159 | 160 | texts = [ 161 | "五位壮士一面向顶峰攀登,一面依托大树和", 162 | "岩石向敌人射击。山路上又留下了许多具敌", 163 | "人的尸体。到了狼牙山峰顶,五壮士居高临", 164 | "下,继续向紧跟在身后的敌人射击。不少敌人", 165 | "坠落山涧,粉身碎骨。班长马宝玉负伤了,子", 166 | "弹都打完了,只有胡福才手里还剩下一颗手榴", 167 | "弹,他刚要拧开盖子,马宝玉抢前一步,夺过", 168 | "手榴弹插在腰间,他猛地举起一块磨盘大的石", 169 | "头,大声喊道:“同志们!用石头砸!”顿时,", 170 | "石头像雹子一样,带着五位壮士的决心,带着", 171 | "中国人民的仇恨,向敌人头上砸去。山坡上传", 172 | "来一阵叽里呱啦的叫声,敌人纷纷滚落深谷。", 173 | ] 174 | 175 | while True: 176 | for text in texts: 177 | synthesizer.process(text) 178 | time.sleep(5) # 模拟文本流式生成 179 | break 180 | synthesizer.complete() # 发送合成完毕指令 181 | 182 | synthesizer.wait() # 等待服务侧合成完成 183 | 184 | logger.info("process done") 185 | 186 | 187 | def process_multithread(number): 188 | thread_list = [] 189 | for i in range(0, number): 190 | thread = threading.Thread(target=process, args=(i,)) 191 | thread_list.append(thread) 192 | thread.start() 193 | print(i) 194 | 195 | for thread in thread_list: 196 | thread.join() 197 | 198 | 199 | if __name__ == "__main__": 200 | process_multithread(1) 201 | -------------------------------------------------------------------------------- /tts/speech_synthesizer_ws.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | import hmac 4 | import hashlib 5 | import base64 6 | import time 7 | import json 8 | import threading 9 | from websocket import ABNF, WebSocketApp 10 | import uuid 11 | import urllib 12 | from common.log import logger 13 | 14 | 15 | _PROTOCOL = "wss://" 16 | _HOST = "tts.cloud.tencent.com" 17 | _PATH = "/stream_ws" 18 | _ACTION = "TextToStreamAudioWS" 19 | 20 | 21 | class SpeechSynthesisListener(object): 22 | ''' 23 | ''' 24 | def on_synthesis_start(self, session_id): 25 | logger.info("on_synthesis_start: session_id={}".format(session_id)) 26 | 27 | def on_synthesis_end(self): 28 | logger.info("on_synthesis_end: -") 29 | 30 | def on_audio_result(self, audio_bytes): 31 | logger.info("on_audio_result: recv audio bytes, len={}".format(len(audio_bytes))) 32 | 33 | def on_text_result(self, response): 34 | session_id = response["session_id"] 35 | request_id = response["request_id"] 36 | message_id = response["message_id"] 37 | result = response['result'] 38 | subtitles = [] 39 | if "subtitles" in result and len(result["subtitles"]) > 0: 40 | subtitles = result["subtitles"] 41 | logger.info("on_text_result: session_id={} request_id={} message_id={}\nsubtitles={}".format( 42 | session_id, request_id, message_id, subtitles)) 43 | 44 | def on_synthesis_fail(self, response): 45 | logger.error("on_synthesis_fail: code={} msg={}".format( 46 | response['code'], response['message'] 47 | )) 48 | 49 | 50 | NOTOPEN = 0 51 | STARTED = 1 52 | OPENED = 2 53 | FINAL = 3 54 | ERROR = 4 55 | CLOSED = 5 56 | 57 | 58 | class SpeechSynthesizer: 59 | 60 | def __init__(self, appid, credential, listener): 61 | self.appid = appid 62 | self.credential = credential 63 | self.status = NOTOPEN 64 | self.ws = None 65 | self.wst = None 66 | self.listener = listener 67 | 68 | self.text = "欢迎使用腾讯云实时语音合成" 69 | self.voice_type = 0 70 | self.codec = "pcm" 71 | self.sample_rate = 16000 72 | self.volume = 0 73 | self.speed = 0 74 | self.session_id = "" 75 | self.enable_subtitle = True 76 | self.fast_voice_type = "" 77 | 78 | def set_voice_type(self, voice_type): 79 | self.voice_type = voice_type 80 | 81 | def set_codec(self, codec): 82 | self.codec = codec 83 | 84 | def set_sample_rate(self, sample_rate): 85 | self.sample_rate = sample_rate 86 | 87 | def set_speed(self, speed): 88 | self.speed = speed 89 | 90 | def set_volume(self, volume): 91 | self.volume = volume 92 | 93 | def set_text(self, text): 94 | self.text = text 95 | 96 | def set_enable_subtitle(self, enable_subtitle): 97 | self.enable_subtitle = enable_subtitle 98 | 99 | def set_fast_voice_type(self, fast_voice_type): 100 | self.fast_voice_type = fast_voice_type 101 | 102 | def __gen_signature(self, params): 103 | sort_dict = sorted(params.keys()) 104 | sign_str = "GET" + _HOST + _PATH + "?" 105 | for key in sort_dict: 106 | sign_str = sign_str + key + "=" + str(params[key]) + '&' 107 | sign_str = sign_str[:-1] 108 | secret_key = self.credential.secret_key.encode('utf-8') 109 | sign_str = sign_str.encode('utf-8') 110 | hmacstr = hmac.new(secret_key, sign_str, hashlib.sha1).digest() 111 | s = base64.b64encode(hmacstr) 112 | s = s.decode('utf-8') 113 | return s 114 | 115 | def __gen_params(self, session_id): 116 | self.session_id = session_id 117 | 118 | params = dict() 119 | params['Action'] = _ACTION 120 | params['AppId'] = int(self.appid) 121 | params['SecretId'] = self.credential.secret_id 122 | params['ModelType'] = 1 123 | params['VoiceType'] = self.voice_type 124 | params['Codec'] = self.codec 125 | params['SampleRate'] = self.sample_rate 126 | params['Speed'] = self.speed 127 | params['Volume'] = self.volume 128 | params['SessionId'] = self.session_id 129 | params['Text'] = self.text 130 | params['EnableSubtitle'] = self.enable_subtitle 131 | if len(self.fast_voice_type) > 0: 132 | params['FastVoiceType'] = self.fast_voice_type 133 | 134 | timestamp = int(time.time()) 135 | params['Timestamp'] = timestamp 136 | params['Expired'] = timestamp + 24 * 60 * 60 137 | return params 138 | 139 | def __create_query_string(self, param): 140 | param['Text'] = urllib.parse.quote(param['Text']) 141 | 142 | param = sorted(param.items(), key=lambda d: d[0]) 143 | 144 | url = _PROTOCOL + _HOST + _PATH 145 | 146 | signstr = url + "?" 147 | for x in param: 148 | tmp = x 149 | for t in tmp: 150 | signstr += str(t) 151 | signstr += "=" 152 | signstr = signstr[:-1] 153 | signstr += "&" 154 | signstr = signstr[:-1] 155 | return signstr 156 | 157 | def start(self): 158 | logger.info("synthesizer start: begin") 159 | 160 | def _close_conn(reason): 161 | ta = time.time() 162 | self.ws.close() 163 | tb = time.time() 164 | logger.info("client has closed connection ({}), cost {} ms".format(reason, int((tb-ta)*1000))) 165 | 166 | def _on_data(ws, data, opcode, flag): 167 | # NOTE print all message that client received 168 | # logger.info("data={} opcode={} flag={}".format(data, opcode, flag)) 169 | if opcode == ABNF.OPCODE_BINARY: 170 | self.listener.on_audio_result(data) # 171 | pass 172 | elif opcode == ABNF.OPCODE_TEXT: 173 | resp = json.loads(data) # WSResponseMessage 174 | if resp['code'] != 0: 175 | logger.error("server synthesis fail request_id={} code={} msg={}".format( 176 | resp['request_id'], resp['code'], resp['message'] 177 | )) 178 | self.listener.on_synthesis_fail(resp) 179 | return 180 | if "final" in resp and resp['final'] == 1: 181 | logger.info("recv FINAL frame") 182 | self.status = FINAL 183 | _close_conn("after recv final") 184 | self.listener.on_synthesis_end() 185 | return 186 | if "result" in resp: 187 | if "subtitles" in resp["result"] and resp["result"]["subtitles"] is not None: 188 | self.listener.on_text_result(resp) 189 | return 190 | else: 191 | logger.error("invalid on_data code, opcode=".format(opcode)) 192 | 193 | def _on_error(ws, error): 194 | if self.status == FINAL or self.status == CLOSED: 195 | return 196 | self.status = ERROR 197 | logger.error("error={}, session_id={}".format(error, self.session_id)) 198 | _close_conn("after recv error") 199 | 200 | def _on_close(ws, close_status_code, close_msg): 201 | logger.info("conn closed, close_status_code={} close_msg={}".format(close_status_code, close_msg)) 202 | self.status = CLOSED 203 | 204 | def _on_open(ws): 205 | logger.info("conn opened") 206 | self.status = OPENED 207 | 208 | session_id = str(uuid.uuid1()) 209 | params = self.__gen_params(session_id) 210 | signature = self.__gen_signature(params) 211 | requrl = self.__create_query_string(params) 212 | 213 | autho = urllib.parse.quote(signature) 214 | requrl += "&Signature=%s" % autho 215 | 216 | self.ws = WebSocketApp(requrl, None, 217 | on_error=_on_error, on_close=_on_close, 218 | on_data=_on_data) 219 | self.ws.on_open = _on_open 220 | 221 | self.wst = threading.Thread(target=self.ws.run_forever) 222 | self.wst.daemon = True 223 | self.wst.start() 224 | self.status = STARTED 225 | self.listener.on_synthesis_start(session_id) 226 | 227 | logger.info("synthesizer start: end") 228 | 229 | def wait(self): 230 | logger.info("synthesizer wait: begin") 231 | if self.ws: 232 | if self.wst and self.wst.is_alive(): 233 | self.wst.join() 234 | logger.info("synthesizer wait: end") 235 | -------------------------------------------------------------------------------- /vc/speech_convertor_ws.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | import hmac 4 | import hashlib 5 | import base64 6 | import time 7 | import json 8 | import threading 9 | from websocket import ABNF, WebSocketApp 10 | import uuid 11 | import urllib 12 | from common.log import logger 13 | 14 | 15 | _PROTOCOL = "wss://" 16 | _HOST = "tts.cloud.tencent.com" 17 | _PATH = "/vc_stream" 18 | 19 | 20 | class SpeechConvertListener(object): 21 | ''' 22 | ''' 23 | def on_convert_start(self, voice_id): 24 | logger.info("on_convert_start: voice_id={}".format(voice_id)) 25 | 26 | def on_convert_end(self): 27 | logger.info("on_convert_end: -") 28 | 29 | def on_audio_result(self, audio_bytes): 30 | logger.info("on_audio_result: recv audio bytes, len={}".format(len(audio_bytes))) 31 | 32 | def on_convert_fail(self, response): 33 | logger.error("on_convert_fail: code={} msg={}".format( 34 | response['Code'], response['Message'] 35 | )) 36 | 37 | 38 | NOTOPEN = 0 39 | STARTED = 1 40 | OPENED = 2 41 | FINAL = 3 42 | ERROR = 4 43 | CLOSED = 5 44 | 45 | class SpeechConvertor: 46 | 47 | def __init__(self, appid, credential, listener): 48 | self.appid = appid 49 | self.credential = credential 50 | self.status = NOTOPEN 51 | self.ws = None 52 | self.wst = None 53 | self.listener = listener 54 | 55 | self.voice_id = "" 56 | self.voice_type = 301005 57 | self.codec = "pcm" 58 | self.sample_rate = 16000 59 | self.volume = 0 60 | self.speed = 0 61 | 62 | def set_voice_type(self, voice_type): 63 | self.voice_type = voice_type 64 | 65 | def set_codec(self, codec): 66 | self.codec = codec 67 | 68 | def set_sample_rate(self, sample_rate): 69 | self.sample_rate = sample_rate 70 | 71 | def set_volume(self, volume): 72 | self.volume = volume 73 | 74 | def __gen_signature(self, params): 75 | sort_dict = sorted(params.keys()) 76 | sign_str = _HOST + _PATH + '/' + str(self.appid) + "?" 77 | for key in sort_dict: 78 | sign_str = sign_str + key + "=" + str(params[key]) + '&' 79 | sign_str = sign_str[:-1] 80 | logger.info("sign_url={}".format(sign_str)) 81 | secret_key = self.credential.secret_key.encode('utf-8') 82 | sign_str = sign_str.encode('utf-8') 83 | hmacstr = hmac.new(secret_key, sign_str, hashlib.sha1).digest() 84 | s = base64.b64encode(hmacstr) 85 | s = s.decode('utf-8') 86 | return s 87 | 88 | def __gen_params(self, voice_id): 89 | self.voice_id = voice_id 90 | 91 | params = dict() 92 | params['SecretId'] = self.credential.secret_id 93 | params['VoiceType'] = self.voice_type 94 | params['Codec'] = self.codec 95 | params['SampleRate'] = self.sample_rate 96 | params['Volume'] = self.volume 97 | params['VoiceId'] = self.voice_id 98 | params['End'] = 0 99 | 100 | timestamp = int(time.time()) 101 | params['Timestamp'] = timestamp 102 | params['Expired'] = timestamp + 24 * 60 * 60 103 | return params 104 | 105 | def __create_query_string(self, param): 106 | param = sorted(param.items(), key=lambda d: d[0]) 107 | 108 | url = _PROTOCOL + _HOST + _PATH + '/' + str(self.appid) 109 | 110 | signstr = url + "?" 111 | for x in param: 112 | tmp = x 113 | for t in tmp: 114 | signstr += str(t) 115 | signstr += "=" 116 | signstr = signstr[:-1] 117 | signstr += "&" 118 | signstr = signstr[:-1] 119 | return signstr 120 | 121 | def start(self): 122 | logger.info("convertor start: begin") 123 | 124 | def _close_conn(reason): 125 | ta = time.time() 126 | self.ws.close() 127 | tb = time.time() 128 | logger.info("client has closed connection ({}), cost {} ms".format(reason, int((tb-ta)*1000))) 129 | 130 | def _on_data(ws, data, opcode, flag): 131 | # NOTE print all message that client received 132 | #logger.info("data={} opcode={} flag={}".format(data, opcode, flag)) 133 | 134 | if opcode == ABNF.OPCODE_BINARY: 135 | length = int.from_bytes(data[:4], byteorder='big', signed=False) 136 | json_str = bytes.decode(data[4: length + 4]) 137 | audio_data = data[4 + length:] 138 | logger.info("recv raw json: {}".format(json_str)) 139 | 140 | resp = json.loads(json_str) 141 | if resp['Code'] != 0: 142 | logger.error("server convert fail voice_id={} code={} msg_id={} msg={}".format( 143 | resp['VoiceId'], resp['Code'], resp['MessageId'], resp['Message'] 144 | )) 145 | self.listener.on_convert_fail(resp) 146 | return 147 | 148 | # normal recv converted data 149 | self.listener.on_audio_result(audio_data) # 150 | if "Final" in resp and resp['Final'] == 1: 151 | logger.info("recv FINAL frame") 152 | self.status = FINAL 153 | _close_conn("after recv final") 154 | self.listener.on_convert_end() 155 | elif opcode == ABNF.OPCODE_TEXT: 156 | pass 157 | else: 158 | logger.error("invalid on_data code, opcode=".format(opcode)) 159 | 160 | def _on_error(ws, error): 161 | if self.status == FINAL or self.status == CLOSED: 162 | return 163 | self.status = ERROR 164 | logger.error("error={}, voice_id={}".format(error, self.voice_id)) 165 | _close_conn("after recv error") 166 | 167 | def _on_close(ws, close_status_code, close_msg): 168 | logger.info("conn closed, close_status_code={} close_msg={}".format(close_status_code, close_msg)) 169 | self.status = CLOSED 170 | 171 | def _on_open(ws): 172 | logger.info("conn opened") 173 | self.status = OPENED 174 | 175 | voice_id = str(uuid.uuid1()) 176 | params = self.__gen_params(voice_id) 177 | signature = self.__gen_signature(params) 178 | requrl = self.__create_query_string(params) 179 | 180 | autho = urllib.parse.quote(signature) 181 | requrl += "&Signature=%s" % autho 182 | logger.info("req_url={}".format(requrl)) 183 | 184 | self.ws = WebSocketApp(requrl, None, 185 | on_error=_on_error, on_close=_on_close, 186 | on_data=_on_data) 187 | self.ws.on_open = _on_open 188 | 189 | self.wst = threading.Thread(target=self.ws.run_forever) 190 | self.wst.daemon = True 191 | self.wst.start() 192 | self.status = STARTED 193 | self.listener.on_convert_start(voice_id) 194 | 195 | logger.info("convertor start: end") 196 | 197 | def wait(self): 198 | logger.info("convertor wait: begin") 199 | if self.ws: 200 | if self.wst and self.wst.is_alive(): 201 | self.wst.join() 202 | logger.info("convertor wait: end") 203 | 204 | def send(self, audio_data, is_end=False): 205 | logger.info("convertor send: begin") 206 | if not self.ws: 207 | logger.error("convertor send: ws is None") 208 | return False 209 | if self.status != OPENED: 210 | logger.error("ws not opened, status={}".format(self.status)) 211 | return False 212 | 213 | # message format: HEAD + JSON + AUDIO 214 | # refer to https://cloud.tencent.com/document/product/1664/85973#edac94f7-2e9d-4e59-aac3-fd1bea693be0 215 | json_body = json.dumps({ 216 | "End": 1 if is_end else 0, 217 | }) 218 | json_body_bytes = bytes(json_body, encoding='utf-8') 219 | json_body_len = len(json_body_bytes) 220 | 221 | head = json_body_len.to_bytes(4, byteorder='big') 222 | message = head + json_body_bytes + audio_data 223 | logger.info("send json_body_len={} json_body={} audio_len={}".format( 224 | json_body_len, json_body, len(audio_data))) 225 | 226 | self.ws.send(message, ABNF.OPCODE_BINARY) 227 | logger.info("convertor send: end") 228 | return True 229 | 230 | def wait_to_send(self): 231 | while True: 232 | if self.status < OPENED: 233 | time.sleep(0.01) 234 | else: 235 | break 236 | logger.info("wait_to_send: status={}".format(self.status)) 237 | return self.status == OPENED 238 | -------------------------------------------------------------------------------- /soe/speaking_assessment.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | import hmac 4 | import hashlib 5 | import base64 6 | import time 7 | import json 8 | import threading 9 | import urllib 10 | 11 | import websocket 12 | import uuid 13 | from urllib.parse import quote 14 | from common.log import logger 15 | 16 | 17 | def is_python3(): 18 | if sys.version > '3': 19 | return True 20 | return False 21 | 22 | 23 | # 实时识别语音使用 24 | class SpeakingAssessmentListener(): 25 | ''' 26 | reponse: 27 | on_recognition_start的返回只有voice_id字段。 28 | on_fail 只有voice_id、code、message字段。 29 | on_recognition_complete没有result字段。 30 | 其余消息包含所有字段。 31 | 字段名 类型 32 | code Integer 33 | message String 34 | voice_id String 35 | message_id String 36 | result 37 | final Integer 38 | 39 | # Result的结构体格式为: 40 | # slice_type Integer 41 | # index Integer 42 | # start_time Integer 43 | # end_time Integer 44 | # voice_text_str String 45 | # word_size Integer 46 | # word_list Word Array 47 | # 48 | # Word的类型为: 49 | # word String 50 | # start_time Integer 51 | # end_time Integer 52 | # stable_flag:Integer 53 | ''' 54 | 55 | def on_recognition_start(self, response): 56 | pass 57 | 58 | def on_intermediate_result(self, response): 59 | pass 60 | 61 | def on_recognition_complete(self, response): 62 | pass 63 | 64 | def on_fail(self, response): 65 | pass 66 | 67 | 68 | NOTOPEN = 0 69 | STARTED = 1 70 | OPENED = 2 71 | FINAL = 3 72 | ERROR = 4 73 | CLOSED = 5 74 | 75 | 76 | def quote_autho(autho): 77 | if sys.version_info >= (3, 0): 78 | import urllib.parse as urlparse 79 | return urlparse.quote(autho) 80 | else: 81 | return urllib.quote(autho) 82 | 83 | 84 | # 实时识别使用 85 | class SpeakingAssessment: 86 | 87 | def __init__(self, appid, credential, engine_model_type, listener): 88 | self.result = "" 89 | self.credential = credential 90 | self.appid = appid 91 | self.server_engine_type = engine_model_type 92 | self.status = NOTOPEN 93 | self.ws = None 94 | self.wst = None 95 | self.voice_id = "" 96 | self.new_start = 0 97 | self.listener = listener 98 | self.text_mode = 0 99 | self.ref_text = "" 100 | self.keyword = "" 101 | self.eval_mode = 0 102 | self.score_coeff = 1.0 103 | self.sentence_info_enabled = 0 104 | self.voice_format = 0 105 | self.nonce = "" 106 | self.rec_mode = 0 107 | 108 | def set_text_mode(self, text_mode): 109 | self.text_mode = text_mode 110 | 111 | def set_rec_mode(self, rec_mode): 112 | self.rec_mode = rec_mode 113 | 114 | def set_ref_text(self, ref_text): 115 | self.ref_text = ref_text 116 | 117 | def set_keyword(self, keyword): 118 | self.keyword = keyword 119 | 120 | def set_eval_mode(self, eval_mode): 121 | self.eval_mode = eval_mode 122 | 123 | def set_sentence_info_enabled(self, sentence_info_enabled): 124 | self.sentence_info_enabled = sentence_info_enabled 125 | 126 | def set_voice_format(self, voice_format): 127 | self.voice_format = voice_format 128 | 129 | def set_nonce(self, nonce): 130 | self.nonce = nonce 131 | 132 | def format_sign_string(self, param): 133 | signstr = "soe.cloud.tencent.com/soe/api/" 134 | for t in param: 135 | if 'appid' in t: 136 | signstr += str(t[1]) 137 | break 138 | signstr += "?" 139 | for x in param: 140 | tmp = x 141 | if 'appid' in x: 142 | continue 143 | for t in tmp: 144 | signstr += str(t) 145 | signstr += "=" 146 | signstr = signstr[:-1] 147 | signstr += "&" 148 | signstr = signstr[:-1] 149 | return signstr 150 | 151 | def create_query_string(self, param): 152 | signstr = "" 153 | for key, value in param.items(): 154 | if key == 'appid': 155 | signstr += str(value) 156 | break 157 | signstr += "?" 158 | for key, value in param.items(): 159 | if key == 'appid': 160 | continue 161 | value = quote_autho(str(value)) 162 | signstr += str(key) + "=" + str(value) + "&" 163 | signstr = signstr[:-1] 164 | return "wss://soe.cloud.tencent.com/soe/api/" + signstr 165 | 166 | def sign(self, signstr, secret_key): 167 | hmacstr = hmac.new(secret_key.encode('utf-8'), 168 | signstr.encode('utf-8'), hashlib.sha1).digest() 169 | s = base64.b64encode(hmacstr) 170 | s = s.decode('utf-8') 171 | return s 172 | 173 | def create_query_arr(self): 174 | query_arr = dict() 175 | 176 | query_arr['appid'] = self.appid 177 | query_arr['server_engine_type'] = self.server_engine_type 178 | query_arr['text_mode'] = self.text_mode 179 | query_arr['rec_mode'] = self.rec_mode 180 | query_arr['ref_text'] = self.ref_text 181 | query_arr['keyword'] = self.keyword 182 | query_arr['eval_mode'] = self.eval_mode 183 | query_arr['score_coeff'] = self.score_coeff 184 | query_arr['sentence_info_enabled'] = self.sentence_info_enabled 185 | query_arr['secretid'] = self.credential.secret_id 186 | if self.credential.token != "": 187 | query_arr['token'] = self.credential.token 188 | query_arr['voice_format'] = self.voice_format 189 | query_arr['voice_id'] = self.voice_id 190 | query_arr['timestamp'] = str(int(time.time())) 191 | if self.nonce != "": 192 | query_arr['nonce'] = self.nonce 193 | else: 194 | query_arr['nonce'] = query_arr['timestamp'] 195 | query_arr['expired'] = int(time.time()) + 24 * 60 * 60 196 | return query_arr 197 | 198 | def stop(self): 199 | if self.status == OPENED: 200 | msg = {'type': "end"} 201 | text_str = json.dumps(msg) 202 | self.ws.sock.send(text_str) 203 | if self.ws: 204 | if self.wst and self.wst.is_alive(): 205 | self.wst.join() 206 | self.ws.close() 207 | 208 | def write(self, data): 209 | while self.status == STARTED: 210 | time.sleep(0.1) 211 | if self.status == OPENED: 212 | self.ws.sock.send_binary(data) 213 | 214 | def start(self): 215 | def on_message(ws, message): 216 | # print(message) 217 | response = json.loads(message) 218 | response['voice_id'] = self.voice_id 219 | if response['code'] != 0: 220 | logger.error("%s server recognition fail %s" % 221 | (response['voice_id'], response['message'])) 222 | self.listener.on_fail(response) 223 | return 224 | if "final" in response and response["final"] == 1: 225 | self.status = FINAL 226 | self.result = message 227 | self.listener.on_recognition_complete(response) 228 | logger.info("%s recognition complete" % response['voice_id']) 229 | self.ws.close() 230 | return 231 | else: 232 | if response["result"] is not None: 233 | self.listener.on_intermediate_result(response) 234 | logger.info("%s recognition doing" % response['voice_id']) 235 | return 236 | 237 | def on_error(ws, error): 238 | if self.status == FINAL: 239 | return 240 | logger.error("websocket error %s voice id %s" % 241 | (format(error), self.voice_id)) 242 | self.status = ERROR 243 | 244 | def on_close(ws): 245 | self.status = CLOSED 246 | logger.info("websocket closed voice id %s" % 247 | self.voice_id) 248 | 249 | def on_open(ws): 250 | self.status = OPENED 251 | 252 | query_arr = self.create_query_arr() 253 | if self.voice_id == "": 254 | query_arr['voice_id'] = str(uuid.uuid1()) 255 | self.voice_id = query_arr['voice_id'] 256 | query = sorted(query_arr.items(), key=lambda d: d[0]) 257 | signstr = self.format_sign_string(query) 258 | autho = self.sign(signstr, self.credential.secret_key) 259 | requrl = self.create_query_string(query_arr) 260 | print(requrl) 261 | if is_python3(): 262 | autho = urllib.parse.quote(autho) 263 | else: 264 | autho = urllib.quote(autho) 265 | requrl += "&signature=%s" % autho 266 | print(requrl) 267 | self.ws = websocket.WebSocketApp(requrl, None, 268 | on_error=on_error, on_close=on_close, on_message=on_message) 269 | self.ws.on_open = on_open 270 | self.wst = threading.Thread(target=self.ws.run_forever) 271 | self.wst.daemon = True 272 | self.wst.start() 273 | self.status = STARTED 274 | response = {'voice_id': self.voice_id} 275 | self.listener.on_recognition_start(response) 276 | logger.info("%s recognition start" % response['voice_id']) 277 | -------------------------------------------------------------------------------- /asr/speech_recognizer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | import hmac 4 | import hashlib 5 | import base64 6 | import time 7 | import json 8 | import threading 9 | import websocket 10 | import uuid 11 | import urllib 12 | from common.log import logger 13 | 14 | 15 | def is_python3(): 16 | if sys.version > '3': 17 | return True 18 | return False 19 | 20 | 21 | #实时识别语音使用 22 | class SpeechRecognitionListener(): 23 | ''' 24 | reponse: 25 | on_recognition_start的返回只有voice_id字段。 26 | on_fail 只有voice_id、code、message字段。 27 | on_recognition_complete没有result字段。 28 | 其余消息包含所有字段。 29 | 字段名 类型 30 | code Integer 31 | message String 32 | voice_id String 33 | message_id String 34 | result Result 35 | final Integer 36 | 37 | Result的结构体格式为: 38 | slice_type Integer 39 | index Integer 40 | start_time Integer 41 | end_time Integer 42 | voice_text_str String 43 | word_size Integer 44 | word_list Word Array 45 | 46 | Word的类型为: 47 | word String 48 | start_time Integer 49 | end_time Integer 50 | stable_flag:Integer 51 | ''' 52 | 53 | def on_recognition_start(self, response): 54 | pass 55 | 56 | def on_sentence_begin(self, response): 57 | pass 58 | 59 | def on_recognition_result_change(self, response): 60 | pass 61 | 62 | def on_sentence_end(self, response): 63 | pass 64 | 65 | def on_recognition_complete(self, response): 66 | pass 67 | 68 | def on_fail(self, response): 69 | pass 70 | 71 | 72 | NOTOPEN = 0 73 | STARTED = 1 74 | OPENED = 2 75 | FINAL = 3 76 | ERROR = 4 77 | CLOSED = 5 78 | 79 | #实时识别语音使用 80 | class SpeechRecognizer: 81 | 82 | def __init__(self, appid, credential, engine_model_type, listener): 83 | self.result = "" 84 | self.credential = credential 85 | self.appid = appid 86 | self.engine_model_type = engine_model_type 87 | self.status = NOTOPEN 88 | self.ws = None 89 | self.wst = None 90 | self.voice_id = "" 91 | self.new_start = 0 92 | self.listener = listener 93 | self.filter_dirty = 0 94 | self.filter_modal = 0 95 | self.filter_punc = 0 96 | self.convert_num_mode = 0 97 | self.word_info = 0 98 | self.need_vad = 0 99 | self.vad_silence_time = 0 100 | self.hotword_id = "" 101 | self.hotword_list = "" 102 | self.reinforce_hotword = 0 103 | self.noise_threshold = 0 104 | self.voice_format = 4 105 | self.nonce = "" 106 | self.replace_text_id = "" 107 | self.language_judgment = 0 108 | #适用于中英粤的语种识别参考参数 109 | def set_language_judgment(self, language_judgment): 110 | self.language_judgment = language_judgment 111 | 112 | def set_filter_dirty(self, filter_dirty): 113 | self.filter_dirty = filter_dirty 114 | 115 | def set_filter_modal(self, filter_modal): 116 | self.filter_modal = filter_modal 117 | 118 | def set_filter_punc(self, filter_punc): 119 | self.filter_punc = filter_punc 120 | 121 | def set_convert_num_mode(self, convert_num_mode): 122 | self.convert_num_mode = convert_num_mode 123 | 124 | def set_word_info(self, word_info): 125 | self.word_info = word_info 126 | 127 | def set_need_vad(self, need_vad): 128 | self.need_vad = need_vad 129 | 130 | def set_vad_silence_time(self, vad_silence_time): 131 | self.vad_silence_time = vad_silence_time 132 | 133 | def set_hotword_id(self, hotword_id): 134 | self.hotword_id = hotword_id 135 | 136 | def set_hotword_list(self, hotword_list): 137 | self.hotword_list = hotword_list 138 | 139 | def set_voice_format(self, voice_format): 140 | self.voice_format = voice_format 141 | 142 | def set_nonce(self, nonce): 143 | self.nonce = nonce 144 | 145 | def set_reinforce_hotword(self, reinforce_hotword): 146 | self.reinforce_hotword = reinforce_hotword 147 | 148 | def set_noise_threshold(self, noise_threshold): 149 | self.noise_threshold = noise_threshold 150 | 151 | def set_replace_text_id(self, replace_text_id): 152 | self.replace_text_id = replace_text_id 153 | 154 | def format_sign_string(self, param): 155 | signstr = "asr.cloud.tencent.com/asr/v2/" 156 | for t in param: 157 | if 'appid' in t: 158 | signstr += str(t[1]) 159 | break 160 | signstr += "?" 161 | for x in param: 162 | tmp = x 163 | if 'appid' in x: 164 | continue 165 | for t in tmp: 166 | signstr += str(t) 167 | signstr += "=" 168 | signstr = signstr[:-1] 169 | signstr += "&" 170 | signstr = signstr[:-1] 171 | return signstr 172 | 173 | def create_query_string(self, param): 174 | signstr = "wss://asr.cloud.tencent.com/asr/v2/" 175 | for t in param: 176 | if 'appid' in t: 177 | signstr += str(t[1]) 178 | break 179 | signstr += "?" 180 | for x in param: 181 | tmp = x 182 | if 'appid' in x: 183 | continue 184 | for t in tmp: 185 | signstr += str(t) 186 | signstr += "=" 187 | signstr = signstr[:-1] 188 | signstr += "&" 189 | signstr = signstr[:-1] 190 | return signstr 191 | 192 | def sign(self, signstr, secret_key): 193 | hmacstr = hmac.new(secret_key.encode('utf-8'), 194 | signstr.encode('utf-8'), hashlib.sha1).digest() 195 | s = base64.b64encode(hmacstr) 196 | s = s.decode('utf-8') 197 | return s 198 | 199 | def create_query_arr(self): 200 | query_arr = dict() 201 | 202 | query_arr['appid'] = self.appid 203 | query_arr['sub_service_type'] = 1 204 | query_arr['engine_model_type'] = self.engine_model_type 205 | query_arr['filter_dirty'] = self.filter_dirty 206 | query_arr['filter_modal'] = self.filter_modal 207 | query_arr['filter_punc'] = self.filter_punc 208 | query_arr['needvad'] = self.need_vad 209 | query_arr['convert_num_mode'] = self.convert_num_mode 210 | query_arr['word_info'] = self.word_info 211 | query_arr['language_judgment'] = self.language_judgment 212 | if self.vad_silence_time != 0: 213 | query_arr['vad_silence_time'] = self.vad_silence_time 214 | if self.hotword_id != "": 215 | query_arr['hotword_id'] = self.hotword_id 216 | if self.hotword_list != "": 217 | query_arr['hotword_list'] = self.hotword_list 218 | if self.replace_text_id != "": 219 | query_arr['replace_text_id'] = self.replace_text_id 220 | query_arr['secretid'] = self.credential.secret_id 221 | query_arr['voice_format'] = self.voice_format 222 | query_arr['voice_id'] = self.voice_id 223 | query_arr['timestamp'] = str(int(time.time())) 224 | if self.nonce != "": 225 | query_arr['nonce'] = self.nonce 226 | else: 227 | query_arr['nonce'] = query_arr['timestamp'] 228 | query_arr['expired'] = int(time.time()) + 24 * 60 * 60 229 | query_arr['reinforce_hotword'] = self.reinforce_hotword 230 | query_arr['noise_threshold'] = self.noise_threshold 231 | return query_arr 232 | 233 | def stop(self): 234 | if self.status == OPENED: 235 | msg = {} 236 | msg['type'] = "end" 237 | text_str = json.dumps(msg) 238 | self.ws.sock.send(text_str) 239 | if self.ws: 240 | if self.wst and self.wst.is_alive(): 241 | self.wst.join() 242 | self.ws.close() 243 | 244 | 245 | def write(self, data): 246 | while self.status == STARTED: 247 | time.sleep(0.1) 248 | if self.status == OPENED: 249 | self.ws.sock.send_binary(data) 250 | 251 | def start(self): 252 | def on_message(ws, message): 253 | response = json.loads(message) 254 | response['voice_id'] = self.voice_id 255 | if response['code'] != 0: 256 | logger.error("%s server recognition fail %s" % 257 | (response['voice_id'], response['message'])) 258 | self.listener.on_fail(response) 259 | return 260 | if "final" in response and response["final"] == 1: 261 | self.status = FINAL 262 | self.result = message 263 | self.listener.on_recognition_complete(response) 264 | logger.info("%s recognition complete" % response['voice_id']) 265 | return 266 | if "result" in response.keys(): 267 | if response["result"]['slice_type'] == 0: 268 | self.listener.on_sentence_begin(response) 269 | return 270 | elif response["result"]["slice_type"] == 2: 271 | self.listener.on_sentence_end(response) 272 | return 273 | elif response["result"]["slice_type"] == 1: 274 | self.listener.on_recognition_result_change(response) 275 | return 276 | 277 | def on_error(ws, error): 278 | if self.status == FINAL : 279 | return 280 | logger.error("websocket error %s voice id %s" % 281 | (format(error), self.voice_id)) 282 | self.status = ERROR 283 | 284 | def on_close(ws): 285 | self.status = CLOSED 286 | logger.info("websocket closed voice id %s" % 287 | self.voice_id) 288 | 289 | def on_open(ws): 290 | self.status = OPENED 291 | 292 | query_arr = self.create_query_arr() 293 | if self.voice_id == "": 294 | query_arr['voice_id'] = str(uuid.uuid1()) 295 | self.voice_id = query_arr['voice_id'] 296 | query = sorted(query_arr.items(), key=lambda d: d[0]) 297 | signstr = self.format_sign_string(query) 298 | 299 | autho = self.sign(signstr, self.credential.secret_key) 300 | requrl = self.create_query_string(query) 301 | if is_python3(): 302 | autho = urllib.parse.quote(autho) 303 | else: 304 | autho = urllib.quote(autho) 305 | requrl += "&signature=%s" % autho 306 | self.ws = websocket.WebSocketApp(requrl, None, 307 | on_error=on_error, on_close=on_close, on_message=on_message) 308 | self.ws.on_open = on_open 309 | self.wst = threading.Thread(target=self.ws.run_forever) 310 | self.wst.daemon = True 311 | self.wst.start() 312 | self.status = STARTED 313 | response = {} 314 | response['voice_id'] = self.voice_id 315 | self.listener.on_recognition_start(response) 316 | logger.info("%s recognition start" % response['voice_id']) 317 | -------------------------------------------------------------------------------- /tts/flowing_speech_synthesizer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | import hmac 4 | import hashlib 5 | import base64 6 | import time 7 | import json 8 | import threading 9 | import websocket 10 | import uuid 11 | import urllib 12 | from common.log import logger 13 | from common.utils import is_python3 14 | 15 | 16 | _PROTOCOL = "wss://" 17 | _HOST = "tts.cloud.tencent.com" 18 | _PATH = "/stream_wsv2" 19 | _ACTION = "TextToStreamAudioWSv2" 20 | 21 | 22 | class FlowingSpeechSynthesisListener(object): 23 | ''' 24 | ''' 25 | def on_synthesis_start(self, session_id): 26 | logger.info("on_synthesis_start: session_id={}".format(session_id)) 27 | 28 | def on_synthesis_end(self): 29 | logger.info("on_synthesis_end: -") 30 | 31 | def on_audio_result(self, audio_bytes): 32 | logger.info("on_audio_result: recv audio bytes, len={}".format(len(audio_bytes))) 33 | 34 | def on_text_result(self, response): 35 | session_id = response["session_id"] 36 | request_id = response["request_id"] 37 | message_id = response["message_id"] 38 | result = response['result'] 39 | subtitles = [] 40 | if "subtitles" in result and len(result["subtitles"]) > 0: 41 | subtitles = result["subtitles"] 42 | logger.info("on_text_result: session_id={} request_id={} message_id={}\nsubtitles={}".format( 43 | session_id, request_id, message_id, subtitles)) 44 | 45 | def on_synthesis_fail(self, response): 46 | logger.error("on_synthesis_fail: code={} msg={}".format( 47 | response['code'], response['message'] 48 | )) 49 | 50 | 51 | NOTOPEN = 0 52 | STARTED = 1 53 | OPENED = 2 54 | FINAL = 3 55 | ERROR = 4 56 | CLOSED = 5 57 | 58 | FlowingSpeechSynthesizer_ACTION_SYNTHESIS = "ACTION_SYNTHESIS" 59 | FlowingSpeechSynthesizer_ACTION_COMPLETE = "ACTION_COMPLETE" 60 | FlowingSpeechSynthesizer_ACTION_RESET = "ACTION_RESET" 61 | 62 | 63 | class FlowingSpeechSynthesizer: 64 | 65 | def __init__(self, appid, credential, listener): 66 | self.appid = appid 67 | self.credential = credential 68 | self.status = NOTOPEN 69 | self.ws = None 70 | self.wst = None 71 | self.listener = listener 72 | 73 | self.ready = False 74 | 75 | self.voice_type = 0 76 | self.codec = "pcm" 77 | self.sample_rate = 16000 78 | self.volume = 10 79 | self.speed = 0 80 | self.session_id = "" 81 | self.enable_subtitle = 0 82 | self.emotion_category = "" 83 | self.emotion_intensity = 100 84 | 85 | def set_voice_type(self, voice_type): 86 | self.voice_type = voice_type 87 | 88 | def set_emotion_category(self, emotion_category): 89 | self.emotion_category = emotion_category 90 | 91 | def set_emotion_intensity(self, emotion_intensity): 92 | self.emotion_intensity = emotion_intensity 93 | 94 | def set_codec(self, codec): 95 | self.codec = codec 96 | 97 | def set_sample_rate(self, sample_rate): 98 | self.sample_rate = sample_rate 99 | 100 | def set_speed(self, speed): 101 | self.speed = speed 102 | 103 | def set_volume(self, volume): 104 | self.volume = volume 105 | 106 | def set_enable_subtitle(self, enable_subtitle): 107 | self.enable_subtitle = enable_subtitle 108 | 109 | def __gen_signature(self, params): 110 | sort_dict = sorted(params.keys()) 111 | sign_str = "GET" + _HOST + _PATH + "?" 112 | for key in sort_dict: 113 | sign_str = sign_str + key + "=" + str(params[key]) + '&' 114 | sign_str = sign_str[:-1] 115 | print(sign_str) 116 | if is_python3(): 117 | secret_key = self.credential.secret_key.encode('utf-8') 118 | sign_str = sign_str.encode('utf-8') 119 | else: 120 | secret_key = self.credential.secret_key 121 | hmacstr = hmac.new(secret_key, sign_str, hashlib.sha1).digest() 122 | s = base64.b64encode(hmacstr) 123 | s = s.decode('utf-8') 124 | return s 125 | 126 | def __gen_params(self, session_id): 127 | self.session_id = session_id 128 | 129 | params = dict() 130 | params['Action'] = _ACTION 131 | params['AppId'] = int(self.appid) 132 | params['SecretId'] = self.credential.secret_id 133 | params['ModelType'] = 1 134 | params['VoiceType'] = self.voice_type 135 | params['Codec'] = self.codec 136 | params['SampleRate'] = self.sample_rate 137 | params['Speed'] = self.speed 138 | params['Volume'] = self.volume 139 | params['SessionId'] = self.session_id 140 | params['EnableSubtitle'] = self.enable_subtitle 141 | if self.emotion_category != "": 142 | params['EmotionCategory']= self.emotion_category 143 | params['EmotionIntensity']= self.emotion_intensity 144 | 145 | timestamp = int(time.time()) 146 | params['Timestamp'] = timestamp 147 | params['Expired'] = timestamp + 24 * 60 * 60 148 | return params 149 | 150 | def __create_query_string(self, param): 151 | param = sorted(param.items(), key=lambda d: d[0]) 152 | 153 | url = _PROTOCOL + _HOST + _PATH 154 | 155 | signstr = url + "?" 156 | for x in param: 157 | tmp = x 158 | for t in tmp: 159 | signstr += str(t) 160 | signstr += "=" 161 | signstr = signstr[:-1] 162 | signstr += "&" 163 | signstr = signstr[:-1] 164 | return signstr 165 | 166 | def __new_ws_request_message(self, action, data): 167 | return { 168 | "session_id": self.session_id, 169 | "message_id": str(uuid.uuid1()), 170 | 171 | "action": action, 172 | "data": data, 173 | } 174 | 175 | def __do_send(self, action, text): 176 | WSRequestMessage = self.__new_ws_request_message(action, text) 177 | data = json.dumps(WSRequestMessage) 178 | opcode = websocket.ABNF.OPCODE_TEXT 179 | logger.info("ws send opcode={} data={}".format(opcode, data)) 180 | self.ws.send(data, opcode) 181 | 182 | def process(self, text, action=FlowingSpeechSynthesizer_ACTION_SYNTHESIS): 183 | logger.info("process: action={} data={}".format(action, text)) 184 | self.__do_send(action, text) 185 | 186 | def complete(self, action = FlowingSpeechSynthesizer_ACTION_COMPLETE): 187 | logger.info("complete: action={}".format(action)) 188 | self.__do_send(action, "") 189 | 190 | def reset(self, action = FlowingSpeechSynthesizer_ACTION_RESET): 191 | logger.info("reset: action={}".format(action)) 192 | self.__do_send(action, "") 193 | 194 | def wait_ready(self, timeout_ms): 195 | timeout_start = int(time.time() * 1000) 196 | while True: 197 | if self.ready: 198 | return True 199 | if int(time.time() * 1000) - timeout_start > timeout_ms: 200 | break 201 | time.sleep(0.01) 202 | return False 203 | 204 | def start(self): 205 | logger.info("synthesizer start: begin") 206 | 207 | def _close_conn(reason): 208 | ta = time.time() 209 | self.ws.close() 210 | tb = time.time() 211 | logger.info("client has closed connection ({}), cost {} ms".format(reason, int((tb-ta)*1000))) 212 | 213 | def _on_data(ws, data, opcode, flag): 214 | logger.debug("data={} opcode={} flag={}".format(data, opcode, flag)) 215 | if opcode == websocket.ABNF.OPCODE_BINARY: 216 | self.listener.on_audio_result(data) # 217 | pass 218 | elif opcode == websocket.ABNF.OPCODE_TEXT: 219 | resp = json.loads(data) # WSResponseMessage 220 | if resp['code'] != 0: 221 | logger.error("server synthesis fail request_id={} code={} msg={}".format( 222 | resp['request_id'], resp['code'], resp['message'] 223 | )) 224 | self.listener.on_synthesis_fail(resp) 225 | return 226 | if "final" in resp and resp['final'] == 1: 227 | logger.info("recv FINAL frame") 228 | self.status = FINAL 229 | _close_conn("after recv final") 230 | self.listener.on_synthesis_end() 231 | return 232 | if "ready" in resp and resp['ready'] == 1: 233 | logger.info("recv READY frame") 234 | self.ready = True 235 | return 236 | if "reset" in resp and resp['reset'] == 1: 237 | logger.info("recv RESET frame") 238 | return 239 | if "heartbeat" in resp and resp['heartbeat'] == 1: 240 | logger.info("recv HEARTBEAT frame") 241 | return 242 | if "result" in resp: 243 | if "subtitles" in resp["result"] and resp["result"]["subtitles"] is not None: 244 | self.listener.on_text_result(resp) 245 | return 246 | else: 247 | logger.error("invalid on_data code, opcode=".format(opcode)) 248 | 249 | def _on_error(ws, error): 250 | if self.status == FINAL or self.status == CLOSED: 251 | return 252 | self.status = ERROR 253 | logger.error("error={}, session_id={}".format(error, self.session_id)) 254 | _close_conn("after recv error") 255 | 256 | def _on_close(ws, close_status_code, close_msg): 257 | logger.info("conn closed, close_status_code={} close_msg={}".format(close_status_code, close_msg)) 258 | self.status = CLOSED 259 | 260 | def _on_open(ws): 261 | logger.info("conn opened") 262 | self.status = OPENED 263 | 264 | session_id = str(uuid.uuid1()) 265 | params = self.__gen_params(session_id) 266 | signature = self.__gen_signature(params) 267 | requrl = self.__create_query_string(params) 268 | 269 | if is_python3(): 270 | autho = urllib.parse.quote(signature) 271 | else: 272 | autho = urllib.quote(signature) 273 | requrl += "&Signature=%s" % autho 274 | print(requrl) 275 | 276 | self.ws = websocket.WebSocketApp(requrl, None,# header=headers, 277 | on_error=_on_error, on_close=_on_close, 278 | on_data=_on_data) 279 | self.ws.on_open = _on_open 280 | 281 | self.status = STARTED 282 | self.wst = threading.Thread(target=self.ws.run_forever) 283 | self.wst.daemon = True 284 | self.wst.start() 285 | self.listener.on_synthesis_start(session_id) 286 | 287 | logger.info("synthesizer start: end") 288 | 289 | def wait(self): 290 | logger.info("synthesizer wait: begin") 291 | if self.ws: 292 | if self.wst and self.wst.is_alive(): 293 | self.wst.join() 294 | logger.info("synthesizer wait: end") 295 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright (c) 2017-2018 Tencent Ltd. 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------