├── .gitignore
├── 1.启动ASR服务-SSL.bat
├── 1.启动ASR服务.bat
├── 2.启动WebUI.bat
├── 7.激活conda环境.bat
├── README.md
├── funasr_client_api.py
├── funasr_wss_client.py
├── funasr_wss_server.py
├── requirements.txt
├── requirements_client.txt
├── requirements_server.txt
├── web
│   ├── index.html
│   ├── main.js
│   ├── pcm.js
│   ├── recorder-core.js
│   ├── wav.js
│   └── wsconnecter.js
└── webui.py

/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | build/
3 | *.egg-info/
4 | *.so
5 | *.mp4
6 | 
7 | tmp*
8 | trial*/
9 | 
10 | data
11 | data_utils/face_tracking/3DMM/*
12 | data_utils/face_parsing/79999_iter.pth
13 | 
14 | pretrained
15 | *.mp4
16 | .DS_Store
17 | workspace/log_ngp.txt
18 | .idea
19 | 
20 | Miniconda3/
21 | hf_download/
22 | 
23 | *.pth
24 | *.pt
25 | *log.txt
26 | log.txt
27 | 
28 | wav2lip/results/
29 | 
30 | metahuman-stream*
31 | *.zip
32 | FunASR_WS/
33 | *.pem
--------------------------------------------------------------------------------
/1.启动ASR服务-SSL.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 | chcp 65001
3 | 
4 | SET CONDA_PATH=.\Miniconda3
5 | 
6 | REM Activate the base conda environment
7 | CALL %CONDA_PATH%\Scripts\activate.bat %CONDA_PATH%
8 | 
9 | SET KMP_DUPLICATE_LIB_OK=TRUE
10 | SET CONDA_PATH=.\Miniconda3
11 | set HF_ENDPOINT=https://hf-mirror.com
12 | set HF_HOME=%CD%\hf_download
13 | set MODELSCOPE_CACHE=%CD%\hf_download
14 | 
15 | set disable_update=True
16 | 
17 | python funasr_wss_server.py --port 10096 --certfile "cert.pem" --keyfile "key.pem" --asr_model iic/SenseVoiceSmall --asr_model_revision master --asr_model_online iic/SenseVoiceSmall --asr_model_online_revision master
18 | 
19 | cmd /k
--------------------------------------------------------------------------------
/1.启动ASR服务.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 | chcp 65001
3 | 
4 | SET CONDA_PATH=.\Miniconda3
5 | 
6 | REM Activate the base conda environment
7 | CALL %CONDA_PATH%\Scripts\activate.bat %CONDA_PATH%
8 | 
9 | SET KMP_DUPLICATE_LIB_OK=TRUE
10 | SET CONDA_PATH=.\Miniconda3
11 | set HF_ENDPOINT=https://hf-mirror.com
12 | set HF_HOME=%CD%\hf_download
13 | set MODELSCOPE_CACHE=%CD%\hf_download
14 | 
15 | set disable_update=True
16 | 
17 | python funasr_wss_server.py --port 10096 --certfile "" --asr_model iic/SenseVoiceSmall --asr_model_revision master --asr_model_online iic/SenseVoiceSmall --asr_model_online_revision master
18 | 
19 | cmd /k
--------------------------------------------------------------------------------
/2.启动WebUI.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 | chcp 65001
3 | 
4 | SET CONDA_PATH=.\Miniconda3
5 | 
6 | REM Activate the base conda environment
7 | CALL %CONDA_PATH%\Scripts\activate.bat %CONDA_PATH%
8 | 
9 | SET KMP_DUPLICATE_LIB_OK=TRUE
10 | SET CONDA_PATH=.\Miniconda3
11 | set HF_ENDPOINT=https://hf-mirror.com
12 | set HF_HOME=%CD%\hf_download
13 | set MODELSCOPE_CACHE=%CD%\hf_download
14 | 
15 | set disable_update=True
16 | 
17 | start "" "http://127.0.0.1:8101/web/index.html"
18 | 
19 | python webui.py
20 | 
21 | cmd /k
--------------------------------------------------------------------------------
/7.激活conda环境.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 | 
3 | SET CONDA_PATH=.\Miniconda3
4 | 
5 | REM Activate the base conda environment
6 | CALL %CONDA_PATH%\Scripts\activate.bat %CONDA_PATH%
7 | 
8 | cmd /k
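Editor's note: the launcher scripts above share the same environment setup — they activate the bundled Miniconda, route Hugging Face / ModelScope downloads to a local `hf_download` cache via the `hf-mirror.com` mirror, and disable FunASR's update check before starting the server. A rough cross-platform equivalent of `1.启动ASR服务.bat` in Python (a hypothetical helper, not part of the repo; env-var names and server arguments are taken from the .bat files):

```python
# Hypothetical cross-platform launcher mirroring 1.启动ASR服务.bat.
import os
import subprocess

env = dict(os.environ)
env["KMP_DUPLICATE_LIB_OK"] = "TRUE"
env["HF_ENDPOINT"] = "https://hf-mirror.com"               # HF mirror
env["HF_HOME"] = os.path.join(os.getcwd(), "hf_download")  # local model cache
env["MODELSCOPE_CACHE"] = os.path.join(os.getcwd(), "hf_download")
env["disable_update"] = "True"

subprocess.run(
    ["python", "funasr_wss_server.py", "--port", "10096", "--certfile", "",
     "--asr_model", "iic/SenseVoiceSmall", "--asr_model_revision", "master",
     "--asr_model_online", "iic/SenseVoiceSmall",
     "--asr_model_online_revision", "master"],
    env=env,
    check=True,
)
```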
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # A WebSocket ASR server adapted from the official FunASR demo, with FastAPI serving HTTP so real-time ASR can be tested in the browser
2 | 
3 | Install the dependencies:
4 | ```shell
5 | pip install -r requirements.txt
6 | ```
7 | 
8 | Start the ASR service:
9 | ```shell
10 | python funasr_wss_server.py
11 | ```
12 | 
13 | Start the WebUI:
14 | ```shell
15 | python webui.py
16 | ```
17 | 
18 | 
19 | Open in the browser:
20 | ```shell
21 | http://127.0.0.1:8101
22 | ```
23 | 
24 | Preview:
25 | ![image](https://github.com/user-attachments/assets/56628b48-65de-454d-b941-7eb6432f347c)
26 | 
27 | 
28 | 
29 | # Service with websocket-python
30 | 
31 | This is a demo using the funasr pipeline with the websocket python API. It supports offline, online, and offline/online-2pass unified speech recognition.
32 | 
33 | ## For the Server
34 | 
35 | ### Install modelscope and funasr
36 | 
37 | ```shell
38 | pip install -U modelscope funasr
39 | # For users in China, you can install with:
40 | # pip install -U modelscope funasr -i https://mirror.sjtu.edu.cn/pypi/web/simple
41 | git clone https://github.com/alibaba/FunASR.git && cd FunASR
42 | ```
43 | 
44 | ### Install the requirements for the server
45 | 
46 | ```shell
47 | cd runtime/python/websocket
48 | pip install -r requirements_server.txt
49 | ```
50 | 
51 | ### Start server
52 | 
53 | ##### API-reference
54 | ```shell
55 | python funasr_wss_server.py \
56 | --port [port id] \
57 | --asr_model [asr model_name] \
58 | --asr_model_online [asr model_name] \
59 | --punc_model [punc model_name] \
60 | --ngpu [0 or 1] \
61 | --ncpu [1 or 4] \
62 | --certfile [path of certfile for ssl] \
63 | --keyfile [path of keyfile for ssl]
64 | ```
65 | ##### Usage examples
66 | ```shell
67 | python funasr_wss_server.py --port 10095
68 | ```
69 | 
70 | ## For the client
71 | 
72 | Install the requirements for the client
73 | ```shell
74 | git clone https://github.com/alibaba/FunASR.git && cd FunASR
75 | cd funasr/runtime/python/websocket
76 | pip install -r requirements_client.txt
77 | ```
78 | If you want to infer from videos, you should install `ffmpeg`
79 | ```shell
80 | apt-get install -y ffmpeg # ubuntu
81 | # yum install -y ffmpeg # centos
82 | # brew install ffmpeg # mac
83 | # winget install ffmpeg # windows
84 | pip3 install websockets ffmpeg-python
85 | ```
86 | 
87 | ### Start client
88 | #### API-reference
89 | ```shell
90 | python funasr_wss_client.py \
91 | --host [ip_address] \
92 | --port [port id] \
93 | --chunk_size ["5,10,5"=600ms, "8,8,4"=480ms] \
94 | --chunk_interval [duration of send chunk_size/chunk_interval] \
95 | --words_max_print [max number of words to print] \
96 | --audio_in [if set, loading from wav.scp, else recording from the microphone] \
97 | --output_dir [if set, write the results to output_dir] \
98 | --mode [`online` for streaming asr, `offline` for non-streaming, `2pass` for unified streaming and non-streaming asr] \
99 | --thread_num [number of threads used to send data]
100 | ```
101 | 
102 | #### Usage examples
103 | ##### ASR offline client
104 | Recording from the microphone
105 | ```shell
106 | # --chunk_interval: "10" -> 600/10=60ms, "5" -> 600/5=120ms, "20" -> 600/20=30ms
107 | python funasr_wss_client.py --host "0.0.0.0" --port 10095 --mode offline
108 | ```
109 | Loading from wav.scp (kaldi style)
110 | ```shell
111 | # --chunk_interval: "10" -> 600/10=60ms, "5" -> 600/5=120ms, "20" -> 600/20=30ms
112 | python funasr_wss_client.py --host "0.0.0.0" --port 10095 --mode offline --audio_in "./data/wav.scp" --output_dir "./results"
113 | ```
114 | 
115 | ##### ASR streaming client
116 | Recording from the microphone
117 | ```shell
118 | # --chunk_size, "5,10,5"=600ms, "8,8,4"=480ms
119 | python funasr_wss_client.py --host "0.0.0.0" --port 10095 --mode online --chunk_size "5,10,5"
120 | ```
121 | Loading from wav.scp (kaldi style)
122 | ```shell
123 | # --chunk_size, "5,10,5"=600ms, "8,8,4"=480ms
124 | python funasr_wss_client.py --host "0.0.0.0" --port 10095 --mode online --chunk_size "5,10,5" --audio_in "./data/wav.scp" --output_dir "./results"
125 | ```
126 | 
127 | ##### ASR offline/online 2pass client
128 | Recording from the microphone
129 | ```shell
130 | # --chunk_size, "5,10,5"=600ms, "8,8,4"=480ms
131 | python funasr_wss_client.py --host "0.0.0.0" --port 10095 --mode 2pass --chunk_size "8,8,4"
132 | ```
133 | Loading from wav.scp (kaldi style)
134 | ```shell
135 | # --chunk_size, "5,10,5"=600ms, "8,8,4"=480ms
136 | python funasr_wss_client.py --host "0.0.0.0" --port 10095 --mode 2pass --chunk_size "8,8,4" --audio_in "./data/wav.scp" --output_dir "./results"
137 | ```
138 | 
139 | #### Websocket api
140 | ```python
141 | # Funasr_websocket_recognizer example in 3 steps
142 | # 1. create a recognizer
143 | rcg = Funasr_websocket_recognizer(host="127.0.0.1", port="30035", is_ssl=True, mode="2pass")
144 | # 2. send pcm data to the asr engine and get the asr result
145 | text = rcg.feed_chunk(data)
146 | print("text", text)
147 | # 3. get the last result, with timeout=3
148 | text = rcg.close(timeout=3)
149 | print("text", text)
150 | ```
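A fuller, self-contained version of the snippet above (it mirrors the `__main__` demo in `funasr_client_api.py`; the wav path is a placeholder and the input is assumed to be 16 kHz, 16-bit mono PCM):

```python
import time
import wave

from funasr_client_api import Funasr_websocket_recognizer

wav_path = "asr_example.wav"  # placeholder: a 16 kHz, 16-bit mono wav
with wave.open(wav_path, "rb") as wav_file:
    audio_bytes = wav_file.readframes(wav_file.getnframes())

stride = int(60 * 10 / 10 / 1000 * 16000 * 2)  # 60 ms of 16 kHz 16-bit audio = 1920 bytes
chunk_num = (len(audio_bytes) - 1) // stride + 1

rcg = Funasr_websocket_recognizer(
    host="127.0.0.1", port="10095", is_ssl=True, mode="2pass", chunk_size="0,10,5"
)
for i in range(chunk_num):
    text = rcg.feed_chunk(audio_bytes[i * stride : (i + 1) * stride], wait_time=0.02)
    if len(text) > 0:
        print("partial:", text)
    time.sleep(0.05)

print("final:", rcg.close(timeout=3))
```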
151 | 
152 | ## Acknowledgements
153 | 1. This project is maintained by the [FunASR community](https://github.com/alibaba-damo-academy/FunASR).
154 | 2. We acknowledge [zhaoming](https://github.com/zhaomingwork/FunASR/tree/fix_bug_for_python_websocket) for contributing the websocket service.
155 | 3. We acknowledge [cgisky1980](https://github.com/cgisky1980/FunASR) for contributing the websocket service of the offline model.
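Editor's note — for anyone implementing their own client, the wire protocol used by every client in this repo (`funasr_client_api.py`, `funasr_wss_client.py`, and `web/main.js`) is: one JSON text frame to configure the session, then raw 16-bit little-endian PCM binary frames, then a closing JSON frame. A representative handshake, using the field names read by `ws_serve()` in `funasr_wss_server.py` (the values shown are the `funasr_wss_client.py` defaults):

```python
import json

open_msg = json.dumps({
    "mode": "2pass",           # "offline" | "online" | "2pass"
    "chunk_size": [5, 10, 5],  # middle value x 60 ms = streaming chunk (600 ms)
    "chunk_interval": 10,
    "encoder_chunk_look_back": 4,
    "decoder_chunk_look_back": 0,
    "audio_fs": 16000,         # sample rate of the PCM frames that follow
    "wav_name": "demo",
    "wav_format": "pcm",
    "is_speaking": True,
    "hotwords": "",            # JSON string mapping hotword -> weight, e.g. '{"阿里巴巴": 20}'
    "itn": True,
})

# ... then binary frames of raw PCM, and finally:
close_msg = json.dumps({"is_speaking": False})
```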
--------------------------------------------------------------------------------
/funasr_client_api.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
3 | Reserved. MIT License (https://opensource.org/licenses/MIT)
4 | 
5 | 2022-2023 by zhaomingwork@qq.com
6 | """
7 | 
8 | # pip install websocket-client
9 | import ssl
10 | from websocket import ABNF
11 | from websocket import create_connection
12 | from queue import Queue
13 | import threading
14 | import traceback
15 | import json
16 | import time
17 | import numpy as np
18 | 
19 | 
20 | # class for the websocket recognizer
21 | class Funasr_websocket_recognizer:
22 |     """
23 |     python asr recognizer lib
24 | 
25 |     """
26 | 
27 |     def __init__(
28 |         self,
29 |         host="127.0.0.1",
30 |         port="30035",
31 |         is_ssl=True,
32 |         chunk_size="0, 10, 5",
33 |         chunk_interval=10,
34 |         mode="offline",
35 |         wav_name="default",
36 |     ):
37 |         """
38 |         host: server host ip
39 |         port: server port
40 |         is_ssl: True for the wss protocol, False for ws
41 |         """
42 |         try:
43 |             if is_ssl == True:
44 |                 ssl_context = ssl.SSLContext()
45 |                 ssl_context.check_hostname = False
46 |                 ssl_context.verify_mode = ssl.CERT_NONE
47 |                 uri = "wss://{}:{}".format(host, port)
48 |                 ssl_opt = {"cert_reqs": ssl.CERT_NONE}
49 |             else:
50 |                 uri = "ws://{}:{}".format(host, port)
51 |                 ssl_context = None
52 |                 ssl_opt = None
53 |             self.host = host
54 |             self.port = port
55 | 
56 |             self.msg_queue = Queue()  # used for recognized result text
57 | 
58 |             print("connect to url", uri)
59 |             self.websocket = create_connection(uri, ssl=ssl_context, sslopt=ssl_opt)
60 | 
61 |             self.thread_msg = threading.Thread(
62 |                 target=Funasr_websocket_recognizer.thread_rec_msg, args=(self,)
63 |             )
64 |             self.thread_msg.start()
65 |             chunk_size = [int(x) for x in chunk_size.split(",")]
66 |             # NOTE: the original code computed stride/chunk_num here from an
67 |             # undefined `audio_bytes`; chunking is the caller's job (see __main__).
68 | 
69 |             message = json.dumps(
70 |                 {
71 |                     "mode": mode,
72 |                     "chunk_size": chunk_size,
73 |                     "encoder_chunk_look_back": 4,
74 |                     "decoder_chunk_look_back": 1,
75 |                     "chunk_interval": chunk_interval,
76 |                     "wav_name": wav_name,
77 |                     "is_speaking": True,
78 |                 }
79 |             )
80 | 
81 |             self.websocket.send(message)
82 | 
83 |             print("send json", message)
84 | 
85 |         except Exception as e:
86 |             print("Exception:", e)
87 |             traceback.print_exc()
88 | 
89 |     # thread for receiving messages
90 |     def thread_rec_msg(self):
91 |         try:
92 |             while True:
93 |                 msg = self.websocket.recv()
94 |                 if msg is None or len(msg) == 0:
95 |                     continue
96 |                 msg = json.loads(msg)
97 | 
98 |                 self.msg_queue.put(msg)
99 |         except Exception as e:
100 |             print("client closed")
101 | 
102 |     # feed data to the asr engine; wait_time is how long to wait for a result before timing out
103 |     def feed_chunk(self, chunk, wait_time=0.01):
104 |         try:
105 |             self.websocket.send(chunk, ABNF.OPCODE_BINARY)
106 |             # loop to check if there is a message, timeout in 0.01s
107 |             while True:
108 |                 msg = self.msg_queue.get(timeout=wait_time)
109 |                 if self.msg_queue.empty():
110 |                     break
111 | 
112 |             return msg
113 |         except Exception:
114 |             return ""
115 | 
116 |     def close(self, timeout=1):
117 |         message = json.dumps({"is_speaking": False})
118 |         self.websocket.send(message)
119 |         # sleep for timeout seconds to wait for the result
120 |         time.sleep(timeout)
121 |         msg = ""
122 |         while not self.msg_queue.empty():
123 |             msg = self.msg_queue.get()
124 | 
125 |         self.websocket.close()
126 |         # only return the last msg
127 |         return msg
128 | 
129 | 
130 | if __name__ == "__main__":
131 | 
132 |     print("example for Funasr_websocket_recognizer")
133 |     import wave
134 | 
135 |     wav_path = "/Users/zhifu/Downloads/modelscope_models/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/example/asr_example.wav"
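# Editor's note on the chunking math used below: the server expects 16 kHz,
# 16-bit (2-byte) mono PCM, and one websocket frame carries
#     60 ms * chunk_size[1] / chunk_interval
# of audio. With chunk_size[1] = 10 and chunk_interval = 10 that is
#     stride = int(60 * 10 / 10 / 1000 * 16000 * 2) = 1920 bytes (60 ms);
# chunk_num then just rounds the file length up to whole frames.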
136 | with wave.open(wav_path, "rb") as wav_file: 137 | params = wav_file.getparams() 138 | frames = wav_file.readframes(wav_file.getnframes()) 139 | audio_bytes = bytes(frames) 140 | 141 | stride = int(60 * 10 / 10 / 1000 * 16000 * 2) 142 | chunk_num = (len(audio_bytes) - 1) // stride + 1 143 | # create an recognizer 144 | rcg = Funasr_websocket_recognizer( 145 | host="127.0.0.1", port="10095", is_ssl=True, mode="2pass", chunk_size="0,10,5" 146 | ) 147 | # loop to send chunk 148 | for i in range(chunk_num): 149 | 150 | beg = i * stride 151 | data = audio_bytes[beg : beg + stride] 152 | 153 | text = rcg.feed_chunk(data, wait_time=0.02) 154 | if len(text) > 0: 155 | print("text", text) 156 | time.sleep(0.05) 157 | 158 | # get last message 159 | text = rcg.close(timeout=3) 160 | print("text", text) 161 | -------------------------------------------------------------------------------- /funasr_wss_client.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | import os 3 | import time 4 | import websockets, ssl 5 | import asyncio 6 | 7 | # import threading 8 | import argparse 9 | import json 10 | import traceback 11 | from multiprocessing import Process 12 | 13 | # from funasr.fileio.datadir_writer import DatadirWriter 14 | 15 | import logging 16 | 17 | logging.basicConfig(level=logging.ERROR) 18 | 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument( 21 | "--host", type=str, default="localhost", required=False, help="host ip, localhost, 0.0.0.0" 22 | ) 23 | parser.add_argument("--port", type=int, default=10095, required=False, help="grpc server port") 24 | parser.add_argument("--chunk_size", type=str, default="5, 10, 5", help="chunk") 25 | parser.add_argument("--encoder_chunk_look_back", type=int, default=4, help="chunk") 26 | parser.add_argument("--decoder_chunk_look_back", type=int, default=0, help="chunk") 27 | parser.add_argument("--chunk_interval", type=int, default=10, help="chunk") 28 | parser.add_argument( 29 | "--hotword", 30 | type=str, 31 | default="", 32 | help="hotword file path, one hotword perline (e.g.:阿里巴巴 20)", 33 | ) 34 | parser.add_argument("--audio_in", type=str, default=None, help="audio_in") 35 | parser.add_argument("--audio_fs", type=int, default=16000, help="audio_fs") 36 | parser.add_argument( 37 | "--send_without_sleep", 38 | action="store_true", 39 | default=True, 40 | help="if audio_in is set, send_without_sleep", 41 | ) 42 | parser.add_argument("--thread_num", type=int, default=1, help="thread_num") 43 | parser.add_argument("--words_max_print", type=int, default=10000, help="chunk") 44 | parser.add_argument("--output_dir", type=str, default=None, help="output_dir") 45 | parser.add_argument("--ssl", type=int, default=1, help="1 for ssl connect, 0 for no ssl") 46 | parser.add_argument("--use_itn", type=int, default=1, help="1 for using itn, 0 for not itn") 47 | parser.add_argument("--mode", type=str, default="2pass", help="offline, online, 2pass") 48 | 49 | args = parser.parse_args() 50 | args.chunk_size = [int(x) for x in args.chunk_size.split(",")] 51 | print(args) 52 | # voices = asyncio.Queue() 53 | from queue import Queue 54 | 55 | voices = Queue() 56 | offline_msg_done = False 57 | 58 | if args.output_dir is not None: 59 | # if os.path.exists(args.output_dir): 60 | # os.remove(args.output_dir) 61 | 62 | if not os.path.exists(args.output_dir): 63 | os.makedirs(args.output_dir) 64 | 65 | 66 | async def record_microphone(): 67 | is_finished = False 68 | import pyaudio 69 | 70 | # 
print("2") 71 | global voices 72 | FORMAT = pyaudio.paInt16 73 | CHANNELS = 1 74 | RATE = 16000 75 | chunk_size = 60 * args.chunk_size[1] / args.chunk_interval 76 | CHUNK = int(RATE / 1000 * chunk_size) 77 | 78 | p = pyaudio.PyAudio() 79 | 80 | stream = p.open( 81 | format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK 82 | ) 83 | # hotwords 84 | fst_dict = {} 85 | hotword_msg = "" 86 | if args.hotword.strip() != "": 87 | if os.path.exists(args.hotword): 88 | f_scp = open(args.hotword) 89 | hot_lines = f_scp.readlines() 90 | for line in hot_lines: 91 | words = line.strip().split(" ") 92 | if len(words) < 2: 93 | print("Please checkout format of hotwords") 94 | continue 95 | try: 96 | fst_dict[" ".join(words[:-1])] = int(words[-1]) 97 | except ValueError: 98 | print("Please checkout format of hotwords") 99 | hotword_msg = json.dumps(fst_dict) 100 | else: 101 | hotword_msg = args.hotword 102 | 103 | use_itn = True 104 | if args.use_itn == 0: 105 | use_itn = False 106 | 107 | message = json.dumps( 108 | { 109 | "mode": args.mode, 110 | "chunk_size": args.chunk_size, 111 | "chunk_interval": args.chunk_interval, 112 | "encoder_chunk_look_back": args.encoder_chunk_look_back, 113 | "decoder_chunk_look_back": args.decoder_chunk_look_back, 114 | "wav_name": "microphone", 115 | "is_speaking": True, 116 | "hotwords": hotword_msg, 117 | "itn": use_itn, 118 | } 119 | ) 120 | # voices.put(message) 121 | await websocket.send(message) 122 | while True: 123 | data = stream.read(CHUNK) 124 | message = data 125 | # voices.put(message) 126 | await websocket.send(message) 127 | await asyncio.sleep(0.005) 128 | 129 | 130 | async def record_from_scp(chunk_begin, chunk_size): 131 | global voices 132 | is_finished = False 133 | if args.audio_in.endswith(".scp"): 134 | f_scp = open(args.audio_in) 135 | wavs = f_scp.readlines() 136 | else: 137 | wavs = [args.audio_in] 138 | 139 | # hotwords 140 | fst_dict = {} 141 | hotword_msg = "" 142 | if args.hotword.strip() != "": 143 | if os.path.exists(args.hotword): 144 | f_scp = open(args.hotword) 145 | hot_lines = f_scp.readlines() 146 | for line in hot_lines: 147 | words = line.strip().split(" ") 148 | if len(words) < 2: 149 | print("Please checkout format of hotwords") 150 | continue 151 | try: 152 | fst_dict[" ".join(words[:-1])] = int(words[-1]) 153 | except ValueError: 154 | print("Please checkout format of hotwords") 155 | hotword_msg = json.dumps(fst_dict) 156 | else: 157 | hotword_msg = args.hotword 158 | print(hotword_msg) 159 | 160 | sample_rate = args.audio_fs 161 | wav_format = "pcm" 162 | use_itn = True 163 | if args.use_itn == 0: 164 | use_itn = False 165 | 166 | if chunk_size > 0: 167 | wavs = wavs[chunk_begin : chunk_begin + chunk_size] 168 | for wav in wavs: 169 | wav_splits = wav.strip().split() 170 | 171 | wav_name = wav_splits[0] if len(wav_splits) > 1 else "demo" 172 | wav_path = wav_splits[1] if len(wav_splits) > 1 else wav_splits[0] 173 | if not len(wav_path.strip()) > 0: 174 | continue 175 | if wav_path.endswith(".pcm"): 176 | with open(wav_path, "rb") as f: 177 | audio_bytes = f.read() 178 | elif wav_path.endswith(".wav"): 179 | import wave 180 | 181 | with wave.open(wav_path, "rb") as wav_file: 182 | params = wav_file.getparams() 183 | sample_rate = wav_file.getframerate() 184 | frames = wav_file.readframes(wav_file.getnframes()) 185 | audio_bytes = bytes(frames) 186 | else: 187 | wav_format = "others" 188 | with open(wav_path, "rb") as f: 189 | audio_bytes = f.read() 190 | 191 | stride = int(60 * args.chunk_size[1] / 
args.chunk_interval / 1000 * sample_rate * 2) 192 | chunk_num = (len(audio_bytes) - 1) // stride + 1 193 | # print(stride) 194 | 195 | # send first time 196 | message = json.dumps( 197 | { 198 | "mode": args.mode, 199 | "chunk_size": args.chunk_size, 200 | "chunk_interval": args.chunk_interval, 201 | "encoder_chunk_look_back": args.encoder_chunk_look_back, 202 | "decoder_chunk_look_back": args.decoder_chunk_look_back, 203 | "audio_fs": sample_rate, 204 | "wav_name": wav_name, 205 | "wav_format": wav_format, 206 | "is_speaking": True, 207 | "hotwords": hotword_msg, 208 | "itn": use_itn, 209 | } 210 | ) 211 | 212 | # voices.put(message) 213 | await websocket.send(message) 214 | is_speaking = True 215 | for i in range(chunk_num): 216 | 217 | beg = i * stride 218 | data = audio_bytes[beg : beg + stride] 219 | message = data 220 | # voices.put(message) 221 | await websocket.send(message) 222 | if i == chunk_num - 1: 223 | is_speaking = False 224 | message = json.dumps({"is_speaking": is_speaking}) 225 | # voices.put(message) 226 | await websocket.send(message) 227 | 228 | sleep_duration = ( 229 | 0.001 230 | if args.mode == "offline" 231 | else 60 * args.chunk_size[1] / args.chunk_interval / 1000 232 | ) 233 | 234 | await asyncio.sleep(sleep_duration) 235 | 236 | if not args.mode == "offline": 237 | await asyncio.sleep(2) 238 | # offline model need to wait for message recved 239 | 240 | if args.mode == "offline": 241 | global offline_msg_done 242 | while not offline_msg_done: 243 | await asyncio.sleep(1) 244 | 245 | await websocket.close() 246 | 247 | 248 | async def message(id): 249 | global websocket, voices, offline_msg_done 250 | text_print = "" 251 | text_print_2pass_online = "" 252 | text_print_2pass_offline = "" 253 | if args.output_dir is not None: 254 | ibest_writer = open( 255 | os.path.join(args.output_dir, "text.{}".format(id)), "a", encoding="utf-8" 256 | ) 257 | else: 258 | ibest_writer = None 259 | try: 260 | while True: 261 | 262 | meg = await websocket.recv() 263 | meg = json.loads(meg) 264 | wav_name = meg.get("wav_name", "demo") 265 | text = meg["text"] 266 | timestamp = "" 267 | offline_msg_done = meg.get("is_final", False) 268 | if "timestamp" in meg: 269 | timestamp = meg["timestamp"] 270 | 271 | if ibest_writer is not None: 272 | if timestamp != "": 273 | text_write_line = "{}\t{}\t{}\n".format(wav_name, text, timestamp) 274 | else: 275 | text_write_line = "{}\t{}\n".format(wav_name, text) 276 | ibest_writer.write(text_write_line) 277 | 278 | if "mode" not in meg: 279 | continue 280 | if meg["mode"] == "online": 281 | text_print += "{}".format(text) 282 | text_print = text_print[-args.words_max_print :] 283 | os.system("clear") 284 | print("\rpid" + str(id) + ": " + text_print) 285 | elif meg["mode"] == "offline": 286 | if timestamp != "": 287 | text_print += "{} timestamp: {}".format(text, timestamp) 288 | else: 289 | text_print += "{}".format(text) 290 | 291 | # text_print = text_print[-args.words_max_print:] 292 | # os.system('clear') 293 | print("\rpid" + str(id) + ": " + wav_name + ": " + text_print) 294 | offline_msg_done = True 295 | else: 296 | if meg["mode"] == "2pass-online": 297 | text_print_2pass_online += "{}".format(text) 298 | text_print = text_print_2pass_offline + text_print_2pass_online 299 | else: 300 | text_print_2pass_online = "" 301 | text_print = text_print_2pass_offline + "{}".format(text) 302 | text_print_2pass_offline += "{}".format(text) 303 | text_print = text_print[-args.words_max_print :] 304 | os.system("clear") 305 | print("\rpid" + str(id) 
+ ": " + text_print) 306 | # offline_msg_done=True 307 | 308 | except Exception as e: 309 | print("Exception:", e) 310 | # traceback.print_exc() 311 | # await websocket.close() 312 | 313 | 314 | async def ws_client(id, chunk_begin, chunk_size): 315 | if args.audio_in is None: 316 | chunk_begin = 0 317 | chunk_size = 1 318 | global websocket, voices, offline_msg_done 319 | 320 | for i in range(chunk_begin, chunk_begin + chunk_size): 321 | offline_msg_done = False 322 | voices = Queue() 323 | if args.ssl == 1: 324 | ssl_context = ssl.SSLContext() 325 | ssl_context.check_hostname = False 326 | ssl_context.verify_mode = ssl.CERT_NONE 327 | uri = "wss://{}:{}".format(args.host, args.port) 328 | else: 329 | uri = "ws://{}:{}".format(args.host, args.port) 330 | ssl_context = None 331 | print("connect to", uri) 332 | async with websockets.connect( 333 | uri, subprotocols=["binary"], ping_interval=None, ssl=ssl_context 334 | ) as websocket: 335 | if args.audio_in is not None: 336 | task = asyncio.create_task(record_from_scp(i, 1)) 337 | else: 338 | task = asyncio.create_task(record_microphone()) 339 | task3 = asyncio.create_task(message(str(id) + "_" + str(i))) # processid+fileid 340 | await asyncio.gather(task, task3) 341 | exit(0) 342 | 343 | 344 | def one_thread(id, chunk_begin, chunk_size): 345 | asyncio.get_event_loop().run_until_complete(ws_client(id, chunk_begin, chunk_size)) 346 | asyncio.get_event_loop().run_forever() 347 | 348 | 349 | if __name__ == "__main__": 350 | # for microphone 351 | if args.audio_in is None: 352 | p = Process(target=one_thread, args=(0, 0, 0)) 353 | p.start() 354 | p.join() 355 | print("end") 356 | else: 357 | # calculate the number of wavs for each preocess 358 | if args.audio_in.endswith(".scp"): 359 | f_scp = open(args.audio_in) 360 | wavs = f_scp.readlines() 361 | else: 362 | wavs = [args.audio_in] 363 | for wav in wavs: 364 | wav_splits = wav.strip().split() 365 | wav_name = wav_splits[0] if len(wav_splits) > 1 else "demo" 366 | wav_path = wav_splits[1] if len(wav_splits) > 1 else wav_splits[0] 367 | audio_type = os.path.splitext(wav_path)[-1].lower() 368 | 369 | total_len = len(wavs) 370 | if total_len >= args.thread_num: 371 | chunk_size = int(total_len / args.thread_num) 372 | remain_wavs = total_len - chunk_size * args.thread_num 373 | else: 374 | chunk_size = 1 375 | remain_wavs = 0 376 | 377 | process_list = [] 378 | chunk_begin = 0 379 | for i in range(args.thread_num): 380 | now_chunk_size = chunk_size 381 | if remain_wavs > 0: 382 | now_chunk_size = chunk_size + 1 383 | remain_wavs = remain_wavs - 1 384 | # process i handle wavs at chunk_begin and size of now_chunk_size 385 | p = Process(target=one_thread, args=(i, chunk_begin, now_chunk_size)) 386 | chunk_begin = chunk_begin + now_chunk_size 387 | p.start() 388 | process_list.append(p) 389 | 390 | for p in process_list: 391 | p.join() 392 | 393 | print("end") 394 | -------------------------------------------------------------------------------- /funasr_wss_server.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | import websockets 4 | import time 5 | import logging 6 | import tracemalloc 7 | import numpy as np 8 | import argparse 9 | import ssl 10 | from loguru import logger 11 | 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument( 14 | "--host", type=str, default="0.0.0.0", required=False, help="host ip, localhost, 0.0.0.0" 15 | ) 16 | parser.add_argument("--port", type=int, default=10095, required=False, 
help="grpc server port") 17 | parser.add_argument( 18 | "--asr_model", 19 | type=str, 20 | default="iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch", 21 | help="model from modelscope", 22 | ) 23 | parser.add_argument("--asr_model_revision", type=str, default="v2.0.4", help="") 24 | parser.add_argument( 25 | "--asr_model_online", 26 | type=str, 27 | default="iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online", 28 | help="model from modelscope", 29 | ) 30 | parser.add_argument("--asr_model_online_revision", type=str, default="v2.0.4", help="") 31 | parser.add_argument( 32 | "--vad_model", 33 | type=str, 34 | default="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch", 35 | help="model from modelscope", 36 | ) 37 | parser.add_argument("--vad_model_revision", type=str, default="v2.0.4", help="") 38 | parser.add_argument( 39 | "--punc_model", 40 | type=str, 41 | default="iic/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727", 42 | help="model from modelscope", 43 | ) 44 | parser.add_argument("--punc_model_revision", type=str, default="v2.0.4", help="") 45 | parser.add_argument("--ngpu", type=int, default=1, help="0 for cpu, 1 for gpu") 46 | parser.add_argument("--device", type=str, default="cuda", help="cuda, cpu") 47 | parser.add_argument("--ncpu", type=int, default=4, help="cpu cores") 48 | parser.add_argument( 49 | "--certfile", 50 | type=str, 51 | default="cert.pem", 52 | required=False, 53 | help="certfile for ssl", 54 | ) 55 | 56 | parser.add_argument( 57 | "--keyfile", 58 | type=str, 59 | default="key.pem", 60 | required=False, 61 | help="keyfile for ssl", 62 | ) 63 | args = parser.parse_args() 64 | 65 | 66 | websocket_users = set() 67 | 68 | logger.info("模型加载中,请耐心等待...") 69 | from funasr import AutoModel 70 | 71 | # asr 72 | model_asr = AutoModel( 73 | model=args.asr_model, 74 | model_revision=args.asr_model_revision, 75 | ngpu=args.ngpu, 76 | ncpu=args.ncpu, 77 | device=args.device, 78 | disable_pbar=True, 79 | disable_log=True, 80 | disable_update=True, 81 | ) 82 | # asr 83 | model_asr_streaming = AutoModel( 84 | model=args.asr_model_online, 85 | model_revision=args.asr_model_online_revision, 86 | ngpu=args.ngpu, 87 | ncpu=args.ncpu, 88 | device=args.device, 89 | disable_pbar=True, 90 | disable_log=True, 91 | disable_update=True, 92 | ) 93 | # vad 94 | model_vad = AutoModel( 95 | model=args.vad_model, 96 | model_revision=args.vad_model_revision, 97 | ngpu=args.ngpu, 98 | ncpu=args.ncpu, 99 | device=args.device, 100 | disable_pbar=True, 101 | disable_log=True, 102 | # chunk_size=60, 103 | disable_update=True, 104 | ) 105 | 106 | if args.punc_model != "": 107 | model_punc = AutoModel( 108 | model=args.punc_model, 109 | model_revision=args.punc_model_revision, 110 | ngpu=args.ngpu, 111 | ncpu=args.ncpu, 112 | device=args.device, 113 | disable_pbar=True, 114 | disable_log=True, 115 | disable_update=True, 116 | ) 117 | else: 118 | model_punc = None 119 | 120 | 121 | logger.info("模型已加载!现在只能同时支持一个客户端!!!!") 122 | 123 | 124 | async def ws_reset(websocket): 125 | logger.info("WS已重置, 总连接数 ", len(websocket_users)) 126 | 127 | websocket.status_dict_asr_online["cache"] = {} 128 | websocket.status_dict_asr_online["is_final"] = True 129 | websocket.status_dict_vad["cache"] = {} 130 | websocket.status_dict_vad["is_final"] = True 131 | websocket.status_dict_punc["cache"] = {} 132 | 133 | await websocket.close() 134 | 135 | 136 | async def clear_websocket(): 137 | for websocket in websocket_users: 138 | await ws_reset(websocket) 139 | websocket_users.clear() 
140 | 
141 | 
142 | async def ws_serve(websocket, path):
143 |     frames = []
144 |     frames_asr = []
145 |     frames_asr_online = []
146 |     global websocket_users
147 |     # await clear_websocket()
148 |     websocket_users.add(websocket)
149 |     websocket.status_dict_asr = {}
150 |     websocket.status_dict_asr_online = {"cache": {}, "is_final": False}
151 |     websocket.status_dict_vad = {"cache": {}, "is_final": False}
152 |     websocket.status_dict_punc = {"cache": {}}
153 |     websocket.chunk_interval = 10
154 |     websocket.vad_pre_idx = 0
155 |     speech_start = False
156 |     speech_end_i = -1
157 |     websocket.wav_name = "microphone"
158 |     websocket.mode = "2pass"
159 |     logger.info("New client connected")
160 | 
161 |     try:
162 |         async for message in websocket:
163 |             if isinstance(message, str):
164 |                 messagejson = json.loads(message)
165 | 
166 |                 if "is_speaking" in messagejson:
167 |                     websocket.is_speaking = messagejson["is_speaking"]
168 |                     websocket.status_dict_asr_online["is_final"] = not websocket.is_speaking
169 |                 if "chunk_interval" in messagejson:
170 |                     websocket.chunk_interval = messagejson["chunk_interval"]
171 |                 if "wav_name" in messagejson:
172 |                     websocket.wav_name = messagejson.get("wav_name")
173 |                 if "chunk_size" in messagejson:
174 |                     chunk_size = messagejson["chunk_size"]
175 |                     if isinstance(chunk_size, str):
176 |                         chunk_size = chunk_size.split(",")
177 |                     websocket.status_dict_asr_online["chunk_size"] = [int(x) for x in chunk_size]
178 |                 if "encoder_chunk_look_back" in messagejson:
179 |                     websocket.status_dict_asr_online["encoder_chunk_look_back"] = messagejson[
180 |                         "encoder_chunk_look_back"
181 |                     ]
182 |                 if "decoder_chunk_look_back" in messagejson:
183 |                     websocket.status_dict_asr_online["decoder_chunk_look_back"] = messagejson[
184 |                         "decoder_chunk_look_back"
185 |                     ]
186 |                 if "hotwords" in messagejson:  # fixed: clients send the key "hotwords"
187 |                     websocket.status_dict_asr["hotword"] = messagejson["hotwords"]
188 |                 if "mode" in messagejson:
189 |                     websocket.mode = messagejson["mode"]
190 | 
191 |             websocket.status_dict_vad["chunk_size"] = int(
192 |                 websocket.status_dict_asr_online["chunk_size"][1] * 60 / websocket.chunk_interval
193 |             )
194 |             if len(frames_asr_online) > 0 or len(frames_asr) >= 0 or not isinstance(message, str):
195 |                 if not isinstance(message, str):
196 |                     frames.append(message)
197 |                     duration_ms = len(message) // 32
198 |                     websocket.vad_pre_idx += duration_ms
199 | 
200 |                     # asr online
201 |                     frames_asr_online.append(message)
202 |                     websocket.status_dict_asr_online["is_final"] = speech_end_i != -1
203 |                     if (
204 |                         len(frames_asr_online) % websocket.chunk_interval == 0
205 |                         or websocket.status_dict_asr_online["is_final"]
206 |                     ):
207 |                         if websocket.mode == "2pass" or websocket.mode == "online":
208 |                             audio_in = b"".join(frames_asr_online)
209 |                             try:
210 |                                 await async_asr_online(websocket, audio_in)
211 |                             except Exception:
212 |                                 logger.error(f"error in asr streaming, {websocket.status_dict_asr_online}")
213 |                         frames_asr_online = []
214 |                     if speech_start:
215 |                         frames_asr.append(message)
216 |                     # vad online
217 |                     try:
218 |                         speech_start_i, speech_end_i = await async_vad(websocket, message)
219 |                     except Exception:
220 |                         logger.error("error in vad")
221 |                     if speech_start_i != -1:
222 |                         speech_start = True
223 |                         beg_bias = (websocket.vad_pre_idx - speech_start_i) // duration_ms
224 |                         frames_pre = frames[-beg_bias:]
225 |                         frames_asr = []
226 |                         frames_asr.extend(frames_pre)
227 |                 # asr punc offline
228 |                 if speech_end_i != -1 or not websocket.is_speaking:
229 |                     # logger.info("vad end point")
230 |                     if websocket.mode == "2pass" or websocket.mode == "offline":
b"".join(frames_asr) 232 | try: 233 | await async_asr(websocket, audio_in) 234 | except: 235 | logger.info("error in asr offline") 236 | frames_asr = [] 237 | speech_start = False 238 | frames_asr_online = [] 239 | websocket.status_dict_asr_online["cache"] = {} 240 | if not websocket.is_speaking: 241 | websocket.vad_pre_idx = 0 242 | frames = [] 243 | websocket.status_dict_vad["cache"] = {} 244 | else: 245 | frames = frames[-20:] 246 | 247 | except websockets.ConnectionClosed: 248 | logger.info("ConnectionClosed...", websocket_users, flush=True) 249 | await ws_reset(websocket) 250 | websocket_users.remove(websocket) 251 | except websockets.InvalidState: 252 | logger.info("InvalidState...") 253 | except Exception as e: 254 | logger.info("Exception:", e) 255 | 256 | 257 | async def async_vad(websocket, audio_in): 258 | 259 | segments_result = model_vad.generate(input=audio_in, **websocket.status_dict_vad)[0]["value"] 260 | # logger.info(segments_result) 261 | 262 | speech_start = -1 263 | speech_end = -1 264 | 265 | if len(segments_result) == 0 or len(segments_result) > 1: 266 | return speech_start, speech_end 267 | if segments_result[0][0] != -1: 268 | speech_start = segments_result[0][0] 269 | if segments_result[0][1] != -1: 270 | speech_end = segments_result[0][1] 271 | return speech_start, speech_end 272 | 273 | 274 | async def async_asr(websocket, audio_in): 275 | if len(audio_in) > 0: 276 | # logger.info(len(audio_in)) 277 | rec_result = model_asr.generate(input=audio_in, **websocket.status_dict_asr)[0] 278 | # logger.info("offline_asr, ", rec_result) 279 | if model_punc is not None and len(rec_result["text"]) > 0: 280 | # logger.info("offline, before punc", rec_result, "cache", websocket.status_dict_punc) 281 | rec_result = model_punc.generate( 282 | input=rec_result["text"], **websocket.status_dict_punc 283 | )[0] 284 | # logger.info("offline, after punc", rec_result) 285 | if len(rec_result["text"]) > 0: 286 | # logger.info("offline", rec_result) 287 | mode = "2pass-offline" if "2pass" in websocket.mode else websocket.mode 288 | message = json.dumps( 289 | { 290 | "mode": mode, 291 | "text": rec_result["text"], 292 | "wav_name": websocket.wav_name, 293 | "is_final": websocket.is_speaking, 294 | } 295 | ) 296 | await websocket.send(message) 297 | 298 | else: 299 | mode = "2pass-offline" if "2pass" in websocket.mode else websocket.mode 300 | message = json.dumps( 301 | { 302 | "mode": mode, 303 | "text": "", 304 | "wav_name": websocket.wav_name, 305 | "is_final": websocket.is_speaking, 306 | } 307 | ) 308 | await websocket.send(message) 309 | 310 | async def async_asr_online(websocket, audio_in): 311 | if len(audio_in) > 0: 312 | # logger.info(websocket.status_dict_asr_online.get("is_final", False)) 313 | rec_result = model_asr_streaming.generate( 314 | input=audio_in, **websocket.status_dict_asr_online 315 | )[0] 316 | # logger.info("online, ", rec_result) 317 | if websocket.mode == "2pass" and websocket.status_dict_asr_online.get("is_final", False): 318 | return 319 | # websocket.status_dict_asr_online["cache"] = dict() 320 | if len(rec_result["text"]): 321 | mode = "2pass-online" if "2pass" in websocket.mode else websocket.mode 322 | message = json.dumps( 323 | { 324 | "mode": mode, 325 | "text": rec_result["text"], 326 | "wav_name": websocket.wav_name, 327 | "is_final": websocket.is_speaking, 328 | } 329 | ) 330 | await websocket.send(message) 331 | 332 | 333 | if len(args.certfile) > 0: 334 | ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) 335 | 336 | # Generate with Lets 
333 | if len(args.certfile) > 0:
334 |     ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
335 | 
336 |     # Generated with Let's Encrypt, copied to this location, chown'd to the current user, with 400 permissions
337 |     ssl_cert = args.certfile
338 |     ssl_key = args.keyfile
339 | 
340 |     ssl_context.load_cert_chain(ssl_cert, keyfile=ssl_key)
341 |     start_server = websockets.serve(
342 |         ws_serve, args.host, args.port, subprotocols=["binary"], ping_interval=None, ssl=ssl_context
343 |     )
344 | else:
345 |     start_server = websockets.serve(
346 |         ws_serve, args.host, args.port, subprotocols=["binary"], ping_interval=None
347 |     )
348 | asyncio.get_event_loop().run_until_complete(start_server)
349 | asyncio.get_event_loop().run_forever()
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | fastapi
2 | loguru
3 | uvicorn
4 | websockets==12.0
5 | pydub
6 | onnxruntime
7 | onnx==1.15.0
8 | FunASR==1.1.16
--------------------------------------------------------------------------------
/requirements_client.txt:
--------------------------------------------------------------------------------
1 | websockets
2 | pyaudio
--------------------------------------------------------------------------------
/requirements_server.txt:
--------------------------------------------------------------------------------
1 | websockets==12.0
2 | pydub
3 | onnxruntime
4 | onnx==1.15.0
5 | FunASR==1.1.16
--------------------------------------------------------------------------------
/web/index.html:
--------------------------------------------------------------------------------
[Note: the markup of web/index.html was stripped in this export; only text fragments survive. The page is titled "语音识别" (speech recognition) and contains the recorder controls, radio groups for recorder mode / ASR mode / ITN / data forwarding, a hotword textarea, an audio playback element, a manual TLS-authorization link pointing at wss://127.0.0.1:10096/, and the status prompt "请点击开始" ("please click Start"). See web/main.js below for the behavior wired to these controls.]
236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | -------------------------------------------------------------------------------- /web/main.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights 3 | * Reserved. MIT License (https://opensource.org/licenses/MIT) 4 | */ 5 | /* 2022-2023 by zhaoming,mali aihealthx.com */ 6 | 7 | 8 | // 连接; 定义socket连接类对象与语音对象 9 | var wsconnecter = new WebSocketConnectMethod({ msgHandle: getJsonMessage, stateHandle: getConnState }); 10 | var audioBlob; 11 | 12 | // 录音; 定义录音对象,wav格式 13 | var rec = Recorder({ 14 | type: "pcm", 15 | bitRate: 16, 16 | sampleRate: 16000, 17 | onProcess: recProcess 18 | }); 19 | 20 | 21 | var sampleBuf = new Int16Array(); 22 | // 定义按钮响应事件 23 | var btnStart = document.getElementById('btnStart'); 24 | btnStart.onclick = record; 25 | var btnStop = document.getElementById('btnStop'); 26 | btnStop.onclick = stop; 27 | btnStop.disabled = true; 28 | btnStart.disabled = true; 29 | 30 | btnConnect = document.getElementById('btnConnect'); 31 | btnConnect.onclick = start; 32 | 33 | var awsslink = document.getElementById('wsslink'); 34 | 35 | var rec_text = ""; // for online rec asr result 36 | var offline_text = ""; // for offline rec asr result 37 | var info_div = document.getElementById('info_div'); 38 | 39 | var upfile = document.getElementById('upfile'); 40 | 41 | var isfilemode = false; // if it is in file mode 42 | var file_ext = ""; 43 | var file_sample_rate = 16000; //for wav file sample rate 44 | var file_data_array; // array to save file data 45 | 46 | var totalsend = 0; 47 | 48 | // 数据转发模式 49 | var data_forward = "none"; 50 | 51 | 52 | // 构建url 53 | function buildUrl(baseUrl, endpoint) { 54 | // 创建一个新的URL对象并设置其pathname为endpoint 55 | const url = new URL(baseUrl); 56 | url.pathname = new URL(endpoint, 'http://dummy.com').pathname; 57 | 58 | return url.toString(); 59 | } 60 | 61 | // var now_ipaddress=window.location.href; 62 | // now_ipaddress=now_ipaddress.replace("https://","wss://"); 63 | // now_ipaddress=now_ipaddress.replace("static/index.html",""); 64 | // var localport=window.location.port; 65 | // now_ipaddress=now_ipaddress.replace(localport,"10095"); 66 | // document.getElementById('wssip').value=now_ipaddress; 67 | addresschange(); 68 | function addresschange() { 69 | 70 | var Uri = document.getElementById('wssip').value; 71 | // document.getElementById('info_wslink').innerHTML = "点此处手工授权(IOS手机)"; 72 | Uri = Uri.replace(/wss/g, "https"); 73 | console.log("addresschange uri=", Uri); 74 | 75 | awsslink.onclick = function () { 76 | window.open(Uri, '_blank'); 77 | } 78 | 79 | } 80 | 81 | upfile.onclick = function () { 82 | btnStart.disabled = true; 83 | btnStop.disabled = true; 84 | btnConnect.disabled = false; 85 | 86 | } 87 | 88 | // from https://github.com/xiangyuecn/Recorder/tree/master 89 | var readWavInfo = function (bytes) { 90 | //读取wav文件头,统一成44字节的头 91 | if (bytes.byteLength < 44) { 92 | return null; 93 | }; 94 | var wavView = bytes; 95 | var eq = function (p, s) { 96 | for (var i = 0; i < s.length; i++) { 97 | if (wavView[p + i] != s.charCodeAt(i)) { 98 | return false; 99 | }; 100 | }; 101 | return true; 102 | }; 103 | 104 | if (eq(0, "RIFF") && eq(8, "WAVEfmt ")) { 105 | 106 | var numCh = wavView[22]; 107 | if (wavView[20] == 1 && (numCh == 1 || numCh == 2)) {//raw pcm 单或双声道 108 | var sampleRate = wavView[24] + (wavView[25] << 8) + (wavView[26] << 16) + (wavView[27] << 24); 109 | var bitRate = 
wavView[34] + (wavView[35] << 8); 110 | var heads = [wavView.subarray(0, 12)], headSize = 12;//head只保留必要的块 111 | //搜索data块的位置 112 | var dataPos = 0; // 44 或有更多块 113 | for (var i = 12, iL = wavView.length - 8; i < iL;) { 114 | if (wavView[i] == 100 && wavView[i + 1] == 97 && wavView[i + 2] == 116 && wavView[i + 3] == 97) {//eq(i,"data") 115 | heads.push(wavView.subarray(i, i + 8)); 116 | headSize += 8; 117 | dataPos = i + 8; break; 118 | } 119 | var i0 = i; 120 | i += 4; 121 | i += 4 + wavView[i] + (wavView[i + 1] << 8) + (wavView[i + 2] << 16) + (wavView[i + 3] << 24); 122 | if (i0 == 12) {//fmt 123 | heads.push(wavView.subarray(i0, i)); 124 | headSize += i - i0; 125 | } 126 | } 127 | if (dataPos) { 128 | var wavHead = new Uint8Array(headSize); 129 | for (var i = 0, n = 0; i < heads.length; i++) { 130 | wavHead.set(heads[i], n); n += heads[i].length; 131 | } 132 | return { 133 | sampleRate: sampleRate 134 | , bitRate: bitRate 135 | , numChannels: numCh 136 | , wavHead44: wavHead 137 | , dataPos: dataPos 138 | }; 139 | }; 140 | }; 141 | }; 142 | return null; 143 | }; 144 | 145 | upfile.onchange = function () { 146 | var len = this.files.length; 147 | for (let i = 0; i < len; i++) { 148 | 149 | let fileAudio = new FileReader(); 150 | fileAudio.readAsArrayBuffer(this.files[i]); 151 | 152 | file_ext = this.files[i].name.split('.').pop().toLowerCase(); 153 | var audioblob; 154 | fileAudio.onload = function () { 155 | audioblob = fileAudio.result; 156 | file_data_array = audioblob; 157 | info_div.innerHTML = '请点击连接进行识别'; 158 | } 159 | 160 | fileAudio.onerror = function (e) { 161 | console.log('error' + e); 162 | } 163 | } 164 | // for wav file, we get the sample rate 165 | if (file_ext == "wav") 166 | for (let i = 0; i < len; i++) { 167 | 168 | let fileAudio = new FileReader(); 169 | fileAudio.readAsArrayBuffer(this.files[i]); 170 | fileAudio.onload = function () { 171 | audioblob = new Uint8Array(fileAudio.result); 172 | 173 | // for wav file, we can get the sample rate 174 | var info = readWavInfo(audioblob); 175 | console.log(info); 176 | file_sample_rate = info.sampleRate; 177 | } 178 | } 179 | } 180 | 181 | function play_file() { 182 | var audioblob = new Blob([new Uint8Array(file_data_array)], { type: "audio/wav" }); 183 | var audio_record = document.getElementById('audio_record'); 184 | audio_record.src = (window.URL || webkitURL).createObjectURL(audioblob); 185 | audio_record.controls = true; 186 | //audio_record.play(); //not auto play 187 | } 188 | function start_file_send() { 189 | sampleBuf = new Uint8Array(file_data_array); 190 | 191 | var chunk_size = 960; // for asr chunk_size [5, 10, 5] 192 | 193 | while (sampleBuf.length >= chunk_size) { 194 | 195 | sendBuf = sampleBuf.slice(0, chunk_size); 196 | totalsend = totalsend + sampleBuf.length; 197 | sampleBuf = sampleBuf.slice(chunk_size, sampleBuf.length); 198 | wsconnecter.wsSend(sendBuf); 199 | } 200 | 201 | stop(); 202 | } 203 | 204 | // 修改数据转发模式 205 | function on_data_forward_change() { 206 | var item = null; 207 | var obj = document.getElementsByName("data_forward"); 208 | for (var i = 0; i < obj.length; i++) { //遍历Radio 209 | if (obj[i].checked) { 210 | item = obj[i].value; 211 | break; 212 | } 213 | } 214 | 215 | data_forward = item; 216 | } 217 | 218 | function on_recoder_mode_change() { 219 | var item = null; 220 | var obj = document.getElementsByName("recoder_mode"); 221 | for (var i = 0; i < obj.length; i++) { //遍历Radio 222 | if (obj[i].checked) { 223 | item = obj[i].value; 224 | break; 225 | } 226 | } 227 | if (item == 
"mic") { 228 | document.getElementById("mic_mode_div").style.display = 'block'; 229 | document.getElementById("rec_mode_div").style.display = 'none'; 230 | 231 | btnStart.disabled = true; 232 | btnStop.disabled = true; 233 | btnConnect.disabled = false; 234 | isfilemode = false; 235 | } else { 236 | document.getElementById("mic_mode_div").style.display = 'none'; 237 | document.getElementById("rec_mode_div").style.display = 'block'; 238 | 239 | btnStart.disabled = true; 240 | btnStop.disabled = true; 241 | btnConnect.disabled = true; 242 | isfilemode = true; 243 | info_div.innerHTML = '请点击选择文件'; 244 | } 245 | } 246 | 247 | 248 | function getHotwords() { 249 | var obj = document.getElementById("varHot"); 250 | 251 | if (typeof (obj) == 'undefined' || obj == null || obj.value.length <= 0) { 252 | return null; 253 | } 254 | let val = obj.value.toString(); 255 | 256 | console.log("hotwords=" + val); 257 | let items = val.split(/[(\r\n)\r\n]+/); //split by \r\n 258 | var jsonresult = {}; 259 | const regexNum = /^[0-9]*$/; // test number 260 | for (item of items) { 261 | 262 | let result = item.split(" "); 263 | if (result.length >= 2 && regexNum.test(result[result.length - 1])) { 264 | var wordstr = ""; 265 | for (var i = 0; i < result.length - 1; i++) 266 | wordstr = wordstr + result[i] + " "; 267 | 268 | jsonresult[wordstr.trim()] = parseInt(result[result.length - 1]); 269 | } 270 | } 271 | console.log("jsonresult=" + JSON.stringify(jsonresult)); 272 | return JSON.stringify(jsonresult); 273 | 274 | } 275 | function getAsrMode() { 276 | 277 | var item = null; 278 | var obj = document.getElementsByName("asr_mode"); 279 | for (var i = 0; i < obj.length; i++) { //遍历Radio 280 | if (obj[i].checked) { 281 | item = obj[i].value; 282 | break; 283 | } 284 | } 285 | if (isfilemode) { 286 | item = "offline"; 287 | } 288 | console.log("asr mode" + item); 289 | 290 | return item; 291 | } 292 | 293 | function handleWithTimestamp(tmptext, tmptime) { 294 | console.log("tmptext: " + tmptext); 295 | console.log("tmptime: " + tmptime); 296 | if (tmptime == null || tmptime == "undefined" || tmptext.length <= 0) { 297 | return tmptext; 298 | } 299 | tmptext = tmptext.replace(/。|?|,|、|\?|\.|\ /g, ","); // in case there are a lot of "。" 300 | var words = tmptext.split(","); // split to chinese sentence or english words 301 | var jsontime = JSON.parse(tmptime); //JSON.parse(tmptime.replace(/\]\]\[\[/g, "],[")); // in case there are a lot segments by VAD 302 | var char_index = 0; // index for timestamp 303 | var text_withtime = ""; 304 | for (var i = 0; i < words.length; i++) { 305 | if (words[i] == "undefined" || words[i].length <= 0) { 306 | continue; 307 | } 308 | console.log("words===", words[i]); 309 | console.log("words: " + words[i] + ",time=" + jsontime[char_index][0] / 1000); 310 | if (/^[a-zA-Z]+$/.test(words[i])) { // if it is english 311 | text_withtime = text_withtime + jsontime[char_index][0] / 1000 + ":" + words[i] + "\n"; 312 | char_index = char_index + 1; //for english, timestamp unit is about a word 313 | } 314 | else { 315 | // if it is chinese 316 | text_withtime = text_withtime + jsontime[char_index][0] / 1000 + ":" + words[i] + "\n"; 317 | char_index = char_index + words[i].length; //for chinese, timestamp unit is about a char 318 | } 319 | } 320 | return text_withtime; 321 | } 322 | 323 | const sleep = (delay) => new Promise((resolve) => setTimeout(resolve, delay)) 324 | async function is_speaking() { 325 | try { 326 | if (data_forward == "livetalking") { 327 | const response = await 
fetch(buildUrl(document.getElementById("livetalking_api_url").value, '/is_speaking'), { 328 | body: JSON.stringify({ 329 | sessionid: 0, 330 | }), 331 | headers: { 332 | 'Content-Type': 'application/json' 333 | }, 334 | method: 'POST' 335 | }); 336 | const data = await response.json(); 337 | console.log('is_speaking res:', data) 338 | return data.data 339 | } else if (data_forward == "ai_vtuber") { 340 | const response = await fetch(buildUrl(document.getElementById("ai_vtuber_api_url").value, '/get_sys_info'), { 341 | headers: { 342 | 'Content-Type': 'application/json' 343 | }, 344 | method: 'GET' 345 | }); 346 | const data = await response.json(); 347 | console.log('is_speaking res:', data) 348 | 349 | // 如果等待播放和等待合成的消息数量都为0,则认为没有在说话 350 | if (data["data"]["audio"]["wait_play_audio_num"] == 0 && data["data"]["audio"]["wait_synthesis_msg_num"] == 0 && 351 | data["data"]["metahuman-stream"]["wait_play_audio_num"] == 0 && data["data"]["metahuman-stream"]["wait_synthesis_msg_num"] == 0 352 | ) { 353 | return false; 354 | } else { 355 | return true; 356 | } 357 | } 358 | 359 | return false 360 | } catch (error) { 361 | console.error('is_speaking error:', error) 362 | return false 363 | } 364 | } 365 | 366 | async function waitSpeakingEnd() { 367 | if (data_forward == "none") { 368 | return 369 | } else if (data_forward == "livetalking" || data_forward == "ai_vtuber") { 370 | rec.stop() //关闭录音 371 | for (let i = 0; i < 10; i++) { //等待数字人开始讲话,最长等待10s 372 | bspeak = await is_speaking() 373 | if (bspeak) { 374 | break 375 | } 376 | await sleep(1000) 377 | } 378 | 379 | while (true) { //等待数字人讲话结束 380 | bspeak = await is_speaking() 381 | if (!bspeak) { 382 | break 383 | } 384 | await sleep(1000) 385 | } 386 | await sleep(2000) 387 | rec.start() 388 | } 389 | } 390 | // 语音识别结果; 对jsonMsg数据解析,将识别结果附加到编辑框中 391 | function getJsonMessage(jsonMsg) { 392 | //console.log(jsonMsg); 393 | // console.log("message: " + JSON.parse(jsonMsg.data)['text']); 394 | var rectxt = "" + JSON.parse(jsonMsg.data)['text']; 395 | var asrmodel = JSON.parse(jsonMsg.data)['mode']; 396 | var is_final = JSON.parse(jsonMsg.data)['is_final']; 397 | var timestamp = JSON.parse(jsonMsg.data)['timestamp']; 398 | if (asrmodel == "2pass-offline" || asrmodel == "offline") { 399 | // 过滤特殊字符 400 | rectxt = rectxt.replace(/<[^>]*>/g, ''); 401 | 402 | offline_text = offline_text + rectxt.replace(/ +/g, "") + '\n'; //handleWithTimestamp(rectxt,timestamp); //rectxt; //.replace(/ +/g,""); 403 | rec_text = offline_text; 404 | 405 | if (data_forward == "livetalking") { 406 | fetch(buildUrl(document.getElementById("livetalking_api_url").value, '/human'), { 407 | body: JSON.stringify({ 408 | text: rectxt.replace(/ +/g, ""), 409 | type: 'chat', 410 | }), 411 | headers: { 412 | 'Content-Type': 'application/json' 413 | }, 414 | method: 'POST' 415 | }); 416 | } else if (data_forward == "ai_vtuber") { 417 | fetch(buildUrl(document.getElementById("ai_vtuber_api_url").value, '/send'), { 418 | body: JSON.stringify({ 419 | type: 'comment', 420 | data: { 421 | "type": 'comment', 422 | "username": '主人', 423 | "content": rectxt.replace(/ +/g, ""), 424 | } 425 | }), 426 | headers: { 427 | 'Content-Type': 'application/json' 428 | }, 429 | method: 'POST' 430 | }); 431 | } 432 | 433 | waitSpeakingEnd(); 434 | } 435 | else { 436 | rec_text = rec_text + rectxt; //.replace(/ +/g,""); 437 | } 438 | var varArea = document.getElementById('varArea'); 439 | 440 | // 过滤特殊字符 441 | rec_text = rec_text.replace(/<[^>]*>/g, ''); 442 | varArea.value = rec_text; 443 | // 
console.log("offline_text: " + asrmodel + "," + offline_text); 444 | // console.log("rec_text: " + rec_text); 445 | if (isfilemode == true && is_final == true) { 446 | console.log("call stop ws!"); 447 | play_file(); 448 | wsconnecter.wsStop(); 449 | 450 | info_div.innerHTML = "请点击连接"; 451 | 452 | btnStart.disabled = true; 453 | btnStop.disabled = true; 454 | btnConnect.disabled = false; 455 | } 456 | } 457 | 458 | // 连接状态响应 459 | function getConnState(connState) { 460 | if (connState === 0) { //on open 461 | 462 | 463 | info_div.innerHTML = '连接成功!请点击开始'; 464 | if (isfilemode == true) { 465 | info_div.innerHTML = '请耐心等待,大文件等待时间更长'; 466 | start_file_send(); 467 | } 468 | else { 469 | btnStart.disabled = false; 470 | btnStop.disabled = true; 471 | btnConnect.disabled = true; 472 | } 473 | } else if (connState === 1) { 474 | //stop(); 475 | } else if (connState === 2) { 476 | stop(); 477 | console.log('connecttion error'); 478 | 479 | alert("连接地址" + document.getElementById('wssip').value + "失败,请检查asr地址和端口。或试试界面上手动授权,再连接。"); 480 | btnStart.disabled = true; 481 | btnStop.disabled = true; 482 | btnConnect.disabled = false; 483 | 484 | info_div.innerHTML = '请点击连接'; 485 | } 486 | } 487 | 488 | function record() { 489 | rec.open(function () { 490 | rec.start(); 491 | console.log("开始"); 492 | btnStart.disabled = true; 493 | btnStop.disabled = false; 494 | btnConnect.disabled = true; 495 | }); 496 | } 497 | 498 | // 识别启动、停止、清空操作 499 | function start() { 500 | // 清除显示 501 | clear(); 502 | //控件状态更新 503 | console.log("isfilemode" + isfilemode); 504 | 505 | //启动连接 506 | var ret = wsconnecter.wsStart(); 507 | // 1 is ok, 0 is error 508 | if (ret == 1) { 509 | info_div.innerHTML = "正在连接asr服务器,请等待..."; 510 | isRec = true; 511 | btnStart.disabled = true; 512 | btnStop.disabled = true; 513 | btnConnect.disabled = true; 514 | 515 | return 1; 516 | } 517 | else { 518 | info_div.innerHTML = "请点击开始"; 519 | btnStart.disabled = true; 520 | btnStop.disabled = true; 521 | btnConnect.disabled = false; 522 | 523 | return 0; 524 | } 525 | } 526 | 527 | 528 | function stop() { 529 | var chunk_size = new Array(5, 10, 5); 530 | var request = { 531 | "chunk_size": chunk_size, 532 | "wav_name": "h5", 533 | "is_speaking": false, 534 | "chunk_interval": 10, 535 | "mode": getAsrMode(), 536 | "url": document.getElementById('audio_record').src, 537 | }; 538 | console.log(request); 539 | if (sampleBuf.length > 0) { 540 | wsconnecter.wsSend(sampleBuf); 541 | console.log("sampleBuf.length" + sampleBuf.length); 542 | sampleBuf = new Int16Array(); 543 | } 544 | wsconnecter.wsSend(JSON.stringify(request)); 545 | 546 | // 控件状态更新 547 | 548 | isRec = false; 549 | info_div.innerHTML = "发送完数据,请等候,正在识别..."; 550 | 551 | if (isfilemode == false) { 552 | btnStop.disabled = true; 553 | btnStart.disabled = true; 554 | btnConnect.disabled = true; 555 | //wait 3s for asr result 556 | setTimeout(function () { 557 | console.log("call stop ws!"); 558 | wsconnecter.wsStop(); 559 | btnConnect.disabled = false; 560 | info_div.innerHTML = "请点击连接"; 561 | }, 3000); 562 | 563 | rec.stop(function (blob, duration) { 564 | console.log(blob); 565 | var audioBlob = Recorder.pcm2wav(data = { sampleRate: 16000, bitRate: 16, blob: blob }, 566 | function (theblob, duration) { 567 | console.log(theblob); 568 | var audio_record = document.getElementById('audio_record'); 569 | audio_record.src = (window.URL || webkitURL).createObjectURL(theblob); 570 | audio_record.controls = true; 571 | //audio_record.play(); 572 | }, function (msg) { 573 | console.log(msg); 574 | } 
582 | 
583 | function clear() {
584 |     var varArea = document.getElementById('varArea');
585 | 
586 |     varArea.value = "";
587 |     rec_text = "";
588 |     offline_text = "";
589 | }
590 | 
591 | function recProcess(buffer, powerLevel, bufferDuration, bufferSampleRate, newBufferIdx, asyncEnd) {
592 |     if (isRec === true) {
593 |         var data_48k = buffer[buffer.length - 1];
594 | 
595 |         var array_48k = [data_48k];
596 |         var data_16k = Recorder.SampleData(array_48k, bufferSampleRate, 16000).data;
597 | 
598 |         sampleBuf = Int16Array.from([...sampleBuf, ...data_16k]);
599 |         var chunk_size = 960; // for asr chunk_size [5, 10, 5]
600 |         info_div.innerHTML = "" + bufferDuration / 1000 + "s";
601 |         while (sampleBuf.length >= chunk_size) {
602 |             var sendBuf = sampleBuf.slice(0, chunk_size);
603 |             sampleBuf = sampleBuf.slice(chunk_size, sampleBuf.length);
604 |             wsconnecter.wsSend(sendBuf);
605 |         }
606 |     }
607 | }
608 | 
609 | function getUseITN() {
610 |     var obj = document.getElementsByName("use_itn");
611 |     for (var i = 0; i < obj.length; i++) {
612 |         if (obj[i].checked) {
613 |             return obj[i].value === "true";
614 |         }
615 |     }
616 |     return false;
617 | }
618 | 
--------------------------------------------------------------------------------
/web/pcm.js:
--------------------------------------------------------------------------------
1 | /*
2 | pcm编码器+编码引擎
3 | https://github.com/xiangyuecn/Recorder
4 | 
5 | 编码原理:本编码器输出的pcm格式数据其实就是Recorder中的buffers原始数据(经过了重新采样),16位时为LE小端模式(Little Endian),并未经过任何编码处理
6 | 
7 | 编码的代码和wav.js区别不大,pcm加上一个44字节wav头即成wav文件;所以要播放pcm就很简单了,直接转成wav文件来播放,已提供转换函数 Recorder.pcm2wav
8 | */
9 | (function(){
10 | "use strict";
11 | 
12 | Recorder.prototype.enc_pcm={
13 | 	stable:true
14 | 	,testmsg:"pcm为未封装的原始音频数据,pcm数据文件无法直接播放;支持位数8位、16位(填在比特率里面),采样率取值无限制"
15 | };
16 | Recorder.prototype.pcm=function(res,True,False){
17 | 	var This=this,set=This.set
18 | 		,size=res.length
19 | 		,bitRate=set.bitRate==8?8:16;
20 | 	
21 | 	var buffer=new ArrayBuffer(size*(bitRate/8));
22 | 	var data=new DataView(buffer);
23 | 	var offset=0;
24 | 	
25 | 	// 写入采样数据
26 | 	if(bitRate==8) {
27 | 		for(var i=0;i<size;i++,offset++) {
28 | 			//位数转换
29 | 			var val=(res[i]>>8)+128;
30 | 			data.setInt8(offset,val,true);
31 | 		};
32 | 	}else{
33 | 		for (var i=0;i<size;i++,offset+=2) {
34 | 			data.setInt16(offset,res[i],true);
35 | 		};
36 | 	};
37 | 	
38 | 	True(new Blob([data.buffer],{type:"audio/pcm"}));
39 | };
--------------------------------------------------------------------------------
/web/recorder-core.js:
--------------------------------------------------------------------------------
420 | /*对pcm数据的采样率进行转换
421 | pcmDatas: [[Int16,...]] pcm片段列表
422 | pcmSampleRate:48000 pcm数据的采样率
423 | newSampleRate:16000 新采样率,>=pcmSampleRate时不会进行任何处理,小于时会进行重新采样
424 | prevChunkInfo:{} 可选,上次调用时的返回值,用于连续转换,本次调用将从上次结束位置开始进行处理。或可自行定义一个ChunkInfo从pcmDatas指定的位置开始进行转换
425 | option:{ 可选,配置项
426 | 	frameSize:123456 帧大小,每帧的PCM Int16的数量,采样率转换后的pcm长度为frameSize的整数倍,用于连续转换。目前仅在mp3格式时才有用,frameSize取值为1152,这样编码出来的mp3时长和pcm的时长完全一致,否则会因为mp3最后一帧录音不够填满时添加填充数据导致mp3的时长变长。
427 | 	frameType:"" 帧类型,一般为rec.set.type,提供此参数时无需提供frameSize,会自动使用最佳的值给frameSize赋值,目前仅支持mp3=1152(MPEG1 Layer3的每帧采样数),其他类型=1。
428 | 	以上两个参数用于连续转换时使用,最多使用一个,不提供时不进行帧的特殊处理,提供时必须同时提供prevChunkInfo才有作用。最后一段数据处理时无需提供帧大小以便输出最后一丁点残留数据。
429 | }
430 | 
431 | 返回ChunkInfo:{
432 | 	//可定义,从指定位置开始转换到结尾
433 | 	index:0 pcmDatas已处理到的索引
434 | 	offset:0.0 已处理到的index对应的pcm中的偏移的下一个位置
435 | 
436 | 	//仅作为返回值
437 | 	frameNext:null||[Int16,...] 下一帧的部分数据,frameSize设置了的时候才可能会有
438 | 	sampleRate:16000 结果的采样率,<=newSampleRate
439 | 	data:[Int16,...]
转换后的PCM结果;如果是连续转换,并且pcmDatas中并没有新数据时,data的长度可能为0 440 | } 441 | */ 442 | Recorder.SampleData=function(pcmDatas,pcmSampleRate,newSampleRate,prevChunkInfo,option){ 443 | prevChunkInfo||(prevChunkInfo={}); 444 | var index=prevChunkInfo.index||0; 445 | var offset=prevChunkInfo.offset||0; 446 | 447 | var frameNext=prevChunkInfo.frameNext||[]; 448 | option||(option={}); 449 | var frameSize=option.frameSize||1; 450 | if(option.frameType){ 451 | frameSize=option.frameType=="mp3"?1152:1; 452 | }; 453 | 454 | var nLen=pcmDatas.length; 455 | if(index>nLen+1){ 456 | CLog("SampleData似乎传入了未重置chunk "+index+">"+nLen,3); 457 | }; 458 | var size=0; 459 | for(var i=index;i1){//新采样低于录音采样,进行抽样 467 | size=Math.floor(size/step); 468 | }else{//新采样高于录音采样不处理,省去了插值处理 469 | step=1; 470 | newSampleRate=pcmSampleRate; 471 | }; 472 | 473 | size+=frameNext.length; 474 | var res=new Int16Array(size); 475 | var idx=0; 476 | //添加上一次不够一帧的剩余数据 477 | for(var i=0;i0){ 510 | var u8Pos=(res.length-frameNextSize)*2; 511 | frameNext=new Int16Array(res.buffer.slice(u8Pos)); 512 | res=new Int16Array(res.buffer.slice(0,u8Pos)); 513 | }; 514 | 515 | return { 516 | index:index 517 | ,offset:offset 518 | 519 | ,frameNext:frameNext 520 | ,sampleRate:newSampleRate 521 | ,data:res 522 | }; 523 | }; 524 | 525 | 526 | /*计算音量百分比的一个方法 527 | pcmAbsSum: pcm Int16所有采样的绝对值的和 528 | pcmLength: pcm长度 529 | 返回值:0-100,主要当做百分比用 530 | 注意:这个不是分贝,因此没用volume当做名称*/ 531 | Recorder.PowerLevel=function(pcmAbsSum,pcmLength){ 532 | /*计算音量 https://blog.csdn.net/jody1989/article/details/73480259 533 | 更高灵敏度算法: 534 | 限定最大感应值10000 535 | 线性曲线:低音量不友好 536 | power/10000*100 537 | 对数曲线:低音量友好,但需限定最低感应值 538 | (1+Math.log10(power/10000))*100 539 | */ 540 | var power=(pcmAbsSum/pcmLength) || 0;//NaN 541 | var level; 542 | if(power<1251){//1250的结果10%,更小的音量采用线性取值 543 | level=Math.round(power/1250*10); 544 | }else{ 545 | level=Math.round(Math.min(100,Math.max(0,(1+Math.log(power/10000)/Math.log(10))*100))); 546 | }; 547 | return level; 548 | }; 549 | 550 | /*计算音量,单位dBFS(满刻度相对电平) 551 | maxSample: 为16位pcm采样的绝对值中最大的一个(计算峰值音量),或者为pcm中所有采样的绝对值的平局值 552 | 返回值:-100~0 (最大值0dB,最小值-100代替-∞) 553 | */ 554 | Recorder.PowerDBFS=function(maxSample){ 555 | var val=Math.max(0.1, maxSample||0),Pref=0x7FFF; 556 | val=Math.min(val,Pref); 557 | //https://www.logiclocmusic.com/can-you-tell-the-decibel/ 558 | //https://blog.csdn.net/qq_17256689/article/details/120442510 559 | val=20*Math.log(val/Pref)/Math.log(10); 560 | return Math.max(-100,Math.round(val)); 561 | }; 562 | 563 | 564 | 565 | 566 | //带时间的日志输出,可设为一个空函数来屏蔽日志输出 567 | //CLog(msg,errOrLogMsg, logMsg...) 
err为数字时代表日志类型1:error 2:log默认 3:warn,否则当做内容输出,第一个参数不能是对象因为要拼接时间,后面可以接无数个输出参数 568 | Recorder.CLog=function(msg,err){ 569 | var now=new Date(); 570 | var t=("0"+now.getMinutes()).substr(-2) 571 | +":"+("0"+now.getSeconds()).substr(-2) 572 | +"."+("00"+now.getMilliseconds()).substr(-3); 573 | var recID=this&&this.envIn&&this.envCheck&&this.id; 574 | var arr=["["+t+" "+RecTxt+(recID?":"+recID:"")+"]"+msg]; 575 | var a=arguments,console=window.console||{}; 576 | var i=2,fn=console.log; 577 | if(typeof(err)=="number"){ 578 | fn=err==1?console.error:err==3?console.warn:fn; 579 | }else{ 580 | i=1; 581 | }; 582 | for(;i1?arr:""); 587 | }else{ 588 | fn.apply(console,arr); 589 | }; 590 | }; 591 | var CLog=function(){ Recorder.CLog.apply(this,arguments); }; 592 | var IsLoser=true;try{IsLoser=!console.log.apply;}catch(e){}; 593 | 594 | 595 | 596 | 597 | var ID=0; 598 | function initFn(set){ 599 | this.id=++ID; 600 | 601 | //如果开启了流量统计,这里将发送一个图片请求 602 | Traffic(); 603 | 604 | 605 | var o={ 606 | type:"mp3" //输出类型:mp3,wav,wav输出文件尺寸超大不推荐使用,但mp3编码支持会导致js文件超大,如果不需支持mp3可以使js文件大幅减小 607 | ,bitRate:16 //比特率 wav:16或8位,MP3:8kbps 1k/s,8kbps 2k/s 录音文件很小 608 | 609 | ,sampleRate:16000 //采样率,wav格式大小=sampleRate*时间;mp3此项对低比特率有影响,高比特率几乎无影响。 610 | //wav任意值,mp3取值范围:48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000 611 | //采样率参考https://www.cnblogs.com/devin87/p/mp3-recorder.html 612 | 613 | ,onProcess:NOOP //fn(buffers,powerLevel,bufferDuration,bufferSampleRate,newBufferIdx,asyncEnd) buffers=[[Int16,...],...]:缓冲的PCM数据,为从开始录音到现在的所有pcm片段;powerLevel:当前缓冲的音量级别0-100,bufferDuration:已缓冲时长,bufferSampleRate:缓冲使用的采样率(当type支持边录边转码(Worker)时,此采样率和设置的采样率相同,否则不一定相同);newBufferIdx:本次回调新增的buffer起始索引;asyncEnd:fn() 如果onProcess是异步的(返回值为true时),处理完成时需要调用此回调,如果不是异步的请忽略此参数,此方法回调时必须是真异步(不能真异步时需用setTimeout包裹)。onProcess返回值:如果返回true代表开启异步模式,在某些大量运算的场合异步是必须的,必须在异步处理完成时调用asyncEnd(不能真异步时需用setTimeout包裹),在onProcess执行后新增的buffer会全部替换成空数组,因此本回调开头应立即将newBufferIdx到本次回调结尾位置的buffer全部保存到另外一个数组内,处理完成后写回buffers中本次回调的结尾位置。 614 | 615 | //*******高级设置****** 616 | //,sourceStream:MediaStream Object 617 | //可选直接提供一个媒体流,从这个流中录制、实时处理音频数据(当前Recorder实例独享此流);不提供时为普通的麦克风录音,由getUserMedia提供音频流(所有Recorder实例共享同一个流) 618 | //比如:audio、video标签dom节点的captureStream方法(实验特性,不同浏览器支持程度不高)返回的流;WebRTC中的remote流;自己创建的流等 619 | //注意:流内必须至少存在一条音轨(Audio Track),比如audio标签必须等待到可以开始播放后才会有音轨,否则open会失败 620 | 621 | //,audioTrackSet:{ deviceId:"",groupId:"", autoGainControl:true, echoCancellation:true, noiseSuppression:true } 622 | //普通麦克风录音时getUserMedia方法的audio配置参数,比如指定设备id,回声消除、降噪开关;注意:提供的任何配置值都不一定会生效 623 | //由于麦克风是全局共享的,所以新配置后需要close掉以前的再重新open 624 | //更多参考: https://developer.mozilla.org/en-US/docs/Web/API/MediaTrackConstraints 625 | 626 | //,disableEnvInFix:false 内部参数,禁用设备卡顿时音频输入丢失补偿功能 627 | 628 | //,takeoffEncodeChunk:NOOP //fn(chunkBytes) chunkBytes=[Uint8,...]:实时编码环境下接管编码器输出,当编码器实时编码出一块有效的二进制音频数据时实时回调此方法;参数为二进制的Uint8Array,就是编码出来的音频数据片段,所有的chunkBytes拼接在一起即为完整音频。本实现的想法最初由QQ2543775048提出 629 | //当提供此回调方法时,将接管编码器的数据输出,编码器内部将放弃存储生成的音频数据;环境要求比较苛刻:如果当前环境不支持实时编码处理,将在open时直接走fail逻辑 630 | //因此提供此回调后调用stop方法将无法获得有效的音频数据,因为编码器内没有音频数据,因此stop时返回的blob将是一个字节长度为0的blob 631 | //目前只有mp3格式实现了实时编码,在支持实时处理的环境中将会实时的将编码出来的mp3片段通过此方法回调,所有的chunkBytes拼接到一起即为完整的mp3,此种拼接的结果比mock方法实时生成的音质更加,因为天然避免了首尾的静默 632 | //目前除mp3外其他格式不可以提供此回调,提供了将在open时直接走fail逻辑 633 | }; 634 | 635 | for(var k in set){ 636 | o[k]=set[k]; 637 | }; 638 | this.set=o; 639 | 640 | this._S=9;//stop同步锁,stop可以阻止open过程中还未运行的start 641 | this.Sync={O:9,C:9};//和Recorder.Sync一致,只不过这个是非全局的,仅用来简化代码逻辑,无实际作用 642 | }; 643 | 
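//使用示意(参数含义见上面initFn中set的说明;以下为假设性示例,Recorder实例的实际创建参数以本项目main.js为准):
//var rec=Recorder({
//	type:"pcm" //配合pcm.js编码器,便于按Int16分片通过WebSocket发送
//	,bitRate:16
//	,sampleRate:16000
//	,onProcess:recProcess //参见main.js中的recProcess,在回调内重采样为16k并送入发送缓冲
//});
//rec.open(function(){ rec.start(); },function(msg,isUserNotAllow){ console.log("无法录音:"+msg); });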
//同步锁,控制对Stream的竞争;用于close时中断异步的open;一个对象open如果变化了都要阻止close,Stream的控制权交个新的对象 644 | Recorder.Sync={/*open*/O:9,/*close*/C:9}; 645 | 646 | Recorder.prototype=initFn.prototype={ 647 | CLog:CLog 648 | 649 | //流相关的数据存储在哪个对象里面;如果提供了sourceStream,数据直接存储在当前对象中,否则存储在全局 650 | ,_streamStore:function(){ 651 | if(this.set.sourceStream){ 652 | return this; 653 | }else{ 654 | return Recorder; 655 | } 656 | } 657 | 658 | //打开录音资源True(),False(msg,isUserNotAllow),需要调用close。注意:此方法是异步的;一般使用时打开,用完立即关闭;可重复调用,可用来测试是否能录音 659 | ,open:function(True,False){ 660 | var This=this,streamStore=This._streamStore(); 661 | True=True||NOOP; 662 | var failCall=function(errMsg,isUserNotAllow){ 663 | isUserNotAllow=!!isUserNotAllow; 664 | This.CLog("录音open失败:"+errMsg+",isUserNotAllow:"+isUserNotAllow,1); 665 | False&&False(errMsg,isUserNotAllow); 666 | }; 667 | 668 | var ok=function(){ 669 | This.CLog("open ok id:"+This.id); 670 | True(); 671 | 672 | This._SO=0;//解除stop对open中的start调用的阻止 673 | }; 674 | 675 | 676 | //同步锁 677 | var Lock=streamStore.Sync; 678 | var lockOpen=++Lock.O,lockClose=Lock.C; 679 | This._O=This._O_=lockOpen;//记住当前的open,如果变化了要阻止close,这里假定了新对象已取代当前对象并且不再使用 680 | This._SO=This._S;//记住open过程中的stop,中途任何stop调用后都不能继续open中的start 681 | var lockFail=function(){ 682 | //允许多次open,但不允许任何一次close,或者自身已经调用了关闭 683 | if(lockClose!=Lock.C || !This._O){ 684 | var err="open被取消"; 685 | if(lockOpen==Lock.O){ 686 | //无新的open,已经调用了close进行取消,此处应让上次的close明确生效 687 | This.close(); 688 | }else{ 689 | err="open被中断"; 690 | }; 691 | failCall(err); 692 | return true; 693 | }; 694 | }; 695 | 696 | //环境配置检查 697 | var checkMsg=This.envCheck({envName:"H5",canProcess:true}); 698 | if(checkMsg){ 699 | failCall("不能录音:"+checkMsg); 700 | return; 701 | }; 702 | 703 | 704 | //***********已直接提供了音频流************ 705 | if(This.set.sourceStream){ 706 | if(!Recorder.GetContext()){ 707 | failCall("不支持此浏览器从流中获取录音"); 708 | return; 709 | }; 710 | 711 | Disconnect(streamStore);//可能已open过,直接先尝试断开 712 | This.Stream=This.set.sourceStream; 713 | This.Stream._call={}; 714 | 715 | try{ 716 | Connect(streamStore); 717 | }catch(e){ 718 | failCall("从流中打开录音失败:"+e.message); 719 | return; 720 | } 721 | ok(); 722 | return; 723 | }; 724 | 725 | 726 | //***********打开麦克风得到全局的音频流************ 727 | var codeFail=function(code,msg){ 728 | try{//跨域的优先检测一下 729 | window.top.a; 730 | }catch(e){ 731 | failCall('无权录音(跨域,请尝试给iframe添加麦克风访问策略,如allow="camera;microphone")'); 732 | return; 733 | }; 734 | 735 | if(/Permission|Allow/i.test(code)){ 736 | failCall("用户拒绝了录音权限",true); 737 | }else if(window.isSecureContext===false){ 738 | failCall("浏览器禁止不安全页面录音,可开启https解决"); 739 | }else if(/Found/i.test(code)){//可能是非安全环境导致的没有设备 740 | failCall(msg+",无可用麦克风"); 741 | }else{ 742 | failCall(msg); 743 | }; 744 | }; 745 | 746 | 747 | //如果已打开并且有效就不要再打开了 748 | if(Recorder.IsOpen()){ 749 | ok(); 750 | return; 751 | }; 752 | if(!Recorder.Support()){ 753 | codeFail("","此浏览器不支持录音"); 754 | return; 755 | }; 756 | 757 | //请求权限,如果从未授权,一般浏览器会弹出权限请求弹框 758 | var f1=function(stream){ 759 | //https://github.com/xiangyuecn/Recorder/issues/14 获取到的track.readyState!="live",刚刚回调时可能是正常的,但过一下可能就被关掉了,原因不明。延迟一下保证真异步。对正常浏览器不影响 760 | setTimeout(function(){ 761 | stream._call={}; 762 | var oldStream=Recorder.Stream; 763 | if(oldStream){ 764 | Disconnect(); //直接断开已存在的,旧的Connect未完成会自动终止 765 | stream._call=oldStream._call; 766 | }; 767 | Recorder.Stream=stream; 768 | if(lockFail())return; 769 | 770 | if(Recorder.IsOpen()){ 771 | if(oldStream)This.CLog("发现同时多次调用open",1); 772 | 773 | Connect(streamStore,1); 774 | ok(); 775 | }else{ 776 | 
failCall("录音功能无效:无音频流"); 777 | }; 778 | },100); 779 | }; 780 | var f2=function(e){ 781 | var code=e.name||e.message||e.code+":"+e; 782 | This.CLog("请求录音权限错误",1,e); 783 | 784 | codeFail(code,"无法录音:"+code); 785 | }; 786 | 787 | var trackSet={ 788 | noiseSuppression:false //默认禁用降噪,原声录制,免得移动端表现怪异(包括系统播放声音变小) 789 | ,echoCancellation:false //回声消除 790 | }; 791 | var trackSet2=This.set.audioTrackSet; 792 | for(var k in trackSet2)trackSet[k]=trackSet2[k]; 793 | trackSet.sampleRate=Recorder.Ctx.sampleRate;//必须指明采样率,不然手机上MediaRecorder采样率16k 794 | 795 | try{ 796 | var pro=Recorder.Scope[getUserMediaTxt]({audio:trackSet},f1,f2); 797 | }catch(e){//不能设置trackSet就算了 798 | This.CLog(getUserMediaTxt,3,e); 799 | pro=Recorder.Scope[getUserMediaTxt]({audio:true},f1,f2); 800 | }; 801 | if(pro&&pro.then){ 802 | pro.then(f1)[CatchTxt](f2); //fix 关键字,保证catch压缩时保持字符串形式 803 | }; 804 | } 805 | //关闭释放录音资源 806 | ,close:function(call){ 807 | call=call||NOOP; 808 | 809 | var This=this,streamStore=This._streamStore(); 810 | This._stop(); 811 | 812 | var Lock=streamStore.Sync; 813 | This._O=0; 814 | if(This._O_!=Lock.O){ 815 | //唯一资源Stream的控制权已交给新对象,这里不能关闭。此处在每次都弹权限的浏览器内可能存在泄漏,新对象被拒绝权限可能不会调用close,忽略这种不处理 816 | This.CLog("close被忽略(因为同时open了多个rec,只有最后一个会真正close)",3); 817 | call(); 818 | return; 819 | }; 820 | Lock.C++;//获得控制权 821 | 822 | Disconnect(streamStore); 823 | 824 | This.CLog("close"); 825 | call(); 826 | } 827 | 828 | 829 | 830 | 831 | 832 | /*模拟一段录音数据,后面可以调用stop进行编码,需提供pcm数据[1,2,3...],pcm的采样率*/ 833 | ,mock:function(pcmData,pcmSampleRate){ 834 | var This=this; 835 | This._stop();//清理掉已有的资源 836 | 837 | This.isMock=1; 838 | This.mockEnvInfo=null; 839 | This.buffers=[pcmData]; 840 | This.recSize=pcmData.length; 841 | This[srcSampleRateTxt]=pcmSampleRate; 842 | return This; 843 | } 844 | ,envCheck:function(envInfo){//平台环境下的可用性检查,任何时候都可以调用检查,返回errMsg:""正常,"失败原因" 845 | //envInfo={envName:"H5",canProcess:true} 846 | var errMsg,This=this,set=This.set; 847 | 848 | //检测CPU的数字字节序,TypedArray字节序是个迷,直接拒绝罕见的大端模式,因为找不到这种CPU进行测试 849 | var tag="CPU_BE"; 850 | if(!errMsg && !Recorder[tag] && window.Int8Array && !new Int8Array(new Int32Array([1]).buffer)[0]){ 851 | Traffic(tag); //如果开启了流量统计,这里将发送一个图片请求 852 | errMsg="不支持"+tag+"架构"; 853 | }; 854 | 855 | //编码器检查环境下配置是否可用 856 | if(!errMsg){ 857 | var type=set.type; 858 | if(This[type+"_envCheck"]){//编码器已实现环境检查 859 | errMsg=This[type+"_envCheck"](envInfo,set); 860 | }else{//未实现检查的手动检查配置是否有效 861 | if(set.takeoffEncodeChunk){ 862 | errMsg=type+"类型"+(This[type]?"":"(未加载编码器)")+"不支持设置takeoffEncodeChunk"; 863 | }; 864 | }; 865 | }; 866 | 867 | return errMsg||""; 868 | } 869 | ,envStart:function(mockEnvInfo,sampleRate){//平台环境相关的start调用 870 | var This=this,set=This.set; 871 | This.isMock=mockEnvInfo?1:0;//非H5环境需要启用mock,并提供envCheck需要的环境信息 872 | This.mockEnvInfo=mockEnvInfo; 873 | This.buffers=[];//数据缓冲 874 | This.recSize=0;//数据大小 875 | 876 | This.envInLast=0;//envIn接收到最后录音内容的时间 877 | This.envInFirst=0;//envIn接收到的首个录音内容的录制时间 878 | This.envInFix=0;//补偿的总时间 879 | This.envInFixTs=[];//补偿计数列表 880 | 881 | //engineCtx需要提前确定最终的采样率 882 | var setSr=set[sampleRateTxt]; 883 | if(setSr>sampleRate){ 884 | set[sampleRateTxt]=sampleRate; 885 | }else{ setSr=0 } 886 | This[srcSampleRateTxt]=sampleRate; 887 | This.CLog(srcSampleRateTxt+": "+sampleRate+" set."+sampleRateTxt+": "+set[sampleRateTxt]+(setSr?" 
忽略"+setSr:""), setSr?3:0); 888 | 889 | This.engineCtx=0; 890 | //此类型有边录边转码(Worker)支持 891 | if(This[set.type+"_start"]){ 892 | var engineCtx=This.engineCtx=This[set.type+"_start"](set); 893 | if(engineCtx){ 894 | engineCtx.pcmDatas=[]; 895 | engineCtx.pcmSize=0; 896 | }; 897 | }; 898 | } 899 | ,envResume:function(){//和平台环境无关的恢复录音 900 | //重新开始计数 901 | this.envInFixTs=[]; 902 | } 903 | ,envIn:function(pcm,sum){//和平台环境无关的pcm[Int16]输入 904 | var This=this,set=This.set,engineCtx=This.engineCtx; 905 | var bufferSampleRate=This[srcSampleRateTxt]; 906 | var size=pcm.length; 907 | var powerLevel=Recorder.PowerLevel(sum,size); 908 | 909 | var buffers=This.buffers; 910 | var bufferFirstIdx=buffers.length;//之前的buffer都是经过onProcess处理好的,不允许再修改 911 | buffers.push(pcm); 912 | 913 | //有engineCtx时会被覆盖,这里保存一份 914 | var buffersThis=buffers; 915 | var bufferFirstIdxThis=bufferFirstIdx; 916 | 917 | //卡顿丢失补偿:因为设备很卡的时候导致H5接收到的数据量不够造成播放时候变速,结果比实际的时长要短,此处保证了不会变短,但不能修复丢失的音频数据造成音质变差。当前算法采用输入时间侦测下一帧是否需要添加补偿帧,需要(6次输入||超过1秒)以上才会开始侦测,如果滑动窗口内丢失超过1/3就会进行补偿 918 | var now=Date.now(); 919 | var pcmTime=Math.round(size/bufferSampleRate*1000); 920 | This.envInLast=now; 921 | if(This.buffers.length==1){//记下首个录音数据的录制时间 922 | This.envInFirst=now-pcmTime; 923 | }; 924 | var envInFixTs=This.envInFixTs; 925 | envInFixTs.splice(0,0,{t:now,d:pcmTime}); 926 | //保留3秒的计数滑动窗口,另外超过3秒的停顿不补偿 927 | var tsInStart=now,tsPcm=0; 928 | for(var i=0;i3000){ 931 | envInFixTs.length=i; 932 | break; 933 | }; 934 | tsInStart=o.t; 935 | tsPcm+=o.d; 936 | }; 937 | //达到需要的数据量,开始侦测是否需要补偿 938 | var tsInPrev=envInFixTs[1]; 939 | var tsIn=now-tsInStart; 940 | var lost=tsIn-tsPcm; 941 | if( lost>tsIn/3 && (tsInPrev&&tsIn>1000 || envInFixTs.length>=6) ){ 942 | //丢失过多,开始执行补偿 943 | var addTime=now-tsInPrev.t-pcmTime;//距离上次输入丢失这么多ms 944 | if(addTime>pcmTime/5){//丢失超过本帧的1/5 945 | var fixOpen=!set.disableEnvInFix; 946 | This.CLog("["+now+"]"+(fixOpen?"":"未")+"补偿"+addTime+"ms",3); 947 | This.envInFix+=addTime; 948 | 949 | //用静默进行补偿 950 | if(fixOpen){ 951 | var addPcm=new Int16Array(addTime*bufferSampleRate/1000); 952 | size+=addPcm.length; 953 | buffers.push(addPcm); 954 | }; 955 | }; 956 | }; 957 | 958 | 959 | var sizeOld=This.recSize,addSize=size; 960 | var bufferSize=sizeOld+addSize; 961 | This.recSize=bufferSize;//此值在onProcess后需要修正,可能新数据被修改 962 | 963 | 964 | //此类型有边录边转码(Worker)支持,开启实时转码 965 | if(engineCtx){ 966 | //转换成set的采样率 967 | var chunkInfo=Recorder.SampleData(buffers,bufferSampleRate,set[sampleRateTxt],engineCtx.chunkInfo); 968 | engineCtx.chunkInfo=chunkInfo; 969 | 970 | sizeOld=engineCtx.pcmSize; 971 | addSize=chunkInfo.data.length; 972 | bufferSize=sizeOld+addSize; 973 | engineCtx.pcmSize=bufferSize;//此值在onProcess后需要修正,可能新数据被修改 974 | 975 | buffers=engineCtx.pcmDatas; 976 | bufferFirstIdx=buffers.length; 977 | buffers.push(chunkInfo.data); 978 | bufferSampleRate=chunkInfo[sampleRateTxt]; 979 | }; 980 | 981 | var duration=Math.round(bufferSize/bufferSampleRate*1000); 982 | var bufferNextIdx=buffers.length; 983 | var bufferNextIdxThis=buffersThis.length; 984 | 985 | //允许异步处理buffer数据 986 | var asyncEnd=function(){ 987 | //重新计算size,异步的早已减去添加的,同步的需去掉本次添加的然后重新计算 988 | var num=asyncBegin?0:-addSize; 989 | var hasClear=buffers[0]==null; 990 | for(var i=bufferFirstIdx;i10 && This.envInFirst-now>1000){ //1秒后开始onProcess性能监测 1038 | This.CLog(procTxt+"低性能,耗时"+slowT+"ms",3); 1039 | }; 1040 | 1041 | if(asyncBegin===true){ 1042 | //开启了异步模式,onProcess已接管buffers新数据,立即清空,避免出现未处理的数据 1043 | var hasClear=0; 1044 | for(var i=bufferFirstIdx;i"+res.length+" 花:"+(Date.now()-t1)+"ms"); 
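//说明:下面的setTimeout把最终编码推迟到下一轮事件循环(推测是为了先让上面的日志与界面状态生效,避免长时间同步阻塞);
//This[set.type](res,ok,err)会分发给已加载的对应编码器实现,如本项目引入的pcm.js、wav.js。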
1250 | 1251 | setTimeout(function(){ 1252 | t1=Date.now(); 1253 | This[set.type](res,function(blob){ 1254 | ok(blob,duration); 1255 | },function(msg){ 1256 | err(msg); 1257 | }); 1258 | }); 1259 | } 1260 | 1261 | }; 1262 | 1263 | if(window[RecTxt]){ 1264 | CLog("重复引入"+RecTxt,3); 1265 | window[RecTxt].Destroy(); 1266 | }; 1267 | window[RecTxt]=Recorder; 1268 | 1269 | 1270 | 1271 | 1272 | //=======从WebM字节流中提取pcm数据,提取成功返回Float32Array,失败返回null||-1===== 1273 | var WebM_Extract=function(inBytes, scope){ 1274 | if(!scope.pos){ 1275 | scope.pos=[0]; scope.tracks={}; scope.bytes=[]; 1276 | }; 1277 | var tracks=scope.tracks, position=[scope.pos[0]]; 1278 | var endPos=function(){ scope.pos[0]=position[0] }; 1279 | 1280 | var sBL=scope.bytes.length; 1281 | var bytes=new Uint8Array(sBL+inBytes.length); 1282 | bytes.set(scope.bytes); bytes.set(inBytes,sBL); 1283 | scope.bytes=bytes; 1284 | 1285 | //先读取文件头和Track信息 1286 | if(!scope._ht){ 1287 | readMatroskaVInt(bytes, position);//EBML Header 1288 | readMatroskaBlock(bytes, position);//跳过EBML Header内容 1289 | if(!BytesEq(readMatroskaVInt(bytes, position), [0x18,0x53,0x80,0x67])){ 1290 | return;//未识别到Segment 1291 | } 1292 | readMatroskaVInt(bytes, position);//跳过Segment长度值 1293 | while(position[0]1){//多声道,提取一个声道 1403 | var arr2=[]; 1404 | for(var i=0;i=arr.length)return; 1432 | var b0=arr[i],b2=("0000000"+b0.toString(2)).substr(-8); 1433 | var m=/^(0*1)(\d*)$/.exec(b2); 1434 | if(!m)return; 1435 | var len=m[1].length, val=[]; 1436 | if(i+len>arr.length)return; 1437 | for(var i2=0;i2arr.length)return; 1450 | for(var i2=0;i2>8)+128; 75 | data.setInt8(offset,val,true); 76 | }; 77 | }else{ 78 | for (var i=0;i