├── .gitignore
├── 1.启动ASR服务-SSL.bat
├── 1.启动ASR服务.bat
├── 2.启动WebUI.bat
├── 7.激活conda环境.bat
├── README.md
├── funasr_client_api.py
├── funasr_wss_client.py
├── funasr_wss_server.py
├── requirements.txt
├── requirements_client.txt
├── requirements_server.txt
├── web
    ├── index.html
    ├── main.js
    ├── pcm.js
    ├── recorder-core.js
    ├── wav.js
    └── wsconnecter.js
└── webui.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | __pycache__/
 2 | build/
 3 | *.egg-info/
 4 | *.so
 5 | *.mp4
 6 | 
 7 | tmp*
 8 | trial*/
 9 | 
10 | data
11 | data_utils/face_tracking/3DMM/*
12 | data_utils/face_parsing/79999_iter.pth
13 | 
14 | pretrained
15 | *.mp4
16 | .DS_Store
17 | workspace/log_ngp.txt
18 | .idea
19 | 
20 | Miniconda3/
21 | hf_download/
22 | 
23 | *.pth
24 | *.pt
25 | *log.txt
26 | log.txt
27 | 
28 | wav2lip/results/
29 | 
30 | metahuman-stream*
31 | *.zip
32 | FunASR_WS/
33 | *.pem


--------------------------------------------------------------------------------
/1.启动ASR服务-SSL.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | chcp 65001
 3 | 
 4 | SET CONDA_PATH=.\Miniconda3
 5 | 
 6 | REM 激活base环境
 7 | CALL %CONDA_PATH%\Scripts\activate.bat %CONDA_PATH%
 8 | 
 9 | SET KMP_DUPLICATE_LIB_OK=TRUE
10 | SET CONDA_PATH=.\Miniconda3
11 | set HF_ENDPOINT=https://hf-mirror.com
12 | set HF_HOME=%CD%\hf_download
13 | set MODELSCOPE_CACHE=%CD%\hf_download
14 | 
15 | set disable_update=True
16 | 
17 | python funasr_wss_server.py --port 10096 --certfile "cert.pem" --keyfile "key.pem" --asr_model iic/SenseVoiceSmall --asr_model_revision master --asr_model_online iic/SenseVoiceSmall --asr_model_online_revision master
18 | 
19 | cmd /k


--------------------------------------------------------------------------------
/1.启动ASR服务.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | chcp 65001
 3 | 
 4 | SET CONDA_PATH=.\Miniconda3
 5 | 
 6 | REM 激活base环境
 7 | CALL %CONDA_PATH%\Scripts\activate.bat %CONDA_PATH%
 8 | 
 9 | SET KMP_DUPLICATE_LIB_OK=TRUE
10 | SET CONDA_PATH=.\Miniconda3
11 | set HF_ENDPOINT=https://hf-mirror.com
12 | set HF_HOME=%CD%\hf_download
13 | set MODELSCOPE_CACHE=%CD%\hf_download
14 | 
15 | set disable_update=True
16 | 
17 | python funasr_wss_server.py --port 10096 --certfile "" --asr_model iic/SenseVoiceSmall --asr_model_revision master --asr_model_online iic/SenseVoiceSmall --asr_model_online_revision master
18 | 
19 | cmd /k


--------------------------------------------------------------------------------
/2.启动WebUI.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | chcp 65001
 3 | 
 4 | SET CONDA_PATH=.\Miniconda3
 5 | 
 6 | REM 激活base环境
 7 | CALL %CONDA_PATH%\Scripts\activate.bat %CONDA_PATH%
 8 | 
 9 | SET KMP_DUPLICATE_LIB_OK=TRUE
10 | SET CONDA_PATH=.\Miniconda3
11 | set HF_ENDPOINT=https://hf-mirror.com
12 | set HF_HOME=%CD%\hf_download
13 | set MODELSCOPE_CACHE=%CD%\hf_download
14 | 
15 | set disable_update=True
16 | 
17 | start "" "http://127.0.0.1:8101/web/index.html"
18 | 
19 | python webui.py
20 | 
21 | cmd /k


--------------------------------------------------------------------------------
/7.激活conda环境.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 | 
3 | SET CONDA_PATH=.\Miniconda3
4 | 
5 | REM 激活base环境
6 | CALL %CONDA_PATH%\Scripts\activate.bat %CONDA_PATH%
7 | 
8 | cmd /k


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # 基于FunASR官方Demo修改的WS服务端，配合FastAPI提供HTTP服务，可以在浏览器中进行实时ASR测试
  2 | 
  3 | 安装依赖：
  4 | ```shell
  5 | pip install -r requirements.txt
  6 | ```
  7 | 
  8 | 启动ASR服务：
  9 | ```shell
 10 | python main.py
 11 | ```
 12 | 
 13 | 启动WebUI：
 14 | ```shell
 15 | python webui.py
 16 | ```
 17 | 
 18 | 
 19 | 浏览器访问：
 20 | ```shell
 21 | http://127.0.0.1:8101
 22 | ```
 23 | 
 24 | 效果预览：
 25 | ![image](https://github.com/user-attachments/assets/56628b48-65de-454d-b941-7eb6432f347c)
 26 | 
 27 | 
 28 | 
 29 | # Service with websocket-python
 30 | 
 31 | This is a demo using funasr pipeline with websocket python-api. It supports the offline, online, offline/online-2pass unifying speech recognition. 
 32 | 
 33 | ## For the Server
 34 | 
 35 | ### Install the modelscope and funasr
 36 | 
 37 | ```shell
 38 | pip install -U modelscope funasr
 39 | # For the users in China, you could install with the command:
 40 | # pip install -U modelscope funasr -i https://mirror.sjtu.edu.cn/pypi/web/simple
 41 | git clone https://github.com/alibaba/FunASR.git && cd FunASR
 42 | ```
 43 | 
 44 | ### Install the requirements for server
 45 | 
 46 | ```shell
 47 | cd runtime/python/websocket
 48 | pip install -r requirements_server.txt
 49 | ```
 50 | 
 51 | ### Start server
 52 | 
 53 | ##### API-reference
 54 | ```shell
 55 | python funasr_wss_server.py \
 56 | --port [port id] \
 57 | --asr_model [asr model_name] \
 58 | --asr_model_online [asr model_name] \
 59 | --punc_model [punc model_name] \
 60 | --ngpu [0 or 1] \
 61 | --ncpu [1 or 4] \
 62 | --certfile [path of certfile for ssl] \
 63 | --keyfile [path of keyfile for ssl] 
 64 | ```
 65 | ##### Usage examples
 66 | ```shell
 67 | python funasr_wss_server.py --port 10095
 68 | ```
 69 | 
 70 | ## For the client
 71 | 
 72 | Install the requirements for client
 73 | ```shell
 74 | git clone https://github.com/alibaba/FunASR.git && cd FunASR
 75 | cd funasr/runtime/python/websocket
 76 | pip install -r requirements_client.txt
 77 | ```
 78 | If you want infer from videos, you should install `ffmpeg`
 79 | ```shell
 80 | apt-get install -y ffmpeg #ubuntu
 81 | # yum install -y ffmpeg # centos
 82 | # brew install ffmpeg # mac
 83 | # winget install ffmpeg # wins
 84 | pip3 install websockets ffmpeg-python
 85 | ```
 86 | 
 87 | ### Start client
 88 | #### API-reference
 89 | ```shell
 90 | python funasr_wss_client.py \
 91 | --host [ip_address] \
 92 | --port [port id] \
 93 | --chunk_size ["5,10,5"=600ms, "8,8,4"=480ms] \
 94 | --chunk_interval [duration of send chunk_size/chunk_interval] \
 95 | --words_max_print [max number of words to print] \
 96 | --audio_in [if set, loadding from wav.scp, else recording from mircrophone] \
 97 | --output_dir [if set, write the results to output_dir] \
 98 | --mode [`online` for streaming asr, `offline` for non-streaming, `2pass` for unifying streaming and non-streaming asr] \
 99 | --thread_num [thread_num for send data]
100 | ```
101 | 
102 | #### Usage examples
103 | ##### ASR offline client
104 | Recording from mircrophone
105 | ```shell
106 | # --chunk_interval, "10": 600/10=60ms, "5"=600/5=120ms, "20": 600/12=30ms
107 | python funasr_wss_client.py --host "0.0.0.0" --port 10095 --mode offline
108 | ```
109 | Loadding from wav.scp(kaldi style)
110 | ```shell
111 | # --chunk_interval, "10": 600/10=60ms, "5"=600/5=120ms, "20": 600/12=30ms
112 | python funasr_wss_client.py --host "0.0.0.0" --port 10095 --mode offline --audio_in "./data/wav.scp" --output_dir "./results"
113 | ```
114 | 
115 | ##### ASR streaming client
116 | Recording from mircrophone
117 | ```shell
118 | # --chunk_size, "5,10,5"=600ms, "8,8,4"=480ms
119 | python funasr_wss_client.py --host "0.0.0.0" --port 10095 --mode online --chunk_size "5,10,5"
120 | ```
121 | Loadding from wav.scp(kaldi style)
122 | ```shell
123 | # --chunk_size, "5,10,5"=600ms, "8,8,4"=480ms
124 | python funasr_wss_client.py --host "0.0.0.0" --port 10095 --mode online --chunk_size "5,10,5" --audio_in "./data/wav.scp" --output_dir "./results"
125 | ```
126 | 
127 | ##### ASR offline/online 2pass client
128 | Recording from mircrophone
129 | ```shell
130 | # --chunk_size, "5,10,5"=600ms, "8,8,4"=480ms
131 | python funasr_wss_client.py --host "0.0.0.0" --port 10095 --mode 2pass --chunk_size "8,8,4"
132 | ```
133 | Loadding from wav.scp(kaldi style)
134 | ```shell
135 | # --chunk_size, "5,10,5"=600ms, "8,8,4"=480ms
136 | python funasr_wss_client.py --host "0.0.0.0" --port 10095 --mode 2pass --chunk_size "8,8,4" --audio_in "./data/wav.scp" --output_dir "./results"
137 | ```
138 | 
139 | #### Websocket api
140 | ```shell
141 | # class Funasr_websocket_recognizer example with 3 step
142 | # 1.create an recognizer 
143 | rcg=Funasr_websocket_recognizer(host="127.0.0.1",port="30035",is_ssl=True,mode="2pass")
144 | # 2.send pcm data to asr engine and get asr result
145 | text=rcg.feed_chunk(data)
146 | print("text",text)
147 | # 3.get last result, set timeout=3
148 | text=rcg.close(timeout=3)
149 | print("text",text)
150 | ```
151 | 
152 | ## Acknowledge
153 | 1. This project is maintained by [FunASR community](https://github.com/alibaba-damo-academy/FunASR).
154 | 2. We acknowledge [zhaoming](https://github.com/zhaomingwork/FunASR/tree/fix_bug_for_python_websocket) for contributing the websocket service.
155 | 3. We acknowledge [cgisky1980](https://github.com/cgisky1980/FunASR) for contributing the websocket service of offline model.
156 | 


--------------------------------------------------------------------------------
/funasr_client_api.py:
--------------------------------------------------------------------------------
  1 | """
  2 |   Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
  3 |   Reserved. MIT License  (https://opensource.org/licenses/MIT)
  4 |   
  5 |   2022-2023 by zhaomingwork@qq.com  
  6 | """
  7 | 
  8 | # pip install websocket-client
  9 | import ssl
 10 | from websocket import ABNF
 11 | from websocket import create_connection
 12 | from queue import Queue
 13 | import threading
 14 | import traceback
 15 | import json
 16 | import time
 17 | import numpy as np
 18 | 
 19 | 
 20 | # class for recognizer in websocket
 21 | class Funasr_websocket_recognizer:
 22 |     """
 23 |     python asr recognizer lib
 24 | 
 25 |     """
 26 | 
 27 |     def __init__(
 28 |         self,
 29 |         host="127.0.0.1",
 30 |         port="30035",
 31 |         is_ssl=True,
 32 |         chunk_size="0, 10, 5",
 33 |         chunk_interval=10,
 34 |         mode="offline",
 35 |         wav_name="default",
 36 |     ):
 37 |         """
 38 |         host: server host ip
 39 |         port: server port
 40 |         is_ssl: True for wss protocal, False for ws
 41 |         """
 42 |         try:
 43 |             if is_ssl == True:
 44 |                 ssl_context = ssl.SSLContext()
 45 |                 ssl_context.check_hostname = False
 46 |                 ssl_context.verify_mode = ssl.CERT_NONE
 47 |                 uri = "wss://{}:{}".format(host, port)
 48 |                 ssl_opt = {"cert_reqs": ssl.CERT_NONE}
 49 |             else:
 50 |                 uri = "ws://{}:{}".format(host, port)
 51 |                 ssl_context = None
 52 |                 ssl_opt = None
 53 |             self.host = host
 54 |             self.port = port
 55 | 
 56 |             self.msg_queue = Queue()  # used for recognized result text
 57 | 
 58 |             print("connect to url", uri)
 59 |             self.websocket = create_connection(uri, ssl=ssl_context, sslopt=ssl_opt)
 60 | 
 61 |             self.thread_msg = threading.Thread(
 62 |                 target=Funasr_websocket_recognizer.thread_rec_msg, args=(self,)
 63 |             )
 64 |             self.thread_msg.start()
 65 |             chunk_size = [int(x) for x in chunk_size.split(",")]
 66 |             stride = int(60 * chunk_size[1] / chunk_interval / 1000 * 16000 * 2)
 67 |             chunk_num = (len(audio_bytes) - 1) // stride + 1
 68 | 
 69 |             message = json.dumps(
 70 |                 {
 71 |                     "mode": mode,
 72 |                     "chunk_size": chunk_size,
 73 |                     "encoder_chunk_look_back": 4,
 74 |                     "decoder_chunk_look_back": 1,
 75 |                     "chunk_interval": chunk_interval,
 76 |                     "wav_name": wav_name,
 77 |                     "is_speaking": True,
 78 |                 }
 79 |             )
 80 | 
 81 |             self.websocket.send(message)
 82 | 
 83 |             print("send json", message)
 84 | 
 85 |         except Exception as e:
 86 |             print("Exception:", e)
 87 |             traceback.print_exc()
 88 | 
 89 |     # threads for rev msg
 90 |     def thread_rec_msg(self):
 91 |         try:
 92 |             while True:
 93 |                 msg = self.websocket.recv()
 94 |                 if msg is None or len(msg) == 0:
 95 |                     continue
 96 |                 msg = json.loads(msg)
 97 | 
 98 |                 self.msg_queue.put(msg)
 99 |         except Exception as e:
100 |             print("client closed")
101 | 
102 |     # feed data to asr engine, wait_time means waiting for result until time out
103 |     def feed_chunk(self, chunk, wait_time=0.01):
104 |         try:
105 |             self.websocket.send(chunk, ABNF.OPCODE_BINARY)
106 |             # loop to check if there is a message, timeout in 0.01s
107 |             while True:
108 |                 msg = self.msg_queue.get(timeout=wait_time)
109 |                 if self.msg_queue.empty():
110 |                     break
111 | 
112 |             return msg
113 |         except:
114 |             return ""
115 | 
116 |     def close(self, timeout=1):
117 |         message = json.dumps({"is_speaking": False})
118 |         self.websocket.send(message)
119 |         # sleep for timeout seconds to wait for result
120 |         time.sleep(timeout)
121 |         msg = ""
122 |         while not self.msg_queue.empty():
123 |             msg = self.msg_queue.get()
124 | 
125 |         self.websocket.close()
126 |         # only resturn the last msg
127 |         return msg
128 | 
129 | 
130 | if __name__ == "__main__":
131 | 
132 |     print("example for Funasr_websocket_recognizer")
133 |     import wave
134 | 
135 |     wav_path = "/Users/zhifu/Downloads/modelscope_models/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/example/asr_example.wav"
136 |     with wave.open(wav_path, "rb") as wav_file:
137 |         params = wav_file.getparams()
138 |         frames = wav_file.readframes(wav_file.getnframes())
139 |         audio_bytes = bytes(frames)
140 | 
141 |     stride = int(60 * 10 / 10 / 1000 * 16000 * 2)
142 |     chunk_num = (len(audio_bytes) - 1) // stride + 1
143 |     # create an recognizer
144 |     rcg = Funasr_websocket_recognizer(
145 |         host="127.0.0.1", port="10095", is_ssl=True, mode="2pass", chunk_size="0,10,5"
146 |     )
147 |     # loop to send chunk
148 |     for i in range(chunk_num):
149 | 
150 |         beg = i * stride
151 |         data = audio_bytes[beg : beg + stride]
152 | 
153 |         text = rcg.feed_chunk(data, wait_time=0.02)
154 |         if len(text) > 0:
155 |             print("text", text)
156 |         time.sleep(0.05)
157 | 
158 |     # get last message
159 |     text = rcg.close(timeout=3)
160 |     print("text", text)
161 | 


--------------------------------------------------------------------------------
/funasr_wss_client.py:
--------------------------------------------------------------------------------
  1 | # -*- encoding: utf-8 -*-
  2 | import os
  3 | import time
  4 | import websockets, ssl
  5 | import asyncio
  6 | 
  7 | # import threading
  8 | import argparse
  9 | import json
 10 | import traceback
 11 | from multiprocessing import Process
 12 | 
 13 | # from funasr.fileio.datadir_writer import DatadirWriter
 14 | 
 15 | import logging
 16 | 
 17 | logging.basicConfig(level=logging.ERROR)
 18 | 
 19 | parser = argparse.ArgumentParser()
 20 | parser.add_argument(
 21 |     "--host", type=str, default="localhost", required=False, help="host ip, localhost, 0.0.0.0"
 22 | )
 23 | parser.add_argument("--port", type=int, default=10095, required=False, help="grpc server port")
 24 | parser.add_argument("--chunk_size", type=str, default="5, 10, 5", help="chunk")
 25 | parser.add_argument("--encoder_chunk_look_back", type=int, default=4, help="chunk")
 26 | parser.add_argument("--decoder_chunk_look_back", type=int, default=0, help="chunk")
 27 | parser.add_argument("--chunk_interval", type=int, default=10, help="chunk")
 28 | parser.add_argument(
 29 |     "--hotword",
 30 |     type=str,
 31 |     default="",
 32 |     help="hotword file path, one hotword perline (e.g.:阿里巴巴 20)",
 33 | )
 34 | parser.add_argument("--audio_in", type=str, default=None, help="audio_in")
 35 | parser.add_argument("--audio_fs", type=int, default=16000, help="audio_fs")
 36 | parser.add_argument(
 37 |     "--send_without_sleep",
 38 |     action="store_true",
 39 |     default=True,
 40 |     help="if audio_in is set, send_without_sleep",
 41 | )
 42 | parser.add_argument("--thread_num", type=int, default=1, help="thread_num")
 43 | parser.add_argument("--words_max_print", type=int, default=10000, help="chunk")
 44 | parser.add_argument("--output_dir", type=str, default=None, help="output_dir")
 45 | parser.add_argument("--ssl", type=int, default=1, help="1 for ssl connect, 0 for no ssl")
 46 | parser.add_argument("--use_itn", type=int, default=1, help="1 for using itn, 0 for not itn")
 47 | parser.add_argument("--mode", type=str, default="2pass", help="offline, online, 2pass")
 48 | 
 49 | args = parser.parse_args()
 50 | args.chunk_size = [int(x) for x in args.chunk_size.split(",")]
 51 | print(args)
 52 | # voices = asyncio.Queue()
 53 | from queue import Queue
 54 | 
 55 | voices = Queue()
 56 | offline_msg_done = False
 57 | 
 58 | if args.output_dir is not None:
 59 |     # if os.path.exists(args.output_dir):
 60 |     #     os.remove(args.output_dir)
 61 | 
 62 |     if not os.path.exists(args.output_dir):
 63 |         os.makedirs(args.output_dir)
 64 | 
 65 | 
 66 | async def record_microphone():
 67 |     is_finished = False
 68 |     import pyaudio
 69 | 
 70 |     # print("2")
 71 |     global voices
 72 |     FORMAT = pyaudio.paInt16
 73 |     CHANNELS = 1
 74 |     RATE = 16000
 75 |     chunk_size = 60 * args.chunk_size[1] / args.chunk_interval
 76 |     CHUNK = int(RATE / 1000 * chunk_size)
 77 | 
 78 |     p = pyaudio.PyAudio()
 79 | 
 80 |     stream = p.open(
 81 |         format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK
 82 |     )
 83 |     # hotwords
 84 |     fst_dict = {}
 85 |     hotword_msg = ""
 86 |     if args.hotword.strip() != "":
 87 |         if os.path.exists(args.hotword):
 88 |             f_scp = open(args.hotword)
 89 |             hot_lines = f_scp.readlines()
 90 |             for line in hot_lines:
 91 |                 words = line.strip().split(" ")
 92 |                 if len(words) < 2:
 93 |                     print("Please checkout format of hotwords")
 94 |                     continue
 95 |                 try:
 96 |                     fst_dict[" ".join(words[:-1])] = int(words[-1])
 97 |                 except ValueError:
 98 |                     print("Please checkout format of hotwords")
 99 |             hotword_msg = json.dumps(fst_dict)
100 |         else:
101 |             hotword_msg = args.hotword
102 | 
103 |     use_itn = True
104 |     if args.use_itn == 0:
105 |         use_itn = False
106 | 
107 |     message = json.dumps(
108 |         {
109 |             "mode": args.mode,
110 |             "chunk_size": args.chunk_size,
111 |             "chunk_interval": args.chunk_interval,
112 |             "encoder_chunk_look_back": args.encoder_chunk_look_back,
113 |             "decoder_chunk_look_back": args.decoder_chunk_look_back,
114 |             "wav_name": "microphone",
115 |             "is_speaking": True,
116 |             "hotwords": hotword_msg,
117 |             "itn": use_itn,
118 |         }
119 |     )
120 |     # voices.put(message)
121 |     await websocket.send(message)
122 |     while True:
123 |         data = stream.read(CHUNK)
124 |         message = data
125 |         # voices.put(message)
126 |         await websocket.send(message)
127 |         await asyncio.sleep(0.005)
128 | 
129 | 
130 | async def record_from_scp(chunk_begin, chunk_size):
131 |     global voices
132 |     is_finished = False
133 |     if args.audio_in.endswith(".scp"):
134 |         f_scp = open(args.audio_in)
135 |         wavs = f_scp.readlines()
136 |     else:
137 |         wavs = [args.audio_in]
138 | 
139 |     # hotwords
140 |     fst_dict = {}
141 |     hotword_msg = ""
142 |     if args.hotword.strip() != "":
143 |         if os.path.exists(args.hotword):
144 |             f_scp = open(args.hotword)
145 |             hot_lines = f_scp.readlines()
146 |             for line in hot_lines:
147 |                 words = line.strip().split(" ")
148 |                 if len(words) < 2:
149 |                     print("Please checkout format of hotwords")
150 |                     continue
151 |                 try:
152 |                     fst_dict[" ".join(words[:-1])] = int(words[-1])
153 |                 except ValueError:
154 |                     print("Please checkout format of hotwords")
155 |             hotword_msg = json.dumps(fst_dict)
156 |         else:
157 |             hotword_msg = args.hotword
158 |         print(hotword_msg)
159 | 
160 |     sample_rate = args.audio_fs
161 |     wav_format = "pcm"
162 |     use_itn = True
163 |     if args.use_itn == 0:
164 |         use_itn = False
165 | 
166 |     if chunk_size > 0:
167 |         wavs = wavs[chunk_begin : chunk_begin + chunk_size]
168 |     for wav in wavs:
169 |         wav_splits = wav.strip().split()
170 | 
171 |         wav_name = wav_splits[0] if len(wav_splits) > 1 else "demo"
172 |         wav_path = wav_splits[1] if len(wav_splits) > 1 else wav_splits[0]
173 |         if not len(wav_path.strip()) > 0:
174 |             continue
175 |         if wav_path.endswith(".pcm"):
176 |             with open(wav_path, "rb") as f:
177 |                 audio_bytes = f.read()
178 |         elif wav_path.endswith(".wav"):
179 |             import wave
180 | 
181 |             with wave.open(wav_path, "rb") as wav_file:
182 |                 params = wav_file.getparams()
183 |                 sample_rate = wav_file.getframerate()
184 |                 frames = wav_file.readframes(wav_file.getnframes())
185 |                 audio_bytes = bytes(frames)
186 |         else:
187 |             wav_format = "others"
188 |             with open(wav_path, "rb") as f:
189 |                 audio_bytes = f.read()
190 | 
191 |         stride = int(60 * args.chunk_size[1] / args.chunk_interval / 1000 * sample_rate * 2)
192 |         chunk_num = (len(audio_bytes) - 1) // stride + 1
193 |         # print(stride)
194 | 
195 |         # send first time
196 |         message = json.dumps(
197 |             {
198 |                 "mode": args.mode,
199 |                 "chunk_size": args.chunk_size,
200 |                 "chunk_interval": args.chunk_interval,
201 |                 "encoder_chunk_look_back": args.encoder_chunk_look_back,
202 |                 "decoder_chunk_look_back": args.decoder_chunk_look_back,
203 |                 "audio_fs": sample_rate,
204 |                 "wav_name": wav_name,
205 |                 "wav_format": wav_format,
206 |                 "is_speaking": True,
207 |                 "hotwords": hotword_msg,
208 |                 "itn": use_itn,
209 |             }
210 |         )
211 | 
212 |         # voices.put(message)
213 |         await websocket.send(message)
214 |         is_speaking = True
215 |         for i in range(chunk_num):
216 | 
217 |             beg = i * stride
218 |             data = audio_bytes[beg : beg + stride]
219 |             message = data
220 |             # voices.put(message)
221 |             await websocket.send(message)
222 |             if i == chunk_num - 1:
223 |                 is_speaking = False
224 |                 message = json.dumps({"is_speaking": is_speaking})
225 |                 # voices.put(message)
226 |                 await websocket.send(message)
227 | 
228 |             sleep_duration = (
229 |                 0.001
230 |                 if args.mode == "offline"
231 |                 else 60 * args.chunk_size[1] / args.chunk_interval / 1000
232 |             )
233 | 
234 |             await asyncio.sleep(sleep_duration)
235 | 
236 |     if not args.mode == "offline":
237 |         await asyncio.sleep(2)
238 |     # offline model need to wait for message recved
239 | 
240 |     if args.mode == "offline":
241 |         global offline_msg_done
242 |         while not offline_msg_done:
243 |             await asyncio.sleep(1)
244 | 
245 |     await websocket.close()
246 | 
247 | 
248 | async def message(id):
249 |     global websocket, voices, offline_msg_done
250 |     text_print = ""
251 |     text_print_2pass_online = ""
252 |     text_print_2pass_offline = ""
253 |     if args.output_dir is not None:
254 |         ibest_writer = open(
255 |             os.path.join(args.output_dir, "text.{}".format(id)), "a", encoding="utf-8"
256 |         )
257 |     else:
258 |         ibest_writer = None
259 |     try:
260 |         while True:
261 | 
262 |             meg = await websocket.recv()
263 |             meg = json.loads(meg)
264 |             wav_name = meg.get("wav_name", "demo")
265 |             text = meg["text"]
266 |             timestamp = ""
267 |             offline_msg_done = meg.get("is_final", False)
268 |             if "timestamp" in meg:
269 |                 timestamp = meg["timestamp"]
270 | 
271 |             if ibest_writer is not None:
272 |                 if timestamp != "":
273 |                     text_write_line = "{}\t{}\t{}\n".format(wav_name, text, timestamp)
274 |                 else:
275 |                     text_write_line = "{}\t{}\n".format(wav_name, text)
276 |                 ibest_writer.write(text_write_line)
277 | 
278 |             if "mode" not in meg:
279 |                 continue
280 |             if meg["mode"] == "online":
281 |                 text_print += "{}".format(text)
282 |                 text_print = text_print[-args.words_max_print :]
283 |                 os.system("clear")
284 |                 print("\rpid" + str(id) + ": " + text_print)
285 |             elif meg["mode"] == "offline":
286 |                 if timestamp != "":
287 |                     text_print += "{} timestamp: {}".format(text, timestamp)
288 |                 else:
289 |                     text_print += "{}".format(text)
290 | 
291 |                 # text_print = text_print[-args.words_max_print:]
292 |                 # os.system('clear')
293 |                 print("\rpid" + str(id) + ": " + wav_name + ": " + text_print)
294 |                 offline_msg_done = True
295 |             else:
296 |                 if meg["mode"] == "2pass-online":
297 |                     text_print_2pass_online += "{}".format(text)
298 |                     text_print = text_print_2pass_offline + text_print_2pass_online
299 |                 else:
300 |                     text_print_2pass_online = ""
301 |                     text_print = text_print_2pass_offline + "{}".format(text)
302 |                     text_print_2pass_offline += "{}".format(text)
303 |                 text_print = text_print[-args.words_max_print :]
304 |                 os.system("clear")
305 |                 print("\rpid" + str(id) + ": " + text_print)
306 |                 # offline_msg_done=True
307 | 
308 |     except Exception as e:
309 |         print("Exception:", e)
310 |         # traceback.print_exc()
311 |         # await websocket.close()
312 | 
313 | 
314 | async def ws_client(id, chunk_begin, chunk_size):
315 |     if args.audio_in is None:
316 |         chunk_begin = 0
317 |         chunk_size = 1
318 |     global websocket, voices, offline_msg_done
319 | 
320 |     for i in range(chunk_begin, chunk_begin + chunk_size):
321 |         offline_msg_done = False
322 |         voices = Queue()
323 |         if args.ssl == 1:
324 |             ssl_context = ssl.SSLContext()
325 |             ssl_context.check_hostname = False
326 |             ssl_context.verify_mode = ssl.CERT_NONE
327 |             uri = "wss://{}:{}".format(args.host, args.port)
328 |         else:
329 |             uri = "ws://{}:{}".format(args.host, args.port)
330 |             ssl_context = None
331 |         print("connect to", uri)
332 |         async with websockets.connect(
333 |             uri, subprotocols=["binary"], ping_interval=None, ssl=ssl_context
334 |         ) as websocket:
335 |             if args.audio_in is not None:
336 |                 task = asyncio.create_task(record_from_scp(i, 1))
337 |             else:
338 |                 task = asyncio.create_task(record_microphone())
339 |             task3 = asyncio.create_task(message(str(id) + "_" + str(i)))  # processid+fileid
340 |             await asyncio.gather(task, task3)
341 |     exit(0)
342 | 
343 | 
344 | def one_thread(id, chunk_begin, chunk_size):
345 |     asyncio.get_event_loop().run_until_complete(ws_client(id, chunk_begin, chunk_size))
346 |     asyncio.get_event_loop().run_forever()
347 | 
348 | 
349 | if __name__ == "__main__":
350 |     # for microphone
351 |     if args.audio_in is None:
352 |         p = Process(target=one_thread, args=(0, 0, 0))
353 |         p.start()
354 |         p.join()
355 |         print("end")
356 |     else:
357 |         # calculate the number of wavs for each preocess
358 |         if args.audio_in.endswith(".scp"):
359 |             f_scp = open(args.audio_in)
360 |             wavs = f_scp.readlines()
361 |         else:
362 |             wavs = [args.audio_in]
363 |         for wav in wavs:
364 |             wav_splits = wav.strip().split()
365 |             wav_name = wav_splits[0] if len(wav_splits) > 1 else "demo"
366 |             wav_path = wav_splits[1] if len(wav_splits) > 1 else wav_splits[0]
367 |             audio_type = os.path.splitext(wav_path)[-1].lower()
368 | 
369 |         total_len = len(wavs)
370 |         if total_len >= args.thread_num:
371 |             chunk_size = int(total_len / args.thread_num)
372 |             remain_wavs = total_len - chunk_size * args.thread_num
373 |         else:
374 |             chunk_size = 1
375 |             remain_wavs = 0
376 | 
377 |         process_list = []
378 |         chunk_begin = 0
379 |         for i in range(args.thread_num):
380 |             now_chunk_size = chunk_size
381 |             if remain_wavs > 0:
382 |                 now_chunk_size = chunk_size + 1
383 |                 remain_wavs = remain_wavs - 1
384 |             # process i handle wavs at chunk_begin and size of now_chunk_size
385 |             p = Process(target=one_thread, args=(i, chunk_begin, now_chunk_size))
386 |             chunk_begin = chunk_begin + now_chunk_size
387 |             p.start()
388 |             process_list.append(p)
389 | 
390 |         for p in process_list:
391 |             p.join()
392 | 
393 |         print("end")
394 | 


--------------------------------------------------------------------------------
/funasr_wss_server.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import json
  3 | import websockets
  4 | import time
  5 | import logging
  6 | import tracemalloc
  7 | import numpy as np
  8 | import argparse
  9 | import ssl
 10 | from loguru import logger
 11 | 
 12 | parser = argparse.ArgumentParser()
 13 | parser.add_argument(
 14 |     "--host", type=str, default="0.0.0.0", required=False, help="host ip, localhost, 0.0.0.0"
 15 | )
 16 | parser.add_argument("--port", type=int, default=10095, required=False, help="grpc server port")
 17 | parser.add_argument(
 18 |     "--asr_model",
 19 |     type=str,
 20 |     default="iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
 21 |     help="model from modelscope",
 22 | )
 23 | parser.add_argument("--asr_model_revision", type=str, default="v2.0.4", help="")
 24 | parser.add_argument(
 25 |     "--asr_model_online",
 26 |     type=str,
 27 |     default="iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online",
 28 |     help="model from modelscope",
 29 | )
 30 | parser.add_argument("--asr_model_online_revision", type=str, default="v2.0.4", help="")
 31 | parser.add_argument(
 32 |     "--vad_model",
 33 |     type=str,
 34 |     default="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch",
 35 |     help="model from modelscope",
 36 | )
 37 | parser.add_argument("--vad_model_revision", type=str, default="v2.0.4", help="")
 38 | parser.add_argument(
 39 |     "--punc_model",
 40 |     type=str,
 41 |     default="iic/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727",
 42 |     help="model from modelscope",
 43 | )
 44 | parser.add_argument("--punc_model_revision", type=str, default="v2.0.4", help="")
 45 | parser.add_argument("--ngpu", type=int, default=1, help="0 for cpu, 1 for gpu")
 46 | parser.add_argument("--device", type=str, default="cuda", help="cuda, cpu")
 47 | parser.add_argument("--ncpu", type=int, default=4, help="cpu cores")
 48 | parser.add_argument(
 49 |     "--certfile",
 50 |     type=str,
 51 |     default="cert.pem",
 52 |     required=False,
 53 |     help="certfile for ssl",
 54 | )
 55 | 
 56 | parser.add_argument(
 57 |     "--keyfile",
 58 |     type=str,
 59 |     default="key.pem",
 60 |     required=False,
 61 |     help="keyfile for ssl",
 62 | )
 63 | args = parser.parse_args()
 64 | 
 65 | 
 66 | websocket_users = set()
 67 | 
 68 | logger.info("模型加载中，请耐心等待...")
 69 | from funasr import AutoModel
 70 | 
 71 | # asr
 72 | model_asr = AutoModel(
 73 |     model=args.asr_model,
 74 |     model_revision=args.asr_model_revision,
 75 |     ngpu=args.ngpu,
 76 |     ncpu=args.ncpu,
 77 |     device=args.device,
 78 |     disable_pbar=True,
 79 |     disable_log=True,
 80 |     disable_update=True,
 81 | )
 82 | # asr
 83 | model_asr_streaming = AutoModel(
 84 |     model=args.asr_model_online,
 85 |     model_revision=args.asr_model_online_revision,
 86 |     ngpu=args.ngpu,
 87 |     ncpu=args.ncpu,
 88 |     device=args.device,
 89 |     disable_pbar=True,
 90 |     disable_log=True,
 91 |     disable_update=True,
 92 | )
 93 | # vad
 94 | model_vad = AutoModel(
 95 |     model=args.vad_model,
 96 |     model_revision=args.vad_model_revision,
 97 |     ngpu=args.ngpu,
 98 |     ncpu=args.ncpu,
 99 |     device=args.device,
100 |     disable_pbar=True,
101 |     disable_log=True,
102 |     # chunk_size=60,
103 |     disable_update=True,
104 | )
105 | 
106 | if args.punc_model != "":
107 |     model_punc = AutoModel(
108 |         model=args.punc_model,
109 |         model_revision=args.punc_model_revision,
110 |         ngpu=args.ngpu,
111 |         ncpu=args.ncpu,
112 |         device=args.device,
113 |         disable_pbar=True,
114 |         disable_log=True,
115 |         disable_update=True,
116 |     )
117 | else:
118 |     model_punc = None
119 | 
120 | 
121 | logger.info("模型已加载！现在只能同时支持一个客户端!!!!")
122 | 
123 | 
124 | async def ws_reset(websocket):
125 |     logger.info("WS已重置, 总连接数 ", len(websocket_users))
126 | 
127 |     websocket.status_dict_asr_online["cache"] = {}
128 |     websocket.status_dict_asr_online["is_final"] = True
129 |     websocket.status_dict_vad["cache"] = {}
130 |     websocket.status_dict_vad["is_final"] = True
131 |     websocket.status_dict_punc["cache"] = {}
132 | 
133 |     await websocket.close()
134 | 
135 | 
136 | async def clear_websocket():
137 |     for websocket in websocket_users:
138 |         await ws_reset(websocket)
139 |     websocket_users.clear()
140 | 
141 | 
142 | async def ws_serve(websocket, path):
143 |     frames = []
144 |     frames_asr = []
145 |     frames_asr_online = []
146 |     global websocket_users
147 |     # await clear_websocket()
148 |     websocket_users.add(websocket)
149 |     websocket.status_dict_asr = {}
150 |     websocket.status_dict_asr_online = {"cache": {}, "is_final": False}
151 |     websocket.status_dict_vad = {"cache": {}, "is_final": False}
152 |     websocket.status_dict_punc = {"cache": {}}
153 |     websocket.chunk_interval = 10
154 |     websocket.vad_pre_idx = 0
155 |     speech_start = False
156 |     speech_end_i = -1
157 |     websocket.wav_name = "microphone"
158 |     websocket.mode = "2pass"
159 |     logger.info("新用户已连接")
160 | 
161 |     try:
162 |         async for message in websocket:
163 |             if isinstance(message, str):
164 |                 messagejson = json.loads(message)
165 | 
166 |                 if "is_speaking" in messagejson:
167 |                     websocket.is_speaking = messagejson["is_speaking"]
168 |                     websocket.status_dict_asr_online["is_final"] = not websocket.is_speaking
169 |                 if "chunk_interval" in messagejson:
170 |                     websocket.chunk_interval = messagejson["chunk_interval"]
171 |                 if "wav_name" in messagejson:
172 |                     websocket.wav_name = messagejson.get("wav_name")
173 |                 if "chunk_size" in messagejson:
174 |                     chunk_size = messagejson["chunk_size"]
175 |                     if isinstance(chunk_size, str):
176 |                         chunk_size = chunk_size.split(",")
177 |                     websocket.status_dict_asr_online["chunk_size"] = [int(x) for x in chunk_size]
178 |                 if "encoder_chunk_look_back" in messagejson:
179 |                     websocket.status_dict_asr_online["encoder_chunk_look_back"] = messagejson[
180 |                         "encoder_chunk_look_back"
181 |                     ]
182 |                 if "decoder_chunk_look_back" in messagejson:
183 |                     websocket.status_dict_asr_online["decoder_chunk_look_back"] = messagejson[
184 |                         "decoder_chunk_look_back"
185 |                     ]
186 |                 if "hotword" in messagejson:
187 |                     websocket.status_dict_asr["hotword"] = messagejson["hotwords"]
188 |                 if "mode" in messagejson:
189 |                     websocket.mode = messagejson["mode"]
190 | 
191 |             websocket.status_dict_vad["chunk_size"] = int(
192 |                 websocket.status_dict_asr_online["chunk_size"][1] * 60 / websocket.chunk_interval
193 |             )
194 |             if len(frames_asr_online) > 0 or len(frames_asr) >= 0 or not isinstance(message, str):
195 |                 if not isinstance(message, str):
196 |                     frames.append(message)
197 |                     duration_ms = len(message) // 32
198 |                     websocket.vad_pre_idx += duration_ms
199 | 
200 |                     # asr online
201 |                     frames_asr_online.append(message)
202 |                     websocket.status_dict_asr_online["is_final"] = speech_end_i != -1
203 |                     if (
204 |                         len(frames_asr_online) % websocket.chunk_interval == 0
205 |                         or websocket.status_dict_asr_online["is_final"]
206 |                     ):
207 |                         if websocket.mode == "2pass" or websocket.mode == "online":
208 |                             audio_in = b"".join(frames_asr_online)
209 |                             try:
210 |                                 await async_asr_online(websocket, audio_in)
211 |                             except:
212 |                                 logger.error(f"error in asr streaming, {websocket.status_dict_asr_online}")
213 |                         frames_asr_online = []
214 |                     if speech_start:
215 |                         frames_asr.append(message)
216 |                     # vad online
217 |                     try:
218 |                         speech_start_i, speech_end_i = await async_vad(websocket, message)
219 |                     except:
220 |                         logger.error("error in vad")
221 |                     if speech_start_i != -1:
222 |                         speech_start = True
223 |                         beg_bias = (websocket.vad_pre_idx - speech_start_i) // duration_ms
224 |                         frames_pre = frames[-beg_bias:]
225 |                         frames_asr = []
226 |                         frames_asr.extend(frames_pre)
227 |                 # asr punc offline
228 |                 if speech_end_i != -1 or not websocket.is_speaking:
229 |                     # logger.info("vad end point")
230 |                     if websocket.mode == "2pass" or websocket.mode == "offline":
231 |                         audio_in = b"".join(frames_asr)
232 |                         try:
233 |                             await async_asr(websocket, audio_in)
234 |                         except:
235 |                             logger.info("error in asr offline")
236 |                     frames_asr = []
237 |                     speech_start = False
238 |                     frames_asr_online = []
239 |                     websocket.status_dict_asr_online["cache"] = {}
240 |                     if not websocket.is_speaking:
241 |                         websocket.vad_pre_idx = 0
242 |                         frames = []
243 |                         websocket.status_dict_vad["cache"] = {}
244 |                     else:
245 |                         frames = frames[-20:]
246 | 
247 |     except websockets.ConnectionClosed:
248 |         logger.info("ConnectionClosed...", websocket_users, flush=True)
249 |         await ws_reset(websocket)
250 |         websocket_users.remove(websocket)
251 |     except websockets.InvalidState:
252 |         logger.info("InvalidState...")
253 |     except Exception as e:
254 |         logger.info("Exception:", e)
255 | 
256 | 
257 | async def async_vad(websocket, audio_in):
258 | 
259 |     segments_result = model_vad.generate(input=audio_in, **websocket.status_dict_vad)[0]["value"]
260 |     # logger.info(segments_result)
261 | 
262 |     speech_start = -1
263 |     speech_end = -1
264 | 
265 |     if len(segments_result) == 0 or len(segments_result) > 1:
266 |         return speech_start, speech_end
267 |     if segments_result[0][0] != -1:
268 |         speech_start = segments_result[0][0]
269 |     if segments_result[0][1] != -1:
270 |         speech_end = segments_result[0][1]
271 |     return speech_start, speech_end
272 | 
273 | 
274 | async def async_asr(websocket, audio_in):
275 |     if len(audio_in) > 0:
276 |         # logger.info(len(audio_in))
277 |         rec_result = model_asr.generate(input=audio_in, **websocket.status_dict_asr)[0]
278 |         # logger.info("offline_asr, ", rec_result)
279 |         if model_punc is not None and len(rec_result["text"]) > 0:
280 |             # logger.info("offline, before punc", rec_result, "cache", websocket.status_dict_punc)
281 |             rec_result = model_punc.generate(
282 |                 input=rec_result["text"], **websocket.status_dict_punc
283 |             )[0]
284 |             # logger.info("offline, after punc", rec_result)
285 |         if len(rec_result["text"]) > 0:
286 |             # logger.info("offline", rec_result)
287 |             mode = "2pass-offline" if "2pass" in websocket.mode else websocket.mode
288 |             message = json.dumps(
289 |                 {
290 |                     "mode": mode,
291 |                     "text": rec_result["text"],
292 |                     "wav_name": websocket.wav_name,
293 |                     "is_final": websocket.is_speaking,
294 |                 }
295 |             )
296 |             await websocket.send(message)
297 | 
298 |     else:
299 |         mode = "2pass-offline" if "2pass" in websocket.mode else websocket.mode
300 |         message = json.dumps(
301 |             {
302 |                 "mode": mode,
303 |                 "text": "",
304 |                 "wav_name": websocket.wav_name,
305 |                 "is_final": websocket.is_speaking,
306 |             }
307 |         )
308 |         await websocket.send(message)    
309 | 
310 | async def async_asr_online(websocket, audio_in):
311 |     if len(audio_in) > 0:
312 |         # logger.info(websocket.status_dict_asr_online.get("is_final", False))
313 |         rec_result = model_asr_streaming.generate(
314 |             input=audio_in, **websocket.status_dict_asr_online
315 |         )[0]
316 |         # logger.info("online, ", rec_result)
317 |         if websocket.mode == "2pass" and websocket.status_dict_asr_online.get("is_final", False):
318 |             return
319 |             #     websocket.status_dict_asr_online["cache"] = dict()
320 |         if len(rec_result["text"]):
321 |             mode = "2pass-online" if "2pass" in websocket.mode else websocket.mode
322 |             message = json.dumps(
323 |                 {
324 |                     "mode": mode,
325 |                     "text": rec_result["text"],
326 |                     "wav_name": websocket.wav_name,
327 |                     "is_final": websocket.is_speaking,
328 |                 }
329 |             )
330 |             await websocket.send(message)
331 | 
332 | 
333 | if len(args.certfile) > 0:
334 |     ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
335 | 
336 |     # Generate with Lets Encrypt, copied to this location, chown to current user and 400 permissions
337 |     ssl_cert = args.certfile
338 |     ssl_key = args.keyfile
339 | 
340 |     ssl_context.load_cert_chain(ssl_cert, keyfile=ssl_key)
341 |     start_server = websockets.serve(
342 |         ws_serve, args.host, args.port, subprotocols=["binary"], ping_interval=None, ssl=ssl_context
343 |     )
344 | else:
345 |     start_server = websockets.serve(
346 |         ws_serve, args.host, args.port, subprotocols=["binary"], ping_interval=None
347 |     )
348 | asyncio.get_event_loop().run_until_complete(start_server)
349 | asyncio.get_event_loop().run_forever()
350 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | fastapi
2 | loguru
3 | uvicorn
4 | websockets==12.0
5 | pydub
6 | onnxruntime
7 | onnx==1.15.0
8 | FunASR==1.1.16


--------------------------------------------------------------------------------
/requirements_client.txt:
--------------------------------------------------------------------------------
1 | websockets
2 | pyaudio
3 | 


--------------------------------------------------------------------------------
/requirements_server.txt:
--------------------------------------------------------------------------------
1 | websockets==12.0
2 | pydub
3 | onnxruntime
4 | onnx==1.15.0
5 | FunASR==1.1.16


--------------------------------------------------------------------------------
/web/index.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html>
  3 | 
  4 | <head>
  5 |   <meta charset="utf-8" />
  6 |   <meta name="viewport" content="width=device-width, initial-scale=1" />
  7 |   <title>语音识别</title>
  8 |   <style>
  9 |     body {
 10 |       font-family: Arial, sans-serif;
 11 |       margin: 0;
 12 |       padding: 0;
 13 |       background-color: #f4f4f9;
 14 |       color: #333;
 15 |     }
 16 | 
 17 |     .container {
 18 |       max-width: 800px;
 19 |       margin: 20px auto;
 20 |       padding: 20px;
 21 |       background: #ffffff;
 22 |       border-radius: 10px;
 23 |       box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
 24 |     }
 25 | 
 26 |     h1 {
 27 |       text-align: center;
 28 |       color: #444;
 29 |     }
 30 | 
 31 |     input,
 32 |     textarea {
 33 |       width: 100%;
 34 |       padding: 10px;
 35 |       margin-top: 10px;
 36 |       border: 1px solid #ccc;
 37 |       border-radius: 5px;
 38 |       font-size: 14px;
 39 |       box-sizing: border-box;
 40 |     }
 41 | 
 42 |     textarea[readonly] {
 43 |       background-color: #f9f9f9;
 44 |       color: #666;
 45 |     }
 46 | 
 47 |     label {
 48 |       display: block;
 49 |       margin-top: 15px;
 50 |       font-weight: bold;
 51 |     }
 52 | 
 53 |     .group {
 54 |       margin-bottom: 20px;
 55 |       padding: 10px;
 56 |       border: 1px solid #ddd;
 57 |       border-radius: 5px;
 58 |       background: #fafafa;
 59 |     }
 60 | 
 61 |     button {
 62 |       background-color: #007bff;
 63 |       color: white;
 64 |       border: none;
 65 |       padding: 10px 15px;
 66 |       font-size: 14px;
 67 |       cursor: pointer;
 68 |       border-radius: 5px;
 69 |       margin-right: 10px;
 70 |       transition: background-color 0.3s, opacity 0.3s;
 71 |     }
 72 | 
 73 |     button:hover {
 74 |       background-color: #0056b3;
 75 |     }
 76 | 
 77 |     button:disabled {
 78 |       background-color: #cccccc;
 79 |       color: #666666;
 80 |       cursor: not-allowed;
 81 |       opacity: 0.7;
 82 |     }
 83 | 
 84 |     a {
 85 |       color: #007bff;
 86 |       text-decoration: none;
 87 |       font-size: 14px;
 88 |     }
 89 | 
 90 |     a:hover {
 91 |       text-decoration: underline;
 92 |     }
 93 | 
 94 |     .audio-container {
 95 |       margin-top: 20px;
 96 |       text-align: center;
 97 |     }
 98 | 
 99 |     .radio-group {
100 |       display: flex;
101 |       gap: 15px;
102 |       margin-top: 10px;
103 |     }
104 | 
105 |     .radio-group label {
106 |       display: flex;
107 |       align-items: center;
108 |       gap: 5px;
109 |       font-weight: normal;
110 |     }
111 | 
112 |     @media (max-width: 600px) {
113 |       .container {
114 |         padding: 15px;
115 |       }
116 | 
117 |       button {
118 |         width: 100%;
119 |         margin-bottom: 10px;
120 |       }
121 | 
122 |       .radio-group {
123 |         flex-direction: column;
124 |       }
125 |     }
126 |   </style>
127 | </head>
128 | 
129 | <body>
130 |   <div class="container">
131 |     <h1>语音识别</h1>
132 |     <div class="group">
133 |       <label for="wssip">ASR服务器地址 (必填):</label>
134 |       <input id="wssip" type="text" onchange="addresschange()" value="ws://127.0.0.1:10096/" />
135 |       <a id="wsslink" href="#" onclick="window.open('https://127.0.0.1:10096/', '_blank')">
136 |         点此处手工授权 wss://127.0.0.1:10096/
137 |       </a>
138 |       <label for="ai_vtuber_api_url">AI Vtuber API地址:</label>
139 |       <input id="ai_vtuber_api_url" type="text" value="http://127.0.0.1:8082/" />
140 |       <label for="livetalking_api_url">LiveTalking API地址:</label>
141 |       <input id="livetalking_api_url" type="text" value="http://127.0.0.1:8010/" />
142 |     </div>
143 | 
144 |     <div class="group">
145 |       <label>选择录音模式:</label>
146 |       <div class="radio-group">
147 |         <label>
148 |           <input name="recoder_mode" onclick="on_recoder_mode_change()" type="radio" value="mic" checked />
149 |           麦克风
150 |         </label>
151 |         <label>
152 |           <input name="recoder_mode" onclick="on_recoder_mode_change()" type="radio" value="file" />
153 |           文件
154 |         </label>
155 |       </div>
156 |     </div>
157 | 
158 |     <div id="mic_mode_div" class="group">
159 |       <label>选择ASR模型模式:</label>
160 |       <div class="radio-group">
161 |         <label>
162 |           <input name="asr_mode" type="radio" value="2pass" checked />
163 |           2pass
164 |         </label>
165 |         <label>
166 |           <input name="asr_mode" type="radio" value="online" />
167 |           online
168 |         </label>
169 |         <label>
170 |           <input name="asr_mode" type="radio" value="offline" />
171 |           offline
172 |         </label>
173 |       </div>
174 |     </div>
175 | 
176 |     <div id="rec_mode_div" class="group" style="display: none;">
177 |       <label for="upfile">上传文件:</label>
178 |       <input type="file" id="upfile" />
179 |     </div>
180 | 
181 |     <div id="use_itn_div" class="group">
182 |       <label>逆文本标准化 (ITN):</label>
183 |       <div class="radio-group">
184 |         <label>
185 |           <input name="use_itn" type="radio" value="false" checked />
186 |           否
187 |         </label>
188 |         <label>
189 |           <input name="use_itn" type="radio" value="true" />
190 |           是
191 |         </label>
192 |       </div>
193 |     </div>
194 | 
195 |     <div class="group">
196 |       <label for="varHot">热词设置 (一行一个关键字，空格隔开权重):</label>
197 |       <textarea rows="3" id="varHot">
198 | 阿里巴巴 20&#13;hello world 40
199 |         </textarea>
200 |     </div>
201 | 
202 |     <div class="group">
203 |       <label>选择数据转发:</label>
204 |       <div class="radio-group">
205 |         <label>
206 |           <input name="data_forward" onclick="on_data_forward_change()" type="radio" value="ai_vtuber" />
207 |           AI Vtuber
208 |         </label>
209 |         <label>
210 |           <input name="data_forward" onclick="on_data_forward_change()" type="radio" value="livetalking" />
211 |           LiveTalking
212 |         </label>
213 |         <label>
214 |           <input name="data_forward" onclick="on_data_forward_change()" type="radio" value="none" checked />
215 |           不转发
216 |         </label>
217 |       </div>
218 |     </div>
219 | 
220 |     <div class="group">
221 |       <label for="varArea">语音识别结果显示:</label>
222 |       <textarea rows="10" id="varArea" readonly></textarea>
223 |     </div>
224 | 
225 |     <div class="group">
226 |       <div id="info_div">请点击开始</div>
227 |       <button id="btnConnect">连接</button>
228 |       <button id="btnStart">开始</button>
229 |       <button id="btnStop">停止</button>
230 |     </div>
231 | 
232 |     <div class="audio-container">
233 |       <audio id="audio_record" type="audio/wav" controls></audio>
234 |     </div>
235 |   </div>
236 | 
237 |   <script src="recorder-core.js" charset="UTF-8"></script>
238 |   <script src="wav.js" charset="UTF-8"></script>
239 |   <script src="pcm.js" charset="UTF-8"></script>
240 |   <script src="wsconnecter.js" charset="utf-8"></script>
241 |   <script src="main.js" charset="utf-8"></script>
242 | </body>
243 | 
244 | </html>


--------------------------------------------------------------------------------
/web/main.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
  3 |  * Reserved. MIT License  (https://opensource.org/licenses/MIT)
  4 |  */
  5 | /* 2022-2023 by zhaoming,mali aihealthx.com */
  6 | 
  7 | 
  8 | // 连接; 定义socket连接类对象与语音对象
  9 | var wsconnecter = new WebSocketConnectMethod({ msgHandle: getJsonMessage, stateHandle: getConnState });
 10 | var audioBlob;
 11 | 
 12 | // 录音; 定义录音对象,wav格式
 13 | var rec = Recorder({
 14 | 	type: "pcm",
 15 | 	bitRate: 16,
 16 | 	sampleRate: 16000,
 17 | 	onProcess: recProcess
 18 | });
 19 | 
 20 | 
 21 | var sampleBuf = new Int16Array();
 22 | // 定义按钮响应事件
 23 | var btnStart = document.getElementById('btnStart');
 24 | btnStart.onclick = record;
 25 | var btnStop = document.getElementById('btnStop');
 26 | btnStop.onclick = stop;
 27 | btnStop.disabled = true;
 28 | btnStart.disabled = true;
 29 | 
 30 | btnConnect = document.getElementById('btnConnect');
 31 | btnConnect.onclick = start;
 32 | 
 33 | var awsslink = document.getElementById('wsslink');
 34 | 
 35 | var rec_text = "";  // for online rec asr result
 36 | var offline_text = ""; // for offline rec asr result
 37 | var info_div = document.getElementById('info_div');
 38 | 
 39 | var upfile = document.getElementById('upfile');
 40 | 
 41 | var isfilemode = false;  // if it is in file mode
 42 | var file_ext = "";
 43 | var file_sample_rate = 16000; //for wav file sample rate
 44 | var file_data_array;  // array to save file data
 45 | 
 46 | var totalsend = 0;
 47 | 
 48 | // 数据转发模式
 49 | var data_forward = "none";
 50 | 
 51 | 
 52 | // 构建url
 53 | function buildUrl(baseUrl, endpoint) {
 54 |     // 创建一个新的URL对象并设置其pathname为endpoint
 55 |     const url = new URL(baseUrl);
 56 |     url.pathname = new URL(endpoint, 'http://dummy.com').pathname;
 57 |   
 58 |     return url.toString();
 59 | }
 60 | 
 61 | // var now_ipaddress=window.location.href;
 62 | // now_ipaddress=now_ipaddress.replace("https://","wss://");
 63 | // now_ipaddress=now_ipaddress.replace("static/index.html","");
 64 | // var localport=window.location.port;
 65 | // now_ipaddress=now_ipaddress.replace(localport,"10095");
 66 | // document.getElementById('wssip').value=now_ipaddress;
 67 | addresschange();
 68 | function addresschange() {
 69 | 
 70 | 	var Uri = document.getElementById('wssip').value;
 71 | 	// document.getElementById('info_wslink').innerHTML = "点此处手工授权（IOS手机）";
 72 | 	Uri = Uri.replace(/wss/g, "https");
 73 | 	console.log("addresschange uri=", Uri);
 74 | 
 75 | 	awsslink.onclick = function () {
 76 | 		window.open(Uri, '_blank');
 77 | 	}
 78 | 
 79 | }
 80 | 
 81 | upfile.onclick = function () {
 82 | 	btnStart.disabled = true;
 83 | 	btnStop.disabled = true;
 84 | 	btnConnect.disabled = false;
 85 | 
 86 | }
 87 | 
 88 | // from https://github.com/xiangyuecn/Recorder/tree/master
 89 | var readWavInfo = function (bytes) {
 90 | 	//读取wav文件头，统一成44字节的头
 91 | 	if (bytes.byteLength < 44) {
 92 | 		return null;
 93 | 	};
 94 | 	var wavView = bytes;
 95 | 	var eq = function (p, s) {
 96 | 		for (var i = 0; i < s.length; i++) {
 97 | 			if (wavView[p + i] != s.charCodeAt(i)) {
 98 | 				return false;
 99 | 			};
100 | 		};
101 | 		return true;
102 | 	};
103 | 
104 | 	if (eq(0, "RIFF") && eq(8, "WAVEfmt ")) {
105 | 
106 | 		var numCh = wavView[22];
107 | 		if (wavView[20] == 1 && (numCh == 1 || numCh == 2)) {//raw pcm 单或双声道
108 | 			var sampleRate = wavView[24] + (wavView[25] << 8) + (wavView[26] << 16) + (wavView[27] << 24);
109 | 			var bitRate = wavView[34] + (wavView[35] << 8);
110 | 			var heads = [wavView.subarray(0, 12)], headSize = 12;//head只保留必要的块
111 | 			//搜索data块的位置
112 | 			var dataPos = 0; // 44 或有更多块
113 | 			for (var i = 12, iL = wavView.length - 8; i < iL;) {
114 | 				if (wavView[i] == 100 && wavView[i + 1] == 97 && wavView[i + 2] == 116 && wavView[i + 3] == 97) {//eq(i,"data")
115 | 					heads.push(wavView.subarray(i, i + 8));
116 | 					headSize += 8;
117 | 					dataPos = i + 8; break;
118 | 				}
119 | 				var i0 = i;
120 | 				i += 4;
121 | 				i += 4 + wavView[i] + (wavView[i + 1] << 8) + (wavView[i + 2] << 16) + (wavView[i + 3] << 24);
122 | 				if (i0 == 12) {//fmt 
123 | 					heads.push(wavView.subarray(i0, i));
124 | 					headSize += i - i0;
125 | 				}
126 | 			}
127 | 			if (dataPos) {
128 | 				var wavHead = new Uint8Array(headSize);
129 | 				for (var i = 0, n = 0; i < heads.length; i++) {
130 | 					wavHead.set(heads[i], n); n += heads[i].length;
131 | 				}
132 | 				return {
133 | 					sampleRate: sampleRate
134 | 					, bitRate: bitRate
135 | 					, numChannels: numCh
136 | 					, wavHead44: wavHead
137 | 					, dataPos: dataPos
138 | 				};
139 | 			};
140 | 		};
141 | 	};
142 | 	return null;
143 | };
144 | 
145 | upfile.onchange = function () {
146 | 	var len = this.files.length;
147 | 	for (let i = 0; i < len; i++) {
148 | 
149 | 		let fileAudio = new FileReader();
150 | 		fileAudio.readAsArrayBuffer(this.files[i]);
151 | 
152 | 		file_ext = this.files[i].name.split('.').pop().toLowerCase();
153 | 		var audioblob;
154 | 		fileAudio.onload = function () {
155 | 			audioblob = fileAudio.result;
156 | 			file_data_array = audioblob;
157 | 			info_div.innerHTML = '请点击连接进行识别';
158 | 		}
159 | 
160 | 		fileAudio.onerror = function (e) {
161 | 			console.log('error' + e);
162 | 		}
163 | 	}
164 | 	// for wav file, we  get the sample rate
165 | 	if (file_ext == "wav")
166 | 		for (let i = 0; i < len; i++) {
167 | 
168 | 			let fileAudio = new FileReader();
169 | 			fileAudio.readAsArrayBuffer(this.files[i]);
170 | 			fileAudio.onload = function () {
171 | 				audioblob = new Uint8Array(fileAudio.result);
172 | 
173 | 				// for wav file, we can get the sample rate
174 | 				var info = readWavInfo(audioblob);
175 | 				console.log(info);
176 | 				file_sample_rate = info.sampleRate;
177 | 			}
178 | 		}
179 | }
180 | 
181 | function play_file() {
182 | 	var audioblob = new Blob([new Uint8Array(file_data_array)], { type: "audio/wav" });
183 | 	var audio_record = document.getElementById('audio_record');
184 | 	audio_record.src = (window.URL || webkitURL).createObjectURL(audioblob);
185 | 	audio_record.controls = true;
186 | 	//audio_record.play();  //not auto play
187 | }
188 | function start_file_send() {
189 | 	sampleBuf = new Uint8Array(file_data_array);
190 | 
191 | 	var chunk_size = 960; // for asr chunk_size [5, 10, 5]
192 | 
193 | 	while (sampleBuf.length >= chunk_size) {
194 | 
195 | 		sendBuf = sampleBuf.slice(0, chunk_size);
196 | 		totalsend = totalsend + sampleBuf.length;
197 | 		sampleBuf = sampleBuf.slice(chunk_size, sampleBuf.length);
198 | 		wsconnecter.wsSend(sendBuf);
199 | 	}
200 | 
201 | 	stop();
202 | }
203 | 
204 | // 修改数据转发模式
205 | function on_data_forward_change() {
206 | 	var item = null;
207 | 	var obj = document.getElementsByName("data_forward");
208 | 	for (var i = 0; i < obj.length; i++) { //遍历Radio 
209 | 		if (obj[i].checked) {
210 | 			item = obj[i].value;
211 | 			break;
212 | 		}
213 | 	}
214 | 
215 | 	data_forward = item;
216 | }
217 | 
218 | function on_recoder_mode_change() {
219 | 	var item = null;
220 | 	var obj = document.getElementsByName("recoder_mode");
221 | 	for (var i = 0; i < obj.length; i++) { //遍历Radio 
222 | 		if (obj[i].checked) {
223 | 			item = obj[i].value;
224 | 			break;
225 | 		}
226 | 	}
227 | 	if (item == "mic") {
228 | 		document.getElementById("mic_mode_div").style.display = 'block';
229 | 		document.getElementById("rec_mode_div").style.display = 'none';
230 | 
231 | 		btnStart.disabled = true;
232 | 		btnStop.disabled = true;
233 | 		btnConnect.disabled = false;
234 | 		isfilemode = false;
235 | 	} else {
236 | 		document.getElementById("mic_mode_div").style.display = 'none';
237 | 		document.getElementById("rec_mode_div").style.display = 'block';
238 | 
239 | 		btnStart.disabled = true;
240 | 		btnStop.disabled = true;
241 | 		btnConnect.disabled = true;
242 | 		isfilemode = true;
243 | 		info_div.innerHTML = '请点击选择文件';
244 | 	}
245 | }
246 | 
247 | 
248 | function getHotwords() {
249 | 	var obj = document.getElementById("varHot");
250 | 
251 | 	if (typeof (obj) == 'undefined' || obj == null || obj.value.length <= 0) {
252 | 		return null;
253 | 	}
254 | 	let val = obj.value.toString();
255 | 
256 | 	console.log("hotwords=" + val);
257 | 	let items = val.split(/[(\r\n)\r\n]+/);  //split by \r\n
258 | 	var jsonresult = {};
259 | 	const regexNum = /^[0-9]*$/; // test number
260 | 	for (item of items) {
261 | 
262 | 		let result = item.split(" ");
263 | 		if (result.length >= 2 && regexNum.test(result[result.length - 1])) {
264 | 			var wordstr = "";
265 | 			for (var i = 0; i < result.length - 1; i++)
266 | 				wordstr = wordstr + result[i] + " ";
267 | 
268 | 			jsonresult[wordstr.trim()] = parseInt(result[result.length - 1]);
269 | 		}
270 | 	}
271 | 	console.log("jsonresult=" + JSON.stringify(jsonresult));
272 | 	return JSON.stringify(jsonresult);
273 | 
274 | }
275 | function getAsrMode() {
276 | 
277 | 	var item = null;
278 | 	var obj = document.getElementsByName("asr_mode");
279 | 	for (var i = 0; i < obj.length; i++) { //遍历Radio 
280 | 		if (obj[i].checked) {
281 | 			item = obj[i].value;
282 | 			break;
283 | 		}
284 | 	}
285 | 	if (isfilemode) {
286 | 		item = "offline";
287 | 	}
288 | 	console.log("asr mode" + item);
289 | 
290 | 	return item;
291 | }
292 | 
293 | function handleWithTimestamp(tmptext, tmptime) {
294 | 	console.log("tmptext: " + tmptext);
295 | 	console.log("tmptime: " + tmptime);
296 | 	if (tmptime == null || tmptime == "undefined" || tmptext.length <= 0) {
297 | 		return tmptext;
298 | 	}
299 | 	tmptext = tmptext.replace(/。|？|，|、|\?|\.|\ /g, ","); // in case there are a lot of "。"
300 | 	var words = tmptext.split(",");  // split to chinese sentence or english words
301 | 	var jsontime = JSON.parse(tmptime); //JSON.parse(tmptime.replace(/\]\]\[\[/g, "],[")); // in case there are a lot segments by VAD
302 | 	var char_index = 0; // index for timestamp
303 | 	var text_withtime = "";
304 | 	for (var i = 0; i < words.length; i++) {
305 | 		if (words[i] == "undefined" || words[i].length <= 0) {
306 | 			continue;
307 | 		}
308 | 		console.log("words===", words[i]);
309 | 		console.log("words: " + words[i] + ",time=" + jsontime[char_index][0] / 1000);
310 | 		if (/^[a-zA-Z]+$/.test(words[i])) {   // if it is english
311 | 			text_withtime = text_withtime + jsontime[char_index][0] / 1000 + ":" + words[i] + "\n";
312 | 			char_index = char_index + 1;  //for english, timestamp unit is about a word
313 | 		}
314 | 		else {
315 | 			// if it is chinese
316 | 			text_withtime = text_withtime + jsontime[char_index][0] / 1000 + ":" + words[i] + "\n";
317 | 			char_index = char_index + words[i].length; //for chinese, timestamp unit is about a char
318 | 		}
319 | 	}
320 | 	return text_withtime;
321 | }
322 | 
323 | const sleep = (delay) => new Promise((resolve) => setTimeout(resolve, delay))
324 | async function is_speaking() {
325 | 	try {
326 | 		if (data_forward == "livetalking") {
327 | 			const response = await fetch(buildUrl(document.getElementById("livetalking_api_url").value, '/is_speaking'), {
328 | 				body: JSON.stringify({
329 | 					sessionid: 0,
330 | 				}),
331 | 				headers: {
332 | 					'Content-Type': 'application/json'
333 | 				},
334 | 				method: 'POST'
335 | 			});
336 | 			const data = await response.json();
337 | 			console.log('is_speaking res:', data)
338 | 			return data.data
339 | 		} else if (data_forward == "ai_vtuber") {
340 | 			const response = await fetch(buildUrl(document.getElementById("ai_vtuber_api_url").value, '/get_sys_info'), {
341 | 				headers: {
342 | 					'Content-Type': 'application/json'
343 | 				},
344 | 				method: 'GET'
345 | 			});
346 | 			const data = await response.json();
347 | 			console.log('is_speaking res:', data)
348 | 	
349 | 			// 如果等待播放和等待合成的消息数量都为0，则认为没有在说话
350 | 			if (data["data"]["audio"]["wait_play_audio_num"] == 0 && data["data"]["audio"]["wait_synthesis_msg_num"] == 0 &&
351 | 				data["data"]["metahuman-stream"]["wait_play_audio_num"] == 0 && data["data"]["metahuman-stream"]["wait_synthesis_msg_num"] == 0
352 | 			) {
353 | 				return false;
354 | 			} else {
355 | 				return true;
356 | 			}
357 | 		}
358 | 		
359 | 		return false
360 | 	} catch (error) {
361 | 		console.error('is_speaking error:', error)
362 | 		return false
363 | 	}
364 | }
365 | 
366 | async function waitSpeakingEnd() {
367 | 	if (data_forward == "none") {
368 | 		return
369 | 	} else if (data_forward == "livetalking" || data_forward == "ai_vtuber") {
370 | 	    rec.stop() //关闭录音
371 | 		for (let i = 0; i < 10; i++) {  //等待数字人开始讲话，最长等待10s
372 | 			bspeak = await is_speaking()
373 | 			if (bspeak) {
374 | 				break
375 | 			}
376 | 			await sleep(1000)
377 | 		}
378 | 
379 | 		while (true) {  //等待数字人讲话结束
380 | 			bspeak = await is_speaking()
381 | 			if (!bspeak) {
382 | 				break
383 | 			}
384 | 			await sleep(1000)
385 | 		}
386 | 		await sleep(2000)
387 | 		rec.start()
388 | 	} 
389 | }
390 | // 语音识别结果; 对jsonMsg数据解析,将识别结果附加到编辑框中
391 | function getJsonMessage(jsonMsg) {
392 | 	//console.log(jsonMsg);
393 | 	// console.log("message: " + JSON.parse(jsonMsg.data)['text']);
394 | 	var rectxt = "" + JSON.parse(jsonMsg.data)['text'];
395 | 	var asrmodel = JSON.parse(jsonMsg.data)['mode'];
396 | 	var is_final = JSON.parse(jsonMsg.data)['is_final'];
397 | 	var timestamp = JSON.parse(jsonMsg.data)['timestamp'];
398 | 	if (asrmodel == "2pass-offline" || asrmodel == "offline") {
399 | 		// 过滤特殊字符
400 | 		rectxt = rectxt.replace(/<[^>]*>/g, '');
401 | 		
402 | 		offline_text = offline_text + rectxt.replace(/ +/g, "") + '\n'; //handleWithTimestamp(rectxt,timestamp); //rectxt; //.replace(/ +/g,"");
403 | 		rec_text = offline_text;
404 | 		
405 | 		if (data_forward == "livetalking") {
406 | 			fetch(buildUrl(document.getElementById("livetalking_api_url").value, '/human'), {
407 | 				body: JSON.stringify({
408 | 					text: rectxt.replace(/ +/g, ""),
409 | 					type: 'chat',
410 | 				}),
411 | 				headers: {
412 | 					'Content-Type': 'application/json'
413 | 				},
414 | 				method: 'POST'
415 | 			});
416 | 		} else if (data_forward == "ai_vtuber") {
417 | 		    fetch(buildUrl(document.getElementById("ai_vtuber_api_url").value, '/send'), {
418 | 				body: JSON.stringify({
419 | 					type: 'comment',
420 | 					data: {
421 | 						"type": 'comment',
422 | 						"username": '主人',
423 | 						"content": rectxt.replace(/ +/g, ""),
424 | 					}
425 | 				}),
426 | 				headers: {
427 | 					'Content-Type': 'application/json'
428 | 				},
429 | 				method: 'POST'
430 | 			});
431 | 		}
432 | 
433 | 		waitSpeakingEnd();
434 | 	}
435 | 	else {
436 | 		rec_text = rec_text + rectxt; //.replace(/ +/g,"");
437 | 	}
438 | 	var varArea = document.getElementById('varArea');
439 | 
440 | 	// 过滤特殊字符
441 | 	rec_text = rec_text.replace(/<[^>]*>/g, '');
442 | 	varArea.value = rec_text;
443 | 	// console.log("offline_text: " + asrmodel + "," + offline_text);
444 | 	// console.log("rec_text: " + rec_text);
445 | 	if (isfilemode == true && is_final == true) {
446 | 		console.log("call stop ws!");
447 | 		play_file();
448 | 		wsconnecter.wsStop();
449 | 
450 | 		info_div.innerHTML = "请点击连接";
451 | 
452 | 		btnStart.disabled = true;
453 | 		btnStop.disabled = true;
454 | 		btnConnect.disabled = false;
455 | 	}
456 | }
457 | 
458 | // 连接状态响应
459 | function getConnState(connState) {
460 | 	if (connState === 0) { //on open
461 | 
462 | 
463 | 		info_div.innerHTML = '连接成功!请点击开始';
464 | 		if (isfilemode == true) {
465 | 			info_div.innerHTML = '请耐心等待,大文件等待时间更长';
466 | 			start_file_send();
467 | 		}
468 | 		else {
469 | 			btnStart.disabled = false;
470 | 			btnStop.disabled = true;
471 | 			btnConnect.disabled = true;
472 | 		}
473 | 	} else if (connState === 1) {
474 | 		//stop();
475 | 	} else if (connState === 2) {
476 | 		stop();
477 | 		console.log('connecttion error');
478 | 
479 | 		alert("连接地址" + document.getElementById('wssip').value + "失败,请检查asr地址和端口。或试试界面上手动授权，再连接。");
480 | 		btnStart.disabled = true;
481 | 		btnStop.disabled = true;
482 | 		btnConnect.disabled = false;
483 | 
484 | 		info_div.innerHTML = '请点击连接';
485 | 	}
486 | }
487 | 
488 | function record() {
489 | 	rec.open(function () {
490 | 		rec.start();
491 | 		console.log("开始");
492 | 		btnStart.disabled = true;
493 | 		btnStop.disabled = false;
494 | 		btnConnect.disabled = true;
495 | 	});
496 | }
497 | 
498 | // 识别启动、停止、清空操作
499 | function start() {
500 | 	// 清除显示
501 | 	clear();
502 | 	//控件状态更新
503 | 	console.log("isfilemode" + isfilemode);
504 | 
505 | 	//启动连接
506 | 	var ret = wsconnecter.wsStart();
507 | 	// 1 is ok, 0 is error
508 | 	if (ret == 1) {
509 | 		info_div.innerHTML = "正在连接asr服务器，请等待...";
510 | 		isRec = true;
511 | 		btnStart.disabled = true;
512 | 		btnStop.disabled = true;
513 | 		btnConnect.disabled = true;
514 | 
515 | 		return 1;
516 | 	}
517 | 	else {
518 | 		info_div.innerHTML = "请点击开始";
519 | 		btnStart.disabled = true;
520 | 		btnStop.disabled = true;
521 | 		btnConnect.disabled = false;
522 | 
523 | 		return 0;
524 | 	}
525 | }
526 | 
527 | 
528 | function stop() {
529 | 	var chunk_size = new Array(5, 10, 5);
530 | 	var request = {
531 | 		"chunk_size": chunk_size,
532 | 		"wav_name": "h5",
533 | 		"is_speaking": false,
534 | 		"chunk_interval": 10,
535 | 		"mode": getAsrMode(),
536 | 		"url": document.getElementById('audio_record').src,
537 | 	};
538 | 	console.log(request);
539 | 	if (sampleBuf.length > 0) {
540 | 		wsconnecter.wsSend(sampleBuf);
541 | 		console.log("sampleBuf.length" + sampleBuf.length);
542 | 		sampleBuf = new Int16Array();
543 | 	}
544 | 	wsconnecter.wsSend(JSON.stringify(request));
545 | 
546 | 	// 控件状态更新
547 | 
548 | 	isRec = false;
549 | 	info_div.innerHTML = "发送完数据,请等候,正在识别...";
550 | 
551 | 	if (isfilemode == false) {
552 | 		btnStop.disabled = true;
553 | 		btnStart.disabled = true;
554 | 		btnConnect.disabled = true;
555 | 		//wait 3s for asr result
556 | 		setTimeout(function () {
557 | 			console.log("call stop ws!");
558 | 			wsconnecter.wsStop();
559 | 			btnConnect.disabled = false;
560 | 			info_div.innerHTML = "请点击连接";
561 | 		}, 3000);
562 | 
563 | 		rec.stop(function (blob, duration) {
564 | 			console.log(blob);
565 | 			var audioBlob = Recorder.pcm2wav(data = { sampleRate: 16000, bitRate: 16, blob: blob },
566 | 				function (theblob, duration) {
567 | 					console.log(theblob);
568 | 					var audio_record = document.getElementById('audio_record');
569 | 					audio_record.src = (window.URL || webkitURL).createObjectURL(theblob);
570 | 					audio_record.controls = true;
571 | 					//audio_record.play(); 
572 | 				}, function (msg) {
573 | 					console.log(msg);
574 | 				}
575 | 			);
576 | 		}, function (errMsg) {
577 | 			console.log("errMsg: " + errMsg);
578 | 		});
579 | 	}
580 | 	// 停止连接
581 | }
582 | 
583 | function clear() {
584 | 	var varArea = document.getElementById('varArea');
585 | 
586 | 	varArea.value = "";
587 | 	rec_text = "";
588 | 	offline_text = "";
589 | }
590 | 
591 | function recProcess(buffer, powerLevel, bufferDuration, bufferSampleRate, newBufferIdx, asyncEnd) {
592 | 	if (isRec === true) {
593 | 		var data_48k = buffer[buffer.length - 1];
594 | 
595 | 		var array_48k = new Array(data_48k);
596 | 		var data_16k = Recorder.SampleData(array_48k, bufferSampleRate, 16000).data;
597 | 
598 | 		sampleBuf = Int16Array.from([...sampleBuf, ...data_16k]);
599 | 		var chunk_size = 960; // for asr chunk_size [5, 10, 5]
600 | 		info_div.innerHTML = "" + bufferDuration / 1000 + "s";
601 | 		while (sampleBuf.length >= chunk_size) {
602 | 			sendBuf = sampleBuf.slice(0, chunk_size);
603 | 			sampleBuf = sampleBuf.slice(chunk_size, sampleBuf.length);
604 | 			wsconnecter.wsSend(sendBuf);
605 | 		}
606 | 	}
607 | }
608 | 
609 | function getUseITN() {
610 | 	var obj = document.getElementsByName("use_itn");
611 | 	for (var i = 0; i < obj.length; i++) {
612 | 		if (obj[i].checked) {
613 | 			return obj[i].value === "true";
614 | 		}
615 | 	}
616 | 	return false;
617 | }
618 | 


--------------------------------------------------------------------------------
/web/pcm.js:
--------------------------------------------------------------------------------
 1 | /*
 2 | pcm编码器+编码引擎
 3 | https://github.com/xiangyuecn/Recorder
 4 | 
 5 | 编码原理：本编码器输出的pcm格式数据其实就是Recorder中的buffers原始数据（经过了重新采样），16位时为LE小端模式（Little Endian），并未经过任何编码处理
 6 | 
 7 | 编码的代码和wav.js区别不大，pcm加上一个44字节wav头即成wav文件；所以要播放pcm就很简单了，直接转成wav文件来播放，已提供转换函数 Recorder.pcm2wav
 8 | */
 9 | (function(){
10 | "use strict";
11 | 
12 | Recorder.prototype.enc_pcm={
13 | 	stable:true
14 | 	,testmsg:"pcm为未封装的原始音频数据，pcm数据文件无法直接播放；支持位数8位、16位（填在比特率里面），采样率取值无限制"
15 | };
16 | Recorder.prototype.pcm=function(res,True,False){
17 | 		var This=this,set=This.set
18 | 			,size=res.length
19 | 			,bitRate=set.bitRate==8?8:16;
20 | 		
21 | 		var buffer=new ArrayBuffer(size*(bitRate/8));
22 | 		var data=new DataView(buffer);
23 | 		var offset=0;
24 | 		
25 | 		// 写入采样数据
26 | 		if(bitRate==8) {
27 | 			for(var i=0;i<size;i++,offset++) {
28 | 				//16转8据说是雷霄骅的 https://blog.csdn.net/sevennight1989/article/details/85376149 细节比blqw的按比例的算法清晰点，虽然都有明显杂音
29 | 				var val=(res[i]>>8)+128;
30 | 				data.setInt8(offset,val,true);
31 | 			};
32 | 		}else{
33 | 			for (var i=0;i<size;i++,offset+=2){
34 | 				data.setInt16(offset,res[i],true);
35 | 			};
36 | 		};
37 | 		
38 | 		
39 | 		True(new Blob([data.buffer],{type:"audio/pcm"}));
40 | 	};
41 | 
42 | 
43 | 
44 | 
45 | 
46 | /**pcm直接转码成wav，可以直接用来播放；需同时引入wav.js
47 | data: {
48 | 		sampleRate:16000 pcm的采样率
49 | 		bitRate:16 pcm的位数 取值：8 或 16
50 | 		blob:blob对象
51 | 	}
52 | 	data如果直接提供的blob将默认使用16位16khz的配置，仅用于测试
53 | True(wavBlob,duration)
54 | False(msg)
55 | **/
56 | Recorder.pcm2wav=function(data,True,False){
57 | 	if(data.slice && data.type!=null){//Blob 测试用
58 | 		data={blob:data};
59 | 	};
60 | 	var sampleRate=data.sampleRate||16000,bitRate=data.bitRate||16;
61 | 	if(!data.sampleRate || !data.bitRate){
62 | 		console.warn("pcm2wav必须提供sampleRate和bitRate");
63 | 	};
64 | 	if(!Recorder.prototype.wav){
65 | 		False("pcm2wav必须先加载wav编码器wav.js");
66 | 		return;
67 | 	};
68 | 	
69 | 	var reader=new FileReader();
70 | 	reader.onloadend=function(){
71 | 		var pcm;
72 | 		if(bitRate==8){
73 | 			//8位转成16位
74 | 			var u8arr=new Uint8Array(reader.result);
75 | 			pcm=new Int16Array(u8arr.length);
76 | 			for(var j=0;j<u8arr.length;j++){
77 | 				pcm[j]=(u8arr[j]-128)<<8;
78 | 			};
79 | 		}else{
80 | 			pcm=new Int16Array(reader.result);
81 | 		};
82 | 		
83 | 		Recorder({
84 | 			type:"wav"
85 | 			,sampleRate:sampleRate
86 | 			,bitRate:bitRate
87 | 		}).mock(pcm,sampleRate).stop(function(wavBlob,duration){
88 | 			True(wavBlob,duration);
89 | 		},False);
90 | 	};
91 | 	reader.readAsArrayBuffer(data.blob);
92 | };
93 | 
94 | 
95 | 
96 | })();


--------------------------------------------------------------------------------
/web/recorder-core.js:
--------------------------------------------------------------------------------
   1 | /*
   2 | 录音
   3 | https://github.com/xiangyuecn/Recorder
   4 | */
   5 | (function(factory){
   6 | 	factory(window);
   7 | 	//umd returnExports.js
   8 | 	if(typeof(define)=='function' && define.amd){
   9 | 		define(function(){
  10 | 			return Recorder;
  11 | 		});
  12 | 	};
  13 | 	if(typeof(module)=='object' && module.exports){
  14 | 		module.exports=Recorder;
  15 | 	};
  16 | }(function(window){
  17 | "use strict";
  18 | 
  19 | var NOOP=function(){};
  20 | 
  21 | var Recorder=function(set){
  22 | 	return new initFn(set);
  23 | };
  24 | Recorder.LM="2023-02-01 18:05";
  25 | var RecTxt="Recorder";
  26 | var getUserMediaTxt="getUserMedia";
  27 | var srcSampleRateTxt="srcSampleRate";
  28 | var sampleRateTxt="sampleRate";
  29 | var CatchTxt="catch";
  30 | 
  31 | 
  32 | //是否已经打开了全局的麦克风录音，所有工作都已经准备好了，就等接收音频数据了
  33 | Recorder.IsOpen=function(){
  34 | 	var stream=Recorder.Stream;
  35 | 	if(stream){
  36 | 		var tracks=stream.getTracks&&stream.getTracks()||stream.audioTracks||[];
  37 | 		var track=tracks[0];
  38 | 		if(track){
  39 | 			var state=track.readyState;
  40 | 			return state=="live"||state==track.LIVE;
  41 | 		};
  42 | 	};
  43 | 	return false;
  44 | };
  45 | /*H5录音时的AudioContext缓冲大小。会影响H5录音时的onProcess调用速率，相对于AudioContext.sampleRate=48000时，4096接近12帧/s，调节此参数可生成比较流畅的回调动画。
  46 | 	取值256, 512, 1024, 2048, 4096, 8192, or 16384
  47 | 	注意，取值不能过低，2048开始不同浏览器可能回调速率跟不上造成音质问题。
  48 | 	一般无需调整，调整后需要先close掉已打开的录音，再open时才会生效。
  49 | */
  50 | Recorder.BufferSize=4096;
  51 | //销毁已持有的所有全局资源，当要彻底移除Recorder时需要显式的调用此方法
  52 | Recorder.Destroy=function(){
  53 | 	CLog(RecTxt+" Destroy");
  54 | 	Disconnect();//断开可能存在的全局Stream、资源
  55 | 	
  56 | 	for(var k in DestroyList){
  57 | 		DestroyList[k]();
  58 | 	};
  59 | };
  60 | var DestroyList={};
  61 | //登记一个需要销毁全局资源的处理方法
  62 | Recorder.BindDestroy=function(key,call){
  63 | 	DestroyList[key]=call;
  64 | };
  65 | //判断浏览器是否支持录音，随时可以调用。注意：仅仅是检测浏览器支持情况，不会判断和调起用户授权，不会判断是否支持特定格式录音。
  66 | Recorder.Support=function(){
  67 | 	var scope=navigator.mediaDevices||{};
  68 | 	if(!scope[getUserMediaTxt]){
  69 | 		scope=navigator;
  70 | 		scope[getUserMediaTxt]||(scope[getUserMediaTxt]=scope.webkitGetUserMedia||scope.mozGetUserMedia||scope.msGetUserMedia);
  71 | 	};
  72 | 	if(!scope[getUserMediaTxt]){
  73 | 		return false;
  74 | 	};
  75 | 	Recorder.Scope=scope;
  76 | 	
  77 | 	if(!Recorder.GetContext()){
  78 | 		return false;
  79 | 	};
  80 | 	return true;
  81 | };
  82 | //获取全局的AudioContext对象，如果浏览器不支持将返回null
  83 | Recorder.GetContext=function(){
  84 | 	var AC=window.AudioContext;
  85 | 	if(!AC){
  86 | 		AC=window.webkitAudioContext;
  87 | 	};
  88 | 	if(!AC){
  89 | 		return null;
  90 | 	};
  91 | 	
  92 | 	if(!Recorder.Ctx||Recorder.Ctx.state=="closed"){
  93 | 		//不能反复构造，低版本number of hardware contexts reached maximum (6)
  94 | 		Recorder.Ctx=new AC();
  95 | 		
  96 | 		Recorder.BindDestroy("Ctx",function(){
  97 | 			var ctx=Recorder.Ctx;
  98 | 			if(ctx&&ctx.close){//能关掉就关掉，关不掉就保留着
  99 | 				ctx.close();
 100 | 				Recorder.Ctx=0;
 101 | 			};
 102 | 		});
 103 | 	};
 104 | 	return Recorder.Ctx;
 105 | };
 106 | 
 107 | 
 108 | /*是否启用MediaRecorder.WebM.PCM来进行音频采集连接（如果浏览器支持的话），默认启用，禁用或者不支持时将使用AudioWorklet或ScriptProcessor来连接；MediaRecorder采集到的音频数据比其他方式更好，几乎不存在丢帧现象，所以音质明显会好很多，建议保持开启*/
 109 | var ConnectEnableWebM="ConnectEnableWebM";
 110 | Recorder[ConnectEnableWebM]=true;
 111 | 
 112 | /*是否启用AudioWorklet特性来进行音频采集连接（如果浏览器支持的话），默认禁用，禁用或不支持时将使用过时的ScriptProcessor来连接（如果方法还在的话），当前AudioWorklet的实现在移动端没有ScriptProcessor稳健；ConnectEnableWebM如果启用并且有效时，本参数将不起作用*/
 113 | var ConnectEnableWorklet="ConnectEnableWorklet";
 114 | Recorder[ConnectEnableWorklet]=false;
 115 | 
 116 | /*初始化H5音频采集连接。如果自行提供了sourceStream将只进行一次简单的连接处理。如果是普通麦克风录音，此时的Stream是全局的，Safari上断开后就无法再次进行连接使用，表现为静音，因此使用全部使用全局处理避免调用到disconnect；全局处理也有利于屏蔽底层细节，start时无需再调用底层接口，提升兼容、可靠性。*/
 117 | var Connect=function(streamStore,isUserMedia){
 118 | 	var bufferSize=streamStore.BufferSize||Recorder.BufferSize;
 119 | 	
 120 | 	var ctx=Recorder.Ctx,stream=streamStore.Stream;
 121 | 	var mediaConn=function(node){
 122 | 		var media=stream._m=ctx.createMediaStreamSource(stream);
 123 | 		var ctxDest=ctx.destination,cmsdTxt="createMediaStreamDestination";
 124 | 		if(ctx[cmsdTxt]){
 125 | 			ctxDest=ctx[cmsdTxt]();
 126 | 		};
 127 | 		media.connect(node);
 128 | 		node.connect(ctxDest);
 129 | 	}
 130 | 	var isWebM,isWorklet,badInt,webMTips="";
 131 | 	var calls=stream._call;
 132 | 	
 133 | 	//浏览器回传的音频数据处理
 134 | 	var onReceive=function(float32Arr){
 135 | 		for(var k0 in calls){//has item
 136 | 			var size=float32Arr.length;
 137 | 			
 138 | 			var pcm=new Int16Array(size);
 139 | 			var sum=0;
 140 | 			for(var j=0;j<size;j++){//floatTo16BitPCM 
 141 | 				var s=Math.max(-1,Math.min(1,float32Arr[j]));
 142 | 				s=s<0?s*0x8000:s*0x7FFF;
 143 | 				pcm[j]=s;
 144 | 				sum+=Math.abs(s);
 145 | 			};
 146 | 			
 147 | 			for(var k in calls){
 148 | 				calls[k](pcm,sum);
 149 | 			};
 150 | 			
 151 | 			return;
 152 | 		};
 153 | 	};
 154 | 	
 155 | 	var scriptProcessor="ScriptProcessor";//一堆字符串名字，有利于压缩js
 156 | 	var audioWorklet="audioWorklet";
 157 | 	var recAudioWorklet=RecTxt+" "+audioWorklet;
 158 | 	var RecProc="RecProc";
 159 | 	var MediaRecorderTxt="MediaRecorder";
 160 | 	var MRWebMPCM=MediaRecorderTxt+".WebM.PCM";
 161 | 
 162 | 
 163 | //===================连接方式三=========================
 164 | 	//古董级别的 ScriptProcessor 处理，目前所有浏览器均兼容，虽然是过时的方法，但更稳健，移动端性能比AudioWorklet强
 165 | 	var oldFn=ctx.createScriptProcessor||ctx.createJavaScriptNode;
 166 | 	var oldIsBest="。由于"+audioWorklet+"内部1秒375次回调，在移动端可能会有性能问题导致回调丢失录音变短，PC端无影响，暂不建议开启"+audioWorklet+"。";
 167 | 	var oldScript=function(){
 168 | 		isWorklet=stream.isWorklet=false;
 169 | 		_Disconn_n(stream);
 170 | 		CLog("Connect采用老的"+scriptProcessor+"，"+(Recorder[ConnectEnableWorklet]?"但已":"可")+"设置"+RecTxt+"."+ConnectEnableWorklet+"=true尝试启用"+audioWorklet+webMTips+oldIsBest,3);
 171 | 		
 172 | 		var process=stream._p=oldFn.call(ctx,bufferSize,1,1);//单声道，省的数据处理复杂
 173 | 		mediaConn(process);
 174 | 		
 175 | 		var _DsetTxt="_D220626",_Dset=Recorder[_DsetTxt];if(_Dset)CLog("Use "+RecTxt+"."+_DsetTxt,3);
 176 | 		process.onaudioprocess=function(e){
 177 | 			var arr=e.inputBuffer.getChannelData(0);
 178 | 			if(_Dset){//临时调试用的参数，未来会被删除
 179 | 				arr=new Float32Array(arr);//块是共享的，必须复制出来
 180 | 				setTimeout(function(){ onReceive(arr) });//立即退出回调，试图减少对浏览器录音的影响
 181 | 			}else{
 182 | 				onReceive(arr);
 183 | 			};
 184 | 		};
 185 | 	};
 186 | 
 187 | 
 188 | //===================连接方式二=========================
 189 | var connWorklet=function(){
 190 | 	//尝试开启AudioWorklet处理
 191 | 	isWebM=stream.isWebM=false;
 192 | 	_Disconn_r(stream);
 193 | 	
 194 | 	isWorklet=stream.isWorklet=!oldFn || Recorder[ConnectEnableWorklet];
 195 | 	var AwNode=window.AudioWorkletNode;
 196 | 	if(!(isWorklet && ctx[audioWorklet] && AwNode)){
 197 | 		oldScript();//被禁用 或 不支持，直接使用老的
 198 | 		return;
 199 | 	};
 200 | 	var clazzUrl=function(){
 201 | 		var xf=function(f){return f.toString().replace(/^function|DEL_/g,"").replace(/\$RA/g,recAudioWorklet)};
 202 | 		var clazz='class '+RecProc+' extends AudioWorkletProcessor{';
 203 | 			clazz+="constructor "+xf(function(option){
 204 | 				DEL_super(option);
 205 | 				var This=this,bufferSize=option.processorOptions.bufferSize;
 206 | 				This.bufferSize=bufferSize;
 207 | 				This.buffer=new Float32Array(bufferSize*2);//乱给size搞乱缓冲区不管
 208 | 				This.pos=0;
 209 | 				This.port.onmessage=function(e){
 210 | 					if(e.data.kill){
 211 | 						This.kill=true;
 212 | 						console.log("$RA kill call");
 213 | 					}
 214 | 				};
 215 | 				console.log("$RA .ctor call", option);
 216 | 			});
 217 | 			
 218 | 			//https://developer.mozilla.org/en-US/docs/Web/API/AudioWorkletProcessor/process 每次回调128个采样数据，1秒375次回调，高频导致移动端性能问题，结果就是回调次数缺斤少两，进而导致丢失数据，PC端似乎没有性能问题
 219 | 			clazz+="process "+xf(function(input,b,c){//需要等到ctx激活后才会有回调
 220 | 				var This=this,bufferSize=This.bufferSize;
 221 | 				var buffer=This.buffer,pos=This.pos;
 222 | 				input=(input[0]||[])[0]||[];
 223 | 				if(input.length){
 224 | 					buffer.set(input,pos);
 225 | 					pos+=input.length;
 226 | 					
 227 | 					var len=~~(pos/bufferSize)*bufferSize;
 228 | 					if(len){
 229 | 						this.port.postMessage({ val: buffer.slice(0,len) });
 230 | 						
 231 | 						var more=buffer.subarray(len,pos);
 232 | 						buffer=new Float32Array(bufferSize*2);
 233 | 						buffer.set(more);
 234 | 						pos=more.length;
 235 | 						This.buffer=buffer;
 236 | 					}
 237 | 					This.pos=pos;
 238 | 				}
 239 | 				return !This.kill;
 240 | 			});
 241 | 		clazz+='}'
 242 | 			+'try{'
 243 | 				+'registerProcessor("'+RecProc+'", '+RecProc+')'
 244 | 			+'}catch(e){'
 245 | 				+'console.error("'+recAudioWorklet+'注册失败",e)'
 246 | 			+'}';
 247 | 		//URL.createObjectURL 本地有些浏览器会报 Not allowed to load local resource，直接用dataurl
 248 | 		return "data:text/javascript;base64,"+btoa(unescape(encodeURIComponent(clazz)));
 249 | 	};
 250 | 	
 251 | 	var awNext=function(){//可以继续，没有调用断开
 252 | 		return isWorklet && stream._na;
 253 | 	};
 254 | 	var nodeAlive=stream._na=function(){
 255 | 		//start时会调用，只要没有收到数据就断定AudioWorklet有问题，恢复用老的
 256 | 		if(badInt!==""){//没有回调过数据
 257 | 			clearTimeout(badInt);
 258 | 			badInt=setTimeout(function(){
 259 | 				badInt=0;
 260 | 				if(awNext()){
 261 | 					CLog(audioWorklet+"未返回任何音频，恢复使用"+scriptProcessor,3);
 262 | 					oldFn&&oldScript();//未来没有老的，可能是误判
 263 | 				};
 264 | 			},500);
 265 | 		};
 266 | 	};
 267 | 	var createNode=function(){
 268 | 		if(!awNext())return;
 269 | 		var node=stream._n=new AwNode(ctx, RecProc, {
 270 | 			processorOptions:{bufferSize:bufferSize}
 271 | 		});
 272 | 		mediaConn(node);
 273 | 		node.port.onmessage=function(e){
 274 | 			if(badInt){
 275 | 				clearTimeout(badInt);badInt="";
 276 | 			};
 277 | 			if(awNext()){
 278 | 				onReceive(e.data.val);
 279 | 			}else if(!isWorklet){
 280 | 				CLog(audioWorklet+"多余回调",3);
 281 | 			};
 282 | 		};
 283 | 		CLog("Connect采用"+audioWorklet+"，设置"+RecTxt+"."+ConnectEnableWorklet+"=false可恢复老式"+scriptProcessor+webMTips+oldIsBest,3);
 284 | 	};
 285 | 	
 286 | 	//如果start时的resume和下面的构造node同时进行，将会导致部分浏览器崩溃，源码assets中 ztest_chrome_bug_AudioWorkletNode.html 可测试。所以，将所有代码套到resume里面（不管catch），避免出现这个问题
 287 | 	ctx.resume()[calls&&"finally"](function(){//注释掉这行 观摩浏览器崩溃 STATUS_ACCESS_VIOLATION
 288 | 		if(!awNext())return;
 289 | 		if(ctx[RecProc]){
 290 | 			createNode();
 291 | 			return;
 292 | 		};
 293 | 		var url=clazzUrl();
 294 | 		ctx[audioWorklet].addModule(url).then(function(e){
 295 | 			if(!awNext())return;
 296 | 			ctx[RecProc]=1;
 297 | 			createNode();
 298 | 			if(badInt){//重新计时
 299 | 				nodeAlive();
 300 | 			};
 301 | 		})[CatchTxt](function(e){ //fix 关键字，保证catch压缩时保持字符串形式
 302 | 			CLog(audioWorklet+".addModule失败",1,e);
 303 | 			awNext()&&oldScript();
 304 | 		});
 305 | 	});
 306 | };
 307 | 
 308 | 
 309 | //===================连接方式一=========================
 310 | var connWebM=function(){
 311 | 	//尝试开启MediaRecorder录制webm+pcm处理
 312 | 	var MR=window[MediaRecorderTxt];
 313 | 	var onData="ondataavailable";
 314 | 	var webmType="audio/webm; codecs=pcm";
 315 | 	isWebM=stream.isWebM=Recorder[ConnectEnableWebM];
 316 | 	
 317 | 	var supportMR=MR && (onData in MR.prototype) && MR.isTypeSupported(webmType);
 318 | 	webMTips=supportMR?"":"（此浏览器不支持"+MRWebMPCM+"）";
 319 | 	if(!isUserMedia || !isWebM || !supportMR){
 320 | 		connWorklet(); //非麦克风录音（MediaRecorder采样率不可控） 或 被禁用 或 不支持MediaRecorder 或 不支持webm+pcm
 321 | 		return;
 322 | 	}
 323 | 	
 324 | 	var mrNext=function(){//可以继续，没有调用断开
 325 | 		return isWebM && stream._ra;
 326 | 	};
 327 | 	var mrAlive=stream._ra=function(){
 328 | 		//start时会调用，只要没有收到数据就断定MediaRecorder有问题，降级处理
 329 | 		if(badInt!==""){//没有回调过数据
 330 | 			clearTimeout(badInt);
 331 | 			badInt=setTimeout(function(){
 332 | 				//badInt=0; 保留给nodeAlive继续判断
 333 | 				if(mrNext()){
 334 | 					CLog(MediaRecorderTxt+"未返回任何音频，降级使用"+audioWorklet,3);
 335 | 					connWorklet();
 336 | 				};
 337 | 			},500);
 338 | 		};
 339 | 	};
 340 | 	
 341 | 	var mrSet=Object.assign({mimeType:webmType}, Recorder.ConnectWebMOptions);
 342 | 	var mr=stream._r=new MR(stream, mrSet);
 343 | 	var webmData=stream._rd={sampleRate:ctx[sampleRateTxt]};
 344 | 	mr[onData]=function(e){
 345 | 		//提取webm中的pcm数据，提取失败就等着badInt超时降级处理
 346 | 		var reader=new FileReader();
 347 | 		reader.onloadend=function(){
 348 | 			if(mrNext()){
 349 | 				var f32arr=WebM_Extract(new Uint8Array(reader.result),webmData);
 350 | 				if(!f32arr)return;
 351 | 				if(f32arr==-1){//无法提取，立即降级
 352 | 					connWorklet();
 353 | 					return;
 354 | 				};
 355 | 				
 356 | 				if(badInt){
 357 | 					clearTimeout(badInt);badInt="";
 358 | 				};
 359 | 				onReceive(f32arr);
 360 | 			}else if(!isWebM){
 361 | 				CLog(MediaRecorderTxt+"多余回调",3);
 362 | 			};
 363 | 		};
 364 | 		reader.readAsArrayBuffer(e.data);
 365 | 	};
 366 | 	mr.start(~~(bufferSize/48));//按48k时的回调间隔
 367 | 	CLog("Connect采用"+MRWebMPCM+"，设置"+RecTxt+"."+ConnectEnableWebM+"=false可恢复使用"+audioWorklet+"或老式"+scriptProcessor);
 368 | };
 369 | 
 370 | 	connWebM();
 371 | };
 372 | var ConnAlive=function(stream){
 373 | 	if(stream._na) stream._na(); //检查AudioWorklet连接是否有效，无效就回滚到老的ScriptProcessor
 374 | 	if(stream._ra) stream._ra(); //检查MediaRecorder连接是否有效，无效就降级处理
 375 | };
 376 | var _Disconn_n=function(stream){
 377 | 	stream._na=null;
 378 | 	if(stream._n){
 379 | 		stream._n.port.postMessage({kill:true});
 380 | 		stream._n.disconnect();
 381 | 		stream._n=null;
 382 | 	};
 383 | };
 384 | var _Disconn_r=function(stream){
 385 | 	stream._ra=null;
 386 | 	if(stream._r){
 387 | 		stream._r.stop();
 388 | 		stream._r=null;
 389 | 	};
 390 | };
 391 | var Disconnect=function(streamStore){
 392 | 	streamStore=streamStore||Recorder;
 393 | 	var isGlobal=streamStore==Recorder;
 394 | 	
 395 | 	var stream=streamStore.Stream;
 396 | 	if(stream){
 397 | 		if(stream._m){
 398 | 			stream._m.disconnect();
 399 | 			stream._m=null;
 400 | 		};
 401 | 		if(stream._p){
 402 | 			stream._p.disconnect();
 403 | 			stream._p.onaudioprocess=stream._p=null;
 404 | 		};
 405 | 		_Disconn_n(stream);
 406 | 		_Disconn_r(stream);
 407 | 		
 408 | 		if(isGlobal){//全局的时候，要把流关掉（麦克风），直接提供的流不处理
 409 | 			var tracks=stream.getTracks&&stream.getTracks()||stream.audioTracks||[];
 410 | 			for(var i=0;i<tracks.length;i++){
 411 | 				var track=tracks[i];
 412 | 				track.stop&&track.stop();
 413 | 			};
 414 | 			stream.stop&&stream.stop();
 415 | 		};
 416 | 	};
 417 | 	streamStore.Stream=0;
 418 | };
 419 | 
 420 | /*对pcm数据的采样率进行转换
 421 | pcmDatas: [[Int16,...]] pcm片段列表
 422 | pcmSampleRate:48000 pcm数据的采样率
 423 | newSampleRate:16000 需要转换成的采样率，newSampleRate>=pcmSampleRate时不会进行任何处理，小于时会进行重新采样
 424 | prevChunkInfo:{} 可选，上次调用时的返回值，用于连续转换，本次调用将从上次结束位置开始进行处理。或可自行定义一个ChunkInfo从pcmDatas指定的位置开始进行转换
 425 | option:{ 可选，配置项
 426 | 		frameSize:123456 帧大小，每帧的PCM Int16的数量，采样率转换后的pcm长度为frameSize的整数倍，用于连续转换。目前仅在mp3格式时才有用，frameSize取值为1152，这样编码出来的mp3时长和pcm的时长完全一致，否则会因为mp3最后一帧录音不够填满时添加填充数据导致mp3的时长变长。
 427 | 		frameType:"" 帧类型，一般为rec.set.type，提供此参数时无需提供frameSize，会自动使用最佳的值给frameSize赋值，目前仅支持mp3=1152(MPEG1 Layer3的每帧采采样数)，其他类型=1。
 428 | 			以上两个参数用于连续转换时使用，最多使用一个，不提供时不进行帧的特殊处理，提供时必须同时提供prevChunkInfo才有作用。最后一段数据处理时无需提供帧大小以便输出最后一丁点残留数据。
 429 | 	}
 430 | 
 431 | 返回ChunkInfo:{
 432 | 	//可定义，从指定位置开始转换到结尾
 433 | 	index:0 pcmDatas已处理到的索引
 434 | 	offset:0.0 已处理到的index对应的pcm中的偏移的下一个位置
 435 | 	
 436 | 	//仅作为返回值
 437 | 	frameNext:null||[Int16,...] 下一帧的部分数据，frameSize设置了的时候才可能会有
 438 | 	sampleRate:16000 结果的采样率，<=newSampleRate
 439 | 	data:[Int16,...] 转换后的PCM结果；如果是连续转换，并且pcmDatas中并没有新数据时，data的长度可能为0
 440 | }
 441 | */
 442 | Recorder.SampleData=function(pcmDatas,pcmSampleRate,newSampleRate,prevChunkInfo,option){
 443 | 	prevChunkInfo||(prevChunkInfo={});
 444 | 	var index=prevChunkInfo.index||0;
 445 | 	var offset=prevChunkInfo.offset||0;
 446 | 	
 447 | 	var frameNext=prevChunkInfo.frameNext||[];
 448 | 	option||(option={});
 449 | 	var frameSize=option.frameSize||1;
 450 | 	if(option.frameType){
 451 | 		frameSize=option.frameType=="mp3"?1152:1;
 452 | 	};
 453 | 	
 454 | 	var nLen=pcmDatas.length;
 455 | 	if(index>nLen+1){
 456 | 		CLog("SampleData似乎传入了未重置chunk "+index+">"+nLen,3);
 457 | 	};
 458 | 	var size=0;
 459 | 	for(var i=index;i<nLen;i++){
 460 | 		size+=pcmDatas[i].length;
 461 | 	};
 462 | 	size=Math.max(0,size-Math.floor(offset));
 463 | 	
 464 | 	//采样 https://www.cnblogs.com/blqw/p/3782420.html
 465 | 	var step=pcmSampleRate/newSampleRate;
 466 | 	if(step>1){//新采样低于录音采样，进行抽样
 467 | 		size=Math.floor(size/step);
 468 | 	}else{//新采样高于录音采样不处理，省去了插值处理
 469 | 		step=1;
 470 | 		newSampleRate=pcmSampleRate;
 471 | 	};
 472 | 	
 473 | 	size+=frameNext.length;
 474 | 	var res=new Int16Array(size);
 475 | 	var idx=0;
 476 | 	//添加上一次不够一帧的剩余数据
 477 | 	for(var i=0;i<frameNext.length;i++){
 478 | 		res[idx]=frameNext[i];
 479 | 		idx++;
 480 | 	};
 481 | 	//处理数据
 482 | 	for (;index<nLen;index++) {
 483 | 		var o=pcmDatas[index];
 484 | 		var i=offset,il=o.length;
 485 | 		while(i<il){
 486 | 			//res[idx]=o[Math.round(i)]; 直接简单抽样
 487 | 			
 488 | 			//https://www.cnblogs.com/xiaoqi/p/6993912.html
 489 | 			//当前点=当前点+到后面一个点之间的增量，音质比直接简单抽样好些
 490 | 			var before = Math.floor(i);
 491 | 			var after = Math.ceil(i);
 492 | 			var atPoint = i - before;
 493 | 			
 494 | 			var beforeVal=o[before];
 495 | 			var afterVal=after<il ? o[after]
 496 | 				: (//后个点越界了，查找下一个数组
 497 | 					(pcmDatas[index+1]||[beforeVal])[0]||0
 498 | 				);
 499 | 			res[idx]=beforeVal+(afterVal-beforeVal)*atPoint;
 500 | 			
 501 | 			idx++;
 502 | 			i+=step;//抽样
 503 | 		};
 504 | 		offset=i-il;
 505 | 	};
 506 | 	//帧处理
 507 | 	frameNext=null;
 508 | 	var frameNextSize=res.length%frameSize;
 509 | 	if(frameNextSize>0){
 510 | 		var u8Pos=(res.length-frameNextSize)*2;
 511 | 		frameNext=new Int16Array(res.buffer.slice(u8Pos));
 512 | 		res=new Int16Array(res.buffer.slice(0,u8Pos));
 513 | 	};
 514 | 	
 515 | 	return {
 516 | 		index:index
 517 | 		,offset:offset
 518 | 		
 519 | 		,frameNext:frameNext
 520 | 		,sampleRate:newSampleRate
 521 | 		,data:res
 522 | 	};
 523 | };
 524 | 
 525 | 
 526 | /*计算音量百分比的一个方法
 527 | pcmAbsSum: pcm Int16所有采样的绝对值的和
 528 | pcmLength: pcm长度
 529 | 返回值：0-100，主要当做百分比用
 530 | 注意：这个不是分贝，因此没用volume当做名称*/
 531 | Recorder.PowerLevel=function(pcmAbsSum,pcmLength){
 532 | 	/*计算音量 https://blog.csdn.net/jody1989/article/details/73480259
 533 | 	更高灵敏度算法:
 534 | 		限定最大感应值10000
 535 | 			线性曲线：低音量不友好
 536 | 				power/10000*100 
 537 | 			对数曲线：低音量友好，但需限定最低感应值
 538 | 				(1+Math.log10(power/10000))*100
 539 | 	*/
 540 | 	var power=(pcmAbsSum/pcmLength) || 0;//NaN
 541 | 	var level;
 542 | 	if(power<1251){//1250的结果10%，更小的音量采用线性取值
 543 | 		level=Math.round(power/1250*10);
 544 | 	}else{
 545 | 		level=Math.round(Math.min(100,Math.max(0,(1+Math.log(power/10000)/Math.log(10))*100)));
 546 | 	};
 547 | 	return level;
 548 | };
 549 | 
 550 | /*计算音量，单位dBFS（满刻度相对电平）
 551 | maxSample: 为16位pcm采样的绝对值中最大的一个（计算峰值音量），或者为pcm中所有采样的绝对值的平局值
 552 | 返回值：-100~0 （最大值0dB，最小值-100代替-∞）
 553 | */
 554 | Recorder.PowerDBFS=function(maxSample){
 555 | 	var val=Math.max(0.1, maxSample||0),Pref=0x7FFF;
 556 | 	val=Math.min(val,Pref);
 557 | 	//https://www.logiclocmusic.com/can-you-tell-the-decibel/
 558 | 	//https://blog.csdn.net/qq_17256689/article/details/120442510
 559 | 	val=20*Math.log(val/Pref)/Math.log(10);
 560 | 	return Math.max(-100,Math.round(val));
 561 | };
 562 | 
 563 | 
 564 | 
 565 | 
 566 | //带时间的日志输出，可设为一个空函数来屏蔽日志输出
 567 | //CLog(msg,errOrLogMsg, logMsg...) err为数字时代表日志类型1:error 2:log默认 3:warn，否则当做内容输出，第一个参数不能是对象因为要拼接时间，后面可以接无数个输出参数
 568 | Recorder.CLog=function(msg,err){
 569 | 	var now=new Date();
 570 | 	var t=("0"+now.getMinutes()).substr(-2)
 571 | 		+":"+("0"+now.getSeconds()).substr(-2)
 572 | 		+"."+("00"+now.getMilliseconds()).substr(-3);
 573 | 	var recID=this&&this.envIn&&this.envCheck&&this.id;
 574 | 	var arr=["["+t+" "+RecTxt+(recID?":"+recID:"")+"]"+msg];
 575 | 	var a=arguments,console=window.console||{};
 576 | 	var i=2,fn=console.log;
 577 | 	if(typeof(err)=="number"){
 578 | 		fn=err==1?console.error:err==3?console.warn:fn;
 579 | 	}else{
 580 | 		i=1;
 581 | 	};
 582 | 	for(;i<a.length;i++){
 583 | 		arr.push(a[i]);
 584 | 	};
 585 | 	if(IsLoser){//古董浏览器，仅保证基本的可执行不代码异常
 586 | 		fn&&fn("[IsLoser]"+arr[0],arr.length>1?arr:"");
 587 | 	}else{
 588 | 		fn.apply(console,arr);
 589 | 	};
 590 | };
 591 | var CLog=function(){ Recorder.CLog.apply(this,arguments); };
 592 | var IsLoser=true;try{IsLoser=!console.log.apply;}catch(e){};
 593 | 
 594 | 
 595 | 
 596 | 
 597 | var ID=0;
 598 | function initFn(set){
 599 | 	this.id=++ID;
 600 | 	
 601 | 	//如果开启了流量统计，这里将发送一个图片请求
 602 | 	Traffic();
 603 | 	
 604 | 	
 605 | 	var o={
 606 | 		type:"mp3" //输出类型：mp3,wav，wav输出文件尺寸超大不推荐使用，但mp3编码支持会导致js文件超大，如果不需支持mp3可以使js文件大幅减小
 607 | 		,bitRate:16 //比特率 wav:16或8位，MP3：8kbps 1k/s，8kbps 2k/s 录音文件很小
 608 | 		
 609 | 		,sampleRate:16000 //采样率，wav格式大小=sampleRate*时间；mp3此项对低比特率有影响，高比特率几乎无影响。
 610 | 					//wav任意值，mp3取值范围：48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000
 611 | 					//采样率参考https://www.cnblogs.com/devin87/p/mp3-recorder.html
 612 | 		
 613 | 		,onProcess:NOOP //fn(buffers,powerLevel,bufferDuration,bufferSampleRate,newBufferIdx,asyncEnd) buffers=[[Int16,...],...]：缓冲的PCM数据，为从开始录音到现在的所有pcm片段；powerLevel：当前缓冲的音量级别0-100，bufferDuration：已缓冲时长，bufferSampleRate：缓冲使用的采样率（当type支持边录边转码(Worker)时，此采样率和设置的采样率相同，否则不一定相同）；newBufferIdx:本次回调新增的buffer起始索引；asyncEnd:fn() 如果onProcess是异步的(返回值为true时)，处理完成时需要调用此回调，如果不是异步的请忽略此参数，此方法回调时必须是真异步（不能真异步时需用setTimeout包裹）。onProcess返回值：如果返回true代表开启异步模式，在某些大量运算的场合异步是必须的，必须在异步处理完成时调用asyncEnd(不能真异步时需用setTimeout包裹)，在onProcess执行后新增的buffer会全部替换成空数组，因此本回调开头应立即将newBufferIdx到本次回调结尾位置的buffer全部保存到另外一个数组内，处理完成后写回buffers中本次回调的结尾位置。
 614 | 		
 615 | 		//*******高级设置******
 616 | 		//,sourceStream:MediaStream Object
 617 | 				//可选直接提供一个媒体流，从这个流中录制、实时处理音频数据（当前Recorder实例独享此流）；不提供时为普通的麦克风录音，由getUserMedia提供音频流（所有Recorder实例共享同一个流）
 618 | 				//比如：audio、video标签dom节点的captureStream方法（实验特性，不同浏览器支持程度不高）返回的流；WebRTC中的remote流；自己创建的流等
 619 | 				//注意：流内必须至少存在一条音轨(Audio Track)，比如audio标签必须等待到可以开始播放后才会有音轨，否则open会失败
 620 | 		
 621 | 		//,audioTrackSet:{ deviceId:"",groupId:"", autoGainControl:true, echoCancellation:true, noiseSuppression:true }
 622 | 				//普通麦克风录音时getUserMedia方法的audio配置参数，比如指定设备id，回声消除、降噪开关；注意：提供的任何配置值都不一定会生效
 623 | 				//由于麦克风是全局共享的，所以新配置后需要close掉以前的再重新open
 624 | 				//更多参考: https://developer.mozilla.org/en-US/docs/Web/API/MediaTrackConstraints
 625 | 		
 626 | 		//,disableEnvInFix:false 内部参数，禁用设备卡顿时音频输入丢失补偿功能
 627 | 		
 628 | 		//,takeoffEncodeChunk:NOOP //fn(chunkBytes) chunkBytes=[Uint8,...]：实时编码环境下接管编码器输出，当编码器实时编码出一块有效的二进制音频数据时实时回调此方法；参数为二进制的Uint8Array，就是编码出来的音频数据片段，所有的chunkBytes拼接在一起即为完整音频。本实现的想法最初由QQ2543775048提出
 629 | 				//当提供此回调方法时，将接管编码器的数据输出，编码器内部将放弃存储生成的音频数据；环境要求比较苛刻：如果当前环境不支持实时编码处理，将在open时直接走fail逻辑
 630 | 				//因此提供此回调后调用stop方法将无法获得有效的音频数据，因为编码器内没有音频数据，因此stop时返回的blob将是一个字节长度为0的blob
 631 | 				//目前只有mp3格式实现了实时编码，在支持实时处理的环境中将会实时的将编码出来的mp3片段通过此方法回调，所有的chunkBytes拼接到一起即为完整的mp3，此种拼接的结果比mock方法实时生成的音质更加，因为天然避免了首尾的静默
 632 | 				//目前除mp3外其他格式不可以提供此回调，提供了将在open时直接走fail逻辑
 633 | 	};
 634 | 	
 635 | 	for(var k in set){
 636 | 		o[k]=set[k];
 637 | 	};
 638 | 	this.set=o;
 639 | 	
 640 | 	this._S=9;//stop同步锁，stop可以阻止open过程中还未运行的start
 641 | 	this.Sync={O:9,C:9};//和Recorder.Sync一致，只不过这个是非全局的，仅用来简化代码逻辑，无实际作用
 642 | };
 643 | //同步锁，控制对Stream的竞争；用于close时中断异步的open；一个对象open如果变化了都要阻止close，Stream的控制权交个新的对象
 644 | Recorder.Sync={/*open*/O:9,/*close*/C:9};
 645 | 
 646 | Recorder.prototype=initFn.prototype={
 647 | 	CLog:CLog
 648 | 	
 649 | 	//流相关的数据存储在哪个对象里面；如果提供了sourceStream，数据直接存储在当前对象中，否则存储在全局
 650 | 	,_streamStore:function(){
 651 | 		if(this.set.sourceStream){
 652 | 			return this;
 653 | 		}else{
 654 | 			return Recorder;
 655 | 		}
 656 | 	}
 657 | 	
 658 | 	//打开录音资源True(),False(msg,isUserNotAllow)，需要调用close。注意：此方法是异步的；一般使用时打开，用完立即关闭；可重复调用，可用来测试是否能录音
 659 | 	,open:function(True,False){
 660 | 		var This=this,streamStore=This._streamStore();
 661 | 		True=True||NOOP;
 662 | 		var failCall=function(errMsg,isUserNotAllow){
 663 | 			isUserNotAllow=!!isUserNotAllow;
 664 | 			This.CLog("录音open失败："+errMsg+",isUserNotAllow:"+isUserNotAllow,1);
 665 | 			False&&False(errMsg,isUserNotAllow);
 666 | 		};
 667 | 		
 668 | 		var ok=function(){
 669 | 			This.CLog("open ok id:"+This.id);
 670 | 			True();
 671 | 			
 672 | 			This._SO=0;//解除stop对open中的start调用的阻止
 673 | 		};
 674 | 		
 675 | 		
 676 | 		//同步锁
 677 | 		var Lock=streamStore.Sync;
 678 | 		var lockOpen=++Lock.O,lockClose=Lock.C;
 679 | 		This._O=This._O_=lockOpen;//记住当前的open，如果变化了要阻止close，这里假定了新对象已取代当前对象并且不再使用
 680 | 		This._SO=This._S;//记住open过程中的stop，中途任何stop调用后都不能继续open中的start
 681 | 		var lockFail=function(){
 682 | 			//允许多次open，但不允许任何一次close，或者自身已经调用了关闭
 683 | 			if(lockClose!=Lock.C || !This._O){
 684 | 				var err="open被取消";
 685 | 				if(lockOpen==Lock.O){
 686 | 					//无新的open，已经调用了close进行取消，此处应让上次的close明确生效
 687 | 					This.close();
 688 | 				}else{
 689 | 					err="open被中断";
 690 | 				};
 691 | 				failCall(err);
 692 | 				return true;
 693 | 			};
 694 | 		};
 695 | 		
 696 | 		//环境配置检查
 697 | 		var checkMsg=This.envCheck({envName:"H5",canProcess:true});
 698 | 		if(checkMsg){
 699 | 			failCall("不能录音："+checkMsg);
 700 | 			return;
 701 | 		};
 702 | 		
 703 | 		
 704 | 		//***********已直接提供了音频流************
 705 | 		if(This.set.sourceStream){
 706 | 			if(!Recorder.GetContext()){
 707 | 				failCall("不支持此浏览器从流中获取录音");
 708 | 				return;
 709 | 			};
 710 | 			
 711 | 			Disconnect(streamStore);//可能已open过，直接先尝试断开
 712 | 			This.Stream=This.set.sourceStream;
 713 | 			This.Stream._call={};
 714 | 			
 715 | 			try{
 716 | 				Connect(streamStore);
 717 | 			}catch(e){
 718 | 				failCall("从流中打开录音失败："+e.message);
 719 | 				return;
 720 | 			}
 721 | 			ok();
 722 | 			return;
 723 | 		};
 724 | 		
 725 | 		
 726 | 		//***********打开麦克风得到全局的音频流************
 727 | 		var codeFail=function(code,msg){
 728 | 			try{//跨域的优先检测一下
 729 | 				window.top.a;
 730 | 			}catch(e){
 731 | 				failCall('无权录音(跨域，请尝试给iframe添加麦克风访问策略，如allow="camera;microphone")');
 732 | 				return;
 733 | 			};
 734 | 			
 735 | 			if(/Permission|Allow/i.test(code)){
 736 | 				failCall("用户拒绝了录音权限",true);
 737 | 			}else if(window.isSecureContext===false){
 738 | 				failCall("浏览器禁止不安全页面录音，可开启https解决");
 739 | 			}else if(/Found/i.test(code)){//可能是非安全环境导致的没有设备
 740 | 				failCall(msg+"，无可用麦克风");
 741 | 			}else{
 742 | 				failCall(msg);
 743 | 			};
 744 | 		};
 745 | 		
 746 | 		
 747 | 		//如果已打开并且有效就不要再打开了
 748 | 		if(Recorder.IsOpen()){
 749 | 			ok();
 750 | 			return;
 751 | 		};
 752 | 		if(!Recorder.Support()){
 753 | 			codeFail("","此浏览器不支持录音");
 754 | 			return;
 755 | 		};
 756 | 				
 757 | 		//请求权限，如果从未授权，一般浏览器会弹出权限请求弹框
 758 | 		var f1=function(stream){
 759 | 			//https://github.com/xiangyuecn/Recorder/issues/14 获取到的track.readyState!="live"，刚刚回调时可能是正常的，但过一下可能就被关掉了，原因不明。延迟一下保证真异步。对正常浏览器不影响
 760 | 			setTimeout(function(){
 761 | 				stream._call={};
 762 | 				var oldStream=Recorder.Stream;
 763 | 				if(oldStream){
 764 | 					Disconnect(); //直接断开已存在的，旧的Connect未完成会自动终止
 765 | 					stream._call=oldStream._call;
 766 | 				};
 767 | 				Recorder.Stream=stream;
 768 | 				if(lockFail())return;
 769 | 				
 770 | 				if(Recorder.IsOpen()){
 771 | 					if(oldStream)This.CLog("发现同时多次调用open",1);
 772 | 					
 773 | 					Connect(streamStore,1);
 774 | 					ok();
 775 | 				}else{
 776 | 					failCall("录音功能无效：无音频流");
 777 | 				};
 778 | 			},100);
 779 | 		};
 780 | 		var f2=function(e){
 781 | 			var code=e.name||e.message||e.code+":"+e;
 782 | 			This.CLog("请求录音权限错误",1,e);
 783 | 			
 784 | 			codeFail(code,"无法录音："+code);
 785 | 		};
 786 | 		
 787 | 		var trackSet={
 788 | 			noiseSuppression:false //默认禁用降噪，原声录制，免得移动端表现怪异（包括系统播放声音变小）
 789 | 			,echoCancellation:false //回声消除
 790 | 		};
 791 | 		var trackSet2=This.set.audioTrackSet;
 792 | 		for(var k in trackSet2)trackSet[k]=trackSet2[k];
 793 | 		trackSet.sampleRate=Recorder.Ctx.sampleRate;//必须指明采样率，不然手机上MediaRecorder采样率16k
 794 | 		
 795 | 		try{
 796 | 			var pro=Recorder.Scope[getUserMediaTxt]({audio:trackSet},f1,f2);
 797 | 		}catch(e){//不能设置trackSet就算了
 798 | 			This.CLog(getUserMediaTxt,3,e);
 799 | 			pro=Recorder.Scope[getUserMediaTxt]({audio:true},f1,f2);
 800 | 		};
 801 | 		if(pro&&pro.then){
 802 | 			pro.then(f1)[CatchTxt](f2); //fix 关键字，保证catch压缩时保持字符串形式
 803 | 		};
 804 | 	}
 805 | 	//关闭释放录音资源
 806 | 	,close:function(call){
 807 | 		call=call||NOOP;
 808 | 		
 809 | 		var This=this,streamStore=This._streamStore();
 810 | 		This._stop();
 811 | 		
 812 | 		var Lock=streamStore.Sync;
 813 | 		This._O=0;
 814 | 		if(This._O_!=Lock.O){
 815 | 			//唯一资源Stream的控制权已交给新对象，这里不能关闭。此处在每次都弹权限的浏览器内可能存在泄漏，新对象被拒绝权限可能不会调用close，忽略这种不处理
 816 | 			This.CLog("close被忽略（因为同时open了多个rec，只有最后一个会真正close）",3);
 817 | 			call();
 818 | 			return;
 819 | 		};
 820 | 		Lock.C++;//获得控制权
 821 | 		
 822 | 		Disconnect(streamStore);
 823 | 		
 824 | 		This.CLog("close");
 825 | 		call();
 826 | 	}
 827 | 	
 828 | 	
 829 | 	
 830 | 	
 831 | 	
 832 | 	/*模拟一段录音数据，后面可以调用stop进行编码，需提供pcm数据[1,2,3...]，pcm的采样率*/
 833 | 	,mock:function(pcmData,pcmSampleRate){
 834 | 		var This=this;
 835 | 		This._stop();//清理掉已有的资源
 836 | 		
 837 | 		This.isMock=1;
 838 | 		This.mockEnvInfo=null;
 839 | 		This.buffers=[pcmData];
 840 | 		This.recSize=pcmData.length;
 841 | 		This[srcSampleRateTxt]=pcmSampleRate;
 842 | 		return This;
 843 | 	}
 844 | 	,envCheck:function(envInfo){//平台环境下的可用性检查，任何时候都可以调用检查，返回errMsg:""正常，"失败原因"
 845 | 		//envInfo={envName:"H5",canProcess:true}
 846 | 		var errMsg,This=this,set=This.set;
 847 | 		
 848 | 		//检测CPU的数字字节序，TypedArray字节序是个迷，直接拒绝罕见的大端模式，因为找不到这种CPU进行测试
 849 | 		var tag="CPU_BE";
 850 | 		if(!errMsg && !Recorder[tag] && window.Int8Array && !new Int8Array(new Int32Array([1]).buffer)[0]){
 851 | 			Traffic(tag); //如果开启了流量统计，这里将发送一个图片请求
 852 | 			errMsg="不支持"+tag+"架构";
 853 | 		};
 854 | 		
 855 | 		//编码器检查环境下配置是否可用
 856 | 		if(!errMsg){
 857 | 			var type=set.type;
 858 | 			if(This[type+"_envCheck"]){//编码器已实现环境检查
 859 | 				errMsg=This[type+"_envCheck"](envInfo,set);
 860 | 			}else{//未实现检查的手动检查配置是否有效
 861 | 				if(set.takeoffEncodeChunk){
 862 | 					errMsg=type+"类型"+(This[type]?"":"(未加载编码器)")+"不支持设置takeoffEncodeChunk";
 863 | 				};
 864 | 			};
 865 | 		};
 866 | 		
 867 | 		return errMsg||"";
 868 | 	}
 869 | 	,envStart:function(mockEnvInfo,sampleRate){//平台环境相关的start调用
 870 | 		var This=this,set=This.set;
 871 | 		This.isMock=mockEnvInfo?1:0;//非H5环境需要启用mock，并提供envCheck需要的环境信息
 872 | 		This.mockEnvInfo=mockEnvInfo;
 873 | 		This.buffers=[];//数据缓冲
 874 | 		This.recSize=0;//数据大小
 875 | 		
 876 | 		This.envInLast=0;//envIn接收到最后录音内容的时间
 877 | 		This.envInFirst=0;//envIn接收到的首个录音内容的录制时间
 878 | 		This.envInFix=0;//补偿的总时间
 879 | 		This.envInFixTs=[];//补偿计数列表
 880 | 		
 881 | 		//engineCtx需要提前确定最终的采样率
 882 | 		var setSr=set[sampleRateTxt];
 883 | 		if(setSr>sampleRate){
 884 | 			set[sampleRateTxt]=sampleRate;
 885 | 		}else{ setSr=0 }
 886 | 		This[srcSampleRateTxt]=sampleRate;
 887 | 		This.CLog(srcSampleRateTxt+": "+sampleRate+" set."+sampleRateTxt+": "+set[sampleRateTxt]+(setSr?" 忽略"+setSr:""), setSr?3:0);
 888 | 		
 889 | 		This.engineCtx=0;
 890 | 		//此类型有边录边转码(Worker)支持
 891 | 		if(This[set.type+"_start"]){
 892 | 			var engineCtx=This.engineCtx=This[set.type+"_start"](set);
 893 | 			if(engineCtx){
 894 | 				engineCtx.pcmDatas=[];
 895 | 				engineCtx.pcmSize=0;
 896 | 			};
 897 | 		};
 898 | 	}
 899 | 	,envResume:function(){//和平台环境无关的恢复录音
 900 | 		//重新开始计数
 901 | 		this.envInFixTs=[];
 902 | 	}
 903 | 	,envIn:function(pcm,sum){//和平台环境无关的pcm[Int16]输入
 904 | 		var This=this,set=This.set,engineCtx=This.engineCtx;
 905 | 		var bufferSampleRate=This[srcSampleRateTxt];
 906 | 		var size=pcm.length;
 907 | 		var powerLevel=Recorder.PowerLevel(sum,size);
 908 | 		
 909 | 		var buffers=This.buffers;
 910 | 		var bufferFirstIdx=buffers.length;//之前的buffer都是经过onProcess处理好的，不允许再修改
 911 | 		buffers.push(pcm);
 912 | 		
 913 | 		//有engineCtx时会被覆盖，这里保存一份
 914 | 		var buffersThis=buffers;
 915 | 		var bufferFirstIdxThis=bufferFirstIdx;
 916 | 		
 917 | 		//卡顿丢失补偿：因为设备很卡的时候导致H5接收到的数据量不够造成播放时候变速，结果比实际的时长要短，此处保证了不会变短，但不能修复丢失的音频数据造成音质变差。当前算法采用输入时间侦测下一帧是否需要添加补偿帧，需要(6次输入||超过1秒)以上才会开始侦测，如果滑动窗口内丢失超过1/3就会进行补偿
 918 | 		var now=Date.now();
 919 | 		var pcmTime=Math.round(size/bufferSampleRate*1000);
 920 | 		This.envInLast=now;
 921 | 		if(This.buffers.length==1){//记下首个录音数据的录制时间
 922 | 			This.envInFirst=now-pcmTime;
 923 | 		};
 924 | 		var envInFixTs=This.envInFixTs;
 925 | 		envInFixTs.splice(0,0,{t:now,d:pcmTime});
 926 | 		//保留3秒的计数滑动窗口，另外超过3秒的停顿不补偿
 927 | 		var tsInStart=now,tsPcm=0;
 928 | 		for(var i=0;i<envInFixTs.length;i++){
 929 | 			var o=envInFixTs[i];
 930 | 			if(now-o.t>3000){
 931 | 				envInFixTs.length=i;
 932 | 				break;
 933 | 			};
 934 | 			tsInStart=o.t;
 935 | 			tsPcm+=o.d;
 936 | 		};
 937 | 		//达到需要的数据量，开始侦测是否需要补偿
 938 | 		var tsInPrev=envInFixTs[1];
 939 | 		var tsIn=now-tsInStart;
 940 | 		var lost=tsIn-tsPcm;
 941 | 		if( lost>tsIn/3 && (tsInPrev&&tsIn>1000 || envInFixTs.length>=6) ){
 942 | 			//丢失过多，开始执行补偿
 943 | 			var addTime=now-tsInPrev.t-pcmTime;//距离上次输入丢失这么多ms
 944 | 			if(addTime>pcmTime/5){//丢失超过本帧的1/5
 945 | 				var fixOpen=!set.disableEnvInFix;
 946 | 				This.CLog("["+now+"]"+(fixOpen?"":"未")+"补偿"+addTime+"ms",3);
 947 | 				This.envInFix+=addTime;
 948 | 				
 949 | 				//用静默进行补偿
 950 | 				if(fixOpen){
 951 | 					var addPcm=new Int16Array(addTime*bufferSampleRate/1000);
 952 | 					size+=addPcm.length;
 953 | 					buffers.push(addPcm);
 954 | 				};
 955 | 			};
 956 | 		};
 957 | 		
 958 | 		
 959 | 		var sizeOld=This.recSize,addSize=size;
 960 | 		var bufferSize=sizeOld+addSize;
 961 | 		This.recSize=bufferSize;//此值在onProcess后需要修正，可能新数据被修改
 962 | 		
 963 | 		
 964 | 		//此类型有边录边转码(Worker)支持，开启实时转码
 965 | 		if(engineCtx){
 966 | 			//转换成set的采样率
 967 | 			var chunkInfo=Recorder.SampleData(buffers,bufferSampleRate,set[sampleRateTxt],engineCtx.chunkInfo);
 968 | 			engineCtx.chunkInfo=chunkInfo;
 969 | 			
 970 | 			sizeOld=engineCtx.pcmSize;
 971 | 			addSize=chunkInfo.data.length;
 972 | 			bufferSize=sizeOld+addSize;
 973 | 			engineCtx.pcmSize=bufferSize;//此值在onProcess后需要修正，可能新数据被修改
 974 | 			
 975 | 			buffers=engineCtx.pcmDatas;
 976 | 			bufferFirstIdx=buffers.length;
 977 | 			buffers.push(chunkInfo.data);
 978 | 			bufferSampleRate=chunkInfo[sampleRateTxt];
 979 | 		};
 980 | 		
 981 | 		var duration=Math.round(bufferSize/bufferSampleRate*1000);
 982 | 		var bufferNextIdx=buffers.length;
 983 | 		var bufferNextIdxThis=buffersThis.length;
 984 | 		
 985 | 		//允许异步处理buffer数据
 986 | 		var asyncEnd=function(){
 987 | 			//重新计算size，异步的早已减去添加的，同步的需去掉本次添加的然后重新计算
 988 | 			var num=asyncBegin?0:-addSize;
 989 | 			var hasClear=buffers[0]==null;
 990 | 			for(var i=bufferFirstIdx;i<bufferNextIdx;i++){
 991 | 				var buffer=buffers[i];
 992 | 				if(buffer==null){//已被主动释放内存，比如长时间实时传输录音时
 993 | 					hasClear=1;
 994 | 				}else{
 995 | 					num+=buffer.length;
 996 | 					
 997 | 					//推入后台边录边转码
 998 | 					if(engineCtx&&buffer.length){
 999 | 						This[set.type+"_encode"](engineCtx,buffer);
1000 | 					};
1001 | 				};
1002 | 			};
1003 | 			
1004 | 			//同步清理This.buffers，不管buffers到底清了多少个，buffersThis是使用不到的进行全清
1005 | 			if(hasClear && engineCtx){
1006 | 				var i=bufferFirstIdxThis;
1007 | 				if(buffersThis[0]){
1008 | 					i=0;
1009 | 				};
1010 | 				for(;i<bufferNextIdxThis;i++){
1011 | 					buffersThis[i]=null;
1012 | 				};
1013 | 			};
1014 | 			
1015 | 			//统计修改后的size，如果异步发生clear要原样加回来，同步的无需操作
1016 | 			if(hasClear){
1017 | 				num=asyncBegin?addSize:0;
1018 | 				
1019 | 				buffers[0]=null;//彻底被清理
1020 | 			};
1021 | 			if(engineCtx){
1022 | 				engineCtx.pcmSize+=num;
1023 | 			}else{
1024 | 				This.recSize+=num;
1025 | 			};
1026 | 		};
1027 | 		//实时回调处理数据，允许修改或替换上次回调以来新增的数据 ，但是不允许修改已处理过的，不允许增删第一维数组 ，允许将第二维数组任意修改替换成空数组也可以
1028 | 		var asyncBegin=0,procTxt="rec.set.onProcess";
1029 | 		try{
1030 | 			asyncBegin=set.onProcess(buffers,powerLevel,duration,bufferSampleRate,bufferFirstIdx,asyncEnd);
1031 | 		}catch(e){
1032 | 			//此错误显示不要用CLog，这样控制台内相同内容不会重复打印
1033 | 			console.error(procTxt+"回调出错是不允许的，需保证不会抛异常",e);
1034 | 		};
1035 | 		
1036 | 		var slowT=Date.now()-now;
1037 | 		if(slowT>10 && This.envInFirst-now>1000){ //1秒后开始onProcess性能监测
1038 | 			This.CLog(procTxt+"低性能，耗时"+slowT+"ms",3);
1039 | 		};
1040 | 		
1041 | 		if(asyncBegin===true){
1042 | 			//开启了异步模式，onProcess已接管buffers新数据，立即清空，避免出现未处理的数据
1043 | 			var hasClear=0;
1044 | 			for(var i=bufferFirstIdx;i<bufferNextIdx;i++){
1045 | 				if(buffers[i]==null){//已被主动释放内存，比如长时间实时传输录音时 ，但又要开启异步模式，此种情况是非法的
1046 | 					hasClear=1;
1047 | 				}else{
1048 | 					buffers[i]=new Int16Array(0);
1049 | 				};
1050 | 			};
1051 | 			
1052 | 			if(hasClear){
1053 | 				This.CLog("未进入异步前不能清除buffers",3);
1054 | 			}else{
1055 | 				//还原size，异步结束后再统计仅修改后的size，如果发生clear要原样加回来
1056 | 				if(engineCtx){
1057 | 					engineCtx.pcmSize-=addSize;
1058 | 				}else{
1059 | 					This.recSize-=addSize;
1060 | 				};
1061 | 			};
1062 | 		}else{
1063 | 			asyncEnd();
1064 | 		};
1065 | 	}
1066 | 	
1067 | 	
1068 | 	
1069 | 	
1070 | 	//开始录音，需先调用open；只要open成功时，调用此方法是安全的，如果未open强行调用导致的内部错误将不会有任何提示，stop时自然能得到错误
1071 | 	,start:function(){
1072 | 		var This=this,ctx=Recorder.Ctx;
1073 | 		
1074 | 		var isOpen=1;
1075 | 		if(This.set.sourceStream){//直接提供了流，仅判断是否调用了open
1076 | 			if(!This.Stream){
1077 | 				isOpen=0;
1078 | 			}
1079 | 		}else if(!Recorder.IsOpen()){//监测全局麦克风是否打开并且有效
1080 | 			isOpen=0;
1081 | 		};
1082 | 		if(!isOpen){
1083 | 			This.CLog("未open",1);
1084 | 			return;
1085 | 		};
1086 | 		This.CLog("开始录音");
1087 | 		
1088 | 		This._stop();
1089 | 		This.state=3;//0未录音 1录音中 2暂停 3等待ctx激活
1090 | 		This.envStart(null, ctx[sampleRateTxt]);
1091 | 		
1092 | 		//检查open过程中stop是否已经调用过
1093 | 		if(This._SO&&This._SO+1!=This._S){//上面调用过一次 _stop
1094 | 			//open未完成就调用了stop，此种情况终止start。也应尽量避免出现此情况
1095 | 			This.CLog("start被中断",3);
1096 | 			return;
1097 | 		};
1098 | 		This._SO=0;
1099 | 		
1100 | 		var end=function(){
1101 | 			if(This.state==3){
1102 | 				This.state=1;
1103 | 				This.resume();
1104 | 			}
1105 | 		};
1106 | 		if(ctx.state=="suspended"){
1107 | 			var tag="AudioContext resume: ";
1108 | 			This.CLog(tag+"wait...");
1109 | 			ctx.resume().then(function(){
1110 | 				This.CLog(tag+ctx.state);
1111 | 				end();
1112 | 			})[CatchTxt](function(e){ //比较少见，可能对录音没有影响
1113 | 				This.CLog(tag+ctx.state+" 可能无法录音："+e.message,1,e);
1114 | 				end();
1115 | 			});
1116 | 		}else{
1117 | 			end();
1118 | 		};
1119 | 	}
1120 | 	/*暂停录音*/
1121 | 	,pause:function(){
1122 | 		var This=this;
1123 | 		if(This.state){
1124 | 			This.state=2;
1125 | 			This.CLog("pause");
1126 | 			delete This._streamStore().Stream._call[This.id];
1127 | 		};
1128 | 	}
1129 | 	/*恢复录音*/
1130 | 	,resume:function(){
1131 | 		var This=this;
1132 | 		if(This.state){
1133 | 			This.state=1;
1134 | 			This.CLog("resume");
1135 | 			This.envResume();
1136 | 			
1137 | 			var stream=This._streamStore().Stream;
1138 | 			stream._call[This.id]=function(pcm,sum){
1139 | 				if(This.state==1){
1140 | 					This.envIn(pcm,sum);
1141 | 				};
1142 | 			};
1143 | 			ConnAlive(stream);//AudioWorklet只会在ctx激活后运行
1144 | 		};
1145 | 	}
1146 | 	
1147 | 	
1148 | 	
1149 | 	
1150 | 	,_stop:function(keepEngine){
1151 | 		var This=this,set=This.set;
1152 | 		if(!This.isMock){
1153 | 			This._S++;
1154 | 		};
1155 | 		if(This.state){
1156 | 			This.pause();
1157 | 			This.state=0;
1158 | 		};
1159 | 		if(!keepEngine && This[set.type+"_stop"]){
1160 | 			This[set.type+"_stop"](This.engineCtx);
1161 | 			This.engineCtx=0;
1162 | 		};
1163 | 	}
1164 | 	/*
1165 | 	结束录音并返回录音数据blob对象
1166 | 		True(blob,duration) blob：录音数据audio/mp3|wav格式
1167 | 							duration：录音时长，单位毫秒
1168 | 		False(msg)
1169 | 		autoClose:false 可选，是否自动调用close，默认为false
1170 | 	*/
1171 | 	,stop:function(True,False,autoClose){
1172 | 		var This=this,set=This.set,t1;
1173 | 		var envInMS=This.envInLast-This.envInFirst, envInLen=envInMS&&This.buffers.length; //可能未start
1174 | 		This.CLog("stop 和start时差"+(envInMS?envInMS+"ms 补偿"+This.envInFix+"ms"+" envIn:"+envInLen+" fps:"+(envInLen/envInMS*1000).toFixed(1):"-"));
1175 | 		
1176 | 		var end=function(){
1177 | 			This._stop();//彻底关掉engineCtx
1178 | 			if(autoClose){
1179 | 				This.close();
1180 | 			};
1181 | 		};
1182 | 		var err=function(msg){
1183 | 			This.CLog("结束录音失败："+msg,1);
1184 | 			False&&False(msg);
1185 | 			end();
1186 | 		};
1187 | 		var ok=function(blob,duration){
1188 | 			This.CLog("结束录音 编码花"+(Date.now()-t1)+"ms 音频时长"+duration+"ms 文件大小"+blob.size+"b");
1189 | 			if(set.takeoffEncodeChunk){//接管了输出，此时blob长度为0
1190 | 				This.CLog("启用takeoffEncodeChunk后stop返回的blob长度为0不提供音频数据",3);
1191 | 			}else if(blob.size<Math.max(100,duration/2)){//1秒小于0.5k？
1192 | 				err("生成的"+set.type+"无效");
1193 | 				return;
1194 | 			};
1195 | 			True&&True(blob,duration);
1196 | 			end();
1197 | 		};
1198 | 		if(!This.isMock){
1199 | 			var isCtxWait=This.state==3;
1200 | 			if(!This.state || isCtxWait){
1201 | 				err("未开始录音"+(isCtxWait?"，开始录音前无用户交互导致AudioContext未运行":""));
1202 | 				return;
1203 | 			};
1204 | 			This._stop(true);
1205 | 		};
1206 | 		var size=This.recSize;
1207 | 		if(!size){
1208 | 			err("未采集到录音");
1209 | 			return;
1210 | 		};
1211 | 		if(!This.buffers[0]){
1212 | 			err("音频buffers被释放");
1213 | 			return;
1214 | 		};
1215 | 		if(!This[set.type]){
1216 | 			err("未加载"+set.type+"编码器");
1217 | 			return;
1218 | 		};
1219 | 		
1220 | 		//环境配置检查，此处仅针对mock调用，因为open已经检查过了
1221 | 		if(This.isMock){
1222 | 			var checkMsg=This.envCheck(This.mockEnvInfo||{envName:"mock",canProcess:false});//没有提供环境信息的mock时没有onProcess回调
1223 | 			if(checkMsg){
1224 | 				err("录音错误："+checkMsg);
1225 | 				return;
1226 | 			};
1227 | 		};
1228 | 		
1229 | 		//此类型有边录边转码(Worker)支持
1230 | 		var engineCtx=This.engineCtx;
1231 | 		if(This[set.type+"_complete"]&&engineCtx){
1232 | 			var duration=Math.round(engineCtx.pcmSize/set[sampleRateTxt]*1000);//采用后的数据长度和buffers的长度可能微小的不一致，是采样率连续转换的精度问题
1233 | 			
1234 | 			t1=Date.now();
1235 | 			This[set.type+"_complete"](engineCtx,function(blob){
1236 | 				ok(blob,duration);
1237 | 			},err);
1238 | 			return;
1239 | 		};
1240 | 		
1241 | 		//标准UI线程转码，调整采样率
1242 | 		t1=Date.now();
1243 | 		var chunk=Recorder.SampleData(This.buffers,This[srcSampleRateTxt],set[sampleRateTxt]);
1244 | 		
1245 | 		set[sampleRateTxt]=chunk[sampleRateTxt];
1246 | 		var res=chunk.data;
1247 | 		var duration=Math.round(res.length/set[sampleRateTxt]*1000);
1248 | 		
1249 | 		This.CLog("采样"+size+"->"+res.length+" 花:"+(Date.now()-t1)+"ms");
1250 | 		
1251 | 		setTimeout(function(){
1252 | 			t1=Date.now();
1253 | 			This[set.type](res,function(blob){
1254 | 				ok(blob,duration);
1255 | 			},function(msg){
1256 | 				err(msg);
1257 | 			});
1258 | 		});
1259 | 	}
1260 | 
1261 | };
1262 | 
1263 | if(window[RecTxt]){
1264 | 	CLog("重复引入"+RecTxt,3);
1265 | 	window[RecTxt].Destroy();
1266 | };
1267 | window[RecTxt]=Recorder;
1268 | 
1269 | 
1270 | 
1271 | 
1272 | //=======从WebM字节流中提取pcm数据，提取成功返回Float32Array，失败返回null||-1=====
1273 | var WebM_Extract=function(inBytes, scope){
1274 | 	if(!scope.pos){
1275 | 		scope.pos=[0]; scope.tracks={}; scope.bytes=[];
1276 | 	};
1277 | 	var tracks=scope.tracks, position=[scope.pos[0]];
1278 | 	var endPos=function(){ scope.pos[0]=position[0] };
1279 | 	
1280 | 	var sBL=scope.bytes.length;
1281 | 	var bytes=new Uint8Array(sBL+inBytes.length);
1282 | 	bytes.set(scope.bytes); bytes.set(inBytes,sBL);
1283 | 	scope.bytes=bytes;
1284 | 	
1285 | 	//先读取文件头和Track信息
1286 | 	if(!scope._ht){
1287 | 		readMatroskaVInt(bytes, position);//EBML Header
1288 | 		readMatroskaBlock(bytes, position);//跳过EBML Header内容
1289 | 		if(!BytesEq(readMatroskaVInt(bytes, position), [0x18,0x53,0x80,0x67])){
1290 | 			return;//未识别到Segment
1291 | 		}
1292 | 		readMatroskaVInt(bytes, position);//跳过Segment长度值
1293 | 		while(position[0]<bytes.length){
1294 | 			var eid0=readMatroskaVInt(bytes, position);
1295 | 			var bytes0=readMatroskaBlock(bytes, position);
1296 | 			var pos0=[0],audioIdx=0;
1297 | 			if(!bytes0)return;//数据不全，等待缓冲
1298 | 			//Track完整数据，循环读取TrackEntry
1299 | 			if(BytesEq(eid0, [0x16,0x54,0xAE,0x6B])){
1300 | 				while(pos0[0]<bytes0.length){
1301 | 					var eid1=readMatroskaVInt(bytes0, pos0);
1302 | 					var bytes1=readMatroskaBlock(bytes0, pos0);
1303 | 					var pos1=[0],track={channels:0,sampleRate:0};
1304 | 					if(BytesEq(eid1, [0xAE])){//TrackEntry
1305 | 						while(pos1[0]<bytes1.length){
1306 | 							var eid2=readMatroskaVInt(bytes1, pos1);
1307 | 							var bytes2=readMatroskaBlock(bytes1, pos1);
1308 | 							var pos2=[0];
1309 | 							if(BytesEq(eid2, [0xD7])){//Track Number
1310 | 								var val=BytesInt(bytes2);
1311 | 								track.number=val;
1312 | 								tracks[val]=track;
1313 | 							}else if(BytesEq(eid2, [0x83])){//Track Type
1314 | 								var val=BytesInt(bytes2);
1315 | 								if(val==1) track.type="video";
1316 | 								else if(val==2) {
1317 | 									track.type="audio";
1318 | 									if(!audioIdx) scope.track0=track;
1319 | 									track.idx=audioIdx++;
1320 | 								}else track.type="Type-"+val;
1321 | 							}else if(BytesEq(eid2, [0x86])){//Track Codec
1322 | 								var str="";
1323 | 								for(var i=0;i<bytes2.length;i++){
1324 | 									str+=String.fromCharCode(bytes2[i]);
1325 | 								}
1326 | 								track.codec=str;
1327 | 							}else if(BytesEq(eid2, [0xE1])){
1328 | 								while(pos2[0]<bytes2.length){//循环读取 Audio 属性
1329 | 									var eid3=readMatroskaVInt(bytes2, pos2);
1330 | 									var bytes3=readMatroskaBlock(bytes2, pos2);
1331 | 									//采样率、位数、声道数
1332 | 									if(BytesEq(eid3, [0xB5])){
1333 | 										var val=0,arr=new Uint8Array(bytes3.reverse()).buffer;
1334 | 										if(bytes3.length==4) val=new Float32Array(arr)[0];
1335 | 										else if(bytes3.length==8) val=new Float64Array(arr)[0];
1336 | 										else CLog("WebM Track !Float",1,bytes3);
1337 | 										track[sampleRateTxt]=Math.round(val);
1338 | 									}else if(BytesEq(eid3, [0x62,0x64])) track.bitDepth=BytesInt(bytes3);
1339 | 									else if(BytesEq(eid3, [0x9F])) track.channels=BytesInt(bytes3);
1340 | 								}
1341 | 							}
1342 | 						}
1343 | 					}
1344 | 				};
1345 | 				scope._ht=1;
1346 | 				CLog("WebM Tracks",tracks);
1347 | 				endPos();
1348 | 				break;
1349 | 			}
1350 | 		}
1351 | 	}
1352 | 	
1353 | 	//校验音频参数信息，如果不符合代码要求，统统拒绝处理
1354 | 	var track0=scope.track0;
1355 | 	if(!track0)return;
1356 | 	if(track0.bitDepth==16 && /FLOAT/i.test(track0.codec)){
1357 | 		track0.bitDepth=32; //chrome v66 实际为浮点数
1358 | 		CLog("WebM 16改32位",3);
1359 | 	}
1360 | 	if(track0[sampleRateTxt]!=scope[sampleRateTxt] || track0.bitDepth!=32 || track0.channels<1 || !/(\b|_)PCM\b/i.test(track0.codec)){
1361 | 		scope.bytes=[];//格式非预期 无法处理，清空缓冲数据
1362 | 		if(!scope.bad)CLog("WebM Track非预期",3,scope);
1363 | 		scope.bad=1;
1364 | 		return -1;
1365 | 	}
1366 | 	
1367 | 	//循环读取Cluster内的SimpleBlock
1368 | 	var datas=[],dataLen=0;
1369 | 	while(position[0]<bytes.length){
1370 | 		var eid1=readMatroskaVInt(bytes, position);
1371 | 		var bytes1=readMatroskaBlock(bytes, position);
1372 | 		if(!bytes1)break;//数据不全，等待缓冲
1373 | 		if(BytesEq(eid1, [0xA3])){//SimpleBlock完整数据
1374 | 			var trackNo=bytes1[0]&0xf;
1375 | 			var track=tracks[trackNo];
1376 | 			if(!track){//不可能没有，数据出错？
1377 | 				CLog("WebM !Track"+trackNo,1,tracks);
1378 | 			}else if(track.idx===0){
1379 | 				var u8arr=new Uint8Array(bytes1.length-4);
1380 | 				for(var i=4;i<bytes1.length;i++){
1381 | 					u8arr[i-4]=bytes1[i];
1382 | 				}
1383 | 				datas.push(u8arr); dataLen+=u8arr.length;
1384 | 			}
1385 | 		}
1386 | 		endPos();
1387 | 	}
1388 | 	
1389 | 	if(dataLen){
1390 | 		var more=new Uint8Array(bytes.length-scope.pos[0]);
1391 | 		more.set(bytes.subarray(scope.pos[0]));
1392 | 		scope.bytes=more; //清理已读取了的缓冲数据
1393 | 		scope.pos[0]=0;
1394 | 		
1395 | 		var u8arr=new Uint8Array(dataLen); //已获取的音频数据
1396 | 		for(var i=0,i2=0;i<datas.length;i++){
1397 | 			u8arr.set(datas[i],i2);
1398 | 			i2+=datas[i].length;
1399 | 		}
1400 | 		var arr=new Float32Array(u8arr.buffer);
1401 | 		
1402 | 		if(track0.channels>1){//多声道，提取一个声道
1403 | 			var arr2=[];
1404 | 			for(var i=0;i<arr.length;){
1405 | 				arr2.push(arr[i]);
1406 | 				i+=track0.channels;
1407 | 			}
1408 | 			arr=new Float32Array(arr2);
1409 | 		};
1410 | 		return arr;
1411 | 	}
1412 | };
1413 | //两个字节数组内容是否相同
1414 | var BytesEq=function(bytes1,bytes2){
1415 | 	if(!bytes1 || bytes1.length!=bytes2.length) return false;
1416 | 	if(bytes1.length==1) return bytes1[0]==bytes2[0];
1417 | 	for(var i=0;i<bytes1.length;i++){
1418 | 		if(bytes1[i]!=bytes2[i]) return false;
1419 | 	}
1420 | 	return true;
1421 | };
1422 | //字节数组BE转成int数字
1423 | var BytesInt=function(bytes){
1424 | 	var s="";//0-8字节，js位运算只支持4字节
1425 | 	for(var i=0;i<bytes.length;i++){var n=bytes[i];s+=(n<16?"0":"")+n.toString(16)};
1426 | 	return parseInt(s,16)||0;
1427 | };
1428 | //读取一个可变长数值字节数组
1429 | var readMatroskaVInt=function(arr,pos,trim){
1430 | 	var i=pos[0];
1431 | 	if(i>=arr.length)return;
1432 | 	var b0=arr[i],b2=("0000000"+b0.toString(2)).substr(-8);
1433 | 	var m=/^(0*1)(\d*)$/.exec(b2);
1434 | 	if(!m)return;
1435 | 	var len=m[1].length, val=[];
1436 | 	if(i+len>arr.length)return;
1437 | 	for(var i2=0;i2<len;i2++){ val[i2]=arr[i]; i++; }
1438 | 	if(trim) val[0]=parseInt(m[2]||'0',2);
1439 | 	pos[0]=i;
1440 | 	return val;
1441 | };
1442 | //读取一个自带长度的内容字节数组
1443 | var readMatroskaBlock=function(arr,pos){
1444 | 	var lenVal=readMatroskaVInt(arr,pos,1);
1445 | 	if(!lenVal)return;
1446 | 	var len=BytesInt(lenVal);
1447 | 	var i=pos[0], val=[];
1448 | 	if(len<0x7FFFFFFF){ //超大值代表没有长度
1449 | 		if(i+len>arr.length)return;
1450 | 		for(var i2=0;i2<len;i2++){ val[i2]=arr[i]; i++; }
1451 | 	}
1452 | 	pos[0]=i;
1453 | 	return val;
1454 | };
1455 | //=====End WebM读取=====
1456 | 
1457 | 
1458 | 
1459 | 
1460 | //流量统计用1像素图片地址，设置为空将不参与统计
1461 | Recorder.TrafficImgUrl="//ia.51.la/go1?id=20469973&pvFlag=1";
1462 | var Traffic=Recorder.Traffic=function(report){
1463 | 	report=report?"/"+RecTxt+"/Report/"+report:"";
1464 | 	var imgUrl=Recorder.TrafficImgUrl;
1465 | 	if(imgUrl){
1466 | 		var data=Recorder.Traffic;
1467 | 		var m=/^(https?:..[^\/#]*\/?)[^#]*/i.exec(location.href)||[];
1468 | 		var host=(m[1]||"http://file/");
1469 | 		var idf=(m[0]||host)+report;
1470 | 		
1471 | 		if(imgUrl.indexOf("//")==0){
1472 | 			//给url加上http前缀，如果是file协议下，不加前缀没法用
1473 | 			if(/^https:/i.test(idf)){
1474 | 				imgUrl="https:"+imgUrl;
1475 | 			}else{
1476 | 				imgUrl="http:"+imgUrl;
1477 | 			};
1478 | 		};
1479 | 		if(report){
1480 | 			imgUrl=imgUrl+"&cu="+encodeURIComponent(host+report);
1481 | 		};
1482 | 		
1483 | 		if(!data[idf]){
1484 | 			data[idf]=1;
1485 | 			
1486 | 			var img=new Image();
1487 | 			img.src=imgUrl;
1488 | 			CLog("Traffic Analysis Image: "+(report||RecTxt+".TrafficImgUrl="+Recorder.TrafficImgUrl));
1489 | 		};
1490 | 	};
1491 | };
1492 | 
1493 | }));


--------------------------------------------------------------------------------
/web/wav.js:
--------------------------------------------------------------------------------
 1 | /*
 2 | wav编码器+编码引擎
 3 | https://github.com/xiangyuecn/Recorder
 4 | 
 5 | 当然最佳推荐使用mp3、wav格式，代码也是优先照顾这两种格式
 6 | 浏览器支持情况
 7 | https://developer.mozilla.org/en-US/docs/Web/HTML/Supported_media_formats
 8 | 
 9 | 编码原理：给pcm数据加上一个44直接的wav头即成wav文件；pcm数据就是Recorder中的buffers原始数据（重新采样），16位时为LE小端模式（Little Endian），实质上是未经过任何编码处理
10 | */
11 | (function(){
12 | "use strict";
13 | 
14 | Recorder.prototype.enc_wav={
15 | 	stable:true
16 | 	,testmsg:"支持位数8位、16位（填在比特率里面），采样率取值无限制"
17 | };
18 | Recorder.prototype.wav=function(res,True,False){
19 | 		var This=this,set=This.set
20 | 			,size=res.length
21 | 			,sampleRate=set.sampleRate
22 | 			,bitRate=set.bitRate==8?8:16;
23 | 		
24 | 		//编码数据 https://github.com/mattdiamond/Recorderjs https://www.cnblogs.com/blqw/p/3782420.html https://www.cnblogs.com/xiaoqi/p/6993912.html
25 | 		var dataLength=size*(bitRate/8);
26 | 		var buffer=new ArrayBuffer(44+dataLength);
27 | 		var data=new DataView(buffer);
28 | 		
29 | 		var offset=0;
30 | 		var writeString=function(str){
31 | 			for (var i=0;i<str.length;i++,offset++) {
32 | 				data.setUint8(offset,str.charCodeAt(i));
33 | 			};
34 | 		};
35 | 		var write16=function(v){
36 | 			data.setUint16(offset,v,true);
37 | 			offset+=2;
38 | 		};
39 | 		var write32=function(v){
40 | 			data.setUint32(offset,v,true);
41 | 			offset+=4;
42 | 		};
43 | 		
44 | 		/* RIFF identifier */
45 | 		writeString('RIFF');
46 | 		/* RIFF chunk length */
47 | 		write32(36+dataLength);
48 | 		/* RIFF type */
49 | 		writeString('WAVE');
50 | 		/* format chunk identifier */
51 | 		writeString('fmt ');
52 | 		/* format chunk length */
53 | 		write32(16);
54 | 		/* sample format (raw) */
55 | 		write16(1);
56 | 		/* channel count */
57 | 		write16(1);
58 | 		/* sample rate */
59 | 		write32(sampleRate);
60 | 		/* byte rate (sample rate * block align) */
61 | 		write32(sampleRate*(bitRate/8));// *1 声道
62 | 		/* block align (channel count * bytes per sample) */
63 | 		write16(bitRate/8);// *1 声道
64 | 		/* bits per sample */
65 | 		write16(bitRate);
66 | 		/* data chunk identifier */
67 | 		writeString('data');
68 | 		/* data chunk length */
69 | 		write32(dataLength);
70 | 		// 写入采样数据
71 | 		if(bitRate==8) {
72 | 			for(var i=0;i<size;i++,offset++) {
73 | 				//16转8据说是雷霄骅的 https://blog.csdn.net/sevennight1989/article/details/85376149 细节比blqw的按比例的算法清晰点，虽然都有明显杂音
74 | 				var val=(res[i]>>8)+128;
75 | 				data.setInt8(offset,val,true);
76 | 			};
77 | 		}else{
78 | 			for (var i=0;i<size;i++,offset+=2){
79 | 				data.setInt16(offset,res[i],true);
80 | 			};
81 | 		};
82 | 		
83 | 		
84 | 		True(new Blob([data.buffer],{type:"audio/wav"}));
85 | 	}
86 | })();


--------------------------------------------------------------------------------
/web/wsconnecter.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
  3 |  * Reserved. MIT License  (https://opensource.org/licenses/MIT)
  4 |  */
  5 | /* 2021-2023 by zhaoming,mali aihealthx.com */
  6 | 
  7 | function WebSocketConnectMethod(config) { //定义socket连接方法类
  8 | 	var speechSokt;
  9 | 	var connKeeperID;
 10 | 
 11 | 	var msgHandle = config.msgHandle;
 12 | 	var stateHandle = config.stateHandle;
 13 | 
 14 | 	this.wsStart = function () {
 15 | 		var Uri = document.getElementById('wssip').value; //"wss://111.205.137.58:5821/wss/" //设置wss asr online接口地址 如 wss://X.X.X.X:port/wss/
 16 | 		if (Uri.match(/wss:\S*|ws:\S*/)) {
 17 | 			console.log("Uri" + Uri);
 18 | 		}
 19 | 		else {
 20 | 			alert("请检查wss地址正确性");
 21 | 			return 0;
 22 | 		}
 23 | 
 24 | 		if ('WebSocket' in window) {
 25 | 			speechSokt = new WebSocket(Uri, ['binary']); // 定义socket连接对象
 26 | 			speechSokt.onopen = function (e) { onOpen(e); }; // 定义响应函数
 27 | 			speechSokt.onclose = function (e) {
 28 | 				console.log("onclose ws!");
 29 | 				//speechSokt.close();
 30 | 				onClose(e);
 31 | 			};
 32 | 			speechSokt.onmessage = function (e) { onMessage(e); };
 33 | 			speechSokt.onerror = function (e) { onError(e); };
 34 | 			return 1;
 35 | 		}
 36 | 		else {
 37 | 			alert('当前浏览器不支持 WebSocket');
 38 | 			return 0;
 39 | 		}
 40 | 	};
 41 | 
 42 | 	// 定义停止与发送函数
 43 | 	this.wsStop = function () {
 44 | 		if (speechSokt != undefined) {
 45 | 			console.log("stop ws!");
 46 | 			speechSokt.close();
 47 | 		}
 48 | 	};
 49 | 
 50 | 	this.wsSend = function (oneData) {
 51 | 
 52 | 		if (speechSokt == undefined) return;
 53 | 		if (speechSokt.readyState === 1) { // 0:CONNECTING, 1:OPEN, 2:CLOSING, 3:CLOSED
 54 | 
 55 | 			speechSokt.send(oneData);
 56 | 
 57 | 
 58 | 		}
 59 | 	};
 60 | 
 61 | 	// SOCEKT连接中的消息与状态响应
 62 | 	function onOpen(e) {
 63 | 		// 发送json
 64 | 		var chunk_size = new Array(5, 10, 5);
 65 | 		var request = {
 66 | 			"chunk_size": chunk_size,
 67 | 			"wav_name": "h5",
 68 | 			"is_speaking": true,
 69 | 			"chunk_interval": 10,
 70 | 			"itn": getUseITN(),
 71 | 			"mode": getAsrMode(),
 72 | 
 73 | 		};
 74 | 		if (isfilemode) {
 75 | 			request.wav_format = file_ext;
 76 | 			if (file_ext == "wav") {
 77 | 				request.wav_format = "PCM";
 78 | 				request.audio_fs = file_sample_rate;
 79 | 			}
 80 | 		}
 81 | 
 82 | 		var hotwords = getHotwords();
 83 | 
 84 | 		if (hotwords != null) {
 85 | 			request.hotwords = hotwords;
 86 | 		}
 87 | 		console.log(JSON.stringify(request));
 88 | 		speechSokt.send(JSON.stringify(request));
 89 | 		console.log("连接成功");
 90 | 		stateHandle(0);
 91 | 
 92 | 	}
 93 | 
 94 | 	function onClose(e) {
 95 | 		stateHandle(1);
 96 | 	}
 97 | 
 98 | 	function onMessage(e) {
 99 | 
100 | 		msgHandle(e);
101 | 	}
102 | 
103 | 	function onError(e) {
104 | 
105 | 		info_div.innerHTML = "连接" + e;
106 | 		console.log(e);
107 | 		stateHandle(2);
108 | 
109 | 	}
110 | 
111 | 
112 | }


--------------------------------------------------------------------------------
/webui.py:
--------------------------------------------------------------------------------
 1 | import subprocess
 2 | import json
 3 | import os
 4 | from fastapi import FastAPI
 5 | from fastapi.responses import FileResponse
 6 | from fastapi.staticfiles import StaticFiles
 7 | from fastapi.middleware.cors import CORSMiddleware
 8 | import uvicorn
 9 | from loguru import logger
10 | 
11 | logger.add("webui-log.txt", level="INFO", rotation="200 MB")
12 | 
13 | app = FastAPI()
14 | 
15 | # 允许跨域
16 | app.add_middleware(
17 |     CORSMiddleware,
18 |     allow_origins=["*"],
19 |     allow_credentials=True,
20 |     allow_methods=["*"],
21 |     allow_headers=["*"],
22 | )
23 | 
24 | # 设置静态文件路径
25 | app.mount("/web", StaticFiles(directory="web"), name="web")
26 | 
27 | # 路由到静态的 index.html
28 | @app.get("/")
29 | async def get_index():
30 |     return FileResponse("web/index.html")
31 | 
32 | # 在程序退出前执行stop_program()函数
33 | @app.on_event("shutdown")
34 | async def shutdown_event():
35 |     os._exit(0)
36 | 
37 | if __name__ == "__main__":
38 |     uvicorn.run(app, host="0.0.0.0", port=8101)


--------------------------------------------------------------------------------