├── .gitignore
├── .python-version
├── README.md
├── api_server.py
├── archive
│   ├── audio-client.js
│   ├── base64_decode.py
│   ├── hello.py
│   ├── main.py
│   └── vad_test.html
├── audio_agent.py
├── cert.pem
├── key.pem
├── pyproject.toml
├── requirements.txt
├── start_https_server.bat
├── start_https_server.sh
└── static
    ├── css
    │   └── styles.css
    ├── favicon.svg
    ├── index.html
    └── js
        └── app.js

/.gitignore:
--------------------------------------------------------------------------------
1 | # Python-generated files
2 | __pycache__/
3 | *.py[oc]
4 | build/
5 | dist/
6 | wheels/
7 | *.egg-info
8 | 
9 | # Virtual environments
10 | .venv
11 | 
--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
1 | 3.12
2 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Intelligent Voice Conversation System
2 | 
3 | A web-based voice conversation system built on Alibaba Cloud's Qwen-Omni model, supporting real-time speech recognition, natural language processing, and speech synthesis.
4 | 
5 | ## Features
6 | 
7 | - **Real-time voice detection**: uses VAD (Voice Activity Detection) to detect user speech automatically
8 | - **Speech recognition and synthesis**: transcribes user speech to text; model replies are returned as both text and audio
9 | - **Multi-turn conversation memory**: keeps the last 5 turns of conversation history for a coherent interaction
10 | - **Performance optimizations**:
11 |   - TCP keep-alive to reduce connection setup time
12 |   - Pre-cached WAV headers to speed up audio processing
13 |   - GZip-compressed responses to reduce network transfer
14 |   - Conversation-history management to bound memory usage
15 | 
16 | ## System architecture
17 | 
18 | ### Backend components
19 | 
20 | - **api_server.py**: FastAPI server that handles HTTP requests and exposes the REST API
21 | - **audio_agent.py**: core audio-processing agent that talks to the Qwen model
22 | 
23 | ### Frontend components
24 | 
25 | - **static/index.html**: main page
26 | - **static/js/app.js**: main application logic, UI and interaction handling
27 | - **static/css/**: stylesheets
28 | - **static/favicon.svg**: site icon
29 | 
30 | ### File structure
31 | 
32 | ```
33 | omni_vad_demo/
34 | ├── api_server.py          # FastAPI server entry point
35 | ├── audio_agent.py         # audio-processing agent
36 | ├── requirements.txt       # Python dependencies
37 | ├── start_https_server.sh  # Linux/Mac startup script
38 | ├── start_https_server.bat # Windows startup script
39 | ├── cert.pem               # SSL certificate
40 | ├── key.pem                # SSL private key
41 | ├── static/                # static assets
42 | │   ├── index.html         # main HTML page
43 | │   ├── favicon.svg        # site icon
44 | │   ├── css/               # CSS styles
45 | │   └── js/                # JavaScript files
46 | │       └── app.js         # main application logic
47 | └── archive/               # archived, superseded files
48 |     ├── base64_decode.py   # test script
49 |     ├── hello.py           # test script
50 |     ├── main.py            # old server
51 |     ├── audio-client.js    # old audio client library
52 |     └── vad_test.html      # old HTML page
53 | ```
54 | 
55 | ## Installation and deployment
56 | 
57 | ### Requirements
58 | 
59 | - Python 3.8+
60 | - An Alibaba Cloud Qwen (DashScope) API key
61 | - HTTPS support (browsers require HTTPS for microphone access)
62 | 
63 | ### Steps
64 | 
65 | 1. **Clone the repository and install dependencies**:
66 | 
67 | ```bash
68 | git clone <repository URL>
69 | cd omni_vad_demo
70 | pip install -r requirements.txt
71 | ```
72 | 
73 | 2. **Configure the API key**:
74 | 
75 | Set the environment variable `DASHSCOPE_API_KEY` to your Alibaba Cloud Qwen API key:
76 | 
77 | ```bash
78 | # Linux/Mac
79 | export DASHSCOPE_API_KEY="your API key"
80 | 
81 | # Windows
82 | set DASHSCOPE_API_KEY=your API key
83 | ```
84 | 
85 | 3. **Configure the HTTPS certificate**:
86 | 
87 | For local development you can generate a self-signed certificate with mkcert:
88 | 
89 | ```bash
90 | # Install mkcert
91 | # Windows: choco install mkcert
92 | # MacOS: brew install mkcert
93 | 
94 | # Generate the certificate
95 | mkcert -key-file key.pem -cert-file cert.pem localhost 127.0.0.1 ::1 <your IP address>
96 | ```
97 | 
98 | 4. **Start the service**:
99 | 
100 | ```bash
101 | # Windows
102 | .\start_https_server.bat
103 | 
104 | # Linux/Mac
105 | ./start_https_server.sh
106 | ```
107 | 
108 | By default the service runs at `https://localhost:8000`.
109 | 
110 | ## Usage
111 | 
112 | 1. Open the service in a browser over HTTPS
113 | 2. Click the "启动对话" (start conversation) button and grant microphone permission
114 | 3. Start speaking; the system detects and processes speech automatically
115 | 4. Click the "清除历史" (clear history) button to start a new conversation
116 | 5. Click the "结束对话" (end conversation) button to stop
117 | 
118 | ## Performance optimizations
119 | 
120 | The system includes several performance optimizations:
121 | 
122 | 1. **Audio processing**:
123 |    - WAV headers are pre-cached to avoid regenerating them
124 |    - BytesIO is used to reduce memory usage
125 | 
126 | 2. **Responses**:
127 |    - GZip-compressed responses reduce network transfer
128 |    - A conversation-history limit bounds memory usage
129 | 
130 | 3. **Prompting**:
131 |    - Repeated prompts are avoided to keep the dialogue efficient
132 |    - The system prompt steers the model toward concise answers
133 | 
134 | ## Notes
135 | 
136 | - The browser must access the service over HTTPS (microphone access requires a secure context)
137 | - Make sure the API key is valid and has sufficient quota
138 | - iOS devices may require a user interaction before audio can play
139 | - By default the project keeps 5 turns of conversation history
140 | - For multiple worker processes, start from the command line: `uvicorn api_server:app --host=0.0.0.0 --port=8000 --ssl-keyfile=key.pem --ssl-certfile=cert.pem --workers=4`
141 | 
142 | ## Technical details
143 | 
144 | - The frontend is plain JavaScript with no framework dependencies
145 | - Audio is recorded with the MediaRecorder Web API
146 | - Voice activity detection uses @ricky0123/vad-web
147 | - The SpeechSynthesis Web API serves as a fallback for speech synthesis
148 | - The backend uses FastAPI and Uvicorn
149 | - The Qwen model is called through the OpenAI-compatible API
150 | 
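For orientation before reading the server code, the request/response shape of the main endpoint can be sketched in a few lines of browser JavaScript. This is a minimal sketch mirroring what `static/js/app.js` does (field names come from `api_server.py`'s `AudioRequest` model); `wavBlob`, a mono 16 kHz WAV recording, is a stand-in the caller must supply:

```js
// Minimal sketch of a /process_audio client; assumes it runs on a page served
// from the same HTTPS origin and that `wavBlob` holds a 16 kHz mono WAV Blob.
async function sendToServer(wavBlob) {
  // Base64-encode the WAV bytes (drop the "data:...;base64," prefix).
  const base64Audio = await new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onloadend = () => resolve(reader.result.split(',')[1]);
    reader.onerror = reject;
    reader.readAsDataURL(wavBlob);
  });

  const response = await fetch('/process_audio', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      audio_data: base64Audio,
      text_prompt: '',      // empty: the server-side system prompt is enough
      audio_format: 'wav',
    }),
  });
  if (!response.ok) throw new Error(`HTTP ${response.status}`);

  const result = await response.json(); // { text, audio, usage }
  if (result.audio) {
    // The reply audio comes back as a base64-encoded WAV file.
    new Audio(`data:audio/wav;base64,${result.audio}`).play();
  }
  return result;
}
```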
--------------------------------------------------------------------------------
/api_server.py:
--------------------------------------------------------------------------------
1 | import os
2 | import base64
3 | import uvicorn
4 | import time
5 | from fastapi import FastAPI, File, UploadFile, Form, HTTPException
6 | from fastapi.responses import JSONResponse, Response, RedirectResponse
7 | from fastapi.middleware.cors import CORSMiddleware
8 | from fastapi.middleware.gzip import GZipMiddleware
9 | from fastapi.staticfiles import StaticFiles
10 | from typing import Optional
11 | from pydantic import BaseModel
12 | import logging
13 | 
14 | from audio_agent import audio_agent
15 | 
16 | # 配置日志
17 | logging.basicConfig(
18 |     level=logging.INFO,
19 |     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
20 | )
21 | logger = logging.getLogger(__name__)
22 | 
23 | # 设置一些库的日志级别为WARNING,减少非关键日志
24 | logging.getLogger("httpx").setLevel(logging.WARNING)
25 | logging.getLogger("uvicorn").setLevel(logging.WARNING)
26 | logging.getLogger("uvicorn.access").setLevel(logging.WARNING)
27 | 
28 | app = FastAPI(title="音频处理API", description="处理音频并通过大模型获取回复的API服务")
29 | 
30 | # 添加Gzip压缩中间件,对大于1000字节的响应进行压缩,提高传输效率
31 | app.add_middleware(GZipMiddleware, minimum_size=1000)
32 | 
33 | # 添加CORS中间件
34 | app.add_middleware(
35 |     CORSMiddleware,
36 |     allow_origins=["*"],
37 |     allow_credentials=True,
38 |     allow_methods=["*"],
39 |     allow_headers=["*"],
40 | )
41 | 
42 | # 挂载静态文件
43 | app.mount("/static", StaticFiles(directory="static"), name="static")
44 | 
45 | # 添加favicon.ico路由
46 | @app.get("/favicon.ico")
47 | async def get_favicon():
48 |     """处理favicon.ico请求"""
49 |     return RedirectResponse(url="/static/favicon.svg")
50 | 
51 | class AudioRequest(BaseModel):
52 |     audio_data: str
53 |     text_prompt: str = "这段音频在说什么"
54 |     audio_format: str = "webm"  # 默认使用webm格式,前端现在发送的是wav
55 | 
56 | @app.post("/process_audio")
57 | async def process_audio(request: AudioRequest):
58 |     start_time = time.time()
59 |     try:
60 |         # 记录请求大小和格式
61 |         request_size = len(request.audio_data)
62 |         logger.info(f"收到音频请求,大小: {request_size} 字节,格式: {request.audio_format}")
63 | 
64 |         # 解码base64音频数据
65 |         decode_start = time.time()
66 |         audio_bytes = base64.b64decode(request.audio_data)
67 |         decode_time = time.time() - decode_start
68 |         logger.info(f"base64解码耗时: {decode_time:.2f}秒")
69 | 
70 |         # 处理音频,传递格式参数
71 |         process_start = time.time()
72 |         result = audio_agent.process_audio(audio_bytes, request.text_prompt, request.audio_format)
73 |         process_time = time.time() - process_start
74 |         logger.info(f"音频处理耗时: {process_time:.2f}秒")
75 | 
76 |         # 构建响应
77 |         response = {
78 |             "text": result["text"],
79 |             "audio": result.get("audio"),
80 |             "usage": result.get("usage")
81 |         }
82 | 
83 |         # 记录响应信息
84 |         if response["audio"]:
85 |             audio_size = len(response["audio"])
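            # (Editor's note) response["audio"] is base64 text at this point, so
            # the size logged below is roughly 4/3 of the raw WAV byte count; the
            # GZipMiddleware registered above (minimum_size=1000) recovers part of
            # that overhead on the wire.
86 |             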
logger.info(f"返回音频数据,base64大小: {audio_size} 字节") 87 | 88 | total_time = time.time() - start_time 89 | logger.info(f"总处理时间: {total_time:.2f}秒") 90 | 91 | # 记录模型使用情况(如果可用) 92 | if response["usage"]: 93 | logger.info(f"模型用量: 提示词 {response['usage'].prompt_tokens} 词元,回复 {response['usage'].completion_tokens} 词元,总计 {response['usage'].total_tokens} 词元") 94 | 95 | return response 96 | 97 | except Exception as e: 98 | logger.error(f"处理音频时出错: {str(e)}", exc_info=True) 99 | raise HTTPException(status_code=500, detail=str(e)) 100 | 101 | @app.post("/clear_history") 102 | async def clear_chat_history(): 103 | """清除对话历史记录""" 104 | try: 105 | audio_agent.clear_history() 106 | return {"status": "success", "message": "对话历史已清除"} 107 | except Exception as e: 108 | logger.error(f"清除对话历史时出错: {str(e)}", exc_info=True) 109 | raise HTTPException(status_code=500, detail=str(e)) 110 | 111 | @app.get("/health") 112 | async def health_check(): 113 | """健康检查端点""" 114 | return {"status": "healthy"} 115 | 116 | @app.get("/") 117 | async def redirect_to_index(): 118 | """重定向到前端页面""" 119 | from fastapi.responses import RedirectResponse 120 | return RedirectResponse(url="/static/index.html") 121 | 122 | if __name__ == "__main__": 123 | # 获取端口,默认为8000 124 | port = int(os.environ.get("PORT", 8000)) 125 | 126 | # 检查是否存在SSL证书和密钥 127 | ssl_keyfile = os.environ.get("SSL_KEYFILE", "key.pem") 128 | ssl_certfile = os.environ.get("SSL_CERTFILE", "cert.pem") 129 | 130 | # 如果证书和密钥文件存在,则启用HTTPS 131 | ssl_enabled = os.path.exists(ssl_keyfile) and os.path.exists(ssl_certfile) 132 | 133 | workers = min(4, os.cpu_count() or 1) # 根据CPU核心数设置工作进程数 134 | 135 | if ssl_enabled: 136 | logger.info(f"使用HTTPS启动服务,证书: {ssl_certfile}, 密钥: {ssl_keyfile}") 137 | # 在以下情况下不使用多工作进程 138 | if workers > 1: 139 | logger.warning("使用SSL时,必须通过命令行启动多工作进程。将使用单进程模式。") 140 | logger.info("如需多工作进程,请使用: uvicorn api_server:app --host=0.0.0.0 --port={} --ssl-keyfile={} --ssl-certfile={} --workers={}".format( 141 | port, ssl_keyfile, ssl_certfile, workers 142 | )) 143 | workers = 1 144 | # 启动HTTPS服务器 145 | uvicorn.run(app, host="0.0.0.0", port=port, ssl_keyfile=ssl_keyfile, ssl_certfile=ssl_certfile) 146 | else: 147 | logger.warning( 148 | "未找到SSL证书和密钥文件,将使用HTTP启动服务。" 149 | "注意: 浏览器中使用麦克风功能需要HTTPS连接。" 150 | "可以使用以下命令生成自签名证书:\n" 151 | "choco install mkcert # Windows\n" 152 | "brew install mkcert # MacOS\n" 153 | "mkcert -key-file key.pem -cert-file cert.pem localhost 127.0.0.1 ::1 你的IP地址" 154 | ) 155 | # 在以下情况下不使用多工作进程 156 | if workers > 1: 157 | logger.info("要使用{}个工作进程,请使用命令: uvicorn api_server:app --host=0.0.0.0 --port={} --workers={}".format( 158 | workers, port, workers 159 | )) 160 | workers = 1 161 | # 启动HTTP服务器 162 | uvicorn.run(app, host="0.0.0.0", port=port) -------------------------------------------------------------------------------- /archive/audio-client.js: -------------------------------------------------------------------------------- 1 | /** 2 | * 音频处理客户端 3 | * 用于录制音频并与后端API交互 4 | */ 5 | class AudioClient { 6 | /** 7 | * 初始化音频客户端 8 | * @param {Object} config 配置参数 9 | * @param {string} config.apiUrl API基础URL 10 | * @param {string} config.processingEndpoint 处理音频的端点 11 | * @param {boolean} config.debug 是否启用调试模式 12 | * @param {number} config.defaultRecordingDuration 默认录音时长(毫秒),默认5000ms 13 | */ 14 | constructor(config = {}) { 15 | this.config = { 16 | apiUrl: config.apiUrl || 'http://localhost:8000', 17 | processingEndpoint: config.processingEndpoint || '/process_audio', 18 | debug: config.debug || false, 19 | defaultRecordingDuration: 
config.defaultRecordingDuration || 5000, 20 | }; 21 | 22 | this.mediaRecorder = null; 23 | this.audioChunks = []; 24 | this.isRecording = false; 25 | this.stream = null; 26 | this.recordingTimer = null; 27 | 28 | // 绑定方法 29 | this.startRecording = this.startRecording.bind(this); 30 | this.stopRecording = this.stopRecording.bind(this); 31 | this.processAudio = this.processAudio.bind(this); 32 | } 33 | 34 | /** 35 | * 开始录制音频 36 | * @param {number} duration 录音时长(毫秒),如果提供则在指定时间后自动停止 37 | * @returns {Promise} 返回一个Promise,开始录制时resolve 38 | */ 39 | startRecording(duration) { 40 | if (this.isRecording) { 41 | if (this.config.debug) console.log('已经在录音中'); 42 | return Promise.resolve(); 43 | } 44 | 45 | // 清除可能存在的定时器 46 | if (this.recordingTimer) { 47 | clearTimeout(this.recordingTimer); 48 | this.recordingTimer = null; 49 | } 50 | 51 | this.audioChunks = []; 52 | 53 | return navigator.mediaDevices.getUserMedia({ audio: true }) 54 | .then(stream => { 55 | this.stream = stream; 56 | 57 | // 创建MediaRecorder实例,使用适当的音频格式 58 | const options = { mimeType: 'audio/webm;codecs=opus' }; 59 | try { 60 | this.mediaRecorder = new MediaRecorder(stream, options); 61 | } catch (e) { 62 | // 如果不支持webm格式,尝试使用默认格式 63 | this.mediaRecorder = new MediaRecorder(stream); 64 | } 65 | 66 | this.mediaRecorder.addEventListener('dataavailable', event => { 67 | if (event.data.size > 0) this.audioChunks.push(event.data); 68 | }); 69 | 70 | // 设置录音数据收集间隔为100ms,确保有足够的数据块 71 | this.mediaRecorder.start(100); 72 | this.isRecording = true; 73 | 74 | if (this.config.debug) console.log('开始录音'); 75 | 76 | // 如果设置了时长,则在指定时间后自动停止 77 | const recordingDuration = duration || this.config.defaultRecordingDuration; 78 | if (recordingDuration > 0) { 79 | this.recordingTimer = setTimeout(() => { 80 | if (this.isRecording) { 81 | if (this.config.debug) console.log(`录音达到设定时长 ${recordingDuration}ms,自动停止`); 82 | this.stopRecording(); 83 | } 84 | }, recordingDuration); 85 | } 86 | 87 | return Promise.resolve(); 88 | }) 89 | .catch(error => { 90 | console.error('获取麦克风权限失败:', error); 91 | return Promise.reject(error); 92 | }); 93 | } 94 | 95 | /** 96 | * 停止录制音频 97 | * @returns {Promise} 返回一个Promise,包含录制的Blob 98 | */ 99 | stopRecording() { 100 | if (!this.isRecording) { 101 | return Promise.resolve(null); 102 | } 103 | 104 | // 清除定时器 105 | if (this.recordingTimer) { 106 | clearTimeout(this.recordingTimer); 107 | this.recordingTimer = null; 108 | } 109 | 110 | return new Promise(resolve => { 111 | this.mediaRecorder.addEventListener('stop', () => { 112 | // 停止所有音轨 113 | if (this.stream) { 114 | this.stream.getTracks().forEach(track => track.stop()); 115 | } 116 | 117 | // 将录制的数据块合并为一个Blob 118 | // 使用webm作为MIME类型,因为这是MediaRecorder的原生格式 119 | const audioBlob = new Blob(this.audioChunks, { type: 'audio/webm' }); 120 | this.isRecording = false; 121 | 122 | if (this.config.debug) { 123 | console.log('停止录音,录制了 ' + this.audioChunks.length + ' 个数据块'); 124 | console.log('音频大小: ' + audioBlob.size + ' 字节'); 125 | } 126 | 127 | resolve(audioBlob); 128 | }); 129 | 130 | this.mediaRecorder.stop(); 131 | }); 132 | } 133 | 134 | /** 135 | * 将Blob转换为Base64 136 | * @param {Blob} blob 要转换的Blob 137 | * @returns {Promise} 返回一个Promise,包含base64编码的字符串 138 | */ 139 | blobToBase64(blob) { 140 | return new Promise((resolve, reject) => { 141 | const reader = new FileReader(); 142 | reader.onloadend = () => { 143 | // 移除data URL前缀 144 | const base64 = reader.result.split(',')[1]; 145 | resolve(base64); 146 | }; 147 | reader.onerror = reject; 148 | reader.readAsDataURL(blob); 149 | }); 
150 | } 151 | 152 | /** 153 | * 将音频发送到服务器处理 154 | * @param {Blob} audioBlob 音频Blob 155 | * @param {Object} options 选项 156 | * @param {string} options.prompt 提示文本 157 | * @returns {Promise} 返回一个Promise,包含服务器响应 158 | */ 159 | processAudio(audioBlob, options = {}) { 160 | if (!audioBlob) { 161 | return Promise.reject(new Error('没有音频数据')); 162 | } 163 | 164 | // 将Blob转换为Base64 165 | return this.blobToBase64(audioBlob) 166 | .then(base64Audio => { 167 | // 准备请求数据 168 | const requestData = { 169 | audio_data: base64Audio, 170 | text_prompt: options.prompt || '这段音频在说什么' 171 | }; 172 | 173 | // 发送请求,添加超时处理 174 | const timeout = 120000; // 120秒超时 175 | const controller = new AbortController(); 176 | const timeoutId = setTimeout(() => controller.abort(), timeout); 177 | 178 | return fetch(`${this.config.apiUrl}${this.config.processingEndpoint}`, { 179 | method: 'POST', 180 | headers: { 181 | 'Content-Type': 'application/json' 182 | }, 183 | body: JSON.stringify(requestData), 184 | signal: controller.signal 185 | }) 186 | .then(response => { 187 | clearTimeout(timeoutId); 188 | if (!response.ok) { 189 | throw new Error(`HTTP错误! 状态: ${response.status}`); 190 | } 191 | return response.json(); 192 | }) 193 | .catch(error => { 194 | clearTimeout(timeoutId); 195 | if (error.name === 'AbortError') { 196 | throw new Error('请求超时'); 197 | } 198 | throw error; 199 | }); 200 | }); 201 | } 202 | 203 | /** 204 | * 播放Base64编码的音频 205 | * @param {string} base64Audio Base64编码的音频 206 | * @returns {Promise} 返回一个Promise,音频播放完成时resolve 207 | */ 208 | playAudio(base64Audio) { 209 | return new Promise((resolve, reject) => { 210 | try { 211 | // 创建一个audio元素 212 | const audio = new Audio(); 213 | 214 | // 监听播放结束事件 215 | audio.addEventListener('ended', () => resolve()); 216 | audio.addEventListener('error', (e) => reject(e)); 217 | 218 | // 设置音频源 219 | audio.src = `data:audio/wav;base64,${base64Audio}`; 220 | 221 | // 播放音频 222 | audio.play().catch(e => { 223 | console.error('播放音频失败:', e); 224 | reject(e); 225 | }); 226 | } catch (error) { 227 | reject(error); 228 | } 229 | }); 230 | } 231 | 232 | /** 233 | * 一站式录制和处理音频 234 | * @param {Object} options 选项 235 | * @param {string} options.prompt 提示文本 236 | * @param {boolean} options.returnAudio 是否返回音频 237 | * @param {boolean} options.autoPlay 是否自动播放返回的音频 238 | * @param {number} options.duration 录音时长(毫秒),默认使用配置中的defaultRecordingDuration 239 | * @param {Function} options.onStart 开始录制时的回调 240 | * @param {Function} options.onStop 停止录制时的回调 241 | * @param {Function} options.onProcessing 处理时的回调 242 | * @param {Function} options.onResult 获得结果时的回调 243 | * @returns {Promise} 返回一个Promise,包含处理结果 244 | */ 245 | recordAndProcess(options = {}) { 246 | if (options.onStart) options.onStart(); 247 | 248 | // 使用options中的duration,如果没有则使用配置的默认值 249 | const duration = options.duration !== undefined ? 
options.duration : this.config.defaultRecordingDuration; 250 | 251 | return this.startRecording(duration) 252 | .then(() => { 253 | // 如果设置了duration,录音会自动停止,这里等待录音完成 254 | if (duration > 0) { 255 | return new Promise(resolve => { 256 | // 监听录音停止状态 257 | const checkRecording = setInterval(() => { 258 | if (!this.isRecording) { 259 | clearInterval(checkRecording); 260 | resolve(); 261 | } 262 | }, 100); 263 | }); 264 | } 265 | // 否则直接返回,让用户手动停止 266 | return Promise.resolve(); 267 | }) 268 | .then(() => { 269 | if (options.onStop) options.onStop(); 270 | if (this.isRecording) { 271 | return this.stopRecording(); 272 | } 273 | // 获取录音结果 274 | return this.getLastRecordingBlob(); 275 | }) 276 | .then(audioBlob => { 277 | if (!audioBlob) { 278 | throw new Error('没有录音数据'); 279 | } 280 | 281 | if (options.onProcessing) options.onProcessing(); 282 | return this.processAudio(audioBlob, { 283 | prompt: options.prompt, 284 | returnAudio: options.returnAudio 285 | }); 286 | }) 287 | .then(result => { 288 | if (options.onResult) options.onResult(result); 289 | 290 | // 如果返回了音频并设置了自动播放 291 | if (result.audio && options.autoPlay) { 292 | return this.playAudio(result.audio).then(() => result); 293 | } 294 | 295 | return result; 296 | }); 297 | } 298 | 299 | /** 300 | * 获取最后一次录音的Blob 301 | * @returns {Blob|null} 录音Blob或null 302 | */ 303 | getLastRecordingBlob() { 304 | if (this.audioChunks.length === 0) { 305 | return null; 306 | } 307 | return new Blob(this.audioChunks, { type: 'audio/webm' }); 308 | } 309 | 310 | /** 311 | * 检查浏览器是否支持所需的API 312 | * @returns {boolean} 是否支持 313 | */ 314 | static isSupported() { 315 | return !!(navigator.mediaDevices && 316 | navigator.mediaDevices.getUserMedia && 317 | window.MediaRecorder); 318 | } 319 | } 320 | 321 | // 如果在浏览器环境中,将AudioClient挂载到window对象 322 | if (typeof window !== 'undefined') { 323 | window.AudioClient = AudioClient; 324 | } -------------------------------------------------------------------------------- /archive/base64_decode.py: -------------------------------------------------------------------------------- 1 | import os 2 | from openai import OpenAI 3 | import base64 4 | import numpy as np 5 | import soundfile as sf 6 | import requests 7 | 8 | client = OpenAI( 9 | # 若没有配置环境变量,请用百炼API Key将下行替换为:api_key="sk-xxx", 10 | api_key=os.getenv("DASHSCOPE_API_KEY"), 11 | base_url="https://dashscope.aliyuncs.com/compatible-mode/v1", 12 | ) 13 | 14 | 15 | def encode_audio(audio_path): 16 | with open(audio_path, "rb") as audio_file: 17 | return base64.b64encode(audio_file.read()).decode("utf-8") 18 | 19 | 20 | base64_audio = encode_audio("welcome.mp3") 21 | 22 | completion = client.chat.completions.create( 23 | model="qwen-omni-turbo", 24 | messages=[ 25 | { 26 | "role": "system", 27 | "content": [{"type": "text", "text": "You are a helpful assistant."}], 28 | }, 29 | { 30 | "role": "user", 31 | "content": [ 32 | { 33 | "type": "input_audio", 34 | "input_audio": { 35 | "data": f"data:;base64,{base64_audio}", 36 | "format": "mp3", 37 | }, 38 | }, 39 | {"type": "text", "text": "这段音频在说什么"}, 40 | ], 41 | }, 42 | ], 43 | # 设置输出数据的模态,当前支持两种:["text","audio"]、["text"] 44 | modalities=["text", "audio"], 45 | audio={"voice": "Cherry", "format": "wav"}, 46 | # stream 必须设置为 True,否则会报错 47 | stream=True, 48 | stream_options={"include_usage": True}, 49 | ) 50 | 51 | for chunk in completion: 52 | if chunk.choices: 53 | print(chunk.choices[0].delta) 54 | else: 55 | print(chunk.usage) -------------------------------------------------------------------------------- /archive/hello.py: 
--------------------------------------------------------------------------------
1 | def main():
2 |     print("Hello from omni-vad-demo!")
3 | 
4 | 
5 | if __name__ == "__main__":
6 |     main()
7 | 
--------------------------------------------------------------------------------
/archive/main.py:
--------------------------------------------------------------------------------
1 | import uvicorn
2 | from fastapi import FastAPI
3 | from fastapi.middleware.cors import CORSMiddleware
4 | from pydantic import BaseModel
5 | from fastapi.staticfiles import StaticFiles
6 | from urllib.parse import urlparse
7 | import requests
8 | import os
9 | from starlette.responses import Response
10 | from starlette.types import Scope
11 | from starlette.staticfiles import StaticFiles
12 | 
13 | class CacheControlledStaticFiles(StaticFiles):
14 |     async def get_response(self, path: str, scope: Scope) -> Response:
15 |         response = await super().get_response(path, scope)
16 |         response.headers["Cache-Control"] = "public, max-age=0"
17 |         return response
18 | 
19 | app = FastAPI()
20 | 
21 | 
22 | # 或者方法2:更精确地只托管vad_test.html文件(需要自定义路由)
23 | @app.get("/vad_test")
24 | async def serve_vad_test():
25 |     with open("vad_test.html", "r", encoding="utf-8") as f:
26 |         html_content = f.read()
27 |     return Response(content=html_content, media_type="text/html")
28 | 
29 | 
30 | 
31 | #uvicorn main:app --host 0.0.0.0 --reload
32 | if __name__ == "__main__":
33 |     uvicorn.run(app, host="0.0.0.0", port=8000)
34 | 
35 | 
36 | #choco install mkcert
37 | #mkcert -key-file key.pem -cert-file cert.pem localhost 127.0.0.1 ::1 192.168.50.250
38 | 
39 | #uvicorn main:app --host 0.0.0.0 --port 8000 --ssl-keyfile key.pem --ssl-certfile cert.pem
40 | 
41 | #https://192.168.50.250:8000/vad_test
42 | 
--------------------------------------------------------------------------------
/archive/vad_test.html:
--------------------------------------------------------------------------------

[Markup stripped during extraction; only stray line numbers and text nodes survived. Recoverable structure of this legacy page: an inline stylesheet (original lines 9–384); an h1 header 智能语音对话系统; a status banner 请允许麦克风访问权限,开始与AI对话; a waveform box with the placeholder 等待语音输入...; start/stop control buttons; a 对话记录 (conversation) panel and a 系统日志 (system log) panel; and a large inline script (original lines ~437–984) implementing the recording/VAD loop that the current static/ frontend supersedes.]
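The inline script is gone, but its VAD wiring survives in `static/js/app.js` (`initVAD`). A minimal sketch of how this project drives @ricky0123/vad-web, assuming the library's browser bundle is loaded and exposed as the global `vad`:

```js
// Minimal sketch of the @ricky0123/vad-web flow used by this project,
// assuming the library's browser bundle exposes the global `vad`.
async function startVadDemo() {
  const myvad = await vad.MicVAD.new({
    onSpeechStart: () => {
      console.log('speech started');
    },
    onSpeechEnd: (audio) => {
      // `audio` is a Float32Array of 16 kHz mono samples; app.js converts
      // it to WAV (float32ArrayToWav) and POSTs it to /process_audio.
      console.log(`speech ended: ${audio.length} samples`);
    },
  });
  myvad.start(); // begins listening on the microphone
}
```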
431 | 432 | 433 | 434 | 435 | 436 | 985 | 986 | -------------------------------------------------------------------------------- /audio_agent.py: -------------------------------------------------------------------------------- 1 | import os 2 | import base64 3 | import struct 4 | import time 5 | from agno.agent import Agent 6 | from openai import OpenAI 7 | from typing import Dict, Any 8 | from io import BytesIO # 添加BytesIO导入 9 | 10 | # 配置OpenAI客户端 11 | def get_openai_client(): 12 | return OpenAI( 13 | api_key=os.getenv("DASHSCOPE_API_KEY", ""), 14 | base_url="https://dashscope.aliyuncs.com/compatible-mode/v1", 15 | ) 16 | 17 | # 预缓存常用采样率的WAV头 18 | WAV_HEADERS = {} 19 | 20 | def generate_wav_header(sample_rate: int = 24000) -> bytes: 21 | """生成WAV文件头""" 22 | # WAV文件头格式 23 | # RIFF header 24 | riff_header = b'RIFF' 25 | file_size_placeholder = struct.pack(' bytes: 65 | """添加WAV文件头,使用预缓存的头部""" 66 | # 如果没有预缓存当前采样率的头部,生成一个 67 | if sample_rate not in WAV_HEADERS: 68 | WAV_HEADERS[sample_rate] = generate_wav_header(sample_rate) 69 | 70 | # 获取预缓存的头部 71 | header_template = WAV_HEADERS[sample_rate] 72 | 73 | # 计算文件大小和数据大小 74 | data_chunk_size = len(audio_data) 75 | file_size = data_chunk_size + 36 # 文件总长度减去8字节 76 | 77 | # 创建包含正确大小信息的头部 78 | header = bytearray(header_template) 79 | struct.pack_into(' Dict[str, Any]: 103 | """处理音频并调用模型获取回复 104 | 105 | Args: 106 | audio_data: 音频数据字节 107 | text_prompt: 提示文本 108 | audio_format: 音频格式,可以是'webm'或'wav'等 109 | 110 | Returns: 111 | 包含文本和音频回复的字典 112 | """ 113 | start_time = time.time() 114 | try: 115 | # api_server.py中已经处理了base64解码,直接使用传入的音频数据 116 | # 移除重复的base64编码操作 117 | print(f"发送请求到模型,音频大小: {len(audio_data)} 字节,格式: {audio_format}") 118 | 119 | # 构建消息列表,包含历史记录 120 | messages = [ 121 | { 122 | "role": "system", 123 | "content": [{"type": "text", "text": "你是一个专业AI助手,简洁回答问题,不要重复用户的问话。"}], 124 | }, 125 | ] 126 | 127 | # 添加历史消息 128 | for msg in self.chat_history: 129 | messages.append(msg) 130 | 131 | # 添加当前用户消息 132 | # 这里仍需要编码,因为API需要base64格式 133 | base64_audio = base64.b64encode(audio_data).decode("utf-8") 134 | messages.append({ 135 | "role": "user", 136 | "content": [ 137 | { 138 | "type": "input_audio", 139 | "input_audio": { 140 | "data": f"data:;base64,{base64_audio}", 141 | "format": audio_format, 142 | }, 143 | }, 144 | # 只有当text_prompt不为空时才添加文本提示 145 | *([{"type": "text", "text": text_prompt}] if text_prompt.strip() else []), 146 | ], 147 | }) 148 | 149 | # 调用模型 150 | model_start = time.time() 151 | completion = self.client.chat.completions.create( 152 | model="qwen-omni-turbo", 153 | messages=messages, 154 | modalities=["text", "audio"], 155 | audio={"voice": "Chelsie", "format": "wav"}, 156 | stream=True, 157 | stream_options={"include_usage": True}, 158 | ) 159 | 160 | # 处理响应 161 | response = {"text": "", "audio": None, "usage": None} 162 | audio_chunks = [] # 存储原始音频数据块 163 | transcript_text = "" 164 | audio_chunks_count = 0 165 | audio_total_size = 0 166 | 167 | try: 168 | for chunk in completion: 169 | if chunk.choices: 170 | if hasattr(chunk.choices[0].delta, "audio"): 171 | try: 172 | # 获取音频数据 173 | audio_data = chunk.choices[0].delta.audio.get("data") 174 | if audio_data: 175 | # 解码并保存原始音频数据 176 | try: 177 | audio_chunk = base64.b64decode(audio_data) 178 | audio_chunks.append(audio_chunk) 179 | audio_chunks_count += 1 180 | audio_total_size += len(audio_chunk) 181 | # 简化日志,不再输出每个音频块 182 | except Exception as e: 183 | print(f"解码音频数据块时出错: {e}") 184 | 185 | # 获取转录文本 186 | transcript = chunk.choices[0].delta.audio.get("transcript") 187 | if 
transcript: 188 | transcript_text += transcript 189 | # 简化日志,不再输出每个转录片段 190 | except Exception as e: 191 | print(f"处理音频数据时出错: {e}") 192 | elif hasattr(chunk.choices[0].delta, "content"): 193 | content = chunk.choices[0].delta.content 194 | if content: 195 | response["text"] += str(content) 196 | # 简化日志,不再输出每个文本片段 197 | elif hasattr(chunk, "usage"): 198 | response["usage"] = chunk.usage 199 | print(f"收到用量统计: {chunk.usage}") 200 | break # 收到用量统计后结束循环 201 | except Exception as e: 202 | print(f"处理响应时出错: {e}") 203 | raise 204 | 205 | # 在处理完成后输出统计信息 206 | print(f"共收到{audio_chunks_count}个音频数据块,总大小: {audio_total_size} 字节") 207 | 208 | model_time = time.time() - model_start 209 | print(f"模型处理耗时: {model_time:.2f}秒") 210 | 211 | # 处理音频数据 212 | if audio_chunks: 213 | try: 214 | # 优化音频数据处理,使用BytesIO减少内存使用 215 | audio_process_start = time.time() 216 | audio_buffer = BytesIO() 217 | for chunk in audio_chunks: 218 | audio_buffer.write(chunk) 219 | raw_audio = audio_buffer.getvalue() 220 | # 添加WAV头 221 | wav_audio = add_wav_header(raw_audio) 222 | # 编码为base64 223 | response["audio"] = base64.b64encode(wav_audio).decode('utf-8') 224 | audio_process_time = time.time() - audio_process_start 225 | print(f"音频后处理耗时: {audio_process_time:.2f}秒") 226 | print(f"最终音频数据大小: {len(wav_audio)} 字节") 227 | except Exception as e: 228 | print(f"处理最终音频数据时出错: {e}") 229 | else: 230 | print("没有收集到任何音频数据") 231 | 232 | # 如果有转录文本但没有其他文本内容,使用转录文本 233 | if not response["text"] and transcript_text: 234 | response["text"] = transcript_text 235 | print(f"使用转录文本作为响应: {transcript_text}") 236 | 237 | # 更新对话历史 - 只添加一种信息来源,优先使用转录文本 238 | if transcript_text: 239 | # 使用转录的文本作为用户消息 240 | self.chat_history.append({ 241 | "role": "user", 242 | "content": [{"type": "text", "text": transcript_text}] 243 | }) 244 | print(f"添加用户转录文本到历史: {transcript_text}") 245 | elif text_prompt and text_prompt.strip(): 246 | # 只有在有有效提示文本且没有转录时使用提示文本 247 | self.chat_history.append({ 248 | "role": "user", 249 | "content": [{"type": "text", "text": text_prompt}] 250 | }) 251 | print(f"添加用户提示文本到历史: {text_prompt}") 252 | else: 253 | # 如果两者都没有,添加一个空的用户消息以保持对话连贯性 254 | self.chat_history.append({ 255 | "role": "user", 256 | "content": [{"type": "text", "text": "(用户发送了一段音频)"}] 257 | }) 258 | print("添加默认用户消息到历史") 259 | 260 | # 添加助手回复 261 | self.chat_history.append({ 262 | "role": "assistant", 263 | "content": [{"type": "text", "text": response["text"]}] 264 | }) 265 | 266 | # 保持历史长度在限制范围内,超过5轮就抛弃前面的对话 267 | MAX_TEXT_LENGTH = 1000 # 每条消息最大字符数 268 | if len(self.chat_history) > self.max_history * 2: # 每轮对话有2条消息(用户+助手) 269 | # 只保留最近的5轮对话 270 | self.chat_history = self.chat_history[-self.max_history*2:] 271 | print(f"对话历史超过{self.max_history}轮,已删除最早的对话") 272 | 273 | # 限制每条消息的文本长度以控制内存使用 274 | for msg in self.chat_history: 275 | if "content" in msg and isinstance(msg["content"], list): 276 | for item in msg["content"]: 277 | if item.get("type") == "text" and len(item.get("text", "")) > MAX_TEXT_LENGTH: 278 | item["text"] = item["text"][:MAX_TEXT_LENGTH] + "..." 
279 | 
280 |             total_time = time.time() - start_time
281 |             print(f"总处理时间: {total_time:.2f}秒")
282 |             print(f"最终文本响应: {response['text']}")
283 |             print(f"当前对话历史数量: {len(self.chat_history)//2} 轮")
284 | 
285 |             return response
286 | 
287 |         except Exception as e:
288 |             print(f"处理音频时出错: {e}")
289 |             raise
290 | 
291 |     def clear_history(self):
292 |         """清除对话历史"""
293 |         self.chat_history = []
294 |         print("对话历史已清除")
295 | 
296 | # 实例化Agent
297 | audio_agent = AudioProcessingAgent()
298 | 
299 | # 如果直接运行此脚本,进行测试
300 | if __name__ == "__main__":
301 |     from pathlib import Path
302 | 
303 |     # 测试用例
304 |     test_file = Path("welcome.mp3")
305 |     if test_file.exists():
306 |         with open(test_file, "rb") as audio_file:
307 |             audio_data = audio_file.read()
308 |         # welcome.mp3是MP3数据,需显式指定音频格式
309 |         result = audio_agent.process_audio(audio_data, audio_format="mp3")
310 |         print(f"文本回复: {result['text']}")
311 |         if result['audio']:
312 |             print(f"收到音频回复,大小: {len(result['audio'])} 字节")
313 |         if result['usage']:
314 |             print(f"用量统计: {result['usage']}")
315 |     else:
316 |         print(f"测试文件 {test_file} 不存在")
--------------------------------------------------------------------------------
/cert.pem:
--------------------------------------------------------------------------------
1 | -----BEGIN CERTIFICATE-----
2 | MIIEjzCCAvegAwIBAgIRAJAr7v+VOYfl1EgghzBPtB4wDQYJKoZIhvcNAQELBQAw
3 | gaExHjAcBgNVBAoTFW1rY2VydCBkZXZlbG9wbWVudCBDQTE7MDkGA1UECwwyTEVN
4 | T04tSFAtTEFQVE9QXGxlbW9uQExFTU9OLUhQLUxBUFRPUCAoTGVtb24gSGFsbCkx
5 | QjBABgNVBAMMOW1rY2VydCBMRU1PTi1IUC1MQVBUT1BcbGVtb25ATEVNT04tSFAt
6 | TEFQVE9QIChMZW1vbiBIYWxsKTAeFw0yNTA0MTAwMTUyNDlaFw0yNzA3MTAwMTUy
7 | NDlaMGYxJzAlBgNVBAoTHm1rY2VydCBkZXZlbG9wbWVudCBjZXJ0aWZpY2F0ZTE7
8 | MDkGA1UECwwyTEVNT04tSFAtTEFQVE9QXGxlbW9uQExFTU9OLUhQLUxBUFRPUCAo
9 | TGVtb24gSGFsbCkwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDT0jmB
10 | Mpj7LI2cBikhzeqyuDbhfnDeBJU+a922OF5VstzjxawGrK6O4xT38hXB8nS8pat/
11 | lUAIOUI6CLDuQTizbRwti8xvIX+KJtubB+N9Usd4n4pVANCP6G1UqNt0ZJn6HRIv
12 | 6h0LOTb9KsiQfcGBY0uFx/WuwliG5MQdcqhxz0M7NAH0fsJtHbIw6iivrMK62P3m
13 | 33FPDZrfSm1BTkhnijlmgCC3Zzea4l/7/yyxmlDYY2+TH8lY6rSz2mAfSifG7M+Q
14 | wE/z1BmeYFT96joeBssnYDLzHN9pVt1pjW7iO5yRvdt+AF4QwOSrjdGfRGORN164
15 | Zbj/C3yLJtaVm8PRAgMBAAGjfDB6MA4GA1UdDwEB/wQEAwIFoDATBgNVHSUEDDAK
16 | BggrBgEFBQcDATAfBgNVHSMEGDAWgBT30qv1V2r2ZBEjRhuG8ik4eXBmGzAyBgNV
17 | HREEKzApgglsb2NhbGhvc3SHBH8AAAGHEAAAAAAAAAAAAAAAAAAAAAGHBMCoMvow
18 | DQYJKoZIhvcNAQELBQADggGBAJOpvRM0eOUjxPUDvMXB/BhhHa+6IQfZ3tHpQ2D5
19 | oaXsbj9saWzczrOQlGqeaS4/naUrJTuaXZUUJZ7E1GI+xPyrnjNSK3LZQFEREsGg
20 | ZGRVzxqW62xbKnQBB3WbRdwgUgc5/Wy1txYmiVgxkrGzaAX7EBegY9M5GDYf+VhD
21 | r6KVMOL5ZvHJPkI4eIBTGX83nzpB821j+ngeW8wNtWW2tQA7EssCLNFT5hh2jV4h
22 | FbNwX1KtwlSgeMESLujr2ppB05O3zGicgZdghI+sh6QylVOYwWWN2fRDTGKdzJfc
23 | 3euiDKnqTXSidKg5UtsOcHFKJ3/B5Nd9MHkwsxmzD7gWrB2KKcL984Mr93mPiugp
24 | TwInUJmOhqiKlYeFAEdlGUeqWP4hn1a09ceu6mRnjbvpkpnI9FZILtHnsWs/Ws8L
25 | sURrsV9xg1OItpSM+dmz1ReaNpPzVOzplzEa9lP8Ji6dwlc879xbnQp9n0nDBctA
26 | XLcGy7hfQ4PHwSdILUuMxkySjQ==
27 | -----END CERTIFICATE-----
28 | 
--------------------------------------------------------------------------------
/key.pem:
--------------------------------------------------------------------------------
1 | -----BEGIN PRIVATE KEY-----
2 | MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDT0jmBMpj7LI2c
3 | BikhzeqyuDbhfnDeBJU+a922OF5VstzjxawGrK6O4xT38hXB8nS8pat/lUAIOUI6
4 | CLDuQTizbRwti8xvIX+KJtubB+N9Usd4n4pVANCP6G1UqNt0ZJn6HRIv6h0LOTb9
5 | KsiQfcGBY0uFx/WuwliG5MQdcqhxz0M7NAH0fsJtHbIw6iivrMK62P3m33FPDZrf
6 | 
Sm1BTkhnijlmgCC3Zzea4l/7/yyxmlDYY2+TH8lY6rSz2mAfSifG7M+QwE/z1Bme 7 | YFT96joeBssnYDLzHN9pVt1pjW7iO5yRvdt+AF4QwOSrjdGfRGORN164Zbj/C3yL 8 | JtaVm8PRAgMBAAECggEAf/vvft7BjFH5JiKay7ANdPrVPh4VuC/wtQybo7QfW4x8 9 | 5qrTLB0+Q1t1mfKNruf+HNXE74uQaued2k7SCMMjrVXpxqNHXIZS93hPDDcR/vD7 10 | USikfoPFgI4hMRvtrT/zwSm7iXPdJKDnVsR49sTlHHaQdT7CdVs7/hVPYbObj1df 11 | qJdjBwr0AtOllGhs6iLzsJxdgcnUtWSON1jsLOX0gk2U8+IB45o1yRLP9+PhagGC 12 | x971tvP8lsiICX8285cuGZvHuX1sMSmVaKHF/oPyh419yEYnXHRuIdTB1hqPvAxA 13 | GwnTY3YLNLU7THtTRNad/kzP3gRULm0mvobpzSsxeQKBgQD8az287NPOM9EX2BIr 14 | odj1Rw95mVaKHIiINL4777yVHypQcW2cXgzJIkztW5RvGUuxdH25fvr54J6hoQdp 15 | v/ZxLrSk6WiehOmE2sPM5kpBY6NV+8xW1XILdB5DxGVJRQeQQYANYfh3dBBm1W0m 16 | NQ66LBLwAlBK+D0huV6jEOZnPwKBgQDW04lthhWAvCcJ7p/JUVKRoNWyMLSNKID9 17 | VRlkB/H3tNFI2WiyUUjJPm8XPwRDKXqDw08ebIWbYhOGy96Owm8nES4IqmL4VlnT 18 | oMvghLB59ARjWrHYw1pccdjbJ9ZTyHDvKTgjmA7+vAdeKPCcT8gTNTSvMAMHeWvi 19 | umt6fYag7wKBgQC+RDftSLb/H5/k0UIhEYZwnHfVuPe6c3eW8+rRUwxbe3px2I4+ 20 | 58XLdsd1wypH9FFSGfUK9eRIpj/spWzpEYG6HvKbvDTYCGfddOlSceRXFbvw/DQy 21 | 4AFvEMAfZNLUP+xLmJPlgou/vwT9/rKfsi6/tqkvsQ7E9AlgelITqJGEEQKBgG6w 22 | gFcWh23VhKfxdBNe+5Rdsr4lqmIxRIVDm9mW3m4rlMpcez2l9EL9EHCB38hbTu0l 23 | bVbXw9/UIQuLcBlOxcbzayy73lLm61HHwETnGac8vCYVTR3LSnvnjT0ewahZ1xbj 24 | vjFY4CEQ8RrrLU7dLNH40DSUIHtxbM1eEJMEqqGxAoGBAO2JNRIcAZBrEvGnd5AH 25 | szA85pPIiF+jWRVq3WhhpsiWY1WIsnFzxnvuGoJ9FpnuhrzVKZifWzqrD+QFdoLh 26 | zA7x4nUCVpkF1r6nEfOnhtZmI6hR7cOnGC+3IG/Hio/FFsuGy6oPXw6lRanBnoOT 27 | DVaF/Osuky+6AhsJNNZb8Rzh 28 | -----END PRIVATE KEY----- 29 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "omni-vad-demo" 3 | version = "0.1.0" 4 | description = "智能语音对话系统,基于阿里云千问Qwen-Omni模型" 5 | readme = "README.md" 6 | requires-python = ">=3.8" 7 | license = {text = "MIT"} 8 | authors = [ 9 | {name = "项目作者"} 10 | ] 11 | 12 | dependencies = [ 13 | "numpy", 14 | "openai>=1.2.0", 15 | "pyaudio", 16 | "soundfile", 17 | "fastapi>=0.95.0", 18 | "uvicorn>=0.22.0", 19 | "agno>=0.1.0", 20 | "python-multipart>=0.0.6", 21 | "requests>=2.28.2", 22 | "pydantic>=1.10.7" 23 | ] 24 | 25 | [build-system] 26 | requires = ["setuptools>=61.0.0", "wheel"] 27 | build-backend = "setuptools.build_meta" 28 | 29 | [project.urls] 30 | Documentation = "https://github.com/username/omni-vad-demo" 31 | Source = "https://github.com/username/omni-vad-demo" 32 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | openai>=1.2.0 3 | pyaudio 4 | soundfile 5 | fastapi>=0.95.0 6 | uvicorn>=0.22.0 7 | agno>=0.1.0 8 | python-multipart>=0.0.6 9 | requests>=2.28.2 10 | pydantic>=1.10.7 -------------------------------------------------------------------------------- /start_https_server.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | setlocal enabledelayedexpansion 3 | echo ===== HTTPS语音对话服务器启动工具 ===== 4 | echo. 5 | 6 | REM 检查证书文件是否存在 7 | if exist key.pem if exist cert.pem ( 8 | echo [√] 找到SSL证书和密钥文件 9 | ) else ( 10 | echo [!] 未找到SSL证书或密钥文件,将尝试生成... 11 | echo. 12 | echo 注意: 需要先安装mkcert工具 13 | echo 在管理员权限的PowerShell中执行以下命令安装mkcert: 14 | echo choco install mkcert 15 | echo mkcert -install 16 | echo. 17 | 18 | REM 获取本机IP地址 19 | echo 正在获取IP地址... 
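REM Editor's note -- a sketch of what the loop below does, assuming English
REM ipconfig output. ipconfig prints lines such as
REM    IPv4 Address. . . . . . . . . . . : 192.168.1.10
REM findstr keeps the IPv4 lines, "tokens=2 delims=:" grabs the text after the
REM colon, and the substring operation ip_addr:~1 strips its leading space.
REM The ip_found flag keeps only the first match.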
20 | set ip_found=false 21 | for /f "tokens=2 delims=:" %%a in ('ipconfig ^| findstr /C:"IPv4"') do ( 22 | if "!ip_found!"=="false" ( 23 | set ip_addr=%%a 24 | set ip_found=true 25 | set ip_addr=!ip_addr:~1! 26 | echo 检测到IP地址: !ip_addr! 27 | ) 28 | ) 29 | 30 | echo. 31 | echo 是否要使用此IP地址生成证书? [Y/N] 32 | set /p use_ip= 33 | 34 | if /i "!use_ip!"=="Y" ( 35 | echo 正在生成包含本机IP的证书... 36 | mkcert -key-file key.pem -cert-file cert.pem localhost 127.0.0.1 ::1 !ip_addr! 37 | ) else ( 38 | echo 正在生成基本证书... 39 | mkcert -key-file key.pem -cert-file cert.pem localhost 127.0.0.1 ::1 40 | ) 41 | 42 | if exist key.pem if exist cert.pem ( 43 | echo [√] 证书生成成功! 44 | ) else ( 45 | echo [×] 证书生成失败,请手动执行: 46 | echo mkcert -key-file key.pem -cert-file cert.pem localhost 127.0.0.1 ::1 你的IP地址 47 | echo. 48 | echo 请先安装mkcert: 49 | echo 1. 以管理员身份运行PowerShell 50 | echo 2. 执行: choco install mkcert 51 | echo 3. 执行: mkcert -install 52 | echo. 53 | pause 54 | exit /b 55 | ) 56 | ) 57 | 58 | REM 设置环境变量以便服务器使用SSL 59 | set SSL_KEYFILE=key.pem 60 | set SSL_CERTFILE=cert.pem 61 | 62 | echo. 63 | echo [*] 正在启动HTTPS服务器... 64 | echo. 65 | echo 请注意以下事项: 66 | echo 1. 浏览器会显示证书警告,这是正常的,因为使用了自签名证书 67 | echo 2. 请通过 https://localhost:8000 或 https://你的IP地址:8000 访问服务 68 | echo 3. 使用Ctrl+C可以停止服务器 69 | echo. 70 | 71 | REM 启动服务器 72 | python api_server.py 73 | 74 | echo. 75 | echo 服务器已停止 76 | pause -------------------------------------------------------------------------------- /start_https_server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "===== HTTPS语音对话服务器启动工具 =====" 4 | echo "" 5 | 6 | # 检查mkcert是否安装 7 | if ! command -v mkcert &> /dev/null; then 8 | echo "[!] 未找到mkcert工具,请先安装:" 9 | echo " macOS: brew install mkcert" 10 | echo " Linux: 请参考 https://github.com/FiloSottile/mkcert#linux" 11 | echo "" 12 | read -p "是否继续尝试生成证书? [y/N] " -n 1 -r 13 | echo "" 14 | if [[ ! $REPLY =~ ^[Yy]$ ]]; then 15 | echo "已取消操作" 16 | exit 1 17 | fi 18 | fi 19 | 20 | # 检查证书文件是否存在 21 | if [ -f key.pem ] && [ -f cert.pem ]; then 22 | echo "[√] 找到SSL证书和密钥文件" 23 | else 24 | echo "[!] 未找到SSL证书或密钥文件,将尝试生成..." 25 | echo "" 26 | 27 | # 获取本机IP地址 28 | echo "正在获取IP地址..." 29 | if [[ "$OSTYPE" == "darwin"* ]]; then 30 | # macOS 31 | ip_addr=$(ipconfig getifaddr en0 2>/dev/null || ipconfig getifaddr en1 2>/dev/null) 32 | else 33 | # Linux 34 | ip_addr=$(hostname -I | awk '{print $1}') 35 | fi 36 | 37 | if [ -n "$ip_addr" ]; then 38 | echo "检测到IP地址: $ip_addr" 39 | echo "" 40 | read -p "是否要使用此IP地址生成证书? [Y/n] " -n 1 -r 41 | echo "" 42 | if [[ ! $REPLY =~ ^[Nn]$ ]]; then 43 | echo "正在生成包含本机IP的证书..." 44 | mkcert -key-file key.pem -cert-file cert.pem localhost 127.0.0.1 ::1 "$ip_addr" 45 | else 46 | echo "正在生成基本证书..." 47 | mkcert -key-file key.pem -cert-file cert.pem localhost 127.0.0.1 ::1 48 | fi 49 | else 50 | echo "未能检测到IP地址,将生成基本证书..." 51 | mkcert -key-file key.pem -cert-file cert.pem localhost 127.0.0.1 ::1 52 | fi 53 | 54 | if [ -f key.pem ] && [ -f cert.pem ]; then 55 | echo "[√] 证书生成成功!" 56 | else 57 | echo "[×] 证书生成失败,请手动执行:" 58 | echo " mkcert -key-file key.pem -cert-file cert.pem localhost 127.0.0.1 ::1 你的IP地址" 59 | echo "" 60 | exit 1 61 | fi 62 | fi 63 | 64 | # 设置环境变量以便服务器使用SSL 65 | export SSL_KEYFILE=key.pem 66 | export SSL_CERTFILE=cert.pem 67 | 68 | echo "" 69 | echo "[*] 正在启动HTTPS服务器..." 70 | echo "" 71 | echo "请注意以下事项:" 72 | echo " 1. 浏览器会显示证书警告,这是正常的,因为使用了自签名证书" 73 | echo " 2. 请通过 https://localhost:8000 或 https://你的IP地址:8000 访问服务" 74 | echo " 3. 
使用Ctrl+C可以停止服务器" 75 | echo "" 76 | 77 | # 启动服务器 78 | python3 api_server.py 79 | 80 | echo "" 81 | echo "服务器已停止" -------------------------------------------------------------------------------- /static/css/styles.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --primary-color: #00ff9d; 3 | --secondary-color: #00b4ff; 4 | --dark-bg: #0a192f; 5 | --darker-bg: #020c1b; 6 | --text-color: #e6f1ff; 7 | --highlight-color: #ff2d75; 8 | } 9 | 10 | body { 11 | font-family: 'Roboto', sans-serif; 12 | max-width: 900px; 13 | margin: 0 auto; 14 | padding: 20px; 15 | background-color: var(--dark-bg); 16 | color: var(--text-color); 17 | background-image: 18 | radial-gradient(circle at 25% 25%, rgba(0, 180, 255, 0.1) 0%, transparent 50%), 19 | radial-gradient(circle at 75% 75%, rgba(0, 255, 157, 0.1) 0%, transparent 50%); 20 | } 21 | 22 | h1 { 23 | font-family: 'Orbitron', sans-serif; 24 | font-weight: 700; 25 | color: var(--primary-color); 26 | text-shadow: 0 0 10px rgba(0, 255, 157, 0.5); 27 | margin-bottom: 10px; 28 | letter-spacing: 1px; 29 | } 30 | 31 | .container { 32 | background-color: var(--darker-bg); 33 | border-radius: 15px; 34 | padding: 25px; 35 | margin-top: 20px; 36 | box-shadow: 0 10px 30px rgba(0, 0, 0, 0.5); 37 | border: 1px solid rgba(0, 180, 255, 0.2); 38 | position: relative; 39 | overflow: hidden; 40 | } 41 | 42 | .container::before { 43 | content: ''; 44 | position: absolute; 45 | top: 0; 46 | left: 0; 47 | right: 0; 48 | height: 3px; 49 | background: linear-gradient(90deg, var(--primary-color), var(--secondary-color)); 50 | } 51 | 52 | .status-container { 53 | position: relative; 54 | margin: 30px 0; 55 | padding: 20px; 56 | border-radius: 10px; 57 | background-color: rgba(0, 25, 47, 0.7); 58 | border: 1px solid rgba(0, 180, 255, 0.3); 59 | box-shadow: inset 0 0 15px rgba(0, 180, 255, 0.1); 60 | } 61 | 62 | .status { 63 | font-size: 24px; 64 | font-weight: bold; 65 | margin: 0; 66 | font-family: 'Orbitron', sans-serif; 67 | position: relative; 68 | z-index: 2; 69 | } 70 | 71 | .status-indicator { 72 | position: absolute; 73 | top: 0; 74 | left: 0; 75 | width: 100%; 76 | height: 100%; 77 | opacity: 0.1; 78 | border-radius: 8px; 79 | transition: background-color 0.3s ease; 80 | } 81 | 82 | .listening .status-indicator { 83 | background-color: var(--primary-color); 84 | animation: pulse 2s infinite; 85 | } 86 | 87 | .processing .status-indicator { 88 | background-color: var(--secondary-color); 89 | animation: pulse 1.5s infinite; 90 | } 91 | 92 | .speaking .status-indicator { 93 | background-color: var(--highlight-color); 94 | animation: pulse 1s infinite; 95 | } 96 | 97 | @keyframes pulse { 98 | 0% { opacity: 0.1; } 99 | 50% { opacity: 0.3; } 100 | 100% { opacity: 0.1; } 101 | } 102 | 103 | #audioWave { 104 | width: 100%; 105 | height: 120px; 106 | margin: 30px 0; 107 | background-color: rgba(0, 25, 47, 0.7); 108 | border-radius: 10px; 109 | border: 1px solid rgba(0, 180, 255, 0.3); 110 | position: relative; 111 | overflow: hidden; 112 | } 113 | 114 | .waveform { 115 | position: absolute; 116 | top: 0; 117 | left: 0; 118 | width: 100%; 119 | height: 100%; 120 | display: flex; 121 | align-items: center; 122 | justify-content: center; 123 | padding: 0 20px; 124 | } 125 | 126 | .wave-bar { 127 | width: 6px; 128 | height: 20px; 129 | margin: 0 3px; 130 | background: linear-gradient(to top, var(--primary-color), var(--secondary-color)); 131 | border-radius: 3px; 132 | animation: wave 1.5s infinite ease-in-out; 133 | transform-origin: 
bottom; 134 | } 135 | 136 | @keyframes wave { 137 | 0%, 100% { transform: scaleY(0.3); } 138 | 50% { transform: scaleY(1); } 139 | } 140 | 141 | .wave-bar:nth-child(1) { animation-delay: 0.1s; } 142 | .wave-bar:nth-child(2) { animation-delay: 0.2s; } 143 | .wave-bar:nth-child(3) { animation-delay: 0.3s; } 144 | .wave-bar:nth-child(4) { animation-delay: 0.4s; } 145 | .wave-bar:nth-child(5) { animation-delay: 0.5s; } 146 | .wave-bar:nth-child(6) { animation-delay: 0.4s; } 147 | .wave-bar:nth-child(7) { animation-delay: 0.3s; } 148 | .wave-bar:nth-child(8) { animation-delay: 0.2s; } 149 | .wave-bar:nth-child(9) { animation-delay: 0.1s; } 150 | 151 | .btn-group { 152 | display: flex; 153 | justify-content: center; 154 | gap: 20px; 155 | margin: 30px 0; 156 | } 157 | 158 | button { 159 | position: relative; 160 | background: linear-gradient(135deg, var(--primary-color), var(--secondary-color)); 161 | color: var(--darker-bg); 162 | border: none; 163 | padding: 12px 30px; 164 | font-size: 16px; 165 | font-weight: 500; 166 | margin: 10px 0; 167 | cursor: pointer; 168 | border-radius: 50px; 169 | font-family: 'Orbitron', sans-serif; 170 | letter-spacing: 1px; 171 | overflow: hidden; 172 | transition: all 0.3s ease; 173 | box-shadow: 0 5px 15px rgba(0, 255, 157, 0.3); 174 | } 175 | 176 | button:hover { 177 | transform: translateY(-3px); 178 | box-shadow: 0 8px 20px rgba(0, 255, 157, 0.4); 179 | } 180 | 181 | button:active { 182 | transform: translateY(1px); 183 | } 184 | 185 | button:disabled { 186 | background: #555; 187 | color: #999; 188 | cursor: not-allowed; 189 | box-shadow: none; 190 | transform: none; 191 | } 192 | 193 | button::after { 194 | content: ''; 195 | position: absolute; 196 | top: -50%; 197 | left: -50%; 198 | width: 200%; 199 | height: 200%; 200 | background: rgba(255, 255, 255, 0.1); 201 | transform: rotate(45deg); 202 | transition: all 0.3s ease; 203 | opacity: 0; 204 | } 205 | 206 | button:hover::after { 207 | opacity: 1; 208 | top: -20%; 209 | left: -20%; 210 | } 211 | 212 | .conversation-container { 213 | display: flex; 214 | flex-direction: column; 215 | gap: 20px; 216 | margin: 30px 0; 217 | } 218 | 219 | .conversation, .log { 220 | text-align: left; 221 | border-radius: 10px; 222 | background-color: rgba(0, 25, 47, 0.7); 223 | border: 1px solid rgba(0, 180, 255, 0.3); 224 | box-shadow: inset 0 0 15px rgba(0, 180, 255, 0.1); 225 | padding: 20px; 226 | max-height: 250px; 227 | overflow-y: auto; 228 | } 229 | 230 | .conversation-title, .log-title { 231 | font-family: 'Orbitron', sans-serif; 232 | color: var(--primary-color); 233 | margin-top: 0; 234 | margin-bottom: 15px; 235 | font-size: 18px; 236 | display: flex; 237 | align-items: center; 238 | } 239 | 240 | .conversation-title::before, .log-title::before { 241 | content: ''; 242 | display: inline-block; 243 | width: 12px; 244 | height: 12px; 245 | border-radius: 50%; 246 | background-color: var(--primary-color); 247 | margin-right: 10px; 248 | box-shadow: 0 0 5px var(--primary-color); 249 | } 250 | 251 | .log-title::before { 252 | background-color: var(--secondary-color); 253 | box-shadow: 0 0 5px var(--secondary-color); 254 | } 255 | 256 | .message { 257 | margin: 10px 0; 258 | padding: 12px 15px; 259 | border-radius: 8px; 260 | position: relative; 261 | line-height: 1.5; 262 | animation: fadeIn 0.3s ease; 263 | } 264 | 265 | @keyframes fadeIn { 266 | from { opacity: 0; transform: translateY(10px); } 267 | to { opacity: 1; transform: translateY(0); } 268 | } 269 | 270 | .user-message { 271 | background-color: rgba(0, 
180, 255, 0.15); 272 | border-left: 3px solid var(--secondary-color); 273 | margin-left: 20px; 274 | } 275 | 276 | .ai-message { 277 | background-color: rgba(0, 255, 157, 0.15); 278 | border-left: 3px solid var(--primary-color); 279 | margin-right: 20px; 280 | } 281 | 282 | .message-sender { 283 | font-weight: bold; 284 | margin-bottom: 5px; 285 | font-family: 'Orbitron', sans-serif; 286 | font-size: 14px; 287 | } 288 | 289 | .user-message .message-sender { 290 | color: var(--secondary-color); 291 | } 292 | 293 | .ai-message .message-sender { 294 | color: var(--primary-color); 295 | } 296 | 297 | .log-entry { 298 | margin: 8px 0; 299 | padding: 8px 0; 300 | border-bottom: 1px solid rgba(0, 180, 255, 0.1); 301 | font-size: 13px; 302 | color: #a8b2d1; 303 | display: flex; 304 | } 305 | 306 | .log-time { 307 | color: var(--secondary-color); 308 | margin-right: 10px; 309 | font-family: 'Orbitron', sans-serif; 310 | font-size: 12px; 311 | min-width: 70px; 312 | } 313 | 314 | .typing-indicator { 315 | display: flex; 316 | align-items: center; 317 | padding: 10px 15px; 318 | background-color: rgba(0, 255, 157, 0.1); 319 | border-radius: 8px; 320 | margin: 10px 0; 321 | width: fit-content; 322 | } 323 | 324 | .typing-dot { 325 | width: 8px; 326 | height: 8px; 327 | background-color: var(--primary-color); 328 | border-radius: 50%; 329 | margin: 0 3px; 330 | animation: typingAnimation 1.4s infinite ease-in-out; 331 | } 332 | 333 | .typing-dot:nth-child(1) { animation-delay: 0s; } 334 | .typing-dot:nth-child(2) { animation-delay: 0.2s; } 335 | .typing-dot:nth-child(3) { animation-delay: 0.4s; } 336 | 337 | @keyframes typingAnimation { 338 | 0%, 60%, 100% { transform: translateY(0); } 339 | 30% { transform: translateY(-5px); } 340 | } 341 | 342 | /* 自定义滚动条 */ 343 | ::-webkit-scrollbar { 344 | width: 8px; 345 | } 346 | 347 | ::-webkit-scrollbar-track { 348 | background: rgba(0, 0, 0, 0.2); 349 | border-radius: 4px; 350 | } 351 | 352 | ::-webkit-scrollbar-thumb { 353 | background: linear-gradient(var(--primary-color), var(--secondary-color)); 354 | border-radius: 4px; 355 | } 356 | 357 | /* 响应式设计 */ 358 | @media (max-width: 768px) { 359 | body { 360 | padding: 10px; 361 | } 362 | 363 | .container { 364 | padding: 15px; 365 | } 366 | 367 | .btn-group { 368 | flex-direction: column; 369 | gap: 10px; 370 | } 371 | 372 | button { 373 | width: 100%; 374 | } 375 | } 376 | 377 | /* 错误消息样式 */ 378 | .error-message { 379 | color: #ff0033; 380 | background-color: rgba(255, 200, 200, 0.8); 381 | padding: 8px 12px; 382 | border-radius: 4px; 383 | margin-top: 8px; 384 | font-weight: bold; 385 | box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2); 386 | text-align: center; 387 | width: 100%; 388 | } 389 | 390 | /* iOS音频播放器样式 */ 391 | .ios-audio-player { 392 | margin-top: 10px; 393 | padding: 10px; 394 | background-color: rgba(255, 255, 255, 0.9); 395 | border-radius: 8px; 396 | text-align: center; 397 | box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1); 398 | } 399 | 400 | .ios-hint { 401 | margin: 0 0 5px 0; 402 | font-size: 12px; 403 | color: #666; 404 | } -------------------------------------------------------------------------------- /static/favicon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /static/index.html: 
--------------------------------------------------------------------------------
[Markup stripped during extraction. The live page mirrors the legacy layout: a document head with title 智能语音对话系统 plus font, stylesheet, and script includes (app.js and, presumably, the vad-web bundle it relies on); an h1 header 智能语音对话系统; a status banner 请允许麦克风访问权限,开始与AI对话; a waveform box with the placeholder 等待语音输入...; a hidden iOS audio player (the #iosAudioPlayer / #iosAudio elements app.js uses on iOS); three control buttons (#startBtn, #stopBtn, #clearHistoryBtn); a 对话记录 (conversation) panel; and a 系统日志 (system log) panel.]
65 | 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /static/js/app.js: -------------------------------------------------------------------------------- 1 | // 全局变量 2 | let myvad = null; 3 | let audioContext = null; 4 | let isProcessing = false; 5 | let isVADPaused = false; 6 | let waveBars = []; 7 | 8 | // API配置 9 | const apiConfig = { 10 | apiUrl: window.location.origin, // 使用当前域名作为API的基础URL 11 | processingEndpoint: '/process_audio', 12 | debug: true 13 | }; 14 | 15 | // 初始化页面元素 16 | document.addEventListener('DOMContentLoaded', () => { 17 | waveBars = Array.from(document.querySelectorAll('.wave-bar')); 18 | updateWaveform(0); // 初始化为静音状态 19 | 20 | // 事件监听 21 | document.getElementById('startBtn').addEventListener('click', startConversation); 22 | document.getElementById('stopBtn').addEventListener('click', stopConversation); 23 | document.getElementById('clearHistoryBtn').addEventListener('click', clearChatHistory); 24 | }); 25 | 26 | // 更新波形显示 27 | function updateWaveform(level) { 28 | waveBars.forEach((bar, index) => { 29 | // 根据语音活动级别和条形位置计算高度 30 | const positionFactor = 1 - Math.abs(index - 4) / 4; // 中间条形更高 31 | const heightFactor = level * 0.8 + 0.2; // 确保最小高度 32 | const scale = positionFactor * heightFactor; 33 | bar.style.transform = `scaleY(${scale})`; 34 | 35 | // 根据高度调整颜色 36 | const colorValue = Math.min(255, 100 + scale * 155); 37 | bar.style.background = `linear-gradient(to top, 38 | rgba(0, 255, 157, ${scale}), 39 | rgba(0, 180, 255, ${scale}))`; 40 | }); 41 | } 42 | 43 | // 模拟波形动画 44 | function simulateWaveform() { 45 | let level = 0; 46 | const interval = setInterval(() => { 47 | if (isProcessing || isVADPaused) { 48 | level = Math.max(0, level - 0.05); 49 | } else { 50 | // 随机波动,模拟环境噪音 51 | level = Math.min(1, Math.max(0, level + (Math.random() - 0.5) * 0.2)); 52 | } 53 | updateWaveform(level); 54 | }, 100); 55 | 56 | return interval; 57 | } 58 | 59 | let waveInterval = simulateWaveform(); 60 | 61 | // 将Blob转换为Base64 62 | function blobToBase64(blob) { 63 | return new Promise((resolve, reject) => { 64 | const reader = new FileReader(); 65 | reader.onloadend = () => { 66 | // 移除data URL前缀 67 | const base64 = reader.result.split(',')[1]; 68 | resolve(base64); 69 | }; 70 | reader.onerror = reject; 71 | reader.readAsDataURL(blob); 72 | }); 73 | } 74 | 75 | // 将Float32Array转换为WAV格式 76 | function float32ArrayToWav(audioData, sampleRate) { 77 | // WAV文件头的大小为44字节 78 | const headerSize = 44; 79 | // 每个采样点是16位(2字节) 80 | const bytesPerSample = 2; 81 | const dataSize = audioData.length * bytesPerSample; 82 | const buffer = new ArrayBuffer(headerSize + dataSize); 83 | const view = new DataView(buffer); 84 | 85 | // 写入WAV文件头 86 | // "RIFF"标识 87 | writeString(view, 0, 'RIFF'); 88 | // 文件大小 89 | view.setUint32(4, 32 + dataSize, true); 90 | // "WAVE"标识 91 | writeString(view, 8, 'WAVE'); 92 | // "fmt "子块标识 93 | writeString(view, 12, 'fmt '); 94 | // 子块大小(16表示PCM格式) 95 | view.setUint32(16, 16, true); 96 | // 音频格式(1表示PCM) 97 | view.setUint16(20, 1, true); 98 | // 通道数(1表示单声道) 99 | view.setUint16(22, 1, true); 100 | // 采样率 101 | view.setUint32(24, sampleRate, true); 102 | // 字节率 = 采样率 * 通道数 * 字节数/采样点 103 | view.setUint32(28, sampleRate * 1 * bytesPerSample, true); 104 | // 块对齐 = 通道数 * 字节数/采样点 105 | view.setUint16(32, 1 * bytesPerSample, true); 106 | // 每个采样点的位数 107 | view.setUint16(34, 8 * bytesPerSample, true); 108 | // "data"子块标识 109 | writeString(view, 36, 'data'); 110 | // 音频数据大小 111 | view.setUint32(40, dataSize, true); 112 | 
113 | // 写入音频数据 114 | // 将Float32Array转换为16位整数 115 | const volume = 0.8; // 避免可能的截断 116 | for (let i = 0; i < audioData.length; i++) { 117 | // 将[-1,1]范围的float32转换为[-32768,32767]范围的int16 118 | const sample = Math.max(-1, Math.min(1, audioData[i])); 119 | const int16Sample = Math.floor(sample * volume * 32767); 120 | view.setInt16(headerSize + i * bytesPerSample, int16Sample, true); 121 | } 122 | 123 | return buffer; 124 | } 125 | 126 | // 辅助函数:将字符串写入DataView 127 | function writeString(view, offset, string) { 128 | for (let i = 0; i < string.length; i++) { 129 | view.setUint8(offset + i, string.charCodeAt(i)); 130 | } 131 | } 132 | 133 | // 更新处理进度状态 134 | function updateProcessingStatus(stage, progress = null) { 135 | // 阶段: 'recording', 'processing', 'sending', 'receiving', 'complete' 136 | const statusElement = document.getElementById('status'); 137 | let statusText = ''; 138 | 139 | switch(stage) { 140 | case 'recording': 141 | statusText = "正在录音..."; 142 | updateWaveActivityLevel(0.6 + Math.random() * 0.4); // 高活跃度 143 | break; 144 | case 'processing': 145 | statusText = "正在处理音频..."; 146 | updateWaveActivityLevel(0.3 + Math.random() * 0.2); // 中等活跃度 147 | break; 148 | case 'sending': 149 | statusText = "发送请求到服务器..."; 150 | updateWaveActivityLevel(0.2 + Math.random() * 0.1); // 低活跃度 151 | break; 152 | case 'receiving': 153 | statusText = "接收服务器响应..."; 154 | updateWaveActivityLevel(0.2 + Math.random() * 0.2); // 低到中等活跃度 155 | break; 156 | case 'complete': 157 | statusText = "处理完成"; 158 | updateWaveActivityLevel(0); // 无活跃度 159 | break; 160 | case 'initializing': 161 | statusText = "正在初始化..."; 162 | break; 163 | case 'stopping': 164 | statusText = "正在停止..."; 165 | break; 166 | case 'stopped': 167 | statusText = "已停止"; 168 | break; 169 | default: 170 | statusText = stage; // 如果提供了自定义文本 171 | } 172 | 173 | // 如果提供了进度信息 174 | if (progress !== null && typeof progress === 'number') { 175 | statusText += ` (${Math.round(progress * 100)}%)`; 176 | } 177 | 178 | updateStatus(statusText, stage); 179 | } 180 | 181 | // 更新波形活动水平 182 | function updateWaveActivityLevel(level) { 183 | // 使波形显示与处理状态对应 184 | waveBars.forEach((bar, index) => { 185 | const delay = index * 50; // 创建波浪效果的延迟 186 | setTimeout(() => { 187 | // 添加一些随机性使动画更自然 188 | const randomFactor = 0.8 + Math.random() * 0.4; 189 | const adjustedLevel = level * randomFactor; 190 | const positionFactor = 1 - Math.abs(index - 4) / 4; // 中间条形更高 191 | const scale = positionFactor * adjustedLevel; 192 | 193 | bar.style.transform = `scaleY(${Math.max(0.1, scale)})`; 194 | 195 | // 根据状态调整颜色 196 | let color1, color2; 197 | if (level > 0.5) { 198 | // 录音状态 - 绿色到蓝色 199 | color1 = `0, 255, ${Math.round(157 * scale)}`; 200 | color2 = `0, ${Math.round(180 * scale)}, 255`; 201 | } else if (level > 0.2) { 202 | // 处理状态 - 黄色到橙色 203 | color1 = `255, ${Math.round(200 * scale)}, 0`; 204 | color2 = `255, ${Math.round(140 * scale)}, 0`; 205 | } else if (level > 0) { 206 | // 等待状态 - 蓝色到紫色 207 | color1 = `100, ${Math.round(100 * scale)}, 255`; 208 | color2 = `180, 0, ${Math.round(220 * scale)}`; 209 | } else { 210 | // 不活跃状态 - 灰色 211 | color1 = `100, 100, 100`; 212 | color2 = `50, 50, 50`; 213 | } 214 | 215 | bar.style.background = `linear-gradient(to top, 216 | rgba(${color1}, ${scale}), 217 | rgba(${color2}, ${scale}))`; 218 | }, delay); 219 | }); 220 | } 221 | 222 | // 处理音频API请求 223 | async function processAudio(audioData) { 224 | try { 225 | console.log("处理音频...", typeof audioData, audioData.length); 226 | 227 | if (!audioData || audioData.length === 0) { 228 | 
console.error("无效的音频数据"); 229 | showError("无效的音频数据"); 230 | return; 231 | } 232 | 233 | // 转换为WAV格式 234 | const wavBuffer = float32ArrayToWav(audioData, 16000); 235 | 236 | // 创建Blob并转换为base64 237 | const blob = new Blob([wavBuffer], { type: 'audio/wav' }); 238 | const base64Audio = await blobToBase64(blob); 239 | 240 | // 更新UI状态 241 | updateStatus("处理中...", "processing"); 242 | hideError(); // 清除任何显示的错误 243 | 244 | // 发送API请求,并指定音频格式为wav 245 | const response = await fetch('/process_audio', { 246 | method: 'POST', 247 | headers: { 248 | 'Content-Type': 'application/json', 249 | }, 250 | body: JSON.stringify({ 251 | audio_data: base64Audio, 252 | text_prompt: "", // 不再发送重复的系统提示,使用空字符串 253 | audio_format: "wav" // 指定音频格式为wav 254 | }), 255 | }); 256 | 257 | if (!response.ok) { 258 | const errorMsg = `服务器错误: ${response.status} ${response.statusText}`; 259 | showError(errorMsg); 260 | throw new Error(errorMsg); 261 | } 262 | 263 | const result = await response.json(); 264 | addLog(`收到API响应: 文本长度: ${result.text.length} 字符`); 265 | 266 | if (result.audio) { 267 | addLog(`收到音频响应,大小: ${result.audio.length} 字符`); 268 | } 269 | 270 | return result; 271 | } catch (error) { 272 | addLog(`处理音频请求出错: ${error.message}`); 273 | showError(`处理错误: ${error.message}`); 274 | if (error.name === 'AbortError') { 275 | throw new Error('请求超时'); 276 | } 277 | throw error; 278 | } 279 | } 280 | 281 | // 播放Base64编码的音频 282 | function playAudio(base64Audio) { 283 | return new Promise((resolve, reject) => { 284 | try { 285 | // 检测是否为iOS设备 286 | const isIOS = /iPad|iPhone|iPod/.test(navigator.userAgent); 287 | addLog(`当前设备: ${isIOS ? 'iOS' : '非iOS'}`); 288 | 289 | if (isIOS) { 290 | // iOS设备特殊处理 - 使用可见的音频控件 291 | addLog("iOS设备,使用特殊播放模式"); 292 | 293 | // 获取iOS专用播放器元素 294 | const iosPlayerContainer = document.getElementById('iosAudioPlayer'); 295 | const iosAudio = document.getElementById('iosAudio'); 296 | 297 | if (!iosPlayerContainer || !iosAudio) { 298 | addLog("警告: 未找到iOS音频播放器元素"); 299 | resolve(); // 继续流程 300 | return; 301 | } 302 | 303 | // 显示播放器 304 | iosPlayerContainer.style.display = 'block'; 305 | 306 | // 设置音频源 307 | iosAudio.src = `data:audio/wav;base64,${base64Audio}`; 308 | 309 | // 添加事件监听 310 | iosAudio.onended = () => { 311 | addLog("iOS音频播放完成"); 312 | // 隐藏播放器 313 | iosPlayerContainer.style.display = 'none'; 314 | resolve(); 315 | }; 316 | 317 | iosAudio.onerror = (e) => { 318 | addLog(`iOS音频播放错误: ${e.message || '未知错误'}`); 319 | // 隐藏播放器 320 | iosPlayerContainer.style.display = 'none'; 321 | 322 | // 尝试使用系统TTS作为备选方案 323 | addLog("尝试使用系统TTS作为备选方案..."); 324 | // 简单消息告知用户 325 | addConversation('system', '(iOS设备无法播放音频,请检查浏览器权限设置并允许自动播放,或点击"清除历史"按钮后重试)'); 326 | resolve(); 327 | }; 328 | 329 | // 模拟用户交互触发播放 330 | addLog("iOS音频已准备,请点击播放按钮"); 331 | 332 | } else { 333 | // 非iOS设备使用原有方法 334 | const audio = new Audio(); 335 | 336 | // 监听播放结束事件 337 | audio.addEventListener('ended', () => { 338 | addLog("音频播放完成"); 339 | resolve(); 340 | }); 341 | audio.addEventListener('error', (e) => { 342 | addLog(`音频播放错误: ${e.message}`); 343 | reject(e); 344 | }); 345 | 346 | // 设置音频源 347 | audio.src = `data:audio/wav;base64,${base64Audio}`; 348 | 349 | // 播放音频 350 | audio.play().catch(e => { 351 | addLog(`播放音频失败: ${e.message}`); 352 | reject(e); 353 | }); 354 | } 355 | } catch (error) { 356 | addLog(`音频播放设置失败: ${error.message}`); 357 | reject(error); 358 | } 359 | }); 360 | } 361 | 362 | // 初始化VAD 363 | async function initVAD() { 364 | try { 365 | myvad = await vad.MicVAD.new({ 366 | onSpeechStart: () => { 367 | if (!isProcessing && 
362 | // Initialize the VAD
363 | async function initVAD() {
364 |     try {
365 |         myvad = await vad.MicVAD.new({
366 |             onSpeechStart: () => {
367 |                 if (!isProcessing && !isVADPaused) {
368 |                     updateStatus("正在聆听...", "listening");
369 |                     addLog("检测到语音开始");
370 | 
371 |                     // Activate the waveform display
372 |                     waveBars.forEach(bar => {
373 |                         bar.style.animationPlayState = 'running';
374 |                     });
375 |                 }
376 |             },
377 |             onSpeechEnd: async (audio) => {
378 |                 if (isProcessing || isVADPaused) return;
379 |                 isProcessing = true;
380 |                 updateProcessingStatus('recording');
381 |                 addLog("检测到语音结束");
382 | 
383 |                 // Log the audio object's type for debugging
384 |                 console.log("VAD音频数据:", audio);
385 |                 if (audio) {
386 |                     addLog(`音频数据类型: ${audio.constructor.name}, 长度: ${audio.length || 0}`);
387 |                 } else {
388 |                     addLog("警告: 收到空的音频数据");
389 |                 }
390 | 
391 |                 // Show the typing indicator
392 |                 showTypingIndicator();
393 | 
394 |                 // Pause the VAD instead of stopping it
395 |                 try {
396 |                     if (myvad && typeof myvad.pause === 'function') {
397 |                         await myvad.pause();
398 |                         isVADPaused = true;
399 |                         addLog("VAD已暂停");
400 |                     }
401 |                 } catch (e) {
402 |                     console.error("暂停VAD时出错:", e);
403 |                     addLog(`错误: 暂停VAD失败 - ${e.message}`);
404 |                 }
405 | 
406 |                 // Maximum number of retries
407 |                 const maxRetries = 2;
408 |                 let retryCount = 0;
409 |                 let success = false;
410 | 
411 |                 while (retryCount <= maxRetries && !success) {
412 |                     try {
413 |                         // Make sure we actually have usable audio data
414 |                         if (!audio || !(audio instanceof Float32Array || Array.isArray(audio))) {
415 |                             throw new Error("无法获取有效的音频数据");
416 |                         }
417 | 
418 |                         // Show a hint when retrying
419 |                         if (retryCount > 0) {
420 |                             addLog(`正在重试处理音频... (第${retryCount}次)`);
421 |                             updateProcessingStatus(`重试处理... (${retryCount}/${maxRetries})`, retryCount / maxRetries);
422 |                         }
423 | 
424 |                         // Add a simple placeholder user message, since the VAD provides no transcript
425 |                         if (retryCount === 0) {
426 |                             addConversation('user', '(已检测到语音)');
427 |                         }
428 | 
429 |                         // Process the Float32Array audio data directly
430 |                         const result = await processAudio(audio);
431 | 
432 |                         // Hide the typing indicator
433 |                         hideTypingIndicator();
434 | 
435 |                         // Add the AI response to the conversation
436 |                         addConversation('ai', result.text);
437 | 
438 |                         // If the response includes audio, play it
439 |                         if (result.audio) {
440 |                             updateProcessingStatus('speaking');
441 |                             await playAIResponse(result.audio);
442 |                         } else {
443 |                             // Otherwise fall back to the browser's TTS
444 |                             updateProcessingStatus('speaking');
445 |                             await playTextAudio(result.text);
446 |                             updateProcessingStatus('complete');
447 |                         }
448 | 
449 |                         // Mark success
450 |                         success = true;
451 | 
452 |                     } catch (error) {
453 |                         retryCount++;
454 |                         console.error(`处理音频时出错 (尝试 ${retryCount}/${maxRetries}):`, error);
455 |                         addLog(`错误: ${error.message}`);
456 | 
457 |                         if (retryCount > maxRetries) {
458 |                             hideTypingIndicator();
459 |                             addConversation('ai', "很抱歉,处理您的请求时遇到问题。请再试一次或检查您的网络连接。");
460 |                             showError(`处理失败,已尝试 ${maxRetries} 次: ${error.message}`);
461 |                             updateProcessingStatus('error');
462 |                         } else {
463 |                             // Retry after a short delay
464 |                             updateProcessingStatus('retrying', retryCount / maxRetries);
465 |                             await new Promise(resolve => setTimeout(resolve, 1000));
466 |                         }
467 |                     }
468 |                 }
469 | 
470 |                 // Resume the VAD once playback has finished
471 |                 resumeVAD();
472 |             },
473 |             // Other VAD configuration parameters
474 |             positiveSpeechThreshold: 0.70,
475 |             negativeSpeechThreshold: 0.50,
476 |             model: "v5",
477 |         });
478 | 
479 |         // Start the VAD
480 |         await myvad.start();
481 |         isVADPaused = false;
482 |         addLog("VAD已启动");
483 |     } catch (error) {
484 |         console.error("VAD初始化失败:", error);
485 |         addLog(`错误: VAD初始化失败 - ${error.message}`);
486 |     }
487 | }
488 | 
489 | // Show the typing indicator
490 | function showTypingIndicator() {
491 |     const conversationContent = document.getElementById('conversationContent');
492 |     const typingDiv = document.createElement('div');
493 |     typingDiv.className = 'typing-indicator';
494 |     typingDiv.id = 'typingIndicator';
495 | 
496 |     for (let i = 0; i < 3; i++) {
497 |         const dot = document.createElement('div');
498 |         dot.className = 'typing-dot';
499 |         typingDiv.appendChild(dot);
500 |     }
501 | 
502 |     conversationContent.appendChild(typingDiv);
503 |     conversationContent.scrollTop = conversationContent.scrollHeight;
504 | }
505 | 
506 | // Hide the typing indicator
507 | function hideTypingIndicator() {
508 |     const typingIndicator = document.getElementById('typingIndicator');
509 |     if (typingIndicator) {
510 |         typingIndicator.remove();
511 |     }
512 | }
513 | 
514 | // Resume VAD listening
515 | async function resumeVAD() {
516 |     if (!myvad || !isVADPaused) return;
517 |     try {
518 |         // Some VAD implementations may need re-initialization rather than a plain start
519 |         if (typeof myvad.start === 'function') {
520 |             await myvad.start();
521 |             isVADPaused = false;
522 |             isProcessing = false;
523 |             updateProcessingStatus('listening');
524 |             addLog("VAD已恢复");
525 |         }
526 |     } catch (e) {
527 |         console.error("恢复VAD时出错:", e);
528 |         addLog(`错误: 恢复VAD失败 - ${e.message}`);
529 | 
530 |         // On failure, try to re-initialize from scratch
531 |         await initVAD();
532 |     }
533 | }
534 | 
535 | // Play the AI response audio returned by the server
536 | async function playAIResponse(base64Audio) {
537 |     updateProcessingStatus('speaking');
538 |     addLog("开始播放AI响应音频");
539 | 
540 |     // Simulate waveform activity while speaking
541 |     let speakingInterval = setInterval(() => {
542 |         const level = 0.5 + Math.random() * 0.5;
543 |         updateWaveActivityLevel(level);
544 |     }, 100);
545 | 
546 |     try {
547 |         await playAudio(base64Audio);
548 |         addLog("AI响应音频播放完成");
549 |     } catch (error) {
550 |         addLog(`播放音频失败: ${error.message}`);
551 |     } finally {
552 |         clearInterval(speakingInterval);
553 |         updateWaveActivityLevel(0);
554 |         updateProcessingStatus('listening');
555 |     }
556 | }
557 | 
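
// playTextAudio() below works around two SpeechSynthesis quirks that iOS
// Safari is known for: speak() is only reliable shortly after a user gesture
// (hence the cancel() followed by a 300 ms setTimeout), and long utterances
// can be cut off mid-sentence unless the engine is periodically paused and
// resumed while it is speaking.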
558 | // Speak text with the browser's speech synthesis
559 | async function playTextAudio(text) {
560 |     addLog("使用浏览器语音合成播放文本");
561 | 
562 |     // Detect whether this is an iOS device
563 |     const isIOS = /iPad|iPhone|iPod/.test(navigator.userAgent);
564 | 
565 |     // Simulate waveform activity while speaking
566 |     let speakingInterval = setInterval(() => {
567 |         const level = 0.5 + Math.random() * 0.5;
568 |         updateWaveActivityLevel(level);
569 |     }, 100);
570 | 
571 |     return new Promise((resolve) => {
572 |         if ('speechSynthesis' in window) {
573 |             const utterance = new SpeechSynthesisUtterance(text);
574 | 
575 |             // Voice parameters
576 |             utterance.volume = 1.0;
577 |             utterance.rate = 1.0;
578 |             utterance.pitch = 1.0;
579 |             utterance.lang = 'zh-CN';
580 | 
581 |             utterance.onstart = () => {
582 |                 addLog(`语音合成开始播放 (${isIOS ? 'iOS' : '非iOS'}设备)`);
583 |             };
584 | 
585 |             utterance.onend = () => {
586 |                 clearInterval(speakingInterval);
587 |                 updateWaveActivityLevel(0);
588 |                 addLog("AI响应播放完成");
589 |                 resolve();
590 |             };
591 | 
592 |             utterance.onerror = (event) => {
593 |                 addLog(`语音合成错误: ${event.error}`);
594 |                 clearInterval(speakingInterval);
595 |                 updateWaveActivityLevel(0);
596 |                 resolve();
597 |             };
598 | 
599 |             // iOS needs special handling
600 |             if (isIOS) {
601 |                 addLog("iOS设备,使用特殊TTS处理");
602 | 
603 |                 // Cancel any synthesis already in progress
604 |                 window.speechSynthesis.cancel();
605 | 
606 |                 // Defer with a timer to work around iOS restrictions
607 |                 setTimeout(() => {
608 |                     try {
609 |                         // Speak short text directly
610 |                         window.speechSynthesis.speak(utterance);
611 | 
612 |                         // Long text can be cut off on iOS, so it needs segmented playback
613 |                         if (text.length > 100) {
614 |                             addLog("iOS上检测到长文本,使用分段播放");
615 | 
616 |                             // Segmented playback of long text would be added here
617 |                             // (left as a future enhancement)
618 | 
619 |                             // iOS Safari needs the speech synthesis kept alive
620 |                             const iosInterval = setInterval(() => {
621 |                                 if (!window.speechSynthesis.speaking) {
622 |                                     clearInterval(iosInterval);
623 |                                     return;
624 |                                 }
625 |                                 // Pausing and resuming prevents the cut-off problem on iOS
626 |                                 window.speechSynthesis.pause();
627 |                                 setTimeout(() => window.speechSynthesis.resume(), 50);
628 |                             }, 5000);
629 |                         }
630 |                     } catch (e) {
631 |                         addLog(`iOS语音合成异常: ${e.message}`);
632 |                         clearInterval(speakingInterval);
633 |                         updateWaveActivityLevel(0);
634 |                         resolve();
635 |                     }
636 |                 }, 300); // run after a 300 ms delay
637 | 
638 |             } else {
639 |                 // Handling for non-iOS devices
640 |                 try {
641 |                     // Call the API directly on non-iOS devices
642 |                     window.speechSynthesis.cancel(); // cancel any existing speech
643 |                     window.speechSynthesis.speak(utterance);
644 |                 } catch (e) {
645 |                     addLog(`语音合成异常: ${e.message}`);
646 |                     clearInterval(speakingInterval);
647 |                     updateWaveActivityLevel(0);
648 |                     resolve();
649 |                 }
650 |             }
651 |         } else {
652 |             addLog("没有语音合成API");
653 |             // Without a speech synthesis API, just simulate a playback delay
654 |             setTimeout(() => {
655 |                 clearInterval(speakingInterval);
656 |                 updateWaveActivityLevel(0);
657 |                 resolve();
658 |             }, 2000);
659 |         }
660 |     });
661 | }
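
// startConversation() below is wired to a button click on purpose: browser
// autoplay policies (iOS Safari in particular) only allow an AudioContext to
// be resumed and speechSynthesis to speak after a user gesture, so both are
// "unlocked" inside the click handler before the VAD starts.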
addLog("音频上下文已关闭"); 724 | } 725 | 726 | // 重置状态 727 | myvad = null; 728 | audioContext = null; 729 | isProcessing = false; 730 | isVADPaused = false; 731 | 732 | // 更新UI 733 | document.getElementById('startBtn').disabled = false; 734 | document.getElementById('stopBtn').disabled = true; 735 | updateProcessingStatus('stopped'); 736 | 737 | // 添加结束消息 738 | addConversation('ai', '对话已结束。如需继续,请点击"启动对话"按钮。'); 739 | } catch (error) { 740 | console.error("停止时出错:", error); 741 | addLog(`错误: ${error.message}`); 742 | showError(`停止失败: ${error.message}`); 743 | } 744 | } 745 | 746 | // 更新状态显示 747 | function updateStatus(text, state) { 748 | const statusElement = document.getElementById('status'); 749 | const indicator = document.getElementById('statusIndicator'); 750 | 751 | statusElement.textContent = text; 752 | 753 | // 移除所有状态类 754 | statusElement.className = 'status'; 755 | indicator.className = 'status-indicator'; 756 | 757 | // 添加新状态类 758 | if (state) { 759 | statusElement.classList.add(state); 760 | indicator.classList.add(state); 761 | } 762 | } 763 | 764 | // 添加日志条目 765 | function addLog(message) { 766 | const logContent = document.getElementById('logContent'); 767 | const now = new Date(); 768 | const timeString = now.toLocaleTimeString(); 769 | 770 | const logEntry = document.createElement('div'); 771 | logEntry.className = 'log-entry'; 772 | 773 | const timeSpan = document.createElement('span'); 774 | timeSpan.className = 'log-time'; 775 | timeSpan.textContent = timeString; 776 | 777 | const messageSpan = document.createElement('span'); 778 | messageSpan.textContent = message; 779 | 780 | logEntry.appendChild(timeSpan); 781 | logEntry.appendChild(messageSpan); 782 | logContent.appendChild(logEntry); 783 | logContent.scrollTop = logContent.scrollHeight; 784 | } 785 | 786 | // 添加对话消息 787 | function addConversation(speaker, message) { 788 | const conversationContent = document.getElementById('conversationContent'); 789 | const messageDiv = document.createElement('div'); 790 | messageDiv.className = speaker === 'user' ? 'user-message message' : 'ai-message message'; 791 | 792 | const senderDiv = document.createElement('div'); 793 | senderDiv.className = 'message-sender'; 794 | senderDiv.textContent = speaker === 'user' ? 
746 | // Update the status display
747 | function updateStatus(text, state) {
748 |     const statusElement = document.getElementById('status');
749 |     const indicator = document.getElementById('statusIndicator');
750 | 
751 |     statusElement.textContent = text;
752 | 
753 |     // Remove all state classes
754 |     statusElement.className = 'status';
755 |     indicator.className = 'status-indicator';
756 | 
757 |     // Add the new state class
758 |     if (state) {
759 |         statusElement.classList.add(state);
760 |         indicator.classList.add(state);
761 |     }
762 | }
763 | 
764 | // Append a log entry
765 | function addLog(message) {
766 |     const logContent = document.getElementById('logContent');
767 |     const now = new Date();
768 |     const timeString = now.toLocaleTimeString();
769 | 
770 |     const logEntry = document.createElement('div');
771 |     logEntry.className = 'log-entry';
772 | 
773 |     const timeSpan = document.createElement('span');
774 |     timeSpan.className = 'log-time';
775 |     timeSpan.textContent = timeString;
776 | 
777 |     const messageSpan = document.createElement('span');
778 |     messageSpan.textContent = message;
779 | 
780 |     logEntry.appendChild(timeSpan);
781 |     logEntry.appendChild(messageSpan);
782 |     logContent.appendChild(logEntry);
783 |     logContent.scrollTop = logContent.scrollHeight;
784 | }
785 | 
786 | // Append a conversation message
787 | function addConversation(speaker, message) {
788 |     const conversationContent = document.getElementById('conversationContent');
789 |     const messageDiv = document.createElement('div');
790 |     messageDiv.className = speaker === 'user' ? 'user-message message' : 'ai-message message';
791 | 
792 |     const senderDiv = document.createElement('div');
793 |     senderDiv.className = 'message-sender';
794 |     senderDiv.textContent = speaker === 'user' ? '你说' : 'AI助手';
795 | 
796 |     const textDiv = document.createElement('div');
797 |     textDiv.textContent = message;
798 | 
799 |     messageDiv.appendChild(senderDiv);
800 |     messageDiv.appendChild(textDiv);
801 |     conversationContent.appendChild(messageDiv);
802 |     conversationContent.scrollTop = conversationContent.scrollHeight;
803 | }
804 | 
805 | // Clean up when the page unloads
806 | window.addEventListener('beforeunload', () => {
807 |     if (myvad || audioContext) {
808 |         stopConversation();
809 |     }
810 |     if (waveInterval) {
811 |         clearInterval(waveInterval);
812 |     }
813 | });
814 | 
815 | // Simple error display helpers
816 | function hideError() {
817 |     const errorElement = document.getElementById('errorMsg');
818 |     if (errorElement) {
819 |         errorElement.style.display = 'none';
820 |         errorElement.textContent = '';
821 |     }
822 | }
823 | 
824 | function showError(message) {
825 |     const errorElement = document.getElementById('errorMsg');
826 |     if (errorElement) {
827 |         errorElement.textContent = message;
828 |         errorElement.style.display = 'block';
829 |     } else {
830 |         // If the element does not exist, record the error in the log instead
831 |         addLog(`错误: ${message}`);
832 |     }
833 | }
834 | 
835 | // Clear the conversation history
836 | async function clearChatHistory() {
837 |     try {
838 |         updateStatus("清除历史中...", "processing");
839 | 
840 |         // Send the clear-history request
841 |         const response = await fetch('/clear_history', {
842 |             method: 'POST'
843 |         });
844 | 
845 |         if (!response.ok) {
846 |             throw new Error(`HTTP错误! 状态: ${response.status}`);
847 |         }
848 | 
849 |         const result = await response.json();
850 |         addLog(`清除历史结果: ${result.message}`);
851 | 
852 |         // Clear the conversation transcript in the UI
853 |         const conversationContent = document.getElementById('conversationContent');
854 |         conversationContent.innerHTML = '';
855 | 
856 |         // Add a system message
857 |         addConversation('ai', '对话历史已清除,可以开始新的对话了。');
858 | 
859 |         updateStatus("历史已清除", "");
860 |         setTimeout(() => updateStatus("等待语音输入...", "listening"), 1500);
861 | 
862 |     } catch (error) {
863 |         console.error("清除历史时出错:", error);
864 |         addLog(`错误: ${error.message}`);
865 |         showError(`清除历史失败: ${error.message}`);
866 |         updateStatus("清除历史失败", "error");
867 |     }
868 | }
--------------------------------------------------------------------------------