├── .gitignore
├── LICENSE
├── README.md
├── app
│   ├── app.js
│   ├── assets
│   │   ├── input.wav
│   │   └── output.wav
│   ├── configs
│   │   ├── common.res
│   │   ├── kobe.pmdl
│   │   ├── kobe.wav
│   │   ├── mamba.pmdl
│   │   └── volute.pmdl
│   ├── services
│   │   ├── snowboy.service.js
│   │   ├── tulingbot.service.js
│   │   ├── xunfeiiat.service.js
│   │   └── xunfeitts.service.js
│   └── utils
│       └── auth.js
└── package.json
/.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | node_modules/ 3 | images/ 4 | .vscode/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 null仔 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## volute 是什么? 2 | 3 | > volute(蜗壳)是一个使用 Raspberry Pi + Node.js 制作的语音助手。 4 | 5 | ## 什么是树莓派? 6 | 7 | ![raspberry-pi](https://s1.ax1x.com/2020/10/22/BiTO76.png) 8 | 9 | ![raspberry-pi-4](https://s1.ax1x.com/2020/10/22/Bi7lBq.png) 10 | 11 | 树莓派(英语:Raspberry Pi)是基于 Linux 的单板计算机,由英国树莓派基金会开发,目的是以低价硬件及自由软件促进学校的基本计算机科学教育。 12 | 13 | 树莓派每一代均使用博通(Broadcom)出产的 ARM 架构处理器,如今生产的机型内存在 2GB 和 8GB 之间,主要使用 SD 卡或者 TF 卡作为存储媒体,配备 USB 接口、HDMI 的视频输出(支持声音输出)和 RCA 端子输出,内置 Ethernet/WLAN/Bluetooth 网络链接的方式(依据型号决定),并且可使用多种操作系统。产品线型号分为 A 型、B 型、Zero 型和 ComputeModule 计算卡。 14 | 15 | > 简单地说,这是一台可以放到口袋里的电脑!! 16 | 17 | ## 什么是 Node.js? 18 | 19 | ![node-js](https://s1.ax1x.com/2020/10/22/Bi7T58.jpg) 20 | 21 | > Node.js 是一个能执行 JavaScript 的环境,一个事件驱动 I/O 的 JavaScript 环境,基于 Google 的 V8 引擎。 22 | 23 | ## 什么是人机对话系统? 24 | 25 | ![chatbot](https://s1.ax1x.com/2020/10/22/Biqiu9.png) 26 | 27 | > 人机对话(Human-Machine Conversation)是指让机器理解和运用自然语言实现人机通信的技术。 28 | 29 | 对话系统大致可分为 5 个基本模块:语音识别(ASR)、自然语言理解(NLU)、对话管理(DM)、自然语言生成(NLG)、语音合成(TTS)。 30 | 31 | - 语音识别(ASR):完成语音到文本的转换,将用户说话的声音转化为文字。 32 | - 自然语言理解(NLU):完成对文本的语义解析,提取关键信息,进行意图识别与实体识别。 33 | - 对话管理(DM):负责对话状态维护、数据库查询、上下文管理等。 34 | - 自然语言生成(NLG):生成相应的自然语言文本。 35 | - 语音合成(TTS):将生成的文本转换为语音。 36 | 37 | ## 材料准备 38 | 39 | - 树莓派 4B 主板 40 | - 树莓派 5V3A Type-C 接口电源 41 | - 微型 USB 麦克风 42 | - 迷你音箱 43 | - 16G TF 卡 44 | - 川宇读卡器 45 | - 杜邦线,外壳,散热片... 
46 | 47 | ![material](https://s1.ax1x.com/2020/10/22/BiDGbn.jpg) 48 | 49 | ## 树莓派系统安装及基础配置 50 | 51 | 新的树莓派不像你买的 Macbook Pro 一样开机就能用 🐶,想要顺利体验树莓派,还得一步一步来~ 52 | 53 | ### 烧录操作系统 54 | 55 | 树莓派没有硬盘结构,仅有一个 micro SD 卡插槽用于存储,因此要把操作系统装到 micro SD 卡中。 56 | 57 | 树莓派支持许多操作系统,这里选择的是官方推荐的 Raspbian,这是一款基于 Debian Linux 的树莓派专用系统,适用于树莓派所有的型号。 58 | 59 | 安装系统我用的是 Raspberry Pi Imager 工具为树莓派烧录系统镜像。 60 | 61 | ![imager](https://s1.ax1x.com/2020/10/22/BF0X5V.png) 62 | 63 | ### 基础配置 64 | 65 | 要对树莓派进行配置,首先要启动系统(我们安装的是系统镜像,可免安装直接进入),然后将树莓派连接显示器即可看到系统桌面,我这里使用的是另一种方法: 66 | 67 | - 使用 IP Scanner 工具 扫描出 Raspberry Pi 的 IP 68 | 69 | ![ip-scanner](https://s1.ax1x.com/2020/10/22/BkEXT0.png) 70 | 71 | - 扫描出 IP 后使用 VNC Viewer 工具 连接进系统 72 | 73 | ![vnc-viewer](https://s1.ax1x.com/2020/10/22/BkEcyd.png) 74 | 75 | - 也可以直接 ssh 连接,然后通过 raspi-config 命令进行配置 76 | 77 | ![ssh-pi](https://s1.ax1x.com/2020/10/22/BkV6hT.png) 78 | 79 | - 配置网络/分辨率/语言/输入输出音频等参数 80 | 81 | ![asound](https://s1.ax1x.com/2020/10/22/BkeLmd.png) 82 | 83 | ## volute 实现思路 84 | 85 | ![volute](https://s1.ax1x.com/2020/10/22/BiDrr9.png) 86 | 87 | ### 任务调度服务 88 | 89 | ```js 90 | const fs = require("fs"); 91 | const path = require("path"); 92 | const Speaker = require("speaker"); 93 | const { record } = require("node-record-lpcm16"); 94 | const XunFeiIAT = require("./services/xunfeiiat.service"); 95 | const XunFeiTTS = require("./services/xunfeitts.service"); 96 | const initSnowboy = require("./services/snowboy.service"); 97 | const TulingBotService = require("./services/tulingbot.service"); 98 | // 任务调度服务 99 | const taskScheduling = { 100 | // 麦克风 101 | mic: null, 102 | speaker: null, 103 | detector: null, 104 | // 音频输入流 105 | inputStream: null, 106 | // 音頻輸出流 107 | outputStream: null, 108 | init() { 109 | // 初始化snowboy 110 | this.detector = initSnowboy({ 111 | record: this.recordSound.bind(this), 112 | stopRecord: this.stopRecord.bind(this), 113 | }); 114 | // 管道流,将麦克风接收到的流传递给snowboy 115 | this.mic.pipe(this.detector); 116 | }, 117 | start() { 118 | // 监听麦克风输入流 119 | this.mic = record({ 120 | sampleRate: 16000, // 采样率 121 | threshold: 0.5, 122 | verbose: true, 123 | recordProgram: "arecord", 124 | }).stream(); 125 | this.init(); 126 | }, 127 | // 记录音频输入 128 | recordSound() { 129 | // 每次记录前,先停止上次未播放完成的输出流 130 | this.stopSpeak(); 131 | console.log("start record"); 132 | // 创建可写流 133 | this.inputStream = fs.createWriteStream( 134 | path.resolve(__dirname, "./assets/input.wav"), 135 | { 136 | encoding: "binary", 137 | } 138 | ); 139 | // 管道流,将麦克风接受到的输入流 传递给 创建的可写流 140 | this.mic.pipe(this.inputStream); 141 | }, 142 | // 停止音频输入 143 | stopRecord() { 144 | if (this.inputStream) { 145 | console.log("stop record"); 146 | // 解绑this.mac绑定的管道流 147 | this.mic.unpipe(this.inputStream); 148 | this.mic.unpipe(this.detector); 149 | process.nextTick(() => { 150 | // 销毁输入流 151 | this.inputStream.destroy(); 152 | this.inputStream = null; 153 | // 重新初始化 154 | this.init(); 155 | // 调用语音听写服务 156 | this.speech2Text(); 157 | }); 158 | } 159 | }, 160 | // speech to text 161 | speech2Text() { 162 | // 实例化 语音听写服务 163 | const iatService = new XunFeiIAT({ 164 | onReply: (msg) => { 165 | console.log("msg", msg); 166 | // 回调,调用聊天功能 167 | this.onChat(msg); 168 | }, 169 | }); 170 | iatService.init(); 171 | }, 172 | // 聊天->图灵机器人 173 | onChat(text) { 174 | // 实例化聊天机器人 175 | TulingBotService.start(text).then((res) => { 176 | console.log(res); 177 | // 接收到聊天消息,调用语音合成服务 178 | this.text2Speech(res); 179 | }); 180 | }, 181 | // text to speech 182 | text2Speech(text) { 183 | // 实例化 语音合成服务 184 | const ttsService = 
new XunFeiTTS({ 185 | text, 186 | onDone: () => { 187 | console.log("onDone"); 188 | this.onSpeak(); 189 | }, 190 | }); 191 | ttsService.init(); 192 | }, 193 | // 播放,音频输出 194 | onSpeak() { 195 | // 实例化speaker,用于播放语音 196 | this.speaker = new Speaker({ 197 | channels: 1, 198 | bitDepth: 16, 199 | sampleRate: 16000, 200 | }); 201 | // 创建可读流 202 | this.outputStream = fs.createReadStream( 203 | path.resolve(__dirname, "./assets/output.pcm") 204 | ); 205 | // this is just to activate the speaker, ~1s delay 206 | this.speaker.write(Buffer.alloc(32000, 10)); 207 | // 管道流,将输出流传递给speaker进行播放 208 | this.outputStream.pipe(this.speaker); 209 | this.outputStream.on("end", () => { 210 | this.outputStream = null; 211 | this.speaker = null; 212 | }); 213 | }, 214 | // 停止播放 215 | stopSpeak() { 216 | this.outputStream && this.outputStream.unpipe(this.speaker); 217 | }, 218 | }; 219 | taskScheduling.start(); 220 | ``` 221 | 222 | ### 热词唤醒 Snowboy 223 | 224 | 语音助手像市面上的设备一样,需要先被唤醒。如果没有唤醒步骤,一直做监听的话,对存储资源和网络连接的需求是非常大的。 225 | 226 | Snowboy 是一款高度可定制的唤醒词检测引擎(Hotwords Detection Library),可以用于实时嵌入式系统,通过训练热词之后,可以离线运行,并且功耗很低。当前,它可以运行在 Raspberry Pi、(Ubuntu)Linux 和 Mac OS X 系统上。 227 | 228 | ![snowboy](https://s1.ax1x.com/2020/10/22/BirEzF.jpg) 229 | 230 | ```js 231 | const path = require("path"); 232 | const snowboy = require("snowboy"); 233 | const models = new snowboy.Models(); 234 | 235 | // 添加训练模型 236 | models.add({ 237 | file: path.resolve(__dirname, "../configs/volute.pmdl"), 238 | sensitivity: "0.5", 239 | hotwords: "volute", 240 | }); 241 | 242 | // 初始化 Detector 对象 243 | const detector = new snowboy.Detector({ 244 | resource: path.resolve(__dirname, "../configs/common.res"), 245 | models: models, 246 | audioGain: 1.0, 247 | applyFrontend: false, 248 | }); 249 | 250 | /** 251 | * 初始化 initSnowboy 252 | * 实现思路: 253 | * 1. 监听到热词,进行唤醒,开始录音 254 | * 2. 录音期间,有声音时,重置silenceCount参数 255 | * 3. 录音期间,未接收到声音时,对silenceCount进行累加,当累加值大于3时,停止录音 256 | */ 257 | function initSnowboy({ record, stopRecord }) { 258 | const MAX_SILENCE_COUNT = 3; 259 | let silenceCount = 0, 260 | speaking = false; 261 | /** 262 | * silence事件回调,没声音时触发 263 | */ 264 | const onSilence = () => { 265 | console.log("silence"); 266 | if (speaking && ++silenceCount > MAX_SILENCE_COUNT) { 267 | speaking = false; 268 | stopRecord && stopRecord(); 269 | detector.off("silence", onSilence); 270 | detector.off("sound", onSound); 271 | detector.off("hotword", onHotword); 272 | } 273 | }; 274 | /** 275 | * sound事件回调,有声音时触发 276 | */ 277 | const onSound = () => { 278 | console.log("sound"); 279 | if (speaking) { 280 | silenceCount = 0; 281 | } 282 | }; 283 | /** 284 | * hotword事件回调,监听到热词时触发 285 | */ 286 | const onHotword = (index, hotword, buffer) => { 287 | if (!speaking) { 288 | silenceCount = 0; 289 | speaking = true; 290 | record && record(); 291 | } 292 | }; 293 | detector.on("silence", onSilence); 294 | detector.on("sound", onSound); 295 | detector.on("hotword", onHotword); 296 | return detector; 297 | } 298 | 299 | module.exports = initSnowboy; 300 | ``` 301 | 
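上面的 initSnowboy 只加载了 volute.pmdl 一个模型。snowboy 的 Models 也支持同时添加多个训练好的模型(仓库 configs 目录下就还有 kobe.pmdl、mamba.pmdl),hotword 事件回调里的 index 与 hotword 参数可以用来区分命中的是哪个热词。下面是一个简单的示意(非项目实际代码,sensitivity 等取值仅为假设):

```js
const path = require("path");
const snowboy = require("snowboy");

// 同时加载多个热词模型(示意):volute 与 kobe 都可以唤醒
const models = new snowboy.Models();
models.add({
  file: path.resolve(__dirname, "../configs/volute.pmdl"),
  sensitivity: "0.5", // 假设值:越高越灵敏,也越容易误唤醒
  hotwords: "volute",
});
models.add({
  file: path.resolve(__dirname, "../configs/kobe.pmdl"),
  sensitivity: "0.5", // 假设值
  hotwords: "kobe",
});

const detector = new snowboy.Detector({
  resource: path.resolve(__dirname, "../configs/common.res"),
  models: models,
  audioGain: 1.0,
  applyFrontend: false,
});

// hotword 事件带有命中模型的序号与热词名,可据此做不同的响应
detector.on("hotword", (index, hotword) => {
  console.log(`被热词唤醒: #${index} ${hotword}`);
});
```

灵敏度越高越容易唤醒,但误唤醒也会随之增多,需要结合实际使用环境调整。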
302 | ### 语音听写 科大讯飞 API 303 | 304 | 语音转文字使用的是讯飞开放平台的语音听写服务。它可以将短音频(≤60 秒)精准识别成文字,除中文普通话和英文外,支持 25 种方言和 12 个语种,实时返回结果,达到边说边返回的效果。 305 | 306 | ```js 307 | require("dotenv").config(); 308 | const fs = require("fs"); 309 | const WebSocket = require("ws"); 310 | const { resolve } = require("path"); 311 | const { createAuthParams } = require("../utils/auth"); 312 | 313 | class XunFeiIAT { 314 | constructor({ onReply }) { 315 | 316 | // websocket 连接 317 | this.ws = null; 318 | // 返回结果,解析后的消息文字 319 | this.message = ""; 320 | this.onReply = onReply; 321 | // 需要进行转换的输入流 语音文件 322 | this.inputFile = resolve(__dirname, "../assets/input.wav"); 323 | // 接口 入参 324 | this.params = { 325 | host: "iat-api.xfyun.cn", 326 | path: "/v2/iat", 327 | apiKey: process.env.XUNFEI_API_KEY, 328 | secret: process.env.XUNFEI_SECRET, 329 | }; 330 | } 331 | // 生成websocket连接 332 | generateWsUrl() { 333 | const { host, path } = this.params; 334 | // 接口鉴权,参数加密 335 | const params = createAuthParams(this.params); 336 | return `ws://${host}${path}?${params}`; 337 | } 338 | // 初始化 339 | init() { 340 | const reqUrl = this.generateWsUrl(); 341 | this.ws = new WebSocket(reqUrl); 342 | this.initWsEvent(); 343 | } 344 | // 初始化websocket事件 345 | initWsEvent() { 346 | this.ws.on("open", this.onOpen.bind(this)); 347 | this.ws.on("error", this.onError); 348 | this.ws.on("close", this.onClose); 349 | this.ws.on("message", this.onMessage.bind(this)); 350 | } 351 | /** 352 | * websocket open事件,触发表示已成功建立连接 353 | */ 354 | onOpen() { 355 | console.log("open"); 356 | this.onPush(this.inputFile); 357 | } 358 | onPush(file) { 359 | this.pushAudioFile(file); 360 | } 361 | // websocket 消息接收 回调 362 | onMessage(data) { 363 | const payload = JSON.parse(data); 364 | if (payload.data && payload.data.result) { 365 | // 拼接消息结果 366 | this.message += payload.data.result.ws.reduce( 367 | (acc, item) => acc + item.cw.map((cw) => cw.w).join(""), 368 | "" 369 | ); 370 | // status 2表示结束 371 | if (payload.data.status === 2) { 372 | this.onReply(this.message); 373 | } 374 | } 375 | } 376 | // websocket 关闭事件 377 | onClose() { 378 | console.log("close"); 379 | } 380 | // websocket 错误事件 381 | onError(error) { 382 | console.log(error); 383 | } 384 | /** 385 | * 解析语音文件,将语音以二进制流的形式传送给后端 386 | */ 387 | pushAudioFile(audioFile) { 388 | this.message = ""; 389 | // 发送需要的载体参数 390 | const audioPayload = (statusCode, audioBase64) => ({ 391 | common: 392 | statusCode === 0 393 | ? { 394 | app_id: "5f6cab72", 395 | } 396 | : undefined, 397 | business: 398 | statusCode === 0 399 | ? { 400 | language: "zh_cn", 401 | domain: "iat", 402 | ptt: 0, 403 | } 404 | : undefined, 405 | data: { 406 | status: statusCode, 407 | format: "audio/L16;rate=16000", 408 | encoding: "raw", 409 | audio: audioBase64, 410 | }, 411 | }); 412 | const chunkSize = 9000; 413 | // 创建buffer,用于存储二进制数据 414 | const buffer = Buffer.alloc(chunkSize); 415 | // 打开语音文件 416 | fs.open(audioFile, "r", (err, fd) => { 417 | if (err) { 418 | throw err; 419 | } 420 | 421 | let i = 0; 422 | // 以二进制流的形式递归发送 423 | function readNextChunk() { 424 | fs.read(fd, buffer, 0, chunkSize, null, (errr, nread) => { 425 | if (errr) { 426 | throw errr; 427 | } 428 | // nread为0表示文件流已读完,发送传输结束标识(status=2) 429 | if (nread === 0) { 430 | this.ws.send( 431 | JSON.stringify({ 432 | data: { status: 2 }, 433 | }) 434 | ); 435 | 436 | return fs.close(fd, (err) => { 437 | if (err) { 438 | throw err; 439 | } 440 | }); 441 | } 442 | 443 | let data; 444 | if (nread < chunkSize) { 445 | data = buffer.slice(0, nread); 446 | } else { 447 | data = buffer; 448 | } 449 | 450 | const audioBase64 = data.toString("base64"); 451 | const payload = audioPayload(i >= 1 ? 1 : 0, audioBase64); 452 | this.ws.send(JSON.stringify(payload)); 453 | i++; 454 | readNextChunk.call(this); 455 | }); 456 | } 457 | 458 | readNextChunk.call(this); 459 | }); 460 | } 461 | } 462 | 463 | module.exports = XunFeiIAT; 464 | ``` 465 | 
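上面的语音听写服务,以及后文的图灵机器人、语音合成服务,都是通过 dotenv 从项目根目录的 .env 文件中读取密钥(.gitignore 中已忽略 .env,不会被提交)。下面是一个示例 .env:变量名取自本文代码,具体取值需要分别在讯飞开放平台和图灵机器人平台注册应用后获取,这里仅为占位:

```bash
# 讯飞开放平台(语音听写 / 语音合成)
XUNFEI_APP_ID=你的讯飞AppID
XUNFEI_API_KEY=你的讯飞APIKey
XUNFEI_SECRET=你的讯飞APISecret

# 图灵机器人
TULING_BOT_API_KEY=你的图灵机器人ApiKey
TULING_BOT_USER_ID=你的图灵机器人UserId
```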
466 | ### 聊天机器人 图灵机器人 API 467 | 468 | 图灵机器人 API V2.0 是基于图灵机器人平台语义理解、深度学习等核心技术,为广大开发者和企业提供的在线服务和开发接口。 469 | 470 | 目前 API 接口可调用聊天对话、语料库、技能三大模块的语料: 471 | 472 | 聊天对话是指平台免费提供的近 10 亿条公有对话语料,满足用户对话娱乐需求; 473 | 474 | 语料库是指用户在平台上传的私有语料,仅供个人查看使用,帮助用户最便捷地搭建专业领域的语料。 475 | 476 | 技能服务是指平台打包的 26 种实用服务技能。涵盖生活、出行、购物等多个领域,一站式满足用户需求。 477 | 478 | ```js 479 | require("dotenv").config(); 480 | const axios = require("axios"); 481 | 482 | // 太简单了..懒得解释 🐶 483 | 484 | const TulingBotService = { 485 | requestUrl: "http://openapi.tuling123.com/openapi/api/v2", 486 | start(text) { 487 | return new Promise((resolve) => { 488 | axios 489 | .post(this.requestUrl, { 490 | reqType: 0, 491 | perception: { 492 | inputText: { 493 | text, 494 | }, 495 | }, 496 | userInfo: { 497 | apiKey: process.env.TULING_BOT_API_KEY, 498 | userId: process.env.TULING_BOT_USER_ID, 499 | }, 500 | }) 501 | .then((res) => { 502 | // console.log(JSON.stringify(res.data, null, 2)); 503 | resolve(res.data.results[0].values.text); 504 | }); 505 | }); 506 | }, 507 | }; 508 | 509 | module.exports = TulingBotService; 510 | ``` 511 | 512 | ### 语音合成 科大讯飞 API 513 | 514 | 语音合成流式接口将文字信息转化为声音信息,同时提供了众多极具特色的发音人(音库)供您选择。 515 | 516 | 该语音能力是通过 Websocket API 的方式给开发者提供一个通用的接口。Websocket API 具备流式传输能力,适用于需要流式数据传输的 AI 服务场景。相较于 SDK,API 具有轻量、跨语言的特点;相较于 HTTP API,Websocket API 协议有原生支持跨域的优势。 517 | 518 | ```js 519 | require("dotenv").config(); 520 | const fs = require("fs"); 521 | const WebSocket = require("ws"); 522 | const { resolve } = require("path"); 523 | const { createAuthParams } = require("../utils/auth"); 524 | 525 | class XunFeiTTS { 526 | constructor({ text, onDone }) { 527 | 528 | this.ws = null; 529 | // 要转换的文字 530 | this.text = text; 531 | this.onDone = onDone; 532 | // 转换后的语音文件 533 | this.outputFile = resolve(__dirname, "../assets/output.pcm"); 534 | // 接口入参 535 | this.params = { 536 | host: "tts-api.xfyun.cn", 537 | path: "/v2/tts", 538 | appid: process.env.XUNFEI_APP_ID, 539 | apiKey: process.env.XUNFEI_API_KEY, 540 | secret: process.env.XUNFEI_SECRET, 541 | }; 542 | } 543 | // 
生成websocket连接 544 | generateWsUrl() { 545 | const { host, path } = this.params; 546 | const params = createAuthParams(this.params); 547 | return `ws://${host}${path}?${params}`; 548 | } 549 | // 初始化 550 | init() { 551 | const reqUrl = this.generateWsUrl(); 552 | console.log(reqUrl); 553 | this.ws = new WebSocket(reqUrl); 554 | this.initWsEvent(); 555 | } 556 | // 初始化websocket事件 557 | initWsEvent() { 558 | this.ws.on("open", this.onOpen.bind(this)); 559 | this.ws.on("error", this.onError); 560 | this.ws.on("close", this.onClose); 561 | this.ws.on("message", this.onMessage.bind(this)); 562 | } 563 | /** 564 | * websocket open事件,触发表示已成功建立连接 565 | */ 566 | onOpen() { 567 | console.log("open"); 568 | this.onSend(); 569 | if (fs.existsSync(this.outputFile)) { 570 | fs.unlinkSync(this.outputFile); 571 | } 572 | } 573 | // 发送要转换的参数信息 574 | onSend() { 575 | const frame = { 576 | // 填充common 577 | common: { 578 | app_id: this.params.appid, 579 | }, 580 | // 填充business 581 | business: { 582 | aue: "raw", 583 | auf: "audio/L16;rate=16000", 584 | vcn: "xiaoyan", 585 | tte: "UTF8", 586 | }, 587 | // 填充data 588 | data: { 589 | text: Buffer.from(this.text).toString("base64"), 590 | status: 2, 591 | }, 592 | }; 593 | this.ws.send(JSON.stringify(frame)); 594 | } 595 | // 保存转换后的语音结果 596 | onSave(data) { 597 | fs.writeFileSync(this.outputFile, data, { flag: "a" }); 598 | } 599 | // websocket 消息接收 回调 600 | onMessage(data, err) { 601 | if (err) return; 602 | const res = JSON.parse(data); 603 | if (res.code !== 0) { 604 | this.ws.close(); 605 | return; 606 | } 607 | // 接收消息结果并进行保存 608 | const audio = res.data.audio; 609 | const audioBuf = Buffer.from(audio, "base64"); 610 | this.onSave(audioBuf); 611 | if (res.code == 0 && res.data.status == 2) { 612 | this.ws.close(); 613 | this.onDone(); 614 | } 615 | } 616 | onClose() { 617 | console.log("close"); 618 | } 619 | onError(error) { 620 | console.log(error); 621 | } 622 | } 623 | 624 | module.exports = XunFeiTTS; 625 | ``` 626 | 627 | ## 效果演示 628 | 629 | [语雀-文章最底部可看效果](https://www.yuque.com/docs/share/df7fbb6d-d1ae-45cf-a7db-a37a38bd1e23?#%20%E3%80%8Avolute%E3%80%8B) 630 | 631 | ## 源码地址 632 | 633 | [Github 源码地址](https://github.com/webfansplz/volute) 634 | -------------------------------------------------------------------------------- /app/app.js: -------------------------------------------------------------------------------- 1 | const fs = require("fs"); 2 | const path = require("path"); 3 | const Speaker = require("speaker"); 4 | const { record } = require("node-record-lpcm16"); 5 | const XunFeiIAT = require("./services/xunfeiiat.service"); 6 | const XunFeiTTS = require("./services/xunfeitts.service"); 7 | const initSnowboy = require("./services/snowboy.service"); 8 | const TulingBotService = require("./services/tulingbot.service"); 9 | // 任务调度服务 10 | const taskScheduling = { 11 | // 麦克风 12 | mic: null, 13 | speaker: null, 14 | detector: null, 15 | // 音频输入流 16 | inputStream: null, 17 | // 音頻輸出流 18 | outputStream: null, 19 | init() { 20 | // 初始化snowboy 21 | this.detector = initSnowboy({ 22 | record: this.recordSound.bind(this), 23 | stopRecord: this.stopRecord.bind(this), 24 | }); 25 | // 管道流,将麦克风接收到的流传递给snowboy 26 | this.mic.pipe(this.detector); 27 | }, 28 | start() { 29 | // 监听麦克风输入流 30 | this.mic = record({ 31 | sampleRate: 16000, // 采样率 32 | threshold: 0.5, 33 | verbose: true, 34 | recordProgram: "arecord", 35 | }).stream(); 36 | this.init(); 37 | }, 38 | // 记录音频输入 39 | recordSound() { 40 | // 每次记录前,先停止上次未播放完成的输出流 41 | this.stopSpeak(); 42 | 
console.log("start record"); 43 | // 创建可写流 44 | this.inputStream = fs.createWriteStream( 45 | path.resolve(__dirname, "./assets/input.wav"), 46 | { 47 | encoding: "binary", 48 | } 49 | ); 50 | // 管道流,将麦克风接受到的输入流 传递给 创建的可写流 51 | this.mic.pipe(this.inputStream); 52 | }, 53 | // 停止音频输入 54 | stopRecord() { 55 | if (this.inputStream) { 56 | console.log("stop record"); 57 | // 解绑this.mac绑定的管道流 58 | this.mic.unpipe(this.inputStream); 59 | this.mic.unpipe(this.detector); 60 | process.nextTick(() => { 61 | // 销毁输入流 62 | this.inputStream.destroy(); 63 | this.inputStream = null; 64 | // 重新初始化 65 | this.init(); 66 | // 调用语音听写服务 67 | this.speech2Text(); 68 | }); 69 | } 70 | }, 71 | // speech to text 72 | speech2Text() { 73 | // 实例化 语音听写服务 74 | const iatService = new XunFeiIAT({ 75 | onReply: (msg) => { 76 | console.log("msg", msg); 77 | // 回调,调用聊天功能 78 | this.onChat(msg); 79 | }, 80 | }); 81 | iatService.init(); 82 | }, 83 | // 聊天->图灵机器人 84 | onChat(text) { 85 | // 实例化聊天机器人 86 | TulingBotService.start(text).then((res) => { 87 | console.log(res); 88 | // 接收到聊天消息,调用语音合成服务 89 | this.text2Speech(res); 90 | }); 91 | }, 92 | // text to speech 93 | text2Speech(text) { 94 | // 实例化 语音合成服务 95 | const ttsService = new XunFeiTTS({ 96 | text, 97 | onDone: () => { 98 | console.log("onDone"); 99 | this.onSpeak(); 100 | }, 101 | }); 102 | ttsService.init(); 103 | }, 104 | // 播放,音频输出 105 | onSpeak() { 106 | // 实例化speaker,用于播放语音 107 | this.speaker = new Speaker({ 108 | channels: 1, 109 | bitDepth: 16, 110 | sampleRate: 16000, 111 | }); 112 | // 创建可读流 113 | this.outputStream = fs.createReadStream( 114 | path.resolve(__dirname, "./assets/output.pcm") 115 | ); 116 | // this is just to activate the speaker, 2s delay 117 | this.speaker.write(Buffer.alloc(32000, 10)); 118 | // 管道流,将输出流传递给speaker进行播放 119 | this.outputStream.pipe(this.speaker); 120 | this.outputStream.on("end", () => { 121 | this.outputStream = null; 122 | this.speaker = null; 123 | }); 124 | }, 125 | // 停止播放 126 | stopSpeak() { 127 | this.outputStream && this.outputStream.unpipe(this.speaker); 128 | }, 129 | }; 130 | taskScheduling.start(); 131 | -------------------------------------------------------------------------------- /app/assets/input.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webfansplz/volute/dbca1ead68ca4bd7552dd5940c5a381d0374c513/app/assets/input.wav -------------------------------------------------------------------------------- /app/assets/output.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webfansplz/volute/dbca1ead68ca4bd7552dd5940c5a381d0374c513/app/assets/output.wav -------------------------------------------------------------------------------- /app/configs/common.res: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webfansplz/volute/dbca1ead68ca4bd7552dd5940c5a381d0374c513/app/configs/common.res -------------------------------------------------------------------------------- /app/configs/kobe.pmdl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webfansplz/volute/dbca1ead68ca4bd7552dd5940c5a381d0374c513/app/configs/kobe.pmdl -------------------------------------------------------------------------------- /app/configs/kobe.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/webfansplz/volute/dbca1ead68ca4bd7552dd5940c5a381d0374c513/app/configs/kobe.wav -------------------------------------------------------------------------------- /app/configs/mamba.pmdl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webfansplz/volute/dbca1ead68ca4bd7552dd5940c5a381d0374c513/app/configs/mamba.pmdl -------------------------------------------------------------------------------- /app/configs/volute.pmdl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webfansplz/volute/dbca1ead68ca4bd7552dd5940c5a381d0374c513/app/configs/volute.pmdl -------------------------------------------------------------------------------- /app/services/snowboy.service.js: -------------------------------------------------------------------------------- 1 | const path = require("path"); 2 | const snowboy = require("snowboy"); 3 | const models = new snowboy.Models(); 4 | 5 | // 添加训练模型 6 | models.add({ 7 | file: path.resolve(__dirname, "../configs/volute.pmdl"), 8 | sensitivity: "0.5", 9 | hotwords: "volute", 10 | }); 11 | 12 | // 初始化 Detector 对象 13 | const detector = new snowboy.Detector({ 14 | resource: path.resolve(__dirname, "../configs/common.res"), 15 | models: models, 16 | audioGain: 1.0, 17 | applyFrontend: false, 18 | }); 19 | 20 | /** 21 | * 初始化 initSnowboy 22 | * 实现思路: 23 | * 1. 监听到热词,进行唤醒,开始录音 24 | * 2. 录音期间,有声音时,重置silenceCount参数 25 | * 3. 录音期间,未接受到声音时,对silenceCount进行累加,当累加值大于3时,停止录音 26 | */ 27 | function initSnowboy({ record, stopRecord }) { 28 | const MAX_SILENCE_COUNT = 3; 29 | let silenceCount = 0, 30 | speaking = false; 31 | /** 32 | * silence事件回调,没声音时触发 33 | */ 34 | const onSilence = () => { 35 | console.log("silence"); 36 | if (speaking && ++silenceCount > MAX_SILENCE_COUNT) { 37 | speaking = false; 38 | stopRecord && stopRecord(); 39 | detector.off("silence", onSilence); 40 | detector.off("sound", onSound); 41 | detector.off("hotword", onHotword); 42 | } 43 | }; 44 | /** 45 | * sound事件回调,有声音时触发 46 | */ 47 | const onSound = () => { 48 | console.log("sound"); 49 | if (speaking) { 50 | silenceCount = 0; 51 | } 52 | }; 53 | /** 54 | * hotword事件回调,监听到热词时触发 55 | */ 56 | const onHotword = (index, hotword, buffer) => { 57 | if (!speaking) { 58 | silenceCount = 0; 59 | speaking = true; 60 | record && record(); 61 | } 62 | }; 63 | detector.on("silence", onSilence); 64 | detector.on("sound", onSound); 65 | detector.on("hotword", onHotword); 66 | return detector; 67 | } 68 | 69 | module.exports = initSnowboy; 70 | -------------------------------------------------------------------------------- /app/services/tulingbot.service.js: -------------------------------------------------------------------------------- 1 | require("dotenv").config(); 2 | const axios = require("axios"); 3 | 4 | // 太简单了..懒得解释 🐶 5 | 6 | const TulingBotService = { 7 | requestUrl: "http://openapi.tuling123.com/openapi/api/v2", 8 | start(text) { 9 | return new Promise((resolve) => { 10 | axios 11 | .post(this.requestUrl, { 12 | reqType: 0, 13 | perception: { 14 | inputText: { 15 | text, 16 | }, 17 | }, 18 | userInfo: { 19 | apiKey: process.env.TULING_BOT_API_KEY, 20 | userId: process.env.TULING_BOT_USER_ID, 21 | }, 22 | }) 23 | .then((res) => { 24 | // console.log(JSON.stringify(res.data, null, 2)); 25 | resolve(res.data.results[0].values.text); 26 | }); 27 | }); 28 | }, 29 | }; 30 | 31 | module.exports = TulingBotService; 32 | 
-------------------------------------------------------------------------------- /app/services/xunfeiiat.service.js: -------------------------------------------------------------------------------- 1 | require("dotenv").config(); 2 | const fs = require("fs"); 3 | const WebSocket = require("ws"); 4 | const { resolve } = require("path"); 5 | const { createAuthParams } = require("../utils/auth"); 6 | 7 | class XunFeiIAT { 8 | constructor({ onReply }) { 9 | // websocket 连接 10 | this.ws = null; 11 | // 返回结果,解析后的消息文字 12 | this.message = ""; 13 | this.onReply = onReply; 14 | // 需要进行转换的输入流 语音文件 15 | this.inputFile = resolve(__dirname, "../assets/input.wav"); 16 | // 接口 入参 17 | this.params = { 18 | host: "iat-api.xfyun.cn", 19 | path: "/v2/iat", 20 | apiKey: process.env.XUNFEI_API_KEY, 21 | secret: process.env.XUNFEI_SECRET, 22 | }; 23 | } 24 | // 生成websocket连接 25 | generateWsUrl() { 26 | const { host, path } = this.params; 27 | // 接口鉴权,参数加密 28 | const params = createAuthParams(this.params); 29 | return `ws://${host}${path}?${params}`; 30 | } 31 | // 初始化 32 | init() { 33 | const reqUrl = this.generateWsUrl(); 34 | this.ws = new WebSocket(reqUrl); 35 | this.initWsEvent(); 36 | } 37 | // 初始化websocket事件 38 | initWsEvent() { 39 | this.ws.on("open", this.onOpen.bind(this)); 40 | this.ws.on("error", this.onError); 41 | this.ws.on("close", this.onClose); 42 | this.ws.on("message", this.onMessage.bind(this)); 43 | } 44 | /** 45 | * websocket open事件,触发表示已成功建立连接 46 | */ 47 | onOpen() { 48 | console.log("open"); 49 | this.onPush(this.inputFile); 50 | } 51 | onPush(file) { 52 | this.pushAudioFile(file); 53 | } 54 | // websocket 消息接收 回调 55 | onMessage(data) { 56 | const payload = JSON.parse(data); 57 | if (payload.data && payload.data.result) { 58 | // 拼接消息结果 59 | this.message += payload.data.result.ws.reduce( 60 | (acc, item) => acc + item.cw.map((cw) => cw.w), 61 | "" 62 | ); 63 | // status 2表示结束 64 | if (payload.data.status === 2) { 65 | this.onReply(this.message); 66 | } 67 | } 68 | } 69 | // websocket 关闭事件 70 | onClose() { 71 | console.log("close"); 72 | } 73 | // websocket 错误事件 74 | onError(error) { 75 | console.log(error); 76 | } 77 | /** 78 | * 解析语音文件,将语音以二进制流的形式传送给后端 79 | */ 80 | pushAudioFile(audioFile) { 81 | this.message = ""; 82 | // 发送需要的载体参数 83 | const audioPayload = (statusCode, audioBase64) => ({ 84 | common: 85 | statusCode === 0 86 | ? { 87 | app_id: "5f6cab72", 88 | } 89 | : undefined, 90 | business: 91 | statusCode === 0 92 | ? 
{ 93 | language: "zh_cn", 94 | domain: "iat", 95 | ptt: 0, 96 | } 97 | : undefined, 98 | data: { 99 | status: statusCode, 100 | format: "audio/L16;rate=16000", 101 | encoding: "raw", 102 | audio: audioBase64, 103 | }, 104 | }); 105 | const chunkSize = 9000; 106 | // 创建buffer,用于存储二进制数据 107 | const buffer = Buffer.alloc(chunkSize); 108 | // 打开语音文件 109 | fs.open(audioFile, "r", (err, fd) => { 110 | if (err) { 111 | throw err; 112 | } 113 | 114 | let i = 0; 115 | // 以二进制流的形式递归发送 116 | function readNextChunk() { 117 | fs.read(fd, buffer, 0, chunkSize, null, (errr, nread) => { 118 | if (errr) { 119 | throw errr; 120 | } 121 | // nread表示文件流已读完,发送传输结束标识(status=2) 122 | if (nread === 0) { 123 | this.ws.send( 124 | JSON.stringify({ 125 | data: { status: 2 }, 126 | }) 127 | ); 128 | 129 | return fs.close(fd, (err) => { 130 | if (err) { 131 | throw err; 132 | } 133 | }); 134 | } 135 | 136 | let data; 137 | if (nread < chunkSize) { 138 | data = buffer.slice(0, nread); 139 | } else { 140 | data = buffer; 141 | } 142 | 143 | const audioBase64 = data.toString("base64"); 144 | const payload = audioPayload(i >= 1 ? 1 : 0, audioBase64); 145 | this.ws.send(JSON.stringify(payload)); 146 | i++; 147 | readNextChunk.call(this); 148 | }); 149 | } 150 | 151 | readNextChunk.call(this); 152 | }); 153 | } 154 | } 155 | 156 | module.exports = XunFeiIAT; 157 | -------------------------------------------------------------------------------- /app/services/xunfeitts.service.js: -------------------------------------------------------------------------------- 1 | require("dotenv").config(); 2 | const fs = require("fs"); 3 | const WebSocket = require("ws"); 4 | const { resolve } = require("path"); 5 | const { createAuthParams } = require("../utils/auth"); 6 | 7 | class XunFeiTTS { 8 | constructor({ text, onDone }) { 9 | this.ws = null; 10 | // 要转换的文字 11 | this.text = text; 12 | this.onDone = onDone; 13 | // 转换后的语音文件 14 | this.outputFile = resolve(__dirname, "../assets/output.pcm"); 15 | // 接口入参 16 | this.params = { 17 | host: "tts-api.xfyun.cn", 18 | path: "/v2/tts", 19 | appid: process.env.XUNFEI_APP_ID, 20 | apiKey: process.env.XUNFEI_API_KEY, 21 | secret: process.env.XUNFEI_SECRET, 22 | }; 23 | } 24 | // 生成websocket连接 25 | generateWsUrl() { 26 | const { host, path } = this.params; 27 | const params = createAuthParams(this.params); 28 | return `ws://${host}${path}?${params}`; 29 | } 30 | // 初始化 31 | init() { 32 | const reqUrl = this.generateWsUrl(); 33 | console.log(reqUrl); 34 | this.ws = new WebSocket(reqUrl); 35 | this.initWsEvent(); 36 | } 37 | // 初始化websocket事件 38 | initWsEvent() { 39 | this.ws.on("open", this.onOpen.bind(this)); 40 | this.ws.on("error", this.onError); 41 | this.ws.on("close", this.onClose); 42 | this.ws.on("message", this.onMessage.bind(this)); 43 | } 44 | /** 45 | * websocket open事件,触发表示已成功建立连接 46 | */ 47 | onOpen() { 48 | console.log("open"); 49 | this.onSend(); 50 | if (fs.existsSync(this.outputFile)) { 51 | fs.unlinkSync(this.outputFile); 52 | } 53 | } 54 | // 发送要转换的参数信息 55 | onSend() { 56 | const frame = { 57 | // 填充common 58 | common: { 59 | app_id: this.params.appid, 60 | }, 61 | // 填充business 62 | business: { 63 | aue: "raw", 64 | auf: "audio/L16;rate=16000", 65 | vcn: "xiaoyan", 66 | tte: "UTF8", 67 | }, 68 | // 填充data 69 | data: { 70 | text: Buffer.from(this.text).toString("base64"), 71 | status: 2, 72 | }, 73 | }; 74 | this.ws.send(JSON.stringify(frame)); 75 | } 76 | // 保存转换后的语音结果 77 | onSave(data) { 78 | fs.writeFileSync(this.outputFile, data, { flag: "a" }); 79 | } 80 | // websocket 消息接收 
回调 81 | onMessage(data, err) { 82 | if (err) return; 83 | const res = JSON.parse(data); 84 | if (res.code !== 0) { 85 | this.ws.close(); 86 | return; 87 | } 88 | // 接收消息结果并进行保存 89 | const audio = res.data.audio; 90 | const audioBuf = Buffer.from(audio, "base64"); 91 | this.onSave(audioBuf); 92 | if (res.code == 0 && res.data.status == 2) { 93 | this.ws.close(); 94 | this.onDone(); 95 | } 96 | } 97 | onClose() { 98 | console.log("close"); 99 | } 100 | onError(error) { 101 | console.log(error); 102 | } 103 | } 104 | 105 | module.exports = XunFeiTTS; 106 | -------------------------------------------------------------------------------- /app/utils/auth.js: -------------------------------------------------------------------------------- 1 | const crypto = require("crypto"); 2 | const qs = require("qs"); 3 | 4 | // 加密,算法 hmac-sha256 5 | const encrypt = (secret, value) => { 6 | const hmac = crypto.createHmac("sha256", Buffer.from(secret)); 7 | hmac.update(value); 8 | return hmac.digest("base64"); 9 | }; 10 | // 生成 signature参数 11 | const createSignature = (host, path, dateString) => { 12 | return `host: ${host}\ndate: ${dateString}\nGET ${path} HTTP/1.1`; 13 | }; 14 | // 生成 authorization_origin参数 15 | const createAuthOrigin = (apiKey, signature) => { 16 | return `api_key="${apiKey}", algorithm="hmac-sha256", headers="host date request-line", signature="${signature}"`; 17 | }; 18 | // string -> base64 string 19 | const toBase64Str = (value) => Buffer.from(value).toString("base64"); 20 | 21 | // 生成鉴权参数 22 | function createAuthParams({ host, path, apiKey, secret }) { 23 | const dateString = new Date().toUTCString(); 24 | const signature = encrypt(secret, createSignature(host, path, dateString)); 25 | const authorization = toBase64Str(createAuthOrigin(apiKey, signature)); 26 | return qs.stringify({ 27 | host, 28 | date: dateString, 29 | authorization, 30 | }); 31 | } 32 | exports.createAuthParams = createAuthParams; 33 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "volute", 3 | "version": "1.0.0", 4 | "description": "speech robot", 5 | "main": "app.js", 6 | "scripts": { 7 | "dev": "nodemon ./app/app.js" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "git+https://github.com/webfansplz/volute.git" 12 | }, 13 | "author": "webfansplz", 14 | "license": "MIT", 15 | "bugs": { 16 | "url": "https://github.com/webfansplz/volute/issues" 17 | }, 18 | "homepage": "https://github.com/webfansplz/volute#readme", 19 | "devDependencies": { 20 | "nodemon": "^2.0.5" 21 | }, 22 | "dependencies": { 23 | "axios": "^0.20.0", 24 | "dotenv": "^8.2.0", 25 | "nan": "^2.14.2", 26 | "node-record-lpcm16": "^1.0.1", 27 | "qs": "^6.9.4", 28 | "snowboy": "^1.3.1-alpha.1", 29 | "speaker": "^0.5.2", 30 | "ws": "^7.3.1" 31 | } 32 | } 33 | --------------------------------------------------------------------------------
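补充一个本地运行方式的简单示意(假设已按前文在项目根目录准备好 .env,并接好麦克风与音箱;snowboy、speaker 等原生模块在安装时会在本机编译):

```bash
# 安装依赖(snowboy、speaker 等原生模块会在安装时编译)
npm install
# 启动语音助手,对应 package.json 中的 "dev": "nodemon ./app/app.js"
npm run dev
```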