├── .gitignore
├── LICENSE
├── README.md
├── app
│   ├── app.js
│   ├── assets
│   │   ├── input.wav
│   │   └── output.wav
│   ├── configs
│   │   ├── common.res
│   │   ├── kobe.pmdl
│   │   ├── kobe.wav
│   │   ├── mamba.pmdl
│   │   └── volute.pmdl
│   ├── services
│   │   ├── snowboy.service.js
│   │   ├── tulingbot.service.js
│   │   ├── xunfeiiat.service.js
│   │   └── xunfeitts.service.js
│   └── utils
│       └── auth.js
└── package.json
/.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | node_modules/ 3 | images/ 4 | .vscode/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 null仔 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## volute 是什么? 2 | 3 | > volute(蜗壳)是一个使用 Raspberry Pi + Node.js 制作的语音助手。 4 | 5 | ## 什么是树莓派? 6 | 7 | ![raspberry-pi](https://s1.ax1x.com/2020/10/22/BiTO76.png) 8 | 9 | ![raspberry-pi-4](https://s1.ax1x.com/2020/10/22/Bi7lBq.png) 10 | 11 | 树莓派(英语:Raspberry Pi)是基于 Linux 的单板计算机,由英国树莓派基金会开发,目的是以低价硬件及自由软件促进学校的基本计算机科学教育。 12 | 13 | 树莓派每一代均使用博通(Broadcom)出产的 ARM 架构处理器,如今生产的机型内存在 2GB 和 8GB 之间,主要使用 SD 卡或者 TF 卡作为存储媒体,配备 USB 接口、HDMI 的视频输出(支持声音输出)和 RCA 端子输出,内置 Ethernet/WLAN/Bluetooth 网络链接的方式(依据型号决定),并且可使用多种操作系统。产品线型号分为 A 型、B 型、Zero 型和 ComputeModule 计算卡。 14 | 15 | > 简单地说,这是一台可以放到口袋里的电脑!! 16 | 17 | ## 什么是 Node.js? 18 | 19 | ![node-js](https://s1.ax1x.com/2020/10/22/Bi7T58.jpg) 20 | 21 | > Node.js 是一个能执行 JavaScript 的环境,一个事件驱动 I/O 的 JavaScript 环境,基于 Google 的 V8 引擎。 22 | 23 | ## 什么是人机对话系统? 24 | 25 | ![chatbot](https://s1.ax1x.com/2020/10/22/Biqiu9.png) 26 | 27 | > 人机对话(Human-Machine Conversation)是指让机器理解和运用自然语言实现人机通信的技术。 28 | 29 | 对话系统大致可分为 5 个基本模块:语音识别(ASR)、自然语言理解(NLU)、对话管理(DM)、自然语言生成(NLG)、语音合成(TTS)。 30 | 31 | - 语音识别(ASR):完成语音到文本的转换,将用户说话的声音转化为文字。 32 | - 自然语言理解(NLU):完成对文本的语义解析,提取关键信息,进行意图识别与实体识别。 33 | - 对话管理(DM):负责对话状态维护、数据库查询、上下文管理等。 34 | - 自然语言生成(NLG):生成相应的自然语言文本。 35 | - 语音合成(TTS):将生成的文本转换为语音。 36 | 37 | ## 材料准备 38 | 39 | - 树莓派 4B 主板 40 | - 树莓派 5V3A Type-C 接口电源 41 | - 微型 USB 麦克风 42 | - 迷你音箱 43 | - 16G TF 卡 44 | - 川宇读卡器 45 | - 杜邦线,外壳,散热片... 
46 | 47 | ![material](https://s1.ax1x.com/2020/10/22/BiDGbn.jpg) 48 | 49 | ## 树莓派系统安装及基础配置 50 | 51 | 新的树莓派不像你买的 Macbook Pro 一样开机就能用 🐶,想要顺利体验树莓派,还得一步一步来~ 52 | 53 | ### 烧录操作系统 54 | 55 | 树莓派没有硬盘结构,仅有一个 micro SD 卡插槽用于存储,因此要把操作系统装到 micro SD 卡中。 56 | 57 | 树莓派支持许多操作系统,这里选择的是官方推荐的 Raspbian,这是一款基于 Debian Linux 的树莓派专用系统,适用于树莓派所有的型号。 58 | 59 | 安装系统我用的是 Raspberry Pi Imager 工具为树莓派烧录系统镜像。 60 | 61 | ![imager](https://s1.ax1x.com/2020/10/22/BF0X5V.png) 62 | 63 | ### 基础配置 64 | 65 | 要对树莓派进行配置,首先要启动系统(我们安装的是系统镜像,可免安装直接进入),然后将树莓派连接显示器即可看到系统桌面,我这里使用的是另一种方法: 66 | 67 | - 使用 IP Scanner 工具 扫描出 Raspberry Pi 的 IP 68 | 69 | ![ip-scanner](https://s1.ax1x.com/2020/10/22/BkEXT0.png) 70 | 71 | - 扫描出 IP 后使用 VNC Viewer 工具 连接进系统 72 | 73 | ![vnc-viewer](https://s1.ax1x.com/2020/10/22/BkEcyd.png) 74 | 75 | - 也可以直接 ssh 连接,然后通过 raspi-config 命令进行配置 76 | 77 | ![ssh-pi](https://s1.ax1x.com/2020/10/22/BkV6hT.png) 78 | 79 | - 配置网络/分辨率/语言/输入输出音频等参数 80 | 81 | ![asound](https://s1.ax1x.com/2020/10/22/BkeLmd.png) 82 | 83 | ## volute 实现思路 84 | 85 | ![volute](https://s1.ax1x.com/2020/10/22/BiDrr9.png) 86 | 87 | ### 任务调度服务 88 | 89 | ```js 90 | const fs = require("fs"); 91 | const path = require("path"); 92 | const Speaker = require("speaker"); 93 | const { record } = require("node-record-lpcm16"); 94 | const XunFeiIAT = require("./services/xunfeiiat.service"); 95 | const XunFeiTTS = require("./services/xunfeitts.service"); 96 | const initSnowboy = require("./services/snowboy.service"); 97 | const TulingBotService = require("./services/tulingbot.service"); 98 | // 任务调度服务 99 | const taskScheduling = { 100 | // 麦克风 101 | mic: null, 102 | speaker: null, 103 | detector: null, 104 | // 音频输入流 105 | inputStream: null, 106 | // 音頻輸出流 107 | outputStream: null, 108 | init() { 109 | // 初始化snowboy 110 | this.detector = initSnowboy({ 111 | record: this.recordSound.bind(this), 112 | stopRecord: this.stopRecord.bind(this), 113 | }); 114 | // 管道流,将麦克风接收到的流传递给snowboy 115 | this.mic.pipe(this.detector); 116 | }, 117 | start() { 118 | // 监听麦克风输入流 119 | this.mic = record({ 120 | sampleRate: 16000, // 采样率 121 | threshold: 0.5, 122 | verbose: true, 123 | recordProgram: "arecord", 124 | }).stream(); 125 | this.init(); 126 | }, 127 | // 记录音频输入 128 | recordSound() { 129 | // 每次记录前,先停止上次未播放完成的输出流 130 | this.stopSpeak(); 131 | console.log("start record"); 132 | // 创建可写流 133 | this.inputStream = fs.createWriteStream( 134 | path.resolve(__dirname, "./assets/input.wav"), 135 | { 136 | encoding: "binary", 137 | } 138 | ); 139 | // 管道流,将麦克风接受到的输入流 传递给 创建的可写流 140 | this.mic.pipe(this.inputStream); 141 | }, 142 | // 停止音频输入 143 | stopRecord() { 144 | if (this.inputStream) { 145 | console.log("stop record"); 146 | // 解绑this.mac绑定的管道流 147 | this.mic.unpipe(this.inputStream); 148 | this.mic.unpipe(this.detector); 149 | process.nextTick(() => { 150 | // 销毁输入流 151 | this.inputStream.destroy(); 152 | this.inputStream = null; 153 | // 重新初始化 154 | this.init(); 155 | // 调用语音听写服务 156 | this.speech2Text(); 157 | }); 158 | } 159 | }, 160 | // speech to text 161 | speech2Text() { 162 | // 实例化 语音听写服务 163 | const iatService = new XunFeiIAT({ 164 | onReply: (msg) => { 165 | console.log("msg", msg); 166 | // 回调,调用聊天功能 167 | this.onChat(msg); 168 | }, 169 | }); 170 | iatService.init(); 171 | }, 172 | // 聊天->图灵机器人 173 | onChat(text) { 174 | // 实例化聊天机器人 175 | TulingBotService.start(text).then((res) => { 176 | console.log(res); 177 | // 接收到聊天消息,调用语音合成服务 178 | this.text2Speech(res); 179 | }); 180 | }, 181 | // text to speech 182 | text2Speech(text) { 183 | // 实例化 语音合成服务 184 | const ttsService = 
new XunFeiTTS({ 185 | text, 186 | onDone: () => { 187 | console.log("onDone"); 188 | this.onSpeak(); 189 | }, 190 | }); 191 | ttsService.init(); 192 | }, 193 | // 播放,音频输出 194 | onSpeak() { 195 | // 实例化speaker,用于播放语音 196 | this.speaker = new Speaker({ 197 | channels: 1, 198 | bitDepth: 16, 199 | sampleRate: 16000, 200 | }); 201 | // 创建可读流 202 | this.outputStream = fs.createReadStream( 203 | path.resolve(__dirname, "./assets/output.pcm") 204 | ); 205 | // this is just to activate the speaker, ~1s delay 206 | this.speaker.write(Buffer.alloc(32000, 10)); 207 | // 管道流,将输出流传递给speaker进行播放 208 | this.outputStream.pipe(this.speaker); 209 | this.outputStream.on("end", () => { 210 | this.outputStream = null; 211 | this.speaker = null; 212 | }); 213 | }, 214 | // 停止播放 215 | stopSpeak() { 216 | this.outputStream && this.outputStream.unpipe(this.speaker); 217 | }, 218 | }; 219 | taskScheduling.start(); 220 | ``` 221 | 222 | ### 热词唤醒 Snowboy 223 | 224 | 语音助手像市面上的设备一样,需要先被唤醒。如果没有唤醒步骤,一直做监听的话,对存储资源和网络连接的需求是非常大的。 225 | 226 | Snowboy 是一款高度可定制的唤醒词检测引擎(Hotwords Detection Library),可以用于实时嵌入式系统,通过训练热词之后,可以离线运行,并且功耗很低。当前,它可以运行在 Raspberry Pi、(Ubuntu)Linux 和 Mac OS X 系统上。 227 | 228 | ![snowboy](https://s1.ax1x.com/2020/10/22/BirEzF.jpg) 229 | 230 | ```js 231 | const path = require("path"); 232 | const snowboy = require("snowboy"); 233 | const models = new snowboy.Models(); 234 | 235 | // 添加训练模型 236 | models.add({ 237 | file: path.resolve(__dirname, "../configs/volute.pmdl"), 238 | sensitivity: "0.5", 239 | hotwords: "volute", 240 | }); 241 | 242 | // 初始化 Detector 对象 243 | const detector = new snowboy.Detector({ 244 | resource: path.resolve(__dirname, "../configs/common.res"), 245 | models: models, 246 | audioGain: 1.0, 247 | applyFrontend: false, 248 | }); 249 | 250 | /** 251 | * 初始化 initSnowboy 252 | * 实现思路: 253 | * 1. 监听到热词,进行唤醒,开始录音 254 | * 2. 录音期间,有声音时,重置silenceCount参数 255 | * 3. 录音期间,未接收到声音时,对silenceCount进行累加,当累加值大于3时,停止录音 256 | */ 257 | function initSnowboy({ record, stopRecord }) { 258 | const MAX_SILENCE_COUNT = 3; 259 | let silenceCount = 0, 260 | speaking = false; 261 | /** 262 | * silence事件回调,没声音时触发 263 | */ 264 | const onSilence = () => { 265 | console.log("silence"); 266 | if (speaking && ++silenceCount > MAX_SILENCE_COUNT) { 267 | speaking = false; 268 | stopRecord && stopRecord(); 269 | detector.off("silence", onSilence); 270 | detector.off("sound", onSound); 271 | detector.off("hotword", onHotword); 272 | } 273 | }; 274 | /** 275 | * sound事件回调,有声音时触发 276 | */ 277 | const onSound = () => { 278 | console.log("sound"); 279 | if (speaking) { 280 | silenceCount = 0; 281 | } 282 | }; 283 | /** 284 | * hotword事件回调,监听到热词时触发 285 | */ 286 | const onHotword = (index, hotword, buffer) => { 287 | if (!speaking) { 288 | silenceCount = 0; 289 | speaking = true; 290 | record && record(); 291 | } 292 | }; 293 | detector.on("silence", onSilence); 294 | detector.on("sound", onSound); 295 | detector.on("hotword", onHotword); 296 | return detector; 297 | } 298 | 299 | module.exports = initSnowboy; 300 | ``` 301 | 
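上面的 initSnowboy 只加载了 volute.pmdl 一个模型。snowboy 的 Models 也支持同时添加多个训练好的模型(仓库 configs 目录下就还有 kobe.pmdl、mamba.pmdl),hotword 事件回调里的 index 与 hotword 参数可以用来区分命中的是哪个热词。下面是一个简单的示意(非项目实际代码,sensitivity 等取值仅为假设):

```js
const path = require("path");
const snowboy = require("snowboy");

// 同时加载多个热词模型(示意):volute 与 kobe 都可以唤醒
const models = new snowboy.Models();
models.add({
  file: path.resolve(__dirname, "../configs/volute.pmdl"),
  sensitivity: "0.5", // 假设值:越高越灵敏,也越容易误唤醒
  hotwords: "volute",
});
models.add({
  file: path.resolve(__dirname, "../configs/kobe.pmdl"),
  sensitivity: "0.5", // 假设值
  hotwords: "kobe",
});

const detector = new snowboy.Detector({
  resource: path.resolve(__dirname, "../configs/common.res"),
  models: models,
  audioGain: 1.0,
  applyFrontend: false,
});

// hotword 事件带有命中模型的序号与热词名,可据此做不同的响应
detector.on("hotword", (index, hotword) => {
  console.log(`被热词唤醒: #${index} ${hotword}`);
});
```

灵敏度越高越容易唤醒,但误唤醒也会随之增多,需要结合实际使用环境调整。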
302 | ### 语音听写 科大讯飞 API 303 | 304 | 语音转文字使用的是讯飞开放平台的语音听写服务。它可以将短音频(≤60 秒)精准识别成文字,除中文普通话和英文外,支持 25 种方言和 12 个语种,实时返回结果,达到边说边返回的效果。 305 | 306 | ```js 307 | require("dotenv").config(); 308 | const fs = require("fs"); 309 | const WebSocket = require("ws"); 310 | const { resolve } = require("path"); 311 | const { createAuthParams } = require("../utils/auth"); 312 | 313 | class XunFeiIAT { 314 | constructor({ onReply }) { 315 | 316 | // websocket 连接 317 | this.ws = null; 318 | // 返回结果,解析后的消息文字 319 | this.message = ""; 320 | this.onReply = onReply; 321 | // 需要进行转换的输入流 语音文件 322 | this.inputFile = resolve(__dirname, "../assets/input.wav"); 323 | // 接口 入参 324 | this.params = { 325 | host: "iat-api.xfyun.cn", 326 | path: "/v2/iat", 327 | apiKey: process.env.XUNFEI_API_KEY, 328 | secret: process.env.XUNFEI_SECRET, 329 | }; 330 | } 331 | // 生成websocket连接 332 | generateWsUrl() { 333 | const { host, path } = this.params; 334 | // 接口鉴权,参数加密 335 | const params = createAuthParams(this.params); 336 | return `ws://${host}${path}?${params}`; 337 | } 338 | // 初始化 339 | init() { 340 | const reqUrl = this.generateWsUrl(); 341 | this.ws = new WebSocket(reqUrl); 342 | this.initWsEvent(); 343 | } 344 | // 初始化websocket事件 345 | initWsEvent() { 346 | this.ws.on("open", this.onOpen.bind(this)); 347 | this.ws.on("error", this.onError); 348 | this.ws.on("close", this.onClose); 349 | this.ws.on("message", this.onMessage.bind(this)); 350 | } 351 | /** 352 | * websocket open事件,触发表示已成功建立连接 353 | */ 354 | onOpen() { 355 | console.log("open"); 356 | this.onPush(this.inputFile); 357 | } 358 | onPush(file) { 359 | this.pushAudioFile(file); 360 | } 361 | // websocket 消息接收 回调 362 | onMessage(data) { 363 | const payload = JSON.parse(data); 364 | if (payload.data && payload.data.result) { 365 | // 拼接消息结果 366 | this.message += payload.data.result.ws.reduce( 367 | (acc, item) => acc + item.cw.map((cw) => cw.w).join(""), 368 | "" 369 | ); 370 | // status 2表示结束 371 | if (payload.data.status === 2) { 372 | this.onReply(this.message); 373 | } 374 | } 375 | } 376 | // websocket 关闭事件 377 | onClose() { 378 | console.log("close"); 379 | } 380 | // websocket 错误事件 381 | onError(error) { 382 | console.log(error); 383 | } 384 | /** 385 | * 解析语音文件,将语音以二进制流的形式传送给后端 386 | */ 387 | pushAudioFile(audioFile) { 388 | this.message = ""; 389 | // 发送需要的载体参数 390 | const audioPayload = (statusCode, audioBase64) => ({ 391 | common: 392 | statusCode === 0 393 | ? { 394 | app_id: "5f6cab72", 395 | } 396 | : undefined, 397 | business: 398 | statusCode === 0 399 | ? { 400 | language: "zh_cn", 401 | domain: "iat", 402 | ptt: 0, 403 | } 404 | : undefined, 405 | data: { 406 | status: statusCode, 407 | format: "audio/L16;rate=16000", 408 | encoding: "raw", 409 | audio: audioBase64, 410 | }, 411 | }); 412 | const chunkSize = 9000; 413 | // 创建buffer,用于存储二进制数据 414 | const buffer = Buffer.alloc(chunkSize); 415 | // 打开语音文件 416 | fs.open(audioFile, "r", (err, fd) => { 417 | if (err) { 418 | throw err; 419 | } 420 | 421 | let i = 0; 422 | // 以二进制流的形式递归发送 423 | function readNextChunk() { 424 | fs.read(fd, buffer, 0, chunkSize, null, (errr, nread) => { 425 | if (errr) { 426 | throw errr; 427 | } 428 | // nread为0表示文件流已读完,发送传输结束标识(status=2) 429 | if (nread === 0) { 430 | this.ws.send( 431 | JSON.stringify({ 432 | data: { status: 2 }, 433 | }) 434 | ); 435 | 436 | return fs.close(fd, (err) => { 437 | if (err) { 438 | throw err; 439 | } 440 | }); 441 | } 442 | 443 | let data; 444 | if (nread < chunkSize) { 445 | data = buffer.slice(0, nread); 446 | } else { 447 | data = buffer; 448 | } 449 | 450 | const audioBase64 = data.toString("base64"); 451 | const payload = audioPayload(i >= 1 ? 1 : 0, audioBase64); 452 | this.ws.send(JSON.stringify(payload)); 453 | i++; 454 | readNextChunk.call(this); 455 | }); 456 | } 457 | 458 | readNextChunk.call(this); 459 | }); 460 | } 461 | } 462 | 463 | module.exports = XunFeiIAT; 464 | ``` 465 | 
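上面的语音听写服务,以及后文的图灵机器人、语音合成服务,都是通过 dotenv 从项目根目录的 .env 文件中读取密钥(.gitignore 中已忽略 .env,不会被提交)。下面是一个示例 .env:变量名取自本文代码,具体取值需要分别在讯飞开放平台和图灵机器人平台注册应用后获取,这里仅为占位:

```bash
# 讯飞开放平台(语音听写 / 语音合成)
XUNFEI_APP_ID=你的讯飞AppID
XUNFEI_API_KEY=你的讯飞APIKey
XUNFEI_SECRET=你的讯飞APISecret

# 图灵机器人
TULING_BOT_API_KEY=你的图灵机器人ApiKey
TULING_BOT_USER_ID=你的图灵机器人UserId
```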
466 | ### 聊天机器人 图灵机器人 API 467 | 468 | 图灵机器人 API V2.0 是基于图灵机器人平台语义理解、深度学习等核心技术,为广大开发者和企业提供的在线服务和开发接口。 469 | 470 | 目前 API 接口可调用聊天对话、语料库、技能三大模块的语料: 471 | 472 | 聊天对话是指平台免费提供的近 10 亿条公有对话语料,满足用户对话娱乐需求; 473 | 474 | 语料库是指用户在平台上传的私有语料,仅供个人查看使用,帮助用户最便捷地搭建专业领域的语料。 475 | 476 | 技能服务是指平台打包的 26 种实用服务技能。涵盖生活、出行、购物等多个领域,一站式满足用户需求。 477 | 478 | ```js 479 | require("dotenv").config(); 480 | const axios = require("axios"); 481 | 482 | // 太简单了..懒得解释 🐶 483 | 484 | const TulingBotService = { 485 | requestUrl: "http://openapi.tuling123.com/openapi/api/v2", 486 | start(text) { 487 | return new Promise((resolve) => { 488 | axios 489 | .post(this.requestUrl, { 490 | reqType: 0, 491 | perception: { 492 | inputText: { 493 | text, 494 | }, 495 | }, 496 | userInfo: { 497 | apiKey: process.env.TULING_BOT_API_KEY, 498 | userId: process.env.TULING_BOT_USER_ID, 499 | }, 500 | }) 501 | .then((res) => { 502 | // console.log(JSON.stringify(res.data, null, 2)); 503 | resolve(res.data.results[0].values.text); 504 | }); 505 | }); 506 | }, 507 | }; 508 | 509 | module.exports = TulingBotService; 510 | ``` 511 | 512 | ### 语音合成 科大讯飞 API 513 | 514 | 语音合成流式接口将文字信息转化为声音信息,同时提供了众多极具特色的发音人(音库)供您选择。 515 | 516 | 该语音能力是通过 Websocket API 的方式给开发者提供一个通用的接口。Websocket API 具备流式传输能力,适用于需要流式数据传输的 AI 服务场景。相较于 SDK,API 具有轻量、跨语言的特点;相较于 HTTP API,Websocket API 协议有原生支持跨域的优势。 517 | 518 | ```js 519 | require("dotenv").config(); 520 | const fs = require("fs"); 521 | const WebSocket = require("ws"); 522 | const { resolve } = require("path"); 523 | const { createAuthParams } = require("../utils/auth"); 524 | 525 | class XunFeiTTS { 526 | constructor({ text, onDone }) { 527 | 528 | this.ws = null; 529 | // 要转换的文字 530 | this.text = text; 531 | this.onDone = onDone; 532 | // 转换后的语音文件 533 | this.outputFile = resolve(__dirname, "../assets/output.pcm"); 534 | // 接口入参 535 | this.params = { 536 | host: "tts-api.xfyun.cn", 537 | path: "/v2/tts", 538 | appid: process.env.XUNFEI_APP_ID, 539 | apiKey: process.env.XUNFEI_API_KEY, 540 | secret: process.env.XUNFEI_SECRET, 541 | }; 542 | } 543 | // 
生成websocket连接 544 | generateWsUrl() { 545 | const { host, path } = this.params; 546 | const params = createAuthParams(this.params); 547 | return `ws://${host}${path}?${params}`; 548 | } 549 | // 初始化 550 | init() { 551 | const reqUrl = this.generateWsUrl(); 552 | console.log(reqUrl); 553 | this.ws = new WebSocket(reqUrl); 554 | this.initWsEvent(); 555 | } 556 | // 初始化websocket事件 557 | initWsEvent() { 558 | this.ws.on("open", this.onOpen.bind(this)); 559 | this.ws.on("error", this.onError); 560 | this.ws.on("close", this.onClose); 561 | this.ws.on("message", this.onMessage.bind(this)); 562 | } 563 | /** 564 | * websocket open事件,触发表示已成功建立连接 565 | */ 566 | onOpen() { 567 | console.log("open"); 568 | this.onSend(); 569 | if (fs.existsSync(this.outputFile)) { 570 | fs.unlinkSync(this.outputFile); 571 | } 572 | } 573 | // 发送要转换的参数信息 574 | onSend() { 575 | const frame = { 576 | // 填充common 577 | common: { 578 | app_id: this.params.appid, 579 | }, 580 | // 填充business 581 | business: { 582 | aue: "raw", 583 | auf: "audio/L16;rate=16000", 584 | vcn: "xiaoyan", 585 | tte: "UTF8", 586 | }, 587 | // 填充data 588 | data: { 589 | text: Buffer.from(this.text).toString("base64"), 590 | status: 2, 591 | }, 592 | }; 593 | this.ws.send(JSON.stringify(frame)); 594 | } 595 | // 保存转换后的语音结果 596 | onSave(data) { 597 | fs.writeFileSync(this.outputFile, data, { flag: "a" }); 598 | } 599 | // websocket 消息接收 回调 600 | onMessage(data, err) { 601 | if (err) return; 602 | const res = JSON.parse(data); 603 | if (res.code !== 0) { 604 | this.ws.close(); 605 | return; 606 | } 607 | // 接收消息结果并进行保存 608 | const audio = res.data.audio; 609 | const audioBuf = Buffer.from(audio, "base64"); 610 | this.onSave(audioBuf); 611 | if (res.code == 0 && res.data.status == 2) { 612 | this.ws.close(); 613 | this.onDone(); 614 | } 615 | } 616 | onClose() { 617 | console.log("close"); 618 | } 619 | onError(error) { 620 | console.log(error); 621 | } 622 | } 623 | 624 | module.exports = XunFeiTTS; 625 | ``` 626 | 627 | ## 效果演示 628 | 629 | [语雀-文章最底部可看效果](https://www.yuque.com/docs/share/df7fbb6d-d1ae-45cf-a7db-a37a38bd1e23?#%20%E3%80%8Avolute%E3%80%8B) 630 | 631 | ## 源码地址 632 | 633 | [Github 源码地址](https://github.com/webfansplz/volute) 634 | -------------------------------------------------------------------------------- /app/app.js: -------------------------------------------------------------------------------- 1 | const fs = require("fs"); 2 | const path = require("path"); 3 | const Speaker = require("speaker"); 4 | const { record } = require("node-record-lpcm16"); 5 | const XunFeiIAT = require("./services/xunfeiiat.service"); 6 | const XunFeiTTS = require("./services/xunfeitts.service"); 7 | const initSnowboy = require("./services/snowboy.service"); 8 | const TulingBotService = require("./services/tulingbot.service"); 9 | // 任务调度服务 10 | const taskScheduling = { 11 | // 麦克风 12 | mic: null, 13 | speaker: null, 14 | detector: null, 15 | // 音频输入流 16 | inputStream: null, 17 | // 音頻輸出流 18 | outputStream: null, 19 | init() { 20 | // 初始化snowboy 21 | this.detector = initSnowboy({ 22 | record: this.recordSound.bind(this), 23 | stopRecord: this.stopRecord.bind(this), 24 | }); 25 | // 管道流,将麦克风接收到的流传递给snowboy 26 | this.mic.pipe(this.detector); 27 | }, 28 | start() { 29 | // 监听麦克风输入流 30 | this.mic = record({ 31 | sampleRate: 16000, // 采样率 32 | threshold: 0.5, 33 | verbose: true, 34 | recordProgram: "arecord", 35 | }).stream(); 36 | this.init(); 37 | }, 38 | // 记录音频输入 39 | recordSound() { 40 | // 每次记录前,先停止上次未播放完成的输出流 41 | this.stopSpeak(); 42 | 
console.log("start record"); 43 | // 创建可写流 44 | this.inputStream = fs.createWriteStream( 45 | path.resolve(__dirname, "./assets/input.wav"), 46 | { 47 | encoding: "binary", 48 | } 49 | ); 50 | // 管道流,将麦克风接受到的输入流 传递给 创建的可写流 51 | this.mic.pipe(this.inputStream); 52 | }, 53 | // 停止音频输入 54 | stopRecord() { 55 | if (this.inputStream) { 56 | console.log("stop record"); 57 | // 解绑this.mac绑定的管道流 58 | this.mic.unpipe(this.inputStream); 59 | this.mic.unpipe(this.detector); 60 | process.nextTick(() => { 61 | // 销毁输入流 62 | this.inputStream.destroy(); 63 | this.inputStream = null; 64 | // 重新初始化 65 | this.init(); 66 | // 调用语音听写服务 67 | this.speech2Text(); 68 | }); 69 | } 70 | }, 71 | // speech to text 72 | speech2Text() { 73 | // 实例化 语音听写服务 74 | const iatService = new XunFeiIAT({ 75 | onReply: (msg) => { 76 | console.log("msg", msg); 77 | // 回调,调用聊天功能 78 | this.onChat(msg); 79 | }, 80 | }); 81 | iatService.init(); 82 | }, 83 | // 聊天->图灵机器人 84 | onChat(text) { 85 | // 实例化聊天机器人 86 | TulingBotService.start(text).then((res) => { 87 | console.log(res); 88 | // 接收到聊天消息,调用语音合成服务 89 | this.text2Speech(res); 90 | }); 91 | }, 92 | // text to speech 93 | text2Speech(text) { 94 | // 实例化 语音合成服务 95 | const ttsService = new XunFeiTTS({ 96 | text, 97 | onDone: () => { 98 | console.log("onDone"); 99 | this.onSpeak(); 100 | }, 101 | }); 102 | ttsService.init(); 103 | }, 104 | // 播放,音频输出 105 | onSpeak() { 106 | // 实例化speaker,用于播放语音 107 | this.speaker = new Speaker({ 108 | channels: 1, 109 | bitDepth: 16, 110 | sampleRate: 16000, 111 | }); 112 | // 创建可读流 113 | this.outputStream = fs.createReadStream( 114 | path.resolve(__dirname, "./assets/output.pcm") 115 | ); 116 | // this is just to activate the speaker, 2s delay 117 | this.speaker.write(Buffer.alloc(32000, 10)); 118 | // 管道流,将输出流传递给speaker进行播放 119 | this.outputStream.pipe(this.speaker); 120 | this.outputStream.on("end", () => { 121 | this.outputStream = null; 122 | this.speaker = null; 123 | }); 124 | }, 125 | // 停止播放 126 | stopSpeak() { 127 | this.outputStream && this.outputStream.unpipe(this.speaker); 128 | }, 129 | }; 130 | taskScheduling.start(); 131 | -------------------------------------------------------------------------------- /app/assets/input.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webfansplz/volute/dbca1ead68ca4bd7552dd5940c5a381d0374c513/app/assets/input.wav -------------------------------------------------------------------------------- /app/assets/output.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webfansplz/volute/dbca1ead68ca4bd7552dd5940c5a381d0374c513/app/assets/output.wav -------------------------------------------------------------------------------- /app/configs/common.res: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webfansplz/volute/dbca1ead68ca4bd7552dd5940c5a381d0374c513/app/configs/common.res -------------------------------------------------------------------------------- /app/configs/kobe.pmdl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webfansplz/volute/dbca1ead68ca4bd7552dd5940c5a381d0374c513/app/configs/kobe.pmdl -------------------------------------------------------------------------------- /app/configs/kobe.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/webfansplz/volute/dbca1ead68ca4bd7552dd5940c5a381d0374c513/app/configs/kobe.wav -------------------------------------------------------------------------------- /app/configs/mamba.pmdl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webfansplz/volute/dbca1ead68ca4bd7552dd5940c5a381d0374c513/app/configs/mamba.pmdl -------------------------------------------------------------------------------- /app/configs/volute.pmdl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webfansplz/volute/dbca1ead68ca4bd7552dd5940c5a381d0374c513/app/configs/volute.pmdl -------------------------------------------------------------------------------- /app/services/snowboy.service.js: -------------------------------------------------------------------------------- 1 | const path = require("path"); 2 | const snowboy = require("snowboy"); 3 | const models = new snowboy.Models(); 4 | 5 | // 添加训练模型 6 | models.add({ 7 | file: path.resolve(__dirname, "../configs/volute.pmdl"), 8 | sensitivity: "0.5", 9 | hotwords: "volute", 10 | }); 11 | 12 | // 初始化 Detector 对象 13 | const detector = new snowboy.Detector({ 14 | resource: path.resolve(__dirname, "../configs/common.res"), 15 | models: models, 16 | audioGain: 1.0, 17 | applyFrontend: false, 18 | }); 19 | 20 | /** 21 | * 初始化 initSnowboy 22 | * 实现思路: 23 | * 1. 监听到热词,进行唤醒,开始录音 24 | * 2. 录音期间,有声音时,重置silenceCount参数 25 | * 3. 录音期间,未接受到声音时,对silenceCount进行累加,当累加值大于3时,停止录音 26 | */ 27 | function initSnowboy({ record, stopRecord }) { 28 | const MAX_SILENCE_COUNT = 3; 29 | let silenceCount = 0, 30 | speaking = false; 31 | /** 32 | * silence事件回调,没声音时触发 33 | */ 34 | const onSilence = () => { 35 | console.log("silence"); 36 | if (speaking && ++silenceCount > MAX_SILENCE_COUNT) { 37 | speaking = false; 38 | stopRecord && stopRecord(); 39 | detector.off("silence", onSilence); 40 | detector.off("sound", onSound); 41 | detector.off("hotword", onHotword); 42 | } 43 | }; 44 | /** 45 | * sound事件回调,有声音时触发 46 | */ 47 | const onSound = () => { 48 | console.log("sound"); 49 | if (speaking) { 50 | silenceCount = 0; 51 | } 52 | }; 53 | /** 54 | * hotword事件回调,监听到热词时触发 55 | */ 56 | const onHotword = (index, hotword, buffer) => { 57 | if (!speaking) { 58 | silenceCount = 0; 59 | speaking = true; 60 | record && record(); 61 | } 62 | }; 63 | detector.on("silence", onSilence); 64 | detector.on("sound", onSound); 65 | detector.on("hotword", onHotword); 66 | return detector; 67 | } 68 | 69 | module.exports = initSnowboy; 70 | -------------------------------------------------------------------------------- /app/services/tulingbot.service.js: -------------------------------------------------------------------------------- 1 | require("dotenv").config(); 2 | const axios = require("axios"); 3 | 4 | // 太简单了..懒得解释 🐶 5 | 6 | const TulingBotService = { 7 | requestUrl: "http://openapi.tuling123.com/openapi/api/v2", 8 | start(text) { 9 | return new Promise((resolve) => { 10 | axios 11 | .post(this.requestUrl, { 12 | reqType: 0, 13 | perception: { 14 | inputText: { 15 | text, 16 | }, 17 | }, 18 | userInfo: { 19 | apiKey: process.env.TULING_BOT_API_KEY, 20 | userId: process.env.TULING_BOT_USER_ID, 21 | }, 22 | }) 23 | .then((res) => { 24 | // console.log(JSON.stringify(res.data, null, 2)); 25 | resolve(res.data.results[0].values.text); 26 | }); 27 | }); 28 | }, 29 | }; 30 | 31 | module.exports = TulingBotService; 32 | 
-------------------------------------------------------------------------------- /app/services/xunfeiiat.service.js: -------------------------------------------------------------------------------- 1 | require("dotenv").config(); 2 | const fs = require("fs"); 3 | const WebSocket = require("ws"); 4 | const { resolve } = require("path"); 5 | const { createAuthParams } = require("../utils/auth"); 6 | 7 | class XunFeiIAT { 8 | constructor({ onReply }) { 9 | // websocket 连接 10 | this.ws = null; 11 | // 返回结果,解析后的消息文字 12 | this.message = ""; 13 | this.onReply = onReply; 14 | // 需要进行转换的输入流 语音文件 15 | this.inputFile = resolve(__dirname, "../assets/input.wav"); 16 | // 接口 入参 17 | this.params = { 18 | host: "iat-api.xfyun.cn", 19 | path: "/v2/iat", 20 | apiKey: process.env.XUNFEI_API_KEY, 21 | secret: process.env.XUNFEI_SECRET, 22 | }; 23 | } 24 | // 生成websocket连接 25 | generateWsUrl() { 26 | const { host, path } = this.params; 27 | // 接口鉴权,参数加密 28 | const params = createAuthParams(this.params); 29 | return `ws://${host}${path}?${params}`; 30 | } 31 | // 初始化 32 | init() { 33 | const reqUrl = this.generateWsUrl(); 34 | this.ws = new WebSocket(reqUrl); 35 | this.initWsEvent(); 36 | } 37 | // 初始化websocket事件 38 | initWsEvent() { 39 | this.ws.on("open", this.onOpen.bind(this)); 40 | this.ws.on("error", this.onError); 41 | this.ws.on("close", this.onClose); 42 | this.ws.on("message", this.onMessage.bind(this)); 43 | } 44 | /** 45 | * websocket open事件,触发表示已成功建立连接 46 | */ 47 | onOpen() { 48 | console.log("open"); 49 | this.onPush(this.inputFile); 50 | } 51 | onPush(file) { 52 | this.pushAudioFile(file); 53 | } 54 | // websocket 消息接收 回调 55 | onMessage(data) { 56 | const payload = JSON.parse(data); 57 | if (payload.data && payload.data.result) { 58 | // 拼接消息结果 59 | this.message += payload.data.result.ws.reduce( 60 | (acc, item) => acc + item.cw.map((cw) => cw.w), 61 | "" 62 | ); 63 | // status 2表示结束 64 | if (payload.data.status === 2) { 65 | this.onReply(this.message); 66 | } 67 | } 68 | } 69 | // websocket 关闭事件 70 | onClose() { 71 | console.log("close"); 72 | } 73 | // websocket 错误事件 74 | onError(error) { 75 | console.log(error); 76 | } 77 | /** 78 | * 解析语音文件,将语音以二进制流的形式传送给后端 79 | */ 80 | pushAudioFile(audioFile) { 81 | this.message = ""; 82 | // 发送需要的载体参数 83 | const audioPayload = (statusCode, audioBase64) => ({ 84 | common: 85 | statusCode === 0 86 | ? { 87 | app_id: "5f6cab72", 88 | } 89 | : undefined, 90 | business: 91 | statusCode === 0 92 | ? 
{ 93 | language: "zh_cn", 94 | domain: "iat", 95 | ptt: 0, 96 | } 97 | : undefined, 98 | data: { 99 | status: statusCode, 100 | format: "audio/L16;rate=16000", 101 | encoding: "raw", 102 | audio: audioBase64, 103 | }, 104 | }); 105 | const chunkSize = 9000; 106 | // 创建buffer,用于存储二进制数据 107 | const buffer = Buffer.alloc(chunkSize); 108 | // 打开语音文件 109 | fs.open(audioFile, "r", (err, fd) => { 110 | if (err) { 111 | throw err; 112 | } 113 | 114 | let i = 0; 115 | // 以二进制流的形式递归发送 116 | function readNextChunk() { 117 | fs.read(fd, buffer, 0, chunkSize, null, (errr, nread) => { 118 | if (errr) { 119 | throw errr; 120 | } 121 | // nread表示文件流已读完,发送传输结束标识(status=2) 122 | if (nread === 0) { 123 | this.ws.send( 124 | JSON.stringify({ 125 | data: { status: 2 }, 126 | }) 127 | ); 128 | 129 | return fs.close(fd, (err) => { 130 | if (err) { 131 | throw err; 132 | } 133 | }); 134 | } 135 | 136 | let data; 137 | if (nread < chunkSize) { 138 | data = buffer.slice(0, nread); 139 | } else { 140 | data = buffer; 141 | } 142 | 143 | const audioBase64 = data.toString("base64"); 144 | const payload = audioPayload(i >= 1 ? 1 : 0, audioBase64); 145 | this.ws.send(JSON.stringify(payload)); 146 | i++; 147 | readNextChunk.call(this); 148 | }); 149 | } 150 | 151 | readNextChunk.call(this); 152 | }); 153 | } 154 | } 155 | 156 | module.exports = XunFeiIAT; 157 | -------------------------------------------------------------------------------- /app/services/xunfeitts.service.js: -------------------------------------------------------------------------------- 1 | require("dotenv").config(); 2 | const fs = require("fs"); 3 | const WebSocket = require("ws"); 4 | const { resolve } = require("path"); 5 | const { createAuthParams } = require("../utils/auth"); 6 | 7 | class XunFeiTTS { 8 | constructor({ text, onDone }) { 9 | this.ws = null; 10 | // 要转换的文字 11 | this.text = text; 12 | this.onDone = onDone; 13 | // 转换后的语音文件 14 | this.outputFile = resolve(__dirname, "../assets/output.pcm"); 15 | // 接口入参 16 | this.params = { 17 | host: "tts-api.xfyun.cn", 18 | path: "/v2/tts", 19 | appid: process.env.XUNFEI_APP_ID, 20 | apiKey: process.env.XUNFEI_API_KEY, 21 | secret: process.env.XUNFEI_SECRET, 22 | }; 23 | } 24 | // 生成websocket连接 25 | generateWsUrl() { 26 | const { host, path } = this.params; 27 | const params = createAuthParams(this.params); 28 | return `ws://${host}${path}?${params}`; 29 | } 30 | // 初始化 31 | init() { 32 | const reqUrl = this.generateWsUrl(); 33 | console.log(reqUrl); 34 | this.ws = new WebSocket(reqUrl); 35 | this.initWsEvent(); 36 | } 37 | // 初始化websocket事件 38 | initWsEvent() { 39 | this.ws.on("open", this.onOpen.bind(this)); 40 | this.ws.on("error", this.onError); 41 | this.ws.on("close", this.onClose); 42 | this.ws.on("message", this.onMessage.bind(this)); 43 | } 44 | /** 45 | * websocket open事件,触发表示已成功建立连接 46 | */ 47 | onOpen() { 48 | console.log("open"); 49 | this.onSend(); 50 | if (fs.existsSync(this.outputFile)) { 51 | fs.unlinkSync(this.outputFile); 52 | } 53 | } 54 | // 发送要转换的参数信息 55 | onSend() { 56 | const frame = { 57 | // 填充common 58 | common: { 59 | app_id: this.params.appid, 60 | }, 61 | // 填充business 62 | business: { 63 | aue: "raw", 64 | auf: "audio/L16;rate=16000", 65 | vcn: "xiaoyan", 66 | tte: "UTF8", 67 | }, 68 | // 填充data 69 | data: { 70 | text: Buffer.from(this.text).toString("base64"), 71 | status: 2, 72 | }, 73 | }; 74 | this.ws.send(JSON.stringify(frame)); 75 | } 76 | // 保存转换后的语音结果 77 | onSave(data) { 78 | fs.writeFileSync(this.outputFile, data, { flag: "a" }); 79 | } 80 | // websocket 消息接收 
回调 81 | onMessage(data, err) { 82 | if (err) return; 83 | const res = JSON.parse(data); 84 | if (res.code !== 0) { 85 | this.ws.close(); 86 | return; 87 | } 88 | // 接收消息结果并进行保存 89 | const audio = res.data.audio; 90 | const audioBuf = Buffer.from(audio, "base64"); 91 | this.onSave(audioBuf); 92 | if (res.code == 0 && res.data.status == 2) { 93 | this.ws.close(); 94 | this.onDone(); 95 | } 96 | } 97 | onClose() { 98 | console.log("close"); 99 | } 100 | onError(error) { 101 | console.log(error); 102 | } 103 | } 104 | 105 | module.exports = XunFeiTTS; 106 | -------------------------------------------------------------------------------- /app/utils/auth.js: -------------------------------------------------------------------------------- 1 | const crypto = require("crypto"); 2 | const qs = require("qs"); 3 | 4 | // 加密,算法 hmac-sha256 5 | const encrypt = (secret, value) => { 6 | const hmac = crypto.createHmac("sha256", Buffer.from(secret)); 7 | hmac.update(value); 8 | return hmac.digest("base64"); 9 | }; 10 | // 生成 signature参数 11 | const createSignature = (host, path, dateString) => { 12 | return `host: ${host}\ndate: ${dateString}\nGET ${path} HTTP/1.1`; 13 | }; 14 | // 生成 authorization_origin参数 15 | const createAuthOrigin = (apiKey, signature) => { 16 | return `api_key="${apiKey}", algorithm="hmac-sha256", headers="host date request-line", signature="${signature}"`; 17 | }; 18 | // string -> base64 string 19 | const toBase64Str = (value) => Buffer.from(value).toString("base64"); 20 | 21 | // 生成鉴权参数 22 | function createAuthParams({ host, path, apiKey, secret }) { 23 | const dateString = new Date().toUTCString(); 24 | const signature = encrypt(secret, createSignature(host, path, dateString)); 25 | const authorization = toBase64Str(createAuthOrigin(apiKey, signature)); 26 | return qs.stringify({ 27 | host, 28 | date: dateString, 29 | authorization, 30 | }); 31 | } 32 | exports.createAuthParams = createAuthParams; 33 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "volute", 3 | "version": "1.0.0", 4 | "description": "speech robot", 5 | "main": "app.js", 6 | "scripts": { 7 | "dev": "nodemon ./app/app.js" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "git+https://github.com/webfansplz/volute.git" 12 | }, 13 | "author": "webfansplz", 14 | "license": "MIT", 15 | "bugs": { 16 | "url": "https://github.com/webfansplz/volute/issues" 17 | }, 18 | "homepage": "https://github.com/webfansplz/volute#readme", 19 | "devDependencies": { 20 | "nodemon": "^2.0.5" 21 | }, 22 | "dependencies": { 23 | "axios": "^0.20.0", 24 | "dotenv": "^8.2.0", 25 | "nan": "^2.14.2", 26 | "node-record-lpcm16": "^1.0.1", 27 | "qs": "^6.9.4", 28 | "snowboy": "^1.3.1-alpha.1", 29 | "speaker": "^0.5.2", 30 | "ws": "^7.3.1" 31 | } 32 | } 33 | --------------------------------------------------------------------------------
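补充一个本地运行方式的简单示意(假设已按前文在项目根目录准备好 .env,并接好麦克风与音箱;snowboy、speaker 等原生模块在安装时会在本机编译):

```bash
# 安装依赖(snowboy、speaker 等原生模块会在安装时编译)
npm install
# 启动语音助手,对应 package.json 中的 "dev": "nodemon ./app/app.js"
npm run dev
```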