├── .gitignore
├── README.md
├── assistant_config.mjs
├── babel.config.js
├── package-lock.json
├── package.json
├── public
│   ├── favicon.ico
│   └── index.html
├── sounds
│   ├── de
│   │   ├── hotword_answer_1.mp3
│   │   └── meme_hotword_answer.mp3
│   └── en
│       ├── hotword_answer_1.mp3
│       └── meme_hotword_answer.mp3
├── src
│   ├── App.vue
│   ├── assets
│   │   ├── cyborg_corgi.jpg
│   │   └── cyborg_corgi.webp
│   ├── background.js
│   ├── components
│   │   └── AiAssistant.vue
│   ├── google_stt.mjs
│   ├── main.js
│   ├── plugins
│   │   └── vuetify.js
│   ├── test.mjs
│   ├── tts.mjs
│   ├── voice_assistant_google.mjs
│   └── voice_assistant_vosk.mjs
├── vue.config.js
└── yarn.lock

/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | node_modules
3 | /dist
4 | 
5 | 
6 | # local env files
7 | .env
8 | .env.local
9 | .env.*.local
10 | 
11 | # Log files
12 | npm-debug.log*
13 | yarn-debug.log*
14 | yarn-error.log*
15 | pnpm-debug.log*
16 | 
17 | # Editor directories and files
18 | .idea
19 | .vscode
20 | *.suo
21 | *.ntvs*
22 | *.njsproj
23 | *.sln
24 | *.sw?
25 | 
26 | # Electron-builder output
27 | /dist_electron
28 | ./temp.wav
29 | language_models
30 | hotword_trigger.txt
31 | latest_recording.txt
32 | google_client_secret.json
33 | sounds/*/gpt_answer.mp3
34 | public/sounds
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ai-voice-assistant
2 | 
3 | Electron-based AI voice assistant utilizing Google Cloud Text-to-Speech and Speech-to-Text and OpenAI ChatGPT.
4 | On machines with enough power, you can use Vosk for completely offline voice recognition.
5 | 
6 | ## Prerequisites
7 | 
8 | - Debian-based OS (tested on Ubuntu and Raspberry Pi OS)
9 | - Node 16 (installation via nvm is recommended: https://github.com/nvm-sh/nvm)
10 | ## Project setup
11 | ```
12 | npm install --legacy-peer-deps
13 | ```
14 | 
15 | ## Google Setup
16 | - install the gcloud CLI (https://cloud.google.com/sdk/docs/install?hl=de#deb)
17 | - you need a Google account with the Cloud Text-to-Speech API enabled (https://cloud.google.com/text-to-speech/docs)
18 | - you also need the Cloud Speech-to-Text API enabled if you are not using Vosk (can be configured in assistant_config.mjs)
19 | - after enabling both, create a service account for this project and download a credentials JSON file that looks like this:
20 | 
21 | ```
22 | {
23 |   "type": "service_account",
24 |   "project_id": "{{your_project_id}}",
25 |   "private_key_id": "{{your_private_key_id}}",
26 |   "private_key": "{{your_private_key}}",
27 |   "client_email": "local-account@{{your_project_id}}.iam.gserviceaccount.com",
28 |   "client_id": "{{your_client_id}}",
29 |   "auth_uri": "https://accounts.google.com/o/oauth2/auth",
30 |   "token_uri": "https://oauth2.googleapis.com/token",
31 |   "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
32 |   "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/local-account%40{{your_project_id}}.iam.gserviceaccount.com",
33 |   "universe_domain": "googleapis.com"
34 | }
35 | 
36 | ```
37 | 
38 | - put this file in the root directory of the project and adjust your `.env` file to point to it:
39 | ```
40 | GOOGLE_APPLICATION_CREDENTIALS="/home/your-projects-directory/raspberry-ai-voice-assistant/google_client_secret.json"
41 | ```
42 | ## ChatGPT Setup
43 | - create a `.env` file with this content, replacing `{{your-api-key}}`
44 | - the key can be obtained on the OpenAI homepage (https://platform.openai.com/account/api-keys)
45 | 
46 | ```
47 | OPEN_AI_APIKEY="{{your-api-key}}"
48 | 
49 | ```
50 | 
51 | ## Language Model Setup
52 | 
53 | - the voice assistant can also be used with fully offline voice recognition
54 | - language models are loaded into RAM, so this requires a laptop or a recent Raspberry Pi
55 | - to enable Vosk, adjust `assistant_config.mjs`
56 | - download models here: https://alphacephei.com/vosk/models
57 | - create a folder `language_models` in the project root
58 | - extract the downloaded language model into it
59 | - adjust `modelPaths` in `src/voice_assistant_vosk.mjs`
60 | 
61 | ## Run the voice assistant
62 | ```
63 | npm run electron:serve
64 | ```
65 | 
66 | ## Customization
67 | 
68 | - check `assistant_config.mjs`
69 | 
70 | ```
71 | const assistantName = 'Felix';
72 | 
73 | export default {
74 |   'useLocalSpeechToText': false, // if true, uses Vosk (free); if false, uses Google Speech-to-Text
75 |   'lowMemoryVariant': false, // if useLocalSpeechToText is true, this determines whether the smaller language models are used
76 |   'language': 'de', // language used for speech-to-text and for answers from ChatGPT
77 |   'answerWordLimit': 30, // limits the requested word count of an answer from ChatGPT
78 |   'assistantName': assistantName, // you activate the voice assistant by saying "Hey, ${name}"
79 |   "memeTrigger": 'lustiges Bild', // if you say this, the assistant fetches a meme from reddit and shows it
80 |   "gptSystemMessage": `Du bist ein virtueller Sprachassistent. Dein Name ist ${assistantName}. Du gibst kurze, genaue Antworten. Das aktuelle Datum ist: ${new Date().toISOString()}\n\n`
81 | };
82 | ```
--------------------------------------------------------------------------------
/assistant_config.mjs:
--------------------------------------------------------------------------------
1 | const assistantName = 'Felix';
2 | 
3 | export default {
4 |   'useLocalSpeechToText': false, // if true, uses Vosk (free); if false, uses Google Speech-to-Text
5 |   'lowMemoryVariant': false, // if useLocalSpeechToText is true, this determines whether the smaller language models are used
6 |   'language': 'de', // language used for speech-to-text and for answers from ChatGPT
7 |   'answerWordLimit': 30, // limits the requested word count of an answer from ChatGPT
8 |   'assistantName': assistantName, // you activate the voice assistant by saying "Hey, ${name}"
9 |   "memeTrigger": 'lustiges Bild', // if you say this, the assistant fetches a meme from reddit and shows it
10 |   "gptSystemMessage": `Du bist ein virtueller Sprachassistent. Dein Name ist ${assistantName}. Du gibst kurze, genaue Antworten. Das aktuelle Datum ist: ${new Date().toISOString()}\n\n`
11 | };
12 | 
13 | // example en-US config
14 | // const assistantName = 'buddy';
15 | 
16 | // export default {
17 | //   'useLocalSpeechToText': false, // if true, uses Vosk (free); if false, uses Google Speech-to-Text
18 | //   'lowMemoryVariant': false, // if useLocalSpeechToText is true, this determines whether the smaller language models are used
19 | //   'language': 'en', // language used for speech-to-text and for answers from ChatGPT
20 | //   'answerWordLimit': 50, // limits the requested word count of an answer from ChatGPT
21 | //   'assistantName': assistantName, // you activate the voice assistant by saying "Hey, ${name}"
22 | //   "memeTrigger": 'funny image', // if you say this, the assistant fetches a meme from reddit and shows it
23 | //   "gptSystemMessage": `You are a virtual voice assistant. Your name is ${assistantName}. You give short, concrete answers.
Current date is: ${new Date().toISOString()}\n\n` 24 | // }; -------------------------------------------------------------------------------- /babel.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | presets: [ 3 | '@vue/cli-plugin-babel/preset' 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ai-voice-assistant", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "serve": "vue-cli-service serve", 7 | "build": "vue-cli-service build", 8 | "lint": "vue-cli-service lint", 9 | "electron:build": "vue-cli-service electron:build", 10 | "electron:serve": "vue-cli-service electron:serve", 11 | "postinstall": "electron-builder install-app-deps", 12 | "postuninstall": "electron-builder install-app-deps" 13 | }, 14 | "main": "background.js", 15 | "dependencies": { 16 | "@google-cloud/speech": "5.5.0", 17 | "@google-cloud/text-to-speech": "4.2.1", 18 | "audic": "3.0.1", 19 | "chatgpt": "5.2.2", 20 | "core-js": "^3.6.5", 21 | "dotenv": "16.0.3", 22 | "get-mp3-duration": "1.0.0", 23 | "isomorphic-fetch": "3.0.0", 24 | "node-fetch": "3.3.1", 25 | "node-record-lpcm16": "1.0.1", 26 | "play-sound": "1.1.5", 27 | "sound-play": "1.1.0", 28 | "systeminformation": "4.34.23", 29 | "vosk": "^0.3.39", 30 | "vue": "^2.6.11", 31 | "vuetify": "^2.6.0" 32 | }, 33 | "devDependencies": { 34 | "@vue/cli-plugin-babel": "5.0.8", 35 | "@vue/cli-plugin-eslint": "5.0.8", 36 | "@vue/cli-service": "5.0.8", 37 | "babel-eslint": "^10.1.0", 38 | "electron": "24.0.0", 39 | "electron-devtools-installer": "^3.1.0", 40 | "eslint": "7.32.0", 41 | "eslint-plugin-vue": "^6.2.2", 42 | "sass": "~1.32.0", 43 | "sass-loader": "^10.0.0", 44 | "vue-cli-plugin-electron-builder": "1.4.6", 45 | "vue-cli-plugin-vuetify": "~2.5.8", 46 | "vue-template-compiler": "^2.6.11", 47 | "vuetify-loader": "^1.7.0" 48 | }, 49 | "eslintConfig": { 50 | "root": true, 51 | "env": { 52 | "node": true 53 | }, 54 | "extends": [ 55 | "plugin:vue/essential", 56 | "eslint:recommended" 57 | ], 58 | "parserOptions": { 59 | "parser": "babel-eslint" 60 | }, 61 | "rules": {} 62 | }, 63 | "browserslist": [ 64 | "> 1%", 65 | "last 2 versions", 66 | "not dead" 67 | ] 68 | } 69 | -------------------------------------------------------------------------------- /public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oshell/raspberry-ai-voice-assistant/3e9d7e801662a492cefdaf680788365a209c8ece/public/favicon.ico -------------------------------------------------------------------------------- /public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | <%= htmlWebpackPlugin.options.title %> 9 | 10 | 11 | 12 | 13 | 16 |
17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /sounds/de/hotword_answer_1.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oshell/raspberry-ai-voice-assistant/3e9d7e801662a492cefdaf680788365a209c8ece/sounds/de/hotword_answer_1.mp3 -------------------------------------------------------------------------------- /sounds/de/meme_hotword_answer.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oshell/raspberry-ai-voice-assistant/3e9d7e801662a492cefdaf680788365a209c8ece/sounds/de/meme_hotword_answer.mp3 -------------------------------------------------------------------------------- /sounds/en/hotword_answer_1.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oshell/raspberry-ai-voice-assistant/3e9d7e801662a492cefdaf680788365a209c8ece/sounds/en/hotword_answer_1.mp3 -------------------------------------------------------------------------------- /sounds/en/meme_hotword_answer.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oshell/raspberry-ai-voice-assistant/3e9d7e801662a492cefdaf680788365a209c8ece/sounds/en/meme_hotword_answer.mp3 -------------------------------------------------------------------------------- /src/App.vue: -------------------------------------------------------------------------------- 1 | 8 | 9 | 24 | 25 | -------------------------------------------------------------------------------- /src/assets/cyborg_corgi.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oshell/raspberry-ai-voice-assistant/3e9d7e801662a492cefdaf680788365a209c8ece/src/assets/cyborg_corgi.jpg -------------------------------------------------------------------------------- /src/assets/cyborg_corgi.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oshell/raspberry-ai-voice-assistant/3e9d7e801662a492cefdaf680788365a209c8ece/src/assets/cyborg_corgi.webp -------------------------------------------------------------------------------- /src/background.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | import { app, protocol, BrowserWindow, ipcMain } from 'electron' 3 | import { createProtocol } from 'vue-cli-plugin-electron-builder/lib' 4 | import installExtension, { VUEJS_DEVTOOLS } from 'electron-devtools-installer' 5 | import nodeChildProcess from 'child_process'; 6 | import si from 'systeminformation'; 7 | import 'isomorphic-fetch'; 8 | import config from '../assistant_config.mjs' 9 | 10 | const isDevelopment = process.env.NODE_ENV !== 'production' 11 | 12 | let win; 13 | 14 | // Scheme must be registered before the app is ready 15 | protocol.registerSchemesAsPrivileged([ 16 | { scheme: 'app', privileges: { secure: true, standard: true } } 17 | ]) 18 | 19 | async function createWindow() { 20 | // Create the browser window. 
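// (note: nodeIntegration/contextIsolation below give the renderer direct
// Node and ipc access, which the Vue component relies on; also be aware that
// devTools: false makes the openDevTools() call further down a no-op, so
// DevTools stay closed even in development)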
21 | win = new BrowserWindow({ 22 | webPreferences: { 23 | nodeIntegration: true, 24 | contextIsolation: false, 25 | enableRemoteModule: true, 26 | devTools: false 27 | }, 28 | show: false, 29 | fullscreen: true 30 | }) 31 | 32 | win.show(); 33 | if (process.env.WEBPACK_DEV_SERVER_URL) { 34 | // Load the url of the dev server if in development mode 35 | await win.loadURL(process.env.WEBPACK_DEV_SERVER_URL) 36 | if (!process.env.IS_TEST) win.webContents.openDevTools() 37 | } else { 38 | createProtocol('app') 39 | // Load the index.html when not in development 40 | win.loadURL('app://./index.html') 41 | } 42 | } 43 | 44 | // Quit when all windows are closed. 45 | app.on('window-all-closed', () => { 46 | // On macOS it is common for applications and their menu bar 47 | // to stay active until the user quits explicitly with Cmd + Q 48 | if (process.platform !== 'darwin') { 49 | app.quit() 50 | } 51 | }) 52 | 53 | app.on('activate', () => { 54 | // On macOS it's common to re-create a window in the app when the 55 | // dock icon is clicked and there are no other windows open. 56 | if (BrowserWindow.getAllWindows().length === 0) createWindow() 57 | }) 58 | 59 | // This method will be called when Electron has finished 60 | // initialization and is ready to create browser windows. 61 | // Some APIs can only be used after this event occurs. 62 | app.on('ready', async () => { 63 | if (isDevelopment && !process.env.IS_TEST) { 64 | // Install Vue Devtools 65 | try { 66 | await installExtension(VUEJS_DEVTOOLS) 67 | } catch (e) { 68 | console.error('Vue Devtools failed to install:', e.toString()) 69 | } 70 | } 71 | createWindow() 72 | }) 73 | 74 | // Exit cleanly on request from parent process in development mode. 75 | if (isDevelopment) { 76 | if (process.platform === 'win32') { 77 | process.on('message', (data) => { 78 | if (data === 'graceful-exit') { 79 | app.quit() 80 | } 81 | }) 82 | } else { 83 | process.on('SIGTERM', () => { 84 | app.quit() 85 | }) 86 | } 87 | } 88 | 89 | 90 | function handleNotification(data) { 91 | try { 92 | const events = JSON.parse(data); 93 | events.forEach(event => { 94 | console.log(`${event.name}: ${event.value}`); 95 | win.webContents.send(event.name, event.value); 96 | }); 97 | } catch (error) { 98 | console.log(`ERR: parsing failed: ${data}`); 99 | } 100 | } 101 | 102 | const speechToTextService = config.useLocalSpeechToText ? 
'vosk' : 'google';
103 | 
104 | const startSpeechToTextChildProcess = true;
105 | if (startSpeechToTextChildProcess) {
106 |   let script = nodeChildProcess.spawn('node', [`src/voice_assistant_${speechToTextService}.mjs`]);
107 |   script.stdout.on('data', handleNotification);
108 | 
109 |   process.on('exit', function() {
110 |     console.log('Killing child process on exit!');
111 |     script.kill();
112 |   });
113 | }
114 | const temperatureCheckInterval = 5000;
115 | 
116 | function checkCpuTemperature() {
117 |   si.cpuTemperature().then((result) => {
118 |     if (result) {
119 |       const mainTemp = result.main;
120 |       if (win && win.webContents) {
121 |         win.webContents.send('cpu', mainTemp);
122 |       }
123 |     }
124 |   });
125 | }
126 | 
127 | checkCpuTemperature();
128 | setInterval(checkCpuTemperature, temperatureCheckInterval);
--------------------------------------------------------------------------------
/src/components/AiAssistant.vue:
--------------------------------------------------------------------------------
1 | 
60 | 
61 | 
193 | 
194 | 
--------------------------------------------------------------------------------
/src/google_stt.mjs:
--------------------------------------------------------------------------------
1 | import recorder from 'node-record-lpcm16';
2 | import speech from '@google-cloud/speech';
3 | 
4 | // Creates a client
5 | const client = new speech.SpeechClient();
6 | 
7 | const encoding = 'LINEAR16';
8 | const sampleRateHertz = 16000;
9 | let request = {};
10 | let callback = () => {};
11 | let recognizeStream = null;
12 | 
13 | 
14 | function startGoogleSpeechToText(func, langCode) {
15 |   request = {
16 |     config: {
17 |       encoding: encoding,
18 |       sampleRateHertz: sampleRateHertz,
19 |       languageCode: langCode,
20 |     },
21 |     interimResults: false, // If you want interim results, set this to true
22 |   };
23 |   callback = func;
24 | 
25 |   recognizeStream = client
26 |     .streamingRecognize(request)
27 |     .on('error', console.error)
28 |     .on('data', data => {
29 |       const success = data.results[0] && data.results[0].alternatives[0];
30 |       if (success) {
31 |         const result = data.results[0].alternatives[0].transcript;
32 |         callback(result);
33 |       }
34 |     });
35 |   // Start recording and send the microphone input to the Speech API.
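  // The recorder captures raw LINEAR16 PCM at 16 kHz from the default
  // microphone and pipes it into the streamingRecognize duplex stream above;
  // because interimResults is false, the 'data' handler only fires for final
  // transcripts, each of which is handed to the registered callback.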
36 |   // Ensure SoX is installed, see https://www.npmjs.com/package/node-record-lpcm16#dependencies
37 |   recorder
38 |     .record({
39 |       sampleRateHertz: sampleRateHertz,
40 |       threshold: 0,
41 |       verbose: false,
42 |       recordProgram: 'rec', // Try also "arecord" or "sox"
43 |       silence: '1.0',
44 |     })
45 |     .stream()
46 |     .on('error', console.error)
47 |     .pipe(recognizeStream);
48 | }
49 | 
50 | export default startGoogleSpeechToText;
51 | 
--------------------------------------------------------------------------------
/src/main.js:
--------------------------------------------------------------------------------
1 | import Vue from 'vue'
2 | import App from './App.vue'
3 | import vuetify from './plugins/vuetify'
4 | 
5 | Vue.config.productionTip = false
6 | 
7 | new Vue({
8 |   vuetify,
9 |   render: h => h(App)
10 | }).$mount('#app')
11 | 
--------------------------------------------------------------------------------
/src/plugins/vuetify.js:
--------------------------------------------------------------------------------
1 | import Vue from 'vue';
2 | import Vuetify from 'vuetify/lib/framework';
3 | 
4 | Vue.use(Vuetify);
5 | 
6 | export default new Vuetify({
7 |   theme: {
8 |     dark: true // Vuetify 2 option; `defaultTheme` only exists in Vuetify 3
9 |   }
10 | });
11 | 
--------------------------------------------------------------------------------
/src/test.mjs:
--------------------------------------------------------------------------------
1 | import fs from 'fs';
2 | import Audic from 'audic';
3 | import getMP3Duration from 'get-mp3-duration';
4 | const lang = 'en';
5 | 
6 | 
7 | function triggerEvent(name, value) {
8 |   const event = { name, value };
9 |   const events = [event];
10 |   const data = JSON.stringify(events);
11 |   console.log(data);
12 | }
13 | 
14 | 
15 | async function playSound(name) {
16 |   const mp3File = `./sounds/${lang}/${name}.mp3`;
17 |   const buffer = fs.readFileSync(mp3File);
18 |   const duration = getMP3Duration(buffer);
19 |   const audic = new Audic(mp3File);
20 |   // the 'ended' event does not fire reliably;
21 |   // the workaround is to read the duration of the mp3 in ms,
22 |   // then, when the sound starts playing, set a timeout
23 |   // that triggers the end event of the tts
24 |   audic.addEventListener('playing', () => {
25 |     setTimeout(() => {
26 |       triggerEvent('tts_end', true);
27 |       process.exit();
28 |     }, duration);
29 |   });
30 |   audic.play();
31 | }
32 | 
33 | playSound('hotword_answer_1');
--------------------------------------------------------------------------------
/src/tts.mjs:
--------------------------------------------------------------------------------
1 | import fs from 'fs';
2 | import util from "util";
3 | import dotenv from "dotenv";
4 | import textToSpeech from "@google-cloud/text-to-speech";
5 | 
6 | dotenv.config();
7 | 
8 | const ttsApi = new textToSpeech.TextToSpeechClient();
9 | 
10 | const languageMapping = {
11 |   en: 'en-US',
12 |   de: 'de-DE'
13 | }
14 | 
15 | const voices = {
16 |   de: 'de-DE-Neural2-B', // A,C,D
17 |   en: 'en-US-Neural2-I' // A,D,I
18 | }
19 | 
20 | async function synthesizeSpeech(text, name, lang) {
21 |   // Construct the request
22 |   const request = {
23 |     input: { text: text },
24 |     // Select the language and SSML voice gender (optional)
25 |     voice: { languageCode: languageMapping[lang], name: voices[lang], ssmlGender: 'MALE' },
26 |     // select the type of audio encoding
27 |     audioConfig: { audioEncoding: 'MP3' },
28 |   };
29 | 
30 |   // Performs the text-to-speech request
31 |   const [response] = await ttsApi.synthesizeSpeech(request);
32 |   // Write the binary audio content to a local file
33 |   const writeFile = util.promisify(fs.writeFile);
34 | await writeFile(`./sounds/${lang}/${name}.mp3`, response.audioContent, 'binary'); 35 | console.log('Done!'); 36 | } 37 | 38 | async function listVoices(lang) { 39 | const [result] = await ttsApi.listVoices({}); 40 | const voices = result.voices; 41 | 42 | console.log('Voices:'); 43 | voices.forEach(voice => { 44 | if (!voice.name.includes('Neural')) return; 45 | if (!voice.languageCodes.includes(languageMapping[lang])) return; 46 | console.log(`Name: ${voice.name}`); 47 | console.log(` SSML Voice Gender: ${voice.ssmlGender}`); 48 | console.log(` Natural Sample Rate Hertz: ${voice.naturalSampleRateHertz}`); 49 | console.log(' Supported languages:'); 50 | voice.languageCodes.forEach(languageCode => { 51 | console.log(` ${languageCode}`); 52 | }); 53 | }); 54 | } 55 | 56 | const text = "Spiel 'Die da', von den Fantastischen Vier"; 57 | const name = 'temp_4'; 58 | const lang = 'de'; 59 | 60 | synthesizeSpeech(text, name, lang); 61 | -------------------------------------------------------------------------------- /src/voice_assistant_google.mjs: -------------------------------------------------------------------------------- 1 | import fs from 'fs'; 2 | import 'isomorphic-fetch'; 3 | import util from "util"; 4 | import dotenv from "dotenv"; 5 | import { ChatGPTAPI } from 'chatgpt'; 6 | import textToSpeech from "@google-cloud/text-to-speech"; 7 | import Audic from 'audic'; 8 | import getMP3Duration from 'get-mp3-duration'; 9 | import config from '../assistant_config.mjs'; 10 | import startGoogleSpeechToText from './google_stt.mjs'; 11 | 12 | dotenv.config(); 13 | 14 | const lang = config.language; 15 | const answerWordLimit = config.answerWordLimit; 16 | const assistantName = config.assistantName; 17 | const memeTrigger = config.memeTrigger; 18 | const systemMessage = config.gptSystemMessage; 19 | 20 | let lastRequestId = null; 21 | const eventTimeoutMs = 500; 22 | const debug = false; 23 | 24 | const languageMapping = { 25 | en: 'en-US', 26 | de: 'de-DE' 27 | } 28 | 29 | const voices = { 30 | de: 'de-DE-Neural2-B',//A,C,D 31 | en: 'en-US-Neural2-I'//A,D,I 32 | } 33 | 34 | const messagePostFix = { 35 | en: `Answer in less than ${answerWordLimit} words if possible.`, 36 | de: `Antworte in unter ${answerWordLimit} Wörtern, wenn möglich.` 37 | } 38 | 39 | const continueMatches = { 40 | de: 'nochmal', 41 | en: 'next' 42 | } 43 | 44 | const chatGPTAPI = new ChatGPTAPI({ apiKey: process.env.OPEN_AI_APIKEY, systemMessage }) 45 | const ttsApi = new textToSpeech.TextToSpeechClient(); 46 | let memeLoop = false; 47 | 48 | function sleep(ms) { 49 | return new Promise(resolve => setTimeout(resolve, ms)); 50 | } 51 | 52 | function triggerEvent(name, value) { 53 | const event = { name, value }; 54 | const events = [event]; 55 | const data = JSON.stringify(events); 56 | console.log(data); 57 | } 58 | 59 | async function synthesizeSpeech(text) { 60 | // Construct the request 61 | const request = { 62 | input: { text: text }, 63 | // Select the language and SSML voice gender (optional) 64 | voice: { languageCode: languageMapping[lang], name: voices[lang], ssmlGender: 'MALE' }, 65 | // select the type of audio encoding 66 | audioConfig: { audioEncoding: 'MP3' }, 67 | }; 68 | 69 | // Performs the text-to-speech request 70 | const [response] = await ttsApi.synthesizeSpeech(request); 71 | // Write the binary audio content to a local file 72 | const writeFile = util.promisify(fs.writeFile); 73 | await writeFile(`./sounds/${lang}/gpt_answer.mp3`, response.audioContent, 'binary'); 74 | await sleep(eventTimeoutMs); 
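  // (the short sleep above appears to give the renderer a moment to process
  // pending events before the caller emits 'tts' and plays
  // sounds/<lang>/gpt_answer.mp3 via playSound - an assumption, as the delay
  // is not explained in the source)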
75 | }
76 | 
77 | async function askChatGpt(message) {
78 |   const opts = {};
79 | 
80 |   message = `${message}. ${messagePostFix[lang]}`;
81 |   if (lastRequestId) {
82 |     opts.parentMessageId = lastRequestId;
83 |   }
84 |   const response = await chatGPTAPI.sendMessage(message, opts);
85 |   lastRequestId = response.id;
86 |   return response.text;
87 | }
88 | 
89 | 
90 | async function fetchMeme() {
91 |   const memeApi = 'https://meme-api.com/gimme';
92 |   const response = await fetch(memeApi);
93 |   const result = await response.json();
94 | 
95 |   triggerEvent('meme', result.url);
96 | }
97 | 
98 | let active = false;
99 | let disabled = false;
100 | const minimumDisabledMs = 5000;
101 | 
102 | const voiceRecognition = {
103 |   hotwords: {
104 |     activate: [],
105 |     activateMeme: ''
106 |   },
107 |   initHotwords: () => {
108 |     const prefixes = ['hey ', 'he ', 'the ', 'hi ', '']
109 |     voiceRecognition.hotwords.activate = prefixes.map(prefix => {
110 |       return `${prefix}${assistantName.toLocaleLowerCase()}`;
111 |     });
112 |     voiceRecognition.hotwords.activateMeme = memeTrigger;
113 |   },
114 |   googleSttCallback: (text) => {
115 |     if (text && debug) {
116 |       triggerEvent('google_stt_debug', text);
117 |       return;
118 |     }
119 |     voiceRecognition.checkHotword(text);
120 |     voiceRecognition.handleInput(text);
121 |     voiceRecognition.checkStop(text);
122 |   },
123 |   start: () => {
124 |     voiceRecognition.initHotwords();
125 |     const googleLang = languageMapping[lang];
126 |     startGoogleSpeechToText(voiceRecognition.googleSttCallback, googleLang);
127 |   },
128 |   handleInput: async (text) => {
129 |     if (!active || disabled) return;
130 |     if (text.includes('stop') && text.length <= 10) {
131 |       triggerEvent('stop', true);
132 |       active = false;
133 |       return;
134 |     }
135 | 
136 |     text = text.trim();
137 |     let inputTooShort = text && text.length < 6;
138 |     let notEnoughWords = text.split(' ').length < 3;
139 |     let containsHotWord = text.includes(assistantName) && text.length < 12;
140 |     if (!text || inputTooShort || notEnoughWords || containsHotWord) {
141 |       return;
142 |     }
143 | 
144 |     if (text.includes('stop') && text.length <= 20) {
145 |       triggerEvent('stop', true);
146 |       active = false;
147 |       return;
148 |     }
149 | 
150 |     let questionEvent = {
151 |       name: 'question',
152 |       value: text
153 |     };
154 | 
155 |     const gptStartEvent = {
156 |       name: 'gpt_start',
157 |       value: true
158 |     };
159 | 
160 |     let events = [questionEvent, gptStartEvent];
161 |     let data = JSON.stringify(events);
162 |     console.log(data);
163 |     active = false;
164 | 
165 |     const answer = await askChatGpt(text);
166 | 
167 |     triggerEvent('answer', answer);
168 |     await synthesizeSpeech(answer);
169 |     triggerEvent('tts', true);
170 |     playSound('gpt_answer', true);
171 | 
172 |   },
173 |   checkHotword: async (text) => {
174 |     if (active || disabled) return;
175 | 
176 |     text = normalizeResult(text);
177 |     let match = false;
178 |     voiceRecognition.hotwords.activate.forEach(hotword => {
179 |       if (text.includes(hotword)) {
180 |         match = true;
181 |       }
182 |     });
183 | 
184 |     if (match) {
185 |       active = true;
186 |       setTimeout(() => {
187 |         disabled = false;
188 |       }, eventTimeoutMs);
189 |       triggerEvent('voice_input_start', true);
190 |     }
191 | 
192 |     let memeMatch = false;
193 |     if (text.includes(voiceRecognition.hotwords.activateMeme)) {
194 |       memeMatch = true;
195 |     }
196 | 
197 |     if (memeMatch) {
198 |       memeLoop = true;
199 |       disabled = true;
200 |       triggerEvent('meme_hotword', true);
201 |       fetchMeme();
202 |       playSound('meme_hotword_answer', false);
203 |       setTimeout(() => {
204 |         disabled =
false; 205 | }, minimumDisabledMs) 206 | } 207 | 208 | let memeContinueMatch = text.includes(continueMatches[lang]); 209 | 210 | if (memeContinueMatch && memeLoop) { 211 | disabled = true; 212 | triggerEvent('meme', true); 213 | fetchMeme(); 214 | setTimeout(() => { 215 | disabled = false; 216 | }, 1000); 217 | } 218 | 219 | let stopMatch = text.includes('stop'); 220 | 221 | if (stopMatch && memeLoop) { 222 | memeLoop = false; 223 | triggerEvent('meme_stop', true); 224 | } 225 | }, 226 | checkStop: async (text) => { 227 | if (!disabled) return; 228 | text = normalizeResult(text); 229 | let match = false; 230 | if (text.includes('stop')) { 231 | match = true; 232 | } 233 | 234 | if (match) { 235 | active = false; 236 | disabled = false; 237 | triggerEvent('stop', true); 238 | } 239 | } 240 | } 241 | 242 | async function playSound(name) { 243 | disabled = true; 244 | const mp3File = `./sounds/${lang}/${name}.mp3`; 245 | const buffer = fs.readFileSync(mp3File); 246 | const duration = getMP3Duration(buffer); 247 | const audic = new Audic(mp3File); 248 | // ended event does not work correctly 249 | // workaround is getting duration of mp3 in ms 250 | // then when sound starts playing we set timeout 251 | // to trigger end event of tts 252 | audic.addEventListener('playing', () => { 253 | setTimeout(() => { 254 | disabled = false; 255 | triggerEvent('tts_end', true); 256 | }, duration); 257 | }); 258 | audic.play(); 259 | } 260 | 261 | function normalizeResult(text) { 262 | text = text.trim(); 263 | return text.toLocaleLowerCase(); 264 | } 265 | 266 | console.log(JSON.stringify([{ name: 'LOG:', value: 'Google Speech-to-Text started!' }])); 267 | voiceRecognition.start(); 268 | -------------------------------------------------------------------------------- /src/voice_assistant_vosk.mjs: -------------------------------------------------------------------------------- 1 | import vosk from 'vosk'; 2 | import fs from 'fs'; 3 | import mic from 'mic'; 4 | import { fileURLToPath } from 'url'; 5 | import path from 'path'; 6 | import 'isomorphic-fetch'; 7 | import util from "util"; 8 | import dotenv from "dotenv"; 9 | import { ChatGPTAPI } from 'chatgpt'; 10 | import textToSpeech from "@google-cloud/text-to-speech"; 11 | import Audic from 'audic'; 12 | import getMP3Duration from 'get-mp3-duration'; 13 | import config from '../assistant_config.mjs'; 14 | 15 | dotenv.config(); 16 | const __filename = fileURLToPath(import.meta.url); 17 | const __dirname = path.dirname(__filename); 18 | 19 | const lang = config.language; 20 | const answerWordLimit = config.answerWordLimit; 21 | const lowMemoryVariant = config.lowMemoryVariant; 22 | const assistantName = config.assistantName; 23 | const memeTrigger = config.memeTrigger; 24 | const systemMessage = config.gptSystemMessage; 25 | 26 | let lastRequestId = null; 27 | const eventTimeoutMs = 200; 28 | 29 | const debug = false; 30 | 31 | let modelPaths = { 32 | de: __dirname + "/../language_models/vosk-model-de-0.21", 33 | en: __dirname + "/../language_models/vosk-model-en-us-0.22" 34 | } 35 | 36 | if (lowMemoryVariant) { 37 | modelPaths = { 38 | de: __dirname + "/../language_models/vosk-model-small-de-0.15", 39 | en: __dirname + "/../language_models/vosk-model-en-us-0.22-lgraph" 40 | } 41 | } 42 | 43 | const languageMapping = { 44 | en: 'en-US', 45 | de: 'de-DE' 46 | } 47 | 48 | const voices = { 49 | de: 'de-DE-Neural2-B',//A,C,D 50 | en: 'en-US-Neural2-I'//A,D,I 51 | } 52 | 53 | const messagePostFix = { 54 | en: `Answer in less than ${answerWordLimit} words if 
possible.`, 55 | de: `Antworte in unter ${answerWordLimit} Wörtern, wenn möglich.` 56 | } 57 | 58 | const continueMatches = { 59 | de: 'nochmal', 60 | en: 'next' 61 | } 62 | 63 | const MODEL_PATH = modelPaths[lang]; 64 | const SAMPLE_RATE = 16000; 65 | 66 | if (!fs.existsSync(MODEL_PATH)) { 67 | console.log("Please download the model from https://alphacephei.com/vosk/models and unpack as " + MODEL_PATH + " in the current folder.") 68 | process.exit() 69 | } 70 | 71 | vosk.setLogLevel(0); 72 | const model = new vosk.Model(MODEL_PATH); 73 | let rec = new vosk.Recognizer({ model: model, sampleRate: SAMPLE_RATE }); 74 | 75 | 76 | const chatGPTAPI = new ChatGPTAPI({ apiKey: process.env.OPEN_AI_APIKEY, systemMessage }) 77 | const ttsApi = new textToSpeech.TextToSpeechClient(); 78 | let memeLoop = false; 79 | 80 | function sleep(ms) { 81 | return new Promise(resolve => setTimeout(resolve, ms)); 82 | } 83 | 84 | function triggerEvent(name, value) { 85 | const event = { name, value }; 86 | const events = [event]; 87 | const data = JSON.stringify(events); 88 | console.log(data); 89 | } 90 | 91 | async function synthesizeSpeech(text) { 92 | // Construct the request 93 | const request = { 94 | input: { text: text }, 95 | // Select the language and SSML voice gender (optional) 96 | voice: { languageCode: languageMapping[lang], name: voices[lang], ssmlGender: 'MALE' }, 97 | // select the type of audio encoding 98 | audioConfig: { audioEncoding: 'MP3' }, 99 | }; 100 | 101 | // Performs the text-to-speech request 102 | const [response] = await ttsApi.synthesizeSpeech(request); 103 | // Write the binary audio content to a local file 104 | const writeFile = util.promisify(fs.writeFile); 105 | await writeFile(`./sounds/${lang}/gpt_answer.mp3`, response.audioContent, 'binary'); 106 | await sleep(eventTimeoutMs); 107 | } 108 | 109 | async function askChatGpt(message) { 110 | const opts = {}; 111 | 112 | message = `${message}. 
${messagePostFix[lang]}`;
113 |   if (lastRequestId) {
114 |     opts.parentMessageId = lastRequestId;
115 |   }
116 |   const response = await chatGPTAPI.sendMessage(message, opts);
117 |   lastRequestId = response.id;
118 |   return response.text;
119 | }
120 | 
121 | 
122 | async function fetchMeme() {
123 |   const memeApi = 'https://meme-api.com/gimme';
124 |   const response = await fetch(memeApi);
125 |   const result = await response.json();
126 | 
127 |   triggerEvent('meme', result.url);
128 | }
129 | 
130 | const micInstance = mic({
131 |   rate: String(SAMPLE_RATE),
132 |   channels: '1',
133 |   debug: false,
134 |   device: 'default',
135 | });
136 | 
137 | let active = false;
138 | let disabled = false;
139 | let recordingCache = '';
140 | let cacheCounter = 0;
141 | const minimumDisabledMs = 5000;
142 | const maxAttemptsRecording = 5;
143 | 
144 | const voiceRecognition = {
145 |   hotwords: {
146 |     activate: [],
147 |     activateMeme: ''
148 |   },
149 |   initHotwords: () => {
150 |     const prefixes = ['hey', 'he', 'the']
151 |     voiceRecognition.hotwords.activate = prefixes.map(prefix => {
152 |       return `${prefix} ${assistantName.toLocaleLowerCase()}`; // Vosk output is lowercase, so the hotword must be too (as in the Google variant)
153 |     });
154 |     voiceRecognition.hotwords.activateMeme = memeTrigger;
155 |   },
156 |   start: () => {
157 |     voiceRecognition.initHotwords();
158 |     const micInputStream = micInstance.getAudioStream();
159 | 
160 |     micInputStream.on('data', data => {
161 |       voiceRecognition.checkHotword(data);
162 |       voiceRecognition.handleInput(data);
163 |       voiceRecognition.checkStop(data);
164 |     });
165 | 
166 |     micInputStream.on('audioProcessExitComplete', function () {
167 |       console.log(rec.finalResult());
168 |       rec.free();
169 |       model.free();
170 |     });
171 | 
172 |     process.on('SIGINT', function () {
173 |       console.log("\nStopping");
174 |       micInstance.stop();
175 |     });
176 | 
177 |     micInstance.start();
178 |   },
179 |   handleInput: async (data) => {
180 |     if (disabled) {
181 |       rec.reset();
182 |     }
183 |     if (!active || disabled) return;
184 |     const isSilent = rec.acceptWaveform(data);
185 | 
186 |     let isFinalAttempt = false;
187 |     let result = rec.partialResult();
188 | 
189 |     if (result.partial.includes('stop') && result.partial.length <= 10) {
190 |       triggerEvent('stop', true);
191 |       rec.reset();
192 |       active = false;
193 |       return;
194 |     }
195 | 
196 |     let inputTooShort = result.partial && result.partial.length < 6;
197 |     let notEnoughWords = result.partial.split(' ').length < 3;
198 |     if (!result.partial || inputTooShort || notEnoughWords) {
199 |       return;
200 |     }
201 |     if (result.partial === recordingCache) {
202 |       cacheCounter++;
203 |     } else {
204 |       recordingCache = result.partial;
205 |       cacheCounter = 0;
206 |     }
207 | 
208 |     if (cacheCounter > maxAttemptsRecording) {
209 |       isFinalAttempt = true;
210 |       result = rec.finalResult();
211 |     }
212 | 
213 | 
214 |     if (isSilent || isFinalAttempt) {
215 |       result = isFinalAttempt ?
result : rec.result(); 216 | result = normalizeResult(result); 217 | 218 | if (result.text.includes('stop') && result.text.length <= 20) { 219 | triggerEvent('stop', true); 220 | rec.reset(); 221 | active = false; 222 | return; 223 | } 224 | 225 | 226 | if (result.text && debug) { 227 | triggerEvent('voice_input_debug', result.text) 228 | } 229 | 230 | let questionEvent = { 231 | name: 'question', 232 | value: result.text 233 | }; 234 | 235 | const gptStartEvent = { 236 | name: 'gpt_start', 237 | value: true 238 | }; 239 | 240 | let events = [questionEvent, gptStartEvent]; 241 | data = JSON.stringify(events); 242 | console.log(data); 243 | active = false; 244 | 245 | const answer = await askChatGpt(result.text); 246 | 247 | triggerEvent('answer', answer); 248 | await synthesizeSpeech(answer); 249 | triggerEvent('tts', true); 250 | playSound('gpt_answer', true); 251 | } else { 252 | if (result.partial && debug) { 253 | triggerEvent('voice_input_partial', result.partial) 254 | } 255 | 256 | } 257 | }, 258 | checkHotword: async (data) => { 259 | if (disabled) { 260 | rec.reset(); 261 | } 262 | if (active || disabled) return; 263 | let result = ''; 264 | if (rec.acceptWaveform(data)) { 265 | result = rec.result(); 266 | } else { 267 | result = rec.partialResult(); 268 | result.text = result.partial; 269 | } 270 | 271 | result = normalizeResult(result); 272 | let match = false; 273 | voiceRecognition.hotwords.activate.forEach(hotword => { 274 | if (result.text.includes(hotword)) { 275 | match = true; 276 | } 277 | }); 278 | 279 | if (result.text && debug) { 280 | triggerEvent('voice_input_hotword', result.text); 281 | } 282 | 283 | if (match) { 284 | rec.reset(); 285 | active = true; 286 | disabled = false; 287 | triggerEvent('voice_input_start', true); 288 | } 289 | 290 | let memeMatch = false; 291 | if (result.text.includes(voiceRecognition.hotwords.activateMeme)) { 292 | memeMatch = true; 293 | } 294 | 295 | if (memeMatch) { 296 | memeLoop = true; 297 | rec.reset(); 298 | disabled = true; 299 | triggerEvent('meme_hotword', true); 300 | fetchMeme(); 301 | playSound('meme_hotword_answer', false); 302 | setTimeout(() => { 303 | disabled = false; 304 | }, minimumDisabledMs) 305 | } 306 | 307 | let memeContinueMatch = result.text.includes(continueMatches[lang]); 308 | 309 | if (memeContinueMatch && memeLoop) { 310 | disabled = true; 311 | triggerEvent('meme', true); 312 | fetchMeme(); 313 | setTimeout(() => { 314 | disabled = false; 315 | }, 1000); 316 | } 317 | 318 | let stopMatch = result.text.includes('stop'); 319 | 320 | if (stopMatch && memeLoop) { 321 | memeLoop = false; 322 | rec.reset(); 323 | triggerEvent('meme_stop', true); 324 | } 325 | }, 326 | checkStop: async (data) => { 327 | if (!disabled) return; 328 | let result = ''; 329 | if (rec.acceptWaveform(data)) { 330 | result = rec.result(); 331 | } else { 332 | result = rec.partialResult(); 333 | result.text = result.partial; 334 | } 335 | 336 | result = normalizeResult(result); 337 | let match = false; 338 | if (result.text.includes('stop')) { 339 | match = true; 340 | } 341 | 342 | if (match) { 343 | rec.reset(); 344 | active = false; 345 | disabled = false; 346 | triggerEvent('stop', true); 347 | } 348 | } 349 | } 350 | 351 | async function playSound(name) { 352 | disabled = true; 353 | const mp3File = `./sounds/${lang}/${name}.mp3`; 354 | const buffer = fs.readFileSync(mp3File); 355 | const duration = getMP3Duration(buffer); 356 | const audic = new Audic(mp3File); 357 | // ended event does not work correctly 358 | // workaround is 
getting duration of mp3 in ms
359 |   // then when sound starts playing we set timeout
360 |   // to trigger end event of tts
361 |   audic.addEventListener('playing', () => {
362 |     setTimeout(() => {
363 |       disabled = false;
364 |       triggerEvent('tts_end', true);
365 |     }, duration);
366 |   });
367 |   audic.play();
368 | }
369 | 
370 | function normalizeResult(result) {
371 |   if (lang === 'de') {
372 |     if (result.text.startsWith('einen')) {
373 |       result.text = result.text === 'einen' ? '' : result.text.substring(6); // strip the "einen " prefix (6 chars)
374 |     }
375 | 
376 |     result.text = result.text.replace('wie kann ich helfen', '');
377 |   }
378 |   if (lang === 'en') {
379 |     if (result.text.startsWith('a ')) {
380 |       result.text = result.text.substring(2); // strip the "a " prefix (2 chars)
381 |     }
382 |     if (result.text.startsWith('please')) {
383 |       result.text = result.text === 'please' ? '' : result.text.substring(7);
384 |     }
385 |     if (result.text.startsWith('the')) {
386 |       result.text = result.text === 'the' ? '' : result.text.substring(4);
387 |     }
388 | 
389 |     result.text = result.text.replace('how can i help', ''); // Vosk output is lowercase
390 |   }
391 | 
392 |   result.text = result.text.trim();
393 | 
394 |   return result;
395 | }
396 | 
397 | console.log(JSON.stringify([{ name: 'LOG:', value: 'Vosk Speech-to-Text started!' }]));
398 | voiceRecognition.start();
399 | 
--------------------------------------------------------------------------------
/vue.config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 |   transpileDependencies: [
3 |     'vuetify'
4 |   ],
5 |   pluginOptions: {
6 |     electronBuilder: {
7 |       nodeIntegration: true
8 |     }
9 |   }
10 | }
11 | 
--------------------------------------------------------------------------------
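Note on the renderer side: the script blocks of src/App.vue and src/components/AiAssistant.vue did not survive this dump. Based on the channels that background.js forwards via win.webContents.send ('question', 'answer', 'tts', 'tts_end', 'meme', 'cpu', 'stop', ...), the component presumably subscribes roughly as in the following sketch (hypothetical code, not the actual component):

```js
// Hypothetical sketch of the AiAssistant component's script section; only the
// channel names are taken from the triggerEvent/webContents.send calls above.
import { ipcRenderer } from 'electron'; // available because nodeIntegration is enabled

export default {
  name: 'AiAssistant',
  data: () => ({ question: '', answer: '', memeUrl: '', cpuTemp: null, speaking: false }),
  created() {
    ipcRenderer.on('question', (_event, text) => { this.question = text; });
    ipcRenderer.on('answer', (_event, text) => { this.answer = text; });
    ipcRenderer.on('tts', () => { this.speaking = true; });
    ipcRenderer.on('tts_end', () => { this.speaking = false; });
    // 'meme' carries a url from fetchMeme, but is also sent with `true`
    // when the meme loop continues, hence the type check
    ipcRenderer.on('meme', (_event, url) => { if (typeof url === 'string') this.memeUrl = url; });
    ipcRenderer.on('cpu', (_event, temp) => { this.cpuTemp = temp; });
  }
};
```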