├── requirements.txt
├── README.md
├── LICENSE
└── main.py

/requirements.txt:
--------------------------------------------------------------------------------
openai-whisper
SpeechRecognition
gpt4all==2.2.1.post1
pyaudio==0.2.13
pyttsx3==2.90
soundfile==0.12.1
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
# GPT4ALL-Voice-Assistant
A 100% offline GPT4All voice assistant: completely open source and privacy-friendly. Use any language model available through GPT4All, with wake-word detection running as a background process. Watch the full YouTube tutorial for the setup guide: https://youtu.be/6zAk0KHmiGw
## Setup
I highly advise watching the YouTube tutorial before using this code. You will need to modify the OpenAI Whisper library to work offline; the video walks through that, as well as setting up all the other dependencies. A sketch of the offline-caching idea is shown after the Arch Linux note below.

If you're planning on installing this on an Arch-based distro, you also need to install the `espeak` and `python-espeak` packages from the AUR. You can install them with the `yay` utility by running:
```bash
yay -S espeak python-espeak
```
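For the offline Whisper piece, a minimal sketch of the general idea (the video covers the exact modification; this sketch assumes Whisper's default cache directory, `~/.cache/whisper`) is to download the checkpoints once while you still have network access:

```python
# Run once while online: whisper.load_model() downloads each checkpoint
# into the local cache (~/.cache/whisper by default) if it is not there yet.
import whisper

whisper.load_model('tiny')  # caches tiny.pt, used here for wake-word detection
whisper.load_model('base')  # caches base.pt, used here for prompt transcription
```

After that, `main.py` can point `whisper.load_model()` directly at the cached `.pt` files and never touch the network.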
## Improvements to think about adding to yours
Give the model a system prompt. These open-source models perform far better when you send a system prompt, as described in the GPT4All documentation: https://docs.gpt4all.io/gpt4all_python.html#introspection A minimal sketch is shown below.
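This sketch assumes the `chat_session` context manager from the GPT4All Python bindings; the system prompt text here is only an example:

```python
from gpt4all import GPT4All

model = GPT4All('ggml-model-gpt4all-falcon-q4_0.gguf', allow_download=False)

# Every generate() call made inside the session is conditioned on the system prompt.
system_prompt = 'You are Jarvis, a helpful voice assistant. Keep answers short and suitable for being read aloud.'
with model.chat_session(system_prompt=system_prompt):
    print(model.generate('What is the capital of France?', max_tokens=200))
```

In `main.py`, the equivalent change would be to enter the session once and make the `model.generate()` call in `prompt_gpt()` from inside it.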
--------------------------------------------------------------------------------

/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2023 Austin Dobbins

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------

/main.py:
--------------------------------------------------------------------------------
import os
import sys
import time
import warnings

import speech_recognition as sr
import whisper
from gpt4all import GPT4All

wake_word = 'jarvis'

# Path to the locally downloaded GPT4All model file.
model_complete_filepath = '/Users/YOUR_USERNAME_HERE/Library/Application Support/nomic.ai/GPT4All/ggml-model-gpt4all-falcon-q4_0.gguf'
model_path_directory, model_filename_complete = os.path.split(model_complete_filepath)
model_filename, model_extension = os.path.splitext(model_filename_complete)

device_for_running_LLM = input(
    "Which device would you like to use for running the LLM?\n"
    "Please type your selection without the quotes and press ENTER.\n"
    "Selections available: \"gpu\", \"cpu\", \"intel\", and \"amd\"\n\n"
    "You've selected: "
)

model = GPT4All(model_filename, model_path=model_path_directory,
                allow_download=False, device=device_for_running_LLM)
r = sr.Recognizer()

# Load the Whisper checkpoints from the local cache so no network access is needed:
# the tiny model for cheap wake-word detection, the base model for transcribing prompts.
tiny_model_path = os.path.expanduser('~/.cache/whisper/tiny.pt')
base_model_path = os.path.expanduser('~/.cache/whisper/base.pt')
tiny_model = whisper.load_model(tiny_model_path)
base_model = whisper.load_model(base_model_path)

listening_for_wake_word = True
source = sr.Microphone()
warnings.filterwarnings("ignore", category=UserWarning, module='whisper.transcribe', lineno=114)

# macOS uses the built-in `say` command; everything else falls back to pyttsx3.
if sys.platform != 'darwin':
    import pyttsx3
    engine = pyttsx3.init()

def speak(text):
    if sys.platform == 'darwin':
        # Keep only characters that are safe to pass to the shell.
        ALLOWED_CHARS = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.,?!-_$:+/ ')
        clean_text = ''.join(c for c in text if c in ALLOWED_CHARS)
        os.system(f"say '{clean_text}'")
    else:
        engine.say(text)
        engine.runAndWait()

def listen_for_wake_word(audio):
    # Transcribe the captured phrase with the tiny model and check for the wake word.
    global listening_for_wake_word
    with open('wake_detect.wav', 'wb') as f:
        f.write(audio.get_wav_data())
    result = tiny_model.transcribe('wake_detect.wav')
    text_input = result['text']
    if wake_word in text_input.lower().strip():
        print('Wake word detected. Please speak your prompt to GPT4All.')
        speak('Listening')
        listening_for_wake_word = False

def prompt_gpt(audio):
    # Transcribe the spoken prompt with the base model and send it to GPT4All.
    global listening_for_wake_word
    try:
        with open('prompt.wav', 'wb') as f:
            f.write(audio.get_wav_data())
        result = base_model.transcribe('prompt.wav')
        prompt_text = result['text']
        if len(prompt_text.strip()) == 0:
            print('Empty prompt. Please speak again.')
            speak('Empty prompt. Please speak again.')
            listening_for_wake_word = True
        else:
            print('User: ' + prompt_text)
            output = model.generate(prompt_text, max_tokens=200)
            print('GPT4All: ', output)
            speak(output)
            print('\nSay', wake_word, 'to wake me up. \n')
            listening_for_wake_word = True
    except Exception as e:
        print('Prompt error: ', e)

def callback(recognizer, audio):
    # Runs on the background listener thread for every detected phrase.
    if listening_for_wake_word:
        listen_for_wake_word(audio)
    else:
        prompt_gpt(audio)

def start_listening():
    # Calibrate for ambient noise, then release the mic so the
    # background listener can open it on its own thread.
    with source as s:
        r.adjust_for_ambient_noise(s, duration=2)
    print('\nSay', wake_word, 'to wake me up. \n')
    r.listen_in_background(source, callback)
    while True:
        time.sleep(1)

if __name__ == '__main__':
    start_listening()
--------------------------------------------------------------------------------