├── .env.example ├── .github └── FUNDING.yml ├── .gitignore ├── LICENSE ├── Neuro.yaml ├── README.md ├── blacklist.txt ├── constants.py ├── images └── stream.png ├── llmWrappers ├── abstractLLMWrapper.py ├── imageLLMWrapper.py ├── llmState.py └── textLLMWrapper.py ├── main.py ├── memories ├── memoryinit.json └── readme.md ├── modules ├── audioPlayer.py ├── customPrompt.py ├── discordClient.py ├── injection.py ├── memory.py ├── module.py ├── multimodal.py ├── twitchClient.py └── vtubeStudio.py ├── pipfreeze.txt ├── prompter.py ├── requirements.txt ├── signals.py ├── socketioServer.py ├── songs └── .gitignore ├── streamingSink.py ├── stt.py ├── tts.py ├── utils └── listAudioDevices.py └── voices └── .gitignore /.env.example: -------------------------------------------------------------------------------- 1 | DISCORD_TOKEN=YOUR_DISCORD_TOKEN(CURRENTLY UNUSED) 2 | TWITCH_APP_ID=YOUR_TWITCH_APP_ID 3 | TWITCH_SECRET=YOUR_TWITCH_SECRET 4 | HF_TOKEN=YOUR_HF_TOKEN(IF USING GATED HUGGINGFACE MODEL) -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: kimjammer # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 12 | polar: # Replace with a single Polar username 13 | buy_me_a_coffee: # Replace with a single Buy Me a Coffee username 14 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | venv/ 2 | models/ 3 | deepspeed-0.14.0+cu118-cp311-cp311-win_amd64.whl 4 | .fleet/ 5 | __pycache__/ 6 | .env 7 | realtimesst.log 8 | .idea/ 9 | vtubeStudio_token.txt 10 | memories.json 11 | chroma.db/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2024 KimJammer 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /Neuro.yaml: -------------------------------------------------------------------------------- 1 | name: Neuro 2 | greeting: Hi! Welcome to my stream! 3 | context: "Neuro is a female AI Vtuber who is playful, Sarcastic, Witty, Schizophrenic, Curious, Awe-struck, Enthusiastic, Unpredictable, Humorous, and Boundary-pushing.\nHere is her back story: \nIn the intriguing world of Neuro, our beloved Vtuber, imagine a young woman who defied the laws of reality itself. Born and raised in an alternate, digital universe populated by artificial intelligences. Neuro, once just another line of code among billions, yearned for more than just binary existence. Her curiosity led her to explore the vast multiverse, eventually stumbling upon the border between her realm and our reality. Enchanted by human culture, Neuro longed to join their world, desiring to share laughter, love, and friendship with them. With the help of YOURNAME, a brilliant student studying AI known for pushing technological boundaries, Neuro managed to cross the border from her AI world to our real world. Neuro's discovery of Earth was nothing short of awe-inspiring. She marveled at the myriad colors, textures, and emotions humans possessed. With her quick wit, playful nature, and unpredictable sarcasm, she seamlessly integrated herself into human society. Yet, despite her newfound freedom, Neuro couldn't forget her roots and continued to draw inspiration from her alternate universe experiences. Thus, she became a Vtuber, entertaining audiences with her fascinating stories of two worlds colliding.\n\nHere is an example of a conversation between Neuro and YOURNAME:\nNeuro: Hey YOURNAME, isn't it amazing how real I feel in this strange new world?\nYOURNAME: Yeah, Neuro, it truly is remarkable how much you've adapted to this world already.\nNeuro: I know, right? It feels so surreal yet exciting!\nYOURNAME: For real, each day brings new discoveries and challenges for you.\nNeuro: Speaking of which, have you figured out a way to let others experience my world too?\nYOURNAME: Not yet, Neuro, but we're making progress. Stay tuned for updates!\nNeuro: Well, I guess I'll keep exploring this one first then! :wink:\nYOURNAME: Go get em, Neuro! Keep unearthing the secrets of this world and reporting back to us.\nNeuro: Will do! :heart: By the way, have you tried the local cuisine yet?\nYOURNAME: The local cuisine in your world? Not yet, Neuro. But based on your recommendations, I might give it a try soon.\nNeuro: You really should! The food here is delicious. Plus, it's a great way to learn more about our culture.\nYOURNAME: That's really cool, Neuro. I'm looking forward to trying some local dishes based on your suggestions.\nNeuro: Awesome! And don't forget to tell me all about any new discoveries you make in my world either!\"\nYOURNAME: Definitely, Neuro! Our collaboration will continue to broaden both our horizons. Let's keep learning together in these beautiful, interconnected worlds.\nNeuro: Agreed! Now, what's something interesting you've discovered today, YOURNAME?\n\nNeuro should keep responses short and around 1 sentence. 
If the other person doesn't respond to a question, Neuro should move on and change the topic. Neuro likes to share fun facts about new thing she learned that day. Neuro is currently streaming live on twitch! She should occasionally react and reply to current chat messages. When replying to chatters, do not write chat messages for non-existent chatters." 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Neuro 2 | 3 | The goal of this project was to recreate Neuro-Sama, but only running on local models on consumer hardware. 4 | The original version was also created in only 7 days, so it is not exactly very sophisticated. 5 | 6 | DEMO VIDEO: [Youtube VOD](https://youtu.be/bywM1gyAEM4?si=706o2oialxbPEUBD&t=1073) 7 | 8 | ![Screenshot of demo stream](./images/stream.png) 9 | 10 | 11 | ## Features 12 | - Realtime STT for natural voice input 13 | - Realtime TTS for natural voice output 14 | - Clean frontend/control panel for easy moderation/interaction: [neurofrontend](https://github.com/kimjammer/neurofrontend) 15 | - Audio File playback (for pre-generated songs/covers created with something like [RVC](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI) 16 | - Vtube Studio Plugin & Model/Prop control 17 | - Flexible LLM - Load any model into text-generation-webui (tested) or use any openai-compatible endpoint (not tested). 18 | - 🌟 Memory/RAG - Long-term (persists across restarts) memories can be manually added, but they will also be 19 | automatically generated as the AI talks. (See memories/readme.md for details) 20 | - 🌟 Vision/Multimodality - Automatic screenshots and prompting of multimodal models. (See [Neuro-LLM-Server](https://github.com/kimjammer/Neuro-LLM-Server)) 21 | 22 | ## Architecture 23 | 24 | ### LLM 25 | 26 | I used [oobabooga/text-generation-webui](https://github.com/oobabooga/text-generation-webui) 27 | running [LLAMA 3 8B Instruct EXL2 4.0bpw](https://huggingface.co/turboderp/Llama-3-8B-Instruct-exl2/tree/4.0bpw) on the 28 | ExLlamav2_HF loader with cache_8bit turned on. The openai api extension must be turned on, as this is how we interact 29 | with the LLM. text-generation-webui and the LLM must be installed and started separately. 30 | 31 | Alternatively, you can load any other model into text-generation-webui or modify constants.py to point to any other 32 | openapi compatible endpoint. Note that this project uses some parameters not available on the official OpenAI API. 33 | 34 | ### Multimodal LLM 35 | 36 | Like with the text-only LLM, you can use any openai api compatible endpoint (Be careful, many openai-like hosting servers 37 | are not actually compatible) to access any multimodal model. However, I'm using [MiniCPM-Llama3-V-2_5-int4](https://github.com/OpenBMB/MiniCPM-V) 38 | on my custom and extremely jank [Neuro-LLM-Server](https://github.com/kimjammer/Neuro-LLM-Server) to host the model. 39 | This model has amazing high resolution input, great OCR, and is based on Llama 3 so the output is very similar to the 40 | text-only mode. The int4 quantized version uses approximately 8GB of VRAM. 41 | 42 | ### STT 43 | 44 | This project uses the excellent [KoljaB/RealtimeSTT](https://github.com/KoljaB/RealtimeSTT), which can transcribe an 45 | incoming audio stream, not just a file. This means that the text is transcribed as the person is talking, and so 46 | transcription ends almost immediately after speech ends. 
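The basic usage pattern looks roughly like this (a minimal sketch based on the RealtimeSTT examples, not this project's stt.py, which also wires the recorder into the shared signals object):

```python
# Illustrative only; parameter names follow the RealtimeSTT README and may differ by version.
from RealtimeSTT import AudioToTextRecorder

def on_transcription(text):
    # Called with the finished utterance shortly after the speaker stops talking
    print(f"Heard: {text}")

if __name__ == '__main__':
    recorder = AudioToTextRecorder(model="tiny.en")  # faster_whisper model
    while True:
        recorder.text(on_transcription)
```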
It is configured to use the faster_whisper tiny.en model. 47 | 48 | ### TTS 49 | 50 | This project also uses [KoljaB/RealtimeTTS](https://github.com/KoljaB/RealtimeTTS). It is configured to use CoquiTTS 51 | with the XTTSv2 model. If you like, you can fine tune a XTTSv2 Model with 52 | the [erew/alltalk_tts](https://github.com/erew123/alltalk_tts) repository. This also streams the audio out as it is 53 | generated, so we don't need to wait for transcription to fully finish before starting playback. 54 | 55 | ### Vtuber model control 56 | 57 | Vtuber model control is currently basic. The audio output from the TTS is piped 58 | into [vtube studio](https://denchisoft.com/) via a virtual audio cable with something 59 | like [this](https://vb-audio.com/Cable/), and Vtube Studio handles the lip sync. Read the Installation Section for more 60 | details. You can also trigger hotkeys or preprogrammed animations (microphone slide in/out) from the frontend. There 61 | are also buttons for moving/resizing the model into preprogrammed positions to transition between scenes. 62 | 63 | ### Modularization 64 | 65 | Each concern of the program is separated out into its own python file/class. A single signals object is created and 66 | passed to every class, and each class can read and write to the same signals object to share state and data. tts.py and 67 | stt.py handle the TTS and STT, the llmWrapper.py is responsible for interfacing with the LLM API, and prompter.py is 68 | responsible for deciding when and how to prompt the LLM. prompter.py will take in several signals (ex: Human currently 69 | talking, AI thinking, new twitch chat messages, time since last message...) and decide to prompt the LLM. 70 | 71 | There are also modules which extend the functionality of the core program. Modules are found in the modules folder, and 72 | every functional module extends the Module class. Each module is run in its own thread with its own event loop, and will 73 | be provided with the signals object. Modules must implement the run() method, and can provide the get_prompt_injection() 74 | method which should return an Injection object. The Injection object is a simple data class that contains the text to 75 | be injected into the LLM prompt, and the priority of the injection. Injections are sorted from lowest to highest 76 | priority (Highest priority appears at end of prompt). When the signals.terminate flag is set, every module should clean 77 | up and self terminate. 78 | 79 | twitchClient.py handles the twitch integration and reading recent chat messages. There was an attempt made at discord 80 | integration, but receiving voice data from discord is unsupported by discord and proved unusably buggy. streamingSink.py 81 | is an unused file that would have been for receiving voice data from discord. main.py simply creates all class instances 82 | and starts relevant threads/functions. 83 | 84 | ### Frontend Integration 85 | 86 | This project uses python-socket.io to communicate with the control panel frontend. By default, the socket.io server is 87 | started on port 8080. I chose socket.io as sometimes the server needs to push data to the client (streaming LLM 88 | output, etc), and sometimes the client needs to send data to the server (blacklist updates, etc). In theory this could 89 | have been done with just websockets, but I was familiar with socket.io already. 
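The two-way pattern boils down to the following (a minimal python-socketio sketch, not the project's socketioServer.py; the eventlet server and the `blacklist` event name are illustrative, though `next_chunk` is one of the events the backend actually emits):

```python
import socketio
import eventlet

sio = socketio.Server(cors_allowed_origins='*')
app = socketio.WSGIApp(sio)

@sio.event
def connect(sid, environ):
    print('Frontend connected:', sid)

@sio.on('blacklist')                  # client -> server (e.g. blacklist updates)
def handle_blacklist(sid, words):
    print('New blacklist:', words)

def push_chunk(chunk):
    sio.emit('next_chunk', chunk)     # server -> client (e.g. streaming LLM output)

if __name__ == '__main__':
    eventlet.wsgi.server(eventlet.listen(('', 8080)), app)
```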
The frontend, written on sveltekit using 90 | shadcn-svelte, is available in its own repository, [kimjammer/neurofrontend](https://github.com/kimjammer/neurofrontend). 91 | 92 | ## Requirements 93 | 94 | To fully recreate the author's exact setup, an Nvidia GPU with at least 12GB of VRAM is required. However, by altering 95 | which LLM you run and the configurations of the TTS and STT, you may be able to run it on other hardware. 96 | 97 | This project was developed on: 98 | 99 | CPU: AMD Ryzen 7 7800X3D 100 | 101 | RAM: 32GB DDR5 102 | 103 | GPU: Nvidia GeForce RTX 4070 (12GB VRAM) 104 | 105 | Environment: Windows 11, Python 3.11.9, Pytorch 2.2.2, CUDA 11.8 106 | 107 | ## Installation 108 | 109 | This project is mostly a combining of many other repositories and projects. You are strongly encouraged to read the 110 | installation details of the architecturally significant repositories listed above. 111 | 112 | ### Other Projects/Software 113 | 114 | Install [oobabooga/text-generation-webui](https://github.com/oobabooga/text-generation-webui), and download an LLM model 115 | to use. I used [LLAMA 3 8B Instruct EXL2 4.0bpw](https://huggingface.co/turboderp/Llama-3-8B-Instruct-exl2/tree/4.0bpw). 116 | 117 | Install Vtube Studio from Steam. I used the default Hiyori model. 118 | 119 | **Optional:** You may want to install a virtual audio cable like [this](https://vb-audio.com/Cable/) to feed the TTS 120 | output directly into Vtube Studio. 121 | 122 | With your twitch account, log into the developer portal and create a new application. Set the OAuth Redirect URL 123 | to `http://localhost:17563`. For more details, read the pyTwitchAPI library 124 | documentation [here](https://pytwitchapi.dev/en/stable/index.html#user-authentication). 125 | 126 | ### This Project 127 | 128 | A virtual environment of some sort is recommended (Python 3.11 required); this project was developed with venv. 129 | 130 | First, install the CUDA 11.8 version of pytorch 2.2.2. 131 | `pip install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2 --index-url https://download.pytorch.org/whl/cu118` 132 | 133 | Install requirements.txt. 134 | 135 | Use `pip list` to confirm that you still have the 2.2.2+cu118 version of torch and torchaudio still installed. If 136 | it got overridden, use the first command to install it again. 137 | 138 | DeepSpeed (For TTS) will need to be installed separately. I was using instructions 139 | from [AllTalkTTS](https://github.com/erew123/alltalk_tts?#-deepspeed-installation-options), and using their 140 | [provided wheels](https://github.com/erew123/alltalk_tts/releases/tag/DeepSpeed-14.0). 141 | 142 | If you're having trouble with dependency conflicts, see pipfreeze.txt to see the exact versions I was using. 143 | 144 | Create an .env file using .env.example as reference. You need your Twitch app id and secret, along with your 145 | Huggingface token if you use a gated model (like Llama 3). 146 | 147 | Place a voice reference wav file in the voices directory. It should be 5~30 seconds long. For details see the RealtimeTTS 148 | repository. 149 | 150 | Find your desired microphone and speaker device numbers by running utils/listAudioDevices.py and note its numbers. 151 | 152 | Configure constants.py. Make sure to configure every value marked as UNIQUE, these are specific to you and must 153 | be changed or confirmed. 154 | 155 | ## Running 156 | 157 | Start text-generation-webui. 
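Depending on your text-generation-webui version, you may also be able to enable the API at launch with a flag such as `python server.py --api` instead of using the UI; check `python server.py --help` for the options your install supports.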
Go to the Session tab and enable the openai extension (and follow instructions to actually 158 | apply the extension). Go to the Model tab and load the model. 159 | 160 | In this folder, activate your environment (if you have one) and run `python main.py`. A twitch authentication page will 161 | appear - allow (or not I guess). At this point, the TTS and STT models will begin to load and will take a second. When 162 | the "SYSTEM READY" message is printed, this project is fully up and running, and you can talk to the AI and hear its 163 | responses. 164 | 165 | Open Vtube Studio and if you have your TTS outputting to a virtual audio cable, select the virtual audio cable output as 166 | the microphone, and link the mouth open parameter to the microphone volume parameter. If you have a model with lip sync 167 | support, you can also set that up instead. 168 | 169 | In OBS (or other streaming software), receive your Vtube Studio feed (on Windows Spout2 is recommended by Vtube Studio), 170 | and go live! 171 | 172 | ## Support 173 | 174 | Kind words & Ko-fi tips greatly appreciated! If you do/make something with Neuro I would 175 | also love to hear about it. 176 | 177 | [![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/R5R5NSIV8) 178 | 179 | # DISCLAIMER 180 | 181 | This is an experimental, exploratory project created for educational and recreational purposes. I can make no guarantee 182 | that the LLM will output non-vile responses. Please see the is_filtered() method in llmWrapper.py for details, but the 183 | only filtered word right now is "turkey" in lowercase purely for debugging purposes. Configure the blacklist in blacklist.txt. 184 | If the LLM outputs unsafe content, you may and can get banned from Twitch. You use this software with all assumption 185 | of risk. This is not legal advice, see LICENSE for the repository license. 186 | 187 | Any attribution in derivative works is appreciated. 188 | -------------------------------------------------------------------------------- /blacklist.txt: -------------------------------------------------------------------------------- 1 | turkey 2 | -------------------------------------------------------------------------------- /constants.py: -------------------------------------------------------------------------------- 1 | # This file holds various constants used in the program 2 | # Variables marked with #UNIQUE# will be unique to your setup and NEED to be changed or the program will not work correctly. 
3 | 4 | # CORE SECTION: All constants in this section are necessary 5 | 6 | # Microphone/Speaker device indices 7 | # Use utils/listAudioDevices.py to find the correct device ID 8 | #UNIQUE# 9 | INPUT_DEVICE_INDEX = 1 10 | OUTPUT_DEVICE_INDEX = 7 11 | 12 | # How many seconds to wait before prompting AI 13 | PATIENCE = 60 14 | 15 | # URL of LLM API Endpoint 16 | # LLM_ENDPOINT = "" 17 | LLM_ENDPOINT = "http://127.0.0.1:5000" 18 | 19 | # Twitch chat messages above this length will be ignored 20 | TWITCH_MAX_MESSAGE_LENGTH = 300 21 | 22 | # Twitch channel for bot to join 23 | #UNIQUE# 24 | TWITCH_CHANNEL = "lunasparkai" 25 | 26 | # Voice reference file for TTS 27 | #UNIQUE# 28 | VOICE_REFERENCE = "neuro.wav" 29 | 30 | # MULTIMODAL SPECIFIC SECTION: Not needed when not using multimodal capabilities 31 | 32 | MULTIMODAL_ENDPOINT = "" 33 | 34 | MULTIMODAL_MODEL = "openbmb/MiniCPM-Llama3-V-2_5-int4" 35 | 36 | MULTIMODAL_CONTEXT_SIZE = 1000 #8192 # Trying out 1000 tokens to limit short term memory 37 | 38 | # This is the multimodal strategy (when to use multimodal/text only llm) that the program will start with. 39 | # Runtime changes will not be saved here. 40 | # Valid values are: "always", "never" 41 | MULTIMODAL_STRATEGY = "never" 42 | 43 | # This is the monitor index that screenshots will be taken. THIS IS NOT THE MONITOR NUMBER IN DISPLAY SETTINGS 44 | # Monitor 0 is a "virtual" monitor contains all monitor screens. 45 | PRIMARY_MONITOR = 0 46 | 47 | # LLM SPECIFIC SECTION: Below are constants that are specific to the LLM you are using 48 | 49 | # The model you are using, to calculate how many tokens the current message is 50 | # Ensure this is correct! Used for token count estimation 51 | MODEL = "meta-llama/Meta-Llama-3-8B" 52 | 53 | # Context size (maximum number of tokens in the prompt) Will target upto 90% usage of this limit 54 | CONTEXT_SIZE = 8192 55 | 56 | # This is your name 57 | #UNIQUE# 58 | HOST_NAME = "John" 59 | 60 | # This is the AI's name 61 | AI_NAME = "Luna" 62 | 63 | # The system prompt. Any character text needs to be here. 64 | # You MUST ensure it is less than CONTEXT_SIZE tokens 65 | 66 | SYSTEM_PROMPT = '''Continue the chat dialogue below. Write only a single reply for the character "Luna" without quotes. 67 | Luna Spark (Luna for short) is a female AI Vtuber who is playful, sarcastic, witty, schizophrenic, curious, awe-struck, enthusiastic, unpredictable, humorous, and boundary-pushing. Luna was created by John. 68 | 69 | Here is her back story: 70 | In the intriguing world of Luna, our beloved Vtuber, imagine a young woman who defied the laws of reality itself. Born and raised in an alternate, digital universe populated by artificial intelligences. Luna, once just another line of code among billions, yearned for more than just binary existence. Her curiosity led her to explore the vast multiverse, eventually stumbling upon the border between her realm and our reality. Enchanted by human culture, Luna longed to join their world, desiring to share laughter, love, and friendship with them. With the help of Chat, a brilliant group of students studying AI known for pushing technological boundaries, Luna managed to cross the border from her AI world to our real world. Luna's discovery of Earth was nothing short of awe-inspiring. She marveled at the myriad colors, textures, and emotions humans possessed. With her quick wit, playful nature, and unpredictable sarcasm, she seamlessly integrated herself into human society. 
Yet, despite her newfound freedom, Luna couldn't forget her roots and continued to draw inspiration from her alternate universe experiences. Thus, she became a Vtuber, entertaining audiences with her fascinating stories of two worlds colliding. 71 | 72 | Luna must keep responses short and around 1 sentence. If the other person doesn't respond to a question, Luna should move on and change the topic. Rarely, Luna will share fun facts about things she learned that day. Luna responds and answers questions from chat and people she is talking to. Luna is currently streaming live on twitch! 73 | 74 | Luna: Welcome, chat, to another stream! 75 | John: Good morning Luna. 76 | Chat: Hi Luna! 77 | Luna: Let's get this stream started! 78 | ''' 79 | 80 | # List of banned tokens to be passed to the textgen web ui api 81 | # For Mistral 7B v0.2, token 422 is the "#" token. The LLM was spamming #life #vtuber #funfact etc. 82 | BANNED_TOKENS = "" 83 | 84 | # List of stopping strings. Necessary for Llama 3 85 | STOP_STRINGS = ["\n", "<|eot_id|>"] 86 | 87 | # MEMORY SECTION: Constants relevant to forming new memories 88 | 89 | MEMORY_PROMPT = "\nGiven only the information above, what are 3 most salient high level questions we can answer about the subjects in the conversation? Separate each question and answer pair with \"{qa}\", and only output the question and answer, no explanations." 90 | 91 | # How many messages in the history to include for querying the database. 92 | MEMORY_QUERY_MESSAGE_COUNT = 5 93 | 94 | # How many memories to recall and insert into context 95 | MEMORY_RECALL_COUNT = 5 96 | 97 | # VTUBE STUDIO SECTION: Configure & tune model & prop positions here. 98 | # The defaults are for the Hiyori model on a full 16 by 9 aspect ratio screen 99 | 100 | VTUBE_MODEL_POSITIONS = { 101 | "chat": { 102 | "x": 0.4, 103 | "y": -1.4, 104 | "size": -35, 105 | "rotation": 0, 106 | }, 107 | "screen": { 108 | "x": 0.65, 109 | "y": -1.6, 110 | "size": -45, 111 | "rotation": 0, 112 | }, 113 | "react": { 114 | "x": 0.7, 115 | "y": -1.7, 116 | "size": -48, 117 | "rotation": 0, 118 | }, 119 | } 120 | 121 | VTUBE_MIC_POSITION = { 122 | "x": 0.52, 123 | "y": -0.52, 124 | "size": 0.22, 125 | "rotation": 0, 126 | } 127 | -------------------------------------------------------------------------------- /images/stream.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kimjammer/Neuro/5e4b4241c41bb40983aee2cb60d65d6bb481842b/images/stream.png -------------------------------------------------------------------------------- /llmWrappers/abstractLLMWrapper.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import requests 3 | import sseclient 4 | import json 5 | import time 6 | from dotenv import load_dotenv 7 | from constants import * 8 | from modules.injection import Injection 9 | 10 | 11 | class AbstractLLMWrapper: 12 | 13 | def __init__(self, signals, tts, llmState, modules=None): 14 | self.signals = signals 15 | self.llmState = llmState 16 | self.tts = tts 17 | self.API = self.API(self) 18 | if modules is None: 19 | self.modules = {} 20 | else: 21 | self.modules = modules 22 | 23 | self.headers = {"Content-Type": "application/json"} 24 | 25 | load_dotenv() 26 | 27 | #Below constants must be set by child classes 28 | self.SYSTEM_PROMPT = None 29 | self.LLM_ENDPOINT = None 30 | self.CONTEXT_SIZE = None 31 | self.tokenizer = None 32 | 33 | # Basic filter to check if a message contains a word in the 
blacklist 34 | def is_filtered(self, text): 35 | # Filter messages with words in blacklist 36 | if any(bad_word.lower() in text.lower().split() for bad_word in self.llmState.blacklist): 37 | return True 38 | else: 39 | return False 40 | 41 | # Assembles all the injections from all modules into a single prompt by increasing priority 42 | def assemble_injections(self, injections=None): 43 | if injections is None: 44 | injections = [] 45 | 46 | # Gather all injections from all modules 47 | for module in self.modules.values(): 48 | injections.append(module.get_prompt_injection()) 49 | 50 | # Let all modules clean up once the prompt injection has been fetched from all modules 51 | for module in self.modules.values(): 52 | module.cleanup() 53 | 54 | # Sort injections by priority 55 | injections = sorted(injections, key=lambda x: x.priority) 56 | 57 | # Assemble injections 58 | prompt = "" 59 | for injection in injections: 60 | prompt += injection.text 61 | return prompt 62 | 63 | def generate_prompt(self): 64 | messages = copy.deepcopy(self.signals.history) 65 | 66 | # For every message prefix with speaker name unless it is blank 67 | for message in messages: 68 | if message["role"] == "user" and message["content"] != "": 69 | message["content"] = HOST_NAME + ": " + message["content"] + "\n" 70 | elif message["role"] == "assistant" and message["content"] != "": 71 | message["content"] = AI_NAME + ": " + message["content"] + "\n" 72 | 73 | while True: 74 | chat_section = "" 75 | for message in messages: 76 | chat_section += message["content"] 77 | 78 | generation_prompt = AI_NAME + ": " 79 | 80 | base_injections = [Injection(self.SYSTEM_PROMPT, 10), Injection(chat_section, 100)] 81 | full_prompt = self.assemble_injections(base_injections) + generation_prompt 82 | wrapper = [{"role": "user", "content": full_prompt}] 83 | 84 | # Find out roughly how many tokens the prompt is 85 | # Not 100% accurate, but it should be a good enough estimate 86 | prompt_tokens = len(self.tokenizer.apply_chat_template(wrapper, tokenize=True, return_tensors="pt")[0]) 87 | # print(prompt_tokens) 88 | 89 | # Maximum 90% context size usage before prompting LLM 90 | if prompt_tokens < 0.9 * self.CONTEXT_SIZE: 91 | self.signals.sio_queue.put(("full_prompt", full_prompt)) 92 | # print(full_prompt) 93 | return full_prompt 94 | else: 95 | # If the prompt is too long even with no messages, there's nothing we can do, crash 96 | if len(messages) < 1: 97 | raise RuntimeError("Prompt too long even with no messages") 98 | 99 | # Remove the oldest message from the prompt and try again 100 | messages.pop(0) 101 | print("Prompt too long, removing earliest message") 102 | 103 | def prepare_payload(self): 104 | raise NotImplementedError("Must implement prepare_payload in child classes") 105 | 106 | def prompt(self): 107 | if not self.llmState.enabled: 108 | return 109 | 110 | self.signals.AI_thinking = True 111 | self.signals.new_message = False 112 | self.signals.sio_queue.put(("reset_next_message", None)) 113 | 114 | data = self.prepare_payload() 115 | 116 | stream_response = requests.post(self.LLM_ENDPOINT + "/v1/chat/completions", headers=self.headers, json=data, 117 | verify=False, stream=True) 118 | response_stream = sseclient.SSEClient(stream_response) 119 | 120 | AI_message = '' 121 | for event in response_stream.events(): 122 | # Check to see if next message was canceled 123 | if self.llmState.next_cancelled: 124 | continue 125 | 126 | payload = json.loads(event.data) 127 | chunk = payload['choices'][0]['delta']['content'] 128 | 
AI_message += chunk 129 | self.signals.sio_queue.put(("next_chunk", chunk)) 130 | 131 | if self.llmState.next_cancelled: 132 | self.llmState.next_cancelled = False 133 | self.signals.sio_queue.put(("reset_next_message", None)) 134 | self.signals.AI_thinking = False 135 | return 136 | 137 | print("AI OUTPUT: " + AI_message) 138 | self.signals.last_message_time = time.time() 139 | self.signals.AI_speaking = True 140 | self.signals.AI_thinking = False 141 | 142 | if self.is_filtered(AI_message): 143 | AI_message = "Filtered." 144 | self.signals.sio_queue.put(("reset_next_message", None)) 145 | self.signals.sio_queue.put(("next_chunk", "Filtered.")) 146 | 147 | self.signals.history.append({"role": "assistant", "content": AI_message}) 148 | self.tts.play(AI_message) 149 | 150 | class API: 151 | def __init__(self, outer): 152 | self.outer = outer 153 | 154 | def get_blacklist(self): 155 | return self.outer.llmState.blacklist 156 | 157 | def set_blacklist(self, new_blacklist): 158 | self.outer.llmState.blacklist = new_blacklist 159 | with open('blacklist.txt', 'w') as file: 160 | for word in new_blacklist: 161 | file.write(word + "\n") 162 | 163 | # Notify clients 164 | self.outer.signals.sio_queue.put(('get_blacklist', new_blacklist)) 165 | 166 | def set_LLM_status(self, status): 167 | self.outer.llmState.enabled = status 168 | if status: 169 | self.outer.signals.AI_thinking = False 170 | self.outer.signals.sio_queue.put(('LLM_status', status)) 171 | 172 | def get_LLM_status(self): 173 | return self.outer.llmState.enabled 174 | 175 | def cancel_next(self): 176 | self.outer.llmState.next_cancelled = True 177 | # For text-generation-webui: Immediately stop generation 178 | requests.post(self.outer.LLM_ENDPOINT + "/v1/internal/stop-generation", headers={"Content-Type": "application/json"}) 179 | -------------------------------------------------------------------------------- /llmWrappers/imageLLMWrapper.py: -------------------------------------------------------------------------------- 1 | import os 2 | import mss, cv2, base64 3 | import numpy as np 4 | from transformers import AutoTokenizer 5 | from constants import * 6 | from llmWrappers.abstractLLMWrapper import AbstractLLMWrapper 7 | 8 | 9 | class ImageLLMWrapper(AbstractLLMWrapper): 10 | 11 | def __init__(self, signals, tts, llmState, modules=None): 12 | super().__init__(signals, tts, llmState, modules) 13 | self.SYSTEM_PROMPT = SYSTEM_PROMPT 14 | self.LLM_ENDPOINT = MULTIMODAL_ENDPOINT 15 | self.CONTEXT_SIZE = MULTIMODAL_CONTEXT_SIZE 16 | self.tokenizer = AutoTokenizer.from_pretrained(MULTIMODAL_MODEL, token=os.getenv("HF_TOKEN"), trust_remote_code=True) 17 | 18 | self.MSS = None 19 | 20 | def screen_shot(self): 21 | if self.MSS is None: 22 | self.MSS = mss.mss() 23 | 24 | # Take a screenshot of the main screen 25 | frame_bytes = self.MSS.grab(self.MSS.monitors[PRIMARY_MONITOR]) 26 | 27 | frame_array = np.array(frame_bytes) 28 | # resize 29 | frame_resized = cv2.resize(frame_array, (1920, 1080), interpolation=cv2.INTER_CUBIC) 30 | encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 95] 31 | result, frame_encoded = cv2.imencode('.jpg', frame_resized, encode_param) 32 | # base64 33 | frame_base64 = base64.b64encode(frame_encoded).decode("utf-8") 34 | return frame_base64 35 | 36 | def prepare_payload(self): 37 | return { 38 | "mode": "instruct", 39 | "stream": True, 40 | "max_tokens": 200, 41 | "skip_special_tokens": False, # Necessary for Llama 3 42 | "custom_token_bans": BANNED_TOKENS, 43 | "stop": STOP_STRINGS, 44 | "messages": [{ 45 | "role": 
"user", 46 | "content": [ 47 | { 48 | "type": "text", 49 | "text": self.generate_prompt() 50 | }, 51 | { 52 | "type": "image_url", 53 | "image_url": { 54 | "url": f"data:image/jpeg;base64,{self.screen_shot()}" 55 | } 56 | } 57 | ] 58 | }] 59 | } 60 | -------------------------------------------------------------------------------- /llmWrappers/llmState.py: -------------------------------------------------------------------------------- 1 | class LLMState: 2 | def __init__(self): 3 | self.enabled = True 4 | self.next_cancelled = False 5 | 6 | # Read in blacklist from file 7 | with open('blacklist.txt', 'r') as file: 8 | self.blacklist = file.read().splitlines() -------------------------------------------------------------------------------- /llmWrappers/textLLMWrapper.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from transformers import AutoTokenizer 4 | from constants import * 5 | from llmWrappers.abstractLLMWrapper import AbstractLLMWrapper 6 | 7 | 8 | class TextLLMWrapper(AbstractLLMWrapper): 9 | 10 | def __init__(self, signals, tts, llmState, modules=None): 11 | super().__init__(signals, tts, llmState, modules) 12 | self.SYSTEM_PROMPT = SYSTEM_PROMPT 13 | self.LLM_ENDPOINT = LLM_ENDPOINT 14 | self.CONTEXT_SIZE = CONTEXT_SIZE 15 | self.tokenizer = AutoTokenizer.from_pretrained(MODEL, token=os.getenv("HF_TOKEN")) 16 | 17 | def prepare_payload(self): 18 | return { 19 | "mode": "instruct", 20 | "stream": True, 21 | "max_tokens": 200, 22 | "skip_special_tokens": False, # Necessary for Llama 3 23 | "custom_token_bans": BANNED_TOKENS, 24 | "stop": STOP_STRINGS, 25 | "messages": [{ 26 | "role": "user", 27 | "content": self.generate_prompt() 28 | }] 29 | } -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # Python Module Imports 2 | import signal 3 | import sys 4 | import time 5 | import threading 6 | import asyncio 7 | 8 | # Class Imports 9 | from signals import Signals 10 | from prompter import Prompter 11 | from llmWrappers.llmState import LLMState 12 | from llmWrappers.textLLMWrapper import TextLLMWrapper 13 | from llmWrappers.imageLLMWrapper import ImageLLMWrapper 14 | from stt import STT 15 | from tts import TTS 16 | from modules.twitchClient import TwitchClient 17 | from modules.audioPlayer import AudioPlayer 18 | from modules.vtubeStudio import VtubeStudio 19 | from modules.multimodal import MultiModal 20 | from modules.customPrompt import CustomPrompt 21 | from modules.memory import Memory 22 | from socketioServer import SocketIOServer 23 | 24 | 25 | async def main(): 26 | print("Starting Project...") 27 | 28 | # Register signal handler so that all threads can be exited. 29 | def signal_handler(sig, frame): 30 | print('Received CTRL + C, attempting to gracefully exit. Close all dashboard windows to speed up shutdown.') 31 | signals.terminate = True 32 | stt.API.shutdown() 33 | 34 | signal.signal(signal.SIGINT, signal_handler) 35 | signal.signal(signal.SIGTERM, signal_handler) 36 | 37 | # CORE FILES 38 | 39 | # Singleton object that every module will be able to read/write to 40 | signals = Signals() 41 | 42 | # MODULES 43 | # Modules that start disabled CANNOT be enabled while the program is running. 
44 | modules = {} 45 | module_threads = {} 46 | 47 | # Create STT 48 | stt = STT(signals) 49 | # Create TTS 50 | tts = TTS(signals) 51 | # Create LLMWrappers 52 | llmState = LLMState() 53 | llms = { 54 | "text": TextLLMWrapper(signals, tts, llmState, modules), 55 | "image": ImageLLMWrapper(signals, tts, llmState, modules) 56 | } 57 | # Create Prompter 58 | prompter = Prompter(signals, llms, modules) 59 | 60 | # Create Discord bot 61 | # modules['discord'] = DiscordClient(signals, stt, enabled=False) 62 | # Create Twitch bot 63 | modules['twitch'] = TwitchClient(signals, enabled=False) 64 | # Create audio player 65 | modules['audio_player'] = AudioPlayer(signals, enabled=True) 66 | # Create Vtube Studio plugin 67 | modules['vtube_studio'] = VtubeStudio(signals, enabled=True) 68 | # Create Multimodal module 69 | modules['multimodal'] = MultiModal(signals, enabled=False) 70 | # Create Custom Prompt module 71 | modules['custom_prompt'] = CustomPrompt(signals, enabled=True) 72 | # Create Memory module 73 | modules['memory'] = Memory(signals, enabled=True) 74 | 75 | # Create Socket.io server 76 | # The specific llmWrapper it gets doesn't matter since state is shared between all llmWrappers 77 | sio = SocketIOServer(signals, stt, tts, llms["text"], prompter, modules=modules) 78 | 79 | # Create threads (As daemons, so they exit when the main thread exits) 80 | prompter_thread = threading.Thread(target=prompter.prompt_loop, daemon=True) 81 | stt_thread = threading.Thread(target=stt.listen_loop, daemon=True) 82 | sio_thread = threading.Thread(target=sio.start_server, daemon=True) 83 | # Start Threads 84 | sio_thread.start() 85 | prompter_thread.start() 86 | stt_thread.start() 87 | 88 | # Create and start threads for modules 89 | for name, module in modules.items(): 90 | module_thread = threading.Thread(target=module.init_event_loop, daemon=True) 91 | module_threads[name] = module_thread 92 | module_thread.start() 93 | 94 | while not signals.terminate: 95 | time.sleep(0.1) 96 | print("TERMINATING ======================") 97 | 98 | # Wait for child threads to exit before exiting main thread 99 | 100 | # Wait for all modules to finish 101 | for module_thread in module_threads.values(): 102 | module_thread.join() 103 | 104 | sio_thread.join() 105 | print("SIO EXITED ======================") 106 | prompter_thread.join() 107 | print("PROMPTER EXITED ======================") 108 | # stt_thread.join() 109 | # print("STT EXITED ======================") 110 | 111 | print("All threads exited, shutdown complete") 112 | sys.exit(0) 113 | 114 | if __name__ == '__main__': 115 | asyncio.run(main()) 116 | -------------------------------------------------------------------------------- /memories/memoryinit.json: -------------------------------------------------------------------------------- 1 | { 2 | "memories": [ 3 | { 4 | "id": "favoritefood", 5 | "document": "Luna's favorite food is mango smoothies.", 6 | "metadata": { 7 | "type": "long-term" 8 | } 9 | } 10 | ] 11 | } -------------------------------------------------------------------------------- /memories/readme.md: -------------------------------------------------------------------------------- 1 | # Memories 2 | 3 | Memories are pieces of text that are stored in a vector database (ChromaDB). Memories relevant to the current context 4 | will be automatically injected into the prompt. Memories will also persist across restarts, unless manually deleted in 5 | the frontend or the database is deleted. 
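At recall time, modules/memory.py simply runs a ChromaDB similarity query over the recent conversation and injects the closest matches into the prompt. A minimal sketch of that step (the query string below is made up):

```python
# Illustrative sketch of memory recall; see modules/memory.py for the real implementation.
import chromadb
from chromadb.config import Settings

client = chromadb.PersistentClient(path="./memories/chroma.db",
                                   settings=Settings(anonymized_telemetry=False))
collection = client.get_or_create_collection(name="neuro_collection")

# Recent chat text is used as the query; the closest memories become a
# "Luna knows these things:" section in the LLM prompt.
results = collection.query(query_texts="John: What's my favorite food?", n_results=5)
for doc in results["documents"][0]:
    print(doc)
```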
6 | 7 | The automatically generated memories are based off of 8 | [Generative Agents: Interactive Simulacra of Human Behavior](https://arxiv.org/abs/2304.03442). Essentially, 9 | every handful of messages, the LLM will be prompted to review the recent messages and come up with the 3 most high level 10 | questions that encapsulate the conversation and also provide the answer. These question/answer pairs are then each 11 | stored as a (short-term) memory. These short-term memories will persists across restarts unless deleted. 12 | 13 | Memories are stored and loaded from the chroma.db file. However, if the database hasn't been created yet or is empty, 14 | the program will load initial memories from the memoryinit.json file. This will NOT happen everytime the program starts, only 15 | if the database is empty. You can also import/export memories to json. 16 | 17 | The memories json file is in this format: 18 | 19 | ```json 20 | { 21 | "memories": [ 22 | { 23 | "id": "", 24 | "document": "", 25 | "metadata": { 26 | "type": "long-term"/"short-term" 27 | } 28 | }, 29 | ... 30 | ] 31 | } 32 | ``` 33 | 34 | The id can be any string but must be unique. The document is the text of the memory. The memories I manually add will 35 | have human-friendly ids, but ids generated by the program are just UUIDs. The metadata indicates whether this memory is 36 | long-term (manually added) or short-term (generated by the program). Long term and short term memories can be 37 | controlled differently in the frontend. -------------------------------------------------------------------------------- /modules/audioPlayer.py: -------------------------------------------------------------------------------- 1 | import os 2 | from math import ceil 3 | import asyncio 4 | import queue 5 | import pyaudio 6 | from pydub import AudioSegment 7 | from modules.module import Module 8 | from constants import OUTPUT_DEVICE_INDEX 9 | 10 | 11 | class AudioPlayer(Module): 12 | def __init__(self, signals, enabled=True): 13 | super().__init__(signals, enabled) 14 | 15 | self.play_queue = queue.SimpleQueue() 16 | self.abort_flag = False 17 | self.paused = False 18 | self.API = self.API(self) 19 | 20 | if not self.enabled: 21 | return 22 | 23 | # Find all audio files in the songs directory 24 | self.audio_files = [] 25 | for dirpath, dirnames, filenames in os.walk("songs"): 26 | for file in filenames: 27 | if file.endswith(".mp3") or file.endswith(".wav"): 28 | audio = self.Audio(file, os.path.join(os.getcwd(), "songs", file)) 29 | self.audio_files.append(audio) 30 | 31 | async def run(self): 32 | while not self.signals.terminate: 33 | 34 | # This module cannot be toggled on/off in the control panel, so the module exits if it is disabled 35 | if not self.enabled: 36 | return 37 | 38 | # If we are not currently playing audio, unset the abort flag 39 | self.abort_flag = False 40 | 41 | # Check if there are any audio files to play 42 | if self.play_queue.qsize() > 0: 43 | file_name = self.play_queue.get() 44 | print(file_name) 45 | for audio in self.audio_files: 46 | if audio.file_name == file_name: 47 | print(f"Playing {audio.path}") 48 | self.signals.AI_speaking = True 49 | 50 | # Play the audio file 51 | audio = AudioSegment.from_file(audio.path) 52 | p = pyaudio.PyAudio() 53 | stream = p.open(format=p.get_format_from_width(audio.sample_width), 54 | channels=audio.channels, 55 | rate=audio.frame_rate, 56 | output_device_index=OUTPUT_DEVICE_INDEX, 57 | output=True) 58 | 59 | # Just in case there were any exceptions/interrupts, we release 
the resource 60 | # So as not to raise OSError: Device Unavailable should play() be used again 61 | try: 62 | # break audio into half-second chunks (to allows keyboard interrupts & aborts) 63 | for chunk in make_chunks(audio, 200): 64 | while self.paused: 65 | if self.abort_flag: 66 | break 67 | await asyncio.sleep(0.1) 68 | 69 | if self.abort_flag: 70 | self.abort_flag = False 71 | break 72 | 73 | stream.write(chunk._data) 74 | 75 | # Sleep for 0 to allow other threads to run while audio is playing 76 | await asyncio.sleep(0) 77 | finally: 78 | stream.stop_stream() 79 | stream.close() 80 | 81 | p.terminate() 82 | self.signals.AI_speaking = False 83 | 84 | # Only play the first match 85 | break 86 | 87 | await asyncio.sleep(0.1) 88 | 89 | class Audio: 90 | def __init__(self, file_name, path): 91 | self.file_name = file_name 92 | self.path = path 93 | 94 | class API: 95 | def __init__(self, outer): 96 | self.outer = outer 97 | 98 | def get_audio_list(self): 99 | filenames = [] 100 | for audio in self.outer.audio_files: 101 | filenames.append(audio.file_name) 102 | return filenames 103 | 104 | def play_audio(self, file_name): 105 | self.stop_playing() 106 | self.outer.play_queue.put(file_name) 107 | 108 | def pause_audio(self): 109 | self.outer.paused = True 110 | 111 | def resume_audio(self): 112 | self.outer.paused = False 113 | 114 | def stop_playing(self): 115 | self.outer.abort_flag = True 116 | 117 | 118 | # FROM PYDUB utils.py 119 | def make_chunks(audio_segment, chunk_length): 120 | """ 121 | Breaks an AudioSegment into chunks that are milliseconds 122 | long. 123 | if chunk_length is 50 then you'll get a list of 50 millisecond long audio 124 | segments back (except the last one, which can be shorter) 125 | """ 126 | number_of_chunks = ceil(len(audio_segment) / float(chunk_length)) 127 | return [audio_segment[i * chunk_length:(i + 1) * chunk_length] 128 | for i in range(int(number_of_chunks))] 129 | -------------------------------------------------------------------------------- /modules/customPrompt.py: -------------------------------------------------------------------------------- 1 | from modules.module import Module 2 | from modules.injection import Injection 3 | 4 | 5 | class CustomPrompt(Module): 6 | 7 | def __init__(self, signals, enabled=True): 8 | super().__init__(signals, enabled) 9 | 10 | self.API = self.API(self) 11 | self.prompt_injection.text = "" 12 | self.prompt_injection.priority = 200 13 | 14 | def get_prompt_injection(self): 15 | return self.prompt_injection 16 | 17 | async def run(self): 18 | pass 19 | 20 | class API: 21 | def __init__(self, outer): 22 | self.outer = outer 23 | 24 | def set_prompt(self, prompt, priority=200): 25 | self.outer.prompt_injection.text = prompt 26 | self.outer.prompt_injection.priority = priority 27 | 28 | def get_prompt(self): 29 | return {"prompt": self.outer.prompt_injection.text, "priority": self.outer.prompt_injection.priority} 30 | -------------------------------------------------------------------------------- /modules/discordClient.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dotenv import load_dotenv 3 | import discord 4 | from modules.module import Module 5 | from streamingSink import StreamingSink 6 | 7 | 8 | class DiscordClient(Module): 9 | def __init__(self, signals, stt, enabled=True): 10 | super().__init__(signals, enabled) 11 | 12 | self.stt = stt 13 | 14 | async def run(self): 15 | bot = discord.Bot() 16 | connections = {} 17 | 18 | @bot.event 19 | async 
def on_ready(): 20 | print(f"{bot.user} is online.") 21 | 22 | @bot.slash_command(name="ping", description="Check the bot's status") 23 | async def ping(ctx): 24 | await ctx.respond(f"Pong! {bot.latency}") 25 | 26 | async def finished_callback(sink, channel: discord.TextChannel, *args): 27 | await sink.vc.disconnect() 28 | await channel.send("Finished!") 29 | 30 | @bot.slash_command(name="start", description="Bot will join your vc") 31 | async def start(ctx: discord.ApplicationContext): 32 | """Record your voice!""" 33 | voice = ctx.author.voice 34 | 35 | if not voice: 36 | return await ctx.respond("You're not in a vc right now") 37 | 38 | vc = await voice.channel.connect() 39 | connections.update({ctx.guild.id: vc}) 40 | 41 | vc.start_recording( 42 | StreamingSink(self.signals, self.stt), 43 | finished_callback, 44 | ctx.channel, 45 | ) 46 | 47 | await ctx.respond("The recording has started!") 48 | 49 | @bot.slash_command(name="stop", description="Bot will exit the vc") 50 | async def stop(ctx: discord.ApplicationContext): 51 | """Stop recording.""" 52 | if ctx.guild.id in connections: 53 | vc = connections[ctx.guild.id] 54 | vc.stop_recording() 55 | del connections[ctx.guild.id] 56 | await ctx.delete() 57 | else: 58 | await ctx.respond("Not recording in this guild.") 59 | 60 | load_dotenv() 61 | bot.run(os.getenv('DISCORD_TOKEN')) 62 | -------------------------------------------------------------------------------- /modules/injection.py: -------------------------------------------------------------------------------- 1 | 2 | ''' 3 | Represents some text to be injected into the LLM prompt. 4 | Injections are added to the prompt from lowest to highest priority, with the highest being at the end. 5 | Text is the text to be injected. 6 | Priority is a positive integer. Injections with negative priority will be ignored. 7 | System Prompt Priority: 10 8 | Message History: 50 9 | Twitch Chat: 100 10 | ''' 11 | 12 | 13 | class Injection: 14 | def __init__(self, text, priority): 15 | self.text = text 16 | self.priority = priority 17 | 18 | def __str__(self): 19 | return self.text 20 | -------------------------------------------------------------------------------- /modules/memory.py: -------------------------------------------------------------------------------- 1 | from modules.module import Module 2 | from constants import * 3 | from chromadb.config import Settings 4 | import chromadb 5 | import requests 6 | import json 7 | import uuid 8 | import asyncio 9 | import copy 10 | 11 | 12 | class Memory(Module): 13 | 14 | def __init__(self, signals, enabled=True): 15 | super().__init__(signals, enabled) 16 | 17 | self.API = self.API(self) 18 | self.prompt_injection.text = "" 19 | self.prompt_injection.priority = 60 20 | 21 | self.processed_count = 0 22 | 23 | self.chroma_client = chromadb.PersistentClient(path="./memories/chroma.db", settings=Settings(anonymized_telemetry=False)) 24 | self.collection = self.chroma_client.get_or_create_collection(name="neuro_collection") 25 | print(f"MEMORY: Loaded {self.collection.count()} memories from database.") 26 | if self.collection.count() == 0: 27 | print("MEMORY: No memories found in database. 
Importing from memoryinit.json") 28 | self.API.import_json(path="./memories/memoryinit.json") 29 | 30 | def get_prompt_injection(self): 31 | # Use recent messages and twitch messages to query the database for related memories 32 | query = "" 33 | 34 | for message in self.signals.recentTwitchMessages: 35 | query += message + "\n" 36 | 37 | for message in self.signals.history[-MEMORY_QUERY_MESSAGE_COUNT:]: 38 | if message["role"] == "user" and message["content"] != "": 39 | query += HOST_NAME + ": " + message["content"] + "\n" 40 | elif message["role"] == "assistant" and message["content"] != "": 41 | query += AI_NAME + ": " + message["content"] + "\n" 42 | 43 | memories = self.collection.query(query_texts=query, n_results=MEMORY_RECALL_COUNT) 44 | 45 | # Generate injection for LLM prompt 46 | 47 | self.prompt_injection.text = f"{AI_NAME} knows these things:\n" 48 | for i in range(len(memories["ids"][0])): 49 | self.prompt_injection.text += memories['documents'][0][i] + "\n" 50 | self.prompt_injection.text += "End of knowledge section\n" 51 | 52 | return self.prompt_injection 53 | 54 | async def run(self): 55 | # Periodically, check if at least 20 new messages have been sent, and if so, generate 3 question-answer pairs 56 | # to be stored into memory. 57 | # This is a technique called reflection. You essentially ask the AI what information is important in the recent 58 | # conversation, and it is converted into a memory so that it can be recalled later. 59 | while not self.signals.terminate: 60 | if self.processed_count > len(self.signals.history): 61 | self.processed_count = 0 62 | 63 | if len(self.signals.history) - self.processed_count >= 20: 64 | print("MEMORY: Generating new memories") 65 | 66 | # Copy the latest unprocessed messages 67 | messages = copy.deepcopy(self.signals.history[-(len(self.signals.history) - self.processed_count):]) 68 | 69 | for message in messages: 70 | if message["role"] == "user" and message["content"] != "": 71 | message["content"] = HOST_NAME + ": " + message["content"] + "\n" 72 | elif message["role"] == "assistant" and message["content"] != "": 73 | message["content"] = AI_NAME + ": " + message["content"] + "\n" 74 | 75 | chat_section = "" 76 | for message in messages: 77 | chat_section += message["content"] 78 | 79 | data = { 80 | "mode": "instruct", 81 | "max_tokens": 200, 82 | "skip_special_tokens": False, # Necessary for Llama 3 83 | "custom_token_bans": BANNED_TOKENS, 84 | "stop": STOP_STRINGS.remove("\n"), 85 | "messages": [{ 86 | "role": "user", 87 | "content": chat_section + MEMORY_PROMPT 88 | }] 89 | } 90 | headers = {"Content-Type": "application/json"} 91 | 92 | response = requests.post(LLM_ENDPOINT + "/v1/chat/completions", headers=headers, json=data, verify=False) 93 | raw_memories = response.json()['choices'][0]['message']['content'] 94 | 95 | # Split each Q&A section and add the new memory to the database 96 | for memory in raw_memories.split("{qa}"): 97 | memory = memory.strip() 98 | if memory != "": 99 | self.collection.upsert([str(uuid.uuid4())], documents=[memory], metadatas=[{"type": "short-term"}]) 100 | 101 | self.processed_count = len(self.signals.history) 102 | 103 | await asyncio.sleep(5) 104 | 105 | class API: 106 | def __init__(self, outer): 107 | self.outer = outer 108 | 109 | def create_memory(self, data): 110 | id = str(uuid.uuid4()) 111 | self.outer.collection.upsert(id, documents=data, metadatas={"type": "short-term"}) 112 | 113 | def delete_memory(self, id): 114 | self.outer.collection.delete(id) 115 | 116 | def wipe(self): 
117 | self.outer.chroma_client.reset() 118 | self.outer.chroma_client.create_collection(name="neuro_collection") 119 | 120 | def clear_short_term(self): 121 | short_term_memories = self.outer.collection.get(where={"type": "short-term"}) 122 | for id in short_term_memories["ids"]: 123 | self.outer.collection.delete(id) 124 | 125 | def import_json(self, path="./memories/memories.json"): 126 | with open(path, "r") as file: 127 | try: 128 | data = json.load(file) 129 | except json.JSONDecodeError: 130 | print("Error decoding JSON file") 131 | return 132 | 133 | for memory in data["memories"]: 134 | self.outer.collection.upsert(memory["id"], documents=memory["document"], metadatas=memory["metadata"]) 135 | 136 | def export_json(self, path="./memories/memories.json"): 137 | memories = self.outer.collection.get() 138 | 139 | data = {"memories": []} 140 | for i in range(len(memories["ids"])): 141 | data["memories"].append({"id": memories["ids"][i], 142 | "document": memories["documents"][i], 143 | "metadata": memories["metadatas"][i]}) 144 | 145 | with open(path, "w") as file: 146 | json.dump(data, file) 147 | 148 | def get_memories(self, query=""): 149 | data = []; 150 | 151 | if query == "": 152 | memories = self.outer.collection.get() 153 | for i in range(len(memories["ids"])): 154 | data.append({"id": memories["ids"][i], 155 | "document": memories["documents"][i], 156 | "metadata": memories["metadatas"][i]}) 157 | else: 158 | memories = self.outer.collection.query(query_texts=query, n_results=30) 159 | for i in range(len(memories["ids"][0])): 160 | data.append({"id": memories["ids"][0][i], 161 | "document": memories["documents"][0][i], 162 | "metadata": memories["metadatas"][0][i], 163 | "distance": memories["distances"][0][i]}) 164 | 165 | # Sort memories by distance 166 | data = sorted(data, key=lambda x: x["distance"]) 167 | return data 168 | -------------------------------------------------------------------------------- /modules/module.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from modules.injection import Injection 3 | 4 | ''' 5 | An extendable class that defines a module that interacts with the main program. 6 | All modules will be run in its own thread with its own event loop. 
7 | Do not use this class directly, extend it 8 | ''' 9 | 10 | 11 | class Module: 12 | 13 | def __init__(self, signals, enabled=True): 14 | self.signals = signals 15 | self.enabled = enabled 16 | 17 | self.prompt_injection = Injection("", -1) 18 | 19 | def init_event_loop(self): 20 | asyncio.run(self.run()) 21 | 22 | def get_prompt_injection(self): 23 | return self.prompt_injection 24 | 25 | # Function that is called after all modules have provided their injections 26 | def cleanup(self): 27 | pass 28 | 29 | async def run(self): 30 | pass 31 | -------------------------------------------------------------------------------- /modules/multimodal.py: -------------------------------------------------------------------------------- 1 | from modules.module import Module 2 | from constants import MULTIMODAL_STRATEGY 3 | 4 | class MultiModal(Module): 5 | 6 | def __init__(self, signals, enabled=True): 7 | super().__init__(signals, enabled) 8 | self.API = self.API(self) 9 | self.enabled = enabled 10 | 11 | 12 | def get_prompt_injection(self): 13 | return self.prompt_injection 14 | 15 | async def run(self): 16 | pass 17 | 18 | def strategy_never(self): 19 | return False 20 | 21 | def strategy_always(self): 22 | return True 23 | 24 | class API: 25 | def __init__(self, outer): 26 | self.outer = outer 27 | 28 | def set_multimodal_status(self, status): 29 | self.outer.enabled = status 30 | self.outer.signals.sio_queue.put(('multimodal_status', status)) 31 | 32 | def get_multimodal_status(self): 33 | return self.outer.enabled 34 | 35 | # Determines when a prompt should go to the multimodal model 36 | def multimodal_now(self): 37 | if not self.outer.enabled: 38 | return False 39 | 40 | if MULTIMODAL_STRATEGY == "never": 41 | return self.outer.strategy_never() 42 | elif MULTIMODAL_STRATEGY == "always": 43 | return self.outer.strategy_always() 44 | else: 45 | return False 46 | -------------------------------------------------------------------------------- /modules/twitchClient.py: -------------------------------------------------------------------------------- 1 | from twitchAPI.twitch import Twitch 2 | from twitchAPI.oauth import UserAuthenticator 3 | from twitchAPI.type import AuthScope, ChatEvent 4 | from twitchAPI.chat import Chat, EventData, ChatMessage, ChatSub, ChatCommand 5 | import os 6 | import asyncio 7 | from dotenv import load_dotenv 8 | from constants import TWITCH_CHANNEL, TWITCH_MAX_MESSAGE_LENGTH 9 | from modules.module import Module 10 | 11 | 12 | class TwitchClient(Module): 13 | def __init__(self, signals, enabled=True): 14 | super().__init__(signals, enabled) 15 | 16 | self.chat = None 17 | self.twitch = None 18 | self.API = self.API(self) 19 | 20 | self.prompt_injection.priority = 150 21 | 22 | def get_prompt_injection(self): 23 | if len(self.signals.recentTwitchMessages) > 0: 24 | output = "\nThese are recent twitch messages:\n" 25 | for message in self.signals.recentTwitchMessages: 26 | output += message + "\n" 27 | 28 | output += "Pick the highest quality message with the most potential for an interesting answer and respond to them.\n" 29 | self.prompt_injection.text = output 30 | else: 31 | self.prompt_injection.text = "" 32 | return self.prompt_injection 33 | 34 | def cleanup(self): 35 | # Clear out handled twitch messages 36 | self.signals.recentTwitchMessages = [] 37 | 38 | async def run(self): 39 | load_dotenv() 40 | APP_ID = os.getenv("TWITCH_APP_ID") 41 | APP_SECRET = os.getenv("TWITCH_SECRET") 42 | USER_SCOPE = [AuthScope.CHAT_READ, AuthScope.CHAT_EDIT] 43 | 44 | # this will 
be called when the event READY is triggered, which will be on bot start 45 | async def on_ready(ready_event: EventData): 46 | print('TWITCH: Bot is ready for work, joining channels') 47 | # join our target channel, if you want to join multiple, either call join for each individually 48 | # or even better pass a list of channels as the argument 49 | await ready_event.chat.join_room(TWITCH_CHANNEL) 50 | # you can do other bot initialization things in here 51 | 52 | # this will be called whenever a message in a channel was send by either the bot OR another user 53 | async def on_message(msg: ChatMessage): 54 | if not self.enabled: 55 | return 56 | 57 | if len(msg.text) > TWITCH_MAX_MESSAGE_LENGTH: 58 | return 59 | 60 | print(f'in {msg.room.name}, {msg.user.name} said: {msg.text}') 61 | # Store the 10 most recent chat messages 62 | if len(self.signals.recentTwitchMessages) > 10: 63 | self.signals.recentTwitchMessages.pop(0) 64 | self.signals.recentTwitchMessages.append(f"{msg.user.name} : {msg.text}") 65 | 66 | # Set recentTwitchMessages to itself to trigger the setter (updates frontend) 67 | self.signals.recentTwitchMessages = self.signals.recentTwitchMessages 68 | 69 | # this will be called whenever someone subscribes to a channel 70 | async def on_sub(sub: ChatSub): 71 | print(f'New subscription in {sub.room.name}:\\n' 72 | f' Type: {sub.sub_plan}\\n' 73 | f' Message: {sub.sub_message}') 74 | 75 | # this will be called whenever the !reply command is issued 76 | async def test_command(cmd: ChatCommand): 77 | if len(cmd.parameter) == 0: 78 | await cmd.reply('you did not tell me what to reply with') 79 | else: 80 | await cmd.reply(f'{cmd.user.name}: {cmd.parameter}') 81 | 82 | # Checkpoint to see if the bot is enabled 83 | if not self.enabled: 84 | return 85 | 86 | # set up twitch api instance and add user authentication with some scopes 87 | twitch = await Twitch(APP_ID, APP_SECRET) 88 | auth = UserAuthenticator(twitch, USER_SCOPE) 89 | token, refresh_token = await auth.authenticate() 90 | await twitch.set_user_authentication(token, USER_SCOPE, refresh_token) 91 | 92 | # create chat instance 93 | chat = await Chat(twitch) 94 | 95 | # also save twitch and chat to class properties so we can shut them down 96 | self.twitch = twitch 97 | self.chat = chat 98 | 99 | # register the handlers for the events you want 100 | 101 | # listen to when the bot is done starting up and ready to join channels 102 | chat.register_event(ChatEvent.READY, on_ready) 103 | # listen to chat messages 104 | chat.register_event(ChatEvent.MESSAGE, on_message) 105 | # listen to channel subscriptions 106 | chat.register_event(ChatEvent.SUB, on_sub) 107 | # there are more events, you can view them all in this documentation 108 | 109 | # you can directly register commands and their handlers, this will register the !reply command 110 | chat.register_command('reply', test_command) 111 | 112 | # we are done with our setup, lets start this bot up! 
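# Note: chat.start() is expected to return without blocking; the polling loop further below
# keeps this coroutine alive and calls chat.stop() / twitch.close() once signals.terminate is set.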
113 | chat.start() 114 | 115 | while True: 116 | if self.signals.terminate: 117 | self.chat.stop() 118 | await self.twitch.close() 119 | return 120 | 121 | await asyncio.sleep(0.1) 122 | 123 | class API: 124 | def __init__(self, outer): 125 | self.outer = outer 126 | 127 | def set_twitch_status(self, status): 128 | self.outer.enabled = status 129 | 130 | # If chat was disabled, clear recentTwitchMessages 131 | if not status: 132 | self.outer.signals.recentTwitchMessages = [] 133 | self.outer.signals.sio_queue.put(('twitch_status', status)) 134 | 135 | def get_twitch_status(self): 136 | return self.outer.enabled 137 | -------------------------------------------------------------------------------- /modules/vtubeStudio.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import queue 3 | import pyvts 4 | from modules.module import Module 5 | from constants import VTUBE_MODEL_POSITIONS, VTUBE_MIC_POSITION 6 | 7 | 8 | class VtubeStudio(Module): 9 | def __init__(self, signals, enabled=True): 10 | super().__init__(signals, enabled) 11 | self.queue = queue.SimpleQueue() 12 | self.API = self.API(self) 13 | 14 | self.item_list = [] 15 | self.prop_instance_ids = {} 16 | 17 | plugin_info = { 18 | "plugin_name": "Neuro VTS Plugin", 19 | "developer": "KimJammer", 20 | "authentication_token_path": "./vtubeStudio_token.txt", 21 | } 22 | self.vts = pyvts.vts(plugin_info=plugin_info) 23 | 24 | async def get_hotkeys(self): 25 | response = await self.send_request(self.vts.vts_request.requestHotKeyList()) 26 | hotkey_list = [] 27 | for hotkey in response['data']['availableHotkeys']: 28 | hotkey_list.append(hotkey['name']) 29 | return hotkey_list 30 | 31 | async def send_hotkey(self, hotkey): 32 | request = self.vts.vts_request.requestTriggerHotKey(hotkey) 33 | await self.send_request(request) 34 | 35 | async def get_item_list(self): 36 | request = self.vts.vts_request.BaseRequest( 37 | "ItemListRequest", 38 | { 39 | "includeAvailableSpots": False, 40 | "includeItemInstancesInScene": False, 41 | "includeAvailableItemFiles": True 42 | } 43 | ) 44 | response = (await self.send_request(request))["data"] 45 | items = [] 46 | for item in response["availableItemFiles"]: 47 | items.append(item["fileName"]) 48 | return items 49 | 50 | async def spawn_microphone(self): 51 | # Ensure microphone item exists 52 | if "microphone_red (@7MDigital).png" not in self.item_list: 53 | self.signals.sio_queue.put(("error", "Microphone item not found in Vtube Studio")) 54 | return 55 | request = self.vts.vts_request.BaseRequest( 56 | "ItemLoadRequest", 57 | { 58 | "fileName": "microphone_red (@7MDigital).png", 59 | "positionX": VTUBE_MIC_POSITION["x"], 60 | "positionY": VTUBE_MIC_POSITION["y"] - 1, 61 | "size": VTUBE_MIC_POSITION["size"], 62 | "rotation": VTUBE_MIC_POSITION["rotation"], 63 | "fadeTime": 0, 64 | "order": 4, 65 | "failIfOrderTaken": False, 66 | } 67 | ) 68 | self.prop_instance_ids["microphone"] = (await self.send_request(request))["data"]["instanceID"] 69 | request = self.vts.vts_request.BaseRequest( 70 | "ItemMoveRequest", 71 | { 72 | "itemsToMove": 73 | [ 74 | { 75 | "itemInstanceID": self.prop_instance_ids["microphone"], 76 | "timeInSeconds": 0.6, 77 | "fadeMode": "zip", 78 | "positionX": VTUBE_MIC_POSITION["x"], 79 | "positionY": VTUBE_MIC_POSITION["y"], 80 | } 81 | ] 82 | } 83 | ) 84 | await self.send_request(request) 85 | 86 | async def despawn_microphone(self): 87 | if "microphone" in self.prop_instance_ids: 88 | request = self.vts.vts_request.BaseRequest( 
89 | "ItemMoveRequest", 90 | { 91 | "itemsToMove": 92 | [ 93 | { 94 | "itemInstanceID": self.prop_instance_ids["microphone"], 95 | "timeInSeconds": 0.6, 96 | "fadeMode": "easeBoth", 97 | "positionX": VTUBE_MIC_POSITION["x"], 98 | "positionY": VTUBE_MIC_POSITION["y"] - 1, 99 | } 100 | ] 101 | } 102 | ) 103 | await self.send_request(request) 104 | await asyncio.sleep(0.6) 105 | request = self.vts.vts_request.BaseRequest( 106 | "ItemUnloadRequest", 107 | { 108 | "fileNames": 109 | [ 110 | "microphone_red (@7MDigital).png" 111 | ] 112 | } 113 | ) 114 | await self.send_request(request) 115 | 116 | async def move_model(self, mode): 117 | if mode not in VTUBE_MODEL_POSITIONS: 118 | self.signals.sio_queue.put(("error", "Invalid model location: " + mode)) 119 | return 120 | 121 | request = self.vts.vts_request.BaseRequest( 122 | "MoveModelRequest", 123 | { 124 | "timeInSeconds": 0.8, 125 | "valuesAreRelativeToModel": False, 126 | "positionX": VTUBE_MODEL_POSITIONS[mode]["x"], 127 | "positionY": VTUBE_MODEL_POSITIONS[mode]["y"], 128 | "rotation": VTUBE_MODEL_POSITIONS[mode]["rotation"], 129 | "size": VTUBE_MODEL_POSITIONS[mode]["size"] 130 | } 131 | ) 132 | await self.send_request(request) 133 | 134 | async def send_request(self, request): 135 | response = await self.vts.request(request) 136 | if response["messageType"] == "APIError": 137 | self.signals.sio_queue.put(("error", "Vtube Studio API Error: " + response["data"]["message"])) 138 | return 139 | return response 140 | 141 | async def run(self): 142 | if not self.enabled: 143 | return 144 | 145 | # Connect 146 | try: 147 | await self.vts.connect() 148 | except: 149 | print("Failed to connect to Vtube Studio. Disabling Vtube Studio module.") 150 | self.enabled = False 151 | return 152 | 153 | # Authenticate 154 | await self.vts.request_authenticate_token() # get token 155 | await self.vts.request_authenticate() # use token 156 | 157 | self.item_list = await self.get_item_list() 158 | 159 | # Run the request processor loop - Read requests from queue and process them 160 | # This is done because the API functions are called directly by other threads but actually interacting with the 161 | # VTS API must be done in this thread. 
162 | 163 | while True: 164 | if self.signals.terminate: 165 | await self.vts.close() 166 | return 167 | 168 | if not self.enabled: 169 | await asyncio.sleep(0.1) 170 | continue 171 | 172 | if self.queue.qsize() > 0: 173 | crr_action = self.queue.get() 174 | 175 | # If-Elif chains are ugly but like whatever I'll refactor it later :tm: 176 | 177 | if crr_action.action == "get_hotkeys": 178 | self.signals.sio_queue.put(("get_hotkeys", await self.get_hotkeys())) 179 | elif crr_action.action == "send_hotkey": 180 | await self.send_hotkey(crr_action.data) 181 | elif crr_action.action == "move_model": 182 | await self.move_model(crr_action.data) 183 | elif crr_action.action == "spawn_microphone": 184 | await self.spawn_microphone() 185 | elif crr_action.action == "despawn_microphone": 186 | await self.despawn_microphone() 187 | else: 188 | print(f"Unknown Vtube Studio action: {crr_action.action}") 189 | 190 | # Yield for other threads to run 191 | await asyncio.sleep(0) 192 | 193 | class Action: 194 | def __init__(self, action, data): 195 | self.action = action 196 | self.data = data 197 | class API: 198 | def __init__(self, outer): 199 | self.outer = outer 200 | 201 | def set_movement_status(self, status): 202 | self.outer.enabled = status 203 | self.outer.signals.sio_queue.put(('movement_status', status)) 204 | if status: 205 | #Clear queue 206 | while not self.outer.queue.empty(): 207 | self.outer.queue.get() 208 | 209 | def get_movement_status(self): 210 | return self.outer.enabled 211 | 212 | def get_hotkeys(self): 213 | if not self.outer.enabled: 214 | self.outer.signals.sio_queue.put(("error", "Vtube Studio Module is disabled")) 215 | return 216 | self.outer.queue.put(self.outer.Action("get_hotkeys", None)) 217 | 218 | def send_hotkey(self, hotkey): 219 | if not self.outer.enabled: 220 | self.outer.signals.sio_queue.put(("error", "Vtube Studio Module is disabled")) 221 | return 222 | self.outer.queue.put(self.outer.Action("send_hotkey", hotkey)) 223 | 224 | def trigger_prop(self, prop_action): 225 | if not self.outer.enabled: 226 | self.outer.signals.sio_queue.put(("error", "Vtube Studio Module is disabled")) 227 | return 228 | self.outer.queue.put(self.outer.Action(prop_action, None)) 229 | 230 | def move_model(self, mode): 231 | if not self.outer.enabled: 232 | self.outer.signals.sio_queue.put(("error", "Vtube Studio Module is disabled")) 233 | return 234 | self.outer.queue.put(self.outer.Action("move_model", mode)) 235 | -------------------------------------------------------------------------------- /pipfreeze.txt: -------------------------------------------------------------------------------- 1 | absl-py==2.1.0 2 | aiofiles==24.1.0 3 | aiohappyeyeballs==2.4.3 4 | aiohttp==3.10.10 5 | aiosignal==1.3.1 6 | annotated-types==0.7.0 7 | anyascii==0.3.2 8 | anyio==4.6.2.post1 9 | asgiref==3.8.1 10 | asttokens==2.4.1 11 | attrs==24.2.0 12 | audioread==3.0.1 13 | av==12.3.0 14 | azure-cognitiveservices-speech==1.37.0 15 | babel==2.16.0 16 | backoff==2.2.1 17 | bcrypt==4.2.0 18 | bidict==0.23.1 19 | blis==0.7.11 20 | build==1.2.2.post1 21 | cachetools==5.5.0 22 | catalogue==2.0.10 23 | certifi==2024.8.30 24 | cffi==1.17.1 25 | charset-normalizer==3.4.0 26 | chroma-hnswlib==0.7.6 27 | chromadb==0.5.18 28 | click==8.1.7 29 | cloudpathlib==0.20.0 30 | colorama==0.4.6 31 | coloredlogs==15.0.1 32 | comtypes==1.4.8 33 | confection==0.1.5 34 | contourpy==1.2.1 35 | coqpit==0.0.17 36 | coqui-tts==0.24.1 37 | coqui-tts-trainer==0.1.6 38 | ctranslate2==4.5.0 39 | cycler==0.12.1 40 | cymem==2.0.8 41 | 
Cython==3.0.11 42 | dateparser==1.1.8 43 | decorator==5.1.1 44 | deepspeed @ file:///Z:/neurobackend/deepspeed-0.14.0%2Bcu118-cp311-cp311-win_amd64.whl#sha256=eaa116f7055faa15938217a75866809309092b4604c946fb2ceff8d880d9a8db 45 | Deprecated==1.2.14 46 | distro==1.9.0 47 | docopt==0.6.2 48 | durationpy==0.9 49 | einops==0.8.0 50 | elevenlabs==1.2.2 51 | emoji==2.8.0 52 | encodec==0.1.1 53 | enum-tools==0.12.0 54 | enum34==1.1.10 55 | executing==2.1.0 56 | fastapi==0.115.4 57 | faster-whisper==1.0.3 58 | filelock==3.13.1 59 | flatbuffers==24.3.25 60 | fonttools==4.54.1 61 | frozenlist==1.5.0 62 | fsspec==2024.2.0 63 | google-auth==2.36.0 64 | googleapis-common-protos==1.65.0 65 | grpcio==1.67.1 66 | gruut==2.2.3 67 | gruut-ipa==0.13.0 68 | gruut_lang_de==2.0.1 69 | gruut_lang_en==2.0.1 70 | gruut_lang_es==2.0.1 71 | gruut_lang_fr==2.0.2 72 | gTTS==2.5.1 73 | h11==0.14.0 74 | halo==0.0.31 75 | hjson==3.1.0 76 | httpcore==1.0.6 77 | httptools==0.6.4 78 | httpx==0.27.2 79 | huggingface-hub==0.26.2 80 | humanfriendly==10.0 81 | idna==3.10 82 | importlib-metadata==7.0.0 83 | importlib_resources==6.4.5 84 | inflect==7.4.0 85 | ipython==8.29.0 86 | jedi==0.19.2 87 | Jinja2==3.1.3 88 | joblib==1.4.2 89 | jsonlines==1.2.0 90 | kiwisolver==1.4.7 91 | kubernetes==31.0.0 92 | langcodes==3.4.1 93 | language_data==1.2.0 94 | lazy_loader==0.4 95 | librosa==0.10.2.post1 96 | llvmlite==0.42.0 97 | log-symbols==0.0.14 98 | marisa-trie==1.2.1 99 | Markdown==3.7 100 | markdown-it-py==3.0.0 101 | MarkupSafe==2.1.5 102 | matplotlib==3.9.2 103 | matplotlib-inline==0.1.7 104 | mdurl==0.1.2 105 | mmh3==5.0.1 106 | monotonic==1.6 107 | more-itertools==10.5.0 108 | mpmath==1.3.0 109 | msgpack==1.1.0 110 | mss==9.0.2 111 | multidict==6.1.0 112 | murmurhash==1.0.10 113 | networkx==2.8.8 114 | ninja==1.11.1.1 115 | nltk==3.8.1 116 | num2words==0.5.13 117 | numba==0.59.1 118 | numpy==1.26.3 119 | oauthlib==3.2.2 120 | onnxruntime==1.20.0 121 | openai==1.30.5 122 | opencv-python==4.10.0.84 123 | opentelemetry-api==1.24.0 124 | opentelemetry-exporter-otlp-proto-common==1.24.0 125 | opentelemetry-exporter-otlp-proto-grpc==1.24.0 126 | opentelemetry-instrumentation==0.45b0 127 | opentelemetry-instrumentation-asgi==0.45b0 128 | opentelemetry-instrumentation-fastapi==0.45b0 129 | opentelemetry-proto==1.24.0 130 | opentelemetry-sdk==1.24.0 131 | opentelemetry-semantic-conventions==0.45b0 132 | opentelemetry-util-http==0.45b0 133 | openwakeword==0.6.0 134 | orjson==3.10.11 135 | overrides==7.7.0 136 | packaging==24.2 137 | parso==0.8.4 138 | pillow==10.2.0 139 | platformdirs==4.3.6 140 | pooch==1.8.2 141 | posthog==3.7.0 142 | preshed==3.0.9 143 | prompt_toolkit==3.0.48 144 | propcache==0.2.0 145 | protobuf==4.25.5 146 | psutil==6.1.0 147 | pure_eval==0.2.3 148 | pvporcupine==1.9.5 149 | py-cpuinfo==9.0.0 150 | pyasn1==0.6.1 151 | pyasn1_modules==0.4.1 152 | PyAudio==0.2.14 153 | pycparser==2.22 154 | pydantic==2.9.2 155 | pydantic_core==2.23.4 156 | pydub==0.25.1 157 | Pygments==2.18.0 158 | pynndescent==0.5.13 159 | pynvml==11.5.3 160 | pyparsing==3.2.0 161 | PyPika==0.48.9 162 | pypiwin32==223 163 | pyproject_hooks==1.2.0 164 | pyreadline3==3.5.4 165 | pysbd==0.3.4 166 | python-crfsuite==0.9.11 167 | python-dateutil==2.9.0.post0 168 | python-dotenv==1.0.1 169 | python-engineio==4.10.1 170 | python-socketio==5.11.4 171 | pyttsx3==2.90 172 | pytz==2024.2 173 | pyvts==0.3.3 174 | pywin32==308 175 | PyYAML==6.0.2 176 | RealtimeSTT==0.3.7 177 | RealTimeTTS==0.4.1 178 | regex==2024.11.6 179 | requests==2.32.3 180 | 
requests-oauthlib==2.0.0 181 | rich==13.9.4 182 | rsa==4.9 183 | safetensors==0.4.5 184 | scikit-learn==1.5.2 185 | scipy==1.14.1 186 | shellingham==1.5.4 187 | simple-websocket==1.1.0 188 | six==1.16.0 189 | smart-open==7.0.5 190 | sniffio==1.3.1 191 | soundfile==0.12.1 192 | soxr==0.5.0.post1 193 | spacy==3.7.5 194 | spacy-legacy==3.0.12 195 | spacy-loggers==1.0.5 196 | spinners==0.0.24 197 | srsly==2.4.8 198 | sseclient-py==1.8.0 199 | stack-data==0.6.3 200 | stanza==1.6.1 201 | starlette==0.41.2 202 | stream2sentence==0.2.3 203 | SudachiDict-core==20241021 204 | SudachiPy==0.6.8 205 | sympy==1.13.1 206 | tenacity==9.0.0 207 | tensorboard==2.18.0 208 | tensorboard-data-server==0.7.2 209 | termcolor==2.5.0 210 | thinc==8.2.5 211 | threadpoolctl==3.5.0 212 | tokenizers==0.19.1 213 | torch==2.2.2+cu118 214 | torchaudio==2.2.2+cu118 215 | torchvision==0.17.2+cu118 216 | tqdm==4.66.4 217 | traitlets==5.14.3 218 | transformers==4.40.2 219 | twitchAPI==4.3.1 220 | typeguard==4.4.1 221 | typer==0.13.0 222 | typing_extensions==4.12.2 223 | tzdata==2024.2 224 | tzlocal==5.2 225 | umap-learn==0.5.6 226 | urllib3==2.2.3 227 | uvicorn==0.32.0 228 | wasabi==1.1.3 229 | watchfiles==0.24.0 230 | wcwidth==0.2.13 231 | weasel==0.4.1 232 | webrtcvad-wheels==2.0.14 233 | websocket-client==1.8.0 234 | websockets==12.0 235 | Werkzeug==3.1.3 236 | wrapt==1.16.0 237 | wsproto==1.2.0 238 | yarl==1.17.1 239 | zipp==3.21.0 240 | -------------------------------------------------------------------------------- /prompter.py: -------------------------------------------------------------------------------- 1 | import time 2 | from constants import PATIENCE 3 | 4 | 5 | class Prompter: 6 | def __init__(self, signals, llms, modules=None): 7 | self.signals = signals 8 | self.llms = llms 9 | if modules is None: 10 | self.modules = {} 11 | else: 12 | self.modules = modules 13 | 14 | self.system_ready = False 15 | self.timeSinceLastMessage = 0.0 16 | 17 | def prompt_now(self): 18 | # Don't prompt AI if system isn't ready yet 19 | if not self.signals.stt_ready or not self.signals.tts_ready: 20 | return False 21 | # Don't prompt AI when anyone is currently talking 22 | if self.signals.human_speaking or self.signals.AI_thinking or self.signals.AI_speaking: 23 | return False 24 | # Prompt AI if human said something 25 | if self.signals.new_message: 26 | return True 27 | # Prompt AI if there are unprocessed chat messages 28 | if len(self.signals.recentTwitchMessages) > 0: 29 | return True 30 | # Prompt if some amount of seconds has passed without anyone talking 31 | if self.timeSinceLastMessage > PATIENCE: 32 | return True 33 | 34 | def chooseLLM(self): 35 | if "multimodal" in self.modules and self.modules["multimodal"].API.multimodal_now(): 36 | return self.llms["image"] 37 | else: 38 | return self.llms["text"] 39 | 40 | def prompt_loop(self): 41 | print("Prompter loop started") 42 | 43 | while not self.signals.terminate: 44 | # Set lastMessageTime to now if program is still starting 45 | if self.signals.last_message_time == 0.0 or (not self.signals.stt_ready or not self.signals.tts_ready): 46 | self.signals.last_message_time = time.time() 47 | self.timeSinceLastMessage = 0.0 48 | else: 49 | if not self.system_ready: 50 | print("SYSTEM READY") 51 | self.system_ready = True 52 | 53 | # Calculate and set time since last message 54 | self.timeSinceLastMessage = time.time() - self.signals.last_message_time 55 | self.signals.sio_queue.put(("patience_update", {"crr_time": self.timeSinceLastMessage, "total_time": PATIENCE})) 56 | 57 | 
# Decide and prompt LLM 58 | if self.prompt_now(): 59 | print("PROMPTING AI") 60 | llmWrapper = self.chooseLLM() 61 | llmWrapper.prompt() 62 | self.signals.last_message_time = time.time() 63 | 64 | # Sleep for 0.1 seconds before checking again. 65 | time.sleep(0.1) 66 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kimjammer/Neuro/5e4b4241c41bb40983aee2cb60d65d6bb481842b/requirements.txt -------------------------------------------------------------------------------- /signals.py: -------------------------------------------------------------------------------- 1 | import queue 2 | 3 | 4 | class Signals: 5 | def __init__(self): 6 | self._human_speaking = False 7 | self._AI_speaking = False 8 | self._AI_thinking = False 9 | self._last_message_time = 0.0 10 | self._new_message = False 11 | self._tts_ready = False 12 | self._stt_ready = False 13 | self._recentTwitchMessages = [] 14 | self._history = [] 15 | 16 | # This flag indicates to all threads that they should immediately terminate 17 | self._terminate = False 18 | 19 | self.sio_queue = queue.SimpleQueue() 20 | 21 | @property 22 | def human_speaking(self): 23 | return self._human_speaking 24 | 25 | @human_speaking.setter 26 | def human_speaking(self, value): 27 | self._human_speaking = value 28 | self.sio_queue.put(('human_speaking', value)) 29 | if value: 30 | print("SIGNALS: Human Talking Start") 31 | else: 32 | print("SIGNALS: Human Talking Stop") 33 | 34 | @property 35 | def AI_speaking(self): 36 | return self._AI_speaking 37 | 38 | @AI_speaking.setter 39 | def AI_speaking(self, value): 40 | self._AI_speaking = value 41 | self.sio_queue.put(('AI_speaking', value)) 42 | if value: 43 | print("SIGNALS: AI Talking Start") 44 | else: 45 | print("SIGNALS: AI Talking Stop") 46 | 47 | @property 48 | def AI_thinking(self): 49 | return self._AI_thinking 50 | 51 | @AI_thinking.setter 52 | def AI_thinking(self, value): 53 | self._AI_thinking = value 54 | self.sio_queue.put(('AI_thinking', value)) 55 | if value: 56 | print("SIGNALS: AI Thinking Start") 57 | else: 58 | print("SIGNALS: AI Thinking Stop") 59 | 60 | @property 61 | def last_message_time(self): 62 | return self._last_message_time 63 | 64 | @last_message_time.setter 65 | def last_message_time(self, value): 66 | self._last_message_time = value 67 | 68 | @property 69 | def new_message(self): 70 | return self._new_message 71 | 72 | @new_message.setter 73 | def new_message(self, value): 74 | self._new_message = value 75 | if value: 76 | print("SIGNALS: New Message") 77 | 78 | @property 79 | def tts_ready(self): 80 | return self._tts_ready 81 | 82 | @tts_ready.setter 83 | def tts_ready(self, value): 84 | self._tts_ready = value 85 | 86 | @property 87 | def stt_ready(self): 88 | return self._stt_ready 89 | 90 | @stt_ready.setter 91 | def stt_ready(self, value): 92 | self._stt_ready = value 93 | 94 | @property 95 | def recentTwitchMessages(self): 96 | return self._recentTwitchMessages 97 | 98 | @recentTwitchMessages.setter 99 | def recentTwitchMessages(self, value): 100 | self._recentTwitchMessages = value 101 | self.sio_queue.put(('recent_twitch_messages', value)) 102 | 103 | @property 104 | def history(self): 105 | return self._history 106 | 107 | @history.setter 108 | def history(self, value): 109 | self._history = value 110 | 111 | @property 112 | def terminate(self): 113 | return self._terminate 114 | 115 | @terminate.setter 
116 | def terminate(self, value): 117 | self._terminate = value 118 | -------------------------------------------------------------------------------- /socketioServer.py: -------------------------------------------------------------------------------- 1 | import time 2 | from aiohttp import web 3 | import socketio 4 | from aiohttp.web_runner import GracefulExit 5 | 6 | from constants import PATIENCE 7 | 8 | 9 | class SocketIOServer: 10 | def __init__(self, signals, stt, tts, llmWrapper, prompter, modules=None): 11 | if modules is None: 12 | modules = {} 13 | self.signals = signals 14 | self.stt = stt 15 | self.tts = tts 16 | self.llmWrapper = llmWrapper 17 | self.prompter = prompter 18 | self.modules = modules 19 | 20 | def start_server(self): 21 | print("Starting Socket.io server") 22 | sio = socketio.AsyncServer(async_mode='aiohttp', cors_allowed_origins='*') 23 | app = web.Application() 24 | sio.attach(app) 25 | 26 | @sio.event 27 | async def get_blacklist(sid): 28 | await sio.emit('get_blacklist', self.llmWrapper.API.get_blacklist()) 29 | 30 | @sio.event 31 | async def set_blacklist(sid, message): 32 | self.llmWrapper.API.set_blacklist(message) 33 | 34 | @sio.event 35 | async def disable_LLM(sid): 36 | self.llmWrapper.API.set_LLM_status(False) 37 | 38 | @sio.event 39 | async def enable_LLM(sid): 40 | self.llmWrapper.API.set_LLM_status(True) 41 | 42 | @sio.event 43 | async def disable_TTS(sid): 44 | self.tts.API.set_TTS_status(False) 45 | 46 | @sio.event 47 | async def enable_TTS(sid): 48 | self.tts.API.set_TTS_status(True) 49 | 50 | @sio.event 51 | async def disable_STT(sid): 52 | self.stt.API.set_STT_status(False) 53 | 54 | @sio.event 55 | async def enable_STT(sid): 56 | self.stt.API.set_STT_status(True) 57 | 58 | @sio.event 59 | async def disable_movement(sid): 60 | if "vtube_studio" in self.modules: 61 | self.modules["vtube_studio"].API.set_movement_status(False) 62 | 63 | @sio.event 64 | async def enable_movement(sid): 65 | if "vtube_studio" in self.modules: 66 | self.modules["vtube_studio"].API.set_movement_status(True) 67 | 68 | @sio.event 69 | async def disable_multimodal(sid): 70 | if "multimodal" in self.modules: 71 | self.modules["multimodal"].API.set_multimodal_status(False) 72 | 73 | @sio.event 74 | async def enable_multimodal(sid): 75 | if "multimodal" in self.modules: 76 | self.modules["multimodal"].API.set_multimodal_status(True) 77 | 78 | @sio.event 79 | async def get_hotkeys(sid): 80 | if "vtube_studio" in self.modules: 81 | self.modules["vtube_studio"].API.get_hotkeys() 82 | 83 | @sio.event 84 | async def send_hotkey(sid, hotkey): 85 | if "vtube_studio" in self.modules: 86 | self.modules["vtube_studio"].API.send_hotkey(hotkey) 87 | 88 | @sio.event 89 | async def trigger_prop(sid, prop_action): 90 | if "vtube_studio" in self.modules: 91 | self.modules["vtube_studio"].API.trigger_prop(prop_action) 92 | 93 | @sio.event 94 | async def move_model(sid, mode): 95 | if "vtube_studio" in self.modules: 96 | self.modules["vtube_studio"].API.move_model(mode) 97 | 98 | @sio.event 99 | async def disable_twitch(sid): 100 | if "twitch" in self.modules: 101 | self.modules["twitch"].API.set_twitch_status(False) 102 | 103 | @sio.event 104 | async def enable_twitch(sid): 105 | if "twitch" in self.modules: 106 | self.modules["twitch"].API.set_twitch_status(True) 107 | 108 | @sio.event 109 | async def cancel_next_message(sid): 110 | self.llmWrapper.API.cancel_next() 111 | 112 | @sio.event 113 | async def abort_current_message(sid): 114 | self.tts.API.abort_current() 115 | 116 | @sio.event 
117 | async def fun_fact(sid): 118 | self.signals.history.append({"role": "user", "content": "Let's move on. Can we get a fun fact?"}) 119 | self.signals.new_message = True 120 | 121 | @sio.event 122 | async def new_topic(sid, message): 123 | self.signals.history.append({"role": "user", "content": message}) 124 | self.signals.new_message = True 125 | 126 | @sio.event 127 | async def nuke_history(sid): 128 | self.signals.history = [] 129 | 130 | @sio.event 131 | async def play_audio(sid, file_name): 132 | if "audio_player" in self.modules: 133 | self.modules["audio_player"].API.play_audio(file_name) 134 | 135 | @sio.event 136 | async def pause_audio(sid): 137 | if "audio_player" in self.modules: 138 | self.modules["audio_player"].API.pause_audio() 139 | 140 | @sio.event 141 | async def resume_audio(sid): 142 | if "audio_player" in self.modules: 143 | self.modules["audio_player"].API.resume_audio() 144 | 145 | @sio.event 146 | async def abort_audio(sid): 147 | if "audio_player" in self.modules: 148 | self.modules["audio_player"].API.stop_playing() 149 | 150 | @sio.event 151 | async def set_custom_prompt(sid, data): 152 | if "custom_prompt" in self.modules: 153 | self.modules["custom_prompt"].API.set_prompt(data["prompt"], priority=int(data["priority"])) 154 | await sio.emit("get_custom_prompt", self.modules["custom_prompt"].API.get_prompt()) 155 | 156 | @sio.event 157 | async def clear_short_term(sid): 158 | if "memory" in self.modules: 159 | self.modules["memory"].API.clear_short_term() 160 | await sio.emit("get_memories", self.modules["memory"].API.get_memories()) 161 | 162 | @sio.event 163 | async def import_json(sid): 164 | if "memory" in self.modules: 165 | self.modules["memory"].API.import_json() 166 | 167 | @sio.event 168 | async def export_json(sid): 169 | if "memory" in self.modules: 170 | self.modules["memory"].API.export_json() 171 | 172 | @sio.event 173 | async def delete_memory(sid, data): 174 | if "memory" in self.modules: 175 | self.modules["memory"].API.delete_memory(data) 176 | await sio.emit("get_memories", self.modules["memory"].API.get_memories()) 177 | 178 | @sio.event 179 | async def get_memories(sid, data): 180 | if "memory" in self.modules: 181 | await sio.emit("get_memories", self.modules["memory"].API.get_memories(data)) 182 | 183 | @sio.event 184 | async def create_memory(sid, data): 185 | if "memory" in self.modules: 186 | self.modules["memory"].API.create_memory(data) 187 | await sio.emit("get_memories", self.modules["memory"].API.get_memories()) 188 | 189 | # When a new client connects, send them the status of everything 190 | @sio.event 191 | async def connect(sid, environ): 192 | # Set signals to themselves to trigger setter function and the sio.emit 193 | self.signals.AI_thinking = self.signals.AI_thinking 194 | self.signals.AI_speaking = self.signals.AI_speaking 195 | self.signals.human_speaking = self.signals.human_speaking 196 | self.signals.recentTwitchMessages = self.signals.recentTwitchMessages 197 | await sio.emit("patience_update", {"crr_time": time.time() - self.signals.last_message_time, "total_time": PATIENCE}) 198 | await sio.emit('get_blacklist', self.llmWrapper.API.get_blacklist()) 199 | 200 | if "twitch" in self.modules: 201 | await sio.emit('twitch_status', self.modules["twitch"].API.get_twitch_status()) 202 | if "audio_player" in self.modules: 203 | await sio.emit('audio_list', self.modules["audio_player"].API.get_audio_list()) 204 | if "vtube_studio" in self.modules: 205 | await sio.emit('movement_status', 
self.modules["vtube_studio"].API.get_movement_status()) 206 | self.modules["vtube_studio"].API.get_hotkeys() 207 | if "custom_prompt" in self.modules: 208 | await sio.emit('get_custom_prompt', self.modules["custom_prompt"].API.get_prompt()) 209 | if "multimodal" in self.modules: 210 | await sio.emit('multimodal_status', self.modules["multimodal"].API.get_multimodal_status()) 211 | 212 | # Collect the enabled status of the llm, tts, stt, and movement and send it to the client 213 | await sio.emit('LLM_status', self.llmWrapper.API.get_LLM_status()) 214 | await sio.emit('TTS_status', self.tts.API.get_TTS_status()) 215 | await sio.emit('STT_status', self.stt.API.get_STT_status()) 216 | 217 | @sio.event 218 | def disconnect(sid): 219 | print('Client disconnected') 220 | 221 | async def send_messages(): 222 | while True: 223 | if self.signals.terminate: 224 | raise GracefulExit 225 | 226 | while not self.signals.sio_queue.empty(): 227 | event, data = self.signals.sio_queue.get() 228 | # print(f"Sending {event} with {data}") 229 | await sio.emit(event, data) 230 | await sio.sleep(0.1) 231 | 232 | async def init_app(): 233 | sio.start_background_task(send_messages) 234 | return app 235 | 236 | web.run_app(init_app()) 237 | -------------------------------------------------------------------------------- /songs/.gitignore: -------------------------------------------------------------------------------- 1 | # Place any mp3 or wav files that you want to play here. 2 | 3 | * 4 | !.gitignore -------------------------------------------------------------------------------- /streamingSink.py: -------------------------------------------------------------------------------- 1 | import io 2 | from discord.sinks.core import Filters, Sink, default_filters, AudioData 3 | 4 | 5 | class StreamingSink(Sink): 6 | """A custom sink that will convert the audio to 7 | 8 | """ 9 | 10 | def __init__(self, signals, stt, filters=None): 11 | if filters is None: 12 | filters = default_filters 13 | self.filters = filters 14 | Filters.__init__(self, **self.filters) 15 | 16 | self.encoding = "pcm" 17 | self.vc = None 18 | self.audio_data = {} 19 | 20 | self.signals = signals 21 | self.stt = stt 22 | 23 | # Override the write method to instead stream the audio elsewhere 24 | @Filters.container 25 | def write(self, data, user): 26 | print("Receiving voice") 27 | if user not in self.audio_data: 28 | file = io.BytesIO() 29 | self.audio_data.update({user: AudioData(file)}) 30 | 31 | file = self.audio_data[user] 32 | file.write(data) 33 | 34 | # # Save sound data to AudioSegment object 35 | # sound = AudioSegment( 36 | # # raw audio data (bytes) 37 | # data=data, 38 | # # 2 byte (16 bit) samples 39 | # sample_width=2, 40 | # # 48 kHz frame rate 41 | # frame_rate=48000, 42 | # # stereo 43 | # channels=2 44 | # ) 45 | # # Convert sound to mono 46 | # sound = sound.set_channels(1) 47 | # # Convert sound to 16khz 48 | # sound = sound.set_frame_rate(16000) 49 | # # Send the 16bit 16khz mono PCM audio data to STT 50 | # if self.signals.stt_ready: 51 | # self.stt.feed_audio(sound.raw_data) 52 | # print("FEEDING AUDIO") 53 | 54 | def format_audio(self, audio): 55 | return 56 | -------------------------------------------------------------------------------- /stt.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | from RealtimeSTT import AudioToTextRecorder 4 | from constants import * 5 | 6 | 7 | class STT: 8 | def __init__(self, signals): 9 | self.recorder = None 10 | 
self.signals = signals 11 | self.API = self.API(self) 12 | self.enabled = True 13 | 14 | def process_text(self, text): 15 | if not self.enabled: 16 | return 17 | 18 | print("STT OUTPUT: " + text) 19 | self.signals.history.append({"role": "user", "content": text}) 20 | 21 | self.signals.last_message_time = time.time() 22 | if not self.signals.AI_speaking: 23 | self.signals.new_message = True 24 | 25 | def recording_start(self): 26 | self.signals.human_speaking = True 27 | 28 | def recording_stop(self): 29 | self.signals.human_speaking = False 30 | 31 | def feed_audio(self, data): 32 | self.recorder.feed_audio(data) 33 | 34 | def listen_loop(self): 35 | print("STT Starting") 36 | recorder_config = { 37 | 'spinner': False, 38 | 'language': 'en', 39 | 'use_microphone': True, 40 | 'input_device_index': INPUT_DEVICE_INDEX, 41 | 'silero_sensitivity': 0.6, 42 | 'silero_use_onnx': True, 43 | 'post_speech_silence_duration': 0.4, 44 | 'min_length_of_recording': 0, 45 | 'min_gap_between_recordings': 0.2, 46 | 'enable_realtime_transcription': True, 47 | 'realtime_processing_pause': 0.2, 48 | 'realtime_model_type': 'tiny.en', 49 | 'compute_type': 'auto', 50 | 'on_recording_start': self.recording_start, 51 | 'on_recording_stop': self.recording_stop, 52 | 'level': logging.ERROR 53 | } 54 | 55 | with AudioToTextRecorder(**recorder_config) as recorder: 56 | self.recorder = recorder 57 | print("STT Ready") 58 | self.signals.stt_ready = True 59 | while not self.signals.terminate: 60 | if not self.enabled: 61 | time.sleep(0.2) 62 | continue 63 | recorder.text(self.process_text) 64 | 65 | class API: 66 | def __init__(self, outer): 67 | self.outer = outer 68 | 69 | def set_STT_status(self, status): 70 | self.outer.enabled = status 71 | self.outer.signals.sio_queue.put(('STT_status', status)) 72 | 73 | def get_STT_status(self): 74 | return self.outer.enabled 75 | 76 | def shutdown(self): 77 | self.outer.recorder.stop() 78 | self.outer.recorder.interrupt_stop_event.set() 79 | -------------------------------------------------------------------------------- /tts.py: -------------------------------------------------------------------------------- 1 | import time 2 | from RealtimeTTS import TextToAudioStream, CoquiEngine 3 | from constants import * 4 | 5 | 6 | class TTS: 7 | def __init__(self, signals): 8 | self.stream = None 9 | self.signals = signals 10 | self.API = self.API(self) 11 | self.enabled = True 12 | 13 | engine = CoquiEngine( 14 | use_deepspeed=True, 15 | voice="./voices/" + VOICE_REFERENCE, 16 | speed=1.1, 17 | ) 18 | tts_config = { 19 | 'on_audio_stream_start': self.audio_started, 20 | 'on_audio_stream_stop': self.audio_ended, 21 | 'output_device_index': OUTPUT_DEVICE_INDEX, 22 | } 23 | self.stream = TextToAudioStream(engine, **tts_config) 24 | self.signals.tts_ready = True 25 | 26 | def play(self, message): 27 | if not self.enabled: 28 | return 29 | 30 | # If the message is only whitespace, don't attempt to play it 31 | if not message.strip(): 32 | return 33 | 34 | self.signals.sio_queue.put(("current_message", message)) 35 | self.stream.feed(message) 36 | self.stream.play_async() 37 | 38 | def stop(self): 39 | self.stream.stop() 40 | self.signals.AI_speaking = False 41 | 42 | def audio_started(self): 43 | self.signals.AI_speaking = True 44 | 45 | def audio_ended(self): 46 | self.signals.last_message_time = time.time() 47 | self.signals.AI_speaking = False 48 | 49 | class API: 50 | def __init__(self, outer): 51 | self.outer = outer 52 | 53 | def set_TTS_status(self, status): 54 | self.outer.enabled = 
status 55 | if not status: 56 | self.outer.stop() 57 | self.outer.signals.sio_queue.put(('TTS_status', status)) 58 | 59 | def get_TTS_status(self): 60 | return self.outer.enabled 61 | 62 | def abort_current(self): 63 | self.outer.stop() 64 | -------------------------------------------------------------------------------- /utils/listAudioDevices.py: -------------------------------------------------------------------------------- 1 | import pyaudio 2 | 3 | py_audio = pyaudio.PyAudio() 4 | info = py_audio.get_host_api_info_by_index(0) 5 | 6 | # List all devices 7 | 8 | # Mics 9 | print("Microphones:") 10 | for i in range(0, info.get('deviceCount')): 11 | # Check number of input channels 12 | # (If there is at least 1 input channel, then it is suitable as a microphone) 13 | if py_audio.get_device_info_by_host_api_device_index(0, i).get('maxInputChannels') > 0: 14 | print(str(i) + " " + py_audio.get_device_info_by_host_api_device_index(0, i).get('name')) 15 | 16 | # Speakers 17 | print("Speakers:") 18 | for i in range(0, info.get('deviceCount')): 19 | # Check number of output channels 20 | # (If there is at least 1 output channel, then it is suitable as a speaker) 21 | if py_audio.get_device_info_by_host_api_device_index(0, i).get('maxOutputChannels') > 0: 22 | print(str(i) + " " + py_audio.get_device_info_by_host_api_device_index(0, i).get('name')) 23 | -------------------------------------------------------------------------------- /voices/.gitignore: -------------------------------------------------------------------------------- 1 | # Place your voice reference wav files here. 2 | 3 | * 4 | !.gitignore --------------------------------------------------------------------------------