├── README.md ├── button.py ├── narrator.ipynb └── streamed_text_plus_streamed_audio.py /README.md: -------------------------------------------------------------------------------- 1 |

LLM Experiments 🧪

2 | 3 |

4 | 5 | Twitter URL 6 | 7 | 8 | Say Hi 9 | 10 | 11 | Say Hi 12 | 13 |

14 | 15 |

This is a personal repository with different LLM-based projects I've built. Click on 'Demo video' to see a video demonstration of every one of the projects in action! Also consider installing Double.bot, this is the AI copilot I use to build these projects.

16 | 17 | 18 | ____ 19 | 20 | ### 1. Button.py 21 | This file launches a simple UI with a textbox for inputs and a button. All you have to do is write some text into the textbox and push the button, then this message is sent to OpenAI's Text to speech (TTS) endpoint. You can have your message either streamed back (basically it will start playing before the full audio output is done being created), or not streamed (it will not play until the full TTS audio file has been created). 22 | 23 | The purpose of this file is to show how much of an advantage streaming provides, plus provide basic code to implement streaming on your own. For some reason OpenAI mentions it on their documentation as something you can do, but provides no working examples, so here's mine for you :) 24 | 25 | 26 | ### 2. narrator.ipynb 27 | This script takes an mp4 file in your local directory, and generates a narration of the contents in the video. 28 | Make sure to tweak the system prompt for best results. 29 | 30 | 31 | ### 3. streamed_text_plus_streamed_audio.py 32 | In this file we make use of two different types of streaming. 33 | First, you have to input a message (try: How are you?). Then, this message is sent to gpt-3.5-turbo to generate a response. The response it generates is streamed into TTS (I chunk it so every time there is a period or ?! it sends a request to TTS), and the audio from TTS is streamed back as seen in the button.py file. 34 | -------------------------------------------------------------------------------- /button.py: -------------------------------------------------------------------------------- 1 | from PyQt5.QtWidgets import QApplication, QPushButton, QVBoxLayout, QWidget, QLineEdit, QLabel 2 | import sys 3 | 4 | # Import necessary libraries and functions (Same as in your original code) 5 | import requests 6 | import pyaudio 7 | import soundfile as sf 8 | import io 9 | import time 10 | from dotenv import load_dotenv 11 | from openai import OpenAI 12 | from pydub import AudioSegment 13 | from pydub.playback import play 14 | import pydub 15 | import pygame 16 | load_dotenv() 17 | 18 | 19 | def streamed_audio(input_text, model='tts-1', voice='alloy'): 20 | start_time = time.time() 21 | # OpenAI API endpoint and parameters 22 | url = "https://api.openai.com/v1/audio/speech" 23 | headers = { 24 | "Authorization": 'Bearer YOUR_API_KEY', # Replace with your API key 25 | } 26 | 27 | data = { 28 | "model": model, 29 | "input": input_text, 30 | "voice": voice, 31 | "response_format": "opus", 32 | } 33 | 34 | audio = pyaudio.PyAudio() 35 | 36 | def get_pyaudio_format(subtype): 37 | if subtype == 'PCM_16': 38 | return pyaudio.paInt16 39 | return pyaudio.paInt16 40 | 41 | with requests.post(url, headers=headers, json=data, stream=True) as response: 42 | if response.status_code == 200: 43 | buffer = io.BytesIO() 44 | for chunk in response.iter_content(chunk_size=4096): 45 | buffer.write(chunk) 46 | 47 | buffer.seek(0) 48 | 49 | with sf.SoundFile(buffer, 'r') as sound_file: 50 | format = get_pyaudio_format(sound_file.subtype) 51 | channels = sound_file.channels 52 | rate = sound_file.samplerate 53 | 54 | stream = audio.open(format=format, channels=channels, rate=rate, output=True) 55 | chunk_size = 1024 56 | data = sound_file.read(chunk_size, dtype='int16') 57 | print(f"Time to play: {time.time() - start_time} seconds") 58 | 59 | while len(data) > 0: 60 | stream.write(data.tobytes()) 61 | data = sound_file.read(chunk_size, dtype='int16') 62 | 63 | stream.stop_stream() 64 | stream.close() 65 | else: 66 | print(f"Error: {response.status_code} - {response.text}") 67 | 68 | audio.terminate() 69 | 70 | return f"Time to play: {time.time() - start_time} seconds" 71 | 72 | # Example usage 73 | #print(play_text_as_audio("Nuclear energy is clean energy!")) 74 | 75 | def not_streamed(input_text, model='tts-1', voice='alloy'): 76 | start_time = time.time() 77 | 78 | # Initialize Pygame Mixer 79 | pygame.mixer.init() 80 | 81 | client = OpenAI() 82 | 83 | response = client.audio.speech.create( 84 | model=model, 85 | voice=voice, 86 | input=input_text, 87 | ) 88 | 89 | response.stream_to_file("output.opus") 90 | 91 | # Load and play the audio file 92 | pygame.mixer.music.load('output.opus') 93 | print(f"Time to play: {time.time() - start_time} seconds") 94 | pygame.mixer.music.play() 95 | 96 | # Loop to keep the script running during playback 97 | while pygame.mixer.music.get_busy(): 98 | pygame.time.Clock().tick(10) 99 | 100 | 101 | # # Example usage 102 | # print(play_text_as_audio("Nuclear energy is clean energy!")) 103 | 104 | 105 | def run_streamed(): 106 | input_text = text_box.text() 107 | streamed_audio(input_text) # Call the streamed_audio function with input text 108 | 109 | def run_not_streamed(): 110 | input_text = text_box.text() 111 | not_streamed(input_text) # Call the not_streamed function with input text 112 | 113 | app = QApplication(sys.argv) 114 | 115 | window = QWidget() 116 | window.setWindowTitle('Text to Speech') 117 | 118 | layout = QVBoxLayout() 119 | 120 | # Caption 121 | caption = QLabel('Text to Speech') 122 | layout.addWidget(caption) 123 | 124 | # Textbox 125 | text_box = QLineEdit() 126 | layout.addWidget(text_box) 127 | 128 | # Streamed Button 129 | streamed_button = QPushButton('Streamed') 130 | streamed_button.clicked.connect(run_streamed) # Link button click to streamed_audio function 131 | layout.addWidget(streamed_button) 132 | 133 | # Not Streamed Button 134 | not_streamed_button = QPushButton('Not Streamed') 135 | not_streamed_button.clicked.connect(run_not_streamed) # Link button click to not_streamed function 136 | layout.addWidget(not_streamed_button) 137 | 138 | window.setLayout(layout) 139 | 140 | window.show() 141 | sys.exit(app.exec_()) -------------------------------------------------------------------------------- /streamed_text_plus_streamed_audio.py: -------------------------------------------------------------------------------- 1 | import requests, pyaudio, time, pygame, threading, queue, tempfile 2 | import soundfile as sf 3 | from dotenv import load_dotenv 4 | from openai import OpenAI 5 | load_dotenv() 6 | is_first_audio_played = False # Flag to check if the first audio has been played 7 | client = OpenAI() 8 | 9 | # Queues for audio generation and playback 10 | audio_generation_queue = queue.Queue() 11 | audio_playback_queue = queue.Queue() 12 | 13 | # Initialize Pygame Mixer at the start 14 | pygame.mixer.init() 15 | 16 | def process_audio_generation_queue(): 17 | while True: 18 | input_text = audio_generation_queue.get() 19 | if input_text is None: 20 | break 21 | audio_file_path = generate_audio(input_text) 22 | audio_playback_queue.put(audio_file_path) 23 | audio_generation_queue.task_done() 24 | 25 | def process_audio_playback_queue(): 26 | #time.sleep(10) #Debug Only 27 | while True: 28 | audio_file_path = audio_playback_queue.get() 29 | if audio_file_path is None: 30 | #print("No audio file path found") #Debug Only 31 | break 32 | #print(audio_file_path) #Debug Only 33 | play_audio(audio_file_path) 34 | audio_playback_queue.task_done() 35 | 36 | # Threads for processing the audio generation and playback queues 37 | audio_generation_thread = threading.Thread(target=process_audio_generation_queue) 38 | audio_generation_thread.start() 39 | 40 | audio_playback_thread = threading.Thread(target=process_audio_playback_queue) 41 | audio_playback_thread.start() 42 | 43 | def generate_audio(input_text, model='tts-1', voice='alloy'): 44 | url = "https://api.openai.com/v1/audio/speech" 45 | headers = { 46 | "Authorization": 'Bearer YOUR_API_KEY' # Replace with your actual API key 47 | } 48 | data = { 49 | "model": model, 50 | "input": input_text, 51 | "voice": voice, 52 | "response_format": "opus", 53 | } 54 | 55 | with requests.post(url, headers=headers, json=data, stream=True) as response: 56 | if response.status_code == 200: 57 | # Create a temporary file to store the audio 58 | with tempfile.NamedTemporaryFile(delete=False, suffix='.opus') as temp_file: 59 | for chunk in response.iter_content(chunk_size=4096): 60 | temp_file.write(chunk) 61 | #print(temp_file.name) #Debug Only 62 | return temp_file.name 63 | else: 64 | print(f"Error: {response.status_code} - {response.text}") 65 | return None 66 | 67 | def play_audio(audio_file_path): 68 | if audio_file_path: 69 | # Calculate the time elapsed since the start of the script 70 | elapsed_time = time.time() - start_time 71 | print(f"Time taken to start playing audio clip: {elapsed_time} seconds") 72 | #print("Attempting to play audio.") #Debug Only 73 | with sf.SoundFile(audio_file_path, 'r') as sound_file: 74 | audio = pyaudio.PyAudio() 75 | stream = audio.open(format=pyaudio.paInt16, channels=sound_file.channels, rate=sound_file.samplerate, output=True) 76 | data = sound_file.read(1024,dtype='int16') 77 | 78 | while len(data) > 0: 79 | stream.write(data.tobytes()) 80 | data = sound_file.read(102,dtype='int16') 81 | 82 | stream.stop_stream() 83 | stream.close() 84 | audio.terminate() 85 | 86 | def print_w_stream(message): 87 | completion = client.chat.completions.create( 88 | model='gpt-3.5-turbo', 89 | messages=[ 90 | {"role": "system", "content": "You are a friendly AI assistant."}, 91 | {"role": "user", "content": message}, 92 | ], 93 | stream=True, 94 | temperature=0, #Set to 0 for benchmarking 95 | max_tokens=500, 96 | ) 97 | 98 | sentence = '' 99 | sentences = [] 100 | sentence_end_chars = {'.', '?', '!', '\n'} 101 | 102 | for chunk in completion: 103 | content = chunk.choices[0].delta.content 104 | if content is not None: 105 | for char in content: 106 | sentence += char 107 | if char in sentence_end_chars: 108 | sentence = sentence.strip() 109 | if sentence and sentence not in sentences: 110 | sentences.append(sentence) 111 | audio_generation_queue.put(sentence) 112 | print(f"Queued sentence: {sentence}") # Logging queued sentence 113 | sentence = '' 114 | return sentences 115 | 116 | def cleanup_queues(): 117 | audio_generation_queue.join() # Wait for audio generation queue to be empty 118 | audio_generation_queue.put(None) # Signal the end of audio generation 119 | audio_playback_queue.join() # Wait for audio playback queue to be empty 120 | audio_playback_queue.put(None) # Signal the end of audio playback 121 | 122 | # Prompt the user for input 123 | user_input = input("What do you want to ask the AI? ") 124 | start_time = time.time() # Record the start time 125 | 126 | 127 | print_w_stream(user_input) 128 | 129 | cleanup_queues() # Initiate the cleanup process 130 | 131 | audio_generation_thread.join() # Wait for the audio generation thread to finish 132 | audio_playback_thread.join() # Wait for the audio playback thread to finish 133 | pygame.mixer.quit() # Close the Pygame mixer --------------------------------------------------------------------------------