├── README.md
├── button.py
├── narrator.ipynb
└── streamed_text_plus_streamed_audio.py
/README.md:
--------------------------------------------------------------------------------
# LLM Experiments 🧪

This is a personal repository with different LLM-based projects I've built. Click on 'Demo video' to see a video demonstration of each project in action! Also consider installing Double.bot, the AI copilot I use to build these projects.

____

### 1. button.py
This file launches a simple UI with a textbox for inputs and a button. Write some text into the textbox and push the button, and the message is sent to OpenAI's text-to-speech (TTS) endpoint. You can have your message either streamed back (it starts playing before the full audio output has finished being generated) or not streamed (it won't play until the full TTS audio file has been created).

The purpose of this file is to show how much of an advantage streaming provides, plus provide basic code to implement streaming on your own. For some reason OpenAI mentions it in their documentation as something you can do but provides no working examples, so here's mine for you :)

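As a quick reference, here is the heart of the streamed version stripped down to the request itself (a minimal sketch; the full implementation with playback is in button.py below, and it assumes `OPENAI_API_KEY` is set in your `.env`):

```python
import os, requests

# Stream TTS audio from OpenAI's speech endpoint chunk by chunk
with requests.post(
    "https://api.openai.com/v1/audio/speech",
    headers={"Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}"},
    json={"model": "tts-1", "input": "Hello!", "voice": "alloy", "response_format": "opus"},
    stream=True,  # download the audio in chunks instead of waiting for the whole file
) as response:
    for chunk in response.iter_content(chunk_size=4096):
        pass  # feed each chunk to your audio buffer/player as it arrives
```
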
### 2. narrator.ipynb
This script takes an mp4 file in your local directory and generates a narration of the contents of the video.
Make sure to tweak the system prompt for best results.

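The notebook itself isn't reproduced here, but the general approach looks roughly like this (a minimal sketch assuming OpenCV for frame extraction and a vision-capable OpenAI model; the file name, model, frame-sampling rate, and system prompt are all placeholders):

```python
# Minimal sketch: extract frames from a local mp4 and ask a vision-capable
# model to narrate them. video.mp4, gpt-4o, the prompt, and the every-60th-frame
# sampling are placeholder assumptions -- tweak them for your video.
import base64
import cv2  # pip install opencv-python
from openai import OpenAI

client = OpenAI()

# Read the video and base64-encode each frame as JPEG
video = cv2.VideoCapture("video.mp4")
frames = []
while video.isOpened():
    success, frame = video.read()
    if not success:
        break
    _, buffer = cv2.imencode(".jpg", frame)
    frames.append(base64.b64encode(buffer).decode("utf-8"))
video.release()

# Send a sample of frames to the model and print the narration
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "You are a narrator. Describe what happens in these video frames."},
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{f}"}}
                for f in frames[::60]  # every 60th frame keeps the request small
            ],
        },
    ],
    max_tokens=300,
)
print(response.choices[0].message.content)
```
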
### 3. streamed_text_plus_streamed_audio.py
In this file we make use of two different types of streaming.
First, you input a message (try: "How are you?"). This message is sent to gpt-3.5-turbo to generate a response. The response is streamed into TTS (I chunk it so that every time a sentence-ending character like '.', '?', or '!' appears, a request is sent to TTS), and the audio from TTS is streamed back as seen in the button.py file.

--------------------------------------------------------------------------------
/button.py:
--------------------------------------------------------------------------------
from PyQt5.QtWidgets import QApplication, QPushButton, QVBoxLayout, QWidget, QLineEdit, QLabel
import sys

# Audio and API libraries
import os
import requests
import pyaudio
import soundfile as sf
import io
import time
from dotenv import load_dotenv
from openai import OpenAI
import pygame

load_dotenv()

def streamed_audio(input_text, model='tts-1', voice='alloy'):
    start_time = time.time()
    # OpenAI API endpoint and parameters
    url = "https://api.openai.com/v1/audio/speech"
    headers = {
        "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}",  # Key is loaded from .env by load_dotenv()
    }

    data = {
        "model": model,
        "input": input_text,
        "voice": voice,
        "response_format": "opus",
    }

    audio = pyaudio.PyAudio()

    def get_pyaudio_format(subtype):
        # The decoded TTS audio is 16-bit PCM, so default to paInt16
        if subtype == 'PCM_16':
            return pyaudio.paInt16
        return pyaudio.paInt16

    with requests.post(url, headers=headers, json=data, stream=True) as response:
        if response.status_code == 200:
            # Read the response in chunks into an in-memory buffer
            buffer = io.BytesIO()
            for chunk in response.iter_content(chunk_size=4096):
                buffer.write(chunk)

            buffer.seek(0)

            # Decode the buffered Opus audio and play it through PyAudio
            with sf.SoundFile(buffer, 'r') as sound_file:
                pa_format = get_pyaudio_format(sound_file.subtype)
                channels = sound_file.channels
                rate = sound_file.samplerate

                stream = audio.open(format=pa_format, channels=channels, rate=rate, output=True)
                chunk_size = 1024
                data = sound_file.read(chunk_size, dtype='int16')
                print(f"Time to play: {time.time() - start_time} seconds")

                while len(data) > 0:
                    stream.write(data.tobytes())
                    data = sound_file.read(chunk_size, dtype='int16')

                stream.stop_stream()
                stream.close()
        else:
            print(f"Error: {response.status_code} - {response.text}")

    audio.terminate()

    return f"Time to play: {time.time() - start_time} seconds"

# Example usage
# print(streamed_audio("Nuclear energy is clean energy!"))

def not_streamed(input_text, model='tts-1', voice='alloy'):
    start_time = time.time()

    # Initialize Pygame Mixer
    pygame.mixer.init()

    client = OpenAI()

    # Generate the complete TTS audio file before anything can be played
    response = client.audio.speech.create(
        model=model,
        voice=voice,
        input=input_text,
    )

    response.stream_to_file("output.opus")

    # Load and play the audio file
    pygame.mixer.music.load('output.opus')
    print(f"Time to play: {time.time() - start_time} seconds")
    pygame.mixer.music.play()

    # Loop to keep the script running during playback
    while pygame.mixer.music.get_busy():
        pygame.time.Clock().tick(10)


# Example usage
# print(not_streamed("Nuclear energy is clean energy!"))


def run_streamed():
    input_text = text_box.text()
    streamed_audio(input_text)  # Call the streamed_audio function with input text

def run_not_streamed():
    input_text = text_box.text()
    not_streamed(input_text)  # Call the not_streamed function with input text

app = QApplication(sys.argv)

window = QWidget()
window.setWindowTitle('Text to Speech')

layout = QVBoxLayout()

# Caption
caption = QLabel('Text to Speech')
layout.addWidget(caption)

# Textbox
text_box = QLineEdit()
layout.addWidget(text_box)

# Streamed Button
streamed_button = QPushButton('Streamed')
streamed_button.clicked.connect(run_streamed)  # Link button click to streamed_audio function
layout.addWidget(streamed_button)

# Not Streamed Button
not_streamed_button = QPushButton('Not Streamed')
not_streamed_button.clicked.connect(run_not_streamed)  # Link button click to not_streamed function
layout.addWidget(not_streamed_button)

window.setLayout(layout)

window.show()
sys.exit(app.exec_())
--------------------------------------------------------------------------------
/streamed_text_plus_streamed_audio.py:
--------------------------------------------------------------------------------
import requests, pyaudio, time, threading, queue, tempfile, os
import soundfile as sf
from dotenv import load_dotenv
from openai import OpenAI
load_dotenv()
client = OpenAI()

# Queues for audio generation and playback
audio_generation_queue = queue.Queue()
audio_playback_queue = queue.Queue()

def process_audio_generation_queue():
    while True:
        input_text = audio_generation_queue.get()
        if input_text is None:  # Sentinel value: stop the thread
            break
        audio_file_path = generate_audio(input_text)
        audio_playback_queue.put(audio_file_path)
        audio_generation_queue.task_done()

def process_audio_playback_queue():
    while True:
        audio_file_path = audio_playback_queue.get()
        if audio_file_path is None:  # Sentinel value: stop the thread
            break
        play_audio(audio_file_path)
        audio_playback_queue.task_done()

# Threads for processing the audio generation and playback queues
audio_generation_thread = threading.Thread(target=process_audio_generation_queue)
audio_generation_thread.start()

audio_playback_thread = threading.Thread(target=process_audio_playback_queue)
audio_playback_thread.start()

def generate_audio(input_text, model='tts-1', voice='alloy'):
    url = "https://api.openai.com/v1/audio/speech"
    headers = {
        "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}"  # Key is loaded from .env by load_dotenv()
    }
    data = {
        "model": model,
        "input": input_text,
        "voice": voice,
        "response_format": "opus",
    }

    with requests.post(url, headers=headers, json=data, stream=True) as response:
        if response.status_code == 200:
            # Create a temporary file to store the audio
            with tempfile.NamedTemporaryFile(delete=False, suffix='.opus') as temp_file:
                for chunk in response.iter_content(chunk_size=4096):
                    temp_file.write(chunk)
                return temp_file.name
        else:
            print(f"Error: {response.status_code} - {response.text}")
            return None

def play_audio(audio_file_path):
    if audio_file_path:
        # Calculate the time elapsed since the start of the script
        elapsed_time = time.time() - start_time
        print(f"Time taken to start playing audio clip: {elapsed_time} seconds")
        with sf.SoundFile(audio_file_path, 'r') as sound_file:
            audio = pyaudio.PyAudio()
            stream = audio.open(format=pyaudio.paInt16, channels=sound_file.channels, rate=sound_file.samplerate, output=True)
            data = sound_file.read(1024, dtype='int16')

            while len(data) > 0:
                stream.write(data.tobytes())
                data = sound_file.read(1024, dtype='int16')

            stream.stop_stream()
            stream.close()
            audio.terminate()

def print_w_stream(message):
    completion = client.chat.completions.create(
        model='gpt-3.5-turbo',
        messages=[
            {"role": "system", "content": "You are a friendly AI assistant."},
            {"role": "user", "content": message},
        ],
        stream=True,
        temperature=0,  # Set to 0 for benchmarking
        max_tokens=500,
    )

    sentence = ''
    sentences = []
    sentence_end_chars = {'.', '?', '!', '\n'}

    # Accumulate streamed tokens into sentences and queue each completed
    # sentence for TTS as soon as its end character arrives
    for chunk in completion:
        content = chunk.choices[0].delta.content
        if content is not None:
            for char in content:
                sentence += char
                if char in sentence_end_chars:
                    sentence = sentence.strip()
                    if sentence and sentence not in sentences:
                        sentences.append(sentence)
                        audio_generation_queue.put(sentence)
                        print(f"Queued sentence: {sentence}")  # Log the queued sentence
                    sentence = ''
    return sentences

def cleanup_queues():
    audio_generation_queue.join()  # Wait for audio generation queue to be empty
    audio_generation_queue.put(None)  # Signal the end of audio generation
    audio_playback_queue.join()  # Wait for audio playback queue to be empty
    audio_playback_queue.put(None)  # Signal the end of audio playback

# Prompt the user for input
user_input = input("What do you want to ask the AI? ")
start_time = time.time()  # Record the start time

print_w_stream(user_input)

cleanup_queues()  # Initiate the cleanup process

audio_generation_thread.join()  # Wait for the audio generation thread to finish
audio_playback_thread.join()  # Wait for the audio playback thread to finish
--------------------------------------------------------------------------------