├── ElevenLabsNicole.wav
├── README.md
├── _elevenlabs.py
├── _langchain.py
├── app.py
├── audioGen.py
└── requirements.txt
--------------------------------------------------------------------------------
/ElevenLabsNicole.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tyemalshara/QuakeAI/9bcebedfcf1cc75dd13c345381695cbbb80b269e/ElevenLabsNicole.wav
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# QuakeAI

QuakeAI is an audiobook generator that enables authors, writers, and live streamers/broadcasters to produce spoken stories with AI-generated background music that brings them to life. QuakeAI leverages LLMs, music-generation models, and voice-generation models, so users only need to supply a story idea, or a story they have written themselves, and QuakeAI turns it into an audiobook with rich background-music effects. Authors and writers will be surprised how easy it is to turn stories written on paper into audio spoken in their own voice, or a premade one, backed by high-quality music, and publish the result on Audible at the click of a button. Creators of shorts and reels can generate music for their videos without worrying about demonetization or DMCA takedowns. Authors can even brainstorm short stories with other authors in a chat room and let QuakeAI turn the result into an audiobook. Try QuakeAI now and be amazed.
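
## Quick start

A minimal local run might look like this (a sketch, assuming the pinned dependencies in `requirements.txt`; a GPU is optional but speeds up MusicGen):

    pip install -r requirements.txt
    streamlit run app.py

Then paste your ElevenLabs API key into the sidebar, enter a short story idea, and click "Generate Story through Text Input" (or use the demo button, which narrates with the bundled `ElevenLabsNicole.wav` sample).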
--------------------------------------------------------------------------------
/_elevenlabs.py:
--------------------------------------------------------------------------------
# Import the necessary functions from the elevenlabs library (the 0.2.x API, as pinned in requirements.txt).
from elevenlabs import generate, set_api_key  # voices, play
from elevenlabs.api import History  # currently unused
import os


# The ElevenLabs API key could also be set globally like this:
# os.environ['ELEVENLABS_API_KEY'] = 'a77232a2653ddfkjff03fgluasfc1f2'
# set_api_key(os.environ.get("ELEVENLABS_API_KEY"))

# Generate audio for the podcast with a premade voice. These voices are already
# trained and available in ElevenLabs by default.
def with_premade_voice(prompt, elevenlabs_api_key):

    os.environ['ELEVENLABS_API_KEY'] = elevenlabs_api_key
    set_api_key(os.environ.get("ELEVENLABS_API_KEY"))
    audio_path = 'Nicole_whisper.mp3'

    try:
        # The generate() call sits inside the try block so that API failures
        # also fall through to the empty-string return below.
        audio = generate(
            text=prompt,
            voice='Nicole',
            model="eleven_monolingual_v1"
        )

        with open(audio_path, 'wb') as f:
            f.write(audio)

        print("ElevenLabs -> Completion:\n")
        return audio_path

    except Exception as e:
        print(e)

    return ""

# Example: with_premade_voice(prompt="There's a llama", elevenlabs_api_key="YOUR_KEY")
--------------------------------------------------------------------------------
/_langchain.py:
--------------------------------------------------------------------------------
from clarifai_grpc.channel.clarifai_channel import ClarifaiChannel
from clarifai_grpc.grpc.api import resources_pb2, service_pb2, service_pb2_grpc
from clarifai_grpc.grpc.api.status import status_code_pb2

from audioGen import audioGen
import os

# Your PAT (Personal Access Token) can be found in the Clarifai portal under Authentication
os.environ['PAT'] = 'aca3bbf0c7c446f99d7e30fc894bd61b'

# Specify the correct user_id/app_id pairings,
# since you're making inferences outside your app's scope
USER_ID = 'meta'
APP_ID = 'Llama-2'
# Change these to whatever model and model version you want to use
MODEL_ID = 'llama2-13b-chat'
MODEL_VERSION_ID = '79a1af31aa8249a99602fc05687e8f40'
# Commented example of the prompt template for Llama 2:
# STORY_LINES = "There's a llama in my garden. It's eating all my flowers. I'm going to call the police."
# RAW_TEXT = f"[INST] <<SYS>>You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.<</SYS>> Given this Story:'{STORY_LINES}' write a very short prompt describing a melody that would fit the story. Here's a template for a prompt: 110bpm 64kbps 16khz lofi hiphop summer smooth [/INST]"

# get_response() generates a prompt for the music-generation model (MusicGen, via audioGen.py).
# It takes one parameter, `STORY_LINES` (the story text), wraps it in the Llama-2
# instruction template `RAW_TEXT`, sends it to the model identified by `MODEL_ID`
# and `MODEL_VERSION_ID`, and returns the completion.
def get_response(STORY_LINES):
    RAW_TEXT = f"[INST] <<SYS>>You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.<</SYS>> Given this Story:'{STORY_LINES}' write a very short prompt of one sentence describing a melody that would fit the story. Here's an example of the kind of prompt I expect you to answer with: 110bpm 64kbps 16khz lofi hiphop summer smooth [/INST]"
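
    # Note: the [INST] ... [/INST] and <<SYS>> ... <</SYS>> markers follow the
    # Llama-2 chat prompt format. The model is asked to reply with a one-line,
    # MusicGen-style tag string such as "110bpm 64kbps 16khz lofi hiphop summer smooth",
    # which audioGen() later extracts from between the double quotes.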
    # Set up the gRPC channel and stub for the LLM (Llama 2 in our case).
    channel = ClarifaiChannel.get_grpc_channel()
    stub = service_pb2_grpc.V2Stub(channel)

    metadata = (('authorization', 'Key ' + os.environ.get("PAT")),)

    userDataObject = resources_pb2.UserAppIDSet(user_id=USER_ID, app_id=APP_ID)
    post_model_outputs_response = stub.PostModelOutputs(
        service_pb2.PostModelOutputsRequest(
            user_app_id=userDataObject,  # The userDataObject created above is required when using a PAT
            model_id=MODEL_ID,
            version_id=MODEL_VERSION_ID,  # This is optional. Defaults to the latest model version
            inputs=[
                resources_pb2.Input(
                    data=resources_pb2.Data(
                        text=resources_pb2.Text(
                            raw=RAW_TEXT
                        )
                    )
                )
            ]
        ),
        metadata=metadata
    )
    if post_model_outputs_response.status.code != status_code_pb2.SUCCESS:
        print(post_model_outputs_response.status)
        raise Exception(f"Post model outputs failed, status: {post_model_outputs_response.status.description}")

    # Since we sent one input, exactly one output will exist here
    output = post_model_outputs_response.outputs[0]
    response = output.data.text.raw  # This is the completion from the LLM (Llama 2 in our case).

    # audioGen(output.data.text.raw)  # This would generate the background music for the storyline directly.

    print("Completion:\n")

    return response
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
# Import from standard library
import os
import logging

# Import from 3rd party libraries
from moviepy.editor import AudioFileClip, CompositeAudioClip
import streamlit as st

# Import modules from the local package
from _langchain import get_response
from _elevenlabs import with_premade_voice
from audioGen import audioGen

def generate_podcast_text(prompt):
    return get_response(STORY_LINES=prompt)

def overlay_music(audio_path, music_path):
    # Load the narration track
    audio = AudioFileClip(rf'{audio_path}')

    # Load the background-music track
    music = AudioFileClip(rf'{music_path}')

    # Mix the two tracks and write the result to disk
    combined_audio = CompositeAudioClip([audio, music])
    combined_audio.write_audiofile(r'combined_3.wav', fps=44100)
    combined_path = r'combined_3.wav'
    return combined_path

def generate_podcast(prompt, elevenlabs_api_key):

    if prompt == "":
        st.session_state.text_error = "Please enter a prompt."
        return

    with text_spinner_placeholder:
        with st.spinner("Please wait while we process your query..."):
            g_podcast = generate_podcast_text(prompt=prompt)

            st.session_state.podcast_generate = g_podcast

    with text_spinner_placeholder:
        with st.spinner("Please wait while we process your query..."):
            music_path = audioGen(text=st.session_state.podcast_generate)  # Generate background music from the LLM's music description.
            audio_path = with_premade_voice(prompt=prompt, elevenlabs_api_key=elevenlabs_api_key)  # Generate a spoken narration of the user's storyline.
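            # At this point `music_path` is the MusicGen background track and
            # `audio_path` is the ElevenLabs narration (or "" if ElevenLabs
            # failed); overlay_music() mixes the two with moviepy's
            # CompositeAudioClip, which keeps the longer clip's duration.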
            if audio_path == "":
                audio_path = r'ElevenLabsNicole.wav'  # fall back to the bundled sample narration if ElevenLabs failed
            combined_path = overlay_music(audio_path=audio_path, music_path=music_path)  # Combine both audio files into one.
            if music_path != "":
                st.session_state.output_file_path = combined_path

def generate_podcast_demo(prompt):

    if prompt == "":
        st.session_state.text_error = "Please enter a prompt."
        return

    with text_spinner_placeholder:
        with st.spinner("Please wait while we process your query..."):
            g_podcast = generate_podcast_text(prompt=prompt)

            st.session_state.podcast_generate = g_podcast

    with text_spinner_placeholder:
        with st.spinner("Please wait while we process your query..."):
            music_path = audioGen(text=st.session_state.podcast_generate)  # Generate background music from the LLM's music description.
            # The demo skips ElevenLabs and narrates with the bundled sample instead:
            # audio_path = with_premade_voice(prompt=prompt, elevenlabs_api_key=elevenlabs_api_key)
            audio_path = r'ElevenLabsNicole.wav'  # relative path, so the demo runs outside the original dev machine
            combined_path = overlay_music(audio_path=audio_path, music_path=music_path)  # Combine both audio files into one.
            if music_path != "":
                st.session_state.output_file_path = combined_path


# Configure logger
logging.basicConfig(format="\n%(asctime)s\n%(message)s", level=logging.INFO, force=True)

# Configure Streamlit page and state
st.set_page_config(page_title="QuakeAI", page_icon="🎧")

# Store the initial value of widgets in session state
if "podcast_generate" not in st.session_state:
    st.session_state.podcast_generate = ""

if "output_file_path" not in st.session_state:
    st.session_state.output_file_path = ""

if "input_file_path" not in st.session_state:
    st.session_state.input_file_path = ""

if "text_error" not in st.session_state:
    st.session_state.text_error = ""

if "visibility" not in st.session_state:
    st.session_state.visibility = "visible"

# Force responsive layout for columns also on mobile
st.write(
    """
    <style>
    [data-testid="column"] {
        width: calc(50% - 1rem);
        flex: 1 1 calc(50% - 1rem);
        min-width: calc(50% - 1rem);
    }
    </style>
    """,
    unsafe_allow_html=True,
)

# Give a title to the app
st.title("ElevenLabs + Llama 2 + MusicGen")

# Give a description to the app
st.markdown(
    "This is a demo of QuakeAI, built for the Llama 2 Hackathon with Clarifai."
)

# # Create a column layout to make the UX better.
# col1, col2 = st.columns(2)

# # First one is for the Twitch channel name: `Ex. lablabai`.
# with col1:
#     twitch_channel = st.text_input(label="Twitch channel", placeholder="Ex. lablabai")

# # Second one is for manual storyline input: `There's a llama in my garden. It's eating all my flowers. I'm going to call the police.`.
# with col2:
#     # manual_storyline = st.text_input(label="manual storyline input", placeholder="Ex. There's a llama...")
#     st.button(
#         label="Generate Story through Twitch",  # name on the button
#         help="Click to generate story",  # hint text (on hover)
#         key="generate_story_twitch",  # key to be used for the button
#         type="primary",  # primary (red) streamlit button
#         on_click=get_response,  # function to be called on click
#         args=(twitch_channel,),  # arguments to be passed to the function
#     )

# Get the ElevenLabs API key from the user
with st.sidebar:
    elevenlabs_api_key = st.text_input("ElevenLabs API key", value="", type="password")
    st.caption("*If you don't have an ElevenLabs API key, get it [here](https://elevenlabs.io/).*")

# Create a text input to describe the podcast topic, information, or a brief explanation.
prompt = st.text_input(label="Story info", placeholder="Ex. There's a llama...")
# Create a column layout to make the UX better.
col3, col4 = st.columns(2)

with col3:
    st.button(
        label="Generate Story through Text Input Demo",  # name on the button
        help="Click to generate story",  # hint text (on hover)
        key="generate_story_user_demo",  # key to be used for the button
        type="secondary",  # secondary (gray) streamlit button
        on_click=generate_podcast_demo,  # function to be called on click
        args=[prompt],  # arguments to be passed to the function
    )
# Create a button to generate the podcast.
with col4:
    if st.button(
        label="Generate Story through Text Input",  # name on the button
        help="Click to generate story",  # hint text (on hover)
        key="generate_story_user",  # key to be used for the button
        type="primary",  # primary (red) streamlit button
        on_click=generate_podcast,  # function to be called on click
        args=[prompt, elevenlabs_api_key],  # arguments to be passed to the function
    ):
        # Validate inputs
        if not elevenlabs_api_key.strip():
            st.error("Please provide your ElevenLabs API key.")


# Shows a loading icon while the podcast and audio are being generated
text_spinner_placeholder = st.empty()

# Shows an error message if any error occurs
if st.session_state.text_error:
    st.error(st.session_state.text_error)


# Output the generated music description
if st.session_state.podcast_generate:
    st.markdown("""---""")
    st.subheader("Read Music Description")
    st.text_area(label="You may read the music description while the audio is being generated.", value=st.session_state.podcast_generate,)


# Output the generated podcast audio
if st.session_state.output_file_path:
    st.markdown("""---""")
    st.subheader("Listen to The Story")

    with open(st.session_state.output_file_path, "rb") as audio_file:
        audio_bytes = audio_file.read()

    st.audio(audio_bytes, format='audio/wav', start_time=0)  # the combined file is a WAV
--------------------------------------------------------------------------------
/audioGen.py:
--------------------------------------------------------------------------------
# Installation
# !pip install --upgrade --quiet pip
# !pip install --quiet git+https://github.com/huggingface/transformers.git datasets[audio]
# !pip install scipy

from transformers import MusicgenForConditionalGeneration
from transformers import AutoProcessor
import torch
import scipy.io.wavfile
import re

def audioGen(text):
    # The Llama-2 reply usually wraps the music prompt in double quotes; extract
    # the first quoted span, and fall back to the full reply if none is found.
    quoted = re.findall(r'"([^"]*)"', text)
    if quoted:
        text = quoted[0]
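
    # "facebook/musicgen-small" is the smallest public MusicGen checkpoint; the
    # larger "facebook/musicgen-medium" would trade generation speed for quality.
    # With max_new_tokens=256 below, the model produces roughly five seconds of
    # audio (MusicGen generates audio tokens at a 50 Hz frame rate).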
    model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
    sampling_rate = model.config.audio_encoder.sampling_rate

    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    model.to(device)

    processor = AutoProcessor.from_pretrained("facebook/musicgen-small")

    inputs = processor(
        # text=["This playful tune features a bouncy, upbeat rhythm that captures the mischievous energy of a llama munching on flowers in a garden. The melody is lighthearted and whimsical, with a touch of mischief and mayhem thrown in for good measure. Imagine a jaunty flute or recorder solo, accompanied by a bouncy piano or accordion, with a sprinkle of silly sound effects to capture the llama's antics. The chorus might feature a fun, repetitive phrase like 'Llama, llama, eating all my flowers!' to emphasize the silly situation. Overall, 'The Llama's Garden Jig' is a fun and frolicsome tune that will put a smile on your face and transport you to a sunny, whimsical world of garden adventures"],
        text=[text],
        padding=True,
        return_tensors="pt",
    )

    audio_values = model.generate(**inputs.to(device), do_sample=True, guidance_scale=3, max_new_tokens=256)

    scipy.io.wavfile.write("musicgen_out_Llama.wav", rate=sampling_rate, data=audio_values[0, 0].cpu().numpy())
    music_path = 'musicgen_out_Llama.wav'

    print("MusicGen -> Completion:\n")

    return music_path
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
altair==5.0.1
anvil-uplink==0.4.2
asttokens==2.2.1
attrs==23.1.0
backcall==0.2.0
blinker==1.6.2
boto3==1.28.30
botocore==1.31.30
cachetools==5.3.1
certifi==2023.7.22
charset-normalizer==3.2.0
clarifai==9.7.5
clarifai-grpc==9.7.3
click==8.1.7
colorama==0.4.6
decorator==4.4.2
elevenlabs==0.2.21
executing==1.2.0
filelock==3.12.2
fsspec==2023.6.0
future==0.18.3
gitdb==4.0.10
GitPython==3.1.32
google-api-core==2.11.1
google-api-python-client==2.95.0
google-auth==2.22.0
google-auth-httplib2==0.1.0
google-auth-oauthlib==1.0.0
googleapis-common-protos==1.60.0
grpcio==1.57.0
httplib2==0.22.0
huggingface-hub==0.16.4
idna==3.4
imageio==2.31.2
imageio-ffmpeg==0.4.8
importlib-metadata==6.8.0
ipython==8.14.0
isodate==0.6.1
jedi==0.19.0
Jinja2==3.1.2
jmespath==1.0.1
jsonschema==4.19.0
jsonschema-specifications==2023.7.1
lxml==4.9.2
markdown-it-py==3.0.0
MarkupSafe==2.1.3
matplotlib-inline==0.1.6
mdurl==0.1.2
moviepy==1.0.3
mpmath==1.3.0
networkx==3.1
numpy==1.25.1
oauthlib==3.2.2
packaging==23.1
pandas==2.0.3
parso==0.8.3
pickleshare==0.7.5
Pillow==9.5.0
proglog==0.1.10
prompt-toolkit==3.0.39
protobuf==4.23.4
pure-eval==0.2.2
pyarrow==13.0.0
pyasn1==0.5.0
pyasn1-modules==0.3.0
pycountry==22.3.5
pycryptodome==3.17
pydantic==1.10.12
pydeck==0.8.0
pydub==0.25.1
Pygments==2.15.1
Pympler==1.0.1
pyparsing==3.1.1
PySocks==1.7.1
python-dateutil==2.8.2
python-rapidjson==1.10
pytz==2023.3
pytz-deprecation-shim==0.1.0.post0
PyYAML==6.0.1
referencing==0.30.2
regex==2023.6.3
requests==2.31.0
requests-oauthlib==1.3.1
rich==13.4.2
rpds-py==0.9.2
rsa==4.9
s3transfer==0.6.2
safetensors==0.3.1
scipy==1.11.2
six==1.16.0
smmap==5.0.0
stack-data==0.6.2
streamlit==1.26.0
sympy==1.12
tenacity==8.2.3
tokenizers==0.13.3
toml==0.10.2
toolz==0.12.0
torch==2.0.1
tornado==6.3.3
tqdm==4.64.1
traitlets==5.9.0
transformers==4.31.0
tritonclient==2.34.0
typing_extensions==4.7.1
tzdata==2023.3
tzlocal==4.3.1
uritemplate==4.1.1
urllib3==1.26.16
validators==0.21.2
watchdog==3.0.0
wcwidth==0.2.6
websocket-client==1.5.1
ws4py==0.5.1
zipp==3.16.2
--------------------------------------------------------------------------------