├── ElevenLabsNicole.wav
├── README.md
├── _elevenlabs.py
├── _langchain.py
├── app.py
├── audioGen.py
└── requirements.txt
--------------------------------------------------------------------------------
/ElevenLabsNicole.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tyemalshara/QuakeAI/9bcebedfcf1cc75dd13c345381695cbbb80b269e/ElevenLabsNicole.wav
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# QuakeAI

QuakeAI is an audiobook generator that enables authors, writers, and live streamers/broadcasters to produce spoken stories with AI-generated background music that brings them to life. QuakeAI leverages LLMs, music-generation models, and voice-generation models, so users only need to supply a story idea, or a story they have written themselves, and QuakeAI turns it into an audiobook with rich background-music effects. Authors and writers will be surprised how easy it is to turn stories written on paper into audio spoken in their own voice, or a premade one, backed by high-quality music, and publish the result on Audible at the click of a button. Creators of shorts and reels can generate music for their videos without worrying about demonetization or DMCA takedowns. Authors can even brainstorm short stories with other authors in a chat room and let QuakeAI turn the result into an audiobook. Try QuakeAI now and be amazed.
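
## Quick start

A minimal local run might look like this (a sketch, assuming the pinned dependencies in `requirements.txt`; a GPU is optional but speeds up MusicGen):

    pip install -r requirements.txt
    streamlit run app.py

Then paste your ElevenLabs API key into the sidebar, enter a short story idea, and click "Generate Story through Text Input" (or use the demo button, which narrates with the bundled `ElevenLabsNicole.wav` sample).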
--------------------------------------------------------------------------------
/_elevenlabs.py:
--------------------------------------------------------------------------------
# Import the necessary functions from the elevenlabs library (the 0.2.x API, as pinned in requirements.txt).
from elevenlabs import generate, set_api_key  # voices, play
from elevenlabs.api import History  # currently unused
import os


# The ElevenLabs API key could also be set globally like this:
# os.environ['ELEVENLABS_API_KEY'] = 'a77232a2653ddfkjff03fgluasfc1f2'
# set_api_key(os.environ.get("ELEVENLABS_API_KEY"))

# Generate audio for the podcast with a premade voice. These voices are already
# trained and available in ElevenLabs by default.
def with_premade_voice(prompt, elevenlabs_api_key):

    os.environ['ELEVENLABS_API_KEY'] = elevenlabs_api_key
    set_api_key(os.environ.get("ELEVENLABS_API_KEY"))
    audio_path = 'Nicole_whisper.mp3'

    try:
        # The generate() call sits inside the try block so that API failures
        # also fall through to the empty-string return below.
        audio = generate(
            text=prompt,
            voice='Nicole',
            model="eleven_monolingual_v1"
        )

        with open(audio_path, 'wb') as f:
            f.write(audio)

        print("ElevenLabs -> Completion:\n")
        return audio_path

    except Exception as e:
        print(e)

    return ""

# Example: with_premade_voice(prompt="There's a llama", elevenlabs_api_key="YOUR_KEY")
--------------------------------------------------------------------------------
/_langchain.py:
--------------------------------------------------------------------------------
from clarifai_grpc.channel.clarifai_channel import ClarifaiChannel
from clarifai_grpc.grpc.api import resources_pb2, service_pb2, service_pb2_grpc
from clarifai_grpc.grpc.api.status import status_code_pb2

from audioGen import audioGen
import os

# Your PAT (Personal Access Token) can be found in the Clarifai portal under Authentication
os.environ['PAT'] = 'aca3bbf0c7c446f99d7e30fc894bd61b'

# Specify the correct user_id/app_id pairings,
# since you're making inferences outside your app's scope
USER_ID = 'meta'
APP_ID = 'Llama-2'
# Change these to whatever model and model version you want to use
MODEL_ID = 'llama2-13b-chat'
MODEL_VERSION_ID = '79a1af31aa8249a99602fc05687e8f40'
# Commented example of the prompt template for Llama 2:
# STORY_LINES = "There's a llama in my garden. It's eating all my flowers. I'm going to call the police."
# RAW_TEXT = f"[INST] <<SYS>>You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.<</SYS>> Given this Story:'{STORY_LINES}' write a very short prompt describing a melody that would fit the story. Here's a template for a prompt: 110bpm 64kbps 16khz lofi hiphop summer smooth [/INST]"

# get_response() generates a prompt for the music-generation model (MusicGen, via audioGen.py).
# It takes one parameter, `STORY_LINES` (the story text), wraps it in the Llama-2
# instruction template `RAW_TEXT`, sends it to the model identified by `MODEL_ID`
# and `MODEL_VERSION_ID`, and returns the completion.
def get_response(STORY_LINES):
    RAW_TEXT = f"[INST] <<SYS>>You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.<</SYS>> Given this Story:'{STORY_LINES}' write a very short prompt of one sentence describing a melody that would fit the story. Here's an example of the kind of prompt I expect you to answer with: 110bpm 64kbps 16khz lofi hiphop summer smooth [/INST]"
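
    # Note: the [INST] ... [/INST] and <<SYS>> ... <</SYS>> markers follow the
    # Llama-2 chat prompt format. The model is asked to reply with a one-line,
    # MusicGen-style tag string such as "110bpm 64kbps 16khz lofi hiphop summer smooth",
    # which audioGen() later extracts from between the double quotes.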
    # Set up the gRPC channel and stub for the LLM (Llama 2 in our case).
    channel = ClarifaiChannel.get_grpc_channel()
    stub = service_pb2_grpc.V2Stub(channel)

    metadata = (('authorization', 'Key ' + os.environ.get("PAT")),)

    userDataObject = resources_pb2.UserAppIDSet(user_id=USER_ID, app_id=APP_ID)
    post_model_outputs_response = stub.PostModelOutputs(
        service_pb2.PostModelOutputsRequest(
            user_app_id=userDataObject,  # The userDataObject created above is required when using a PAT
            model_id=MODEL_ID,
            version_id=MODEL_VERSION_ID,  # This is optional. Defaults to the latest model version
            inputs=[
                resources_pb2.Input(
                    data=resources_pb2.Data(
                        text=resources_pb2.Text(
                            raw=RAW_TEXT
                        )
                    )
                )
            ]
        ),
        metadata=metadata
    )
    if post_model_outputs_response.status.code != status_code_pb2.SUCCESS:
        print(post_model_outputs_response.status)
        raise Exception(f"Post model outputs failed, status: {post_model_outputs_response.status.description}")

    # Since we sent one input, exactly one output will exist here
    output = post_model_outputs_response.outputs[0]
    response = output.data.text.raw  # This is the completion from the LLM (Llama 2 in our case).

    # audioGen(output.data.text.raw)  # This would generate the background music for the storyline directly.

    print("Completion:\n")

    return response
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
# Import from standard library
import os
import logging

# Import from 3rd party libraries
from moviepy.editor import AudioFileClip, CompositeAudioClip
import streamlit as st

# Import modules from the local package
from _langchain import get_response
from _elevenlabs import with_premade_voice
from audioGen import audioGen

def generate_podcast_text(prompt):
    return get_response(STORY_LINES=prompt)

def overlay_music(audio_path, music_path):
    # Load the narration track
    audio = AudioFileClip(rf'{audio_path}')

    # Load the background-music track
    music = AudioFileClip(rf'{music_path}')

    # Mix the two tracks and write the result to disk
    combined_audio = CompositeAudioClip([audio, music])
    combined_audio.write_audiofile(r'combined_3.wav', fps=44100)
    combined_path = r'combined_3.wav'
    return combined_path

def generate_podcast(prompt, elevenlabs_api_key):

    if prompt == "":
        st.session_state.text_error = "Please enter a prompt."
        return

    with text_spinner_placeholder:
        with st.spinner("Please wait while we process your query..."):
            g_podcast = generate_podcast_text(prompt=prompt)

            st.session_state.podcast_generate = g_podcast

    with text_spinner_placeholder:
        with st.spinner("Please wait while we process your query..."):
            music_path = audioGen(text=st.session_state.podcast_generate)  # Generate background music from the LLM's music description.
            audio_path = with_premade_voice(prompt=prompt, elevenlabs_api_key=elevenlabs_api_key)  # Generate a spoken narration of the user's storyline.
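            # At this point `music_path` is the MusicGen background track and
            # `audio_path` is the ElevenLabs narration (or "" if ElevenLabs
            # failed); overlay_music() mixes the two with moviepy's
            # CompositeAudioClip, which keeps the longer clip's duration.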
            if audio_path == "":
                audio_path = r'ElevenLabsNicole.wav'  # fall back to the bundled sample narration if ElevenLabs failed
            combined_path = overlay_music(audio_path=audio_path, music_path=music_path)  # Combine both audio files into one.
            if music_path != "":
                st.session_state.output_file_path = combined_path

def generate_podcast_demo(prompt):

    if prompt == "":
        st.session_state.text_error = "Please enter a prompt."
        return

    with text_spinner_placeholder:
        with st.spinner("Please wait while we process your query..."):
            g_podcast = generate_podcast_text(prompt=prompt)

            st.session_state.podcast_generate = g_podcast

    with text_spinner_placeholder:
        with st.spinner("Please wait while we process your query..."):
            music_path = audioGen(text=st.session_state.podcast_generate)  # Generate background music from the LLM's music description.
            # The demo skips ElevenLabs and narrates with the bundled sample instead:
            # audio_path = with_premade_voice(prompt=prompt, elevenlabs_api_key=elevenlabs_api_key)
            audio_path = r'ElevenLabsNicole.wav'  # relative path, so the demo runs outside the original dev machine
            combined_path = overlay_music(audio_path=audio_path, music_path=music_path)  # Combine both audio files into one.
            if music_path != "":
                st.session_state.output_file_path = combined_path


# Configure logger
logging.basicConfig(format="\n%(asctime)s\n%(message)s", level=logging.INFO, force=True)

# Configure Streamlit page and state
st.set_page_config(page_title="QuakeAI", page_icon="🎧")

# Store the initial value of widgets in session state
if "podcast_generate" not in st.session_state:
    st.session_state.podcast_generate = ""

if "output_file_path" not in st.session_state:
    st.session_state.output_file_path = ""

if "input_file_path" not in st.session_state:
    st.session_state.input_file_path = ""

if "text_error" not in st.session_state:
    st.session_state.text_error = ""

if "visibility" not in st.session_state:
    st.session_state.visibility = "visible"

# Force responsive layout for columns also on mobile
st.write(
    """
    <style>
    [data-testid="column"] {
        width: calc(50% - 1rem);
        flex: 1 1 calc(50% - 1rem);
        min-width: calc(50% - 1rem);
    }
    </style>
    """,
    unsafe_allow_html=True,
)

# Give a title to the app
st.title("ElevenLabs + Llama 2 + MusicGen")

# Give a description to the app
st.markdown(
    "This is a demo of QuakeAI, built for the Llama 2 Hackathon with Clarifai."
)

# # Create a column layout to make the UX better.
# col1, col2 = st.columns(2)

# # First one is for the Twitch channel name: `Ex. lablabai`.
# with col1:
#     twitch_channel = st.text_input(label="Twitch channel", placeholder="Ex. lablabai")

# # Second one is for manual storyline input: `There's a llama in my garden. It's eating all my flowers. I'm going to call the police.`.
# with col2:
#     # manual_storyline = st.text_input(label="manual storyline input", placeholder="Ex. There's a llama...")
#     st.button(
#         label="Generate Story through Twitch",  # name on the button
#         help="Click to generate story",  # hint text (on hover)
#         key="generate_story_twitch",  # key to be used for the button
#         type="primary",  # primary (red) streamlit button
#         on_click=get_response,  # function to be called on click
#         args=(twitch_channel,),  # arguments to be passed to the function
#     )

# Get the ElevenLabs API key from the user
with st.sidebar:
    elevenlabs_api_key = st.text_input("ElevenLabs API key", value="", type="password")
    st.caption("*If you don't have an ElevenLabs API key, get it [here](https://elevenlabs.io/).*")

# Create a text input to describe the podcast topic, information, or a brief explanation.
prompt = st.text_input(label="Story info", placeholder="Ex. There's a llama...")
# Create a column layout to make the UX better.
col3, col4 = st.columns(2)

with col3:
    st.button(
        label="Generate Story through Text Input Demo",  # name on the button
        help="Click to generate story",  # hint text (on hover)
        key="generate_story_user_demo",  # key to be used for the button
        type="secondary",  # secondary (gray) streamlit button
        on_click=generate_podcast_demo,  # function to be called on click
        args=[prompt],  # arguments to be passed to the function
    )
# Create a button to generate the podcast.
with col4:
    if st.button(
        label="Generate Story through Text Input",  # name on the button
        help="Click to generate story",  # hint text (on hover)
        key="generate_story_user",  # key to be used for the button
        type="primary",  # primary (red) streamlit button
        on_click=generate_podcast,  # function to be called on click
        args=[prompt, elevenlabs_api_key],  # arguments to be passed to the function
    ):
        # Validate inputs
        if not elevenlabs_api_key.strip():
            st.error("Please provide your ElevenLabs API key.")


# Shows a loading icon while the podcast and audio are being generated
text_spinner_placeholder = st.empty()

# Shows an error message if any error occurs
if st.session_state.text_error:
    st.error(st.session_state.text_error)


# Output the generated music description
if st.session_state.podcast_generate:
    st.markdown("""---""")
    st.subheader("Read Music Description")
    st.text_area(label="You may read the music description while the audio is being generated.", value=st.session_state.podcast_generate,)


# Output the generated podcast audio
if st.session_state.output_file_path:
    st.markdown("""---""")
    st.subheader("Listen to The Story")

    with open(st.session_state.output_file_path, "rb") as audio_file:
        audio_bytes = audio_file.read()

    st.audio(audio_bytes, format='audio/wav', start_time=0)  # the combined file is a WAV
--------------------------------------------------------------------------------
/audioGen.py:
--------------------------------------------------------------------------------
# Installation
# !pip install --upgrade --quiet pip
# !pip install --quiet git+https://github.com/huggingface/transformers.git datasets[audio]
# !pip install scipy

from transformers import MusicgenForConditionalGeneration
from transformers import AutoProcessor
import torch
import scipy.io.wavfile
import re

def audioGen(text):
    # The Llama-2 reply usually wraps the music prompt in double quotes; extract
    # the first quoted span, and fall back to the full reply if none is found.
    quoted = re.findall(r'"([^"]*)"', text)
    if quoted:
        text = quoted[0]
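
    # "facebook/musicgen-small" is the smallest public MusicGen checkpoint; the
    # larger "facebook/musicgen-medium" would trade generation speed for quality.
    # With max_new_tokens=256 below, the model produces roughly five seconds of
    # audio (MusicGen generates audio tokens at a 50 Hz frame rate).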
    model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
    sampling_rate = model.config.audio_encoder.sampling_rate

    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    model.to(device)

    processor = AutoProcessor.from_pretrained("facebook/musicgen-small")

    inputs = processor(
        # text=["This playful tune features a bouncy, upbeat rhythm that captures the mischievous energy of a llama munching on flowers in a garden. The melody is lighthearted and whimsical, with a touch of mischief and mayhem thrown in for good measure. Imagine a jaunty flute or recorder solo, accompanied by a bouncy piano or accordion, with a sprinkle of silly sound effects to capture the llama's antics. The chorus might feature a fun, repetitive phrase like 'Llama, llama, eating all my flowers!' to emphasize the silly situation. Overall, 'The Llama's Garden Jig' is a fun and frolicsome tune that will put a smile on your face and transport you to a sunny, whimsical world of garden adventures"],
        text=[text],
        padding=True,
        return_tensors="pt",
    )

    audio_values = model.generate(**inputs.to(device), do_sample=True, guidance_scale=3, max_new_tokens=256)

    scipy.io.wavfile.write("musicgen_out_Llama.wav", rate=sampling_rate, data=audio_values[0, 0].cpu().numpy())
    music_path = 'musicgen_out_Llama.wav'

    print("MusicGen -> Completion:\n")

    return music_path
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
altair==5.0.1
anvil-uplink==0.4.2
asttokens==2.2.1
attrs==23.1.0
backcall==0.2.0
blinker==1.6.2
boto3==1.28.30
botocore==1.31.30
cachetools==5.3.1
certifi==2023.7.22
charset-normalizer==3.2.0
clarifai==9.7.5
clarifai-grpc==9.7.3
click==8.1.7
colorama==0.4.6
decorator==4.4.2
elevenlabs==0.2.21
executing==1.2.0
filelock==3.12.2
fsspec==2023.6.0
future==0.18.3
gitdb==4.0.10
GitPython==3.1.32
google-api-core==2.11.1
google-api-python-client==2.95.0
google-auth==2.22.0
google-auth-httplib2==0.1.0
google-auth-oauthlib==1.0.0
googleapis-common-protos==1.60.0
grpcio==1.57.0
httplib2==0.22.0
huggingface-hub==0.16.4
idna==3.4
imageio==2.31.2
imageio-ffmpeg==0.4.8
importlib-metadata==6.8.0
ipython==8.14.0
isodate==0.6.1
jedi==0.19.0
Jinja2==3.1.2
jmespath==1.0.1
jsonschema==4.19.0
jsonschema-specifications==2023.7.1
lxml==4.9.2
markdown-it-py==3.0.0
MarkupSafe==2.1.3
matplotlib-inline==0.1.6
mdurl==0.1.2
moviepy==1.0.3
mpmath==1.3.0
networkx==3.1
numpy==1.25.1
oauthlib==3.2.2
packaging==23.1
pandas==2.0.3
parso==0.8.3
pickleshare==0.7.5
Pillow==9.5.0
proglog==0.1.10
prompt-toolkit==3.0.39
protobuf==4.23.4
pure-eval==0.2.2
pyarrow==13.0.0
pyasn1==0.5.0
pyasn1-modules==0.3.0
pycountry==22.3.5
pycryptodome==3.17
pydantic==1.10.12
pydeck==0.8.0
pydub==0.25.1
Pygments==2.15.1
Pympler==1.0.1
pyparsing==3.1.1
PySocks==1.7.1
python-dateutil==2.8.2
python-rapidjson==1.10
pytz==2023.3
pytz-deprecation-shim==0.1.0.post0
PyYAML==6.0.1
referencing==0.30.2
regex==2023.6.3
requests==2.31.0
requests-oauthlib==1.3.1
rich==13.4.2
rpds-py==0.9.2
rsa==4.9
s3transfer==0.6.2
safetensors==0.3.1
scipy==1.11.2
six==1.16.0
smmap==5.0.0
stack-data==0.6.2
streamlit==1.26.0
sympy==1.12
tenacity==8.2.3
tokenizers==0.13.3
toml==0.10.2
toolz==0.12.0
torch==2.0.1
tornado==6.3.3
tqdm==4.64.1
traitlets==5.9.0
transformers==4.31.0
tritonclient==2.34.0
typing_extensions==4.7.1
tzdata==2023.3
tzlocal==4.3.1
uritemplate==4.1.1
urllib3==1.26.16
validators==0.21.2
watchdog==3.0.0
wcwidth==0.2.6
websocket-client==1.5.1
ws4py==0.5.1
zipp==3.16.2
--------------------------------------------------------------------------------