├── .env.example
├── static
│   ├── logo.png
│   ├── favicon.ico
│   └── ai_avatar.png
├── .streamlit
│   └── config.toml
├── requirements.txt
├── Dockerfile
├── config.py
├── LICENSE
├── styles.py
├── README.md
├── .gitignore
├── utils.py
└── app.py

/.env.example:
--------------------------------------------------------------------------------
GROQ_API_KEY=
STREAMLIT_SERVER_ENABLE_STATIC_SERVING=true
--------------------------------------------------------------------------------
/static/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skapadia3214/project-media-qa/HEAD/static/logo.png
--------------------------------------------------------------------------------
/static/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skapadia3214/project-media-qa/HEAD/static/favicon.ico
--------------------------------------------------------------------------------
/static/ai_avatar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skapadia3214/project-media-qa/HEAD/static/ai_avatar.png
--------------------------------------------------------------------------------
/.streamlit/config.toml:
--------------------------------------------------------------------------------
[theme]
primaryColor="#F55036"
backgroundColor="#FFFFFF"
textColor="#000000"
base="light"

[server]
enableStaticServing = true
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
groq==0.8.0
requests==2.32.2
streamlit==1.35.0
watchdog==4.0.1
streamlit-audiorecorder==0.0.5
streamlit-extras==0.4.2
llama-index==0.10.41
llama-index-embeddings-huggingface==0.2.1
yt_dlp==2024.7.16
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM python:3.10-slim

WORKDIR /app

# Install ffmpeg and other necessary packages
RUN apt-get update && \
    apt-get install -y ffmpeg && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Copy and install requirements before the rest of the source so this layer
# is cached when only application code changes
COPY requirements.txt ./
RUN pip install -r requirements.txt

COPY . /app/

# Obsolete pytube workarounds, kept for reference (the app now uses yt_dlp):
# https://github.com/pytube/pytube/issues/1498#issuecomment-1475993725
# RUN sed -i 's/transform_plan_raw =.*/transform_plan_raw = js/g' /usr/local/lib/python3.10/site-packages/pytube/cipher.py

# https://github.com/pytube/pytube/issues/1954#issuecomment-2218305516
# COPY cipher.py /usr/local/lib/python3.10/site-packages/pytube/cipher.py

CMD ["streamlit", "run", "app.py", "--server.port", "8080"]
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
'''
Configuration module that defines the global objects used throughout the app.
'''
from typing import Optional

from llama_index.core import VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from groq import Groq

# Embedding model used for both ingestion and querying
EMBED_MODEL = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Ingestion pipeline: split the transcript into 200-token chunks with a
# 20-token overlap, then embed each chunk
PIPELINE = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_size=200, chunk_overlap=20),
        EMBED_MODEL,
    ]
)

# Reads the API key from the GROQ_API_KEY environment variable
GROQ_CLIENT = Groq()

# Populated at runtime once a transcript has been indexed
VECTOR_INDEX: Optional[VectorStoreIndex] = None
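

# --- Usage sketch (illustrative only; the real flow lives in utils.py and
# app.py). It shows how the objects above fit together: run the ingestion
# pipeline over a transcript, build an index from the resulting nodes, and
# retrieve the chunks most similar to a question. The transcript text below
# is a made-up placeholder.
if __name__ == "__main__":
    from llama_index.core import Document

    transcript = "Speaker 0: Welcome to the show. Today we discuss low-latency inference."
    nodes = PIPELINE.run(documents=[Document(text=transcript)])
    index = VectorStoreIndex(embed_model=EMBED_MODEL, nodes=nodes)
    for node in index.as_retriever(similarity_top_k=3).retrieve("What is discussed?"):
        print(node.text)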
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2024 skapadia3214

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/styles.py:
--------------------------------------------------------------------------------
'''
Custom CSS styling for the Streamlit components used in the app.
'''

button_css = """
button {
    background-color: #F55036;
    color: white;
    border-radius: 0px;
}
"""

selectbox_css = """
{
    color: white;
    border-radius: 0rem;
}
.st-aq {
    border-bottom-right-radius: 0rem;
}
.st-ap {
    border-top-right-radius: 0rem;
}
.st-ao {
    border-bottom-left-radius: 0rem;
}
.st-an {
    border-top-left-radius: 0rem;
}
"""

file_uploader_css = """
{
    display: flex;
    align-items: center;
    justify-content: center;
}
.st-emotion-cache-9rpsw7 {
    display: flex;
    border-radius: 0rem;
    align-items: center;
    justify-content: center;
    -webkit-box-align: center;
    font-weight: 600;
    background-color: rgb(240, 242, 246);
    padding: 1rem;
    box-sizing: border-box;
}
.st-emotion-cache-9dv452 {
    border-radius: 0rem;
    color: #F55036;
    text-transform: uppercase;
}
"""

header_container_css = """
h2 {
    color: #F55036;
    font-weight: 200;
    font-size: calc(4rem + 1.2vw);
}
"""

transcript_container = """
details {
    border-radius: 0rem;
}
"""
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Project Media QA

Project Media QA lets you ask questions about any uploaded or recorded audio file or YouTube video, powered by Groq's Whisper speech-to-text transcription API and Groq's LLM API.

## Prerequisites

Before running the project, ensure you have the following:

- Python 3.10 (the project has been tested with this version)
- FFmpeg (used for audio extraction and conversion)
- A Groq API key

## Setup

1. Set up your Groq API key as an environment variable:

    ```bash
    export GROQ_API_KEY=<your-api-key>
    ```

    Replace `<your-api-key>` with your actual Groq API key.

2. Create a virtual environment:

    ```bash
    virtualenv -p python3 venv
    ```

3. Activate the virtual environment:

    ```bash
    source venv/bin/activate
    ```

4. Install the required dependencies:

    ```bash
    pip install -r requirements.txt
    ```

## Running the Application

To run the application locally, use the following command:

```bash
streamlit run app.py --server.port 8080 --server.address 0.0.0.0
```

This command starts the Streamlit application and makes it accessible at `http://localhost:8080`.
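
## Running with Docker

The repository also ships a Dockerfile, so you can run the app in a container instead. A minimal sketch (the image name `project-media-qa` is arbitrary; pass your Groq API key in through the environment):

```bash
docker build -t project-media-qa .
docker run -p 8080:8080 -e GROQ_API_KEY=<your-api-key> project-media-qa
```

The container starts Streamlit on port 8080, matching the `CMD` in the Dockerfile.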

## Usage

1. Launch the application by accessing `http://localhost:8080` in your web browser.

2. Choose an audio source: record audio in the browser, upload a media file, or load media from a URL (YouTube links are supported).

3. Click **Transcribe** to generate a transcript, then ask questions about it in the chat box.

## Contributing

If you'd like to contribute to this project, please follow these steps:

1. Fork the repository.

2. Create a new branch for your feature or bug fix.

3. Make your changes and commit them with descriptive commit messages.

4. Push your changes to your forked repository.

5. Submit a pull request to the main repository, explaining your changes and their benefits.

## License

This project is licensed under the [MIT License](LICENSE).

## Contact

If you have any questions or suggestions regarding this project, please feel free to contact the project maintainer at [skapadia@groq.com](mailto:skapadia@groq.com).
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
'''
Utility functions used throughout the app.
'''
import os
import time
from io import BytesIO
from typing import Iterable

import requests
import yt_dlp
from groq.types.chat import ChatCompletionMessageParam
from llama_index.core import Document, VectorStoreIndex

from config import GROQ_CLIENT, EMBED_MODEL, VECTOR_INDEX, PIPELINE

def combine_text_with_markers_and_speaker(data):
    '''Format diarized transcript data as "Speaker N: ..." lines.'''
    combined_text = ""
    for item in data:
        speaker_text = " ".join(sentence["text"] for sentence in item["sentences"])
        speaker_info = f"Speaker {item['speaker']}:"
        combined_text += f"{speaker_info} {speaker_text}\n"
    return combined_text

def read_from_url(url: str) -> BytesIO:
    '''Download a media file and return its contents as a BytesIO buffer.'''
    res = requests.get(url, timeout=60)
    audio_bytes = BytesIO(res.content)
    return audio_bytes

def read_from_youtube(url: str) -> tuple[BytesIO, str]:
    '''Download a YouTube video's audio track and return (buffer, MIME type).'''
    ydl_opts = {
        'format': 'worstaudio/worst',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'm4a',
            'preferredquality': '32',
        }],
        'outtmpl': 'temp_audio.%(ext)s',
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        filename = ydl.prepare_filename(info)

    # The file extension might have changed due to FFmpeg conversion
    if os.path.exists(filename):
        actual_filename = filename
    elif os.path.exists(filename.rsplit('.', 1)[0] + '.m4a'):
        actual_filename = filename.rsplit('.', 1)[0] + '.m4a'
    else:
        raise FileNotFoundError(f"Could not find the downloaded audio file: {filename}")

    # Read the file into a BytesIO object
    with open(actual_filename, 'rb') as f:
        buffer = BytesIO(f.read())

    # Get the MIME type
    mime_type = f"audio/{actual_filename.split('.')[-1]}"

    # Delete the temporary file
    os.remove(actual_filename)

    return buffer, mime_type

# Superseded pytube implementation, kept for reference:
# def read_from_youtube(url: str):
#     yt = YouTube(url)
#     video = yt.streams.filter(only_audio=True, mime_type="audio/webm").first()
#
#     if video is None:
#         raise ValueError("No audio/webm stream found for the given YouTube URL.")
#
#     buffer = BytesIO()
#     video.stream_to_buffer(buffer)
#     buffer.seek(0)
#
#     audio_data = buffer.read()
#
#     print(f"Audio retrieved as audio/webm (mimetype: {video.mime_type})")
#
#     return BytesIO(audio_data)

def prerecorded(source, model: str = "whisper-large-v3", options: dict[str, str] | None = None) -> dict:
    '''Transcribe a prerecorded audio buffer with Groq's transcription API.

    NOTE: `options` is accepted for forward compatibility but is not
    currently forwarded to the API.
    '''
    print(f"Source: {source}")
    start = time.time()
    audio_bytes: BytesIO = source['buffer']
    file_type = source.get("mimetype", "audio/wav")
    if not file_type:
        file_type = "audio/wav"
    file_type = file_type.split("/")[1]
    print(f"Final filetype: {file_type}")
    transcription = GROQ_CLIENT.audio.transcriptions.create(
        file=(f"audio.{file_type}", audio_bytes.read()),
        model=model,
    )
    end = time.time()
    audio_bytes.seek(0)
    return {
        'text': transcription.text,
        'time_taken': end - start,
    }

def create_vectorstore(transcript: str):
    '''Chunk and embed the transcript, then build and return a vector index.'''
    global VECTOR_INDEX
    nodes = PIPELINE.run(documents=[Document(text=transcript)])
    VECTOR_INDEX = VectorStoreIndex(embed_model=EMBED_MODEL, nodes=nodes)
    return VECTOR_INDEX

def chat_stream(model: str, messages: Iterable[ChatCompletionMessageParam], **kwargs):
    '''Stream chat-completion tokens from Groq, then yield a tokens/sec footer.'''
    stream_response = GROQ_CLIENT.chat.completions.create(
        messages=messages,
        model=model,
        stream=True,
        **kwargs
    )

    for chunk in stream_response:
        tokens = chunk.choices[0].delta.content
        if tokens:
            yield tokens
        if x_groq := chunk.x_groq:
            if not x_groq.usage:
                continue
            usage = x_groq.usage
            completion_time = usage.completion_time
            completion_tokens = usage.completion_tokens
            tps = completion_tokens / completion_time
            yield f"\n\n_Tokens/sec: {round(tps, 2)}_"
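
# --- Usage sketch (illustrative only; app.py drives these functions in the
# real app). It assumes GROQ_API_KEY is set, a local `sample.wav` exists, and
# that "llama3-8b-8192" is an available Groq chat model — all placeholder
# assumptions, not requirements of this module.
if __name__ == "__main__":
    with open("sample.wav", "rb") as f:
        audio = BytesIO(f.read())

    # Transcribe the audio with Whisper via Groq
    result = prerecorded({"buffer": audio, "mimetype": "audio/wav"})
    print(f"Transcribed in {result['time_taken']:.2f}s")

    # Index the transcript and retrieve context for a question
    index = create_vectorstore(result["text"])
    question = "What was discussed?"
    retrieved = index.as_retriever(similarity_top_k=3).retrieve(question)
    context = "\n".join(node.text for node in retrieved)

    # Stream an answer grounded in the retrieved context
    messages = [
        {"role": "system", "content": f"Answer questions using this transcript:\n{context}"},
        {"role": "user", "content": question},
    ]
    for token in chat_stream("llama3-8b-8192", messages):
        print(token, end="", flush=True)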
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
'''
Main entry point for the Streamlit app.
'''
from io import BytesIO

import streamlit as st
from audiorecorder import audiorecorder
from streamlit_extras.stylable_container import stylable_container

from styles import button_css, selectbox_css, file_uploader_css, header_container_css, transcript_container
from utils import read_from_url, prerecorded, chat_stream, create_vectorstore, read_from_youtube
from config import GROQ_CLIENT, VECTOR_INDEX

st.set_page_config(
    page_title="Project Media QA",
    layout='centered',
    page_icon='static/favicon.ico',
    menu_items={
        'About': "## Project Media QA \n [Groqlabs](https://wow.groq.com/groq-labs/)"
    }
)

st.write("---")
header_container = stylable_container(
    key="header",
    css_styles=header_container_css
)
header_container.header("Project Media QA", anchor=False)


ASR_MODELS = {"Whisper V3 large": "whisper-large-v3"}

# Build the LLM dropdown from the models Groq currently serves, excluding
# the ASR and moderation models
GROQ_MODELS = {
    model.id.replace("-", " ").title(): model.id
    for model in GROQ_CLIENT.models.list().data
    if not (model.id.startswith("whisper") or model.id.startswith("llama-guard"))
}

LANGUAGES = {
    "Automatic Language Detection": None,
}


st.caption("Experience ultra-accelerated video and audio transcription, summarization, and QA, made possible by combining open-source LLMs and ASR models, both powered by Groq.")


# Dropdowns with styling
dropdown_container = stylable_container(
    key="dropdown_container",
    css_styles=selectbox_css
)

# Columns for horizontal layout
col1, col2, col3 = st.columns(3)

with col1:
    language = st.selectbox(
        "Language",
        options=list(LANGUAGES.keys()),
    )
    if language == "Automatic Language Detection":
        lang_options = {"detect_language": True}
    else:
        lang_options = {"language": LANGUAGES[language]}

with col2:
    asr_model = st.selectbox("Groq Supported ASR Models", options=list(ASR_MODELS.keys()))

with col3:
    groq_model = st.selectbox("Groq Supported LLMs", options=list(GROQ_MODELS.keys()))

audio_source = st.radio(
    "Choose audio source",
    options=["Record audio", "Upload media file", "Load media from URL"],
    horizontal=True,
)

if audio_source == "Upload media file":
    file_uploader = stylable_container(
        key="file_uploader",
        css_styles=file_uploader_css
    )
    audio_file = file_uploader.file_uploader(
        label="Upload media file",
        type=["mp3", "wav", "webm"],
        label_visibility="collapsed",
    )
    print(f"Audio uploaded: {audio_file}")
    if audio_file:
        st.session_state['result'] = None
        st.session_state['audio'] = BytesIO(audio_file.getvalue())
        st.session_state['mimetype'] = audio_file.type
    else:
        st.session_state['audio'] = None
        st.session_state['mimetype'] = None

elif audio_source == "Load media from URL":
    url = st.text_input(
        "URL",
        key="url",
        value="https://static.deepgram.com/examples/interview_speech-analytics.wav",
    )

    if url != "":
        st.session_state["audio"] = None
        try:
            if "youtube.com" in url or "youtu.be" in url:
                print("Reading audio from YouTube")
                with st.spinner("Loading YouTube video..."):
                    st.session_state['result'] = None
                    st.video(url)
                    buffer, mimetype = read_from_youtube(url)
                    st.session_state["audio"] = buffer
                    st.session_state['mimetype'] = mimetype
            else:
                print("Reading audio from URL")
                with st.spinner("Loading audio URL..."):
                    st.session_state['result'] = None
                    st.session_state["audio"] = read_from_url(url)
                    st.session_state['mimetype'] = "audio/wav"
                    st.audio(st.session_state["audio"])
            print(f"Audio bytes: {st.session_state['audio'].getbuffer().nbytes} bytes")
        except Exception as e:
            st.error(e)
            st.error("Invalid URL entered.")

else:
    audio = audiorecorder("Click to record", "Click to stop recording", show_visualizer=True, key="audio-recorder")
    if len(audio) != 0:
        print(f"Audio recorded: {audio}, length {len(audio)}")
        st.session_state["result"] = None
        with st.spinner("Processing audio..."):
            audio_bytes = BytesIO()
            audio.export(audio_bytes, format="wav")
            st.session_state["audio"] = audio_bytes
            st.audio(audio_bytes)
            st.session_state['mimetype'] = "audio/wav"
            st.session_state["audio"].seek(0)
    else:
        st.session_state['audio'] = None
        st.session_state['mimetype'] = None


options = {
    "model": ASR_MODELS[asr_model],
    **lang_options,
}
@st.experimental_fragment
def transcribe_container():
    global transcribe_button_container, transcribe_status, transcribe_button, VECTOR_INDEX
    transcribe_button_container = stylable_container(
        key="transcribe_button",
        css_styles=button_css
    )
    transcribe_status = stylable_container(key="details", css_styles=transcript_container).empty()
    # Buttons with styling
    transcribe_button = transcribe_button_container.button("Transcribe", use_container_width=True, type="primary")
    if st.session_state['audio']:
        if transcribe_button:
            try:
                with transcribe_status.status("Transcribing", expanded=True) as transcribe_status:
                    output = prerecorded({"buffer": st.session_state["audio"], "mimetype": st.session_state.get("mimetype", "audio/wav")}, options['model'], options)
                    st.session_state.result = output['text']
                    transcribe_button_container.download_button("Download Transcript", data=st.session_state.result, type="primary", file_name="transcript.txt")
                    time_taken = output['time_taken']
                    transcribe_status.update(label=f"_Completed in {round(time_taken, 2)}s_", state='complete')
                    if st.session_state.result:
                        st.write(st.session_state.result)
                        with st.spinner("Indexing documents..."):
                            print("Indexing transcript to vectorstore...")
                            VECTOR_INDEX = create_vectorstore(st.session_state.result)
            except Exception as e:
                transcribe_status.update(label="Error", state='error')
                st.error("Something went wrong :/")

@st.experimental_fragment
def chat_container():
    global user_input, transcribe_status, VECTOR_INDEX
    if st.session_state.get('audio'):
        user_input = st.chat_input(placeholder="Ask a question about the transcript:")
    else:
        user_input = ""

    groq_m = GROQ_MODELS[groq_model]
    if user_input:
        # Transcribe on demand if the user chats before pressing "Transcribe"
        if not st.session_state.get("result"):
            try:
                with transcribe_status.status("Transcribing", expanded=True) as transcribe_status:
                    output = prerecorded({"buffer": st.session_state["audio"], "mimetype": st.session_state.get("mimetype", "audio/wav")}, options['model'], options)
                    st.session_state.result = output['text']
                    # TODO: download button does not work if user chats multiple times
                    transcribe_button_container.download_button("Download Transcript", data=st.session_state.result, type="primary", file_name="transcript.txt")
                    time_taken = output['time_taken']
                    transcribe_status.update(label=f"_Completed in {round(time_taken, 2)}s_", state='complete')
                    if st.session_state.result:
                        st.write(st.session_state.result)
                        with st.spinner("Indexing documents..."):
                            VECTOR_INDEX = create_vectorstore(st.session_state.result)
            except Exception as e:
                transcribe_status.update(label="Error", state='error')
                st.error("Something went wrong :/")

        # Chat: short transcripts fit into the prompt directly; longer ones go
        # through the RAG pipeline
        if len(st.session_state.result) <= 2000:
            print("Stuffing whole transcript into system prompt")
            context = st.session_state.result
        else:
            # Find the most similar chunks in the vector index
            print("Using RAG pipeline")
            retriever = VECTOR_INDEX.as_retriever(similarity_top_k=3)
            nodes = retriever.retrieve(user_input)
            context = ""
            for node in nodes:
                context += node.text + "\n"

        try:
            prompt = user_input
            messages = [
                {"role": "system", "content": f"""\
You are a helpful assistant that answers questions based on this transcript:
```
{context}
```
Answer questions that the user asks only about the transcript and nothing else. \
Do not include the user's question in your response, only respond with your answer. \
Your responses should be in markdown. \
"""},
                {"role": "user", "content": prompt},
            ]
            gen = chat_stream(groq_m, messages)
            if transcribe_status:
                transcribe_status.update(expanded=False)
            with st.chat_message("ai", avatar="./static/ai_avatar.png"):
                st.write_stream(gen)
        except Exception as e:
            st.error("Something went wrong :/")
        return

transcribe_container()
chat_container()
--------------------------------------------------------------------------------