├── .env.example
├── static
│   ├── logo.png
│   ├── favicon.ico
│   └── ai_avatar.png
├── .streamlit
│   └── config.toml
├── requirements.txt
├── Dockerfile
├── config.py
├── LICENSE
├── styles.py
├── README.md
├── .gitignore
├── utils.py
└── app.py

/.env.example:
--------------------------------------------------------------------------------
GROQ_API_KEY=
STREAMLIT_SERVER_ENABLE_STATIC_SERVING=true
--------------------------------------------------------------------------------
/static/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skapadia3214/project-media-qa/HEAD/static/logo.png
--------------------------------------------------------------------------------
/static/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skapadia3214/project-media-qa/HEAD/static/favicon.ico
--------------------------------------------------------------------------------
/static/ai_avatar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skapadia3214/project-media-qa/HEAD/static/ai_avatar.png
--------------------------------------------------------------------------------
/.streamlit/config.toml:
--------------------------------------------------------------------------------
[theme]
primaryColor="#F55036"
backgroundColor="#FFFFFF"
textColor="#000000"
base="light"

[server]
enableStaticServing = true
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
groq==0.8.0
requests==2.32.2
streamlit==1.35.0
watchdog==4.0.1
streamlit-audiorecorder==0.0.5
streamlit-extras==0.4.2
llama-index==0.10.41
llama-index-embeddings-huggingface==0.2.1
yt_dlp==2024.7.16
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM python:3.10-slim

WORKDIR /app

# Install ffmpeg and other necessary packages
RUN apt-get update && \
    apt-get install -y ffmpeg && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Copy and install requirements before the rest of the source so this layer
# is cached when only application code changes
COPY requirements.txt ./
RUN pip install -r requirements.txt

COPY . /app/

# Obsolete pytube workarounds, kept for reference (the app now uses yt_dlp):
# https://github.com/pytube/pytube/issues/1498#issuecomment-1475993725
# RUN sed -i 's/transform_plan_raw =.*/transform_plan_raw = js/g' /usr/local/lib/python3.10/site-packages/pytube/cipher.py

# https://github.com/pytube/pytube/issues/1954#issuecomment-2218305516
# COPY cipher.py /usr/local/lib/python3.10/site-packages/pytube/cipher.py

CMD ["streamlit", "run", "app.py", "--server.port", "8080"]
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
'''
Configuration module that defines the global objects used throughout the app.
'''
from typing import Optional

from llama_index.core import VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from groq import Groq

# Embedding model used for both ingestion and querying
EMBED_MODEL = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Ingestion pipeline: split the transcript into 200-token chunks with a
# 20-token overlap, then embed each chunk
PIPELINE = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_size=200, chunk_overlap=20),
        EMBED_MODEL,
    ]
)

# Reads the API key from the GROQ_API_KEY environment variable
GROQ_CLIENT = Groq()

# Populated at runtime once a transcript has been indexed
VECTOR_INDEX: Optional[VectorStoreIndex] = None
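

# --- Usage sketch (illustrative only; the real flow lives in utils.py and
# app.py). It shows how the objects above fit together: run the ingestion
# pipeline over a transcript, build an index from the resulting nodes, and
# retrieve the chunks most similar to a question. The transcript text below
# is a made-up placeholder.
if __name__ == "__main__":
    from llama_index.core import Document

    transcript = "Speaker 0: Welcome to the show. Today we discuss low-latency inference."
    nodes = PIPELINE.run(documents=[Document(text=transcript)])
    index = VectorStoreIndex(embed_model=EMBED_MODEL, nodes=nodes)
    for node in index.as_retriever(similarity_top_k=3).retrieve("What is discussed?"):
        print(node.text)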
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2024 skapadia3214

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/styles.py:
--------------------------------------------------------------------------------
'''
Custom CSS styling for the Streamlit components used in the app.
'''

button_css = """
button {
    background-color: #F55036;
    color: white;
    border-radius: 0px;
}
"""

selectbox_css = """
{
    color: white;
    border-radius: 0rem;
}
.st-aq {
    border-bottom-right-radius: 0rem;
}
.st-ap {
    border-top-right-radius: 0rem;
}
.st-ao {
    border-bottom-left-radius: 0rem;
}
.st-an {
    border-top-left-radius: 0rem;
}
"""

file_uploader_css = """
{
    display: flex;
    align-items: center;
    justify-content: center;
}
.st-emotion-cache-9rpsw7 {
    display: flex;
    border-radius: 0rem;
    align-items: center;
    justify-content: center;
    -webkit-box-align: center;
    font-weight: 600;
    background-color: rgb(240, 242, 246);
    padding: 1rem;
    box-sizing: border-box;
}
.st-emotion-cache-9dv452 {
    border-radius: 0rem;
    color: #F55036;
    text-transform: uppercase;
}
"""

header_container_css = """
h2 {
    color: #F55036;
    font-weight: 200;
    font-size: calc(4rem + 1.2vw);
}
"""

transcript_container = """
details {
    border-radius: 0rem;
}
"""
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Project Media QA

Project Media QA lets you ask questions about any uploaded or recorded audio file or YouTube video, powered by Groq's Whisper speech-to-text transcription API and Groq's LLM API.

## Prerequisites

Before running the project, ensure you have the following:

- Python 3.10 (the project has been tested with this version)
- FFmpeg (used for audio extraction and conversion)
- A Groq API key

## Setup

1. Set up your Groq API key as an environment variable:

    ```bash
    export GROQ_API_KEY=<your-api-key>
    ```

    Replace `<your-api-key>` with your actual Groq API key.

2. Create a virtual environment:

    ```bash
    virtualenv -p python3 venv
    ```

3. Activate the virtual environment:

    ```bash
    source venv/bin/activate
    ```

4. Install the required dependencies:

    ```bash
    pip install -r requirements.txt
    ```

## Running the Application

To run the application locally, use the following command:

```bash
streamlit run app.py --server.port 8080 --server.address 0.0.0.0
```

This command starts the Streamlit application and makes it accessible at `http://localhost:8080`.
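
## Running with Docker

The repository also ships a Dockerfile, so you can run the app in a container instead. A minimal sketch (the image name `project-media-qa` is arbitrary; pass your Groq API key in through the environment):

```bash
docker build -t project-media-qa .
docker run -p 8080:8080 -e GROQ_API_KEY=<your-api-key> project-media-qa
```

The container starts Streamlit on port 8080, matching the `CMD` in the Dockerfile.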

## Usage

1. Launch the application by accessing `http://localhost:8080` in your web browser.

2. Choose an audio source: record audio in the browser, upload a media file, or load media from a URL (YouTube links are supported).

3. Click **Transcribe** to generate a transcript, then ask questions about it in the chat box.

## Contributing

If you'd like to contribute to this project, please follow these steps:

1. Fork the repository.

2. Create a new branch for your feature or bug fix.

3. Make your changes and commit them with descriptive commit messages.

4. Push your changes to your forked repository.

5. Submit a pull request to the main repository, explaining your changes and their benefits.

## License

This project is licensed under the [MIT License](LICENSE).

## Contact

If you have any questions or suggestions regarding this project, please feel free to contact the project maintainer at [skapadia@groq.com](mailto:skapadia@groq.com).
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
'''
Utility functions used throughout the app.
'''
import os
import time
from io import BytesIO
from typing import Iterable

import requests
import yt_dlp
from groq.types.chat import ChatCompletionMessageParam
from llama_index.core import Document, VectorStoreIndex

from config import GROQ_CLIENT, EMBED_MODEL, VECTOR_INDEX, PIPELINE

def combine_text_with_markers_and_speaker(data):
    '''Format diarized transcript data as "Speaker N: ..." lines.'''
    combined_text = ""
    for item in data:
        speaker_text = " ".join(sentence["text"] for sentence in item["sentences"])
        speaker_info = f"Speaker {item['speaker']}:"
        combined_text += f"{speaker_info} {speaker_text}\n"
    return combined_text

def read_from_url(url: str) -> BytesIO:
    '''Download a media file and return its contents as a BytesIO buffer.'''
    res = requests.get(url, timeout=60)
    audio_bytes = BytesIO(res.content)
    return audio_bytes

def read_from_youtube(url: str) -> tuple[BytesIO, str]:
    '''Download a YouTube video's audio track and return (buffer, MIME type).'''
    ydl_opts = {
        'format': 'worstaudio/worst',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'm4a',
            'preferredquality': '32',
        }],
        'outtmpl': 'temp_audio.%(ext)s',
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        filename = ydl.prepare_filename(info)

    # The file extension might have changed due to FFmpeg conversion
    if os.path.exists(filename):
        actual_filename = filename
    elif os.path.exists(filename.rsplit('.', 1)[0] + '.m4a'):
        actual_filename = filename.rsplit('.', 1)[0] + '.m4a'
    else:
        raise FileNotFoundError(f"Could not find the downloaded audio file: {filename}")

    # Read the file into a BytesIO object
    with open(actual_filename, 'rb') as f:
        buffer = BytesIO(f.read())

    # Get the MIME type
    mime_type = f"audio/{actual_filename.split('.')[-1]}"

    # Delete the temporary file
    os.remove(actual_filename)

    return buffer, mime_type

# Superseded pytube implementation, kept for reference:
# def read_from_youtube(url: str):
#     yt = YouTube(url)
#     video = yt.streams.filter(only_audio=True, mime_type="audio/webm").first()
#
#     if video is None:
#         raise ValueError("No audio/webm stream found for the given YouTube URL.")
#
#     buffer = BytesIO()
#     video.stream_to_buffer(buffer)
#     buffer.seek(0)
#
#     audio_data = buffer.read()
#
#     print(f"Audio retrieved as audio/webm (mimetype: {video.mime_type})")
#
#     return BytesIO(audio_data)

def prerecorded(source, model: str = "whisper-large-v3", options: dict[str, str] | None = None) -> dict:
    '''Transcribe a prerecorded audio buffer with Groq's transcription API.

    NOTE: `options` is accepted for forward compatibility but is not
    currently forwarded to the API.
    '''
    print(f"Source: {source}")
    start = time.time()
    audio_bytes: BytesIO = source['buffer']
    file_type = source.get("mimetype", "audio/wav")
    if not file_type:
        file_type = "audio/wav"
    file_type = file_type.split("/")[1]
    print(f"Final filetype: {file_type}")
    transcription = GROQ_CLIENT.audio.transcriptions.create(
        file=(f"audio.{file_type}", audio_bytes.read()),
        model=model,
    )
    end = time.time()
    audio_bytes.seek(0)
    return {
        'text': transcription.text,
        'time_taken': end - start,
    }

def create_vectorstore(transcript: str):
    '''Chunk and embed the transcript, then build and return a vector index.'''
    global VECTOR_INDEX
    nodes = PIPELINE.run(documents=[Document(text=transcript)])
    VECTOR_INDEX = VectorStoreIndex(embed_model=EMBED_MODEL, nodes=nodes)
    return VECTOR_INDEX

def chat_stream(model: str, messages: Iterable[ChatCompletionMessageParam], **kwargs):
    '''Stream chat-completion tokens from Groq, then yield a tokens/sec footer.'''
    stream_response = GROQ_CLIENT.chat.completions.create(
        messages=messages,
        model=model,
        stream=True,
        **kwargs
    )

    for chunk in stream_response:
        tokens = chunk.choices[0].delta.content
        if tokens:
            yield tokens
        if x_groq := chunk.x_groq:
            if not x_groq.usage:
                continue
            usage = x_groq.usage
            completion_time = usage.completion_time
            completion_tokens = usage.completion_tokens
            tps = completion_tokens / completion_time
            yield f"\n\n_Tokens/sec: {round(tps, 2)}_"
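
# --- Usage sketch (illustrative only; app.py drives these functions in the
# real app). It assumes GROQ_API_KEY is set, a local `sample.wav` exists, and
# that "llama3-8b-8192" is an available Groq chat model — all placeholder
# assumptions, not requirements of this module.
if __name__ == "__main__":
    with open("sample.wav", "rb") as f:
        audio = BytesIO(f.read())

    # Transcribe the audio with Whisper via Groq
    result = prerecorded({"buffer": audio, "mimetype": "audio/wav"})
    print(f"Transcribed in {result['time_taken']:.2f}s")

    # Index the transcript and retrieve context for a question
    index = create_vectorstore(result["text"])
    question = "What was discussed?"
    retrieved = index.as_retriever(similarity_top_k=3).retrieve(question)
    context = "\n".join(node.text for node in retrieved)

    # Stream an answer grounded in the retrieved context
    messages = [
        {"role": "system", "content": f"Answer questions using this transcript:\n{context}"},
        {"role": "user", "content": question},
    ]
    for token in chat_stream("llama3-8b-8192", messages):
        print(token, end="", flush=True)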
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
'''
Main entry point for the Streamlit app.
'''
from io import BytesIO

import streamlit as st
from audiorecorder import audiorecorder
from streamlit_extras.stylable_container import stylable_container

from styles import button_css, selectbox_css, file_uploader_css, header_container_css, transcript_container
from utils import read_from_url, prerecorded, chat_stream, create_vectorstore, read_from_youtube
from config import GROQ_CLIENT, VECTOR_INDEX

st.set_page_config(
    page_title="Project Media QA",
    layout='centered',
    page_icon='static/favicon.ico',
    menu_items={
        'About': "## Project Media QA \n [Groqlabs](https://wow.groq.com/groq-labs/)"
    }
)

st.write("---")
header_container = stylable_container(
    key="header",
    css_styles=header_container_css
)
header_container.header("Project Media QA", anchor=False)


ASR_MODELS = {"Whisper V3 large": "whisper-large-v3"}

# Build the LLM dropdown from the models Groq currently serves, excluding
# the ASR and moderation models
GROQ_MODELS = {
    model.id.replace("-", " ").title(): model.id
    for model in GROQ_CLIENT.models.list().data
    if not (model.id.startswith("whisper") or model.id.startswith("llama-guard"))
}

LANGUAGES = {
    "Automatic Language Detection": None,
}


st.caption("Experience ultra-accelerated video and audio transcription, summarization, and QA, made possible by combining open-source LLMs and ASR models, both powered by Groq.")


# Dropdowns with styling
dropdown_container = stylable_container(
    key="dropdown_container",
    css_styles=selectbox_css
)

# Columns for horizontal layout
col1, col2, col3 = st.columns(3)

with col1:
    language = st.selectbox(
        "Language",
        options=list(LANGUAGES.keys()),
    )
    if language == "Automatic Language Detection":
        lang_options = {"detect_language": True}
    else:
        lang_options = {"language": LANGUAGES[language]}

with col2:
    asr_model = st.selectbox("Groq Supported ASR Models", options=list(ASR_MODELS.keys()))

with col3:
    groq_model = st.selectbox("Groq Supported LLMs", options=list(GROQ_MODELS.keys()))

audio_source = st.radio(
    "Choose audio source",
    options=["Record audio", "Upload media file", "Load media from URL"],
    horizontal=True,
)

if audio_source == "Upload media file":
    file_uploader = stylable_container(
        key="file_uploader",
        css_styles=file_uploader_css
    )
    audio_file = file_uploader.file_uploader(
        label="Upload media file",
        type=["mp3", "wav", "webm"],
        label_visibility="collapsed",
    )
    print(f"Audio uploaded: {audio_file}")
    if audio_file:
        st.session_state['result'] = None
        st.session_state['audio'] = BytesIO(audio_file.getvalue())
        st.session_state['mimetype'] = audio_file.type
    else:
        st.session_state['audio'] = None
        st.session_state['mimetype'] = None

elif audio_source == "Load media from URL":
    url = st.text_input(
        "URL",
        key="url",
        value="https://static.deepgram.com/examples/interview_speech-analytics.wav",
    )

    if url != "":
        st.session_state["audio"] = None
        try:
            if "youtube.com" in url or "youtu.be" in url:
                print("Reading audio from YouTube")
                with st.spinner("Loading YouTube video..."):
                    st.session_state['result'] = None
                    st.video(url)
                    buffer, mimetype = read_from_youtube(url)
                    st.session_state["audio"] = buffer
                    st.session_state['mimetype'] = mimetype
            else:
                print("Reading audio from URL")
                with st.spinner("Loading audio URL..."):
                    st.session_state['result'] = None
                    st.session_state["audio"] = read_from_url(url)
                    st.session_state['mimetype'] = "audio/wav"
                    st.audio(st.session_state["audio"])
            print(f"Audio bytes: {st.session_state['audio'].getbuffer().nbytes} bytes")
        except Exception as e:
            st.error(e)
            st.error("Invalid URL entered.")

else:
    audio = audiorecorder("Click to record", "Click to stop recording", show_visualizer=True, key="audio-recorder")
    if len(audio) != 0:
        print(f"Audio recorded: {audio}, length {len(audio)}")
        st.session_state["result"] = None
        with st.spinner("Processing audio..."):
            audio_bytes = BytesIO()
            audio.export(audio_bytes, format="wav")
            st.session_state["audio"] = audio_bytes
            st.audio(audio_bytes)
            st.session_state['mimetype'] = "audio/wav"
            st.session_state["audio"].seek(0)
    else:
        st.session_state['audio'] = None
        st.session_state['mimetype'] = None


options = {
    "model": ASR_MODELS[asr_model],
    **lang_options,
}
@st.experimental_fragment
def transcribe_container():
    global transcribe_button_container, transcribe_status, transcribe_button, VECTOR_INDEX
    transcribe_button_container = stylable_container(
        key="transcribe_button",
        css_styles=button_css
    )
    transcribe_status = stylable_container(key="details", css_styles=transcript_container).empty()
    # Buttons with styling
    transcribe_button = transcribe_button_container.button("Transcribe", use_container_width=True, type="primary")
    if st.session_state['audio']:
        if transcribe_button:
            try:
                with transcribe_status.status("Transcribing", expanded=True) as transcribe_status:
                    output = prerecorded({"buffer": st.session_state["audio"], "mimetype": st.session_state.get("mimetype", "audio/wav")}, options['model'], options)
                    st.session_state.result = output['text']
                    transcribe_button_container.download_button("Download Transcript", data=st.session_state.result, type="primary", file_name="transcript.txt")
                    time_taken = output['time_taken']
                    transcribe_status.update(label=f"_Completed in {round(time_taken, 2)}s_", state='complete')
                    if st.session_state.result:
                        st.write(st.session_state.result)
                        with st.spinner("Indexing documents..."):
                            print("Indexing transcript to vectorstore...")
                            VECTOR_INDEX = create_vectorstore(st.session_state.result)
            except Exception as e:
                transcribe_status.update(label="Error", state='error')
                st.error("Something went wrong :/")

@st.experimental_fragment
def chat_container():
    global user_input, transcribe_status, VECTOR_INDEX
    if st.session_state.get('audio'):
        user_input = st.chat_input(placeholder="Ask a question about the transcript:")
    else:
        user_input = ""

    groq_m = GROQ_MODELS[groq_model]
    if user_input:
        # Transcribe on demand if the user chats before pressing "Transcribe"
        if not st.session_state.get("result"):
            try:
                with transcribe_status.status("Transcribing", expanded=True) as transcribe_status:
                    output = prerecorded({"buffer": st.session_state["audio"], "mimetype": st.session_state.get("mimetype", "audio/wav")}, options['model'], options)
                    st.session_state.result = output['text']
                    # TODO: download button does not work if user chats multiple times
                    transcribe_button_container.download_button("Download Transcript", data=st.session_state.result, type="primary", file_name="transcript.txt")
                    time_taken = output['time_taken']
                    transcribe_status.update(label=f"_Completed in {round(time_taken, 2)}s_", state='complete')
                    if st.session_state.result:
                        st.write(st.session_state.result)
                        with st.spinner("Indexing documents..."):
                            VECTOR_INDEX = create_vectorstore(st.session_state.result)
            except Exception as e:
                transcribe_status.update(label="Error", state='error')
                st.error("Something went wrong :/")

        # Chat: short transcripts fit into the prompt directly; longer ones go
        # through the RAG pipeline
        if len(st.session_state.result) <= 2000:
            print("Stuffing whole transcript into system prompt")
            context = st.session_state.result
        else:
            # Find the most similar chunks in the vector index
            print("Using RAG pipeline")
            retriever = VECTOR_INDEX.as_retriever(similarity_top_k=3)
            nodes = retriever.retrieve(user_input)
            context = ""
            for node in nodes:
                context += node.text + "\n"

        try:
            prompt = user_input
            messages = [
                {"role": "system", "content": f"""\
You are a helpful assistant that answers questions based on this transcript:
```
{context}
```
Answer questions that the user asks only about the transcript and nothing else. \
Do not include the user's question in your response, only respond with your answer. \
Your responses should be in markdown. \
"""},
                {"role": "user", "content": prompt},
            ]
            gen = chat_stream(groq_m, messages)
            if transcribe_status:
                transcribe_status.update(expanded=False)
            with st.chat_message("ai", avatar="./static/ai_avatar.png"):
                st.write_stream(gen)
        except Exception as e:
            st.error("Something went wrong :/")
        return

transcribe_container()
chat_container()
--------------------------------------------------------------------------------